UCC Code Repository
Parent Directory
|
Revision Log
Re-import of repository after repository database corruption.
1 | # urlpath.py |
2 | |
3 | # 0.1.0 |
4 | # 2005/08/20 |
5 | |
6 | # Functions that handle url paths. |
7 | # Part of Pythonutils |
8 | # http://www.voidspace.org.uk/python/pythonutils.html |
9 | |
10 | # Copyright Michael Foord, 2004 & 2005. |
11 | # Released subject to the BSD License |
12 | # Please see http://www.voidspace.org.uk/documents/BSD-LICENSE.txt |
13 | |
14 | # For information about bugfixes, updates and support, please join the |
15 | # Pythonutils mailing list. |
16 | # http://voidspace.org.uk/mailman/listinfo/pythonutils_voidspace.org.uk |
17 | # Comments, suggestions and bug reports welcome. |
18 | # Scripts maintained at http://www.voidspace.org.uk/python/index.shtml |
19 | # E-mail [email protected] |
20 | |
import posixpath
import os
try:
    from urllib import url2pathname, pathname2url
except ImportError:
    # Python 3 moved these helpers into urllib.request.
    from urllib.request import url2pathname, pathname2url
24 | |
# Public API: the names exported by a star-import of this module.
__all__ = [
    'nativejoin',
    'pathjoin',
    'relpathto',
    'tslash',
    'relpath'
]
32 | |
def pathjoin(base, *paths):
    """
    Join one or more paths to a base, observing pardir.

    If base doesn't *end* with '/' we assume it's a file rather than a
    directory, so its last component is dropped before joining.  Everything
    up to and including the final '/' is kept, which means a file in the
    root directory ('/index.html') still yields the root ('/') as the base.

    The joined result is normalised with ``posixpath.normpath``, so '..'
    segments are collapsed and any trailing slash is removed.  As with
    ``posixpath.join``, an absolute path in ``paths`` discards everything
    before it.
    """
    if base and not base.endswith('/'):
        # Drop the filename but keep the slash in front of it.  rfind
        # returns -1 when there is no slash at all, leaving an empty base.
        base = base[:base.rfind('/') + 1]
    return posixpath.normpath(posixpath.join(base, *paths))
47 | |
def nativejoin(base, path, *paths):
    """
    Join paths to a base - returning a native file path.

    Given a base path and one or more relative locations (in posix format),
    join them with ``pathjoin`` and pass the result through
    ``url2pathname`` to get a file path in a (relatively) OS native way.

    ``path`` is required; any further locations are optional and are joined
    after it, in order, exactly as ``pathjoin`` joins multiple paths.
    """
    return url2pathname(pathjoin(base, path, *paths))
56 | |
def relpathto(thisdir, origin, dest):
    """
    Given two paths relative to a directory, work out a path from origin
    to destination.

    Assumes UNIX/URL type relative paths.
    If origin doesn't *end* with '/' we assume it's a file rather than a
    directory.  The same rule applies to dest: its last component is
    treated as a file name and is always kept in the result, never
    matched against one of origin's directories.

    If the same paths are passed in :
        if the path ends with ('/') then we return ''
        else we return the last part of the path (presumably a filename)

    If thisdir doesn't start with '/' then we add one
    (this makes the top level of thisdir our root directory)
    """
    if not thisdir.startswith('/'):
        thisdir = '/' + thisdir
    orig_abs = posixpath.normpath(posixpath.join(thisdir, origin))
    dest_abs = posixpath.normpath(posixpath.join(thisdir, dest))
    # normpath strips a trailing slash; put it back so that a directory is
    # not mistaken for a file below.
    if origin.endswith('/') and not orig_abs.endswith('/'):
        orig_abs = orig_abs + '/'
    if dest.endswith('/') and not dest_abs.endswith('/'):
        dest_abs = dest_abs + '/'
    #
    # the last item is a filename (or '' for a directory) - only the
    # directories in front of it take part in the comparison
    orig_dirs = orig_abs.split('/')[:-1]
    dest_parts = dest_abs.split('/')
    dest_dirs = dest_parts[:-1]
    #
    # find the location where the two directory chains start to differ.
    # Both paths are absolute at this point, so they always share at least
    # the root.
    common = 0
    for orig_seg, dest_seg in zip(orig_dirs, dest_dirs):
        if orig_seg != dest_seg:
            break
        common += 1
    #
    # one '..' for every origin directory below the point of divergence,
    # followed by the rest of dest (always at least its last component).
    segments = ['..'] * (len(orig_dirs) - common)
    segments += dest_parts[common:]
    return '/'.join(segments)
116 | |
def relpath(origin, dest):
    """Given two absolute paths, work out a path from origin to destination.

    Assumes UNIX/URL type relative paths.
    If origin doesn't *end* with '/' we assume it's a file rather than
    a directory.  The same rule applies to dest: its last component is
    treated as a file name and is always kept in the result, never
    matched against one of origin's directories.

    If the same paths are passed in :
        if the path ends with ('/') then we return ''
        else we return the last part of the path (presumably a filename)

    If origin or dest don't start with '/' then we add it.

    We are *assuming* relative paths on the same device
    (i.e. same top level directory)

    The paths are compared segment by segment as given; they are not
    normalised first.
    """
    if not origin.startswith('/'):
        origin = '/' + origin
    if not dest.startswith('/'):
        dest = '/' + dest
    #
    # the last item is a filename (or '' for a directory) - only the
    # directories in front of it take part in the comparison
    orig_dirs = origin.split('/')[:-1]
    dest_parts = dest.split('/')
    dest_dirs = dest_parts[:-1]
    #
    # find the location where the two directory chains start to differ.
    common = 0
    for orig_seg, dest_seg in zip(orig_dirs, dest_dirs):
        if orig_seg != dest_seg:
            break
        common += 1
    #
    # one '..' for every origin directory below the point of divergence,
    # followed by the rest of dest (always at least its last component).
    segments = ['..'] * (len(orig_dirs) - common)
    segments += dest_parts[common:]
    return '/'.join(segments)
164 | |
def tslash(apath):
    """Return apath with a trailing '/' appended when it lacks one.

    Empty (or otherwise false) values, '.', and paths that already end in
    '/' or a backslash are handed back unchanged.

    A literal '/' is used rather than os.sep because these are URL style
    paths, and os.sep would give the wrong separator on Windows.
    """
    # Nothing to add to an empty path or to the current-directory marker.
    if not apath or apath == '.':
        return apath
    # Already terminated by either kind of separator.
    if apath.endswith('/') or apath.endswith('\\'):
        return apath
    return apath + '/'
178 | |
179 | ############################################## |
180 | |
def testJoin():
    """Print the pathjoin and nativejoin results for some sample inputs."""
    thelist = [
        ('/', 'fish.html'),
        ('/dir/dir/', '../file'),
        ('dir/dir/', '../file'),
        ('dir/dir/', '../../file'),
        ('dir/dir/', '../../../file'),
        ('/dir/dir/', '../notherdir/file'),
        ('/dir/dir/', '../../notherdir/file'),
        ('dir/dir/', '../../notherdir/file'),
        ('dir/dir/', '../../../notherdir/file'),
        ('', '../path'),
    ]
    for entry in thelist:
        # Each line is built as one string and printed with a single
        # argument, so the output is the same whether print is a statement
        # (Python 2) or a function (Python 3).
        print(' '.join([str(entry), ' :: ', pathjoin(*entry)]))
        print(' '.join([str(entry), ' :: ', nativejoin(*entry)]))
        print('\n')
198 | |
def testRelpathto():
    """Print the relpathto result for sample origin/destination pairs."""
    thedir = '//toplevel/dirone/dirtwo/dirthree'
    thelist = [
        ('file1.html', 'file2.html'),
        ('file1.html', '../file2.html'),
        ('../file1.html', '../file2.html'),
        ('../file1.html', 'file2.html'),
        ('../fish1/fish2/', '../../sub1/sub2/'),
        ('../fish1/fish2/', 'sub1/sub2'),
        ('../../../fish1/fish2/', 'sub1/sub2/'),
        ('../../../fish1/fish2/', 'sub1/sub2/file1.html'),
    ]
    for orig, dest in thelist:
        answer = relpathto(thedir, orig, dest)
        # One pre-joined string, so the output is the same whether print is
        # a statement (Python 2) or a function (Python 3).
        print(' '.join(['(%s, %s) : ' % (orig, dest), answer]))
213 | |
def testRelpathto2():
    """Print the relpathto result for a pair that climbs out of thedir."""
    thedir = 'section3/'
    thelist = [
        ('../archive/strangeindex1.html', 'article2.html'),
    ]
    for orig, dest in thelist:
        answer = relpathto(thedir, orig, dest)
        # One pre-joined string, so the output is the same whether print is
        # a statement (Python 2) or a function (Python 3).
        print(' '.join(['(%s, %s) : ' % (orig, dest), answer]))
222 | |
def testRelpath():
    """Print the relpath result for a sample origin/destination pair."""
    thelist = [
        ('/hello/fish/', 'bungles'),
    ]
    for orig, dest in thelist:
        answer = relpath(orig, dest)
        # One pre-joined string, so the output is the same whether print is
        # a statement (Python 2) or a function (Python 3).
        print(' '.join(['(%s, %s) : ' % (orig, dest), answer]))
230 | |
231 | |
if __name__ == '__main__':
    # Run the printing tests, in order, when executed as a script.
    # testRelpathto2 is not part of the default run.
    for run_demo in (testJoin, testRelpathto, testRelpath):
        run_demo()
237 | |
238 | """ |
239 | TODO |
240 | ==== |
241 | |
242 | More comprehensive tests. |
243 | |
244 | CHANGELOG |
245 | 2005/07/31 |
246 | Can now pass multiple args to ``pathjoin``. |
247 | Finalised as version 0.1.0 |
248 | |
249 | 2005/06/18 |
250 | Changes by Nicola Larosa |
251 | Code cleanup |
252 | lines shortened |
253 | comments on line above code |
254 | empty comments in empty lines |
255 | |
256 | 2005/05/28 |
257 | Added relpath to __all__ |
258 | |
259 | |
260 | TODO |
261 | Move into pythonutils |
262 | relpathto could call relpath (and so be shorter) |
263 | nativejoin could accept multiple paths |
264 | Could tslash be more elegant ? |
265 | """ |
266 |
Managed by UCC Webmasters | ViewVC Help |
Powered by ViewVC 1.1.26 |