UCC Code Repository
Parent Directory
|
Revision Log
Re-import of repository after repository database corruption.
1 | svn-admin | 1 | # urlpath.py |
2 | |||
3 | # 0.1.0 | ||
4 | # 2005/08/20 | ||
5 | |||
6 | # Functions that handle url paths. | ||
7 | # Part of Pythonutils | ||
8 | # http://www.voidspace.org.uk/python/pythonutils.html | ||
9 | |||
10 | # Copyright Michael Foord, 2004 & 2005. | ||
11 | # Released subject to the BSD License | ||
12 | # Please see http://www.voidspace.org.uk/documents/BSD-LICENSE.txt | ||
13 | |||
14 | # For information about bugfixes, updates and support, please join the | ||
15 | # Pythonutils mailing list. | ||
16 | # http://voidspace.org.uk/mailman/listinfo/pythonutils_voidspace.org.uk | ||
17 | # Comments, suggestions and bug reports welcome. | ||
18 | # Scripts maintained at http://www.voidspace.org.uk/python/index.shtml | ||
19 | # E-mail [email protected] | ||
20 | |||
21 | import posixpath | ||
22 | import os | ||
23 | from urllib import url2pathname, pathname2url | ||
24 | |||
# Public names exported by ``from urlpath import *``.
__all__ = ['nativejoin', 'pathjoin', 'relpathto', 'tslash', 'relpath']
32 | |||
def pathjoin(base, *paths):
    """
    Join paths to a base, observing pardir.

    If base doesn't *end* with '/' we assume it's a file rather than a
    directory, so its final segment is dropped before joining.

    Everything up to and including the last '/' of base is kept, so a file
    that lives directly under the root (e.g. '/index.html') still joins
    relative to '/' rather than losing its leading slash.

    The joined path is normalised with ``posixpath.normpath``, which
    collapses '..' and '.' segments and removes any trailing slash.
    """
    if base and not base.endswith('/'):
        # Get rid of the filename but keep the slash in front of it.
        # rfind returns -1 when there is no '/', giving base[:0] == ''.
        base = base[:base.rfind('/') + 1]
    return posixpath.normpath(posixpath.join(base, *paths))
47 | |||
def nativejoin(base, path, *paths):
    """
    Joins paths to a base - returning a native file path.

    Given a base path and one or more relative locations (in posix format),
    join them with ``pathjoin`` and convert the result with
    ``url2pathname`` to get a file path in a (relatively) OS native way.

    Any extra positional arguments after ``path`` are passed straight
    through to ``pathjoin``.
    """
    return url2pathname(pathjoin(base, path, *paths))
56 | |||
def relpathto(thisdir, origin, dest):
    """
    Given two paths relative to a directory, work out a path from origin
    to destination.

    Assumes UNIX/URL type relative paths.
    If origin or dest doesn't *end* with '/' we assume it's a file rather
    than a directory.

    If the same paths are passed in :
        if the path ends with ('/') then we return ''
        else we return the last part of the path (presumably a filename)

    If thisdir doesn't start with '/' then we add one
    (this makes the top level of thisdir our root directory)
    """
    if not thisdir.startswith('/'):
        thisdir = '/' + thisdir
    orig_abs = posixpath.normpath(posixpath.join(thisdir, origin))
    dest_abs = posixpath.normpath(posixpath.join(thisdir, dest))
    # normpath strips the trailing slash; put it back, because it is what
    # distinguishes a directory from a file below.
    if origin.endswith('/') and not orig_abs.endswith('/'):
        orig_abs += '/'
    if dest.endswith('/') and not dest_abs.endswith('/'):
        dest_abs += '/'
    #
    # The last item of each split is the filename ('' for a directory),
    # so only the items before it are directories.
    orig_dirs = orig_abs.split('/')[:-1]
    dest_parts = dest_abs.split('/')
    dest_dirs = dest_parts[:-1]
    #
    # Find the location where the two directory chains start to differ.
    # Only directories are compared: the filename of dest must never be
    # matched against a directory of origin that has the same name.
    common = 0
    for orig_seg, dest_seg in zip(orig_dirs, dest_dirs):
        if orig_seg != dest_seg:
            break
        common += 1
    #
    # Work up from the origin to the point of divergence, then down the
    # diverging part of dest.  dest_parts[common:] always holds at least
    # the final item, so identical directories give '' and identical files
    # give just the filename.
    segments = ['..'] * (len(orig_dirs) - common)
    segments += dest_parts[common:]
    return '/'.join(segments)
116 | |||
def relpath(origin, dest):
    """Given two absolute paths, work out a path from origin to destination.

    Assumes UNIX/URL type relative paths.
    If origin or dest doesn't *end* with '/' we assume it's a file rather
    than a directory.

    If the same paths are passed in :
        if the path ends with ('/') then we return ''
        else we return the last part of the path (presumably a filename)

    If origin or dest don't start with '/' then we add it.

    We are *assuming* relative paths on the same device
    (i.e. same top level directory)
    """
    if not origin.startswith('/'):
        origin = '/' + origin
    if not dest.startswith('/'):
        dest = '/' + dest
    #
    # The last item of each split is the filename ('' for a directory),
    # so only the items before it are directories.
    orig_dirs = origin.split('/')[:-1]
    dest_parts = dest.split('/')
    dest_dirs = dest_parts[:-1]
    #
    # Find the location where the two directory chains start to differ.
    # Only directories are compared: the filename of dest must never be
    # matched against a directory of origin that has the same name.
    common = 0
    for orig_seg, dest_seg in zip(orig_dirs, dest_dirs):
        if orig_seg != dest_seg:
            break
        common += 1
    #
    # Work up from the origin to the point of divergence, then down the
    # diverging part of dest.  dest_parts[common:] always holds at least
    # the final item, so identical directories give '' and identical files
    # give just the filename.
    segments = ['..'] * (len(orig_dirs) - common)
    segments += dest_parts[common:]
    return '/'.join(segments)
164 | |||
def tslash(apath):
    """Return apath with a trailing '/' appended when it lacks one.

    The empty string and '.' are returned unchanged, as is any path that
    already ends in '/' or a backslash.

    A literal '/' is used instead of os.sep because these paths are meant
    for URLs, where the windoze separator would be wrong.
    """
    if not apath or apath == '.':
        return apath
    if apath.endswith('/') or apath.endswith('\\'):
        return apath
    return apath + '/'
178 | |||
179 | ############################################## | ||
180 | |||
181 | def testJoin(): | ||
182 | thelist = [ | ||
183 | ('/', 'fish.html'), | ||
184 | ('/dir/dir/', '../file'), | ||
185 | ('dir/dir/', '../file'), | ||
186 | ('dir/dir/', '../../file'), | ||
187 | ('dir/dir/', '../../../file'), | ||
188 | ('/dir/dir/', '../notherdir/file'), | ||
189 | ('/dir/dir/', '../../notherdir/file'), | ||
190 | ('dir/dir/', '../../notherdir/file'), | ||
191 | ('dir/dir/', '../../../notherdir/file'), | ||
192 | ('', '../path'), | ||
193 | ] | ||
194 | for entry in thelist: | ||
195 | print entry, ' :: ', pathjoin(*entry) | ||
196 | print entry, ' :: ', nativejoin(*entry) | ||
197 | print '\n' | ||
198 | |||
199 | def testRelpathto(): | ||
200 | thedir = '//toplevel/dirone/dirtwo/dirthree' | ||
201 | thelist = [ | ||
202 | ('file1.html', 'file2.html'), | ||
203 | ('file1.html', '../file2.html'), | ||
204 | ('../file1.html', '../file2.html'), | ||
205 | ('../file1.html', 'file2.html'), | ||
206 | ('../fish1/fish2/', '../../sub1/sub2/'), | ||
207 | ('../fish1/fish2/', 'sub1/sub2'), | ||
208 | ('../../../fish1/fish2/', 'sub1/sub2/'), | ||
209 | ('../../../fish1/fish2/', 'sub1/sub2/file1.html'), | ||
210 | ] | ||
211 | for orig, dest in thelist: | ||
212 | print '(%s, %s) : ' % (orig, dest), relpathto(thedir, orig, dest) | ||
213 | |||
214 | def testRelpathto2(): | ||
215 | thedir = 'section3/' | ||
216 | thelist = [ | ||
217 | ('../archive/strangeindex1.html', 'article2.html'), | ||
218 | ] | ||
219 | for orig, dest in thelist: | ||
220 | answer = relpathto(thedir, orig, dest) | ||
221 | print '(%s, %s) : ' % (orig, dest), answer | ||
222 | |||
223 | def testRelpath(): | ||
224 | thelist = [ | ||
225 | ('/hello/fish/', 'bungles'), | ||
226 | ] | ||
227 | for orig, dest in thelist: | ||
228 | answer = relpath(orig, dest) | ||
229 | print '(%s, %s) : ' % (orig, dest), answer | ||
230 | |||
231 | |||
if __name__ == '__main__':
    # Run the demo functions in order when executed as a script.
    # testRelpathto2 is deliberately left out.
    for demo in (testJoin, testRelpathto, testRelpath):
        demo()
237 | |||
238 | """ | ||
239 | TODO | ||
240 | ==== | ||
241 | |||
242 | More comprehensive tests. | ||
243 | |||
244 | CHANGELOG | ||
245 | 2005/07/31 | ||
246 | Can now pass multiple args to ``pathjoin``. | ||
247 | Finalised as version 0.1.0 | ||
248 | |||
249 | 2005/06/18 | ||
250 | Changes by Nicola Larosa | ||
251 | Code cleanup | ||
252 | lines shortened | ||
253 | comments on line above code | ||
254 | empty comments in empty lines | ||
255 | |||
256 | 2005/05/28 | ||
257 | Added relpath to __all__ | ||
258 | |||
259 | |||
260 | TODO | ||
261 | Move into pythonutils | ||
262 | relpathto could call relpath (and so be shorter) | ||
263 | nativejoin could accept multiple paths | ||
264 | Could tslash be more elegant ? | ||
265 | """ | ||
266 |
Managed by UCC Webmasters | ViewVC Help |
Powered by ViewVC 1.1.26 |