/[theodore]/pyBB/modules/listquote.py


UCC Code Repository

Contents of /pyBB/modules/listquote.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (show annotations) (download) (as text)
Tue Jan 29 14:32:01 2008 UTC (12 years, 2 months ago) by svn-admin
File MIME type: text/x-python
File size: 29953 byte(s)
Re-import of repository after repository database corruption.

1 # 2005/08/28
2 # v1.4.0
3 # listquote.py
4
5 # Lists 'n' Quotes
6 # Handling lists and quoted strings
7 # Can be used for parsing/creating lists - or lines in a CSV file
8 # And also quoting or unquoting elements.
9
10 # Homepage : http://www.voidspace.org.uk/python/modules.shtml
11
12 # Copyright Michael Foord, 2004 & 2005.
13 # Released subject to the BSD License
14 # Please see http://www.voidspace.org.uk/documents/BSD-LICENSE.txt
15
16 # For information about bugfixes, updates and support, please join the Pythonutils mailing list.
17 # http://voidspace.org.uk/mailman/listinfo/pythonutils_voidspace.org.uk
18 # Comments, suggestions and bug reports welcome.
19 # Scripts maintained at http://www.voidspace.org.uk/python/index.shtml
20 # E-mail fuzzyman@voidspace.org.uk
21
22 """
23 Having written modules to handle turning a string representation of a list back
24 into a list (including nested lists) and also a very simple CSV parser, I
25 realised I needed a more solid set of functions for handling lists (comma
26 delimited lines) and quoting/unquoting elements of lists.
27
28 The test stuff provides useful examples of how the functions work.
29 """
30
# Pre-2.3 workaround for basestring.
# ``basestring`` is used for isinstance checks throughout the module; when the
# builtin is missing a (str, unicode) tuple is substituted for it.
try:
    basestring
except NameError:
    basestring = (str, unicode)

import re
# Optional leading whitespace, then a single or double quoted value (group 1,
# quotes included, matched non-greedily), then the rest of the line (group 2).
inquotes = re.compile(r'''\s*(".*?"|'.*?')(.*)''')
# NOTE: despite its name this matches values that contain *none* of the
# characters that force quoting (quotes, comma, space, brackets, '#').
# The value must be non-empty for the pattern to match.
badchars = re.compile(r'''^[^'," \[\]\(\)#]+$''')
##commented_line = re.compile(r'''\s*([^#]*)\s*(#.*)''')
# One list member: a single quoted item, a double quoted item, or an unquoted
# item that starts with a non-quote/comma/whitespace character and runs up to
# the next comma.
paramfinder = re.compile(r'''(?:'.*?')|(?:".*?")|(?:[^'",\s][^,]*)''')
# An unquoted element at the start of a line: group 1 is the value (it may
# keep trailing whitespace), group 2 is the remainder beginning with one of
# '#', ',', ')' or ']' - or ``None`` when the value runs to the end of the line.
unquoted = re.compile(r'''
    ([^\#,"'\(\)\[\]][^\#,\]\)]*)  # value
    \s*                            # whitespace - XXX not caught
    ([\#,\)\]].*)?                 # rest of the line
    $''', re.VERBOSE)
47
# Public names exported by ``from listquote import *``.
# ``CSVError`` is included because ``csvread`` raises it and callers need the
# name in scope to catch it.
__all__ = [
    'elem_quote',
    'unquote',
    'ListQuoteError',
    'QuoteError',
    'UnQuoteError',
    'BadLineError',
    'CommentError',
    'CSVError',
    'quote_escape',
    'quote_unescape',
    'simplelist',
    'LineParser',
    'lineparse',
    'csvread',
    'csvwrite',
    'list_stringify',
    'makelist'
    ]
66
class ListQuoteError(SyntaxError):
    """
    Base class for errors raised by the listquote module.

    It derives from ``SyntaxError``, so a handler for that builtin exception
    also catches every listquote error.
    """
69
class QuoteError(ListQuoteError):
    """
    This value can't be quoted.

    Raised by ``elem_quote`` for values that contain a line feed, or that
    contain both single and double quotes.
    """
72
class UnQuoteError(ListQuoteError):
    """
    The value is badly quoted.

    Raised by ``unquote`` when a value starts with a quote that is never
    closed, or (with ``fullquote``) when text follows the closing quote.
    """
75
class BadLineError(ListQuoteError):
    """
    A line is badly built.

    Raised by ``LineParser.feed`` when a line can't be parsed as a list, for
    example a missing comma between members or an unclosed bracket.
    """
78
class CommentError(BadLineError):
    """
    A line contains a disallowed comment.

    Raised by ``LineParser.feed`` when the ``comment`` option is ``False``
    and the line ends with a ``#`` comment.
    """
81
class CSVError(ListQuoteError):
    """
    The CSV File contained errors.

    Raised by ``csvread``, which attaches two attributes to the instance
    before raising it: ``csv`` (the parsed rows, with ``[]`` for every line
    that failed) and ``errors`` (the list of exceptions that were caught).
    """
84
85 #################################################################
86 # functions for quoting and unquoting
87
def elem_quote(member, nonquote=True, stringify=False, encoding=None):
    """
    Simple method to add the most appropriate quote to an element - either
    single quotes or double quotes.

    If member contains a line feed a ``QuoteError`` is raised - multiline
    values can't be quoted by elem_quote.

    If ``nonquote`` is set to ``True`` (the default), then a non-empty member
    that contains none of ``'," []()#`` isn't quoted at all.

    Double quotes are used in preference. Single quotes are used if member
    contains a double quote. If member contains both single quotes *and*
    double quotes a ``QuoteError`` is raised - pass such values through
    ``quote_escape`` first.

    If ``stringify`` is set to ``True`` (the default is ``False``) then non
    string (unicode or byte-string) values will be first converted to strings
    using the ``str`` function. Otherwise elem_quote raises a ``TypeError``.

    If ``encoding`` is not ``None`` and member is a byte string, then it will
    be decoded into unicode using this encoding.

    >>> elem_quote('hello')
    'hello'
    >>> elem_quote('hello', nonquote=False)
    '"hello"'
    >>> elem_quote('"hello"')
    '\\'"hello"\\''
    >>> elem_quote(3)
    Traceback (most recent call last):
    TypeError: Can only quote strings. "3"
    >>> elem_quote(3, stringify=True)
    '3'
    >>> elem_quote('hello', encoding='ascii')
    u'hello'
    >>> elem_quote('\\n')
    Traceback (most recent call last):
    QuoteError: Multiline values can't be quoted.
    "
    "
    """
    if not isinstance(member, basestring):
        if not stringify:
            # FIXME: is this the appropriate error message ?
            raise TypeError('Can only quote strings. "%s"' % str(member))
        member = str(member)
    if encoding and isinstance(member, str):
        # from string to unicode
        member = unicode(member, encoding)
    if '\n' in member:
        # interpolate member directly: calling ``str`` on a unicode value
        # holding non-ascii characters would raise a UnicodeEncodeError and
        # hide the QuoteError
        raise QuoteError('Multiline values can\'t be quoted.\n"%s"' % member)
    #
    if nonquote and badchars.match(member) is not None:
        # nothing in the value needs protecting
        return member
    # this ordering of tests determines which quote character will be used in
    # preference - here we have \" first...
    if '"' not in member:
        return '"%s"' % member
    # but we will use either... which may not suit some people
    if "'" not in member:
        return "'%s'" % member
    raise QuoteError('Value can\'t be quoted : "%s"' % member)
152
def unquote(inline, fullquote=True, retain=False):
    """
    Unquote a value.

    If the value isn't quoted it returns the value unchanged. This includes
    empty and whitespace-only values. Note that an unquoted value is returned
    on its own (not as a tuple) even when ``fullquote`` is ``False``.

    If the value is badly quoted it raises ``UnQuoteError``.

    If retain is ``True`` (default is ``False``) then the quotes are left
    around the value (but leading or trailing whitespace will have been
    removed).

    If fullquote is ``False`` (default is ``True``) then unquote will only
    unquote the first part of the ``inline``. If there is anything after the
    quoted element, this will be returned as well (instead of raising an
    error).

    In this case the return value is ``(value, rest)``.

    >>> unquote('hello')
    'hello'
    >>> unquote('"hello"')
    'hello'
    >>> unquote('')
    ''
    >>> unquote('"hello')
    Traceback (most recent call last):
    UnQuoteError: Value is badly quoted: ""hello"
    >>> unquote('"hello" fish')
    Traceback (most recent call last):
    UnQuoteError: Value is badly quoted: ""hello" fish"
    >>> unquote("'hello'", retain=True)
    "'hello'"
    >>> unquote('"hello" fish', fullquote=False)
    ('hello', ' fish')
    """
    mat = inquotes.match(inline)
    if mat is None:
        stripped = inline.strip()
        # an empty value has no first character to inspect - treat it as
        # unquoted rather than failing with an IndexError
        if not stripped or stripped[0] not in '\'\"':
            # not quoted
            return inline
        # starts with a quote that is never closed
        raise UnQuoteError('Value is badly quoted: "%s"' % inline)
    quoted, rest = mat.groups()
    if fullquote and rest.strip():
        # something other than whitespace follows the closing quote
        raise UnQuoteError('Value is badly quoted: "%s"' % inline)
    if not retain:
        # drop the surrounding quote characters
        quoted = quoted[1:-1]
    if not fullquote:
        return quoted, rest
    return quoted
204
def quote_escape(value, lf='&mjf-lf;', quot='&mjf-quot;'):
    """
    Make a string safe for quoting with ``elem_quote``.

    Use this when the value *may* contain line feeds, or a mix of single and
    double quotes.

    Every line feed is replaced by ``lf`` (``&mjf-lf;`` by default).

    When the value holds both single quotes *and* double quotes, every double
    quote is replaced by ``quot`` (``&mjf-quot;`` by default). A value with
    only one kind of quote is left alone.

    >>> quote_escape('hello')
    'hello'
    >>> quote_escape('hello\\n')
    'hello&mjf-lf;'
    >>> quote_escape('hello"')
    'hello"'
    >>> quote_escape('hello"\\'')
    "hello&mjf-quot;'"
    >>> quote_escape('hello"\\'\\n', '&fish;', '&wobble;')
    "hello&wobble;'&fish;"
    """
    # line feeds first, the quote test then runs on the result
    escaped = value.replace('\n', lf)
    has_single = escaped.find("'") != -1
    has_double = escaped.find('"') != -1
    if has_single and has_double:
        escaped = escaped.replace('"', quot)
    return escaped
233
def quote_unescape(value, lf='&mjf-lf;', quot='&mjf-quot;'):
    """
    Reverse the escaping done by ``quote_escape``.

    Each ``lf`` marker is turned back into a line feed and each ``quot``
    marker back into a double quote. If the value was escaped with markers
    other than the defaults, the same markers must be passed here.

    >>> quote_unescape('hello')
    'hello'
    >>> quote_unescape('hello&mjf-lf;')
    'hello\\n'
    >>> quote_unescape("'hello'")
    "'hello'"
    >>> quote_unescape('hello"')
    'hello"'
    >>> quote_unescape("hello&mjf-quot;'")
    'hello"\\''
    >>> quote_unescape("hello&wobble;'&fish;", '&fish;', '&wobble;')
    'hello"\\'\\n'
    """
    # line feed markers are restored before quote markers
    for marker, original in ((lf, '\n'), (quot, '"')):
        value = value.replace(marker, original)
    return value
257
def simplelist(inline):
    """
    Split a string into a list of its members.

    The members are picked out with the ``paramfinder`` regular expression.
    Quotes around a member are kept, so call ``unquote`` on each member if
    the bare values are wanted.

    >>> simplelist('''hello, goodbye, 'title', "name", "I can't"''')
    ['hello', 'goodbye', "'title'", '"name"', '"I can\\'t"']

    FIXME: This doesn't work fully (allows some badly formed lists):
    e.g.
    >>> simplelist('hello, fish, "wobble" bottom hooray')
    ['hello', 'fish', '"wobble"', 'bottom hooray']
    """
    # the pattern has no capturing groups, so each match is one whole member
    return [found.group() for found in paramfinder.finditer(inline)]
275
276 ##############################################
277 # LineParser - a multi purpose line parser
278 # handles lines with comma separated values on it, followed by a comment
279 # correctly handles quoting
280 # *and* can handle nested lists - marked between '[...]' or '(...)'
281 # See the docstring for how this works
282 # by default it returns a (list, comment) tuple !
283 # There are several keyword arguments that control how LineParser works.
284
class LineParser(object):
    """
    An object to parse nested lists from strings.

    The parser is configured with the options ``recursive``, ``comment``,
    ``retain``, ``force_list`` and ``csv`` (see ``reset``) and then parses
    one line at a time with ``feed``.
    """

    # maps each character that opens a list to the character that closes it
    liststart = {'[': ']', '(': ')'}
    # characters that open (and close) a quoted element
    quotes = ['\'', '"']

    def __init__(self, options=None, **keywargs):
        """Initialise the LineParser. Takes the same arguments as ``reset``."""
        self.reset(options, **keywargs)

    def reset(self, options=None, **keywargs):
        """
        Reset the parser with the specified options.

        ``options`` is an optional dictionary of option names to values.
        Keyword arguments are merged on top of it. The dictionary passed in
        is not modified.

        Valid options (and their defaults) are : ::

            'recursive': True
            'comment': True
            'retain': False
            'force_list': False
            'csv': False

        Setting ``csv`` forces ``recursive`` to ``False``, ``force_list`` to
        ``True`` and ``comment`` to ``False`` - whatever values were passed
        for those options.

        Any other option name raises a ``TypeError``.
        """
        # merge into a fresh dictionary so that the caller's ``options``
        # dictionary is left untouched
        settings = {}
        if options is not None:
            settings.update(options)
        settings.update(keywargs)
        #
        defaults = {
            'recursive': True,
            'comment': True,
            'retain': False,
            'force_list': False,
            'csv': False
            }
        # check all the options are valid
        for entry in settings.keys():
            if entry not in defaults:
                raise TypeError("'%s' is an invalid keyword argument for "
                                "this function" % entry)
        defaults.update(settings)
        if defaults['csv']:
            # csv mode overrides these three options
            defaults.update({
                'recursive': False,
                'force_list': True,
                'comment': False,
                })
        #
        self.recursive = defaults['recursive']
        self.comment = defaults['comment']
        self.retain = defaults['retain']
        self.force_list = defaults['force_list']

    def feed(self, inline, endchar=None):
        """
        Parse a single line (or fragment).

        Uses the options set in the parser object.

        Can parse lists - including nested lists. (If ``recursive`` is
        ``False`` then nested lists will cause a ``BadLineError``).

        Return value depends on options.

        If ``comment`` is ``False`` it returns ``outvalue``

        If ``comment`` is ``True`` it returns ``(outvalue, comment)``. (Even if
        comment is just ``''``).

        If ``force_list`` is ``False`` then ``outvalue`` may be a list or a
        single item.

        If ``force_list`` is ``True`` then ``outvalue`` will always be a list -
        even if it has just one member.

        List syntax :

        * Comma separated lines ``a, b, c, d``
        * Lists can optionally be between square or ordinary brackets
            - ``[a, b, c, d]``
            - ``(a, b, c, d)``
        * Nested lists *must* be between brackets -  ``a, [a, b, c, d], c``
        * A single element list can be shown by a trailing comma - ``a,``
        * An empty list is shown by ``()`` or ``[]``

        Elements can be quoted with single or double quotes (but can't contain
        both).

        The line can optionally end with a comment (preceded by a '#').
        This depends on the ``comment`` attribute.

        If the line is badly built then this method will raise one of : ::

            CommentError, BadLineError, UnQuoteError

        Using the ``csv`` option is the same as setting : ::

            'recursive': False
            'force_list': True
            'comment': False

        ``endchar`` is used by the parser itself when it recurses into a
        bracketed list. It is the closing bracket being looked for, and in
        that case the return value is ``(list, rest_of_line)``.
        """
        # preserve the original line
        # for error messages
        if endchar is None:
            self.origline = inline
        inline = inline.lstrip()
        #
        outlist = []
        # True straight after an element - the next thing must be a comma
        comma_needed = False
        # a comma anywhere at this level means the result is a list
        found_comma = False
        while inline:
            # NOTE: this sort of operation would be quicker
            # with lists - but then can't use regexes
            thischar = inline[0]
            if thischar == '#':
                # reached a comment
                # end of the line...
                break
            #
            if thischar == endchar:
                # closing bracket of the list being parsed recursively
                return outlist, inline[1:]
            #
            if comma_needed:
                if thischar == ',':
                    inline = inline[1:].lstrip()
                    comma_needed = False
                    found_comma = True
                    continue
                raise BadLineError('Line is badly built :\n%s' % self.origline)
            #
            # the character that marks the end of the list
            # (``None`` if thischar doesn't start a list)
            listend = self.liststart.get(thischar)
            if listend is not None:
                if not self.recursive and endchar is not None:
                    # a list inside a list, which isn't allowed
                    raise BadLineError('Line is badly built :\n%s' % self.origline)
                newlist, inline = self.feed(inline[1:], endchar=listend)
                outlist.append(newlist)
                inline = inline.lstrip()
                comma_needed = True
                continue
            #
            if thischar in self.quotes:
                # this might raise an error
                # FIXME: trap the error and raise a more appropriate one ?
                element, inline = unquote(inline, fullquote=False,
                                          retain=self.retain)
                inline = inline.lstrip()
                outlist.append(element)
                comma_needed = True
                continue
            #
            # must be an unquoted element
            mat = unquoted.match(inline)
            if mat is not None:
                # FIXME: if the regex was better we wouldn't need an rstrip
                element = mat.group(1).rstrip()
                # group 2 will be ``None`` if we reach the end of the line
                inline = mat.group(2) or ''
                outlist.append(element)
                comma_needed = True
                continue
            # or it's a badly built line
            raise BadLineError('Line is badly built :\n%s' % self.origline)
        #
        # if we've been called recursively
        # we shouldn't have got this far
        # (the closing bracket was never found)
        if endchar is not None:
            raise BadLineError('Line is badly built :\n%s' % self.origline)
        #
        if not found_comma:
            # if we didn't find a comma
            # the value could be a nested list
            if outlist:
                outlist = outlist[0]
            else:
                outlist = ''
        if self.force_list and not isinstance(outlist, list):
            if outlist:
                outlist = [outlist]
            else:
                outlist = []
        if not self.comment:
            # anything left on the line at this point is a comment
            if inline:
                raise CommentError('Comment not allowed :\n%s' % self.origline)
            return outlist
        return outlist, inline
466
def lineparse(inline, options=None, **keywargs):
    """
    Parse a single line in one call.

    This mimics the old ``lineparse`` function and is convenient when only
    one line needs parsing. It builds a new ``LineParser`` from ``options``
    and the keyword arguments (so it accepts exactly what ``LineParser``
    accepts) and returns whatever its ``feed`` method returns for ``inline``.

    >>> lineparse('''"hello", 'goodbye', "I can't do that", 'You "can" !' # a comment''')
    (['hello', 'goodbye', "I can't do that", 'You "can" !'], '# a comment')
    >>> lineparse('''"hello", 'goodbye', "I can't do that", 'You "can" !' # a comment''', comment=False)
    Traceback (most recent call last):
    CommentError: Comment not allowed :
    "hello", 'goodbye', "I can't do that", 'You "can" !' # a comment
    >>> lineparse('''"hello", 'goodbye', "I can't do that", 'You "can" !' # a comment''', recursive=False)
    (['hello', 'goodbye', "I can't do that", 'You "can" !'], '# a comment')
    >>> lineparse('''"hello", 'goodbye', "I can't do that", 'You "can" !' # a comment''', csv=True)
    Traceback (most recent call last):
    CommentError: Comment not allowed :
    "hello", 'goodbye', "I can't do that", 'You "can" !' # a comment
    >>> lineparse('''"hello", 'goodbye', "I can't do that", 'You "can" !' ''', comment=False)
    ['hello', 'goodbye', "I can't do that", 'You "can" !']
    >>> lineparse('')
    ('', '')
    >>> lineparse('', force_list=True)
    ([], '')
    >>> lineparse('[]')
    ([], '')
    >>> lineparse('()')
    ([], '')
    >>> lineparse('()', force_list=True)
    ([], '')
    >>> lineparse('1,')
    (['1'], '')
    >>> lineparse('"Yo"')
    ('Yo', '')
    >>> lineparse('"Yo"', force_list=True)
    (['Yo'], '')
    >>> lineparse('''h, i, j, (h, i, ['hello', "f"], [], ([]),), k''')
    (['h', 'i', 'j', ['h', 'i', ['hello', 'f'], [], [[]]], 'k'], '')
    >>> lineparse('''h, i, j, (h, i, ['hello', "f"], [], ([]),), k''', recursive=False)
    Traceback (most recent call last):
    BadLineError: Line is badly built :
    h, i, j, (h, i, ['hello', "f"], [], ([]),), k
    >>> lineparse('fish#dog')
    ('fish', '#dog')
    >>> lineparse('"fish"#dog')
    ('fish', '#dog')
    >>> lineparse('(((())))')
    ([[[[]]]], '')
    >>> lineparse('((((,))))')
    Traceback (most recent call last):
    BadLineError: Line is badly built :
    ((((,))))
    >>> lineparse('hi, ()')
    (['hi', []], '')
    >>> lineparse('"hello", "",')
    (['hello', ''], '')
    >>> lineparse('"hello", ,')
    Traceback (most recent call last):
    BadLineError: Line is badly built :
    "hello", ,
    >>> lineparse('"hello", ["hi", ""], ""')
    (['hello', ['hi', ''], ''], '')
    >>> lineparse('''"member 1", "member 2", ["nest 1", ("nest 2", 'nest 2b', ['nest 3', 'value'], nest 2c), nest1b]''')
    (['member 1', 'member 2', ['nest 1', ['nest 2', 'nest 2b', ['nest 3', 'value'], 'nest 2c'], 'nest1b']], '')
    >>> lineparse('''"member 1", "member 2", ["nest 1", ("nest 2", 'nest 2b', ['nest 3', 'value'], nest 2c), nest1b]]''')
    Traceback (most recent call last):
    BadLineError: Line is badly built :
    "member 1", "member 2", ["nest 1", ("nest 2", 'nest 2b', ['nest 3', 'value'], nest 2c), nest1b]]
    """
    # a throwaway parser - it is only needed for this one line
    return LineParser(options, **keywargs).feed(inline)
541
542 ############################################################################
543 # a couple of functions to help build lists
544
def list_stringify(inlist):
    """
    Build a new list in which every member is a string.

    Accepts any iterable and always returns a list. Members that are lists or
    tuples are rebuilt recursively (and come back as lists). Members that are
    already strings or unicode objects are kept as they are. Anything else is
    converted with the ``str`` function.

    No decoding of strings into unicode objects (or vice-versa) is done.

    Useful before writing out lists. Used by ``makelist`` if stringify is set.

    >>> list_stringify([2, 2, 2, 2, (3, 3, 2.9)])
    ['2', '2', '2', '2', ['3', '3', '2.9']]
    >>> list_stringify(None)
    Traceback (most recent call last):
    TypeError: iteration over non-sequence
    >>> list_stringify([])
    []

    FIXME: can receive any iterable - e.g. a sequence
    >>> list_stringify('')
    []
    >>> list_stringify('Hello There')
    ['H', 'e', 'l', 'l', 'o', ' ', 'T', 'h', 'e', 'r', 'e']
    """
    result = []
    for member in inlist:
        if isinstance(member, (tuple, list)):
            # nested sequence - rebuild it the same way
            member = list_stringify(member)
        elif not isinstance(member, basestring):
            member = str(member)
        result.append(member)
    return result
585
586
def makelist(inlist, listchar='', stringify=False, escape=False, encoding=None):
    """
    Turn a list into a string that represents that list. (Suitable for
    parsing by ``LineParser``).

    ``listchar`` selects the bracket put around the list. It should be
    ``'['``, ``'('`` or ``''`` (no bracket).

    When ``listchar`` is ``''`` :

    * nested lists are written with ``'['``
    * an empty or single member list is also written with ``'['``

    If ``stringify`` is ``True`` (default is ``False``) the list is first
    passed through ``list_stringify``.

    If ``escape`` is ``True`` (default is ``False``) each member is passed
    through ``quote_escape`` before being quoted with ``elem_quote``.

    If ``encoding`` is not ``None`` it is handed on to ``elem_quote``, which
    decodes byte strings to unicode with it. Each item will then be a unicode
    object instead of a string.

    >>> makelist([])
    '[]'
    >>> makelist(['a', 'b', 'I can\\'t do it', 'Yes you "can" !'])
    'a, b, "I can\\'t do it", \\'Yes you "can" !\\''
    >>> makelist([3, 4, 5, [6, 7, 8]], stringify=True)
    '3, 4, 5, [6, 7, 8]'
    >>> makelist([3, 4, 5, [6, 7, 8]])
    Traceback (most recent call last):
    TypeError: Can only quote strings. "3"
    >>> makelist(['a', 'b', 'c', ('d', 'e'), ('f', 'g')], listchar='(')
    '(a, b, c, (d, e), (f, g))'
    >>> makelist(['hi\\n', 'Quote "heck\\''], escape=True)
    'hi&mjf-lf;, "Quote &mjf-quot;heck\\'"'
    >>> makelist(['a', 'b', 'c', ('d', 'e'), ('f', 'g')], encoding='UTF8')
    u'a, b, c, [d, e], [f, g]'
    """
    if stringify:
        inlist = list_stringify(inlist)
    # this makes '[' the default for empty or single value lists
    if len(inlist) < 2 and not listchar:
        listchar = '['
    # nested lists always get a bracket
    nested_char = listchar or '['
    pieces = []
    for member in inlist:
        if isinstance(member, (list, tuple)):
            # recursive for nested lists
            piece = makelist(member, nested_char, stringify, escape, encoding)
        else:
            if escape:
                member = quote_escape(member)
            piece = elem_quote(member, encoding=encoding)
        pieces.append(piece)
    templates = {'[': '[%s]', '(': '(%s)', '': '%s'}
    return templates[listchar] % ', '.join(pieces)
642
643 ############################################################################
644 # CSV functions
645 # csvread, csvwrite
646
def csvread(infile):
    """
    Given an infile as an iterable, return the CSV as a list of lists.

    infile can be an open file object or a list of lines.

    Every line is parsed with a ``LineParser`` in ``csv`` mode.

    If any of the lines are badly built then a ``CSVError`` will be raised.
    This has a ``csv`` attribute - which is a reference to the parsed CSV.
    Every line that couldn't be parsed will have ``[]`` for its entry.

    The error *also* has an ``errors`` attribute. This is a list of all the
    errors raised. Error in this will have an ``index`` attribute, which is the
    line number (counting from zero), and a ``line`` attribute - which is the
    actual line that caused the error.

    Example of usage :

    .. raw:: html

        {+coloring}

        handle = open(filename)
        # remove the trailing '\\n' from each line
        the_file = [line.rstrip('\\n') for line in handle.readlines()]
        csv = csvread(the_file)

        {-coloring}

    >>> a = '''"object 1", 'object 2', object 3
    ...     test 1 , "test 2" ,'test 3'
    ...     'obj 1',obj 2,"obj 3"'''
    >>> csvread(a.splitlines())
    [['object 1', 'object 2', 'object 3'], ['test 1', 'test 2', 'test 3'], ['obj 1', 'obj 2', 'obj 3']]
    >>> csvread(['object 1,'])
    [['object 1']]
    >>> try:
    ...     csvread(['object 1, "hello', 'object 1, # a comment in a csv ?'])
    ... except CSVError, e:
    ...     for entry in e.errors:
    ...         print entry.index, entry
    0 Value is badly quoted: ""hello"
    1 Comment not allowed :
    object 1, # a comment in a csv ?
    """
    out_csv = []
    errors = []
    parser = LineParser(csv=True)
    for index, line in enumerate(infile):
        try:
            values = parser.feed(line)
        except ListQuoteError as e:
            # keep going so that every bad line gets reported - the failed
            # line is represented by an empty list in the output
            values = []
            e.line = line
            e.index = index
            errors.append(e)
        #
        out_csv.append(values)
    #
    if errors:
        failure = CSVError("Parsing CSV failed. See 'errors' attribute.")
        failure.csv = out_csv
        failure.errors = errors
        raise failure
    return out_csv
713
def csvwrite(inlist, stringify=False):
    """
    Turn a list of lists into a list of CSV lines (one string per inner list).

    The lines will *not* be ``\\n`` terminated.

    Every member is quoted using ``elem_quote`` and the members of a line are
    joined with ``', '``. No escaping is done.

    Set stringify to ``True`` (default is ``False``) to convert non string
    members with ``str`` before the line is created. If stringify is
    ``False`` then any non string value will raise a ``TypeError``.

    Example of usage :

    .. raw:: html

        {+coloring}

        # escape each entry in each line (optional)
        for index in range(len(the_list)):
            the_list[index] = [quote_escape(val) for val in the_list[index]]
        #
        the_file = csvwrite(the_list)
        # add a '\\n' to each line - ready to write to file
        the_file = [line + '\\n' for line in the_file]

        {-coloring}

    >>> csvwrite([['object 1', 'object 2', 'object 3'], ['test 1', 'test 2', 'test 3'], ['obj 1', 'obj 2', 'obj 3']])
    ['"object 1", "object 2", "object 3"', '"test 1", "test 2", "test 3"', '"obj 1", "obj 2", "obj 3"']
    >>> csvwrite([[3, 3, 3]])
    Traceback (most recent call last):
    TypeError: Can only quote strings. "3"
    >>> csvwrite([[3, 3, 3]], True)
    ['3, 3, 3']
    """
    lines = []
    for row in inlist:
        if stringify:
            # strings are kept as they are, everything else goes through str
            converted = []
            for cell in row:
                if isinstance(cell, basestring):
                    converted.append(cell)
                else:
                    converted.append(str(cell))
            row = converted
        quoted_cells = []
        for cell in row:
            quoted_cells.append(elem_quote(cell))
        lines.append(', '.join(quoted_cells))
    return lines
765
766 ############################################################################
767
def _test():
    """Run the doctests found in this module's docstrings."""
    from doctest import testmod
    testmod()


# running the module as a script runs its doctests
if __name__ == "__main__":
    _test()
774
775
776 """
777 ISSUES/TODO
778 ===========
779
780 Fix bug in simplelist
781
782 Triple quote multiline values ?
783
784 Doesn't allow Python style string escaping (but has '&mjf-quot;' and '&mjf-lf;').
785
786 Uses both \' and \" as quotes and sometimes doesn't quote at all - see
787 elem_quote - may not *always* be compatible with other programs.
788
789 Allow space separated lists ? e.g. 10 5 100 20
790
791 Lineparser could create tuples.
792
793 Allow ',' as an empty list ?
794
795 CHANGELOG
796 =========
797
798 2005/08/28 - Version 1.4.0
799 --------------------------
800
801 * Greater use of regular expressions for added speed
802 * Re-implemented ``lineparse`` as the ``LineParser`` object
803 * Added doctests
804 * Custom exceptions
805 * Changed the behaviour of ``csvread`` and ``csvwrite``
806 * Removed the CSV ``compare`` function and the ``uncomment`` function
807 * Only ``'#'`` allowed for comments
808 * ``elem_quote`` raises exceptions
809 * Changed behaviour of ``unquote``
810 * Added ``quote_escape`` and ``quote_unescape``
811 * Removed the ``uni_conv`` option in the CSV functions
812
813 .. note::
814
815 These changes are quite extensive. If any of them cause you problems then
816 let me know. I can provide a workaround in the next release.
817
818 2005/06/01 Version 1.3.0
819 Fixed bug in lineparse handling of empty list members.
820 Thanks to bug report and fix by Par Pandit <ppandit@yahoo.com>
821 The 'unquote' function is now regex based.
822 (bugfix it now doesn't return a tuple if fullquote is 0)
823 Added the simplelist regex/function.
824 elem_quote and uncomment use a regex for clarity and speed.
825 Added a bunch of asserts to the tests.
826
827 2005/03/07 Version 1.2.1
828 makelist improved - better handling of empty or single member lists
829
830 2005/02/23 Version 1.2.0
831 Added uncomment for ConfigObj 3.3.0
832 Optimised unquote - not a character by character search any more.
833 lineparse does full '&mjf..;' escape conversions - even when unquote isn't used
834 makelist and elem_quote take an 'encoding' keyword for string members to be used to decode strings to unicode
835 optimised makelist (including a minor bugfix)
836 Change to lineparse - it wouldn't allow '[' or '(' inside elements unless they were quoted.
837
838 2004/12/04 Version 1.1.2
839 Changed the license (*again* - now OSI compatible).
840 Empty values are now quoted by elem_quote.
841
842 30-08-04 Version 1.1.1
843 Removed the unicode hammer in csvread.
844 Improved docs.
845
846 16-08-04 Version 1.1.0
847 Added handling for non-string elements in elem_quote (optional).
848 Replaced some old += with lists and ''.join() for speed improvements...
849 Using basestring and hasattr('__getitem__') tests instead of isinstance(list) and str in a couple of places.
850 Changed license text.
851 Made the tests useful.
852
853 19-06-04 Version 1.0.0
854 Seems to work ok. A worthy successor to listparse and csv_s - although not as elegant as it could be.
855
856 """

Managed by UCC Webmasters ViewVC Help
Powered by ViewVC 1.1.26