3 # License: MIT (see LICENSE file provided)
4 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
7 **polib** allows you to manipulate, create, modify gettext files (pot, po and
8 mo files). You can load existing files, iterate through their entries, add,
9 modify entries, comments or metadata, etc. or create new po files from scratch.
11 **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
12 :func:`~polib.mofile` convenience functions.
15 __author__
= 'David Jean Louis <izimobil@gmail.com>'
17 __all__
= ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry',
18 'default_encoding', 'escape', 'unescape', 'detect_encoding', ]
32 # replacement of io.open() for python < 2.6
33 # we use codecs instead
def open(fpath, mode='r', encoding=None):
    """
    Replacement of ``io.open()`` for python < 2.6, implemented with
    ``codecs.open()``.

    Arguments:

    ``fpath``
        string, path of the file to open.

    ``mode``
        string, the mode in which the file is opened (default: ``'r'``).

    ``encoding``
        string, the encoding handed to ``codecs.open()`` (default: ``None``).

    Returns the file object created by ``codecs.open()``.
    """
    return codecs.open(fpath, mode, encoding)
40 # the default encoding to use when encoding cannot be detected
default_encoding = 'utf-8'
43 # python 2/3 compatibility helpers {{{
46 if sys
.version_info
[:2] < (3, 0):
54 return unicode(s
, "unicode_escape")
61 return s
.encode("latin-1")
66 # _pofile_or_mofile {{{
def _pofile_or_mofile(f, type, **kwargs):
    """
    Internal function used by :func:`polib.pofile` and :func:`polib.mofile` to
    honor the DRY concept.

    Arguments:

    ``f``
        string, path to the po/mo file or its content.

    ``type``
        string, ``'pofile'`` selects the po parser, any other value selects
        the mo parser.

    ``**kwargs``
        the ``encoding``, ``check_for_duplicates``, ``klass`` and
        ``wrapwidth`` keywords are read, see :func:`polib.pofile`.

    Returns the instance produced by the parser, with its ``wrapwidth``
    attribute set.
    """
    # get the file encoding, detecting it when the caller did not give one
    enc = kwargs.get('encoding')
    if enc is None:
        enc = detect_encoding(f, type == 'mofile')

    # parse the file
    if type == 'pofile':
        kls = _POFileParser
    else:
        kls = _MOFileParser
    parser = kls(
        f,
        encoding=enc,
        check_for_duplicates=kwargs.get('check_for_duplicates', False),
        klass=kwargs.get('klass')
    )
    instance = parser.parse()
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
def _is_file(filename_or_contents):
    """
    Safely returns the value of os.path.exists(filename_or_contents).

    Arguments:

    ``filename_or_contents``
        either a filename, or a string holding the contents of some file.
        In the latter case, this function will always return False.
    """
    try:
        return os.path.exists(filename_or_contents)
    except (ValueError, UnicodeEncodeError):
        # the argument could not even be handled as a path: treat it as
        # file contents
        return False
109 # function pofile() {{{
def pofile(pofile, **kwargs):
    """
    Convenience function that parses the po or pot file ``pofile`` and returns
    a :class:`~polib.POFile` instance.

    Arguments:

    ``pofile``
        string, full or relative path to the po/pot file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``, the return value will be a :class:`~polib.POFile`
        instance).
    """
    return _pofile_or_mofile(pofile, 'pofile', **kwargs)
141 # function mofile() {{{
def mofile(mofile, **kwargs):
    """
    Convenience function that parses the mo file ``mofile`` and returns a
    :class:`~polib.MOFile` instance.

    Arguments:

    ``mofile``
        string, full or relative path to the mo file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext to generate the po file that was used to format the mo file
        (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``, the return value will be a :class:`~polib.MOFile`
        instance).
    """
    return _pofile_or_mofile(mofile, 'mofile', **kwargs)
174 # function detect_encoding() {{{
def detect_encoding(file, binary_mode=False):
    """
    Try to detect the encoding used by the ``file``. The ``file`` argument can
    be a PO or MO file path or a string containing the contents of the file.
    If the encoding cannot be detected, the function will return the value of
    ``default_encoding``.

    Arguments:

    ``file``
        string, full or relative path to the po/mo file or its content.

    ``binary_mode``
        boolean, set this to True if ``file`` is a mo file.
    """
    PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
    rxt = re.compile(u(PATTERN))
    rxb = re.compile(b(PATTERN))

    def charset_exists(charset):
        """Check whether ``charset`` is valid or not."""
        try:
            codecs.lookup(charset)
        except LookupError:
            return False
        return True

    if not _is_file(file):
        # ``file`` holds the contents itself: search it directly
        match = rxt.search(file)
        if match:
            enc = match.group(1).strip()
            if charset_exists(enc):
                return enc
    else:
        # For PY3, always treat as binary
        if binary_mode or PY3:
            mode, rx = 'rb', rxb
        else:
            mode, rx = 'r', rxt
        # the context manager closes the handle on every exit path, and
        # iterating lazily stops reading as soon as a valid charset is found
        with open(file, mode) as f:
            for line in f:
                match = rx.search(line)
                if match:
                    enc = match.group(1).strip()
                    if not isinstance(enc, text_type):
                        enc = enc.decode('utf-8')
                    if charset_exists(enc):
                        return enc
    return default_encoding
231 # function escape() {{{
def escape(st):
    """
    Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
    the given string ``st`` and returns it.
    """
    # the backslash has to come first, otherwise the backslashes added by the
    # following replacements would themselves be escaped
    replacements = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    for char, escaped in replacements:
        st = st.replace(char, escaped)
    return st
245 # function unescape() {{{
def unescape(st):
    """
    Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
    the given string ``st`` and returns it.
    """
    replacements = {'n': '\n', 't': '\t', 'r': '\r', '\\': '\\'}

    def unescape_repl(m):
        char = m.group(1)
        # the double quote is not in the table and falls through unchanged:
        # handles escaped double quote
        return replacements.get(char, char)
    return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
266 # function natural_sort() {{{
def natural_sort(lst):
    """
    Sort naturally the given list.
    Credits: http://stackoverflow.com/a/4836734

    Arguments:

    ``lst``
        iterable of strings to sort.

    Returns a new list in which runs of ASCII digits are compared by their
    numeric value and the other characters are compared case-insensitively.
    """
    def alphanum_key(key):
        # re.split() with a capturing group alternates text chunks (even
        # positions) and digit runs (odd positions). Using the position
        # instead of str.isdigit() keeps non-ASCII digit characters, which
        # isdigit() accepts but the pattern never captures, in the text
        # chunks, so every key has the same str/int layout.
        chunks = re.split('([0-9]+)', key)
        return [int(chunk) if pos % 2 else chunk.lower()
                for pos, chunk in enumerate(chunks)]
    return sorted(lst, key=alphanum_key)
278 # class _BaseFile {{{
281 class _BaseFile(list):
283 Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
284 classes. This class should **not** be instantiated directly.
287 def __init__(self
, *args
, **kwargs
):
289 Constructor, accepts the following keyword arguments:
292 string, the path to the po or mo file, or its content as a string.
295 integer, the wrap width, only useful when the ``-w`` option was
296 passed to xgettext (optional, default: ``78``).
299 string, the encoding to use, defaults to ``default_encoding``
300 global variable (optional).
302 ``check_for_duplicates``
303 whether to check for duplicate entries when adding entries to the
304 file, (optional, default: ``False``).
307 # the opened file handle
308 pofile
= kwargs
.get('pofile', None)
309 if pofile
and _is_file(pofile
):
312 self
.fpath
= kwargs
.get('fpath')
313 # the width at which lines should be wrapped
314 self
.wrapwidth
= kwargs
.get('wrapwidth', 78)
316 self
.encoding
= kwargs
.get('encoding', default_encoding
)
317 # whether to check for duplicate entries or not
318 self
.check_for_duplicates
= kwargs
.get('check_for_duplicates', False)
321 # both po and mo files have metadata
323 self
.metadata_is_fuzzy
= 0
325 def __unicode__(self
):
327 Returns the unicode representation of the file.
330 entries
= [self
.metadata_as_entry()] + \
331 [e
for e
in self
if not e
.obsolete
]
332 for entry
in entries
:
333 ret
.append(entry
.__unicode
__(self
.wrapwidth
))
334 for entry
in self
.obsolete_entries():
335 ret
.append(entry
.__unicode
__(self
.wrapwidth
))
336 ret
= u('\n').join(ret
)
338 assert isinstance(ret
, text_type
)
339 #if type(ret) != text_type:
340 # return unicode(ret, self.encoding)
345 return self
.__unicode
__()
349 Returns the string representation of the file.
351 return unicode(self
).encode(self
.encoding
)
353 def __contains__(self
, entry
):
355 Overridden ``list`` method to implement the membership test (in and
357 The method considers that an entry is in the file if it finds an entry
358 that has the same msgid (the test is **case sensitive**) and the same
359 msgctxt (or none for both entries).
364 an instance of :class:`~polib._BaseEntry`.
366 return self
.find(entry
.msgid
, by
='msgid', msgctxt
=entry
.msgctxt
) \
369 def __eq__(self
, other
):
370 return str(self
) == str(other
)
def append(self, entry):
    """
    Overridden method to check for duplicates entries, if a user tries to
    add an entry that is already in the file, the method will raise a
    ``ValueError`` exception.

    Argument:

    ``entry``
        an instance of :class:`~polib._BaseEntry`.
    """
    # check_for_duplicates may not be defined (yet) when unpickling.
    # But if pickling, we never want to check for duplicates anyway.
    if getattr(self, 'check_for_duplicates', False) and entry in self:
        raise ValueError('Entry "%s" already exists' % entry.msgid)
    super(_BaseFile, self).append(entry)
def insert(self, index, entry):
    """
    Overridden method to check for duplicates entries, if a user tries to
    add an entry that is already in the file, the method will raise a
    ``ValueError`` exception.

    Arguments:

    ``index``
        index at which the entry should be inserted.

    ``entry``
        an instance of :class:`~polib._BaseEntry`.
    """
    # read the flag the same way append() does, so that an instance on
    # which the attribute is not defined yet skips the check
    if getattr(self, 'check_for_duplicates', False) and entry in self:
        raise ValueError('Entry "%s" already exists' % entry.msgid)
    super(_BaseFile, self).insert(index, entry)
407 def metadata_as_entry(self
):
409 Returns the file metadata as a :class:`~polib.POEntry` instance.
411 e
= POEntry(msgid
='')
412 mdata
= self
.ordered_metadata()
415 for name
, value
in mdata
:
416 # Strip whitespace off each line in a multi-line entry
417 strs
.append('%s: %s' % (name
, value
))
418 e
.msgstr
= '\n'.join(strs
) + '\n'
419 if self
.metadata_is_fuzzy
:
420 e
.flags
.append('fuzzy')
423 def save(self
, fpath
=None, repr_method
='__unicode__'):
425 Saves the po file to ``fpath``.
426 If it is an existing file and no ``fpath`` is provided, then the
427 existing file is rewritten with the modified data.
432 string, full or relative path to the file.
435 string, the method to use for output.
437 if self
.fpath
is None and fpath
is None:
438 raise IOError('You must provide a file path to save() method')
439 contents
= getattr(self
, repr_method
)()
442 if repr_method
== 'to_binary':
443 fhandle
= open(fpath
, 'wb')
445 fhandle
= io
.open(fpath
, 'w', encoding
=self
.encoding
)
446 if not isinstance(contents
, text_type
):
447 contents
= contents
.decode(self
.encoding
)
448 fhandle
.write(contents
)
450 # set the file path if not set
451 if self
.fpath
is None and fpath
:
454 def find(self
, st
, by
='msgid', include_obsolete_entries
=False,
457 Find the entry which msgid (or property identified by the ``by``
458 argument) matches the string ``st``.
463 string, the string to search for.
466 string, the property to use for comparison (default: ``msgid``).
468 ``include_obsolete_entries``
469 boolean, whether to also search in entries that are obsolete.
472 string, allows specifying a specific message context for the
475 if include_obsolete_entries
:
478 entries
= [e
for e
in self
if not e
.obsolete
]
480 if getattr(e
, by
) == st
:
481 if msgctxt
is not False and e
.msgctxt
!= msgctxt
:
486 def ordered_metadata(self
):
488 Convenience method that returns an ordered version of the metadata
489 dictionary. The return value is list of tuples (metadata name,
492 # copy the dict first
493 metadata
= self
.metadata
.copy()
495 'Project-Id-Version',
496 'Report-Msgid-Bugs-To',
504 'Content-Transfer-Encoding',
508 for data
in data_order
:
510 value
= metadata
.pop(data
)
511 ordered_data
.append((data
, value
))
514 # the rest of the metadata will be alphabetically ordered since there
515 # are no specs for this AFAIK
516 for data
in natural_sort(metadata
.keys()):
517 value
= metadata
[data
]
518 ordered_data
.append((data
, value
))
523 Return the binary representation of the file.
526 entries
= self
.translated_entries()
528 # the keys are sorted in the .mo file
529 def cmp(_self
, other
):
530 # msgfmt compares entries with msgctxt if it exists
531 self_msgid
= _self
.msgctxt
and _self
.msgctxt
or _self
.msgid
532 other_msgid
= other
.msgctxt
and other
.msgctxt
or other
.msgid
533 if self_msgid
> other_msgid
:
535 elif self_msgid
< other_msgid
:
540 entries
.sort(key
=lambda o
: o
.msgctxt
or o
.msgid
)
541 mentry
= self
.metadata_as_entry()
542 #mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip()
543 entries
= [mentry
] + entries
544 entries_len
= len(entries
)
545 ids
, strs
= b(''), b('')
547 # For each string, we need size and file offset. Each string is
548 # NUL terminated; the NUL does not count into the size.
551 # Contexts are stored by storing the concatenation of the
552 # context, a <EOT> byte, and the original string
553 msgid
= self
._encode
(e
.msgctxt
+ '\4')
556 for index
in sorted(e
.msgstr_plural
.keys()):
557 msgstr
.append(e
.msgstr_plural
[index
])
558 msgid
+= self
._encode
(e
.msgid
+ '\0' + e
.msgid_plural
)
559 msgstr
= self
._encode
('\0'.join(msgstr
))
561 msgid
+= self
._encode
(e
.msgid
)
562 msgstr
= self
._encode
(e
.msgstr
)
563 offsets
.append((len(ids
), len(msgid
), len(strs
), len(msgstr
)))
564 ids
+= msgid
+ b('\0')
565 strs
+= msgstr
+ b('\0')
567 # The header is 7 32-bit unsigned integers.
568 keystart
= 7 * 4 + 16 * entries_len
569 # and the values start after the keys
570 valuestart
= keystart
+ len(ids
)
573 # The string table first has the list of keys, then the list of values.
574 # Each entry has first the size of the string, then the file offset.
575 for o1
, l1
, o2
, l2
in offsets
:
576 koffsets
+= [l1
, o1
+ keystart
]
577 voffsets
+= [l2
, o2
+ valuestart
]
578 offsets
= koffsets
+ voffsets
580 output
= struct
.pack(
590 # start of value index
591 7 * 4 + entries_len
* 8,
592 # size and offset of hash table, we don't use hash tables
596 if PY3
and sys
.version_info
.minor
> 1: # python 3.2 or superior
597 output
+= array
.array("i", offsets
).tobytes()
599 output
+= array
.array("i", offsets
).tostring()
604 def _encode(self
, mixed
):
606 Encodes the given ``mixed`` argument with the file encoding if and
607 only if it's an unicode string and returns the encoded string.
609 if isinstance(mixed
, text_type
):
610 mixed
= mixed
.encode(self
.encoding
)
616 class POFile(_BaseFile
):
618 Po (or Pot) file reader/writer.
619 This class inherits the :class:`~polib._BaseFile` class and, by extension,
620 the python ``list`` type.
623 def __unicode__(self
):
625 Returns the unicode representation of the po file.
627 ret
, headers
= '', self
.header
.split('\n')
628 for header
in headers
:
631 elif header
[:1] in [',', ':']:
632 ret
+= '#%s\n' % header
634 ret
+= '# %s\n' % header
636 if not isinstance(ret
, text_type
):
637 ret
= ret
.decode(self
.encoding
)
639 return ret
+ _BaseFile
.__unicode
__(self
)
641 def save_as_mofile(self
, fpath
):
643 Saves the binary representation of the file to given ``fpath``.
648 string, full or relative path to the mo file.
650 _BaseFile
.save(self
, fpath
, 'to_binary')
def percent_translated(self):
    """
    Convenience method that returns the percentage of translated
    messages, as an integer. Obsolete entries are not counted in the total.
    """
    total = len([e for e in self if not e.obsolete])
    if total == 0:
        # nothing to translate: avoid dividing by zero
        return 100
    translated = len(self.translated_entries())
    return int(translated * 100 / float(total))
def translated_entries(self):
    """
    Convenience method that returns the list of translated entries, i.e.
    the entries whose ``translated()`` method returns a true value.
    """
    return [e for e in self if e.translated()]
def untranslated_entries(self):
    """
    Convenience method that returns the list of untranslated entries:
    the entries that are not translated, not obsolete and not fuzzy.
    """
    return [e for e in self if not e.translated() and not e.obsolete
            and 'fuzzy' not in e.flags]
def fuzzy_entries(self):
    """
    Convenience method that returns the list of fuzzy entries, i.e. the
    entries whose flags contain ``'fuzzy'``.
    """
    return [e for e in self if 'fuzzy' in e.flags]
def obsolete_entries(self):
    """
    Convenience method that returns the list of obsolete entries, i.e.
    the entries whose ``obsolete`` attribute is true.
    """
    return [e for e in self if e.obsolete]
688 def merge(self
, refpot
):
690 Convenience method that merges the current pofile with the pot file
691 provided. It behaves exactly as the gettext msgmerge utility:
693 * comments of this file will be preserved, but extracted comments and
694 occurrences will be discarded;
695 * any translations or comments in the file will be discarded, however,
696 dot comments and file positions will be preserved;
697 * the fuzzy flags are preserved.
702 object POFile, the reference catalog.
704 # Store entries in dict/set for faster access
705 self_entries
= dict((entry
.msgid
, entry
) for entry
in self
)
706 refpot_msgids
= set(entry
.msgid
for entry
in refpot
)
707 # Merge entries that are in the refpot
709 e
= self_entries
.get(entry
.msgid
)
714 # ok, now we must "obsolete" entries that are not in the refpot anymore
716 if entry
.msgid
not in refpot_msgids
:
717 entry
.obsolete
= True
722 class MOFile(_BaseFile
):
724 Mo file reader/writer.
725 This class inherits the :class:`~polib._BaseFile` class and, by
726 extension, the python ``list`` type.
729 MAGIC_SWAPPED
= 0xde120495
731 def __init__(self
, *args
, **kwargs
):
733 Constructor, accepts all keywords arguments accepted by
734 :class:`~polib._BaseFile` class.
736 _BaseFile
.__init
__(self
, *args
, **kwargs
)
737 self
.magic_number
= None
740 def save_as_pofile(self
, fpath
):
742 Saves the mofile as a pofile to ``fpath``.
747 string, full or relative path to the file.
749 _BaseFile
.save(self
, fpath
)
751 def save(self
, fpath
=None):
753 Saves the mofile to ``fpath``.
758 string, full or relative path to the file.
760 _BaseFile
.save(self
, fpath
, 'to_binary')
762 def percent_translated(self
):
764 Convenience method to keep the same interface with POFile instances.
768 def translated_entries(self
):
770 Convenience method to keep the same interface with POFile instances.
774 def untranslated_entries(self
):
776 Convenience method to keep the same interface with POFile instances.
780 def fuzzy_entries(self
):
782 Convenience method to keep the same interface with POFile instances.
786 def obsolete_entries(self
):
788 Convenience method to keep the same interface with POFile instances.
792 # class _BaseEntry {{{
795 class _BaseEntry(object):
797 Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
798 This class should **not** be instantiated directly.
801 def __init__(self
, *args
, **kwargs
):
803 Constructor, accepts the following keyword arguments:
806 string, the entry msgid.
809 string, the entry msgstr.
812 string, the entry msgid_plural.
815 list, the entry msgstr_plural lines.
818 string, the entry context (msgctxt).
821 bool, whether the entry is "obsolete" or not.
824 string, the encoding to use, defaults to ``default_encoding``
825 global variable (optional).
827 self
.msgid
= kwargs
.get('msgid', '')
828 self
.msgstr
= kwargs
.get('msgstr', '')
829 self
.msgid_plural
= kwargs
.get('msgid_plural', '')
830 self
.msgstr_plural
= kwargs
.get('msgstr_plural', {})
831 self
.msgctxt
= kwargs
.get('msgctxt', None)
832 self
.obsolete
= kwargs
.get('obsolete', False)
833 self
.encoding
= kwargs
.get('encoding', default_encoding
)
835 def __unicode__(self
, wrapwidth
=78):
837 Returns the unicode representation of the entry.
844 # write the msgctxt if any
845 if self
.msgctxt
is not None:
846 ret
+= self
._str
_field
("msgctxt", delflag
, "", self
.msgctxt
,
849 ret
+= self
._str
_field
("msgid", delflag
, "", self
.msgid
, wrapwidth
)
850 # write the msgid_plural if any
851 if self
.msgid_plural
:
852 ret
+= self
._str
_field
("msgid_plural", delflag
, "",
853 self
.msgid_plural
, wrapwidth
)
854 if self
.msgstr_plural
:
855 # write the msgstr_plural if any
856 msgstrs
= self
.msgstr_plural
860 msgstr
= msgstrs
[index
]
861 plural_index
= '[%s]' % index
862 ret
+= self
._str
_field
("msgstr", delflag
, plural_index
, msgstr
,
865 # otherwise write the msgstr
866 ret
+= self
._str
_field
("msgstr", delflag
, "", self
.msgstr
,
870 if not PY3
and type(ret
[0] != unicode):
873 ret
= u('\n').join(x
.decode('utf-8') for x
in ret
)
877 ret
= u('\n').join(ret
)
882 return self
.__unicode
__()
886 Returns the string representation of the entry.
888 return unicode(self
).encode(self
.encoding
)
890 def __eq__(self
, other
):
891 return str(self
) == str(other
)
893 def _str_field(self
, fieldname
, delflag
, plural_index
, field
,
895 lines
= field
.splitlines(True)
897 lines
= [''] + lines
# start with initial empty line
899 escaped_field
= escape(field
)
900 specialchars_count
= 0
901 for c
in ['\\', '\n', '\r', '\t', '"']:
902 specialchars_count
+= field
.count(c
)
903 # comparison must take into account fieldname length + one space
904 # + 2 quotes (eg. msgid "<string>")
905 flength
= len(fieldname
) + 3
907 flength
+= len(plural_index
)
908 real_wrapwidth
= wrapwidth
- flength
+ specialchars_count
909 if wrapwidth
> 0 and len(field
) > real_wrapwidth
:
910 # Wrap the line but take field name into account
911 lines
= [''] + [unescape(item
) for item
in wrap(
913 wrapwidth
- 2, # 2 for quotes ""
914 drop_whitespace
=False,
915 break_long_words
=False
919 if fieldname
.startswith('previous_'):
920 # quick and dirty trick to get the real field name
921 fieldname
= fieldname
[9:]
923 ret
= ['%s%s%s "%s"' % (delflag
, fieldname
, plural_index
,
924 escape(lines
.pop(0)))]
926 ret
.append('%s"%s"' % (delflag
, escape(line
)))
932 class POEntry(_BaseEntry
):
934 Represents a po file entry.
937 def __init__(self
, *args
, **kwargs
):
939 Constructor, accepts the following keyword arguments:
942 string, the entry comment.
945 string, the entry translator comment.
948 list, the entry occurrences.
951 list, the entry flags.
954 string, the entry previous context.
957 string, the entry previous msgid.
959 ``previous_msgid_plural``
960 string, the entry previous msgid_plural.
963 integer, the line number of the entry
965 _BaseEntry
.__init
__(self
, *args
, **kwargs
)
966 self
.comment
= kwargs
.get('comment', '')
967 self
.tcomment
= kwargs
.get('tcomment', '')
968 self
.occurrences
= kwargs
.get('occurrences', [])
969 self
.flags
= kwargs
.get('flags', [])
970 self
.previous_msgctxt
= kwargs
.get('previous_msgctxt', None)
971 self
.previous_msgid
= kwargs
.get('previous_msgid', None)
972 self
.previous_msgid_plural
= kwargs
.get('previous_msgid_plural', None)
973 self
.linenum
= kwargs
.get('linenum', None)
975 def __unicode__(self
, wrapwidth
=0):
977 Returns the unicode representation of the entry.
980 # comments first, if any (with text wrapping as xgettext does)
982 comments
= [('tcomment', '# ')]
984 comments
= [('comment', '#. '), ('tcomment', '# ')]
986 val
= getattr(self
, c
[0])
988 for comment
in val
.split('\n'):
989 if wrapwidth
> 0 and len(comment
) + len(c
[1]) > wrapwidth
:
994 subsequent_indent
=c
[1],
995 break_long_words
=False
998 ret
.append('%s%s' % (c
[1], comment
))
1000 # occurrences (with text wrapping as xgettext does)
1001 if not self
.obsolete
and self
.occurrences
:
1003 for fpath
, lineno
in self
.occurrences
:
1005 filelist
.append('%s:%s' % (fpath
, lineno
))
1007 filelist
.append(fpath
)
1008 filestr
= ' '.join(filelist
)
1009 if wrapwidth
> 0 and len(filestr
) + 3 > wrapwidth
:
1010 # textwrap split words that contain hyphen, this is not
1011 # what we want for filenames, so the dirty hack is to
1012 # temporarily replace hyphens with a char that a file cannot
1014 ret
+= [l
.replace('*', '-') for l
in wrap(
1015 filestr
.replace('-', '*'),
1017 initial_indent
='#: ',
1018 subsequent_indent
='#: ',
1019 break_long_words
=False
1022 ret
.append('#: ' + filestr
)
1024 # flags (TODO: wrapping ?)
1026 ret
.append('#, %s' % ', '.join(self
.flags
))
1028 # previous context and previous msgid/msgid_plural
1029 fields
= ['previous_msgctxt', 'previous_msgid',
1030 'previous_msgid_plural']
1036 val
= getattr(self
, f
)
1038 ret
+= self
._str
_field
(f
, prefix
, "", val
, wrapwidth
)
1040 ret
.append(_BaseEntry
.__unicode
__(self
, wrapwidth
))
1041 ret
= u('\n').join(ret
)
1044 def __cmp__(self
, other
):
1046 Called by comparison operations if rich comparison is not defined.
1049 # First: Obsolete test
1050 if self
.obsolete
!= other
.obsolete
:
1055 # Work on a copy to protect original
1056 occ1
= sorted(self
.occurrences
[:])
1057 occ2
= sorted(other
.occurrences
[:])
1065 if entry1
[0] != entry2
[0]:
1066 if entry1
[0] > entry2
[0]:
1070 if entry1
[1] != entry2
[1]:
1071 if entry1
[1] > entry2
[1]:
1075 # Compare msgid_plural if set
1076 if self
.msgid_plural
:
1077 if not other
.msgid_plural
:
1079 for pos
in self
.msgid_plural
:
1080 if pos
not in other
.msgid_plural
:
1082 if self
.msgid_plural
[pos
] > other
.msgid_plural
[pos
]:
1084 if self
.msgid_plural
[pos
] < other
.msgid_plural
[pos
]:
1086 # Finally: Compare message ID
1087 if self
.msgid
> other
.msgid
:
1089 elif self
.msgid
< other
.msgid
:
1093 def __gt__(self
, other
):
1094 return self
.__cmp
__(other
) > 0
1096 def __lt__(self
, other
):
1097 return self
.__cmp
__(other
) < 0
1099 def __ge__(self
, other
):
1100 return self
.__cmp
__(other
) >= 0
1102 def __le__(self
, other
):
1103 return self
.__cmp
__(other
) <= 0
1105 def __eq__(self
, other
):
1106 return self
.__cmp
__(other
) == 0
1108 def __ne__(self
, other
):
1109 return self
.__cmp
__(other
) != 0
def translated(self):
    """
    Returns ``True`` if the entry has been translated or ``False``
    otherwise.

    Obsolete and fuzzy entries are never considered translated. Otherwise
    the entry is translated when its msgstr is not empty or, for an entry
    with plural forms, when none of its plural msgstr is empty.
    """
    if self.obsolete or 'fuzzy' in self.flags:
        return False
    if self.msgstr != '':
        return True
    if self.msgstr_plural:
        # every plural form must carry a translation
        return all(self.msgstr_plural[pos] != ''
                   for pos in self.msgstr_plural)
    return False
def merge(self, other):
    """
    Merge the current entry with the given pot entry.

    The msgid, msgctxt, occurrences, comment, flags, msgid_plural, obsolete
    state and "previous" fields are taken from ``other``. The fuzzy flag
    of the current entry is kept, as are its existing plural translations.

    Argument:

    ``other``
        the pot entry to merge into the current entry.
    """
    self.msgid = other.msgid
    self.msgctxt = other.msgctxt
    self.occurrences = other.occurrences
    self.comment = other.comment
    fuzzy = 'fuzzy' in self.flags
    self.flags = other.flags[:]  # clone flags
    # the entry stays fuzzy after the merge; do not add the flag a second
    # time when the pot entry carries it as well
    if fuzzy and 'fuzzy' not in self.flags:
        self.flags.append('fuzzy')
    self.msgid_plural = other.msgid_plural
    self.obsolete = other.obsolete
    self.previous_msgctxt = other.previous_msgctxt
    self.previous_msgid = other.previous_msgid
    self.previous_msgid_plural = other.previous_msgid_plural
    if other.msgstr_plural:
        for pos in other.msgstr_plural:
            try:
                # keep existing translation at pos if any
                self.msgstr_plural[pos]
            except KeyError:
                self.msgstr_plural[pos] = ''
1153 return hash((self
.msgid
, self
.msgstr
))
1158 class MOEntry(_BaseEntry
):
1160 Represents a mo file entry.
1162 def __init__(self
, *args
, **kwargs
):
1164 Constructor, accepts the following keyword arguments,
1165 for consistency with :class:`~polib.POEntry`:
1171 ``previous_msgctxt``
1173 ``previous_msgid_plural``
1175 Note: even though these keyword arguments are accepted,
1176 they hold no real meaning in the context of MO files
1177 and are simply ignored.
1179 _BaseEntry
.__init
__(self
, *args
, **kwargs
)
1182 self
.occurrences
= []
1184 self
.previous_msgctxt
= None
1185 self
.previous_msgid
= None
1186 self
.previous_msgid_plural
= None
1189 return hash((self
.msgid
, self
.msgstr
))
1192 # class _POFileParser {{{
1195 class _POFileParser(object):
1197 A finite state machine to parse efficiently and correctly po
1201 def __init__(self
, pofile
, *args
, **kwargs
):
1208 string, path to the po file or its content
1211 string, the encoding to use, defaults to ``default_encoding``
1212 global variable (optional).
1214 ``check_for_duplicates``
1215 whether to check for duplicate entries when adding entries to the
1216 file (optional, default: ``False``).
1218 enc
= kwargs
.get('encoding', default_encoding
)
1219 if _is_file(pofile
):
1221 self
.fhandle
= io
.open(pofile
, 'rt', encoding
=enc
)
1223 enc
= default_encoding
1224 self
.fhandle
= io
.open(pofile
, 'rt', encoding
=enc
)
1226 self
.fhandle
= pofile
.splitlines()
1228 klass
= kwargs
.get('klass')
1231 self
.instance
= klass(
1234 check_for_duplicates
=kwargs
.get('check_for_duplicates', False)
1236 self
.transitions
= {}
1237 self
.current_line
= 0
1238 self
.current_entry
= POEntry(linenum
=self
.current_line
)
1239 self
.current_state
= 'st'
1240 self
.current_token
= None
1241 # two memo flags used in handlers
1242 self
.msgstr_index
= 0
1243 self
.entry_obsolete
= 0
1244 # Configure the state machine, by adding transitions.
1245 # Signification of symbols:
1246 # * ST: Beginning of the file (start)
1248 # * TC: a translation comment
1249 # * GC: a generated comment
1250 # * OC: a file/line occurrence
1251 # * FL: a flags line
1252 # * CT: a message context
1253 # * PC: a previous msgctxt
1254 # * PM: a previous msgid
1255 # * PP: a previous msgid_plural
1257 # * MP: a msgid plural
1259 # * MX: a msgstr plural
1260 # * MC: a msgid or msgstr continuation line
1261 all
= ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'pc', 'pm', 'pp', 'tc',
1262 'ms', 'mp', 'mx', 'mi']
1264 self
.add('tc', ['st', 'he'], 'he')
1265 self
.add('tc', ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms',
1266 'mp', 'mx', 'mi'], 'tc')
1267 self
.add('gc', all
, 'gc')
1268 self
.add('oc', all
, 'oc')
1269 self
.add('fl', all
, 'fl')
1270 self
.add('pc', all
, 'pc')
1271 self
.add('pm', all
, 'pm')
1272 self
.add('pp', all
, 'pp')
1273 self
.add('ct', ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm',
1274 'pp', 'ms', 'mx'], 'ct')
1275 self
.add('mi', ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc',
1276 'pm', 'pp', 'ms', 'mx'], 'mi')
1277 self
.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
1278 self
.add('ms', ['mi', 'mp', 'tc'], 'ms')
1279 self
.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
1280 self
.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')
1284 Run the state machine, parse the file line by line and call process()
1285 with the current matched symbol.
1292 'msgid_plural': 'mp',
1295 'msgid_plural': 'pp',
1300 for line
in self
.fhandle
:
1301 self
.current_line
+= 1
1306 tokens
= line
.split(None, 2)
1307 nb_tokens
= len(tokens
)
1309 if tokens
[0] == '#~|':
1312 if tokens
[0] == '#~' and nb_tokens
> 1:
1313 line
= line
[3:].strip()
1316 self
.entry_obsolete
= 1
1318 self
.entry_obsolete
= 0
1320 # Take care of keywords like
1321 # msgid, msgid_plural, msgctxt & msgstr.
1322 if tokens
[0] in keywords
and nb_tokens
> 1:
1323 line
= line
[len(tokens
[0]):].lstrip()
1324 if re
.search(r
'([^\\]|^)"', line
[1:-1]):
1325 raise IOError('Syntax error in po file %s (line %s): '
1326 'unescaped double quote found' %
1327 (self
.instance
.fpath
, self
.current_line
))
1328 self
.current_token
= line
1329 self
.process(keywords
[tokens
[0]])
1332 self
.current_token
= line
1334 if tokens
[0] == '#:':
1337 # we are on a occurrences line
1340 elif line
[:1] == '"':
1341 # we are on a continuation line
1342 if re
.search(r
'([^\\]|^)"', line
[1:-1]):
1343 raise IOError('Syntax error in po file %s (line %s): '
1344 'unescaped double quote found' %
1345 (self
.instance
.fpath
, self
.current_line
))
1348 elif line
[:7] == 'msgstr[':
1349 # we are on a msgstr plural
1352 elif tokens
[0] == '#,':
1355 # we are on a flags line
1358 elif tokens
[0] == '#' or tokens
[0].startswith('##'):
1361 # we are on a translator comment line
1364 elif tokens
[0] == '#.':
1367 # we are on a generated comment line
1370 elif tokens
[0] == '#|':
1372 raise IOError('Syntax error in po file %s (line %s)' %
1373 (self
.instance
.fpath
, self
.current_line
))
1375 # Remove the marker and any whitespace right after that.
1376 line
= line
[2:].lstrip()
1377 self
.current_token
= line
1379 if tokens
[1].startswith('"'):
1380 # Continuation of previous metadata.
1385 # Invalid continuation line.
1386 raise IOError('Syntax error in po file %s (line %s): '
1387 'invalid continuation line' %
1388 (self
.instance
.fpath
, self
.current_line
))
1390 # we are on a "previous translation" comment line,
1391 if tokens
[1] not in prev_keywords
:
1392 # Unknown keyword in previous translation comment.
1393 raise IOError('Syntax error in po file %s (line %s): '
1394 'unknown keyword %s' %
1395 (self
.instance
.fpath
, self
.current_line
,
1398 # Remove the keyword and any whitespace
1399 # between it and the starting quote.
1400 line
= line
[len(tokens
[1]):].lstrip()
1401 self
.current_token
= line
1402 self
.process(prev_keywords
[tokens
[1]])
1405 raise IOError('Syntax error in po file %s (line %s)' %
1406 (self
.instance
.fpath
, self
.current_line
))
1408 if self
.current_entry
and len(tokens
) > 0 and \
1409 not tokens
[0].startswith('#'):
1410 # since entries are added when another entry is found, we must add
1411 # the last entry here (only if there are lines). Trailing comments
1413 self
.instance
.append(self
.current_entry
)
1415 # before returning the instance, check if there's metadata and if
1416 # so extract it in a dict
1417 metadataentry
= self
.instance
.find('')
1418 if metadataentry
: # metadata found
1420 self
.instance
.remove(metadataentry
)
1421 self
.instance
.metadata_is_fuzzy
= metadataentry
.flags
1423 for msg
in metadataentry
.msgstr
.splitlines():
1425 key
, val
= msg
.split(':', 1)
1426 self
.instance
.metadata
[key
] = val
.strip()
1427 except (ValueError, KeyError):
1429 self
.instance
.metadata
[key
] += '\n' + msg
.strip()
1431 if not isinstance(self
.fhandle
, list): # must be file
1432 self
.fhandle
.close()
1433 return self
.instance
def add(self, symbol, states, next_state):
    """
    Add a transition to the state machine.

    Keyword arguments:

    ``symbol``
        string, the matched token (two chars symbol).

    ``states``
        list, a list of states (two chars symbols).

    ``next_state``
        the next state the fsm will have after the action.
    """
    # The handler is looked up by name: ``handle_<next_state>``.
    handler_name = 'handle_%s' % next_state
    for state in states:
        action = getattr(self, handler_name)
        self.transitions[(symbol, state)] = (action, next_state)
def process(self, symbol):
    """
    Process the transition corresponding to the current state and the
    symbol provided.

    Keyword arguments:

    ``symbol``
        string, the matched token (two chars symbol).

    The line number reported on error is read from ``self.current_line``.
    Raises ``IOError`` when no transition is registered for
    ``(symbol, current state)`` or when the handler itself fails.
    """
    try:
        (action, state) = self.transitions[(symbol, self.current_state)]
        # A handler returning a false value asks to keep the current state.
        if action():
            self.current_state = state
    except Exception:
        raise IOError('Syntax error in po file (line %s)' %
                      self.current_line)
def handle_he(self):
    """Handle a header comment."""
    header = self.instance.header
    if header != '':
        header += '\n'
    # Drop the two leading characters of the token (the comment marker).
    self.instance.header = header + self.current_token[2:]
    return True
def handle_tc(self):
    """Handle a translator comment."""
    # Coming from a msgstr (or one of its continuation lines) means the
    # previous entry is complete: store it and start a new one.
    if self.current_state in ['mc', 'ms', 'mx']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry(linenum=self.current_line)
    entry = self.current_entry
    if entry.tcomment != '':
        entry.tcomment += '\n'
    # Strip every leading '#' and then at most one separating space.
    text = self.current_token.lstrip('#')
    if text.startswith(' '):
        text = text[1:]
    entry.tcomment += text
    return True
def handle_gc(self):
    """Handle a generated comment."""
    # Coming from a msgstr (or one of its continuation lines) means the
    # previous entry is complete: store it and start a new one.
    if self.current_state in ['mc', 'ms', 'mx']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry(linenum=self.current_line)
    entry = self.current_entry
    if entry.comment != '':
        entry.comment += '\n'
    # Skip the first three characters of the token (marker and separator).
    entry.comment += self.current_token[3:]
    return True
def handle_oc(self):
    """
    Handle a file:num occurrence.

    Every whitespace separated item of the line is stored in the current
    entry as a ``(file, line)`` tuple. When an item has no ``:`` or when
    what follows the last ``:`` is not a number, the whole item is kept as
    the file name and the line is the empty string.
    """
    # Coming from a msgstr (or one of its continuation lines) means the
    # previous entry is complete: store it and start a new one.
    if self.current_state in ['mc', 'ms', 'mx']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry(linenum=self.current_line)
    # Skip the first three characters of the token (marker and separator);
    # split() without argument never yields empty items.
    for occurrence in self.current_token[3:].split():
        fil, sep, line = occurrence.rpartition(':')
        if not sep or not line.isdigit():
            fil, line = occurrence, ''
        self.current_entry.occurrences.append((fil, line))
    return True
def handle_fl(self):
    """Handle a flags line."""
    # Coming from a msgstr (or one of its continuation lines) means the
    # previous entry is complete: store it and start a new one.
    if self.current_state in ['mc', 'ms', 'mx']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry(linenum=self.current_line)
    # Flags are comma separated, after the three leading characters of the
    # token (marker and separator).
    raw_flags = self.current_token[3:].split(',')
    self.current_entry.flags += [flag.strip() for flag in raw_flags]
    return True
def handle_pp(self):
    """Handle a previous msgid_plural line."""
    # Coming from a msgstr (or one of its continuation lines) means the
    # previous entry is complete: store it and start a new one.
    if self.current_state in ['mc', 'ms', 'mx']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry(linenum=self.current_line)
    # The token is a quoted string: drop the surrounding quotes.
    quoted = self.current_token[1:-1]
    self.current_entry.previous_msgid_plural = unescape(quoted)
    return True
def handle_pm(self):
    """Handle a previous msgid line."""
    # Coming from a msgstr (or one of its continuation lines) means the
    # previous entry is complete: store it and start a new one.
    if self.current_state in ['mc', 'ms', 'mx']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry(linenum=self.current_line)
    # The token is a quoted string: drop the surrounding quotes.
    quoted = self.current_token[1:-1]
    self.current_entry.previous_msgid = unescape(quoted)
    return True
def handle_pc(self):
    """Handle a previous msgctxt line."""
    # Coming from a msgstr (or one of its continuation lines) means the
    # previous entry is complete: store it and start a new one.
    if self.current_state in ['mc', 'ms', 'mx']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry(linenum=self.current_line)
    # The token is a quoted string: drop the surrounding quotes.
    quoted = self.current_token[1:-1]
    self.current_entry.previous_msgctxt = unescape(quoted)
    return True
def handle_ct(self):
    """Handle a msgctxt."""
    # Coming from a msgstr (or one of its continuation lines) means the
    # previous entry is complete: store it and start a new one.
    if self.current_state in ['mc', 'ms', 'mx']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry(linenum=self.current_line)
    # The token is a quoted string: drop the surrounding quotes.
    quoted = self.current_token[1:-1]
    self.current_entry.msgctxt = unescape(quoted)
    return True
def handle_mi(self):
    """Handle a msgid."""
    # Coming from a msgstr (or one of its continuation lines) means the
    # previous entry is complete: store it and start a new one.
    if self.current_state in ['mc', 'ms', 'mx']:
        self.instance.append(self.current_entry)
        self.current_entry = POEntry(linenum=self.current_line)
    entry = self.current_entry
    # The obsolete marker is tracked by the parser for the line being read.
    entry.obsolete = self.entry_obsolete
    # The token is a quoted string: drop the surrounding quotes.
    entry.msgid = unescape(self.current_token[1:-1])
    return True
def handle_mp(self):
    """Handle a msgid plural."""
    # The token is a quoted string: drop the surrounding quotes.
    quoted = self.current_token[1:-1]
    self.current_entry.msgid_plural = unescape(quoted)
    return True
def handle_ms(self):
    """Handle a msgstr."""
    # The token is a quoted string: drop the surrounding quotes.
    quoted = self.current_token[1:-1]
    self.current_entry.msgstr = unescape(quoted)
    return True
def handle_mx(self):
    """
    Handle a msgstr plural.

    The current token starts with ``msgstr[`` followed by the plural index,
    a closing ``]`` and the quoted translation. The index is remembered in
    ``self.msgstr_index`` so that continuation lines are appended to the
    right plural form.
    """
    token = self.current_token
    # Read every character up to the closing bracket, not only the first
    # one, so that an index of 10 or more is not truncated to one digit.
    index = int(token[7:token.index(']', 7)])
    # The value is what lies between the first quote and the final one.
    value = token[token.find('"') + 1:-1]
    self.current_entry.msgstr_plural[index] = unescape(value)
    self.msgstr_index = index
    return True
def handle_mc(self):
    """
    Handle a msgid or msgstr continuation line.

    The unescaped content of the line is appended to the field that the
    current state designates. Nothing is stored for any other state.
    """
    # The token is a quoted string: drop the surrounding quotes.
    token = unescape(self.current_token[1:-1])
    state = self.current_state
    entry = self.current_entry
    if state == 'mx':
        # Plural translations are keyed by the index of the last
        # msgstr[N] line that was read.
        entry.msgstr_plural[self.msgstr_index] += token
    else:
        field = {
            'ct': 'msgctxt',
            'mi': 'msgid',
            'mp': 'msgid_plural',
            'ms': 'msgstr',
            'pp': 'previous_msgid_plural',
            'pm': 'previous_msgid',
            'pc': 'previous_msgctxt',
        }.get(state)
        if field is not None:
            setattr(entry, field, getattr(entry, field) + token)
    # don't change the current state
    return False
1618 # class _MOFileParser {{{
class _MOFileParser(object):
    """
    A class to parse binary mo files.
    """

    def __init__(self, mofile, *args, **kwargs):
        """
        Constructor.

        Keyword arguments:

        ``mofile``
            string, path to the mo file or its content

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file (optional, default: ``False``).

        ``klass``
            class to instantiate for the result, defaults to ``MOFile``
            (optional).
        """
        self.fhandle = open(mofile, 'rb')

        klass = kwargs.get('klass')
        if klass is None:
            klass = MOFile
        self.instance = klass(
            fpath=mofile,
            encoding=kwargs.get('encoding', default_encoding),
            check_for_duplicates=kwargs.get('check_for_duplicates', False)
        )

    def __del__(self):
        """
        Make sure the file is closed, this prevents warnings on unclosed file
        when running tests with python >= 3.2.
        """
        # The attribute does not exist when open() failed in the constructor.
        fhandle = getattr(self, 'fhandle', None)
        if fhandle:
            fhandle.close()

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        Returns the populated instance. Raises ``IOError`` when the magic
        number or the revision number is not an expected one.
        """
        try:
            self._parse()
        finally:
            # close opened file, also when the content is invalid
            self.fhandle.close()
        return self.instance

    def _parse(self):
        """Read the header, the metadata and every entry of the file."""
        # parse magic number
        magic_number = self._readbinary('<I', 4)
        # The magic number tells the byte order of every following integer.
        if magic_number == MOFile.MAGIC:
            ii = '<II'
        elif magic_number == MOFile.MAGIC_SWAPPED:
            ii = '>II'
        else:
            raise IOError('Invalid mo file, magic number is incorrect !')
        self.instance.magic_number = magic_number
        # parse the version number and the number of strings
        version, numofstrings = self._readbinary(ii, 8)
        # from MO file format specs: "A program seeing an unexpected major
        # revision number should stop reading the MO file entirely"
        if version not in (0, 1):
            raise IOError('Invalid mo file, unexpected major revision number')
        self.instance.version = version
        # original strings and translation strings hash table offset
        msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
        # read length and offset of msgids, then of msgstrs
        msgids_index = self._read_index(msgids_hash_offset, numofstrings, ii)
        msgstrs_index = self._read_index(msgstrs_hash_offset, numofstrings,
                                         ii)
        # build entries
        for i in range(numofstrings):
            msgid = self._read_string(msgids_index[i])
            msgstr = self._read_string(msgstrs_index[i])
            if i == 0 and not msgid:  # metadata
                self.instance.metadata = self._parse_metadata(msgstr)
                continue
            # test if we have a plural entry
            msgid_tokens = msgid.split(b('\0'))
            if len(msgid_tokens) > 1:
                entry = self._build_entry(
                    msgid=msgid_tokens[0],
                    msgid_plural=msgid_tokens[1],
                    msgstr_plural=dict(enumerate(msgstr.split(b('\0'))))
                )
            else:
                entry = self._build_entry(msgid=msgid, msgstr=msgstr)
            self.instance.append(entry)

    def _read_index(self, offset, count, fmt):
        """Read ``count`` (length, offset) pairs stored at ``offset``."""
        self.fhandle.seek(offset)
        return [self._readbinary(fmt, 8) for _ in range(count)]

    def _read_string(self, index_item):
        """Read the raw bytes designated by a (length, offset) pair."""
        length, offset = index_item
        self.fhandle.seek(offset)
        return self.fhandle.read(length)

    def _parse_metadata(self, raw):
        """Turn the raw ``key: value`` metadata lines into a dict."""
        encoding = self.instance.encoding
        metadata = {}
        for line in raw.split(b('\n')):
            tokens = line.split(b(':'), 1)
            if tokens[0] != b(''):
                # A line without ':' gets an empty value.
                value = len(tokens) > 1 and tokens[1] or b('')
                key = tokens[0].decode(encoding)
                metadata[key] = value.decode(encoding).strip()
        return metadata

    def _build_entry(self, msgid, msgstr=None, msgid_plural=None,
                     msgstr_plural=None):
        """
        Build a ``MOEntry`` from raw bytes, decoded with the encoding of
        the instance. A ``\\x04`` in ``msgid`` separates the context from
        the message id.
        """
        msgctxt_msgid = msgid.split(b('\x04'))
        encoding = self.instance.encoding
        if len(msgctxt_msgid) > 1:
            kwargs = {
                'msgctxt': msgctxt_msgid[0].decode(encoding),
                'msgid': msgctxt_msgid[1].decode(encoding),
            }
        else:
            kwargs = {'msgid': msgid.decode(encoding)}
        if msgstr:
            kwargs['msgstr'] = msgstr.decode(encoding)
        if msgid_plural:
            kwargs['msgid_plural'] = msgid_plural.decode(encoding)
        if msgstr_plural:
            # Decode into a new dict, the one given by the caller is left
            # untouched.
            kwargs['msgstr_plural'] = dict(
                (k, v.decode(encoding)) for k, v in msgstr_plural.items())
        return MOEntry(**kwargs)

    def _readbinary(self, fmt, numbytes):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.
        """
        data = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, data)
        if len(tup) == 1:
            return tup[0]
        return tup
1764 # class TextWrapper {{{
class TextWrapper(textwrap.TextWrapper):
    """
    Subclass of textwrap.TextWrapper that backport the
    drop_whitespace option.
    """
    def __init__(self, *args, **kwargs):
        """
        Accept the same arguments as ``textwrap.TextWrapper`` plus the
        ``drop_whitespace`` keyword (default: ``True``).
        """
        # Removed from kwargs before calling the parent constructor, then
        # stored on the instance for _wrap_chunks().
        drop_whitespace = kwargs.pop('drop_whitespace', True)
        textwrap.TextWrapper.__init__(self, *args, **kwargs)
        self.drop_whitespace = drop_whitespace

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chucks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and not cur_line[-1].strip():
                del cur_line[-1]

            # Convert current line back to a string and store it in list
            # of all lines (return value).
            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines
1847 # function wrap() {{{
def wrap(text, width=70, **kwargs):
    """
    Wrap a single paragraph of text, returning a list of wrapped lines.

    ``width`` and the extra keyword arguments are handed to the wrapper
    unchanged.
    """
    # Before python 2.6 the backported TextWrapper of this module is used,
    # otherwise the standard library function.
    if sys.version_info < (2, 6):
        return TextWrapper(width=width, **kwargs).wrap(text)
    return textwrap.wrap(text, width=width, **kwargs)
1860 def genKeyId(inkey
):
1861 crc
= binascii
.crc32(bytes(inkey
)) & 0xffffffff
1862 # Use simple ASCII characters, exclude I, l, 1 and O, 0 to avoid confusing IDs
1863 symbols
= "ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz23456789";
1865 for keyind
in range(0, 5):
1866 outkey
+= symbols
[(crc
& 63) % len(symbols
)];