# This commit was manufactured by cvs2svn to create tag 'r234c1'.
# [python/dscho.git] / Lib / test / test_textwrap.py
# blob 6811119d4caa506e956535a2416eda0fc3ce0cb9
#
# Test script for the textwrap module.
#
# Original tests written by Greg Ward <gward@python.net>.
# Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
# Currently maintained by Greg Ward.
#
# $Id$
11 import unittest
12 from test import test_support
14 from textwrap import TextWrapper, wrap, fill, dedent
class BaseTestCase(unittest.TestCase):
    '''Parent class with utility methods for textwrap tests.'''

    def show(self, textin):
        '''Return a printable rendering of textin for failure messages.

        A list is rendered as one numbered "index: repr" entry per line;
        any other value is rendered as its repr() followed by a newline.
        '''
        if isinstance(textin, list):
            return '\n'.join([" %d: %r" % (i, line)
                              for i, line in enumerate(textin)])
        # Strings take this path, and so does every other type: the old
        # code left the result unbound for a non-list, non-string value,
        # so an UnboundLocalError hid the real assertion failure.
        return " %s\n" % repr(textin)

    def check(self, result, expect):
        '''Assert that result equals expect, showing both on failure.'''
        self.assertEqual(result, expect,
            'expected:\n%s\nbut got:\n%s' % (
                self.show(expect), self.show(result)))

    def check_wrap(self, text, width, expect, **kwargs):
        '''Wrap text to width with textwrap.wrap() and compare to expect.

        Extra keyword arguments are passed straight through to wrap().
        '''
        result = wrap(text, width, **kwargs)
        self.check(result, expect)

    def check_split(self, text, expect):
        '''Compare self.wrapper._split(text) to expect.

        Subclasses must create self.wrapper (normally in setUp()).
        '''
        result = self.wrapper._split(text)
        self.assertEqual(result, expect,
                         "\nexpected %r\n"
                         "but got %r" % (expect, result))
class WrapTestCase(BaseTestCase):
    '''Tests for wrap(), fill() and TextWrapper on ordinary text.'''

    def setUp(self):
        # Shared wrapper used by check_split() and by the tests that call
        # self.wrapper directly.
        self.wrapper = TextWrapper(width=45)

    def test_simple(self):
        # Simple case: just words, spaces, and a bit of punctuation

        text = "Hello there, how are you this fine day? I'm glad to hear it!"

        self.check_wrap(text, 12,
                        ["Hello there,",
                         "how are you",
                         "this fine",
                         "day? I'm",
                         "glad to hear",
                         "it!"])
        self.check_wrap(text, 42,
                        ["Hello there, how are you this fine day?",
                         "I'm glad to hear it!"])
        self.check_wrap(text, 80, [text])

    def test_whitespace(self):
        # Whitespace munging and end-of-sentence detection

        text = """\
This is a paragraph that already has
line breaks. But some of its lines are much longer than the others,
so it needs to be wrapped.
Some lines are \ttabbed too.
What a mess!
"""

        # fix_sentence_endings puts exactly two spaces after each sentence
        # end, and the tab after "are " expands to one more space, hence
        # the double spaces in the expected lines.
        expect = ["This is a paragraph that already has line",
                  "breaks.  But some of its lines are much",
                  "longer than the others, so it needs to be",
                  "wrapped.  Some lines are  tabbed too.  What a",
                  "mess!"]

        wrapper = TextWrapper(45, fix_sentence_endings=True)
        result = wrapper.wrap(text)
        self.check(result, expect)

        result = wrapper.fill(text)
        self.check(result, '\n'.join(expect))

    def test_fix_sentence_endings(self):
        wrapper = TextWrapper(60, fix_sentence_endings=True)

        # SF #847346: ensure that fix_sentence_endings=True does the
        # right thing even on input short enough that it doesn't need to
        # be wrapped.
        text = "A short line. Note the single space."
        expect = ["A short line.  Note the single space."]
        self.check(wrapper.wrap(text), expect)

        # Test some of the hairy end cases that _fix_sentence_endings()
        # is supposed to handle (the easy stuff is tested in
        # test_whitespace() above).
        text = "Well, Doctor? What do you think?"
        expect = ["Well, Doctor?  What do you think?"]
        self.check(wrapper.wrap(text), expect)

        text = "Well, Doctor?\nWhat do you think?"
        self.check(wrapper.wrap(text), expect)

        text = 'I say, chaps! Anyone for "tennis?"\nHmmph!'
        expect = ['I say, chaps!  Anyone for "tennis?"  Hmmph!']
        self.check(wrapper.wrap(text), expect)

        wrapper.width = 20
        expect = ['I say, chaps!', 'Anyone for "tennis?"', 'Hmmph!']
        self.check(wrapper.wrap(text), expect)

        text = 'And she said, "Go to hell!"\nCan you believe that?'
        expect = ['And she said, "Go to',
                  'hell!"  Can you',
                  'believe that?']
        self.check(wrapper.wrap(text), expect)

        wrapper.width = 60
        expect = ['And she said, "Go to hell!"  Can you believe that?']
        self.check(wrapper.wrap(text), expect)

    def test_wrap_short(self):
        # Wrapping to make short lines longer

        text = "This is a\nshort paragraph."

        self.check_wrap(text, 20, ["This is a short",
                                   "paragraph."])
        self.check_wrap(text, 40, ["This is a short paragraph."])

    def test_wrap_short_1line(self):
        # Test endcases

        text = "This is a short line."

        self.check_wrap(text, 30, ["This is a short line."])
        self.check_wrap(text, 30, ["(1) This is a short line."],
                        initial_indent="(1) ")

    def test_hyphenated(self):
        # Test breaking hyphenated words

        text = ("this-is-a-useful-feature-for-"
                "reformatting-posts-from-tim-peters'ly")

        self.check_wrap(text, 40,
                        ["this-is-a-useful-feature-for-",
                         "reformatting-posts-from-tim-peters'ly"])
        self.check_wrap(text, 41,
                        ["this-is-a-useful-feature-for-",
                         "reformatting-posts-from-tim-peters'ly"])
        self.check_wrap(text, 42,
                        ["this-is-a-useful-feature-for-reformatting-",
                         "posts-from-tim-peters'ly"])

    def test_em_dash(self):
        # Test text with em-dashes
        text = "Em-dashes should be written -- thus."
        self.check_wrap(text, 25,
                        ["Em-dashes should be",
                         "written -- thus."])

        # Probe the boundaries of the properly written em-dash,
        # ie. " -- ".
        self.check_wrap(text, 29,
                        ["Em-dashes should be written",
                         "-- thus."])
        expect = ["Em-dashes should be written --",
                  "thus."]
        self.check_wrap(text, 30, expect)
        self.check_wrap(text, 35, expect)
        self.check_wrap(text, 36,
                        ["Em-dashes should be written -- thus."])

        # The improperly written em-dash is handled too, because
        # it's adjacent to non-whitespace on both sides.
        text = "You can also do--this or even---this."
        expect = ["You can also do",
                  "--this or even",
                  "---this."]
        self.check_wrap(text, 15, expect)
        self.check_wrap(text, 16, expect)
        expect = ["You can also do--",
                  "this or even---",
                  "this."]
        self.check_wrap(text, 17, expect)
        self.check_wrap(text, 19, expect)
        expect = ["You can also do--this or even",
                  "---this."]
        self.check_wrap(text, 29, expect)
        self.check_wrap(text, 31, expect)
        expect = ["You can also do--this or even---",
                  "this."]
        self.check_wrap(text, 32, expect)
        self.check_wrap(text, 35, expect)

        # All of the above behaviour could be deduced by probing the
        # _split() method.
        text = "Here's an -- em-dash and--here's another---and another!"
        expect = ["Here's", " ", "an", " ", "--", " ", "em-", "dash", " ",
                  "and", "--", "here's", " ", "another", "---",
                  "and", " ", "another!"]
        self.check_split(text, expect)

        text = "and then--bam!--he was gone"
        expect = ["and", " ", "then", "--", "bam!", "--",
                  "he", " ", "was", " ", "gone"]
        self.check_split(text, expect)

    def test_unix_options(self):
        # Test that Unix-style command-line options are wrapped correctly.
        # Both Optik (OptionParser) and Docutils rely on this behaviour!

        text = "You should use the -n option, or --dry-run in its long form."
        self.check_wrap(text, 20,
                        ["You should use the",
                         "-n option, or --dry-",
                         "run in its long",
                         "form."])
        self.check_wrap(text, 21,
                        ["You should use the -n",
                         "option, or --dry-run",
                         "in its long form."])
        expect = ["You should use the -n option, or",
                  "--dry-run in its long form."]
        self.check_wrap(text, 32, expect)
        self.check_wrap(text, 34, expect)
        self.check_wrap(text, 35, expect)
        self.check_wrap(text, 38, expect)
        expect = ["You should use the -n option, or --dry-",
                  "run in its long form."]
        self.check_wrap(text, 39, expect)
        self.check_wrap(text, 41, expect)
        expect = ["You should use the -n option, or --dry-run",
                  "in its long form."]
        self.check_wrap(text, 42, expect)

        # Again, all of the above can be deduced from _split().
        text = "the -n option, or --dry-run or --dryrun"
        expect = ["the", " ", "-n", " ", "option,", " ", "or", " ",
                  "--dry-", "run", " ", "or", " ", "--dryrun"]
        self.check_split(text, expect)

    def test_funky_hyphens(self):
        # Screwy edge cases cooked up by David Goodger. All reported
        # in SF bug #596434.
        self.check_split("what the--hey!", ["what", " ", "the", "--", "hey!"])
        self.check_split("what the--", ["what", " ", "the--"])
        self.check_split("what the--.", ["what", " ", "the--."])
        self.check_split("--text--.", ["--text--."])

        # When I first read bug #596434, this is what I thought David
        # was talking about. I was wrong; these have always worked
        # fine. The real problem is tested in test_funky_parens()
        # below...
        self.check_split("--option", ["--option"])
        self.check_split("--option-opt", ["--option-", "opt"])
        self.check_split("foo --option-opt bar",
                         ["foo", " ", "--option-", "opt", " ", "bar"])

    def test_funky_parens(self):
        # Second part of SF bug #596434: long option strings inside
        # parentheses.
        self.check_split("foo (--option) bar",
                         ["foo", " ", "(--option)", " ", "bar"])

        # Related stuff -- make sure parens work in simpler contexts.
        self.check_split("foo (bar) baz",
                         ["foo", " ", "(bar)", " ", "baz"])
        self.check_split("blah (ding dong), wubba",
                         ["blah", " ", "(ding", " ", "dong),",
                          " ", "wubba"])

    def test_initial_whitespace(self):
        # SF bug #622849 reported inconsistent handling of leading
        # whitespace; let's test that a bit, shall we?
        text = " This is a sentence with leading whitespace."
        self.check_wrap(text, 50,
                        [" This is a sentence with leading whitespace."])
        self.check_wrap(text, 30,
                        [" This is a sentence with", "leading whitespace."])

    def test_unicode(self):
        # *Very* simple test of wrapping Unicode strings. I'm sure
        # there's more to it than this, but let's at least make
        # sure textwrap doesn't crash on Unicode input!
        text = u"Hello there, how are you today?"
        # type(u"") is `unicode` on Python 2 and `str` on Python 3, so the
        # type checks below avoid the Python-2-only builtin name.
        text_type = type(text)
        self.check_wrap(text, 50, [u"Hello there, how are you today?"])
        self.check_wrap(text, 20, [u"Hello there, how are", "you today?"])
        olines = self.wrapper.wrap(text)
        self.assertTrue(isinstance(olines, list))
        self.assertTrue(isinstance(olines[0], text_type))
        otext = self.wrapper.fill(text)
        self.assertTrue(isinstance(otext, text_type))

    def test_split(self):
        # Ensure that the standard _split() method works as advertised
        # in the comments

        text = "Hello there -- you goof-ball, use the -b option!"

        result = self.wrapper._split(text)
        self.check(result,
             ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
              "ball,", " ", "use", " ", "the", " ", "-b", " ", "option!"])

    def test_bad_width(self):
        # Ensure that width <= 0 is caught.
        text = "Whatever, it doesn't matter."
        self.assertRaises(ValueError, wrap, text, 0)
        self.assertRaises(ValueError, wrap, text, -1)
class LongWordTestCase(BaseTestCase):
    '''Tests for text containing a word longer than the wrap width.'''

    def setUp(self):
        self.wrapper = TextWrapper()
        self.text = '''\
Did you say "supercalifragilisticexpialidocious?"
How *do* you spell that odd word, anyways?
'''

    def test_break_long(self):
        # Wrap text with long words and lots of punctuation

        self.check_wrap(self.text, 30,
                        ['Did you say "supercalifragilis',
                         'ticexpialidocious?" How *do*',
                         'you spell that odd word,',
                         'anyways?'])
        self.check_wrap(self.text, 50,
                        ['Did you say "supercalifragilisticexpialidocious?"',
                         'How *do* you spell that odd word, anyways?'])

        # SF bug 797650. Prevent an infinite loop by making sure that at
        # least one character gets split off on every pass.
        # Every continuation line starts with the 15-space
        # subsequent_indent, which is already wider than the wrap width,
        # so only one character of the word fits per line.
        self.check_wrap('-'*10+'hello', 10,
                        ['----------',
                         '               h',
                         '               e',
                         '               l',
                         '               l',
                         '               o'],
                        subsequent_indent = ' '*15)

    def test_nobreak_long(self):
        # Test with break_long_words disabled
        self.wrapper.break_long_words = 0
        self.wrapper.width = 30
        expect = ['Did you say',
                  '"supercalifragilisticexpialidocious?"',
                  'How *do* you spell that odd',
                  'word, anyways?'
                 ]
        result = self.wrapper.wrap(self.text)
        self.check(result, expect)

        # Same thing with kwargs passed to standalone wrap() function.
        result = wrap(self.text, width=30, break_long_words=0)
        self.check(result, expect)
class IndentTestCases(BaseTestCase):
    '''Tests for the initial_indent and subsequent_indent options.'''

    # called before each test method
    def setUp(self):
        self.text = '''\
This paragraph will be filled, first without any indentation,
and then with some (including a hanging indent).'''

    def test_fill(self):
        # Test the fill() method

        expect = '''\
This paragraph will be filled, first
without any indentation, and then with
some (including a hanging indent).'''

        result = fill(self.text, 40)
        self.check(result, expect)

    def test_initial_indent(self):
        # Test initial_indent parameter

        # The five-space indent makes "first" overflow width 40 on the
        # first line, so it moves to the second line.
        expect = ["     This paragraph will be filled,",
                  "first without any indentation, and then",
                  "with some (including a hanging indent)."]
        result = wrap(self.text, 40, initial_indent="     ")
        self.check(result, expect)

        expect = "\n".join(expect)
        result = fill(self.text, 40, initial_indent="     ")
        self.check(result, expect)

    def test_subsequent_indent(self):
        # Test subsequent_indent parameter

        # Hanging indent: a four-column bullet prefix on the first line
        # and four spaces on every following line.
        expect = '''\
  * This paragraph will be filled, first
    without any indentation, and then
    with some (including a hanging
    indent).'''

        result = fill(self.text, 40,
                      initial_indent="  * ", subsequent_indent="    ")
        self.check(result, expect)
# Despite the similar names, DedentTestCase is *not* the inverse
# of IndentTestCases!
class DedentTestCase(unittest.TestCase):
    '''Tests for textwrap.dedent().'''

    def test_dedent_nomargin(self):
        # No lines indented.
        text = "Hello there.\nHow are you?\nOh good, I'm glad."
        self.assertEqual(dedent(text), text)

        # Similar, with a blank line.
        text = "Hello there.\n\nBoo!"
        self.assertEqual(dedent(text), text)

        # Some lines indented, but overall margin is still zero.
        text = "Hello there.\n This is indented."
        self.assertEqual(dedent(text), text)

        # Again, add a blank line.
        text = "Hello there.\n\n Boo!\n"
        self.assertEqual(dedent(text), text)

    def test_dedent_even(self):
        # All lines indented by two spaces.
        text = "  Hello there.\n  How are ya?\n  Oh good."
        expect = "Hello there.\nHow are ya?\nOh good."
        self.assertEqual(dedent(text), expect)

        # Same, with blank lines.
        text = "  Hello there.\n\n  How are ya?\n  Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(dedent(text), expect)

        # Now indent one of the blank lines.
        text = "  Hello there.\n  \n  How are ya?\n  Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(dedent(text), expect)

    def test_dedent_uneven(self):
        # Lines indented unevenly.
        text = '''\
        def foo():
            while 1:
                return foo
        '''
        expect = '''\
def foo():
    while 1:
        return foo
'''
        self.assertEqual(dedent(text), expect)

        # Uneven indentation with a blank line.
        text = "  Foo\n    Bar\n\n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(dedent(text), expect)

        # Uneven indentation with a whitespace-only line.
        text = "  Foo\n    Bar\n \n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(dedent(text), expect)
def test_main():
    '''Run every textwrap test case class via test_support.run_unittest().'''
    case_classes = (WrapTestCase,
                    LongWordTestCase,
                    IndentTestCases,
                    DedentTestCase)
    test_support.run_unittest(*case_classes)


# Run the whole suite when this file is executed as a script.
if __name__ == '__main__':
    test_main()