Lib/test/test_textwrap.py

   1 #
   2 # Test script for the textwrap module.
   3 #
   4 # Original tests written by Greg Ward <gward@python.net>.
   5 # Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
   6 # Currently maintained by Greg Ward.
   7 #
   8 # $Id$
   9 #
  10
  11 import unittest
  12 from test import test_support
  13
  14 from textwrap import TextWrapper, wrap, fill, dedent
  15
  16
  17 class BaseTestCase(unittest.TestCase):
  18     '''Parent class with utility methods for textwrap tests.'''
  19
  20     def show(self, textin):
  21         if isinstance(textin, list):
  22             result = []
  23             for i in range(len(textin)):
  24                 result.append("  %d: %r" % (i, textin[i]))
  25             result = '\n'.join(result)
  26         elif isinstance(textin, (str, unicode)):
  27             result = "  %s\n" % repr(textin)
  28         return result
  29
  30
  31     def check(self, result, expect):
  32         self.assertEquals(result, expect,
  33             'expected:\n%s\nbut got:\n%s' % (
  34                 self.show(expect), self.show(result)))
  35
  36     def check_wrap(self, text, width, expect, **kwargs):
  37         result = wrap(text, width, **kwargs)
  38         self.check(result, expect)
  39
  40     def check_split(self, text, expect):
  41         result = self.wrapper._split(text)
  42         self.assertEquals(result, expect,
  43                           "\nexpected %r\n"
  44                           "but got  %r" % (expect, result))
  45
  46
  47 class WrapTestCase(BaseTestCase):
  48
  49     def setUp(self):
  50         self.wrapper = TextWrapper(width=45)
  51
  52     def test_simple(self):
  53         # Simple case: just words, spaces, and a bit of punctuation
  54
  55         text = "Hello there, how are you this fine day?  I'm glad to hear it!"
  56
  57         self.check_wrap(text, 12,
  58                         ["Hello there,",
  59                          "how are you",
  60                          "this fine",
  61                          "day?  I'm",
  62                          "glad to hear",
  63                          "it!"])
  64         self.check_wrap(text, 42,
  65                         ["Hello there, how are you this fine day?",
  66                          "I'm glad to hear it!"])
  67         self.check_wrap(text, 80, [text])
  68
  69
  70     def test_whitespace(self):
  71         # Whitespace munging and end-of-sentence detection
  72
  73         text = """\
  74 This is a paragraph that already has
  75 line breaks.  But some of its lines are much longer than the others,
  76 so it needs to be wrapped.
  77 Some lines are \ttabbed too.
  78 What a mess!
  79 """
  80
  81         expect = ["This is a paragraph that already has line",
  82                   "breaks.  But some of its lines are much",
  83                   "longer than the others, so it needs to be",
  84                   "wrapped.  Some lines are  tabbed too.  What a",
  85                   "mess!"]
  86
  87         wrapper = TextWrapper(45, fix_sentence_endings=True)
  88         result = wrapper.wrap(text)
  89         self.check(result, expect)
  90
  91         result = wrapper.fill(text)
  92         self.check(result, '\n'.join(expect))
  93
  94     def test_fix_sentence_endings(self):
  95         wrapper = TextWrapper(60, fix_sentence_endings=True)
  96
  97         # SF #847346: ensure that fix_sentence_endings=True does the
  98         # right thing even on input short enough that it doesn't need to
  99         # be wrapped.
 100         text = "A short line. Note the single space."
 101         expect = ["A short line.  Note the single space."]
 102         self.check(wrapper.wrap(text), expect)
 103
 104         # Test some of the hairy end cases that _fix_sentence_endings()
 105         # is supposed to handle (the easy stuff is tested in
 106         # test_whitespace() above).
 107         text = "Well, Doctor? What do you think?"
 108         expect = ["Well, Doctor?  What do you think?"]
 109         self.check(wrapper.wrap(text), expect)
 110
 111         text = "Well, Doctor?\nWhat do you think?"
 112         self.check(wrapper.wrap(text), expect)
 113
 114         text = 'I say, chaps! Anyone for "tennis?"\nHmmph!'
 115         expect = ['I say, chaps!  Anyone for "tennis?"  Hmmph!']
 116         self.check(wrapper.wrap(text), expect)
 117
 118         wrapper.width = 20
 119         expect = ['I say, chaps!', 'Anyone for "tennis?"', 'Hmmph!']
 120         self.check(wrapper.wrap(text), expect)
 121
 122         text = 'And she said, "Go to hell!"\nCan you believe that?'
 123         expect = ['And she said, "Go to',
 124                   'hell!"  Can you',
 125                   'believe that?']
 126         self.check(wrapper.wrap(text), expect)
 127
 128         wrapper.width = 60
 129         expect = ['And she said, "Go to hell!"  Can you believe that?']
 130         self.check(wrapper.wrap(text), expect)
 131
 132     def test_wrap_short(self):
 133         # Wrapping to make short lines longer
 134
 135         text = "This is a\nshort paragraph."
 136
 137         self.check_wrap(text, 20, ["This is a short",
 138                                    "paragraph."])
 139         self.check_wrap(text, 40, ["This is a short paragraph."])
 140
 141
 142     def test_wrap_short_1line(self):
 143         # Test endcases
 144
 145         text = "This is a short line."
 146
 147         self.check_wrap(text, 30, ["This is a short line."])
 148         self.check_wrap(text, 30, ["(1) This is a short line."],
 149                         initial_indent="(1) ")
 150
 151
 152     def test_hyphenated(self):
 153         # Test breaking hyphenated words
 154
 155         text = ("this-is-a-useful-feature-for-"
 156                 "reformatting-posts-from-tim-peters'ly")
 157
 158         self.check_wrap(text, 40,
 159                         ["this-is-a-useful-feature-for-",
 160                          "reformatting-posts-from-tim-peters'ly"])
 161         self.check_wrap(text, 41,
 162                         ["this-is-a-useful-feature-for-",
 163                          "reformatting-posts-from-tim-peters'ly"])
 164         self.check_wrap(text, 42,
 165                         ["this-is-a-useful-feature-for-reformatting-",
 166                          "posts-from-tim-peters'ly"])
 167
 168     def test_em_dash(self):
 169         # Test text with em-dashes
 170         text = "Em-dashes should be written -- thus."
 171         self.check_wrap(text, 25,
 172                         ["Em-dashes should be",
 173                          "written -- thus."])
 174
 175         # Probe the boundaries of the properly written em-dash,
 176         # ie. " -- ".
 177         self.check_wrap(text, 29,
 178                         ["Em-dashes should be written",
 179                          "-- thus."])
 180         expect = ["Em-dashes should be written --",
 181                   "thus."]
 182         self.check_wrap(text, 30, expect)
 183         self.check_wrap(text, 35, expect)
 184         self.check_wrap(text, 36,
 185                         ["Em-dashes should be written -- thus."])
 186
 187         # The improperly written em-dash is handled too, because
 188         # it's adjacent to non-whitespace on both sides.
 189         text = "You can also do--this or even---this."
 190         expect = ["You can also do",
 191                   "--this or even",
 192                   "---this."]
 193         self.check_wrap(text, 15, expect)
 194         self.check_wrap(text, 16, expect)
 195         expect = ["You can also do--",
 196                   "this or even---",
 197                   "this."]
 198         self.check_wrap(text, 17, expect)
 199         self.check_wrap(text, 19, expect)
 200         expect = ["You can also do--this or even",
 201                   "---this."]
 202         self.check_wrap(text, 29, expect)
 203         self.check_wrap(text, 31, expect)
 204         expect = ["You can also do--this or even---",
 205                   "this."]
 206         self.check_wrap(text, 32, expect)
 207         self.check_wrap(text, 35, expect)
 208
 209         # All of the above behaviour could be deduced by probing the
 210         # _split() method.
 211         text = "Here's an -- em-dash and--here's another---and another!"
 212         expect = ["Here's", " ", "an", " ", "--", " ", "em-", "dash", " ",
 213                   "and", "--", "here's", " ", "another", "---",
 214                   "and", " ", "another!"]
 215         self.check_split(text, expect)
 216
 217         text = "and then--bam!--he was gone"
 218         expect = ["and", " ", "then", "--", "bam!", "--",
 219                   "he", " ", "was", " ", "gone"]
 220         self.check_split(text, expect)
 221
 222
 223     def test_unix_options (self):
 224         # Test that Unix-style command-line options are wrapped correctly.
 225         # Both Optik (OptionParser) and Docutils rely on this behaviour!
 226
 227         text = "You should use the -n option, or --dry-run in its long form."
 228         self.check_wrap(text, 20,
 229                         ["You should use the",
 230                          "-n option, or --dry-",
 231                          "run in its long",
 232                          "form."])
 233         self.check_wrap(text, 21,
 234                         ["You should use the -n",
 235                          "option, or --dry-run",
 236                          "in its long form."])
 237         expect = ["You should use the -n option, or",
 238                   "--dry-run in its long form."]
 239         self.check_wrap(text, 32, expect)
 240         self.check_wrap(text, 34, expect)
 241         self.check_wrap(text, 35, expect)
 242         self.check_wrap(text, 38, expect)
 243         expect = ["You should use the -n option, or --dry-",
 244                   "run in its long form."]
 245         self.check_wrap(text, 39, expect)
 246         self.check_wrap(text, 41, expect)
 247         expect = ["You should use the -n option, or --dry-run",
 248                   "in its long form."]
 249         self.check_wrap(text, 42, expect)
 250
 251         # Again, all of the above can be deduced from _split().
 252         text = "the -n option, or --dry-run or --dryrun"
 253         expect = ["the", " ", "-n", " ", "option,", " ", "or", " ",
 254                   "--dry-", "run", " ", "or", " ", "--dryrun"]
 255         self.check_split(text, expect)
 256
 257     def test_funky_hyphens (self):
 258         # Screwy edge cases cooked up by David Goodger.  All reported
 259         # in SF bug #596434.
 260         self.check_split("what the--hey!", ["what", " ", "the", "--", "hey!"])
 261         self.check_split("what the--", ["what", " ", "the--"])
 262         self.check_split("what the--.", ["what", " ", "the--."])
 263         self.check_split("--text--.", ["--text--."])
 264
 265         # When I first read bug #596434, this is what I thought David
 266         # was talking about.  I was wrong; these have always worked
 267         # fine.  The real problem is tested in test_funky_parens()
 268         # below...
 269         self.check_split("--option", ["--option"])
 270         self.check_split("--option-opt", ["--option-", "opt"])
 271         self.check_split("foo --option-opt bar",
 272                          ["foo", " ", "--option-", "opt", " ", "bar"])
 273
 274     def test_funky_parens (self):
 275         # Second part of SF bug #596434: long option strings inside
 276         # parentheses.
 277         self.check_split("foo (--option) bar",
 278                          ["foo", " ", "(--option)", " ", "bar"])
 279
 280         # Related stuff -- make sure parens work in simpler contexts.
 281         self.check_split("foo (bar) baz",
 282                          ["foo", " ", "(bar)", " ", "baz"])
 283         self.check_split("blah (ding dong), wubba",
 284                          ["blah", " ", "(ding", " ", "dong),",
 285                           " ", "wubba"])
 286
 287     def test_initial_whitespace(self):
 288         # SF bug #622849 reported inconsistent handling of leading
 289         # whitespace; let's test that a bit, shall we?
 290         text = " This is a sentence with leading whitespace."
 291         self.check_wrap(text, 50,
 292                         [" This is a sentence with leading whitespace."])
 293         self.check_wrap(text, 30,
 294                         [" This is a sentence with", "leading whitespace."])
 295
 296     def test_unicode(self):
 297         # *Very* simple test of wrapping Unicode strings.  I'm sure
 298         # there's more to it than this, but let's at least make
 299         # sure textwrap doesn't crash on Unicode input!
 300         text = u"Hello there, how are you today?"
 301         self.check_wrap(text, 50, [u"Hello there, how are you today?"])
 302         self.check_wrap(text, 20, [u"Hello there, how are", "you today?"])
 303         olines = self.wrapper.wrap(text)
 304         assert isinstance(olines, list) and isinstance(olines[0], unicode)
 305         otext = self.wrapper.fill(text)
 306         assert isinstance(otext, unicode)
 307
 308     def test_split(self):
 309         # Ensure that the standard _split() method works as advertised
 310         # in the comments
 311
 312         text = "Hello there -- you goof-ball, use the -b option!"
 313
 314         result = self.wrapper._split(text)
 315         self.check(result,
 316              ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
 317               "ball,", " ", "use", " ", "the", " ", "-b", " ",  "option!"])
 318
 319     def test_bad_width(self):
 320         # Ensure that width <= 0 is caught.
 321         text = "Whatever, it doesn't matter."
 322         self.assertRaises(ValueError, wrap, text, 0)
 323         self.assertRaises(ValueError, wrap, text, -1)
 324
 325
 326 class LongWordTestCase (BaseTestCase):
 327     def setUp(self):
 328         self.wrapper = TextWrapper()
 329         self.text = '''\
 330 Did you say "supercalifragilisticexpialidocious?"
 331 How *do* you spell that odd word, anyways?
 332 '''
 333
 334     def test_break_long(self):
 335         # Wrap text with long words and lots of punctuation
 336
 337         self.check_wrap(self.text, 30,
 338                         ['Did you say "supercalifragilis',
 339                          'ticexpialidocious?" How *do*',
 340                          'you spell that odd word,',
 341                          'anyways?'])
 342         self.check_wrap(self.text, 50,
 343                         ['Did you say "supercalifragilisticexpialidocious?"',
 344                          'How *do* you spell that odd word, anyways?'])
 345
 346         # SF bug 797650.  Prevent an infinite loop by making sure that at
 347         # least one character gets split off on every pass.
 348         self.check_wrap('-'*10+'hello', 10,
 349                         ['----------',
 350                          '               h',
 351                          '               e',
 352                          '               l',
 353                          '               l',
 354                          '               o'],
 355                         subsequent_indent = ' '*15)
 356
 357     def test_nobreak_long(self):
 358         # Test with break_long_words disabled
 359         self.wrapper.break_long_words = 0
 360         self.wrapper.width = 30
 361         expect = ['Did you say',
 362                   '"supercalifragilisticexpialidocious?"',
 363                   'How *do* you spell that odd',
 364                   'word, anyways?'
 365                   ]
 366         result = self.wrapper.wrap(self.text)
 367         self.check(result, expect)
 368
 369         # Same thing with kwargs passed to standalone wrap() function.
 370         result = wrap(self.text, width=30, break_long_words=0)
 371         self.check(result, expect)
 372
 373
 374 class IndentTestCases(BaseTestCase):
 375
 376     # called before each test method
 377     def setUp(self):
 378         self.text = '''\
 379 This paragraph will be filled, first without any indentation,
 380 and then with some (including a hanging indent).'''
 381
 382
 383     def test_fill(self):
 384         # Test the fill() method
 385
 386         expect = '''\
 387 This paragraph will be filled, first
 388 without any indentation, and then with
 389 some (including a hanging indent).'''
 390
 391         result = fill(self.text, 40)
 392         self.check(result, expect)
 393
 394
 395     def test_initial_indent(self):
 396         # Test initial_indent parameter
 397
 398         expect = ["     This paragraph will be filled,",
 399                   "first without any indentation, and then",
 400                   "with some (including a hanging indent)."]
 401         result = wrap(self.text, 40, initial_indent="     ")
 402         self.check(result, expect)
 403
 404         expect = "\n".join(expect)
 405         result = fill(self.text, 40, initial_indent="     ")
 406         self.check(result, expect)
 407
 408
 409     def test_subsequent_indent(self):
 410         # Test subsequent_indent parameter
 411
 412         expect = '''\
 413   * This paragraph will be filled, first
 414     without any indentation, and then
 415     with some (including a hanging
 416     indent).'''
 417
 418         result = fill(self.text, 40,
 419                       initial_indent="  * ", subsequent_indent="    ")
 420         self.check(result, expect)
 421
 422
# Despite the similar names, DedentTestCase is *not* the inverse
# of IndentTestCases!
 425 class DedentTestCase(unittest.TestCase):
 426
 427     def test_dedent_nomargin(self):
 428         # No lines indented.
 429         text = "Hello there.\nHow are you?\nOh good, I'm glad."
 430         self.assertEquals(dedent(text), text)
 431
 432         # Similar, with a blank line.
 433         text = "Hello there.\n\nBoo!"
 434         self.assertEquals(dedent(text), text)
 435
 436         # Some lines indented, but overall margin is still zero.
 437         text = "Hello there.\n  This is indented."
 438         self.assertEquals(dedent(text), text)
 439
 440         # Again, add a blank line.
 441         text = "Hello there.\n\n  Boo!\n"
 442         self.assertEquals(dedent(text), text)
 443
 444     def test_dedent_even(self):
 445         # All lines indented by two spaces.
 446         text = "  Hello there.\n  How are ya?\n  Oh good."
 447         expect = "Hello there.\nHow are ya?\nOh good."
 448         self.assertEquals(dedent(text), expect)
 449
 450         # Same, with blank lines.
 451         text = "  Hello there.\n\n  How are ya?\n  Oh good.\n"
 452         expect = "Hello there.\n\nHow are ya?\nOh good.\n"
 453         self.assertEquals(dedent(text), expect)
 454
 455         # Now indent one of the blank lines.
 456         text = "  Hello there.\n  \n  How are ya?\n  Oh good.\n"
 457         expect = "Hello there.\n\nHow are ya?\nOh good.\n"
 458         self.assertEquals(dedent(text), expect)
 459
 460     def test_dedent_uneven(self):
 461         # Lines indented unevenly.
 462         text = '''\
 463         def foo():
 464             while 1:
 465                 return foo
 466         '''
 467         expect = '''\
 468 def foo():
 469     while 1:
 470         return foo
 471 '''
 472         self.assertEquals(dedent(text), expect)
 473
 474         # Uneven indentation with a blank line.
 475         text = "  Foo\n    Bar\n\n   Baz\n"
 476         expect = "Foo\n  Bar\n\n Baz\n"
 477         self.assertEquals(dedent(text), expect)
 478
 479         # Uneven indentation with a whitespace-only line.
 480         text = "  Foo\n    Bar\n \n   Baz\n"
 481         expect = "Foo\n  Bar\n\n Baz\n"
 482         self.assertEquals(dedent(text), expect)
 483
 484
 485
 486 def test_main():
 487     test_support.run_unittest(WrapTestCase,
 488                               LongWordTestCase,
 489                               IndentTestCases,
 490                               DedentTestCase)
 491
 492 if __name__ == '__main__':
 493     test_main()