i18npool/source/breakiterator/data/line.txt

   1 # Copyright (c) 2002-2006  International Business Machines Corporation and
   2 # others. All Rights Reserved.
   3 #
   4 #  file:  line.txt
   5 #
   6 #         Line Breaking Rules
   7 #         Implement default line breaking as defined by Unicode Standard Annex #14 version 5.0.0
   8 #         http://www.unicode.org/reports/tr14/
   9
  10
  11
  12 #
  13 #  Character Classes defined by TR 14.
  14 #
  15
  16 !!chain;                  # enable rule chaining
  17 !!LBCMNoChain;            # ...but not across $CM (see the AL_FOLLOW comment further down)
  18
  19
  20 !!lookAheadHardBreak;     # explained in the comment block that follows
  21 #
  22 #  !!lookAheadHardBreak    Described here because it is (as yet) undocumented elsewhere
  23 #                          and only used for the line break rules.
  24 #
  25 #           It is used in the implementation of the incredibly annoying rule LB 10
  26 #           which says to treat any combining mark that is not attached to a base
  27 #           character as if it were of class AL  (alphabetic).
  28 #
  29 #           The problem occurs in the reverse rules.
  30 #
  31 #           Consider a sequence like, with correct breaks as shown
  32 #               LF  ID  CM  AL  AL
  33 #                  ^       ^       ^
  34 #           Then consider the sequence without the initial ID (ideographic)
  35 #                 LF  CM  AL  AL
  36 #                    ^           ^
  37 #           Our CM, which in the first example was attached to the ideograph,
  38 #           is now unattached, becomes an alpha, and joins in with the other
  39 #           alphas.
  40 #
  41 #           When iterating forwards, these sequences do not present any problems
  42 #           When iterating backwards, we need to look ahead when encountering
  43 #           a CM to see whether it attaches to something further on or not.
  44 #           (Look-ahead in a reverse rule is looking towards the start)
  45 #
  46 #           If the CM is unattached, we need to force a break.
  47 #
  48 #           !!lookAheadHardBreak forces the run time state machine to
  49 #           stop immediately when a look ahead rule ( '/' operator) matches,
  50 #           and set the match position to that of the look-ahead operator,
  51 #           no matter what other rules may be in play at the time.
  52 #
  53 #           See rule LB 19 for an example.
  54 #
  55
  56 $AI = [:LineBreak =  Ambiguous:];
  57 $DG = \u00B0;                                       # U+00B0 DEGREE SIGN: added to $AL, subtracted from $OP
  58 $AL = [[:LineBreak =  Alphabetic:] $DG];
  59 $BA = [:LineBreak =  Break_After:];
  60 $BB = [:LineBreak =  Break_Before:];
  61 $BK = [:LineBreak =  Mandatory_Break:];
  62 $B2 = [:LineBreak =  Break_Both:];
  63 $CB = [:LineBreak =  Contingent_Break:];
  64 $CL = [:LineBreak =  Close_Punctuation:] ;
  65 $CM = [:LineBreak =  Combining_Mark:];
  66 $CR = [:LineBreak =  Carriage_Return:];
  67 $EX = [:LineBreak =  Exclamation:];
  68 $GL = [:LineBreak =  Glue:];
  69 $HY = [:LineBreak =  Hyphen:];
  70 $H2 = [:LineBreak =  H2:];
  71 $H3 = [:LineBreak =  H3:];
  72 $ID = [[:LineBreak =  Ideographic:] - [\ufe30]];    # U+FE30 is removed from $ID and added to $IS
  73 $IN = [:LineBreak =  Inseperable:];                 # sic: Unicode keeps this spelling as a value alias of Inseparable; not a typo to fix
  74 $IS = [[:LineBreak =  Infix_Numeric:] [\ufe30]];
  75 $JL = [:LineBreak =  JL:];
  76 $JV = [:LineBreak =  JV:];
  77 $JT = [:LineBreak =  JT:];
  78 $LF = [:LineBreak =  Line_Feed:];
  79 $NL = [:LineBreak =  Next_Line:];
  80 $NS = [:LineBreak =  Nonstarter:];
  81 $NU = [:LineBreak =  Numeric:];
  82 $OP = [[:LineBreak =  Open_Punctuation:] - $DG];
  83 $PO = [:LineBreak =  Postfix_Numeric:];
  84 $BS = \u005C;                                       # U+005C REVERSE SOLIDUS: removed from $PR, added to $SY
  85 $PR = [[:LineBreak =  Prefix_Numeric:] - $BS];
  86 $QU = [:LineBreak =  Quotation:];
  87 $SA = [:LineBreak =  Complex_Context:];
  88 $SG = [:LineBreak =  Surrogate:];
  89 $SP = [:LineBreak =  Space:];
  90 $SY = [[:LineBreak =  Break_Symbols:] $BS];
  91 $WJ = [:LineBreak =  Word_Joiner:];
  92 $XX = [:LineBreak =  Unknown:];
  93 $ZW = [:LineBreak =  ZWSpace:];
  94
  95 #   Dictionary character set, for triggering language-based break engines. Currently
  96 #   limited to LineBreak=Complex_Context. Note that this set only works in Unicode
  97 #   5.0 or later as the definition of Complex_Context was corrected to include all
  98 #   characters requiring dictionary break.
  99
 100 $dictionary = [:LineBreak = Complex_Context:];      # same property expression as $SA
 101
 102 #
 103 #  Rule LB1.  By default, treat AI  (characters with ambiguous East Asian width),
 104 #                               SA  (South East Asian: Thai, Lao, Khmer)
 105 #                               SG  (Unpaired Surrogates)
 106 #                               XX  (Unknown, unassigned)
 107 #                         as $AL  (Alphabetic)
 108 #
 109 $ALPlus = [$AL $AI $SA $SG $XX];
 110
 111 #
 112 #  Combining Marks.   X $CM*  behaves as if it were X.  Rule LB 9 (LB6 presumably in an older UAX #14 numbering -- TODO confirm).
 113 #
 114 $ALcm = $ALPlus $CM*;           # built on $ALPlus (AL plus the LB1 classes), not on bare $AL
 115 $BAcm = $BA $CM*;
 116 $BBcm = $BB $CM*;
 117 $B2cm = $B2 $CM*;
 118 $CLcm = $CL $CM*;
 119 $EXcm = $EX $CM*;
 120 $GLcm = $GL $CM*;
 121 $HYcm = $HY $CM*;
 122 $H2cm = $H2 $CM*;
 123 $H3cm = $H3 $CM*;
 124 $IDcm = $ID $CM*;
 125 $INcm = $IN $CM*;
 126 $IScm = $IS $CM*;
 127 $JLcm = $JL $CM*;
 128 $JVcm = $JV $CM*;
 129 $JTcm = $JT $CM*;
 130 $NScm = $NS $CM*;
 131 $NUcm = $NU $CM*;
 132 $OPcm = $OP $CM*;
 133 $POcm = $PO $CM*;
 134 $PRcm = $PR $CM*;
 135 $QUcm = $QU $CM*;
 136 $SYcm = $SY $CM*;
 137 $WJcm = $WJ $CM*;
 138
 139 ## -------------------------------------------------
 140
 141 !!forward;
 142
 143 #
 144 #  Each class of character can stand by itself as an unbroken token, together with its trailing combining marks ($CM+)
 145 #
 146 $ALPlus $CM+;
 147 $BA $CM+;
 148 $BB $CM+;
 149 $B2 $CM+;
 150 $CL $CM+;
 151 $EX $CM+;
 152 $GL $CM+;
 153 $HY $CM+;
 154 $H2 $CM+;
 155 $H3 $CM+;
 156 $ID $CM+;
 157 $IN $CM+;
 158 $IS $CM+;
 159 $JL $CM+;
 160 $JV $CM+;
 161 $JT $CM+;
 162 $NS $CM+;
 163 $NU $CM+;
 164 $OP $CM+;
 165 $PO $CM+;
 166 $PR $CM+;
 167 $QU $CM+;
 168 $SY $CM+;
 169 $WJ $CM+;
 170
 171 #
 172 # CAN_CM  is the set of characters that may combine with CM combining chars.
 173 #         Note that Linebreak UAX 14's concept of a combining char and the rules
 174 #         for what they can combine with are _very_ different from the rest of Unicode.
 175 #
 176 #         Note that $CM itself is left out of this set.  If CM is needed as a base
 177 #         it must be listed separately in the rule.
 178 #
 179 $CAN_CM  = [^$SP $BK $CR $LF $NL $ZW $CM];       # Bases that can   take CMs
 180 $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs  (exact complement of $CAN_CM)
 181
 182 #
 183 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 184 #            Needed in rules where stand-alone $CM s are treated as AL.
 185 #            Chaining is disabled with CM because it causes other failures,
 186 #            so for this one case we need to manually list out longer sequences.
 187 #
 188 $AL_FOLLOW_NOCM = [$BK $CR $LF $NL $ZW $SP];     # followers that are in $CANT_CM (cannot carry trailing CMs)
 189 $AL_FOLLOW_CM   = [$CL $EX $IS $SY $WJ $GL $QU $BA $HY $NS $IN $NU $ALPlus $OP];    # followers that may carry trailing CMs
 190 $AL_FOLLOW      = [$AL_FOLLOW_NOCM $AL_FOLLOW_CM];
 191
 192
 193 #
 194 #  Rule LB 4, 5    Mandatory (Hard) breaks.
 195 #
 196 $LB4Breaks    = [$BK $CR $LF $NL];
 197 $LB4NonBreaks = [^$BK $CR $LF $NL];
 198 $CR $LF {100};                           # keep CR LF together; {100} is the rule status ICU reports for a hard break (UBRK_LINE_HARD)
 199
 200 #
 201 #  LB 6    Do not break before hard line breaks.
 202 #
 203 $LB4NonBreaks?  $LB4Breaks {100};    # LB 6  do not break before hard breaks.
 204 $CAN_CM $CM*    $LB4Breaks {100};
 205 $CM+            $LB4Breaks {100};
 206
 207 # LB 7         x SP
 208 #              x ZW
 209 $LB4NonBreaks [$SP $ZW];
 210 $CAN_CM $CM*  [$SP $ZW];
 211 $CM+          [$SP $ZW];
 212
 213 #
 214 # LB 8         Break after zero width space
 215 #
 216 $LB8Breaks    = [$LB4Breaks $ZW];
 217 $LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
 218
 219
 220 # LB 9     Combining marks.      X   $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL
 221 # LB 10                          $CM not covered by the above needs to behave like $AL
 222 #                                See definition of $CAN_CM.
 223
 224 $CAN_CM $CM+;                   #  Stick together any combining sequences that don't match other rules.
 225 $CM+;                           #  A run of CMs with no base in front of it (the LB 10 case).
 226
 227 #
 228 # LB 11  Do not break before or after WORD JOINER & related characters.
 229 #
 230 $CAN_CM $CM*  $WJcm;                #   . x WJ   (base with its CMs)
 231 $LB8NonBreaks $WJcm;
 232 $CM+          $WJcm;                #   stand-alone CMs before WJ
 233
 234 $WJcm [^$CAN_CM];                   #   WJ x .   (next char cannot take CMs)
 235 $WJcm $CAN_CM $CM*;                 #   WJ x .   (next char with its CMs)
 236
 237 #
 238 # LB 12  Do not break before or after NBSP and related characters.
 239 #
 240 #         (!SP) x GL
 241 [$LB8NonBreaks-$SP] $CM* $GLcm;
 242 $CM+               $GLcm;
 243
 244 #         GL  x
 245 $GLcm ($LB8Breaks | $SP);
 246 $GLcm [$LB8NonBreaks-$SP] $CM*;     # Don't let a combining mark go onto $CR, $BK, etc.
 247                               #  TODO:  I don't think we need this rule.
 248                               #         All but $CM will chain off of preceding rule.
 249                               #         $GLcm will pick up the CM case by itself.
 250
 251
 252
 253
 254 #
 255 # LB 13   Don't break before ']' or '!' or ';' or '/', even after spaces.   (classes CL, EX, IS, SY)
 256 #
 257 $LB8NonBreaks $CL;
 258 $CAN_CM $CM*  $CL;
 259 $CM+          $CL;              # by rule LB 10, stand-alone CM behaves as AL
 260
 261 $LB8NonBreaks $EX;
 262 $CAN_CM $CM*  $EX;
 263 $CM+          $EX;              # by rule LB 10, stand-alone CM behaves as AL
 264
 265 $LB8NonBreaks $IS;
 266 $CAN_CM $CM*  $IS;
 267 $CM+          $IS;              # by rule LB 10, stand-alone CM behaves as AL
 268
 269 $LB8NonBreaks $SY;
 270 $CAN_CM $CM*  $SY;
 271 $CM+          $SY;              # by rule LB 10, stand-alone CM behaves as AL
 272
 273
 274 #
 275 # LB 14  Do not break after OP, even after spaces
 276 #
 277 $OPcm $SP* $CAN_CM $CM*;
 278 $OPcm $SP* $CANT_CM;
 279
 280 $OPcm $SP+ $CM+ $AL_FOLLOW?;    # by rule LB 10, stand-alone CM behaves as AL
 281
 282 # LB 15   (QU SP* x OP)  -- the rule below is commented out, so it is not applied
 283 # $QUcm $SP* $OPcm;
 284
 285 # LB 16   CL SP* x NS
 286 $CLcm $SP* $NScm;
 287
 288 # LB 17   B2 SP* x B2
 289 $B2cm $SP* $B2cm;
 290
 291 #
 292 # LB 18  Break after spaces.
 293 #
 294 $LB18NonBreaks = [$LB8NonBreaks - [$SP]];
 295 $LB18Breaks    = [$LB8Breaks $SP];
 296
 297
 298 # LB 19  Do not break before or after quotation marks.
 299 #         x QU
 300 $LB18NonBreaks $CM* $QUcm;
 301 $CM+                $QUcm;
 302
 303 #         QU  x
 304 $QUcm .?;                     # QU followed by any one character, or by nothing
 305 $QUcm $LB18NonBreaks $CM*;    # Don't let a combining mark go onto $CR, $BK, etc.
 306                               #  TODO:  I don't think this rule is needed.
 307
 308
 309 # LB 20  Break before and after CB.  No keep-together rule is written for it:
 310 #        <break>  $CB
 311 #        $CB   <break>         ($CB is simply left out of $LB20NonBreaks below)
 312
 313 $LB20NonBreaks = [$LB18NonBreaks - $CB];
 314
 315 # LB 21        x   (BA | HY | NS)
 316 #           BB x
 317 #
 318 $LB20NonBreaks $CM* ($BAcm | $HYcm | $NScm);
 319
 320 $BBcm [^$CB];                                  #  $BB  x
 321 $BBcm $LB20NonBreaks $CM*;
 322
 323 # LB 22   (AL | ID | IN | NU) x IN
 324 $ALcm    $INcm;
 325 $CM+     $INcm;     #  by rule LB 10, any otherwise unattached CM behaves as AL
 326 $IDcm    $INcm;
 327 $INcm    $INcm;
 328 $NUcm    $INcm;
 329
 330
 331 # LB 23
 332 $IDcm  $POcm;
 333 $ALcm  $NUcm;       # includes $LB19
 334 $CM+   $NUcm;       # Rule LB 10, any otherwise unattached CM behaves as AL
 335 $NUcm  $ALcm;
 336
 337 #
 338 # LB 24
 339 #
 340 $PRcm $IDcm;
 341 $ALcm $PRcm;
 342 $PRcm $ALcm;
 343 $POcm $ALcm;
 344
 345 #
 346 # LB 25   Numbers.   One pattern: (PR|PO)? OP? NU (NU|SY|IS)* CL? (PR|PO)?, each element with its trailing CMs.
 347 #
 348 ($PRcm | $POcm)? ($OPcm)? $NUcm ($NUcm | $SYcm | $IScm)* $CLcm? ($PRcm | $POcm)?;
 349
 350 # LB 26  Do not break a Korean syllable
 351 #
 352 $JLcm ($JLcm | $JVcm | $H2cm | $H3cm);
 353 ($JVcm | $H2cm) ($JVcm | $JTcm);
 354 ($JTcm | $H3cm) $JTcm;
 355
 356 # LB 27  Treat Korean Syllable Block the same as ID  (don't break it)
 357 ($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $INcm;
 358 ($JLcm | $JVcm | $JTcm | $H2cm | $H3cm) $POcm;
 359 $PRcm ($JLcm | $JVcm | $JTcm | $H2cm | $H3cm);
 360
 361
 362 # LB 28   Do not break between alphabetics
 363 #
 364 $ALcm $ALcm;
 365 $CM+ $ALcm;      # The $CM+ is from rule LB 10, an unattached CM is treated as AL
 366
 367 # LB 29   IS x (AL | NU)
 368 $IScm ($ALcm | $NUcm);
 369
 370 #
 371 # Rule 30   Do not break between letters, numbers or ordinary symbols
 372 #           and opening or closing punctuation
 373 #
 374 ($ALcm | $NUcm) $OPcm;
 375 $CM+ $OPcm;      # by rule LB 10, stand-alone CM behaves as AL
 376 $CLcm ($ALcm | $NUcm);
 377
 378
 379
 380 #  Reverse Rules.
 381 #  Patterns are written in the order the characters are met when moving backwards,
 382 #  so each rule is the mirror image of its forward counterpart (e.g. $CM+ $ALPlus for $ALPlus $CM+).
 383 ## -------------------------------------------------
 384
 385 !!reverse;
 386
 387 $CM+ $ALPlus;
 388 $CM+ $BA;
 389 $CM+ $BB;
 390 $CM+ $B2;
 391 $CM+ $CL;
 392 $CM+ $EX;
 393 $CM+ $GL;
 394 $CM+ $HY;
 395 $CM+ $H2;
 396 $CM+ $H3;
 397 $CM+ $ID;
 398 $CM+ $IN;
 399 $CM+ $IS;
 400 $CM+ $JL;
 401 $CM+ $JV;
 402 $CM+ $JT;
 403 $CM+ $NS;
 404 $CM+ $NU;
 405 $CM+ $OP;
 406 $CM+ $PO;
 407 $CM+ $PR;
 408 $CM+ $QU;
 409 $CM+ $SY;
 410 $CM+ $WJ;
 411 $CM+;
 412
 413
 414 #
 415 #  Sequences of the form  (shown forwards)
 416 #      [CANT_CM]  <break>  [CM]  [whatever]
 417 #  The CM needs to behave as an AL
 418 #
 419 $AL_FOLLOW $CM+ / (
 420           [$BK $CR $LF $NL $ZW {eof}] |
 421           $SP+ $CM+ $SP |
 422           $SP+ $CM* ([^$OP $CM $SP] | [$AL {eof}]));   # if LB 14 will match, need to suppress this break.
 423                                                #  LB14 says    OP SP* x .
 424                                                #    becomes    OP SP* x AL
 425                                                #    becomes    OP SP* x CM+ AL_FOLLOW
 426                                                #
 427                                                # Further note:  the $AL in [$AL {eof}] is only to work around
 428                                                #                a rule compiler bug which complains about
 429                                                #                empty sets otherwise.
 430
 431 #
 432 #  Sequences of the form  (shown forwards)
 433 #      [CANT_CM]  <break> [CM]  <break>  [PR]
 434 #  The CM needs to behave as an AL
 435 #  This rule is concerned with getting the second of the two <breaks> in place.
 436 #
 437
 438 [$PR   ] / $CM+ [$BK $CR $LF $NL $ZW $SP {eof}];
 439
 440
 441
 442 # LB 4, 5, 6
 443
 444 $LB4Breaks [$LB4NonBreaks-$CM];
 445 $LB4Breaks $CM+ $CAN_CM;
 446 $LF $CR;                              # mirror of the forward rule  $CR $LF
 447
 448
 449 # LB 7         x SP
 450 #              x ZW
 451 [$SP $ZW] [$LB4NonBreaks-$CM];
 452 [$SP $ZW] $CM+ $CAN_CM;
 453
 454 # LB 8 Break after zero width space  (no rule is written here for it)
 455
 456
 457 # LB 9,10  Combining marks.
 458 #    X   $CM needs to behave like X, where X is not $SP or controls.
 459 #    $CM not covered by the above needs to behave like $AL
 460 # Stick together any combining sequences that don't match other rules.
 461 $CM+ $CAN_CM;
 462
 463
 464 # LB 11
 465 $CM* $WJ $CM* $CAN_CM;                  #   . x WJ
 466 $CM* $WJ      [$LB8NonBreaks-$CM];
 467
 468      $CANT_CM $CM* $WJ;                 #   WJ x .
 469 $CM* $CAN_CM  $CM* $WJ;
 470
 471 # LB 12
 472 #         x GL
 473 #
 474 $CM* $GL $CM* [$LB8NonBreaks-$CM-$SP];
 475
 476 #
 477 #     GL  x
 478 #
 479 $CANT_CM $CM* $GL;
 480 $CM* $CAN_CM $CM* $GL;
 481
 482
 483 # LB 13
 484 $CL $CM+ $CAN_CM;
 485 $EX $CM+ $CAN_CM;
 486 $IS $CM+ $CAN_CM;
 487 $SY $CM+ $CAN_CM;
 488
 489 $CL [$LB8NonBreaks-$CM];
 490 $EX [$LB8NonBreaks-$CM];
 491 $IS [$LB8NonBreaks-$CM];
 492 $SY [$LB8NonBreaks-$CM];
 493
 494 # Rule 13 & 14 taken together for an edge case.
 495 #   Match this, shown forward
 496 #     OP SP+  ($CM+ behaving as $AL) (CL | EX | IS | SY)
 497 #   This really wants to chain at the $CM+ (which is acting as an $AL)
 498 #   except for $CM chaining being disabled.
 499 [$CL $EX $IS $SY] $CM+ $SP+ $CM* $OP;
 500
 501 # LB 14    OP SP* x
 502 #
 503 $CM* $CAN_CM    $SP* $CM* $OP;
 504      $CANT_CM   $SP* $CM* $OP;
 505 $AL_FOLLOW? $CM+  $SP $SP* $CM* $OP;     #  by LB 10, behaves like $AL_FOLLOW? $AL $SP* $CM* $OP
 506
 507      $AL_FOLLOW_NOCM $CM+ $SP+ $CM* $OP;
 508 $CM* $AL_FOLLOW_CM   $CM+ $SP+ $CM* $OP;
 509 $SY $CM $SP+ $OP;   # TODO:  Experiment.  Remove.
 510
 511
 512
 513 # LB 15   (QU SP* x OP)  -- the rule below is commented out, so it is not applied
 514 # $CM* $OP $SP* $CM* $QU;
 515
 516 # LB 16   CL SP* x NS   (written backwards)
 517 $CM* $NS $SP* $CM* $CL;
 518
 519 # LB 17   B2 SP* x B2   (written backwards)
 520 $CM* $B2 $SP* $CM* $B2;
 521
 522 # LB 18  break after spaces
 523 #        Nothing explicit needed here.
 524
 525
 526 #
 527 # LB 19
 528 #
 529 $CM* $QU $CM* $CAN_CM;                                #   . x QU
 530 $CM* $QU      $LB18NonBreaks;
 531
 532
 533 $CM* $CAN_CM  $CM* $QU;                               #   QU x .
 534      $CANT_CM $CM* $QU;
 535
 536 #
 537 #  LB 20  Break before and after CB.
 538 #         nothing needed here.
 539 #
 540
 541 # LB 21
 542 $CM* ($BA | $HY | $NS) $CM* [$LB20NonBreaks-$CM];     #  . x (BA | HY | NS)
 543
 544 $CM* [$LB20NonBreaks-$CM] $CM* $BB;                   #  BB x .
 545 [^$CB] $CM* $BB;                                      #  BB x .  (any following char except CB)
 546
 547
 548
 549 # LB 22
 550 $CM* $IN $CM* $ALPlus;
 551 $CM* $IN $CM* $ID;
 552 $CM* $IN $CM* $IN;
 553 $CM* $IN $CM* $NU;
 554
 555 # LB 23
 556 $CM* $PO $CM* $ID;
 557 $CM* $NU $CM* $ALPlus;
 558 $CM* $ALPlus $CM* $NU;
 559
 560 # LB 24
 561 $CM* $ID $CM* $PR;
 562 $CM* $PR $CM* $ALPlus;
 563 $CM* $ALPlus $CM* $PR;
 564 $CM* $ALPlus $CM* $PO;
 565 # NOTE(review): the two look-ahead ('/') rules below carry no UAX #14 label and have no forward counterpart in this file -- confirm intent.
 566 $CM* $ALPlus $CM* ($IS | $SY | $HY)+ / $SP;
 567 $CM* $NU+ $CM* $HY+ / $SP;
 568
 569 # LB 25   Numbers  (the forward LB 25 pattern, written backwards)
 570 ($CM* ($PR | $PO))? ($CM* $CL)? ($CM* ($NU | $IS | $SY))* $CM* $NU ($CM* ($OP))? ($CM* ($PR | $PO))?;
 571
 572 # LB 26   Korean syllables
 573 $CM* ($H3 | $H2 | $JV | $JL) $CM* $JL;
 574 $CM* ($JT | $JV) $CM* ($H2 | $JV);
 575 $CM* $JT $CM* ($H3 | $JT);
 576
 577 # LB 27   Korean syllable blocks with IN, PO, PR
 578 $CM* $IN $CM* ($H3 | $H2 | $JT | $JV | $JL);
 579 $CM* $PO $CM* ($H3 | $H2 | $JT | $JV | $JL);
 580 $CM* ($H3 | $H2 | $JT | $JV | $JL) $CM* $PR;
 581
 582 # LB 28   Do not break between alphabetics
 583 $CM* $ALPlus $CM* $ALPlus;
 584
 585
 586 # LB 29
 587 $CM* ($NU | $ALPlus) $CM* $IS+ [^$SP];
 588
 589 # LB 30
 590 $CM* $OP $CM* ($NU | $ALPlus);
 591 $CM* ($NU | $ALPlus) $CM* ($CL | $SY)+ [^$SP];
 592
 593
 594 ## -------------------------------------------------
 595
 596 !!safe_reverse;
 597
 598 # LB 9, 10   combining marks   (rule labels in this section are renumbered to match the forward rules in this file)
 599 $CM+ [^$CM $BK $CR $LF $NL $ZW $SP];
 600 $CM+ $SP / .;
 601
 602 # LB 14   OP SP* x
 603 $SP+ $CM* $OP;
 604
 605 # LB 15   QU SP* x OP
 606 $SP+ $CM* $QU;
 607
 608 # LB 16 (CL SP* x NS) and LB 17 (B2 SP* x B2)
 609 $SP+ $CM* $CL;
 610 $SP+ $CM* $B2;
 611
 612 # LB 25   numbers
 613 ($CM* ($IS | $SY))+ $CM* $NU;
 614 $CL $CM* ($NU | $IS | $SY);
 615
 616 # For dictionary-based break
 617 $dictionary $dictionary;
 618
 619 ## -------------------------------------------------
 620
 621 !!safe_forward;
 622
 623 # Skip forward over all character classes that are involved in
 624 #   rules containing patterns with possibly more than one char
 625 #   of context.
 626 #
 627 #  It might be slightly more efficient to have specific rules
 628 #  instead of one generic one, but only if we could
 629 #  turn off rule chaining.  We don't want to move more
 630 #  than necessary.
 631 #
 632 [$CM $OP $QU $CL $B2 $PR $HY $SP $dictionary]+ [^$CM $OP $QU $CL $B2 $PR $HY $dictionary];    # NOTE: $SP is in the first set but not excluded from the second, so a space may end the skip
 633 $dictionary $dictionary;                                                                     # For dictionary-based break (same rule as in !!safe_reverse)
 634