t/op/string_cs.t

   1 #!perl
   2 # Copyright (C) 2001-2008, Parrot Foundation.
   3 # $Id$
   4
   5 use strict;
   6 use warnings;
   7 use lib qw( . lib ../lib ../../lib );
   8 use Test::More;
   9 use Parrot::Test tests => 49;
  10 use Parrot::Config;
  11
  12 =head1 NAME
  13
  14 t/op/string_cs.t - String Charset Tests
  15
  16 =head1 SYNOPSIS
  17
  18         % prove t/op/string_cs.t
  19
  20 =head1 DESCRIPTION
  21
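  22 Tests string charset and encoding support: encoding-prefixed literals, encoding lookup and conversion, case mapping, character classes and escaping.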
  23
  24 =cut
  25
  26 pasm_output_is( <<'CODE', <<'OUTPUT', "basic syntax" );    # ascii:, binary: and iso-8859-1: literal prefixes
  27     set S0, ascii:"ok 1\n"
  28     print S0
  29     set S0, binary:"ok 2\n"
  30     print S0
  31     set S0, iso-8859-1:"ok 3\n"
  32     print S0
  33     end
  34 CODE
  35 ok 1
  36 ok 2
  37 ok 3
  38 OUTPUT
  39
  40 pasm_output_is( <<'CODE', <<'OUTPUT', "encoding name" );    # an unprefixed literal reports encoding "ascii"
  41     set S0, "ok 1\n"
  42     encoding I0, S0
  43     encodingname S1, I0
  44     print S1
  45     print "\n"
  46     end
  47 CODE
  48 ascii
  49 OUTPUT
  50
  51 pasm_output_is( <<'CODE', <<'OUTPUT', "find_encoding" );    # lookup by name succeeds for iso-8859-1, ascii, binary
  52     find_encoding I0, "iso-8859-1"
  53     print "ok 1\n"
  54     find_encoding I0, "ascii"
  55     print "ok 2\n"
  56     find_encoding I0, "binary"
  57     print "ok 3\n"
  58     end
  59 CODE
  60 ok 1
  61 ok 2
  62 ok 3
  63 OUTPUT
  64
  65 pasm_error_output_like( <<'CODE', <<'OUTPUT', "find_encoding - not existing" );    # unknown name is an error
  66     find_encoding I0, "no_such"
  67     end
  68 CODE
  69 /encoding 'no_such' not found/
  70 OUTPUT
  71
  72 pasm_output_is( <<'CODE', <<'OUTPUT', "downcase" );    # the Latin-1 umlauts are lowercased too
  73     set S0, iso-8859-1:"AEIOU_ÄÖÜ\n"
  74     downcase S1, S0
  75     print S1
  76     end
  77 CODE
  78 aeiou_äöü
  79 OUTPUT
  80
  81 pasm_output_is( <<'CODE', <<'OUTPUT', "upcase" );    # umlauts are uppercased; sharp s is expected unchanged
  82     set S0, iso-8859-1:"aeiou_äöüß\n"
  83     upcase S1, S0
  84     print S1
  85     end
  86 CODE
  87 AEIOU_ÄÖÜß
  88 OUTPUT
  89
  90 pasm_output_is( <<'CODE', <<'OUTPUT', "titlecase" );    # first char upper, all following chars lower
  91     set S0, iso-8859-1:"zAEIOU_ÄÖÜ\n"
  92     titlecase S1, S0
  93     print S1
  94     end
  95 CODE
  96 Zaeiou_äöü
  97 OUTPUT
  98
  99 pasm_output_is( <<'CODE', <<'OUTPUT', "is_whitespace" );    # 0xa0 is whitespace in iso-8859-1; past-the-end is 0
 100     set S0, iso-8859-1:"a\t\n \xa0" # is 0xa0 a whitespace in iso-8859-1??
 101     .include "cclass.pasm"
 102     is_cclass I0, .CCLASS_WHITESPACE, S0, 0
 103     is_cclass I1, .CCLASS_WHITESPACE, S0, 1
 104     is_cclass I2, .CCLASS_WHITESPACE, S0, 2
 105     is_cclass I3, .CCLASS_WHITESPACE, S0, 3
 106     set I4, 4
 107     is_cclass I4, .CCLASS_WHITESPACE, S0, I4
 108     print I0
 109     print I1
 110     print I2
 111     print I3
 112     print I4
 113     print "\n"
 114     set S0, ascii:"a\t\n "
 115     is_cclass I0, .CCLASS_WHITESPACE, S0, 0
 116     is_cclass I1, .CCLASS_WHITESPACE, S0, 1
 117     is_cclass I2, .CCLASS_WHITESPACE, S0, 2
 118     is_cclass I3, .CCLASS_WHITESPACE, S0, 3
 119     is_cclass I4, .CCLASS_WHITESPACE, S0, 4 # access past string boundary: not a whitespace
 120     print I0
 121     print I1
 122     print I2
 123     print I3
 124     print I4
 125     print "\n"
 126     end
 127 CODE
 128 01111
 129 01110
 130 OUTPUT
 131
 132 pasm_output_is( <<'CODE', <<'OUTPUT', "is_wordchar" );    # letters, digits and '_' are word chars
 133     .include "cclass.pasm"
 134     set S0, "az019-,._"
 135     length I1, S0
 136     set I2, 0
 137 lp:
 138     is_cclass I0, .CCLASS_WORD, S0, I2
 139     print I0
 140     inc I2
 141     lt I2, I1, lp
 142     print "\n"
 143     end
 144 CODE
 145 111110001
 146 OUTPUT
 147
 148 pasm_output_is( <<'CODE', <<'OUTPUT', "is_digit" );    # only '0', '1' and '9' are numeric
 149     .include "cclass.pasm"
 150     set S0, "az019-,._"
 151     length I1, S0
 152     set I2, 0
 153 lp:
 154     is_cclass I0, .CCLASS_NUMERIC, S0, I2
 155     print I0
 156     inc I2
 157     lt I2, I1, lp
 158     print "\n"
 159     end
 160 CODE
 161 001110000
 162 OUTPUT
 163
 164 pasm_output_is( <<'CODE', <<'OUTPUT', "is_punctuation" );    # '-', ',', '.' and '_' are punctuation
 165     .include "cclass.pasm"
 166     set S0, "az019-,._"
 167     length I1, S0
 168     set I2, 0
 169 lp:
 170     is_cclass I0, .CCLASS_PUNCTUATION, S0, I2
 171     print I0
 172     inc I2
 173     lt I2, I1, lp
 174     print "\n"
 175     end
 176 CODE
 177 000001111
 178 OUTPUT
 179
 180 pasm_output_is( <<'CODE', <<'OUTPUT', "is_newline" );    # only the "\n" at index 1 matches
 181     .include "cclass.pasm"
 182     set S0, "a\n"
 183     is_cclass I0, .CCLASS_NEWLINE, S0, 0
 184     print I0
 185     is_cclass I0, .CCLASS_NEWLINE, S0, 1
 186     print I0
 187     print "\n"
 188     end
 189 CODE
 190 01
 191 OUTPUT
 192
 193 pasm_output_is( <<'CODE', <<'OUTPUT', "find_wordchar" );    # scans forward; yields the length (7) when no hit is left
 194     .include "cclass.pasm"
 195     set S0, "_ ab 09"
 196     set I0, 0
 197     length I1, S0
 198 lp:
 199     find_cclass I0, .CCLASS_WORD, S0, I0, I1
 200     print I0
 201     print " "
 202     eq I0, I1, done
 203     inc I0
 204     branch lp
 205 done:
 206     print "ok\n"
 207     end
 208 CODE
 209 0 2 3 5 6 7 ok
 210 OUTPUT
 211
 212 pasm_output_is( <<'CODE', <<'OUTPUT', "find_digit" );    # first numeric hit is index 5; 7 is the string length
 213     .include "cclass.pasm"
 214     set S0, "_ ab 09"
 215     set I0, 0
 216     length I1, S0
 217 lp:
 218     find_cclass I0, .CCLASS_NUMERIC, S0, I0, I1
 219     print I0
 220     print " "
 221     eq I0, I1, done
 222     inc I0
 223     branch lp
 224 done:
 225     print "ok\n"
 226     end
 227 CODE
 228 5 6 7 ok
 229 OUTPUT
 230
 231 pasm_output_is( <<'CODE', <<'OUTPUT', "find_punctuation" );    # '_', '.' and ',' are hits; 7 is the string length
 232     .include "cclass.pasm"
 233     set S0, "_ .b ,9"
 234     set I0, 0
 235     length I1, S0
 236 lp:
 237     find_cclass I0, .CCLASS_PUNCTUATION, S0, I0, I1
 238     print I0
 239     print " "
 240     eq I0, I1, done
 241     inc I0
 242     branch lp
 243 done:
 244     print "ok\n"
 245     end
 246 CODE
 247 0 2 5 7 ok
 248 OUTPUT
 249
 250 pasm_output_is( <<'CODE', <<'OUTPUT', "trans_encoding_s_s_i" );    # unprefixed literal -> iso-8859-1: same text, new encoding
 251     set S0, "abc"
 252     find_encoding I0, "iso-8859-1"
 253     trans_encoding S1, S0, I0
 254     print S1
 255     print "\n"
 256     encoding I0, S1
 257     encodingname S2, I0
 258     print S2
 259     print "\n"
 260     end
 261 CODE
 262 abc
 263 iso-8859-1
 264 OUTPUT
 265
 266 pasm_error_output_like( <<'CODE', <<'OUTPUT', "trans_encoding_s_s_i - lossy" );    # a-umlaut -> ascii must throw
 267     set S1, iso-8859-1:"abcä"
 268     find_encoding I0, "ascii"
 269     trans_encoding S2, S1, I0
 270     print "never\n"
 271     end
 272 CODE
 273 /lossy conversion to ascii/
 274 OUTPUT
 275
 276 pasm_output_is( <<'CODE', <<'OUTPUT', "trans_encoding_s_s_i iso-8859-1 to binary" );    # same text, encoding binary
 277     set S0, iso-8859-1:"abc"
 278     find_encoding I0, "binary"
 279     trans_encoding S1, S0, I0
 280     print S1
 281     print "\n"
 282     encoding I0, S1
 283     encodingname S2, I0
 284     print S2
 285     print "\n"
 286     end
 287 CODE
 288 abc
 289 binary
 290 OUTPUT
 291
 292 pasm_output_is( <<'CODE', <<'OUTPUT', "trans_encoding_s_s_i ascii to binary" );    # same text, encoding binary
 293     set S0, ascii:"abc"
 294     find_encoding I0, "binary"
 295     trans_encoding S1, S0, I0
 296     print S1
 297     print "\n"
 298     encoding I0, S1
 299     encodingname S2, I0
 300     print S2
 301     print "\n"
 302     end
 303 CODE
 304 abc
 305 binary
 306 OUTPUT
 307
 308 pasm_output_is( <<'CODE', <<'OUTPUT', "trans_encoding_s_s_i ascii to iso-8859-1" );    # same text, encoding iso-8859-1
 309     set S0, ascii:"abc"
 310     find_encoding I0, "iso-8859-1"
 311     trans_encoding S1, S0, I0
 312     print S1
 313     print "\n"
 314     encoding I0, S1
 315     encodingname S2, I0
 316     print S2
 317     print "\n"
 318     end
 319 CODE
 320 abc
 321 iso-8859-1
 322 OUTPUT
 323
 324 pasm_output_is( <<'CODE', <<"OUTPUT", "trans_encoding_s_s_i iso-8859-1 to utf8" );    # a-umlaut -> c3 a4; still 6 chars
 325     set S0, iso-8859-1:"abc_ä_"
 326     find_encoding I0, "utf8"
 327     trans_encoding S1, S0, I0
 328     print S1
 329     print "\n"
 330     encoding I0, S1
 331     encodingname S2, I0
 332     print S2
 333     print "\n"
 334     length I2, S1
 335     print I2
 336     print "\n"
 337     end
 338 CODE
 339 abc_\xc3\xa4_
 340 utf8
 341 6
 342 OUTPUT
 343
 344 pasm_output_is( <<'CODE', <<'OUTPUT', "trans_encoding_s_s_i utf8 to iso-8859-1" );    # bytelength 7 in utf8, 6 chars after
 345     set S0, utf8:"abc_\xe4_"
 346     bytelength I2, S0   # XXX its 7 for utf8 only
 347     print I2
 348     print "\n"
 349     find_encoding I0, "iso-8859-1"
 350     trans_encoding S1, S0, I0
 351     print S1
 352     print "\n"
 353     encoding I0, S1
 354     encodingname S2, I0
 355     print S2
 356     print "\n"
 357     length I2, S1
 358     print I2
 359     print "\n"
 360     end
 361 CODE
 362 7
 363 abc_ä_
 364 iso-8859-1
 365 6
 366 OUTPUT
 367
 368 pir_output_is( <<'CODE', <<'OUTPUT', "bug #34661 literal" );    # utf8 literal with an escaped quote must compile
 369 .sub main :main
 370     $S0 = utf8:"\"]\nif I3 == "
 371     print "ok 1\n"
 372 .end
 373 CODE
 374 ok 1
 375 OUTPUT
 376
 377 pir_output_is( <<'CODE', <<'OUTPUT', "todo #34660 hash" );    # global set under a utf8: key is read back via a plain key
 378 .sub main :main
 379     $P0 = new 'Integer'
 380     $P0 = 42
 381     set_global ['Foo'], utf8:"Bar", $P0
 382     print "ok 1\n"
 383     $P1 = get_global ['Foo'], "Bar"
 384     print "ok 2\n"
 385     print $P1
 386     print "\n"
 387 .end
 388 CODE
 389 ok 1
 390 ok 2
 391 42
 392 OUTPUT
 393
 394 pir_output_is( <<'CODE', <<'OUTPUT', "concat ascii, utf8" );    # mixed-encoding concat, tried in both operand orders
 395 .sub main
 396     .local string s, t, u
 397     s = "abcd"
 398     t = utf8:"efg\n"
 399     u = s . t
 400     print u
 401     s = utf8:"abcd"
 402     t = "efg\n"
 403     u = s . t
 404     print u
 405 .end
 406 CODE
 407 abcdefg
 408 abcdefg
 409 OUTPUT
 410
 411 SKIP: {
 412     skip( 'no ICU lib', 19 ) unless $PConfig{has_icu};    # 19 = number of test calls inside this SKIP block
 413
 414     pir_output_is( <<'CODE', <<'OUTPUT', "literal encoding persistence - TT #468" );    # strings loaded from a .pbc report their encoding
 415 .include 'stdio.pasm'
 416 .sub main
 417     # set output encoding to normalize printed strings
 418     $P0 = getinterp
 419     $P1 = $P0.'stdhandle'(.PIO_STDOUT_FILENO)
 420     $P1.'encoding'('utf8')
 421
 422     load_bytecode 't/op/testlib/test_strings.pbc'
 423     $P0 = 'get_hellos'()
 424     $P1 = iter $P0
 425
 426   loop:
 427     unless $P1 goto end_loop
 428     $S0 = shift $P1
 429     show($S0)
 430     goto loop
 431   end_loop:
 432 .end
 433
 434 .sub show
 435     .param string s
 436     print s
 437     $I0 = bytelength s
 438     print '('
 439     print $I0
 440     print "): "
 441     $I0 = encoding s
 442     $S0 = encodingname $I0
 443     say $S0
 444 .end
 445 CODE
 446 hello(5): ascii
 447 hello(5): utf8
 448 hello(5): utf8
 449 hello(10): utf16
 450 hello(10): ucs2
 451 OUTPUT
 452
 453     pir_output_is( <<'CODE', <<'OUTPUT', "empty literal encoding persistence - TT #1791");    # zero-length strings from a .pbc still report an encoding
 454 .sub main
 455     load_bytecode 't/op/testlib/test_strings.pbc'
 456     $P0 = 'get_empties'()
 457     $P1 = iter $P0
 458
 459   loop:
 460     unless $P1 goto end_loop
 461     $S0 = shift $P1
 462     show($S0)
 463     goto loop
 464   end_loop:
 465 .end
 466
 467 .sub show
 468     .param string s
 469     print s
 470     $I0 = bytelength s
 471     print '('
 472     print $I0
 473     print "): "
 474     $I0 = encoding s
 475     $S0 = encodingname $I0
 476     say $S0
 477 .end
 478 CODE
 479 (0): ascii
 480 (0): utf8
 481 (0): utf8
 482 (0): utf16
 483 (0): ucs2
 484 OUTPUT
 485
 486     pir_output_is( <<'CODE', <<"OUTPUT", "unicode downcase" );    # downcase after converting to utf8; stdout set to utf8
 487 .sub main :main
 488     set $S0, iso-8859-1:"TÖTSCH"
 489     find_encoding $I0, "utf8"
 490     trans_encoding $S1, $S0, $I0
 491     $S1 = downcase $S1
 492     getstdout $P0           # need to convert back to utf8
 493     $P0.'encoding'("utf8")  # set utf8 output
 494     print $S1
 495     print "\n"
 496     end
 497 .end
 498 CODE
 499 t\xc3\xb6tsch
 500 OUTPUT
 501
 502     pasm_output_is( <<'CODE', <<"OUTPUT", "unicode downcase, trans_encoding_s_s_i" );    # downcase in utf8, then back to iso-8859-1
 503     set S0, iso-8859-1:"TÖTSCH"
 504     find_encoding I0, "utf8"
 505     trans_encoding S1, S0, I0
 506     downcase S1, S1
 507     find_encoding I0, "iso-8859-1"
 508     trans_encoding S1, S1, I0
 509     print S1
 510     print "\n"
 511     end
 512 CODE
 513 t\xf6tsch
 514 OUTPUT
 515
 516     pasm_error_output_like( <<'CODE', <<"OUTPUT", "negative encoding number" );    # encoding number -1 must be rejected
 517     trans_encoding S2, 'foo', -1
 518     end
 519 CODE
 520 /encoding #-1 not found/
 521 OUTPUT
 522
 523     pasm_output_is( <<'CODE', <<"OUTPUT", "unicode downcase - transencoding" );    # second trans_encoding is utf8 -> utf8
 524     set S0, iso-8859-1:"TÖTSCH"
 525     find_encoding I0, "utf8"
 526     trans_encoding S1, S0, I0
 527     downcase S1, S1
 528     find_encoding I0, "utf8"
 529     trans_encoding S2, S1, I0
 530     print S2
 531     print "\n"
 532     end
 533 CODE
 534 t\xc3\xb6tsch
 535 OUTPUT
 536
 537     pasm_output_is( <<'CODE', <<"OUTPUT", "utf16 ord, length" );    # length is 6 chars; ord of the O-umlaut is 214
 538     set S1, iso-8859-1:"TÖTSCH"
 539     find_encoding I0, "utf16"
 540     trans_encoding S1, S1, I0
 541     length I1, S1
 542     print I1
 543     print "\n"
 544     null I0
 545 loop:
 546     ord I2, S1, I0
 547     print I2
 548     print '_'
 549     inc I0
 550     lt I0, I1, loop
 551     print "\n"
 552     end
 553 CODE
 554 6
 555 84_214_84_83_67_72_
 556 OUTPUT
 557
 558     pasm_output_is( <<'CODE', <<"OUTPUT", "chopn utf8" );    # chopping 2 chars drops both O-umlauts: length 2, bufused 2
 559     set S0, iso-8859-1:"TTÖÖ"
 560     find_encoding I0, "utf8"
 561     trans_encoding S1, S0, I0
 562     chopn S1, S1, 2
 563     print S1
 564     print ' '
 565     length I0, S1
 566     print I0
 567     print ' '
 568     .include "stringinfo.pasm"
 569     stringinfo I0, S1, .STRINGINFO_BUFUSED
 570     print I0
 571     print "\n"
 572     end
 573 CODE
 574 TT 2 2
 575 OUTPUT
 576
 577     pasm_output_is( <<'CODE', <<"OUTPUT", "utf16 append" );    # two-operand concat onto utf16: length 10, bufused 20
 578     set S1, iso-8859-1:"Tötsch"
 579     find_encoding I0, "utf16"
 580     trans_encoding S1, S1, I0
 581     concat S1, " Leo"
 582     length I0, S1
 583     print I0
 584     print ' '
 585     .include "stringinfo.pasm"
 586     stringinfo I0, S1, .STRINGINFO_BUFUSED
 587     print I0
 588     print "\n"
 589     find_encoding I0, "utf8"
 590     trans_encoding S2, S1, I0
 591     print S2
 592     print "\n"
 593     end
 594 CODE
 595 10 20
 596 T\xc3\xb6tsch Leo
 597 OUTPUT
 598
 599     pasm_output_is( <<'CODE', <<"OUTPUT", "utf16 concat" );    # three-operand concat from utf16: length 10, bufused 20
 600     set S1, iso-8859-1:"Tötsch"
 601     find_encoding I0, "utf16"
 602     trans_encoding S1, S1, I0
 603     concat S2, S1, " Leo"
 604     length I0, S2
 605     print I0
 606     print ' '
 607     .include "stringinfo.pasm"
 608     stringinfo I0, S2, .STRINGINFO_BUFUSED
 609     print I0
 610     print "\n"
 611     find_encoding I0, "utf8"
 612     trans_encoding S2, S2, I0
 613     print S2
 614     print "\n"
 615     end
 616 CODE
 617 10 20
 618 T\xc3\xb6tsch Leo
 619 OUTPUT
 620
 621     pasm_output_is( <<'CODE', <<"OUTPUT", "utf16 substr" );    # 2 chars from offset 1, printed after conversion to utf8
 622     set S1, iso-8859-1:"Tötsch"
 623     find_encoding I0, "utf16"
 624     trans_encoding S1, S1, I0
 625     substr S2, S1, 1, 2
 626     find_encoding I0, "utf8"
 627     trans_encoding S2, S2, I0
 628     print S2
 629     print "\n"
 630     end
 631 CODE
 632 \xc3\xb6t
 633 OUTPUT
 634
 635     pasm_output_is( <<'CODE', <<"OUTPUT", "utf16 replace" );    # the char at offset 1 is replaced by "oe"
 636     set S1, iso-8859-1:"Tötsch"
 637     find_encoding I0, "utf16"
 638     trans_encoding S1, S1, I0
 639     substr  S2, S1, 1, 1
 640     replace S1, S1, 1, 1, "oe"
 641     find_encoding I0, "utf8"
 642     trans_encoding S2, S2, I0
 643     trans_encoding S1, S1, I0
 644     print S2
 645     print "\n"
 646     print S1
 647     print "\n"
 648     end
 649 CODE
 650 \xc3\xb6
 651 Toetsch
 652 OUTPUT
 653
 654     pasm_output_is( <<'CODE', <<"OUTPUT", "utf8 index, latin1 search" );    # haystack is utf8, needle is iso-8859-1
 655     set S0, iso-8859-1:"TÖTSCH"
 656     find_encoding I0, "utf8"
 657     trans_encoding S1, S0, I0
 658     downcase S1, S1
 659     set S2, iso-8859-1:"öt"
 660     index I0, S1, S2
 661     print I0
 662     print "\n"
 663     end
 664 CODE
 665 1
 666 OUTPUT
 667
 668     pasm_output_is( <<'CODE', <<"OUTPUT", "utf8 index with start offset, latin1 search" );    # second index starts at offset 2
 669     set S0, iso-8859-1:"TÖTSCH"
 670     find_encoding I0, "utf8"
 671     trans_encoding S1, S0, I0
 672     downcase S1, S1
 673     set S2, iso-8859-1:"öt"
 674     index I0, S1, S2
 675     print I0
 676     print "\n"
 677     concat S1, S2
 678     index I0, S1, S2, 2
 679     print I0
 680     print "\n"
 681     end
 682 CODE
 683 1
 684 6
 685 OUTPUT
 686
 687     pir_output_is( <<'CODE', <<"OUTPUT", "unicode upcase" );    # upcase after converting to utf8; o-umlaut -> c3 96
 688 .sub main :main
 689     set $S0, iso-8859-1:"tötsch"
 690     find_encoding $I0, "utf8"
 691     trans_encoding $S1, $S0, $I0
 692     upcase $S1, $S1
 693     getstdout $P0         # need to convert back to utf8
 694     $P0.'encoding'("utf8") # set utf8 output
 695     print $S1
 696     print "\n"
 697     end
 698 .end
 699 CODE
 700 T\x{c3}\x{96}TSCH
 701 OUTPUT
 702
 703     pir_output_is( <<'CODE', <<"OUTPUT", "unicode upcase to combined char" );    # U+01F0 upcases to "J" followed by bytes cc 8c
 704 .sub main :main
 705     set $S1, utf8:"hacek j \u01f0"
 706     upcase $S1, $S1
 707     getstdout $P0          # need to convert back to utf8
 708     $P0.'encoding'("utf8") # set utf8 output
 709     print $S1
 710     print "\n"
 711     end
 712 .end
 713 CODE
 714 HACEK J J\xcc\x8c
 715 OUTPUT
 716
 717     # charset/unicode.c
 718     #
 719     # 106         dest_len = u_strToUpper(src->strstart, dest_len,
 720     # (gdb) p src_len
 721     # $17 = 7
 722     # (gdb) p dest_len
 723     # $18 = 7
 724     # (gdb) x /8h src->strstart
 725     # 0x844fb60:      0x005f  0x005f  0x005f  0x01f0  0x0031  0x0032  0x0033  0x0000
 726     # (gdb) n
 727     # 110         src->bufused = dest_len * sizeof(UChar);
 728     # (gdb) p dest_len
 729     # $19 = 8
 730     # (gdb) x /8h src->strstart
 731     # 0x844fb60:      0x005f  0x005f  0x005f  0x004a  0x030c  0x0031  0x0032  0x0000
 732
 733     pir_output_is( <<'CODE', <<"OUTPUT", "unicode upcase to combined char 3.2 bug?" );    # U+01F0 in mid-string; text after it must survive
 734 .sub main :main
 735     set $S1, utf8:"___\u01f0123"
 736     upcase $S1, $S1
 737     getstdout $P0          # need to convert back to utf8
 738     $P0.'encoding'("utf8") # set utf8 output
 739     print $S1
 740     print "\n"
 741     end
 742 .end
 743 CODE
 744 ___J\xcc\x8c123
 745 OUTPUT
 746
 747     pir_output_is( <<'CODE', <<"OUTPUT", "unicode titlecase" );    # both words come out capitalised
 748 .sub main :main
 749     set $S0, iso-8859-1:"tötsch leo"
 750     find_encoding $I0, "utf8"
 751     trans_encoding $S1, $S0, $I0
 752     titlecase $S1, $S1
 753     getstdout $P0          # need to convert back to utf8
 754     $P0.'encoding'("utf8") # set utf8 output
 755     print $S1
 756     print "\n"
 757     end
 758 .end
 759 CODE
 760 T\x{c3}\x{b6}tsch Leo
 761 OUTPUT
 762
 763     pir_output_is( <<'CODE', <<"OUTPUT", "compose combined char" );    # lengths 7 8 8 7: case ops lengthen, compose restores
 764 .sub main :main
 765     set $S1, utf8:"___\u01f0___"
 766     length $I0, $S1
 767     upcase $S1, $S1    # decompose J+hacek
 768     length $I1, $S1    # 1 longer
 769     downcase $S1, $S1  # j+hacek
 770     length $I2, $S1
 771     compose $S1, $S1
 772     length $I3, $S1        # back at original string
 773     getstdout $P0          # need to convert back to utf8
 774     $P0.'encoding'("utf8") # set utf8 output
 775     print $S1
 776     print "\n"
 777     print $I0
 778     print ' '
 779     print $I1
 780     print ' '
 781     print $I2
 782     print ' '
 783     print $I3
 784     print "\n"
 785     end
 786 .end
 787 CODE
 788 ___\x{c7}\x{b0}___
 789 7 8 8 7
 790 OUTPUT
 791
 792 }    # SKIP
 793
 794 pasm_output_is( <<'CODE', <<'OUTPUT', "escape ascii" );    # the newline is escaped, plain letters pass through
 795     set S0, "abcdefghi\n"
 796     escape S1, S0
 797     print S1
 798     print "\n"
 799     end
 800 CODE
 801 abcdefghi\n
 802 OUTPUT
 803
 804 pasm_output_is( <<'CODE', <<'OUTPUT', "escape ctrl" );    # control chars and 0x7f are written as \x{..}
 805     set S0, "\x00\x01\x1f\x7f"
 806     escape S1, S0
 807     print S1
 808     print "\n"
 809     end
 810 CODE
 811 \x{0}\x{1}\x{1f}\x{7f}
 812 OUTPUT
 813
 814 pasm_output_is( <<'CODE', <<'OUTPUT', "escape latin1" );    # the o-umlaut is written as \x{f6}
 815     set S0, iso-8859-1:"tötsch leo"
 816     escape S1, S0
 817     print S1
 818     print "\n"
 819     end
 820 CODE
 821 t\x{f6}tsch leo
 822 OUTPUT
 823
 824 pasm_output_is( <<'CODE', <<'OUTPUT', "escape unicode" );    # 4-hex-digit chars as \uXXXX, U+E01EF as \x{e01ef}
 825     set S0, utf8:"\u2001\u2002\u2003\u2004\x{e01ef}\u0114"
 826     escape S1, S0
 827     print S1
 828     print "\n"
 829     end
 830 CODE
 831 \u2001\u2002\u2003\u2004\x{e01ef}\u0114
 832 OUTPUT
 833
 834 pir_output_is(<<'CODE', <<'OUTPUT', 'escape unicode w/ literal 0' );    # digits following a \uXXXX escape stay literal
 835 .sub 'main'
 836     $S0 = utf8:"x/\u0445\u0440\u0435\u043d\u044c_09-10.txt"
 837     $S1 = escape $S0
 838     say $S1
 839 .end
 840 CODE
 841 x/\u0445\u0440\u0435\u043d\u044c_09-10.txt
 842 OUTPUT
 843
 844 # Local Variables:
 845 #   mode: cperl
 846 #   cperl-indent-level: 4
 847 #   fill-column: 100
 848 # End:
 849 # vim: expandtab shiftwidth=4: