archive/1.vm/061text.mu

   1 # Some useful helpers for dealing with text (arrays of characters)
   2
   3 def equal a:text, b:text -> result:bool [
   4   local-scope
   5   load-inputs
   6   an:num, bn:num <- deaddress a, b
   7   address-equal?:boolean <- equal an, bn
   8   return-if address-equal?, true
   9   return-unless a, false
  10   return-unless b, false
  11   a-len:num <- length *a
  12   b-len:num <- length *b
  13   # compare lengths
  14   trace 99, [text-equal], [comparing lengths]
  15   length-equal?:bool <- equal a-len, b-len
  16   return-unless length-equal?, false
  17   # compare each corresponding character
  18   trace 99, [text-equal], [comparing characters]
  19   i:num <- copy 0
  20   {
  21     done?:bool <- greater-or-equal i, a-len
  22     break-if done?
  23     a2:char <- index *a, i
  24     b2:char <- index *b, i
  25     chars-match?:bool <- equal a2, b2
  26     return-unless chars-match?, false
  27     i <- add i, 1
  28     loop
  29   }
  30   return true
  31 ]
  32
  33 scenario text-equal-reflexive [
  34   local-scope
  35   x:text <- new [abc]
  36   run [
  37     10:bool/raw <- equal x, x
  38   ]
  39   memory-should-contain [
  40     10 <- 1  # x == x for all x
  41   ]
  42 ]
  43
  44 scenario text-equal-identical [
  45   local-scope
  46   x:text <- new [abc]
  47   y:text <- new [abc]
  48   run [
  49     10:bool/raw <- equal x, y
  50   ]
  51   memory-should-contain [
  52     10 <- 1  # abc == abc
  53   ]
  54 ]
  55
  56 scenario text-equal-distinct-lengths [
  57   local-scope
  58   x:text <- new [abc]
  59   y:text <- new [abcd]
  60   run [
  61     10:bool/raw <- equal x, y
  62   ]
  63   memory-should-contain [
  64     10 <- 0  # abc != abcd
  65   ]
  66   trace-should-contain [
  67     text-equal: comparing lengths
  68   ]
  69   trace-should-not-contain [
  70     text-equal: comparing characters
  71   ]
  72 ]
  73
  74 scenario text-equal-with-empty [
  75   local-scope
  76   x:text <- new []
  77   y:text <- new [abcd]
  78   run [
  79     10:bool/raw <- equal x, y
  80   ]
  81   memory-should-contain [
  82     10 <- 0  # "" != abcd
  83   ]
  84 ]
  85
  86 scenario text-equal-with-null [
  87   local-scope
  88   x:text <- new [abcd]
  89   y:text <- copy null
  90   run [
  91     10:bool/raw <- equal x, null
  92     11:bool/raw <- equal null, x
  93     12:bool/raw <- equal x, y
  94     13:bool/raw <- equal y, x
  95     14:bool/raw <- equal y, y
  96   ]
  97   memory-should-contain [
  98     10 <- 0
  99     11 <- 0
 100     12 <- 0
 101     13 <- 0
 102     14 <- 1
 103   ]
 104   check-trace-count-for-label 0, [error]
 105 ]
 106
 107 scenario text-equal-common-lengths-but-distinct [
 108   local-scope
 109   x:text <- new [abc]
 110   y:text <- new [abd]
 111   run [
 112     10:bool/raw <- equal x, y
 113   ]
 114   memory-should-contain [
 115     10 <- 0  # abc != abd
 116   ]
 117 ]
 118
 119 # A new type to help incrementally construct texts.
 120 container buffer:_elem [
 121   length:num
 122   data:&:@:_elem
 123 ]
 124
 125 def new-buffer capacity:num -> result:&:buffer:_elem [
 126   local-scope
 127   load-inputs
 128   result <- new {(buffer _elem): type}
 129   *result <- put *result, length:offset, 0
 130   {
 131     break-if capacity
 132     # capacity not provided
 133     capacity <- copy 10
 134   }
 135   data:&:@:_elem <- new _elem:type, capacity
 136   *result <- put *result, data:offset, data
 137   return result
 138 ]
 139
 140 def grow-buffer buf:&:buffer:_elem -> buf:&:buffer:_elem [
 141   local-scope
 142   load-inputs
 143   # double buffer size
 144   olddata:&:@:_elem <- get *buf, data:offset
 145   oldlen:num <- length *olddata
 146   newlen:num <- multiply oldlen, 2
 147   newdata:&:@:_elem <- new _elem:type, newlen
 148   *buf <- put *buf, data:offset, newdata
 149   # copy old contents
 150   i:num <- copy 0
 151   {
 152     done?:bool <- greater-or-equal i, oldlen
 153     break-if done?
 154     src:_elem <- index *olddata, i
 155     *newdata <- put-index *newdata, i, src
 156     i <- add i, 1
 157     loop
 158   }
 159 ]
 160
 161 def buffer-full? in:&:buffer:_elem -> result:bool [
 162   local-scope
 163   load-inputs
 164   len:num <- get *in, length:offset
 165   s:&:@:_elem <- get *in, data:offset
 166   capacity:num <- length *s
 167   result <- greater-or-equal len, capacity
 168 ]
 169
 170 # most broadly applicable definition of append to a buffer
 171 def append buf:&:buffer:_elem, x:_elem -> buf:&:buffer:_elem [
 172   local-scope
 173   load-inputs
 174   len:num <- get *buf, length:offset
 175   {
 176     # grow buffer if necessary
 177     full?:bool <- buffer-full? buf
 178     break-unless full?
 179     buf <- grow-buffer buf
 180   }
 181   s:&:@:_elem <- get *buf, data:offset
 182   *s <- put-index *s, len, x
 183   len <- add len, 1
 184   *buf <- put *buf, length:offset, len
 185 ]
 186
 187 # most broadly applicable definition of append to a buffer of characters: just
 188 # call to-text
 189 def append buf:&:buffer:char, x:_elem -> buf:&:buffer:char [
 190   local-scope
 191   load-inputs
 192   text:text <- to-text x
 193   buf <- append buf, text
 194 ]
 195
 196 # specialization for characters that is backspace-aware
 197 def append buf:&:buffer:char, c:char -> buf:&:buffer:char [
 198   local-scope
 199   load-inputs
 200   len:num <- get *buf, length:offset
 201   {
 202     # backspace? just drop last character if it exists and return
 203     backspace?:bool <- equal c, 8/backspace
 204     break-unless backspace?
 205     empty?:bool <- lesser-or-equal len, 0
 206     return-if empty?
 207     len <- subtract len, 1
 208     *buf <- put *buf, length:offset, len
 209     return
 210   }
 211   {
 212     # grow buffer if necessary
 213     full?:bool <- buffer-full? buf
 214     break-unless full?
 215     buf <- grow-buffer buf
 216   }
 217   s:text <- get *buf, data:offset
 218   *s <- put-index *s, len, c
 219   len <- add len, 1
 220   *buf <- put *buf, length:offset, len
 221 ]
 222
 223 def append buf:&:buffer:_elem, t:&:@:_elem -> buf:&:buffer:_elem [
 224   local-scope
 225   load-inputs
 226   len:num <- length *t
 227   i:num <- copy 0
 228   {
 229     done?:bool <- greater-or-equal i, len
 230     break-if done?
 231     x:_elem <- index *t, i
 232     buf <- append buf, x
 233     i <- add i, 1
 234     loop
 235   }
 236 ]
 237
 238 scenario append-to-empty-buffer [
 239   local-scope
 240   x:&:buffer:char <- new-buffer
 241   run [
 242     c:char <- copy 97/a
 243     x <- append x, c
 244     10:num/raw <- get *x, length:offset
 245     s:text <- get *x, data:offset
 246     11:char/raw <- index *s, 0
 247     12:char/raw <- index *s, 1
 248   ]
 249   memory-should-contain [
 250     10 <- 1  # buffer length
 251     11 <- 97  # a
 252     12 <- 0  # rest of buffer is empty
 253   ]
 254 ]
 255
 256 scenario append-to-buffer [
 257   local-scope
 258   x:&:buffer:char <- new-buffer
 259   c:char <- copy 97/a
 260   x <- append x, c
 261   run [
 262     c <- copy 98/b
 263     x <- append x, c
 264     10:num/raw <- get *x, length:offset
 265     s:text <- get *x, data:offset
 266     11:char/raw <- index *s, 0
 267     12:char/raw <- index *s, 1
 268     13:char/raw <- index *s, 2
 269   ]
 270   memory-should-contain [
 271     10 <- 2  # buffer length
 272     11 <- 97  # a
 273     12 <- 98  # b
 274     13 <- 0  # rest of buffer is empty
 275   ]
 276 ]
 277
 278 scenario append-grows-buffer [
 279   local-scope
 280   x:&:buffer:char <- new-buffer 3
 281   s1:text <- get *x, data:offset
 282   x <- append x, [abc]  # buffer is now full
 283   s2:text <- get *x, data:offset
 284   run [
 285     10:bool/raw <- equal s1, s2
 286     11:@:char/raw <- copy *s2
 287     +buffer-filled
 288     c:char <- copy 100/d
 289     x <- append x, c
 290     s3:text <- get *x, data:offset
 291     20:bool/raw <- equal s1, s3
 292     21:num/raw <- get *x, length:offset
 293     30:@:char/raw <- copy *s3
 294   ]
 295   memory-should-contain [
 296     # before +buffer-filled
 297     10 <- 1   # no change in data pointer after original append
 298     11 <- 3   # size of data
 299     12 <- 97  # data
 300     13 <- 98
 301     14 <- 99
 302     # in the end
 303     20 <- 0   # data pointer has grown after second append
 304     21 <- 4   # final length
 305     30 <- 6   # but data's capacity has doubled
 306     31 <- 97  # data
 307     32 <- 98
 308     33 <- 99
 309     34 <- 100
 310     35 <- 0
 311     36 <- 0
 312   ]
 313 ]
 314
 315 scenario buffer-append-handles-backspace [
 316   local-scope
 317   x:&:buffer:char <- new-buffer
 318   x <- append x, [ab]
 319   run [
 320     c:char <- copy 8/backspace
 321     x <- append x, c
 322     s:text <- buffer-to-array x
 323     10:@:char/raw <- copy *s
 324   ]
 325   memory-should-contain [
 326     10 <- 1   # length
 327     11 <- 97  # contents
 328     12 <- 0
 329   ]
 330 ]
 331
 332 scenario append-to-buffer-of-non-characters [
 333   local-scope
 334   x:&:buffer:text <- new-buffer 1/capacity
 335   # no errors
 336 ]
 337
 338 def buffer-to-array in:&:buffer:_elem -> result:&:@:_elem [
 339   local-scope
 340   load-inputs
 341   # propagate null buffer
 342   return-unless in, null
 343   len:num <- get *in, length:offset
 344   s:&:@:_elem <- get *in, data:offset
 345   # we can't just return s because it is usually the wrong length
 346   result <- new _elem:type, len
 347   i:num <- copy 0
 348   {
 349     done?:bool <- greater-or-equal i, len
 350     break-if done?
 351     src:_elem <- index *s, i
 352     *result <- put-index *result, i, src
 353     i <- add i, 1
 354     loop
 355   }
 356 ]
 357
 358 def blank? x:&:@:_elem -> result:bool [
 359   local-scope
 360   load-inputs
 361   return-unless x, true
 362   len:num <- length *x
 363   result <- equal len, 0
 364 ]
 365
 366 # Append any number of texts together.
 367 # A later layer also translates calls to this to implicitly call to-text, so
 368 # append to string becomes effectively dynamically typed.
 369 #
 370 # Beware though: this hack restricts how much 'append' can be overridden. Any
 371 # new variants that match:
 372 #   append _:text, ___
 373 # will never ever get used.
 374 def append first:text -> result:text [
 375   local-scope
 376   load-inputs
 377   buf:&:buffer:char <- new-buffer 30
 378   # append first input
 379   {
 380     break-unless first
 381     buf <- append buf, first
 382   }
 383   # append remaining inputs
 384   {
 385     arg:text, arg-found?:bool <- next-input
 386     break-unless arg-found?
 387     loop-unless arg
 388     buf <- append buf, arg
 389     loop
 390   }
 391   result <- buffer-to-array buf
 392 ]
 393
 394 scenario text-append-1 [
 395   local-scope
 396   x:text <- new [hello,]
 397   y:text <- new [ world!]
 398   run [
 399     z:text <- append x, y
 400     10:@:char/raw <- copy *z
 401   ]
 402   memory-should-contain [
 403     10:array:character <- [hello, world!]
 404   ]
 405 ]
 406
 407 scenario text-append-null [
 408   local-scope
 409   x:text <- copy null
 410   y:text <- new [ world!]
 411   run [
 412     z:text <- append x, y
 413     10:@:char/raw <- copy *z
 414   ]
 415   memory-should-contain [
 416     10:array:character <- [ world!]
 417   ]
 418 ]
 419
 420 scenario text-append-null-2 [
 421   local-scope
 422   x:text <- new [hello,]
 423   y:text <- copy null
 424   run [
 425     z:text <- append x, y
 426     10:@:char/raw <- copy *z
 427   ]
 428   memory-should-contain [
 429     10:array:character <- [hello,]
 430   ]
 431 ]
 432
 433 scenario text-append-multiary [
 434   local-scope
 435   x:text <- new [hello, ]
 436   y:text <- new [world]
 437   z:text <- new [!]
 438   run [
 439     z:text <- append x, y, z
 440     10:@:char/raw <- copy *z
 441   ]
 442   memory-should-contain [
 443     10:array:character <- [hello, world!]
 444   ]
 445 ]
 446
 447 scenario replace-character-in-text [
 448   local-scope
 449   x:text <- new [abc]
 450   run [
 451     x <- replace x, 98/b, 122/z
 452     10:@:char/raw <- copy *x
 453   ]
 454   memory-should-contain [
 455     10:array:character <- [azc]
 456   ]
 457 ]
 458
 459 def replace s:text, oldc:char, newc:char, from:num/optional -> s:text [
 460   local-scope
 461   load-inputs
 462   len:num <- length *s
 463   i:num <- find-next s, oldc, from
 464   done?:bool <- greater-or-equal i, len
 465   return-if done?
 466   *s <- put-index *s, i, newc
 467   i <- add i, 1
 468   s <- replace s, oldc, newc, i
 469 ]
 470
 471 scenario replace-character-at-start [
 472   local-scope
 473   x:text <- new [abc]
 474   run [
 475     x <- replace x, 97/a, 122/z
 476     10:@:char/raw <- copy *x
 477   ]
 478   memory-should-contain [
 479     10:array:character <- [zbc]
 480   ]
 481 ]
 482
 483 scenario replace-character-at-end [
 484   local-scope
 485   x:text <- new [abc]
 486   run [
 487     x <- replace x, 99/c, 122/z
 488     10:@:char/raw <- copy *x
 489   ]
 490   memory-should-contain [
 491     10:array:character <- [abz]
 492   ]
 493 ]
 494
 495 scenario replace-character-missing [
 496   local-scope
 497   x:text <- new [abc]
 498   run [
 499     x <- replace x, 100/d, 122/z
 500     10:@:char/raw <- copy *x
 501   ]
 502   memory-should-contain [
 503     10:array:character <- [abc]
 504   ]
 505 ]
 506
 507 scenario replace-all-characters [
 508   local-scope
 509   x:text <- new [banana]
 510   run [
 511     x <- replace x, 97/a, 122/z
 512     10:@:char/raw <- copy *x
 513   ]
 514   memory-should-contain [
 515     10:array:character <- [bznznz]
 516   ]
 517 ]
 518
 519 # replace underscores in first with remaining args
 520 def interpolate template:text -> result:text [
 521   local-scope
 522   load-inputs  # consume just the template
 523   # compute result-len, space to allocate for result
 524   tem-len:num <- length *template
 525   result-len:num <- copy tem-len
 526   {
 527     # while inputs remain
 528     a:text, arg-received?:bool <- next-input
 529     break-unless arg-received?
 530     # result-len = result-len + arg.length - 1 (for the 'underscore' being replaced)
 531     a-len:num <- length *a
 532     result-len <- add result-len, a-len
 533     result-len <- subtract result-len, 1
 534     loop
 535   }
 536   rewind-inputs
 537   _ <- next-input  # skip template
 538   result <- new character:type, result-len
 539   # repeatedly copy sections of template and 'holes' into result
 540   result-idx:num <- copy 0
 541   i:num <- copy 0
 542   {
 543     # while arg received
 544     a:text, arg-received?:bool <- next-input
 545     break-unless arg-received?
 546     # copy template into result until '_'
 547     {
 548       # while i < template.length
 549       tem-done?:bool <- greater-or-equal i, tem-len
 550       break-if tem-done?, +done
 551       # while template[i] != '_'
 552       in:char <- index *template, i
 553       underscore?:bool <- equal in, 95/_
 554       break-if underscore?
 555       # result[result-idx] = template[i]
 556       *result <- put-index *result, result-idx, in
 557       i <- add i, 1
 558       result-idx <- add result-idx, 1
 559       loop
 560     }
 561     # copy 'a' into result
 562     j:num <- copy 0
 563     {
 564       # while j < a.length
 565       arg-done?:bool <- greater-or-equal j, a-len
 566       break-if arg-done?
 567       # result[result-idx] = a[j]
 568       in:char <- index *a, j
 569       *result <- put-index *result, result-idx, in
 570       j <- add j, 1
 571       result-idx <- add result-idx, 1
 572       loop
 573     }
 574     # skip '_' in template
 575     i <- add i, 1
 576     loop  # interpolate next arg
 577   }
 578   +done
 579   # done with holes; copy rest of template directly into result
 580   {
 581     # while i < template.length
 582     tem-done?:bool <- greater-or-equal i, tem-len
 583     break-if tem-done?
 584     # result[result-idx] = template[i]
 585     in:char <- index *template, i
 586     *result <- put-index *result, result-idx, in
 587     i <- add i, 1
 588     result-idx <- add result-idx, 1
 589     loop
 590   }
 591 ]
 592
 593 scenario interpolate-works [
 594   local-scope
 595   x:text <- new [abc_ghi]
 596   y:text <- new [def]
 597   run [
 598     z:text <- interpolate x, y
 599     10:@:char/raw <- copy *z
 600   ]
 601   memory-should-contain [
 602     10:array:character <- [abcdefghi]
 603   ]
 604 ]
 605
 606 scenario interpolate-at-start [
 607   local-scope
 608   x:text <- new [_, hello!]
 609   y:text <- new [abc]
 610   run [
 611     z:text <- interpolate x, y
 612     10:@:char/raw <- copy *z
 613   ]
 614   memory-should-contain [
 615     10:array:character <- [abc, hello!]
 616     22 <- 0  # out of bounds
 617   ]
 618 ]
 619
 620 scenario interpolate-at-end [
 621   local-scope
 622   x:text <- new [hello, _]
 623   y:text <- new [abc]
 624   run [
 625     z:text <- interpolate x, y
 626     10:@:char/raw <- copy *z
 627   ]
 628   memory-should-contain [
 629     10:array:character <- [hello, abc]
 630   ]
 631 ]
 632
 633 # result:bool <- space? c:char
 634 def space? c:char -> result:bool [
 635   local-scope
 636   load-inputs
 637   # most common case first
 638   result <- equal c, 32/space
 639   return-if result
 640   result <- equal c, 10/newline
 641   return-if result
 642   result <- equal c, 9/tab
 643   return-if result
 644   result <- equal c, 13/carriage-return
 645   return-if result
 646   # remaining uncommon cases in sorted order
 647   # http://unicode.org code-points in unicode-set Z and Pattern_White_Space
 648   result <- equal c, 11/ctrl-k
 649   return-if result
 650   result <- equal c, 12/ctrl-l
 651   return-if result
 652   result <- equal c, 133/ctrl-0085
 653   return-if result
 654   result <- equal c, 160/no-break-space
 655   return-if result
 656   result <- equal c, 5760/ogham-space-mark
 657   return-if result
 658   result <- equal c, 8192/en-quad
 659   return-if result
 660   result <- equal c, 8193/em-quad
 661   return-if result
 662   result <- equal c, 8194/en-space
 663   return-if result
 664   result <- equal c, 8195/em-space
 665   return-if result
 666   result <- equal c, 8196/three-per-em-space
 667   return-if result
 668   result <- equal c, 8197/four-per-em-space
 669   return-if result
 670   result <- equal c, 8198/six-per-em-space
 671   return-if result
 672   result <- equal c, 8199/figure-space
 673   return-if result
 674   result <- equal c, 8200/punctuation-space
 675   return-if result
 676   result <- equal c, 8201/thin-space
 677   return-if result
 678   result <- equal c, 8202/hair-space
 679   return-if result
 680   result <- equal c, 8206/left-to-right
 681   return-if result
 682   result <- equal c, 8207/right-to-left
 683   return-if result
 684   result <- equal c, 8232/line-separator
 685   return-if result
 686   result <- equal c, 8233/paragraph-separator
 687   return-if result
 688   result <- equal c, 8239/narrow-no-break-space
 689   return-if result
 690   result <- equal c, 8287/medium-mathematical-space
 691   return-if result
 692   result <- equal c, 12288/ideographic-space
 693 ]
 694
 695 def trim s:text -> result:text [
 696   local-scope
 697   load-inputs
 698   len:num <- length *s
 699   # left trim: compute start
 700   start:num <- copy 0
 701   {
 702     {
 703       at-end?:bool <- greater-or-equal start, len
 704       break-unless at-end?
 705       result <- new character:type, 0
 706       return
 707     }
 708     curr:char <- index *s, start
 709     whitespace?:bool <- space? curr
 710     break-unless whitespace?
 711     start <- add start, 1
 712     loop
 713   }
 714   # right trim: compute end
 715   end:num <- subtract len, 1
 716   {
 717     not-at-start?:bool <- greater-than end, start
 718     assert not-at-start?, [end ran up against start]
 719     curr:char <- index *s, end
 720     whitespace?:bool <- space? curr
 721     break-unless whitespace?
 722     end <- subtract end, 1
 723     loop
 724   }
 725   # result = new character[end+1 - start]
 726   new-len:num <- subtract end, start, -1
 727   result:text <- new character:type, new-len
 728   # copy the untrimmed parts between start and end
 729   i:num <- copy start
 730   j:num <- copy 0
 731   {
 732     # while i <= end
 733     done?:bool <- greater-than i, end
 734     break-if done?
 735     # result[j] = s[i]
 736     src:char <- index *s, i
 737     *result <- put-index *result, j, src
 738     i <- add i, 1
 739     j <- add j, 1
 740     loop
 741   }
 742 ]
 743
 744 scenario trim-unmodified [
 745   local-scope
 746   x:text <- new [abc]
 747   run [
 748     y:text <- trim x
 749     1:@:char/raw <- copy *y
 750   ]
 751   memory-should-contain [
 752     1:array:character <- [abc]
 753   ]
 754 ]
 755
 756 scenario trim-left [
 757   local-scope
 758   x:text <- new [  abc]
 759   run [
 760     y:text <- trim x
 761     1:@:char/raw <- copy *y
 762   ]
 763   memory-should-contain [
 764     1:array:character <- [abc]
 765   ]
 766 ]
 767
 768 scenario trim-right [
 769   local-scope
 770   x:text <- new [abc  ]
 771   run [
 772     y:text <- trim x
 773     1:@:char/raw <- copy *y
 774   ]
 775   memory-should-contain [
 776     1:array:character <- [abc]
 777   ]
 778 ]
 779
 780 scenario trim-left-right [
 781   local-scope
 782   x:text <- new [  abc   ]
 783   run [
 784     y:text <- trim x
 785     1:@:char/raw <- copy *y
 786   ]
 787   memory-should-contain [
 788     1:array:character <- [abc]
 789   ]
 790 ]
 791
 792 scenario trim-newline-tab [
 793   local-scope
 794   x:text <- new [       abc
 795 ]
 796   run [
 797     y:text <- trim x
 798     1:@:char/raw <- copy *y
 799   ]
 800   memory-should-contain [
 801     1:array:character <- [abc]
 802   ]
 803 ]
 804
 805 def find-next text:text, pattern:char, idx:num -> next-index:num [
 806   local-scope
 807   load-inputs
 808   len:num <- length *text
 809   {
 810     eof?:bool <- greater-or-equal idx, len
 811     break-if eof?
 812     curr:char <- index *text, idx
 813     found?:bool <- equal curr, pattern
 814     break-if found?
 815     idx <- add idx, 1
 816     loop
 817   }
 818   return idx
 819 ]
 820
 821 scenario text-find-next [
 822   local-scope
 823   x:text <- new [a/b]
 824   run [
 825     10:num/raw <- find-next x, 47/slash, 0/start-index
 826   ]
 827   memory-should-contain [
 828     10 <- 1
 829   ]
 830 ]
 831
 832 scenario text-find-next-empty [
 833   local-scope
 834   x:text <- new []
 835   run [
 836     10:num/raw <- find-next x, 47/slash, 0/start-index
 837   ]
 838   memory-should-contain [
 839     10 <- 0
 840   ]
 841 ]
 842
 843 scenario text-find-next-initial [
 844   local-scope
 845   x:text <- new [/abc]
 846   run [
 847     10:num/raw <- find-next x, 47/slash, 0/start-index
 848   ]
 849   memory-should-contain [
 850     10 <- 0  # prefix match
 851   ]
 852 ]
 853
 854 scenario text-find-next-final [
 855   local-scope
 856   x:text <- new [abc/]
 857   run [
 858     10:num/raw <- find-next x, 47/slash, 0/start-index
 859   ]
 860   memory-should-contain [
 861     10 <- 3  # suffix match
 862   ]
 863 ]
 864
 865 scenario text-find-next-missing [
 866   local-scope
 867   x:text <- new [abcd]
 868   run [
 869     10:num/raw <- find-next x, 47/slash, 0/start-index
 870   ]
 871   memory-should-contain [
 872     10 <- 4  # no match
 873   ]
 874 ]
 875
 876 scenario text-find-next-invalid-index [
 877   local-scope
 878   x:text <- new [abc]
 879   run [
 880     10:num/raw <- find-next x, 47/slash, 4/start-index
 881   ]
 882   memory-should-contain [
 883     10 <- 4  # no change
 884   ]
 885 ]
 886
 887 scenario text-find-next-first [
 888   local-scope
 889   x:text <- new [ab/c/]
 890   run [
 891     10:num/raw <- find-next x, 47/slash, 0/start-index
 892   ]
 893   memory-should-contain [
 894     10 <- 2  # first '/' of multiple
 895   ]
 896 ]
 897
 898 scenario text-find-next-second [
 899   local-scope
 900   x:text <- new [ab/c/]
 901   run [
 902     10:num/raw <- find-next x, 47/slash, 3/start-index
 903   ]
 904   memory-should-contain [
 905     10 <- 4  # second '/' of multiple
 906   ]
 907 ]
 908
 909 # search for a pattern of multiple characters
 910 # fairly dumb algorithm
 911 def find-next text:text, pattern:text, idx:num -> next-index:num [
 912   local-scope
 913   load-inputs
 914   first:char <- index *pattern, 0
 915   # repeatedly check for match at current idx
 916   len:num <- length *text
 917   {
 918     # does some unnecessary work checking even when there isn't enough of text left
 919     done?:bool <- greater-or-equal idx, len
 920     break-if done?
 921     found?:bool <- match-at text, pattern, idx
 922     break-if found?
 923     idx <- add idx, 1
 924     # optimization: skip past indices that definitely won't match
 925     idx <- find-next text, first, idx
 926     loop
 927   }
 928   return idx
 929 ]
 930
 931 scenario find-next-text-1 [
 932   local-scope
 933   x:text <- new [abc]
 934   y:text <- new [bc]
 935   run [
 936     10:num/raw <- find-next x, y, 0
 937   ]
 938   memory-should-contain [
 939     10 <- 1
 940   ]
 941 ]
 942
 943 scenario find-next-text-2 [
 944   local-scope
 945   x:text <- new [abcd]
 946   y:text <- new [bc]
 947   run [
 948     10:num/raw <- find-next x, y, 1
 949   ]
 950   memory-should-contain [
 951     10 <- 1
 952   ]
 953 ]
 954
 955 scenario find-next-no-match [
 956   local-scope
 957   x:text <- new [abc]
 958   y:text <- new [bd]
 959   run [
 960     10:num/raw <- find-next x, y, 0
 961   ]
 962   memory-should-contain [
 963     10 <- 3  # not found
 964   ]
 965 ]
 966
 967 scenario find-next-suffix-match [
 968   local-scope
 969   x:text <- new [abcd]
 970   y:text <- new [cd]
 971   run [
 972     10:num/raw <- find-next x, y, 0
 973   ]
 974   memory-should-contain [
 975     10 <- 2
 976   ]
 977 ]
 978
 979 scenario find-next-suffix-match-2 [
 980   local-scope
 981   x:text <- new [abcd]
 982   y:text <- new [cde]
 983   run [
 984     10:num/raw <- find-next x, y, 0
 985   ]
 986   memory-should-contain [
 987     10 <- 4  # not found
 988   ]
 989 ]
 990
 991 # checks if pattern matches at index 'idx'
 992 def match-at text:text, pattern:text, idx:num -> result:bool [
 993   local-scope
 994   load-inputs
 995   pattern-len:num <- length *pattern
 996   # check that there's space left for the pattern
 997   x:num <- length *text
 998   x <- subtract x, pattern-len
 999   enough-room?:bool <- lesser-or-equal idx, x
1000   return-unless enough-room?, false/not-found
1001   # check each character of pattern
1002   pattern-idx:num <- copy 0
1003   {
1004     done?:bool <- greater-or-equal pattern-idx, pattern-len
1005     break-if done?
1006     c:char <- index *text, idx
1007     exp:char <- index *pattern, pattern-idx
1008     match?:bool <- equal c, exp
1009     return-unless match?, false/not-found
1010     idx <- add idx, 1
1011     pattern-idx <- add pattern-idx, 1
1012     loop
1013   }
1014   return true/found
1015 ]
1016
1017 scenario match-at-checks-pattern-at-index [
1018   local-scope
1019   x:text <- new [abc]
1020   y:text <- new [ab]
1021   run [
1022     10:bool/raw <- match-at x, y, 0
1023   ]
1024   memory-should-contain [
1025     10 <- 1  # match found
1026   ]
1027 ]
1028
1029 scenario match-at-reflexive [
1030   local-scope
1031   x:text <- new [abc]
1032   run [
1033     10:bool/raw <- match-at x, x, 0
1034   ]
1035   memory-should-contain [
1036     10 <- 1  # match found
1037   ]
1038 ]
1039
1040 scenario match-at-outside-bounds [
1041   local-scope
1042   x:text <- new [abc]
1043   y:text <- new [a]
1044   run [
1045     10:bool/raw <- match-at x, y, 4
1046   ]
1047   memory-should-contain [
1048     10 <- 0  # never matches
1049   ]
1050 ]
1051
1052 scenario match-at-empty-pattern [
1053   local-scope
1054   x:text <- new [abc]
1055   y:text <- new []
1056   run [
1057     10:bool/raw <- match-at x, y, 0
1058   ]
1059   memory-should-contain [
1060     10 <- 1  # always matches empty pattern given a valid index
1061   ]
1062 ]
1063
1064 scenario match-at-empty-pattern-outside-bound [
1065   local-scope
1066   x:text <- new [abc]
1067   y:text <- new []
1068   run [
1069     10:bool/raw <- match-at x, y, 4
1070   ]
1071   memory-should-contain [
1072     10 <- 0  # no match
1073   ]
1074 ]
1075
1076 scenario match-at-empty-text [
1077   local-scope
1078   x:text <- new []
1079   y:text <- new [abc]
1080   run [
1081     10:bool/raw <- match-at x, y, 0
1082   ]
1083   memory-should-contain [
1084     10 <- 0  # no match
1085   ]
1086 ]
1087
1088 scenario match-at-empty-against-empty [
1089   local-scope
1090   x:text <- new []
1091   run [
1092     10:bool/raw <- match-at x, x, 0
1093   ]
1094   memory-should-contain [
1095     10 <- 1  # matches because pattern is also empty
1096   ]
1097 ]
1098
1099 scenario match-at-inside-bounds [
1100   local-scope
1101   x:text <- new [abc]
1102   y:text <- new [bc]
1103   run [
1104     10:bool/raw <- match-at x, y, 1
1105   ]
1106   memory-should-contain [
1107     10 <- 1  # match
1108   ]
1109 ]
1110
1111 scenario match-at-inside-bounds-2 [
1112   local-scope
1113   x:text <- new [abc]
1114   y:text <- new [bc]
1115   run [
1116     10:bool/raw <- match-at x, y, 0
1117   ]
1118   memory-should-contain [
1119     10 <- 0  # no match
1120   ]
1121 ]
1122
1123 def split s:text, delim:char -> result:&:@:text [
1124   local-scope
1125   load-inputs
1126   # empty text? return empty array
1127   len:num <- length *s
1128   {
1129     empty?:bool <- equal len, 0
1130     break-unless empty?
1131     result <- new {(address array character): type}, 0
1132     return
1133   }
1134   # count #pieces we need room for
1135   count:num <- copy 1  # n delimiters = n+1 pieces
1136   idx:num <- copy 0
1137   {
1138     idx <- find-next s, delim, idx
1139     done?:bool <- greater-or-equal idx, len
1140     break-if done?
1141     idx <- add idx, 1
1142     count <- add count, 1
1143     loop
1144   }
1145   # allocate space
1146   result <- new {(address array character): type}, count
1147   # repeatedly copy slices start..end until delimiter into result[curr-result]
1148   curr-result:num <- copy 0
1149   start:num <- copy 0
1150   {
1151     # while next delim exists
1152     done?:bool <- greater-or-equal start, len
1153     break-if done?
1154     end:num <- find-next s, delim, start
1155     # copy start..end into result[curr-result]
1156     dest:text <- copy-range s, start, end
1157     *result <- put-index *result, curr-result, dest
1158     # slide over to next slice
1159     start <- add end, 1
1160     curr-result <- add curr-result, 1
1161     loop
1162   }
1163 ]
1164
1165 scenario text-split-1 [
1166   local-scope
1167   x:text <- new [a/b]
1168   run [
1169     y:&:@:text <- split x, 47/slash
1170     10:num/raw <- length *y
1171     a:text <- index *y, 0
1172     b:text <- index *y, 1
1173     20:@:char/raw <- copy *a
1174     30:@:char/raw <- copy *b
1175   ]
1176   memory-should-contain [
1177     10 <- 2  # length of result
1178     20:array:character <- [a]
1179     30:array:character <- [b]
1180   ]
1181 ]
1182
1183 scenario text-split-2 [
1184   local-scope
1185   x:text <- new [a/b/c]
1186   run [
1187     y:&:@:text <- split x, 47/slash
1188     10:num/raw <- length *y
1189     a:text <- index *y, 0
1190     b:text <- index *y, 1
1191     c:text <- index *y, 2
1192     20:@:char/raw <- copy *a
1193     30:@:char/raw <- copy *b
1194     40:@:char/raw <- copy *c
1195   ]
1196   memory-should-contain [
1197     10 <- 3  # length of result
1198     20:array:character <- [a]
1199     30:array:character <- [b]
1200     40:array:character <- [c]
1201   ]
1202 ]
1203
1204 scenario text-split-missing [
1205   local-scope
1206   x:text <- new [abc]
1207   run [
1208     y:&:@:text <- split x, 47/slash
1209     10:num/raw <- length *y
1210     a:text <- index *y, 0
1211     20:@:char/raw <- copy *a
1212   ]
1213   memory-should-contain [
1214     10 <- 1  # length of result
1215     20:array:character <- [abc]
1216   ]
1217 ]
1218
1219 scenario text-split-empty [
1220   local-scope
1221   x:text <- new []
1222   run [
1223     y:&:@:text <- split x, 47/slash
1224     10:num/raw <- length *y
1225   ]
1226   memory-should-contain [
1227     10 <- 0  # empty result
1228   ]
1229 ]
1230
1231 scenario text-split-empty-piece [
1232   local-scope
1233   x:text <- new [a/b//c]
1234   run [
1235     y:&:@:text <- split x:text, 47/slash
1236     10:num/raw <- length *y
1237     a:text <- index *y, 0
1238     b:text <- index *y, 1
1239     c:text <- index *y, 2
1240     d:text <- index *y, 3
1241     20:@:char/raw <- copy *a
1242     30:@:char/raw <- copy *b
1243     40:@:char/raw <- copy *c
1244     50:@:char/raw <- copy *d
1245   ]
1246   memory-should-contain [
1247     10 <- 4  # length of result
1248     20:array:character <- [a]
1249     30:array:character <- [b]
1250     40:array:character <- []
1251     50:array:character <- [c]
1252   ]
1253 ]
1254
1255 def split-first text:text, delim:char -> x:text, y:text [
1256   local-scope
1257   load-inputs
1258   # empty text? return empty texts
1259   len:num <- length *text
1260   {
1261     empty?:bool <- equal len, 0
1262     break-unless empty?
1263     x:text <- new []
1264     y:text <- new []
1265     return
1266   }
1267   idx:num <- find-next text, delim, 0
1268   x:text <- copy-range text, 0, idx
1269   idx <- add idx, 1
1270   y:text <- copy-range text, idx, len
1271 ]
1272
1273 scenario text-split-first [
1274   local-scope
1275   x:text <- new [a/b]
1276   run [
1277     y:text, z:text <- split-first x, 47/slash
1278     10:@:char/raw <- copy *y
1279     20:@:char/raw <- copy *z
1280   ]
1281   memory-should-contain [
1282     10:array:character <- [a]
1283     20:array:character <- [b]
1284   ]
1285 ]
1286
1287 def copy-range buf:text, start:num, end:num -> result:text [
1288   local-scope
1289   load-inputs
1290   # if end is out of bounds, trim it
1291   len:num <- length *buf
1292   end:num <- min len, end
1293   # allocate space for result
1294   len <- subtract end, start
1295   result:text <- new character:type, len
1296   # copy start..end into result[curr-result]
1297   src-idx:num <- copy start
1298   dest-idx:num <- copy 0
1299   {
1300     done?:bool <- greater-or-equal src-idx, end
1301     break-if done?
1302     src:char <- index *buf, src-idx
1303     *result <- put-index *result, dest-idx, src
1304     src-idx <- add src-idx, 1
1305     dest-idx <- add dest-idx, 1
1306     loop
1307   }
1308 ]
1309
1310 scenario copy-range-works [
1311   local-scope
1312   x:text <- new [abc]
1313   run [
1314     y:text <- copy-range x, 1, 3
1315     1:@:char/raw <- copy *y
1316   ]
1317   memory-should-contain [
1318     1:array:character <- [bc]
1319   ]
1320 ]
1321
1322 scenario copy-range-out-of-bounds [
1323   local-scope
1324   x:text <- new [abc]
1325   run [
1326     y:text <- copy-range x, 2, 4
1327     1:@:char/raw <- copy *y
1328   ]
1329   memory-should-contain [
1330     1:array:character <- [c]
1331   ]
1332 ]
1333
1334 scenario copy-range-out-of-bounds-2 [
1335   local-scope
1336   x:text <- new [abc]
1337   run [
1338     y:text <- copy-range x, 3, 3
1339     1:@:char/raw <- copy *y
1340   ]
1341   memory-should-contain [
1342     1:array:character <- []
1343   ]
1344 ]
1345
1346 def parse-whole-number in:text -> out:num, error?:bool [
1347   local-scope
1348   load-inputs
1349   out <- copy 0
1350   result:num <- copy 0  # temporary location
1351   i:num <- copy 0
1352   len:num <- length *in
1353   {
1354     done?:bool <- greater-or-equal i, len
1355     break-if done?
1356     c:char <- index *in, i
1357     x:num <- character-to-code c
1358     digit:num, error?:bool <- character-code-to-digit x
1359     return-if error?
1360     result <- multiply result, 10
1361     result <- add result, digit
1362     i <- add i, 1
1363     loop
1364   }
1365   # no error; all digits were valid
1366   out <- copy result
1367 ]
1368
1369 # (contributed by Ella Couch)
1370 recipe character-code-to-digit character-code:number -> result:number, error?:boolean [
1371   local-scope
1372   load-inputs
1373   result <- copy 0
1374   error? <- lesser-than character-code, 48  # '0'
1375   return-if error?
1376   error? <- greater-than character-code, 57  # '9'
1377   return-if error?
1378   result <- subtract character-code, 48
1379 ]
1380
1381 scenario character-code-to-digit-contain-only-digit [
1382   local-scope
1383   a:number <- copy 48  # character code for '0'
1384   run [
1385     10:number/raw, 11:boolean/raw <- character-code-to-digit a
1386   ]
1387   memory-should-contain [
1388     10 <- 0
1389     11 <- 0  # no error
1390   ]
1391 ]
1392
1393 scenario character-code-to-digit-contain-only-digit-2 [
1394   local-scope
1395   a:number <- copy 57  # character code for '9'
1396   run [
1397     1:number/raw, 2:boolean/raw <- character-code-to-digit a
1398   ]
1399   memory-should-contain [
1400     1 <- 9
1401     2 <- 0  # no error
1402   ]
1403 ]
1404
1405 scenario character-code-to-digit-handles-codes-lower-than-zero [
1406   local-scope
1407   a:number <- copy 47
1408   run [
1409     10:number/raw, 11:boolean/raw <- character-code-to-digit a
1410   ]
1411   memory-should-contain [
1412     10 <- 0
1413     11 <- 1  # error
1414   ]
1415 ]
1416
1417 scenario character-code-to-digit-handles-codes-larger-than-nine [
1418   local-scope
1419   a:number <- copy 58
1420   run [
1421     10:number/raw, 11:boolean/raw <- character-code-to-digit a
1422   ]
1423   memory-should-contain [
1424     10 <- 0
1425     11 <- 1  # error
1426   ]
1427 ]