level0/urforth0_w_parse.asm

   1 ;; Native x86 GNU/Linux Forth System, Direct Threaded Code
   2 ;;
   3 ;; Copyright (C) 2020 Ketmar Dark // Invisible Vector
   4 ;;
   5 ;; This program is free software: you can redistribute it and/or modify
   6 ;; it under the terms of the GNU General Public License as published by
   7 ;; the Free Software Foundation, version 3 of the License ONLY.
   8 ;;
   9 ;; This program is distributed in the hope that it will be useful,
  10 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 ;; GNU General Public License for more details.
  13 ;;
  14 ;; You should have received a copy of the GNU General Public License
  15 ;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 ;; if set to true (non-zero), "PARSE-NAME" will understand comments without space delimiters
  18 ;; (except a lone banana)
  19 urword_value "PARSE-NAME-ADV-COMMENTS",parse_name_adv_comments,1
  20
  21
  22 ;DIGIT ( c n1 -- n2 tf  ok)
  23 ;      ( c n1 -- ff     bad)
  24 ;Converts the ascii character c (using base n1) to its binary equivalent
  25 ;n2, accompanied by a true flag. If the conversion is invalid, leaves
  26 ;only a false flag.
  27 urword_code "DIGIT",digit
  28   pop   eax
  29   ; TOS=base
  30   ; EAX=char
  31   jecxz .bad
  32   test  TOS,0x80000000
  33   jr    nz,.bad
  34   cp    TOS,36+1
  35   jr    nc,.bad
  36   sub   al,'0'
  37   jr    c,.bad
  38   cp    al,10
  39   jr    c,.check_base
  40   sub   al,7
  41   jr    c,.bad
  42   cp    al,10
  43   jr    c,.bad
  44   ; upcase it
  45   cp    al,36
  46   jr    c,@f
  47   cp    al,42
  48   jr    c,.bad
  49   sub   al,32
  50 @@:
  51 .check_base:
  52   cp    al,cl     ; ECX is TOS
  53   jr    nc,.bad
  54   movzx eax,al
  55   push  eax
  56   mov   TOS,1
  57   urnext
  58 .bad:
  59   xor   TOS,TOS
  60   urnext
  61 urword_end
  62
  63 urword_forth "DIGIT?",digitq
  64 ;; ( ch base -- flag )
  65   UF digit dup
  66   ur_if
  67     UF nip
  68   ur_endif
  69 urword_end
  70
  71
  72 ;; convert the ASCII text beginning at addr with regard to BASE.
  73 ;; the new value is accumulated into unsigned number u0, being left as u1.
  74 ;; addr1 and count1 are unparsed part of the string
  75 ;; will never read more than count bytes
  76 ;; doesn't skip any spaces, doesn't parse prefixes and signs
  77 ;; but skips '_'
  78 urword_forth "(NUMBER-PARSE-SIMPLE)",par_number_parse_simple
  79 ;; ( addr count u0 -- addr1 count1 u1 )
  80   UF over 0great
  81   ur_ifnot
  82     UF exit
  83   ur_endif
  84   UF rpush
  85   ;; ( addr count | u )
  86   ;; first must be a digit
  87   UF over cpeek base @ digitq
  88   ur_if
  89     ;; main loop
  90     ur_begin
  91       UF dup
  92     ur_while
  93       ;; ( addr count | u )
  94       UF over cpeek
  95       ;; skip '_'
  96       UF dup 95 equal
  97       ur_if
  98         UF drop
  99       ur_else
 100         ;; try digit
 101         UF base @ digit
 102         ur_ifnot
 103           UF rpop exit  ;; replace with BREAK
 104         ur_endif
 105         UF rpop base @ umul + rpush
 106       ur_endif
 107       ;; skip char
 108       UF 1dec swap 1inc swap
 109     ur_repeat
 110   ur_endif
 111   UF rpop
 112 urword_end
 113
 114 ;; k8: non-conforming, because we cannot parse double numbers
 115 urword_forth ">NUMBER",in_number
 116 ;; ( ud1 c-addr1 u1 -- ud2 c-addr2 u2 )
 117   UF rot drop rot
 118   UF par_number_parse_simple
 119   UF nrot 0 nrot
 120 urword_end
 121
 122
 123 urword_forth "NUMBER-PARSE-PFX-SIGIL",number_parse_sigil
 124 ;; ( addr count -- addr count newbase )
 125   ;; simple sigils
 126   UF dup 1 great
 127   ur_ifnot
 128     UF 0 exit
 129   ur_endif
 130   ;; simple sigils
 131   UF over cpeek
 132   ;; ( addr count char )
 133   UF dup 36 equal  ;; dollar
 134   ur_if
 135     UF drop swap 1inc swap 1dec
 136     UF 16 exit
 137   ur_endif
 138   UF dup 35 equal  ;; hash
 139   ur_if
 140     UF drop swap 1inc swap 1dec
 141     UF shit2012shit 1 and
 142     ur_if
 143       UF 10
 144     ur_else
 145       UF 16
 146     ur_endif
 147     UF exit
 148   ur_endif
 149   UF 37 equal  ;; percent
 150   ur_if
 151     UF swap 1inc swap 1dec
 152     UF 2 exit
 153   ur_endif
 154   UF 0
 155 urword_end
 156
 157 urword_forth "NUMBER-PARSE-PFX-0X",number_parse_0x
 158 ;; ( addr count -- addr count newbase )
 159   UF dup 2 great
 160   ur_ifnot
 161     UF 0 exit
 162   ur_endif
 163   UF over cpeek 48 equal
 164   ur_ifnot
 165     UF 0 exit
 166   ur_endif
 167   UF over 1inc cpeek upcase_char
 168   UF dup 88 equal  ;; X
 169   ur_if
 170     ;; ( addr count char )
 171     UF drop swap 2inc swap 2dec
 172     UF 16 exit
 173   ur_endif
 174   UF dup 79 equal  ;; O
 175   ur_if
 176     UF drop swap 2inc swap 2dec
 177     UF 8 exit
 178   ur_endif
 179   UF dup 66 equal  ;; B
 180   ur_if
 181     UF drop swap 2inc swap 2dec
 182     UF 2 exit
 183   ur_endif
 184   UF 68 equal  ;; D
 185   ur_if
 186     UF swap 2inc swap 2dec
 187     UF 10 exit
 188   ur_endif
 189   UF 0
 190 urword_end
 191
 192 urword_forth "NUMBER-PARSE-PFX-&",number_parse_pfxamp
 193 ;; ( addr count -- addr count newbase )
 194   UF dup 2 great
 195   ur_ifnot
 196     UF 0 exit
 197   ur_endif
 198   UF over cpeek 38 equal
 199   ur_ifnot
 200     UF 0 exit
 201   ur_endif
 202   UF over 1inc cpeek upcase_char
 203   UF dup 72 equal  ;; H
 204   ur_if
 205     ;; ( addr count char )
 206     UF drop swap 2inc swap 2dec
 207     UF 16 exit
 208   ur_endif
 209   UF dup 79 equal  ;; O
 210   ur_if
 211     UF drop swap 2inc swap 2dec
 212     UF 8 exit
 213   ur_endif
 214   UF dup 66 equal  ;; B
 215   ur_if
 216     UF drop swap 2inc swap 2dec
 217     UF 2 exit
 218   ur_endif
 219   UF 68 equal  ;; D
 220   ur_if
 221     UF swap 2inc swap 2dec
 222     UF 10 exit
 223   ur_endif
 224   UF 0
 225 urword_end
 226
 227
 228 urword_forth "(NUMBER-GOOD-BIN-DIGIT?)",par_number_isbindig
 229 urword_hidden
 230 ;; ( ch -- flag )
 231   UF dup 95 equal
 232   ur_if
 233     UF drop true exit
 234   ur_endif
 235   UF 2 digitq
 236 urword_end
 237
 238 urword_forth "(NUMBER-PARSE-BOOL?)",number_parse_boolq
 239 urword_hidden
 240 ;; ( addr count -- flag )
 241   UF dup 0great
 242   ur_ifnot
 243     UF 2drop 0 exit
 244   ur_endif
 245   ;; first must be digit
 246   UF over cpeek 2 digitq
 247   ur_ifnot
 248     UF 2drop 0 exit
 249   ur_endif
 250   UF true nrot
 251   UF over + swap
 252   ur_do
 253     ;; ( okflag )
 254     UF i cpeek par_number_isbindig
 255     ur_ifnot
 256       UF drop false leave
 257     ur_endif
 258   ur_loop
 259 urword_end
 260
 261 urword_forth "NUMBER-PARSE-SFX",number_parse_sfx
 262 ;; ( addr count -- addr count newbase )
 263   ;; check suffixes
 264   UF dup 2 great
 265   ur_ifnot
 266     UF 0 exit
 267   ur_endif
 268   UF 2dup + 1dec cpeek upcase_char
 269   UF dup 72 equal  ;; H
 270   ur_if
 271     UF drop 1dec
 272     UF 16 exit
 273   ur_endif
 274   UF dup 79 equal  ;; O
 275   ur_if
 276     UF drop 1dec
 277     UF 8 exit
 278   ur_endif
 279   ;; the following suffix is allowed only if all digits are right
 280   UF 66 equal  ;; B
 281   ur_if
 282     UF 1dec
 283     ;; ( addr count )
 284     UF 2dup number_parse_boolq
 285     ur_if
 286       UF 2 exit
 287     ur_endif
 288     UF 1inc
 289   ur_endif
 290   UF 0
 291 urword_end
 292
 293
 294 ;; will return base according to prefix/suffix, and remove pfx/sfx from the string
 295 ;; returns 0 if no special base change found
 296 urword_forth "NUMBER-PARSE-PFX-SFX",number_parse_prefix_suffix
 297 ;; ( addr count -- addr count newbase )
 298   UF number_parse_sigil qdup
 299   ur_if
 300     UF exit
 301   ur_endif
 302   UF number_parse_0x qdup
 303   ur_if
 304     UF exit
 305   ur_endif
 306   UF number_parse_pfxamp qdup
 307   ur_if
 308     UF exit
 309   ur_endif
 310   UF number_parse_sfx
 311 urword_end
 312
 313
 314 urword_forth "NUMBER",number
 315 ;; ( addr count -- n true // false )
 316 ; convert a character string left at addr to a signed number, using the current numeric base.
 317   ;; check length
 318   UF dup 0lessequ
 319   ur_if
 320     UF 2drop 0 exit
 321   ur_endif
 322   ;; ok, we have at least one valid char
 323   ;; check for leading minus (only if 2012 so-called-standard idiocity is not turned on)
 324   UF shit2012shit 1 and
 325   ur_if
 326     UF 0
 327   ur_else
 328     UF over cpeek 45 equal
 329     ur_if
 330       UF swap 1inc swap 1dec 1
 331     ur_else
 332       UF 0
 333     ur_endif
 334   ur_endif
 335   UF nrot
 336   ;; ( negflag addr count )
 337   UF base @ rpush  ;; it can be changed by number prefix/suffix
 338   UF number_parse_prefix_suffix
 339   ;; done checking
 340   UF qdup
 341   ur_if
 342     UF base !
 343   ur_endif
 344   ;; check for leading minus (only if 2012 so-called-standard idiocity is turned on)
 345   UF shit2012shit 1 and
 346   ur_if
 347     UF dup 0great
 348     ur_if
 349       UF over cpeek 45 equal
 350       ur_if
 351         UF swap 1inc swap 1dec
 352         UF rot drop 1 nrot
 353       ur_endif
 354     ur_endif
 355   ur_endif
 356   ;; zero count means "nan"
 357   UF dup 0great
 358   ur_ifnot
 359     ;; restore base
 360     UF rpop base !
 361     ;; exit with failure
 362     UF 2drop drop 0 exit
 363   ur_endif
 364   ;; ( negflag addr count | oldbase )
 365   UF 0 par_number_parse_simple
 366   ;; ( negflag addr count u | oldbase )
 367   ;; restore base
 368   UF rpop base !
 369   ;; ( negflag addr count u )
 370   ;; if not fully parsed, it is nan
 371   UF swap
 372   ;; ( negflag addr u count )
 373   ur_if
 374     ;; exit with failure
 375     UF drop 2drop 0 exit
 376   ur_endif
 377   ;; ( negflag addr u )
 378   UF nip swap
 379   ur_if
 380     UF negate
 381   ur_endif
 382   ;; success
 383   UF 1
 384 urword_end
 385
 386
 387 ;; scans TIB, returns parsed word
 388 ;; doesn't do any copying
 389 ;; trailing delimiter is skipped
 390 ;; HACK: sets (WORD-LAST-DELIMITER)
 391 ;;       this is so "comment-to-eol" could work
 392 urword_code "(WORD)",par_word
 393 urword_hidden
 394 ;; ( c skip-leading-delim? -- addr count )
 395   ; reset last delimiter (it will be set later)
 396   mov   edx,[fvar_tibsize_data]
 397   sub   edx,[fvar_inptr_data]
 398   jc    .noinput_drop
 399   mov   edi,[fvar_tib_data]
 400   add   edi,[fvar_inptr_data]
 401   ; do we need to skip leading delimiters?
 402   jecxz .noskipdel
 403   pop   TOS   ; get char in TOS
 404   ; TOS=char
 405   ; EDI=tibptr
 406   ; EDX=tibleft
 407   ; skip leading delimiters
 408 .skipdel_loop:
 409   or    edx,edx
 410   jz    .noinput
 411   call  .cmp_cl_memedi
 412   jz    .skipchar
 413   or    al,al
 414   jz    .noinput
 415   jmp   .startword
 416 .skipchar:
 417   call  .count_lines
 418   inc   edi
 419   dec   edx
 420   jmp   .skipdel_loop
 421
 422 .noskipdel:
 423   pop   TOS   ; get char in TOS
 424
 425 .startword:
 426   ; remember current position
 427   push  edi
 428   ; skip until delimiter
 429 .collect_loop:
 430   or    edx,edx
 431   jz    .done_noadv
 432   ; count lines here, so we won't have to call it on word completion
 433   call  .count_lines
 434   call  .cmp_cl_memedi
 435   jz    .done
 436   or    al,al
 437   jz    .done_noadv
 438   inc   edi
 439   dec   edx
 440   jmp   .collect_loop
 441
 442 .done:
 443   ; skip delimiter
 444   or    edx,edx
 445   jr    z,.done_noadv
 446   cmp   byte [edi],0
 447   jr    z,.done_noadv
 448   dec   edx
 449   movzx eax,byte [edi]
 450   ld    [fvar_par_last_read_char_data],eax
 451 .done_noadv:
 452   ; the word is never empty here
 453   ; stack: word start
 454   ; EDI: word end (after the last char, at a delimiter)
 455   ; EDX=tibleft
 456   ; fix inptr
 457   mov   eax,[fvar_tibsize_data]
 458   sub   eax,edx
 459   mov   [fvar_inptr_data],eax
 460   ; calc and store counter
 461   pop   ecx     ; start address
 462   mov   eax,edi ; EAX=end address
 463   sub   eax,ecx ; EAX=length
 464   ; truncate length
 465   ld    edx,1020
 466   cp    eax,edx
 467   cmovnc eax,edx
 468   ; ECX=start address
 469   ; EDI=end address
 470   ; EAX=length
 471   ; copy bytes
 472   push  TOS
 473   ld    TOS,eax
 474   urnext
 475
 476 .noinput_drop:
 477   add   esp,4   ; drop char, as we didn't poped it yet
 478 .noinput:
 479   ; TOS=char
 480   ; EDX=tibleft
 481   mov   eax,[fvar_tibsize_data]
 482   sub   eax,edx
 483   ld    edx,0
 484   cmovc eax,edx
 485   mov   [fvar_inptr_data],eax
 486   ; push current tib position, and zero length
 487   add   eax,[fvar_tib_data]
 488   push  eax
 489   xor   TOS,TOS
 490   urnext
 491
 492 ; zero flag: equality
 493 ; al: byte at [edi]
 494 .cmp_cl_memedi:
 495   movzx eax,byte [edi]
 496   ld    [fvar_par_last_read_char_data],eax
 497   cmp   al,1
 498   jnc   .cmp_cl_memedi_nonzero
 499   ret
 500 .cmp_cl_memedi_nonzero:
 501   cmp   cl,32
 502   jnz   .cmp_cl_memedi_ok
 503   ; coerce to space
 504   cmp   al,32
 505   jnc   .cmp_cl_memedi_ok
 506   mov   al,32
 507 .cmp_cl_memedi_ok:
 508   cmp   al,cl
 509   ret
 510
 511 .count_lines:
 512   cp    byte [edi],10
 513   jr    nz,@f
 514   cp    dword [fvar_tiblineno_data],0
 515   jr    z,@f
 516   inc   dword [fvar_tiblineno_data]
 517 @@:
 518   ret
 519 urword_end
 520
 521 ;; this is a leftover from '"WORD" is always using real here'
 522 ;urword_forth "WORD-HERE",word_here
 523 ;  UF dp @
 524 ;urword_end
 525 urword_alias "WORD-HERE",word_here,here
 526
 527 urword_forth "(WORD-OR-PARSE)",par_word_or_parse
 528 urword_hidden
 529 ;; ( c skip-leading-delim?  -- wordhere )
 530   UF par_word
 531   UF 1020 umin        ;; truncate length
 532   UF dup word_here !  ;; set counter
 533   UF word_here cellinc swap move  ;; copy string
 534   UF word_here count + 0cpoke     ;; put trailing zero byte
 535   UF word_here
 536 urword_end
 537
 538 ;; HACK: sets (WORD-LAST-DELIMITER)
 539 ;;       this is so "comment-to-eol" could work
 540 ;; WARNING! it is using "HERE", so "DP-TEMP" is in effect
 541 ;; artificial word length limit: 1020 chars
 542 ;; longer words will be properly scanned, but truncated
 543 ;; adds trailing zero after the string (but doesn't include it in count)
 544 ;; string is cell-counted
 545 urword_forth "WORD",word_obsolete
 546 ;; ( c  -- wordhere )
 547   UF 1 par_word_or_parse
 548 urword_end
 549
 550 urword_forth "PARSE-TO-HERE",parse_to_here
 551 ;; ( c -- wordhere )
 552   UF 0 par_word_or_parse
 553 urword_end
 554
 555 urword_forth "PARSE",parse
 556 ;; ( c -- addr count )
 557   UF 0 par_word       ;; parse, don't skip leading delimiters
 558 urword_end
 559
 560 urword_forth "PARSE-SKIP-BLANKS",par_parse_skip_blanks
 561 ;; ( -- )
 562   ur_begin
 563     UF tib_peekch qdup
 564   ur_while
 565     UF 32 great
 566     ur_if
 567       UF exit
 568     ur_endif
 569     UF tib_getch drop
 570   ur_repeat
 571 urword_end
 572
 573 urword_forth "PARSE-SKIP-BLANKS-NO-EOL",par_parse_skip_blanks_no_eol
 574 ;; ( -- )
 575   ur_begin
 576     UF tib_peekch qdup
 577   ur_while
 578     UF dup 32 great
 579     ur_if
 580       UF drop exit
 581     ur_endif
 582     UF dup 13 equal swap 10 equal or
 583     ur_if
 584       UF exit
 585     ur_endif
 586     UF tib_getch drop
 587   ur_repeat
 588 urword_end
 589
 590 urword_forth "PARSE-SKIP-BLANKS-EX",par_parse_skip_blanks_ex
 591 ;; ( skipeol-flag -- )
 592   ur_if
 593     UF par_parse_skip_blanks
 594   ur_else
 595     UF par_parse_skip_blanks_no_eol
 596   ur_endif
 597 urword_end
 598
 599
 600 ;; simple heuristic: if the word ends with ")", try to find it in current vocabs
 601 ;; as our searching is very fast, and this word is not invoked that often, it is ok
 602 urword_forth "(PARSE-GOOD-COMMENT?)",par_parse_good_commentq
 603 ;; ( addr count -- addr count flag )
 604 if 0
 605     UF 2dup endcr pardottype "CHECKING: <" type pardottype ">: "
 606   UF 2dup wfind_str dup
 607     UF dup dot cr
 608   ur_if
 609     UF nip
 610   ur_endif
 611   UF not
 612 else
 613   UF true
 614 end if
 615 urword_end
 616
 617 ;; sorry for this huge mess
 618 urword_forth "(PARSE-NAME-EX)",par_parse_name_ex
 619 urword_hidden
 620 ;; ( -- addr count 1 // 0 )
 621   UF par_tibstate_rpush
 622   UF bl 1 par_word
 623  if 0
 624   ;; if word length is enough for normal comment processing, do it
 625   UF dup 1 great
 626   ur_ifnot
 627     ;; empty word, or one-char word
 628     UF par_tibstate_rdrop true exit
 629   ur_endif
 630   ;; check for single-char backslash
 631   UF over cpeek 92 equal
 632   ur_if
 633     UF par_parse_good_commentq
 634     ur_if
 635       UF 2drop par_tibstate_rpop
 636       UF par_parse_skip_blanks
 637       ;; skip word char
 638       UF tib_getch drop
 639       UF comment_toeol
 640       UF false exit
 641     ur_else
 642       UF true exit
 643     ur_endif
 644   ur_endif
 645  end if
 646   ;; check for double-char comments
 647   UF dup 2 great
 648   ur_ifnot
 649     UF par_tibstate_rdrop true exit
 650   ur_endif
 651   ;; check two chars at once
 652   UF over wpeek
 653   ;; two semicolons or two slashes
 654   UF dup 0x3b3b equal over 0x2f2f equal or
 655   ur_if
 656     UF drop
 657     UF par_parse_good_commentq
 658     ur_if
 659       UF 2drop par_tibstate_rpop
 660       UF par_parse_skip_blanks
 661       ;; skip word char
 662       UF tib_getch drop
 663       UF comment_toeol
 664       UF false exit
 665     ur_else
 666       UF true exit
 667     ur_endif
 668   ur_endif
 669   ;; (* and (+
 670   UF dup 0x2a28 equal over 0x2b28 equal or
 671   ur_if
 672     ;; UF nrot 2drop par_tibstate_rpop
 673     UF nrot par_parse_good_commentq
 674     ur_if
 675       UF 2drop par_tibstate_rpop
 676       ;; ( 2chars )
 677       ;; skip to the word
 678       UF par_parse_skip_blanks
 679       ;; skip two word chars
 680       UF tib_getch tib_getch 2drop
 681       UF 0x2a28 equal
 682       ur_if
 683         UF comment_multiline
 684       ur_else
 685         UF comment_multiline_nested
 686       ur_endif
 687       UF false exit
 688     ur_endif
 689   ur_endif
 690   ;; not a comment
 691   UF drop par_tibstate_rdrop
 692   UF true
 693 urword_end
 694
 695 urword_forth "PARSE-NAME",parse_name
 696 ;; ( -- addr count )
 697   UF bl 1 par_word    ;; parse, skip leading delimiters
 698 urword_end
 699
 700 urword_forth "PARSE-NAME-EX",parse_name_ex
 701 ;; ( -- addr count )
 702   UF parse_name_adv_comments
 703   ur_if
 704     ;; save current tib, so we will be able to reparse comments
 705     ur_begin
 706       UF par_parse_name_ex
 707     ur_until
 708   ur_else
 709     UF parse_name
 710   ur_endif
 711 urword_end
 712