zpu/sw/startup/crt0.S

   1 /* Startup code for ZPU
   2    Copyright (C) 2005 Free Software Foundation, Inc.
   3
   4 This file is free software; you can redistribute it and/or modify it
   5 under the terms of the GNU General Public License as published by the
   6 Free Software Foundation; either version 2, or (at your option) any
   7 later version.
   8
   9 In addition to the permissions in the GNU General Public License, the
  10 Free Software Foundation gives you unlimited permission to link the
  11 compiled version of this file with other programs, and to distribute
  12 those programs without any restriction coming from the use of this
  13 file.  (The General Public License restrictions do apply in other
  14 respects; for example, they cover modification of the file, and
  15 distribution when not linked into another program.)
  16
  17 This file is distributed in the hope that it will be useful, but
  18 WITHOUT ANY WARRANTY; without even the implied warranty of
  19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20 General Public License for more details.
  21
  22 You should have received a copy of the GNU General Public License
  23 along with this program; see the file COPYING.  If not, write to
  24 the Free Software Foundation, 59 Temple Place - Suite 330,
  25 Boston, MA 02111-1307, USA.  */
  26         .file   "crt0.S"
  27
  28
  29
  30
  31 ;       .section ".fixed_vectors","ax"
  32 ; KLUDGE!!! we remove the executable bit to avoid relaxation
  33         .section ".fixed_vectors","a"
  34
  35 ; DANGER!!!!
  36 ; we need to align these code sections to 32 bytes, which
  37 ; means we must not use any assembler instructions that are relaxed
  38 ; at linker time
  39 ; DANGER!!!!
   40 ; fixedim value: emit a plain `im \value` (pushes the immediate). Presumably a separate macro so the non-relaxed encoding is chosen in one place - TODO confirm.
   41         .macro fixedim value
   42                         im \value
   43         .endm
   44 ; jsr address: call \address while preserving memory words 0, 4 and 8 (R0..R2) across the call.
   45         .macro  jsr address
   46                         ; push R0, R1, R2 in that order; they are popped back in reverse after the call
   47                         im 0            ; save R0
   48                         load            ; push mem[0]
   49                         im 4            ; save R1
   50                         load            ; push mem[4]
   51                         im 8            ; save R2
   52                         load            ; push mem[8]
   53
   54                         fixedim \address
   55                         call            ; call the address just pushed
   56
   57                         im 8
   58                         store           ; restore R2
   59                         im 4
   60                         store           ; restore R1
   61                         im 0
   62                         store           ; restore R0
   63         .endm
  64
   65 ; jmp address: unconditional jump - push \address, then pop it into the program counter.
   66         .macro  jmp address
   67                         fixedim \address
   68                         poppc
   69         .endm
  70
   71 ; fast_neg: negate the top of stack in place (two's complement: ~x + 1) without using the emulated `neg`.
   72         .macro fast_neg
   73         not                     ; ~x
   74         im 1
   75         add                     ; ~x + 1 = -x
   76         .endm
   77 ; cimpl funcname: handler body for a two-operand emulated opcode implemented by calling \funcname. Entry stack (top first): [ret, A, B]; exit: [result], PC = ret.
   78         .macro cimpl funcname
   79         ; save R0
   80         im 0
   81         load
   82
   83         ; save R1
   84         im 4
   85         load
   86
   87         ; save R2
   88         im 8
   89         load
   90         ; stack is now [R2, R1, R0, ret, A, B]; copy both operands to the top as call arguments
   91         loadsp 20               ; sp+20 = B
   92         loadsp 20               ; sp+20 = A (one more word was just pushed)
   93
   94         fixedim \funcname
   95         call
   96
   97         ; destroy arguments on stack
   98         storesp 0               ; storesp 0 writes the popped word onto itself, i.e. a plain drop
   99         storesp 0
  100         ; the callee's result is read back from memory word 0 (R0)
  101         im 0
  102         load
  103
  104         ; poke the result into the right slot
  105         storesp 24              ; stack is [result, R2, R1, R0, ret, A, B]; sp+24 = B slot
  106
  107         ; restore R2
  108         im 8
  109         store
  110
  111         ; restore R1
  112         im 4
  113         store
  114
  115         ; restore R0
  116         im 0
  117         store
  118
  119         ; stack is [ret, A, result]: move ret over A, then return
  120         storesp 4
  121         poppc
  122         .endm
  123 ; mult1bit: one shift-and-add multiply step. Stack (top first) must be [C, B, A]: if bit 0 of A is set, C += B; then B <<= 1 and A >>= 1 (logical).
  124         .macro mult1bit
  125         ; create mask of lowest bit in A
  126         loadsp 8 ; A
  127         im 1
  128         and
  129         im -1
  130         add                     ; (A & 1) - 1: 0 if the bit is set, -1 if clear
  131         not                     ; -1 if the bit is set, 0 if clear
  132         loadsp 8 ; B
  133         and                     ; B or 0
  134         add ; accumulate in C
  135
  136         ; shift B left 1 bit
  137         loadsp 4 ; B
  138         addsp 0                 ; addsp 0 adds the top word to itself, i.e. doubles it
  139         storesp 8 ; B
  140
  141         ; shift A right 1 bit
  142         loadsp 8 ; A
  143         flip                    ; bit-reverse, double, bit-reverse = logical shift right by 1
  144         addsp 0
  145         flip
  146         storesp 12 ; A
  147         .endm
 148
 149
 150
 151 /* vectors */
  152         .balign 32,0
  153 # offset 0x0000 0000 - entry point: record ZPU_ID in _cpu_config, run `config`, then jump to _premain
  154                 .globl _start
  155 _start:
  156                 ; intSp must be 0 when we jump to _premain
  157
  158                 im ZPU_ID               ; ZPU_ID is not defined in this file
  159                 loadsp 0                ; duplicate it: [ZPU_ID, ZPU_ID]
  160                 im _cpu_config
  161                 store                   ; _cpu_config = ZPU_ID; one copy remains on the stack
  162                 config                  ; when emulated, _config below stores 1 to _hardware and drops the remaining copy
  163                 jmp _premain            ; _premain is defined outside this file
 164
 165
 166
  167         .balign 32,0
  168 # offset 0x0000 0020 - interrupt vector: call _zpu_interrupt with R0..R2 preserved, then return
  169                 .globl _zpu_interrupt_vector
  170 _zpu_interrupt_vector:
  171                 jsr _zpu_interrupt      ; _zpu_interrupt is defined outside this file
  172                 poppc                   ; return to the address left on top of the stack
 173
 174
 175 /* instruction emulation code */
 176
  177 # opcode 34 - loadh emulation. Entry stack (top first): [ret, addr]; the addr slot is replaced by the zero-extended 16-bit value read from addr
  178 # offset 0x0000 0040
  179         .balign 32,0
  180 _loadh:
  181         loadsp 4                ; addr
  182         ; by not masking out bit 0, we cause a memory access error
  183         ; on unaligned access
  184         im ~0x2
  185         and                     ; clear bit 1 only
  186         load                    ; word containing the halfword
  187
  188         ; shift count in bits = (2 - (addr & 3)) * 8
  189         loadsp 8                ; addr again
  190         im 3
  191         and
  192         fast_neg
  193         im 2
  194         add                     ; 2 - (addr & 3)
  195         im 3
  196         ashiftleft              ; << 3, i.e. * 8
  197         ; shift the word right by (2 - (addr&3)) * 8 bits
  198         lshiftright
  199         im 0xffff
  200         and                     ; keep the low 16 bits
  201         storesp 8               ; overwrite the addr slot with the result
  202
  203         poppc                   ; return
 204
  205 # opcode 35 - storeh emulation, first half (continues in _storehtail). Entry stack (top first): [ret, addr, value]
  206 # offset 0x0000 0060
  207         .balign 32,0
  208 _storeh:
  209         loadsp 4                ; addr
  210         ; by not masking out bit 0, we cause a memory access error
  211         ; on unaligned access
  212         im ~0x2
  213         and
  214         load                    ; current word at the target location
  215
  216         ; mask
  217         im 0xffff
  218         loadsp 12               ; addr
  219         im 3
  220         and
  221         fast_neg
  222         im 2
  223         add                     ; 2 - (addr & 3)
  224         im 3
  225         ashiftleft              ; * 8 -> shift count in bits
  226         ashiftleft              ; 0xffff << count
  227         not                     ; mask that clears the destination halfword
  228
  229         and                     ; word with the destination halfword cleared
  230
  231         loadsp 12               ; value
  232         im 0xffff
  233
  234         nop                     ; presumably keeps the two adjacent `im`s from combining into one constant - TODO confirm
  235
  236         fixedim _storehtail
  237         poppc                   ; continue in _storehtail with [0xffff, value, masked word, ret, addr, value]
 238
 239
  240 # opcode 36 - lessthan emulation (signed). Entry stack (top first): [ret, A, B]; exit: [A < B ? 1 : 0]
  241 # offset 0x0000 0080
  242         .balign 32,0
  243 _lessthan:
  244         loadsp 8                ; B
  245         fast_neg
  246         loadsp 8                ; A
  247         add                     ; diff = A - B
  248
  249         ; DANGER!!!!
  250         ; 0x80000000 will overflow when negated, so we need to mask
  251         ; the result above with the compare positive to negative
  252         ; number case
  253         loadsp 12               ; B
  254         loadsp 12               ; A
  255         not
  256         and                     ; ~A & B: sign bit set when A >= 0 and B < 0
  257         not
  258         and                     ; diff with its sign bit cleared in that case
  259
  260
  261         ; handle case where we are comparing a negative number
  262         ; and positive number. This can underflow. E.g. consider 0x80000000 < 0x1000
  263         loadsp 12               ; B
  264         not
  265         loadsp 12               ; A
  266         and                     ; A & ~B: sign bit set when A < 0 and B >= 0
  267
  268         or                      ; force the sign bit on in that case
  269
  270
  271         ; extract the sign bit as 0/1
  272         flip
  273         im 1
  274         and
  275
  276
  277         storesp 12              ; result into the B slot
  278         storesp 4               ; move ret over A
  279         poppc
 280
 281
  282 # opcode 37 - lessthanorequal emulation (signed). Entry stack (top first): [ret, A, B]; exit: [(A < B) | (A == B)]
  283 # offset 0x0000 00a0
  284         .balign 32,0
  285 _lessthanorequal:
  286         loadsp 8                ; B
  287         loadsp 8                ; A
  288         lessthan
  289         loadsp 12               ; B
  290         loadsp 12               ; A
  291         eq
  292         or
  293
  294         storesp 12              ; result into the B slot
  295         storesp 4               ; move ret over A
  296         poppc
 297
 298
  299 # opcode 38 - ulessthan emulation (unsigned). Entry stack (top first): [ret, A, B]; exit: [A < B ? 1 : 0]
  300 # offset 0x0000 00c0
  301         .balign 32,0
  302 _ulessthan:
  303         ; fish up arguments
  304         loadsp 4                ; A
  305         loadsp 12               ; B -> stack is [B, A, ret, A, B]
  306
  307         /* low: -1 if low bit diff is negative 0 otherwise:  neg (not x&1 and (y&1))
  308                 x&1             y&1             neg (not x&1 and (y&1))
  309                 1               1               0
  310                 1               0               0
  311                 0               1               -1
  312                 0               0               0
  313
  314         */
  315         loadsp 4                ; A (x in the table above)
  316         not
  317         loadsp 4                ; B (y in the table above)
  318         and
  319         im 1
  320         and
  321         neg                     ; low = -(~A & B & 1)
  322
  323
  324         /* high: upper 31-bit diff is only wrong when diff is 0 and low=-1
  325                 high=x>>1 - y>>1 + low
  326
  327                 extremes
  328
  329                 0000 - 1111:
  330                 low= neg(not 0 and 1) = 1111 (-1)
  331                 high=000+ neg(111) +low = 000 + 1001 + low = 1000
  332                 OK
  333
  334                 1111 - 0000
  335                 low=neg(not 1 and 0) = 0
  336                 high=111+neg(000) + low = 0111
  337                 OK
  338
  339
  340          */
  341         loadsp 8                ; A
  342         ; flip / addsp 0 / flip = logical shift right by 1
  343         flip
  344         addsp 0
  345         flip
  346
  347         loadsp 8                ; B
  348
  349         flip
  350         addsp 0
  351         flip
  352
  353         sub                     ; (A >> 1) - (B >> 1)
  354
  355         ; if they are equal, then the last bit decides...
  356         add
  357
  358         /* test if negative: result = flip(diff) & 1 */
  359         flip
  360         im 1
  361         and
  362
  363         ; destroy a&b which are on stack
  364         storesp 4
  365         storesp 4
  366
  367         storesp 12              ; result into the B slot
  368         storesp 4               ; move ret over A
  369         poppc
 370
  371 # opcode 39 - ulessthanorequal emulation (unsigned). Entry stack (top first): [ret, A, B]; exit: [(A < B) | (A == B)]
  372 # offset 0x0000 00e0
  373         .balign 32,0
  374 _ulessthanorequal:
  375         loadsp 8                ; B
  376         loadsp 8                ; A
  377         ulessthan
  378         loadsp 12               ; B
  379         loadsp 12               ; A
  380         eq
  381         or
  382
  383         storesp 12              ; result into the B slot
  384         storesp 4               ; move ret over A
  385         poppc
 386
 387
  388 # opcode 40 - swap: no emulation is provided, the slot only contains a breakpoint
  389 # offset 0x0000 0100
  390         .balign 32,0
  391         .globl _swap
  392 _swap:
  393         breakpoint ; tbd
 394
 395 # opcode 41
 396 # offset 0x0000 0120
 397         .balign 32,0
 398 _slowmult:
 399         im _slowmultImpl
 400         poppc
 401
  402 # opcode 42 - lshiftright emulation. Entry stack (top first): [ret, count, value]; exit: [value >> count (logical)]
  403 # offset 0x0000 0140
  404         .balign 32,0
  405 _lshiftright:
  406         loadsp 8                ; value
  407         flip                    ; bit-reverse so a left shift acts as a right shift
  408
  409         loadsp 8                ; count
  410         ashiftleft
  411         flip                    ; reverse back
  412
  413         storesp 12              ; result into the value slot
  414         storesp 4               ; move ret over count
  415
  416         poppc
 417
 418
  419 # opcode 43 - ashiftleft emulation. Entry stack (top first): [ret, count, value]; jumps into the run of `addsp 0` ending at _ashiftleftEnd
  420 # offset 0x0000 0160
  421         .balign 32,0
  422 _ashiftleft:
  423         loadsp 8                ; value: the word the doubling run will operate on
  424
  425         loadsp 8                ; count
  426         im 0x1f
  427         and                     ; n = count & 31
  428         fast_neg
  429         im _ashiftleftEnd
  430         add                     ; _ashiftleftEnd - n: entry point that leaves n doublings to execute
  431         poppc
 432
 433
 434
  435 # opcode 44 - ashiftright emulation. Entry stack (top first): [ret, count, value]; exit: [value >> count (arithmetic)]
  436 # offset 0x0000 0180
  437         .balign 32,0
  438 _ashiftright:
  439         loadsp 8                ; value
  440         loadsp 8                ; count
  441         lshiftright             ; logical shift first
  442
  443         ; handle signed value
  444         im -1
  445         loadsp 12               ; count
  446         im 0x1f
  447         and
  448         lshiftright             ; -1 >> (count & 31), logical
  449         not     ; now we have an integer on the stack with the signed
  450                 ; bits in the right position
  451
  452         ; mask these bits with the signed bit.
  453         loadsp 16               ; value
  454         not
  455         flip
  456         im 1
  457         and                     ; 1 if value >= 0, 0 if value < 0
  458         im -1
  459         add                     ; 0 if value >= 0, all ones if value < 0
  460
  461         and
  462
  463         ; stuff in the signed bits...
  464         or
  465
  466         ; store result into correct stack slot
  467         storesp 12
  468
  469         ; move up return value
  470         storesp 4
  471         poppc
 472
  473 # opcode 45 - call emulation. Entry stack (top first): [ret, fn]; exit: PC = fn with [ret] left on the stack
  474 # offset 0x0000 01a0
  475         .balign 32,0
  476 _call:
  477         ; fn
  478         loadsp 4
  479
  480         ; return address
  481         loadsp 4
  482
  483         ; store return address
  484         storesp 12              ; overwrite the original fn slot with ret
  485
  486         ; fn to call
  487         storesp 4               ; overwrite the original ret slot with fn -> [fn, ret]
  488
  489         pushsp  ; flush internal stack
  490         popsp
  491
  492         poppc                   ; jump to fn
 493
  494 _storehtail:
  495         ; second half of _storeh. Entry stack (top first): [0xffff, value, masked word, ret, addr, value]
  496         and                     ; value & 0xffff
  497         loadsp 12               ; addr
  498         im 3
  499         and
  500         fast_neg
  501         im 2
  502         add                     ; 2 - (addr & 3)
  503         im 3
  504         ashiftleft              ; * 8 -> shift count in bits
  505         nop
  506         ashiftleft              ; halfword moved into position
  507
  508         or                      ; merge with the masked word
  509
  510         loadsp 8                ; addr
  511         im  ~0x3
  512         and                     ; word-aligned address
  513
  514         store                   ; write the merged word back
  515
  516         storesp 4               ; stack is [ret, addr, value]: drop addr ...
  517         storesp 4               ; ... and value, keeping ret on top
  518         poppc
 519
 520
  521 # opcode 46 - eq emulation. Entry stack (top first): [ret, A, B]; exit: [A == B ? 1 : 0]
  522 # offset 0x0000 01c0
  523         .balign 32,0
  524 _eq:
  525         loadsp 8                ; B
  526         fast_neg
  527         loadsp 8                ; A
  528         add                     ; d = A - B
  529
  530         not                     ; ~d
  531         loadsp 0                ; duplicate ~d
  532         im 1
  533         add                     ; ~d + 1 = -d
  534         not                     ; ~(-d) = d - 1
  535         and                     ; ~d & (d - 1): sign bit set only when d == 0
  536         flip                    ; move the sign bit to bit 0
  537         im 1
  538         and
  539
  540         storesp 12              ; result into the B slot
  541         storesp 4               ; move ret over A
  542         poppc
 543
  544 # opcode 47 - neq emulation. Entry stack (top first): [ret, A, B]; exit: [A != B ? 1 : 0]
  545 # offset 0x0000 01e0
  546         .balign 32,0
  547 _neq:
  548         loadsp 8                ; B
  549         fast_neg
  550         loadsp 8                ; A
  551         add                     ; d = A - B
  552
  553         not                     ; ~d
  554         loadsp 0                ; duplicate ~d
  555         im 1
  556         add                     ; -d
  557         not                     ; d - 1
  558         and                     ; ~d & (d - 1): sign bit set only when d == 0
  559         flip                    ; move the sign bit to bit 0
  560
  561         not                     ; invert: bit 0 is now set when d != 0
  562
  563         im 1
  564         and
  565
  566         storesp 12              ; result into the B slot
  567         storesp 4               ; move ret over A
  568         poppc
 569
 570
 571 # opcode 48
 572 # offset 0x0000 0200
 573         .balign 32,0
 574 _neg:
 575         loadsp 4
 576         not
 577         im 1
 578         add
 579         storesp 8
 580
 581         poppc
 582
 583
  584 # opcode 49 - sub emulation. Entry stack (top first): [ret, A, B]; exit: [B - A]
  585 # offset 0x0000 0220
  586         .balign 32,0
  587 _sub:
  588         loadsp 8                ; B
  589         loadsp 8                ; A
  590         fast_neg
  591         add                     ; B + (-A)
  592         storesp 12              ; result into the B slot
  593
  594         storesp 4               ; move ret over A
  595
  596         poppc
 597
 598
  599 # opcode 50 - xor emulation. Entry stack (top first): [ret, A, B]; exit: [(A & ~B) | (B & ~A)]
  600 # offset 0x0000 0240
  601         .balign 32,0
  602 _xor:
  603         loadsp 8                ; B
  604         not
  605         loadsp 8                ; A
  606         and                     ; A & ~B
  607
  608         loadsp 12               ; B
  609         loadsp 12               ; A
  610         not
  611         and                     ; B & ~A
  612
  613         or
  614
  615         storesp 12              ; result into the B slot
  616         storesp 4               ; move ret over A
  617         poppc
 618
  619 # opcode 51 - loadb emulation. Entry stack (top first): [ret, addr]; the addr slot is replaced by the zero-extended byte read from addr
  620 # offset 0x0000 0260
  621         .balign 32,0
  622 _loadb:
  623         loadsp 4                ; addr
  624         im ~0x3
  625         and                     ; word-aligned address
  626         load                    ; containing word
  627
  628         loadsp 8                ; addr
  629         im 3
  630         and
  631         fast_neg
  632         im 3
  633         add                     ; 3 - (addr & 3)
  634         ; x8
  635         addsp 0
  636         addsp 0
  637         addsp 0
  638
  639         lshiftright             ; move the selected byte down to bits 0..7
  640
  641         im 0xff
  642         and
  643         storesp 8               ; overwrite the addr slot with the result
  644
  645         poppc
 646
 647
  648 # opcode 52 - storeb emulation, first half (continues in _storebtail). Entry stack (top first): [ret, addr, value]
  649 # offset 0x0000 0280
  650         .balign 32,0
  651 _storeb:
  652         loadsp 4                ; addr
  653         im ~0x3
  654         and
  655         load                    ; current word at the target location
  656
  657         ; mask away destination
  658         im _mask
  659         loadsp 12               ; addr
  660         im 3
  661         and
  662         addsp 0
  663         addsp 0                 ; (addr & 3) * 4 = byte offset into the _mask table
  664         add
  665         load                    ; mask that clears the destination byte
  666
  667         and
  668
  669
  670         im _storebtail
  671         poppc                   ; continue in _storebtail with [masked word, ret, addr, value]
 672
  673 # opcode 53 - div emulation: delegates to __divsi3 (defined outside this file) through the cimpl macro
  674 # offset 0x0000 02a0
  675         .balign 32,0
  676 _div:
  677         cimpl __divsi3
 678
  679 # opcode 54 - mod emulation: delegates to __modsi3 (defined outside this file) through the cimpl macro
  680 # offset 0x0000 02c0
  681         .balign 32,0
  682 _mod:
  683         cimpl __modsi3
 684
  685 # opcode 55 - eqbranch emulation. Entry stack (top first): [ret, offset, value]; continues at (ret - 1) + offset when value == 0, else at ret; all three words are consumed
  686 # offset 0x0000 02e0
  687         .balign 32,0
  688         .globl _eqbranch
  689 _eqbranch:
  690         loadsp 8                ; value
  691
  692         ; eq
  693         ; computes (value == 0) as 0/1, branch-free
  694         not
  695         loadsp 0
  696         im 1
  697         add
  698         not
  699         and
  700         flip
  701         im 1
  702         and
  703
  704         ; mask
  705         im -1
  706         add                     ; 0 when the branch is taken, all ones otherwise
  707         loadsp 0
  708         storesp 16              ; keep a copy of the mask in the value slot
  709
  710         ; no branch address
  711         loadsp 4                ; ret
  712
  713         and                     ; ret when not branching, else 0
  714
  715         ; fetch boolean & neg mask
  716         loadsp 12
  717         not                     ; all ones when the branch is taken, else 0
  718
  719         ; calc address & mask for branch
  720         loadsp 8                ; ret
  721         loadsp 16               ; offset
  722         add
  723         ; subtract 1 to find PC of branch instruction
  724         im -1
  725         add
  726
  727         and                     ; branch target when branching, else 0
  728
  729         or                      ; exactly one of the two candidates is non-masked
  730
  731         storesp 4               ; slide the destination down over ret, offset and the mask
  732         storesp 4
  733         storesp 4
  734         poppc
 735
 736
  737 # opcode 56 - neqbranch emulation. Entry stack (top first): [ret, offset, value]; continues at (ret - 1) + offset when value != 0, else at ret; all three words are consumed
  738 # offset 0x0000 0300
  739         .balign 32,0
  740         .globl _neqbranch
  741 _neqbranch:
  742         loadsp 8                ; value
  743
  744         ; neq
  745         ; computes (value != 0) as 0/1, branch-free
  746         not
  747         loadsp 0
  748         im 1
  749         add
  750         not
  751         and
  752         flip
  753
  754         not
  755
  756         im 1
  757         and
  758
  759         ; mask
  760         im -1
  761         add                     ; 0 when the branch is taken, all ones otherwise
  762         loadsp 0
  763         storesp 16              ; keep a copy of the mask in the value slot
  764
  765         ; no branch address
  766         loadsp 4                ; ret
  767
  768         and                     ; ret when not branching, else 0
  769
  770         ; fetch boolean & neg mask
  771         loadsp 12
  772         not                     ; all ones when the branch is taken, else 0
  773
  774         ; calc address & mask for branch
  775         loadsp 8                ; ret
  776         loadsp 16               ; offset
  777         add
  778         ; find address of branch instruction
  779         im -1
  780         add
  781
  782         and                     ; branch target when branching, else 0
  783
  784         or                      ; exactly one of the two candidates is non-masked
  785
  786         storesp 4               ; slide the destination down over ret, offset and the mask
  787         storesp 4
  788         storesp 4
  789         poppc
 790
  791 # opcode 57 - poppcrel emulation. Entry stack (top first): [ret, offset]; jumps to (ret - 1) + offset, consuming both words
  792 # offset 0x0000 0320
  793         .balign 32,0
  794         .globl _poppcrel
  795 _poppcrel:
  796         add                     ; ret + offset
  797         ; address of poppcrel
  798         im -1
  799         add                     ; ret - 1 is the address of the poppcrel instruction itself
  800         poppc
 801
  802 # opcode 58 - config emulation. Entry stack (top first): [ret, operand]; stores 1 to _hardware and drops the operand
  803 # offset 0x0000 0340
  804         .balign 32,0
  805         .globl _config
  806 _config:
  807         im 1
  808         nop                     ; presumably keeps the two adjacent `im`s from combining into one constant - TODO confirm
  809         im _hardware
  810         store                   ; _hardware = 1
  811         storesp 4               ; move ret over the operand
  812         poppc
 813
 814 # opcode 59
 815 # offset 0x0000 0360
 816         .balign 32,0
 817 _pushpc:
 818         loadsp 4
 819         im 1
 820         add
 821         storesp 8
 822         poppc
 823
  824 # opcode 60 - syscall slot: no emulation code, only a single zero byte. NOTE(review): byte 0 is presumably the breakpoint opcode - confirm
  825 # offset 0x0000 0380
  826         .balign 32,0
  827 _syscall_emulate:
  828         .byte 0
 829
  830 # opcode 61 - pushspadd emulation. Entry stack (top first): [ret, A]; the A slot is replaced by (pushed SP + 4) + A * 4
  831 # offset 0x0000 03a0
  832         .balign 32,0
  833 _pushspadd:
  834         pushsp                  ; stack pointer value; presumably the address of the ret slot - TODO confirm
  835         im 4
  836         add                     ; + 4: presumably the address of the A slot
  837         loadsp 8                ; A
  838         addsp 0
  839         addsp 0                 ; A * 4
  840         add
  841         storesp 8               ; overwrite the A slot with the result
  842
  843         poppc
 844
  845 # opcode 62 - halfmult: no emulation is provided, the slot only contains a breakpoint
  846 # offset 0x0000 03c0
  847         .balign 32,0
  848 _halfmult:
  849         breakpoint
 850
  851 # opcode 63 - callpcrel emulation. Entry stack (top first): [ret, offset]; exit: PC = (ret - 1) + offset with [ret] left on the stack
  852 # offset 0x0000 03e0
  853         .balign 32,0
  854 _callpcrel:
  855         loadsp 4                ; offset
  856         loadsp 4                ; ret
  857         add
  858         im -1
  859         add                     ; target = ret + offset - 1 (relative to the callpcrel instruction itself)
  860         loadsp 4                ; ret
  861
  862         storesp 12      ; return address
  863         storesp 4               ; target over the original ret slot -> [target, ret]
  864         pushsp          ; this will flush the internal stack.
  865         popsp
  866         poppc                   ; jump to target
 867
  868         .text
  869
  870         ; Tail of the ashiftleft emulation: 31 one-bit left shifts (addsp 0 doubles the top word).
  871         ; _ashiftleft jumps to _ashiftleftEnd - n so that exactly n of them execute.
  872         ; Stack on arrival (top first): [value, ret, count, value]
  873 _ashiftleftBegin:
  874         .rept 0x1f
  875         addsp 0
  876         .endr
  877 _ashiftleftEnd:
  878         storesp 12              ; shifted value into the original value slot
  879         storesp 4               ; move ret over count
  880         poppc
 881
  882 _storebtail:                    ; second half of _storeb. Entry stack (top first): [masked word, ret, addr, value]
  883         loadsp 12               ; value
  884         im 0xff
  885         and                     ; low byte of value
  886         loadsp 12               ; addr
  887         im 3
  888         and
  889
  890         fast_neg
  891         im 3
  892         add                     ; 3 - (addr & 3)
  893         ; x8
  894         addsp 0
  895         addsp 0
  896         addsp 0
  897
  898         ashiftleft              ; byte moved into position
  899
  900         or                      ; merge with the masked word
  901
  902         loadsp 8                ; addr
  903         im  ~0x3
  904         and                     ; word-aligned address
  905
  906         store                   ; write the merged word back
  907
  908         storesp 4               ; stack is [ret, addr, value]: drop addr ...
  909         storesp 4               ; ... and value, keeping ret on top
  910         poppc
 911
 912
 913
 914
  915 ; NB! this is not an EMULATE instruction. It is a varargs fn.
  916         .globl _syscall
  917 _syscall:
  918         syscall                 ; execute the syscall instruction
  919         poppc                   ; return to the address on top of the stack
 920
 921 _slowmultImpl:
 922
 923         loadsp 8 ; A
 924         loadsp 8 ; B
 925         im 0 ; C
 926
 927 .LmoreMult:
 928         mult1bit
 929
 930         ; cutoff
 931         loadsp 8
 932         .byte (.LmoreMult-.Lbranch)&0x7f+0x80
 933 .Lbranch:
 934         neqbranch
 935
 936         storesp 4
 937         storesp 4
 938         storesp 12
 939         storesp 4
 940         poppc
 941
  942         .data
  943         .balign 4,0
  944 _mask:                          ; byte-clearing masks read by _storeb, indexed by (addr & 3)
  945         .long 0x00ffffff
  946         .long 0xff00ffff
  947         .long 0xffff00ff
  948         .long 0xffffff00
  949
  950         ; _hardware starts at 0; the _config handler stores 1 into it
  951         .globl _hardware
  952 _hardware:
  953         .long 0
  954         .globl _cpu_config
  955 _cpu_config:                    ; _start stores ZPU_ID here
  956         .long 0
 957