/*
 *  ARMv4 code generator for TCC
 *
 *  Copyright (c) 2003 Daniel Glöckner
 *  Copyright (c) 2012 Thomas Preud'homme
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
24 #ifdef TARGET_DEFS_ONLY
26 #if defined(TCC_ARM_EABI) && !defined(TCC_ARM_VFP)
27 #error "Currently TinyCC only supports float computation with VFP instructions"
30 /* number of available registers */
37 #ifndef TCC_CPU_VERSION
38 # define TCC_CPU_VERSION 5
41 /* a register can belong to several classes. The classes must be
42 sorted from more general to more precise (see gv2() code which does
43 assumptions on it). */
44 #define RC_INT 0x0001 /* generic integer register */
45 #define RC_FLOAT 0x0002 /* generic float register */
61 #define RC_IRET RC_R0 /* function return: integer register */
62 #define RC_IRE2 RC_R1 /* function return: second integer register */
63 #define RC_FRET RC_F0 /* function return: float register */
65 /* pretty names for the registers */
87 #define T2CPR(t) (((t) & VT_BTYPE) != VT_FLOAT ? 0x100 : 0)
90 /* return registers for function */
91 #define REG_IRET TREG_R0 /* single word int return register */
92 #define REG_IRE2 TREG_R1 /* second word return register (for long long) */
93 #define REG_FRET TREG_F0 /* float return register */
96 #define TOK___divdi3 TOK___aeabi_ldivmod
97 #define TOK___moddi3 TOK___aeabi_ldivmod
98 #define TOK___udivdi3 TOK___aeabi_uldivmod
99 #define TOK___umoddi3 TOK___aeabi_uldivmod
102 /* defined if function parameters must be evaluated in reverse order */
103 #define INVERT_FUNC_PARAMS
105 /* defined if structures are passed as pointers. Otherwise structures
106 are directly pushed on stack. */
107 /* #define FUNC_STRUCT_PARAM_AS_PTR */
109 /* pointer size, in bytes */
112 /* long double size and alignment, in bytes */
114 #define LDOUBLE_SIZE 8
118 #define LDOUBLE_SIZE 8
122 #define LDOUBLE_ALIGN 8
124 #define LDOUBLE_ALIGN 4
127 /* maximum alignment (for aligned attribute support) */
130 #define CHAR_IS_UNSIGNED
132 /******************************************************/
133 #else /* ! TARGET_DEFS_ONLY */
134 /******************************************************/
135 #define USING_GLOBALS
138 enum float_abi float_abi
;
140 ST_DATA
const int reg_classes
[NB_REGS
] = {
141 /* r0 */ RC_INT
| RC_R0
,
142 /* r1 */ RC_INT
| RC_R1
,
143 /* r2 */ RC_INT
| RC_R2
,
144 /* r3 */ RC_INT
| RC_R3
,
145 /* r12 */ RC_INT
| RC_R12
,
146 /* f0 */ RC_FLOAT
| RC_F0
,
147 /* f1 */ RC_FLOAT
| RC_F1
,
148 /* f2 */ RC_FLOAT
| RC_F2
,
149 /* f3 */ RC_FLOAT
| RC_F3
,
151 /* d4/s8 */ RC_FLOAT
| RC_F4
,
152 /* d5/s10 */ RC_FLOAT
| RC_F5
,
153 /* d6/s12 */ RC_FLOAT
| RC_F6
,
154 /* d7/s14 */ RC_FLOAT
| RC_F7
,
158 static int func_sub_sp_offset
, last_itod_magic
;
161 #if defined(CONFIG_TCC_BCHECK)
162 static addr_t func_bound_offset
;
163 static unsigned long func_bound_ind
;
164 ST_DATA
int func_bound_add_epilog
;
167 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
168 static CType float_type
, double_type
, func_float_type
, func_double_type
;
169 ST_FUNC
void arm_init(struct TCCState
*s
)
171 float_type
.t
= VT_FLOAT
;
172 double_type
.t
= VT_DOUBLE
;
173 func_float_type
.t
= VT_FUNC
;
174 func_float_type
.ref
= sym_push(SYM_FIELD
, &float_type
, FUNC_CDECL
, FUNC_OLD
);
175 func_double_type
.t
= VT_FUNC
;
176 func_double_type
.ref
= sym_push(SYM_FIELD
, &double_type
, FUNC_CDECL
, FUNC_OLD
);
178 float_abi
= s
->float_abi
;
179 #ifndef TCC_ARM_HARDFLOAT
180 # warning "soft float ABI currently not supported: default to softfp"
184 #define func_float_type func_old_type
185 #define func_double_type func_old_type
186 #define func_ldouble_type func_old_type
187 ST_FUNC
void arm_init(struct TCCState
*s
)
190 #if !defined (TCC_ARM_VFP)
191 tcc_warning("Support for FPA is deprecated and will be removed in next"
194 #if !defined (TCC_ARM_EABI)
195 tcc_warning("Support for OABI is deprecated and will be removed in next"
202 #define CHECK_R(r) ((r) >= TREG_R0 && (r) <= TREG_LR)
204 static int two2mask(int a
,int b
) {
205 if (!CHECK_R(a
) || !CHECK_R(b
))
206 tcc_error("compiler error! registers %i,%i is not valid",a
,b
);
207 return (reg_classes
[a
]|reg_classes
[b
])&~(RC_INT
|RC_FLOAT
);
210 static int regmask(int r
) {
212 tcc_error("compiler error! register %i is not valid",r
);
213 return reg_classes
[r
]&~(RC_INT
|RC_FLOAT
);
216 /******************************************************/
#if defined(TCC_ARM_EABI) && !defined(CONFIG_TCC_ELFINTERP)
/* Pick the default ELF dynamic linker path: the hard-float loader when
   this compilation state uses the hard-float calling convention, the
   classic soft-float/softfp loader otherwise.
   NOTE(review): braces, else and #endif were lost in extraction and
   restored here. */
const char *default_elfinterp(struct TCCState *s)
{
    if (s->float_abi == ARM_HARD_FLOAT)
        return "/lib/ld-linux-armhf.so.3";
    else
        return "/lib/ld-linux.so.3";
}
#endif
230 /* this is a good place to start adding big-endian support*/
235 if (!cur_text_section
)
236 tcc_error("compiler error! This happens f.ex. if the compiler\n"
237 "can't evaluate constant expressions outside of a function.");
238 if (ind1
> cur_text_section
->data_allocated
)
239 section_realloc(cur_text_section
, ind1
);
240 cur_text_section
->data
[ind
++] = i
&255;
242 cur_text_section
->data
[ind
++] = i
&255;
244 cur_text_section
->data
[ind
++] = i
&255;
246 cur_text_section
->data
[ind
++] = i
;
249 static uint32_t stuff_const(uint32_t op
, uint32_t c
)
252 uint32_t nc
= 0, negop
= 0;
262 case 0x1A00000: //mov
263 case 0x1E00000: //mvn
270 return (op
&0xF010F000)|((op
>>16)&0xF)|0x1E00000;
274 return (op
&0xF010F000)|((op
>>16)&0xF)|0x1A00000;
275 case 0x1C00000: //bic
280 case 0x1800000: //orr
282 return (op
&0xFFF0FFFF)|0x1E00000;
288 if(c
<256) /* catch undefined <<32 */
291 m
=(0xff>>i
)|(0xff<<(32-i
));
293 return op
|(i
<<7)|(c
<<i
)|(c
>>(32-i
));
303 void stuff_const_harder(uint32_t op
, uint32_t v
) {
309 uint32_t a
[16], nv
, no
, o2
, n2
;
312 o2
=(op
&0xfff0ffff)|((op
&0xf000)<<4);;
314 a
[i
]=(a
[i
-1]>>2)|(a
[i
-1]<<30);
316 for(j
=i
<4?i
+12:15;j
>=i
+4;j
--)
317 if((v
&(a
[i
]|a
[j
]))==v
) {
318 o(stuff_const(op
,v
&a
[i
]));
319 o(stuff_const(o2
,v
&a
[j
]));
326 for(j
=i
<4?i
+12:15;j
>=i
+4;j
--)
327 if((nv
&(a
[i
]|a
[j
]))==nv
) {
328 o(stuff_const(no
,nv
&a
[i
]));
329 o(stuff_const(n2
,nv
&a
[j
]));
334 for(k
=i
<4?i
+12:15;k
>=j
+4;k
--)
335 if((v
&(a
[i
]|a
[j
]|a
[k
]))==v
) {
336 o(stuff_const(op
,v
&a
[i
]));
337 o(stuff_const(o2
,v
&a
[j
]));
338 o(stuff_const(o2
,v
&a
[k
]));
345 for(k
=i
<4?i
+12:15;k
>=j
+4;k
--)
346 if((nv
&(a
[i
]|a
[j
]|a
[k
]))==nv
) {
347 o(stuff_const(no
,nv
&a
[i
]));
348 o(stuff_const(n2
,nv
&a
[j
]));
349 o(stuff_const(n2
,nv
&a
[k
]));
352 o(stuff_const(op
,v
&a
[0]));
353 o(stuff_const(o2
,v
&a
[4]));
354 o(stuff_const(o2
,v
&a
[8]));
355 o(stuff_const(o2
,v
&a
[12]));
/* Encode a branch from the instruction at `pos` to target `addr` as an ARM
   B-format word: condition-less 0x0A000000 plus a signed 24-bit word offset.
   If the displacement exceeds +/-32MB, raise an error when `fail` is set,
   otherwise return 0 so the caller can fall back.
   NOTE(review): the offset-adjustment statements and the fail branch were
   lost in extraction and restored from context — verify against upstream. */
uint32_t encbranch(int pos, int addr, int fail)
{
    addr -= pos + 8;   /* ARM PC reads two instructions (8 bytes) ahead */
    addr /= 4;         /* branch offsets are in words */
    if (addr >= 0x1000000 || addr < -0x1000000) {
        if (fail)
            tcc_error("FIXME: function bigger than 32MB");
        return 0;
    }
    return 0x0A000000 | (addr & 0xffffff);
}
371 int decbranch(int pos
)
374 x
=*(uint32_t *)(cur_text_section
->data
+ pos
);
381 /* output a symbol and patch all calls to it */
382 void gsym_addr(int t
, int a
)
387 x
=(uint32_t *)(cur_text_section
->data
+ t
);
390 *x
=0xE1A00000; // nop
393 *x
|= encbranch(lt
,a
,1);
399 static uint32_t vfpr(int r
)
401 if(r
<TREG_F0
|| r
>TREG_F7
)
402 tcc_error("compiler error! register %i is no vfp register",r
);
406 static uint32_t fpr(int r
)
408 if(r
<TREG_F0
|| r
>TREG_F3
)
409 tcc_error("compiler error! register %i is no fpa register",r
);
414 static uint32_t intr(int r
)
418 if(r
>= TREG_R0
&& r
<= TREG_R3
)
420 if (!(r
>= TREG_SP
&& r
<= TREG_LR
))
421 tcc_error("compiler error! register %i is no int register",r
);
422 return r
+ (13 - TREG_SP
);
425 static void calcaddr(uint32_t *base
, int *off
, int *sgn
, int maxoff
, unsigned shift
)
427 if(*off
>maxoff
|| *off
&((1<<shift
)-1)) {
434 y
=stuff_const(x
,*off
&~maxoff
);
440 y
=stuff_const(x
,(*off
+maxoff
)&~maxoff
);
444 *off
=((*off
+maxoff
)&~maxoff
)-*off
;
447 stuff_const_harder(x
,*off
&~maxoff
);
452 static uint32_t mapcc(int cc
)
457 return 0x30000000; /* CC/LO */
459 return 0x20000000; /* CS/HS */
461 return 0x00000000; /* EQ */
463 return 0x10000000; /* NE */
465 return 0x90000000; /* LS */
467 return 0x80000000; /* HI */
469 return 0x40000000; /* MI */
471 return 0x50000000; /* PL */
473 return 0xB0000000; /* LT */
475 return 0xA0000000; /* GE */
477 return 0xD0000000; /* LE */
479 return 0xC0000000; /* GT */
481 tcc_error("unexpected condition code");
482 return 0xE0000000; /* AL */
485 static int negcc(int cc
)
514 tcc_error("unexpected condition code");
518 /* load 'r' from value 'sv' */
519 void load(int r
, SValue
*sv
)
521 int v
, ft
, fc
, fr
, sign
;
538 uint32_t base
= 0xB; // fp
541 v1
.r
= VT_LOCAL
| VT_LVAL
;
547 } else if(v
== VT_CONST
) {
556 } else if(v
< VT_CONST
) {
563 calcaddr(&base
,&fc
,&sign
,1020,2);
565 op
=0xED100A00; /* flds */
568 if ((ft
& VT_BTYPE
) != VT_FLOAT
)
569 op
|=0x100; /* flds -> fldd */
570 o(op
|(vfpr(r
)<<12)|(fc
>>2)|(base
<<16));
575 #if LDOUBLE_SIZE == 8
576 if ((ft
& VT_BTYPE
) != VT_FLOAT
)
579 if ((ft
& VT_BTYPE
) == VT_DOUBLE
)
581 else if ((ft
& VT_BTYPE
) == VT_LDOUBLE
)
584 o(op
|(fpr(r
)<<12)|(fc
>>2)|(base
<<16));
586 } else if((ft
& (VT_BTYPE
|VT_UNSIGNED
)) == VT_BYTE
587 || (ft
& VT_BTYPE
) == VT_SHORT
) {
588 calcaddr(&base
,&fc
,&sign
,255,0);
590 if ((ft
& VT_BTYPE
) == VT_SHORT
)
592 if ((ft
& VT_UNSIGNED
) == 0)
596 o(op
|(intr(r
)<<12)|(base
<<16)|((fc
&0xf0)<<4)|(fc
&0xf));
598 calcaddr(&base
,&fc
,&sign
,4095,0);
602 if ((ft
& VT_BTYPE
) == VT_BYTE
|| (ft
& VT_BTYPE
) == VT_BOOL
)
604 o(op
|(intr(r
)<<12)|fc
|(base
<<16));
610 op
=stuff_const(0xE3A00000|(intr(r
)<<12),sv
->c
.i
);
611 if (fr
& VT_SYM
|| !op
) {
612 o(0xE59F0000|(intr(r
)<<12));
615 greloc(cur_text_section
, sv
->sym
, ind
, R_ARM_ABS32
);
620 } else if (v
== VT_LOCAL
) {
621 op
=stuff_const(0xE28B0000|(intr(r
)<<12),sv
->c
.i
);
622 if (fr
& VT_SYM
|| !op
) {
623 o(0xE59F0000|(intr(r
)<<12));
625 if(fr
& VT_SYM
) // needed ?
626 greloc(cur_text_section
, sv
->sym
, ind
, R_ARM_ABS32
);
628 o(0xE08B0000|(intr(r
)<<12)|intr(r
));
632 } else if(v
== VT_CMP
) {
633 o(mapcc(sv
->c
.i
)|0x3A00001|(intr(r
)<<12));
634 o(mapcc(negcc(sv
->c
.i
))|0x3A00000|(intr(r
)<<12));
636 } else if (v
== VT_JMP
|| v
== VT_JMPI
) {
639 o(0xE3A00000|(intr(r
)<<12)|t
);
642 o(0xE3A00000|(intr(r
)<<12)|(t
^1));
644 } else if (v
< VT_CONST
) {
647 o(0xEEB00A40|(vfpr(r
)<<12)|vfpr(v
)|T2CPR(ft
)); /* fcpyX */
649 o(0xEE008180|(fpr(r
)<<12)|fpr(v
));
652 o(0xE1A00000|(intr(r
)<<12)|intr(v
));
656 tcc_error("load unimplemented!");
659 /* store register 'r' in lvalue 'v' */
660 void store(int r
, SValue
*sv
)
663 int v
, ft
, fc
, fr
, sign
;
678 if (fr
& VT_LVAL
|| fr
== VT_LOCAL
) {
679 uint32_t base
= 0xb; /* fp */
684 } else if(v
== VT_CONST
) {
696 calcaddr(&base
,&fc
,&sign
,1020,2);
698 op
=0xED000A00; /* fsts */
701 if ((ft
& VT_BTYPE
) != VT_FLOAT
)
702 op
|=0x100; /* fsts -> fstd */
703 o(op
|(vfpr(r
)<<12)|(fc
>>2)|(base
<<16));
708 #if LDOUBLE_SIZE == 8
709 if ((ft
& VT_BTYPE
) != VT_FLOAT
)
712 if ((ft
& VT_BTYPE
) == VT_DOUBLE
)
714 if ((ft
& VT_BTYPE
) == VT_LDOUBLE
)
717 o(op
|(fpr(r
)<<12)|(fc
>>2)|(base
<<16));
720 } else if((ft
& VT_BTYPE
) == VT_SHORT
) {
721 calcaddr(&base
,&fc
,&sign
,255,0);
725 o(op
|(intr(r
)<<12)|(base
<<16)|((fc
&0xf0)<<4)|(fc
&0xf));
727 calcaddr(&base
,&fc
,&sign
,4095,0);
731 if ((ft
& VT_BTYPE
) == VT_BYTE
|| (ft
& VT_BTYPE
) == VT_BOOL
)
733 o(op
|(intr(r
)<<12)|fc
|(base
<<16));
738 tcc_error("store unimplemented");
/* Emit "add sp, sp, #val" (splitting the immediate if needed), used to pop
   arguments pushed on the stack for a call. */
static void gadd_sp(int val)
{
    stuff_const_harder(0xE28DD000, val);
}
746 /* 'is_jmp' is '1' if it is a jump */
747 static void gcall_or_jmp(int is_jmp
)
751 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
)) == VT_CONST
) {
753 if(vtop
->r
& VT_SYM
){
754 x
=encbranch(ind
,ind
+vtop
->c
.i
,0);
756 /* relocation case */
757 greloc(cur_text_section
, vtop
->sym
, ind
, R_ARM_PC24
);
758 o(x
|(is_jmp
?0xE0000000:0xE1000000));
761 o(0xE28FE004); // add lr,pc,#4
762 o(0xE51FF004); // ldr pc,[pc,#-4]
763 greloc(cur_text_section
, vtop
->sym
, ind
, R_ARM_ABS32
);
768 o(0xE28FE004); // add lr,pc,#4
769 o(0xE51FF004); // ldr pc,[pc,#-4]
773 /* otherwise, indirect call */
774 #ifdef CONFIG_TCC_BCHECK
775 vtop
->r
&= ~VT_MUSTBOUND
;
779 o(0xE1A0E00F); // mov lr,pc
780 o(0xE1A0F000|intr(r
)); // mov pc,r
784 #if defined(CONFIG_TCC_BCHECK)
786 static void gen_bounds_call(int v
)
788 Sym
*sym
= external_helper_sym(v
);
790 greloc(cur_text_section
, sym
, ind
, R_ARM_PC24
);
794 static void gen_bounds_prolog(void)
796 /* leave some room for bound checking code */
797 func_bound_offset
= lbounds_section
->data_offset
;
798 func_bound_ind
= ind
;
799 func_bound_add_epilog
= 0;
800 o(0xe1a00000); /* ld r0,lbounds_section->data_offset */
803 o(0xe1a00000); /* call __bound_local_new */
806 static void gen_bounds_epilog(void)
811 int offset_modified
= func_bound_offset
!= lbounds_section
->data_offset
;
813 if (!offset_modified
&& !func_bound_add_epilog
)
816 /* add end of table info */
817 bounds_ptr
= section_ptr_add(lbounds_section
, sizeof(addr_t
));
820 sym_data
= get_sym_ref(&char_pointer_type
, lbounds_section
,
821 func_bound_offset
, lbounds_section
->data_offset
);
823 /* generate bound local allocation */
824 if (offset_modified
) {
826 ind
= func_bound_ind
;
827 o(0xe59f0000); /* ldr r0, [pc] */
828 o(0xea000000); /* b $+4 */
829 greloc(cur_text_section
, sym_data
, ind
, R_ARM_ABS32
);
830 o(0x00000000); /* lbounds_section->data_offset */
831 gen_bounds_call(TOK___bound_local_new
);
835 /* generate bound check local freeing */
836 o(0xe92d0003); /* push {r0,r1} */
837 o(0xed2d0b02); /* vpush {d0} */
838 o(0xe59f0000); /* ldr r0, [pc] */
839 o(0xea000000); /* b $+4 */
840 greloc(cur_text_section
, sym_data
, ind
, R_ARM_ABS32
);
841 o(0x00000000); /* lbounds_section->data_offset */
842 gen_bounds_call(TOK___bound_local_delete
);
843 o(0xecbd0b02); /* vpop {d0} */
844 o(0xe8bd0003); /* pop {r0,r1} */
/* When long double is only 8 bytes (i.e. the same representation as
   double), fold VT_LDOUBLE onto VT_DOUBLE so both are handled identically
   by callers.  Otherwise return btype unchanged. */
static int unalias_ldbl(int btype)
{
#if LDOUBLE_SIZE == 8
    if (btype == VT_LDOUBLE)
        btype = VT_DOUBLE;
#endif
    return btype;
}
857 /* Return whether a structure is an homogeneous float aggregate or not.
858 The answer is true if all the elements of the structure are of the same
859 primitive float type and there is less than 4 elements.
861 type: the type corresponding to the structure to be tested */
862 static int is_hgen_float_aggr(CType
*type
)
864 if ((type
->t
& VT_BTYPE
) == VT_STRUCT
) {
866 int btype
, nb_fields
= 0;
868 ref
= type
->ref
->next
;
870 btype
= unalias_ldbl(ref
->type
.t
& VT_BTYPE
);
871 if (btype
== VT_FLOAT
|| btype
== VT_DOUBLE
) {
872 for(; ref
&& btype
== unalias_ldbl(ref
->type
.t
& VT_BTYPE
); ref
= ref
->next
, nb_fields
++);
873 return !ref
&& nb_fields
<= 4;
/* Bookkeeping for AAPCS VFP register allocation (see assign_vfpreg):
   tracks the next free single-precision register and the "holes" left by
   double-alignment padding, which later single floats may reuse.
   NOTE(review): the struct header and terminator were lost in extraction
   and restored here. */
struct avail_regs {
    signed char avail[3]; /* 3 holes max with only float and double alignments */
    int first_hole; /* first available hole */
    int last_hole; /* last available hole (none if equal to first_hole) */
    int first_free_reg; /* next free register in the sequence, hole excluded */
};
887 /* Find suitable registers for a VFP Co-Processor Register Candidate (VFP CPRC
888 param) according to the rules described in the procedure call standard for
889 the ARM architecture (AAPCS). If found, the registers are assigned to this
890 VFP CPRC parameter. Registers are allocated in sequence unless a hole exists
891 and the parameter is a single float.
893 avregs: opaque structure to keep track of available VFP co-processor regs
894 align: alignment constraints for the param, as returned by type_size()
895 size: size of the parameter, as returned by type_size() */
896 int assign_vfpreg(struct avail_regs
*avregs
, int align
, int size
)
900 if (avregs
->first_free_reg
== -1)
902 if (align
>> 3) { /* double alignment */
903 first_reg
= avregs
->first_free_reg
;
904 /* alignment constraint not respected so use next reg and record hole */
906 avregs
->avail
[avregs
->last_hole
++] = first_reg
++;
907 } else { /* no special alignment (float or array of float) */
908 /* if single float and a hole is available, assign the param to it */
909 if (size
== 4 && avregs
->first_hole
!= avregs
->last_hole
)
910 return avregs
->avail
[avregs
->first_hole
++];
912 first_reg
= avregs
->first_free_reg
;
914 if (first_reg
+ size
/ 4 <= 16) {
915 avregs
->first_free_reg
= first_reg
+ size
/ 4;
918 avregs
->first_free_reg
= -1;
922 /* Returns whether all params need to be passed in core registers or not.
923 This is the case for function part of the runtime ABI. */
924 int floats_in_core_regs(SValue
*sval
)
929 switch (sval
->sym
->v
) {
930 case TOK___floatundisf
:
931 case TOK___floatundidf
:
932 case TOK___fixunssfdi
:
933 case TOK___fixunsdfdi
:
935 case TOK___fixunsxfdi
:
937 case TOK___floatdisf
:
938 case TOK___floatdidf
:
948 /* Return the number of registers needed to return the struct, or 0 if
949 returning via struct pointer. */
950 ST_FUNC
int gfunc_sret(CType
*vt
, int variadic
, CType
*ret
, int *ret_align
, int *regsize
) {
953 size
= type_size(vt
, &align
);
954 if (float_abi
== ARM_HARD_FLOAT
&& !variadic
&&
955 (is_float(vt
->t
) || is_hgen_float_aggr(vt
))) {
960 return (size
+ 7) >> 3;
961 } else if (size
> 0 && size
<= 4) {
974 /* Parameters are classified according to how they are copied to their final
975 destination for the function call. Because the copying is performed class
976 after class according to the order in the union below, it is important that
977 some constraints about the order of the members of this union are respected:
978 - CORE_STRUCT_CLASS must come after STACK_CLASS;
979 - CORE_CLASS must come after STACK_CLASS, CORE_STRUCT_CLASS and
981 - VFP_STRUCT_CLASS must come after VFP_CLASS.
982 See the comment for the main loop in copy_params() for the reason. */
993 int start
; /* first reg or addr used depending on the class */
994 int end
; /* last reg used or next free addr depending on the class */
995 SValue
*sval
; /* pointer to SValue on the value stack */
996 struct param_plan
*prev
; /* previous element in this class */
1000 struct param_plan
*pplans
; /* array of all the param plans */
1001 struct param_plan
*clsplans
[NB_CLASSES
]; /* per class lists of param plans */
1005 static void add_param_plan(struct plan
* plan
, int cls
, int start
, int end
, SValue
*v
)
1007 struct param_plan
*p
= &plan
->pplans
[plan
->nb_plans
++];
1008 p
->prev
= plan
->clsplans
[cls
];
1009 plan
->clsplans
[cls
] = p
;
1010 p
->start
= start
, p
->end
= end
, p
->sval
= v
;
1013 /* Assign parameters to registers and stack with alignment according to the
1014 rules in the procedure call standard for the ARM architecture (AAPCS).
1015 The overall assignment is recorded in an array of per parameter structures
1016 called parameter plans. The parameter plans are also further organized in a
1017 number of linked lists, one per class of parameter (see the comment for the
1018 definition of union reg_class).
1020 nb_args: number of parameters of the function for which a call is generated
1021 float_abi: float ABI in use for this function call
1022 plan: the structure where the overall assignment is recorded
1023 todo: a bitmap that record which core registers hold a parameter
1025 Returns the amount of stack space needed for parameter passing
1027 Note: this function allocated an array in plan->pplans with tcc_malloc. It
1028 is the responsibility of the caller to free this array once used (ie not
1029 before copy_params). */
1030 static int assign_regs(int nb_args
, int float_abi
, struct plan
*plan
, int *todo
)
1033 int ncrn
/* next core register number */, nsaa
/* next stacked argument address*/;
1034 struct avail_regs avregs
= {{0}};
1039 for(i
= nb_args
; i
-- ;) {
1040 int j
, start_vfpreg
= 0;
1041 CType type
= vtop
[-i
].type
;
1042 type
.t
&= ~VT_ARRAY
;
1043 size
= type_size(&type
, &align
);
1044 size
= (size
+ 3) & ~3;
1045 align
= (align
+ 3) & ~3;
1046 switch(vtop
[-i
].type
.t
& VT_BTYPE
) {
1051 if (float_abi
== ARM_HARD_FLOAT
) {
1052 int is_hfa
= 0; /* Homogeneous float aggregate */
1054 if (is_float(vtop
[-i
].type
.t
)
1055 || (is_hfa
= is_hgen_float_aggr(&vtop
[-i
].type
))) {
1058 start_vfpreg
= assign_vfpreg(&avregs
, align
, size
);
1059 end_vfpreg
= start_vfpreg
+ ((size
- 1) >> 2);
1060 if (start_vfpreg
>= 0) {
1061 add_param_plan(plan
, is_hfa
? VFP_STRUCT_CLASS
: VFP_CLASS
,
1062 start_vfpreg
, end_vfpreg
, &vtop
[-i
]);
1068 ncrn
= (ncrn
+ (align
-1)/4) & ~((align
/4) - 1);
1069 if (ncrn
+ size
/4 <= 4 || (ncrn
< 4 && start_vfpreg
!= -1)) {
1070 /* The parameter is allocated both in core register and on stack. As
1071 * such, it can be of either class: it would either be the last of
1072 * CORE_STRUCT_CLASS or the first of STACK_CLASS. */
1073 for (j
= ncrn
; j
< 4 && j
< ncrn
+ size
/ 4; j
++)
1075 add_param_plan(plan
, CORE_STRUCT_CLASS
, ncrn
, j
, &vtop
[-i
]);
1078 nsaa
= (ncrn
- 4) * 4;
1086 int is_long
= (vtop
[-i
].type
.t
& VT_BTYPE
) == VT_LLONG
;
1089 ncrn
= (ncrn
+ 1) & -2;
1093 add_param_plan(plan
, CORE_CLASS
, ncrn
, ncrn
+ is_long
, &vtop
[-i
]);
1094 ncrn
+= 1 + is_long
;
1098 nsaa
= (nsaa
+ (align
- 1)) & ~(align
- 1);
1099 add_param_plan(plan
, STACK_CLASS
, nsaa
, nsaa
+ size
, &vtop
[-i
]);
1100 nsaa
+= size
; /* size already rounded up before */
1105 /* Copy parameters to their final destination (core reg, VFP reg or stack) for
1108 nb_args: number of parameters the function take
1109 plan: the overall assignment plan for parameters
1110 todo: a bitmap indicating what core reg will hold a parameter
1112 Returns the number of SValue added by this function on the value stack */
1113 static int copy_params(int nb_args
, struct plan
*plan
, int todo
)
1115 int size
, align
, r
, i
, nb_extra_sval
= 0;
1116 struct param_plan
*pplan
;
1119 /* Several constraints require parameters to be copied in a specific order:
1120 - structures are copied to the stack before being loaded in a reg;
1121 - floats loaded to an odd numbered VFP reg are first copied to the
1122 preceding even numbered VFP reg and then moved to the next VFP reg.
1124 It is thus important that:
1125 - structures assigned to core regs must be copied after parameters
1126 assigned to the stack but before structures assigned to VFP regs because
1127 a structure can lie partly in core registers and partly on the stack;
1128 - parameters assigned to the stack and all structures be copied before
1129 parameters assigned to a core reg since copying a parameter to the stack
1130 require using a core reg;
1131 - parameters assigned to VFP regs be copied before structures assigned to
1132 VFP regs as the copy might use an even numbered VFP reg that already
1133 holds part of a structure. */
1135 for(i
= 0; i
< NB_CLASSES
; i
++) {
1136 for(pplan
= plan
->clsplans
[i
]; pplan
; pplan
= pplan
->prev
) {
1139 && (i
!= CORE_CLASS
|| pplan
->sval
->r
< VT_CONST
))
1142 vpushv(pplan
->sval
);
1143 pplan
->sval
->r
= pplan
->sval
->r2
= VT_CONST
; /* disable entry */
1146 case CORE_STRUCT_CLASS
:
1147 case VFP_STRUCT_CLASS
:
1148 if ((pplan
->sval
->type
.t
& VT_BTYPE
) == VT_STRUCT
) {
1150 size
= type_size(&pplan
->sval
->type
, &align
);
1151 /* align to stack align size */
1152 size
= (size
+ 3) & ~3;
1153 if (i
== STACK_CLASS
&& pplan
->prev
)
1154 padding
= pplan
->start
- pplan
->prev
->end
;
1155 size
+= padding
; /* Add padding if any */
1156 /* allocate the necessary size on stack */
1158 /* generate structure store */
1159 r
= get_reg(RC_INT
);
1160 o(0xE28D0000|(intr(r
)<<12)|padding
); /* add r, sp, padding */
1161 vset(&vtop
->type
, r
| VT_LVAL
, 0);
1163 vstore(); /* memcpy to current sp + potential padding */
1165 /* Homogeneous float aggregate are loaded to VFP registers
1166 immediately since there is no way of loading data in multiple
1167 non consecutive VFP registers as what is done for other
1168 structures (see the use of todo). */
1169 if (i
== VFP_STRUCT_CLASS
) {
1170 int first
= pplan
->start
, nb
= pplan
->end
- first
+ 1;
1171 /* vpop.32 {pplan->start, ..., pplan->end} */
1172 o(0xECBD0A00|(first
&1)<<22|(first
>>1)<<12|nb
);
1173 /* No need to write the register used to a SValue since VFP regs
1174 cannot be used for gcall_or_jmp */
1177 if (is_float(pplan
->sval
->type
.t
)) {
1179 r
= vfpr(gv(RC_FLOAT
)) << 12;
1180 if ((pplan
->sval
->type
.t
& VT_BTYPE
) == VT_FLOAT
)
1184 r
|= 0x101; /* vpush.32 -> vpush.64 */
1186 o(0xED2D0A01 + r
); /* vpush */
1188 r
= fpr(gv(RC_FLOAT
)) << 12;
1189 if ((pplan
->sval
->type
.t
& VT_BTYPE
) == VT_FLOAT
)
1191 else if ((pplan
->sval
->type
.t
& VT_BTYPE
) == VT_DOUBLE
)
1194 size
= LDOUBLE_SIZE
;
1201 o(0xED2D0100|r
|(size
>>2)); /* some kind of vpush for FPA */
1204 /* simple type (currently always same size) */
1205 /* XXX: implicit cast ? */
1207 if ((pplan
->sval
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1211 o(0xE52D0004|(intr(r
)<<12)); /* push r */
1215 o(0xE52D0004|(intr(r
)<<12)); /* push r */
1217 if (i
== STACK_CLASS
&& pplan
->prev
)
1218 gadd_sp(pplan
->prev
->end
- pplan
->start
); /* Add padding if any */
1223 gv(regmask(TREG_F0
+ (pplan
->start
>> 1)));
1224 if (pplan
->start
& 1) { /* Must be in upper part of double register */
1225 o(0xEEF00A40|((pplan
->start
>>1)<<12)|(pplan
->start
>>1)); /* vmov.f32 s(n+1), sn */
1226 vtop
->r
= VT_CONST
; /* avoid being saved on stack by gv for next float */
1231 if ((pplan
->sval
->type
.t
& VT_BTYPE
) == VT_LLONG
) {
1233 gv(regmask(pplan
->end
));
1234 pplan
->sval
->r2
= vtop
->r
;
1237 gv(regmask(pplan
->start
));
1238 /* Mark register as used so that gcall_or_jmp use another one
1239 (regs >=4 are free as never used to pass parameters) */
1240 pplan
->sval
->r
= vtop
->r
;
1247 /* second pass to restore registers that were saved on stack by accident.
1248 Maybe redundant after the "lvalue_save" patch in tccgen.c:gv() */
1252 /* Manually free remaining registers since next parameters are loaded
1253 * manually, without the help of gv(int). */
1257 o(0xE8BD0000|todo
); /* pop {todo} */
1258 for(pplan
= plan
->clsplans
[CORE_STRUCT_CLASS
]; pplan
; pplan
= pplan
->prev
) {
1260 pplan
->sval
->r
= pplan
->start
;
1261 /* An SValue can only pin 2 registers at best (r and r2) but a structure
1262 can occupy more than 2 registers. Thus, we need to push on the value
1263 stack some fake parameter to have on SValue for each registers used
1264 by a structure (r2 is not used). */
1265 for (r
= pplan
->start
+ 1; r
<= pplan
->end
; r
++) {
1266 if (todo
& (1 << r
)) {
1274 return nb_extra_sval
;
1277 /* Generate function call. The function address is pushed first, then
1278 all the parameters in call order. This functions pops all the
1279 parameters and the function address. */
1280 void gfunc_call(int nb_args
)
1283 int def_float_abi
= float_abi
;
1290 #ifdef CONFIG_TCC_BCHECK
1291 if (tcc_state
->do_bounds_check
)
1292 gbound_args(nb_args
);
1296 if (float_abi
== ARM_HARD_FLOAT
) {
1297 variadic
= (vtop
[-nb_args
].type
.ref
->f
.func_type
== FUNC_ELLIPSIS
);
1298 if (variadic
|| floats_in_core_regs(&vtop
[-nb_args
]))
1299 float_abi
= ARM_SOFTFP_FLOAT
;
1302 /* cannot let cpu flags if other instruction are generated. Also avoid leaving
1303 VT_JMP anywhere except on the top of the stack because it would complicate
1304 the code generator. */
1305 r
= vtop
->r
& VT_VALMASK
;
1306 if (r
== VT_CMP
|| (r
& ~1) == VT_JMP
)
1309 memset(&plan
, 0, sizeof plan
);
1311 plan
.pplans
= tcc_malloc(nb_args
* sizeof(*plan
.pplans
));
1313 args_size
= assign_regs(nb_args
, float_abi
, &plan
, &todo
);
1316 if (args_size
& 7) { /* Stack must be 8 byte aligned at fct call for EABI */
1317 args_size
= (args_size
+ 7) & ~7;
1318 o(0xE24DD004); /* sub sp, sp, #4 */
1322 nb_args
+= copy_params(nb_args
, &plan
, todo
);
1323 tcc_free(plan
.pplans
);
1325 /* Move fct SValue on top as required by gcall_or_jmp */
1329 gadd_sp(args_size
); /* pop all parameters passed on the stack */
1330 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1331 if(float_abi
== ARM_SOFTFP_FLOAT
&& is_float(vtop
->type
.ref
->type
.t
)) {
1332 if((vtop
->type
.ref
->type
.t
& VT_BTYPE
) == VT_FLOAT
) {
1333 o(0xEE000A10); /*vmov s0, r0 */
1335 o(0xEE000B10); /* vmov.32 d0[0], r0 */
1336 o(0xEE201B10); /* vmov.32 d0[1], r1 */
1340 vtop
-= nb_args
+ 1; /* Pop all params and fct address from value stack */
1341 leaffunc
= 0; /* we are calling a function, so we aren't in a leaf function */
1342 float_abi
= def_float_abi
;
1345 /* generate function prolog of type 't' */
1346 void gfunc_prolog(Sym
*func_sym
)
1348 CType
*func_type
= &func_sym
->type
;
1350 int n
, nf
, size
, align
, rs
, struct_ret
= 0;
1351 int addr
, pn
, sn
; /* pn=core, sn=stack */
1355 struct avail_regs avregs
= {{0}};
1358 sym
= func_type
->ref
;
1361 if ((func_vt
.t
& VT_BTYPE
) == VT_STRUCT
&&
1362 !gfunc_sret(&func_vt
, func_var
, &ret_type
, &align
, &rs
))
1366 func_vc
= 12; /* Offset from fp of the place to store the result */
1368 for(sym2
= sym
->next
; sym2
&& (n
< 4 || nf
< 16); sym2
= sym2
->next
) {
1369 size
= type_size(&sym2
->type
, &align
);
1371 if (float_abi
== ARM_HARD_FLOAT
&& !func_var
&&
1372 (is_float(sym2
->type
.t
) || is_hgen_float_aggr(&sym2
->type
))) {
1373 int tmpnf
= assign_vfpreg(&avregs
, align
, size
);
1374 tmpnf
+= (size
+ 3) / 4;
1375 nf
= (tmpnf
> nf
) ? tmpnf
: nf
;
1379 n
+= (size
+ 3) / 4;
1381 o(0xE1A0C00D); /* mov ip,sp */
1390 o(0xE92D0000|((1<<n
)-1)); /* save r0-r4 on stack if needed */
1395 nf
=(nf
+1)&-2; /* nf => HARDFLOAT => EABI */
1396 o(0xED2D0A00|nf
); /* save s0-s15 on stack if needed */
1398 o(0xE92D5800); /* save fp, ip, lr */
1399 o(0xE1A0B00D); /* mov fp, sp */
1400 func_sub_sp_offset
= ind
;
1401 o(0xE1A00000); /* nop, leave space for stack adjustment in epilog */
1404 if (float_abi
== ARM_HARD_FLOAT
) {
1406 memset(&avregs
, 0, sizeof avregs
);
1409 pn
= struct_ret
, sn
= 0;
1410 while ((sym
= sym
->next
)) {
1413 size
= type_size(type
, &align
);
1414 size
= (size
+ 3) >> 2;
1415 align
= (align
+ 3) & ~3;
1417 if (float_abi
== ARM_HARD_FLOAT
&& !func_var
&& (is_float(sym
->type
.t
)
1418 || is_hgen_float_aggr(&sym
->type
))) {
1419 int fpn
= assign_vfpreg(&avregs
, align
, size
<< 2);
1428 pn
= (pn
+ (align
-1)/4) & -(align
/4);
1430 addr
= (nf
+ pn
) * 4;
1437 sn
= (sn
+ (align
-1)/4) & -(align
/4);
1439 addr
= (n
+ nf
+ sn
) * 4;
1442 sym_push(sym
->v
& ~SYM_FIELD
, type
, VT_LOCAL
| VT_LVAL
,
1448 #ifdef CONFIG_TCC_BCHECK
1449 if (tcc_state
->do_bounds_check
)
1450 gen_bounds_prolog();
1454 /* generate function epilog */
1455 void gfunc_epilog(void)
1460 #ifdef CONFIG_TCC_BCHECK
1461 if (tcc_state
->do_bounds_check
)
1462 gen_bounds_epilog();
1464 /* Copy float return value to core register if base standard is used and
1465 float computation is made with VFP */
1466 #if defined(TCC_ARM_EABI) && defined(TCC_ARM_VFP)
1467 if ((float_abi
== ARM_SOFTFP_FLOAT
|| func_var
) && is_float(func_vt
.t
)) {
1468 if((func_vt
.t
& VT_BTYPE
) == VT_FLOAT
)
1469 o(0xEE100A10); /* fmrs r0, s0 */
1471 o(0xEE100B10); /* fmrdl r0, d0 */
1472 o(0xEE301B10); /* fmrdh r1, d0 */
1476 o(0xE89BA800); /* restore fp, sp, pc */
1477 diff
= (-loc
+ 3) & -4;
1480 diff
= ((diff
+ 11) & -8) - 4;
1483 x
=stuff_const(0xE24BD000, diff
); /* sub sp,fp,# */
1485 *(uint32_t *)(cur_text_section
->data
+ func_sub_sp_offset
) = x
;
1489 o(0xE59FC004); /* ldr ip,[pc+4] */
1490 o(0xE04BD00C); /* sub sp,fp,ip */
1491 o(0xE1A0F00E); /* mov pc,lr */
1493 *(uint32_t *)(cur_text_section
->data
+ func_sub_sp_offset
) = 0xE1000000|encbranch(func_sub_sp_offset
,addr
,1);
/* Pad the code section with nop instructions.
   NOTE(review): body largely elided in this extraction (lines 1499-1507
   missing); only the alignment guard is visible. */
1498 ST_FUNC
void gen_fill_nops(int bytes
)
1501 tcc_error("alignment of code section not multiple of 4");
1508 /* generate a jump to a label */
/* Emits an unconditional branch (condition field 0xE implied by encbranch's
   0xE0000000 base per the visible OR) whose target fixup is handled by
   encbranch().  NOTE(review): surrounding lines elided in this extraction. */
1509 ST_FUNC
int gjmp(int t
)
1515 o(0xE0000000|encbranch(r
,t
,1));
1519 /* generate a jump to a fixed address */
/* NOTE(review): body elided in this extraction; only the signature is
   visible here. */
1520 ST_FUNC
void gjmp_addr(int a
)
/* Generate a conditional jump: the branch displacement produced by
   encbranch() is OR-ed into 'op' (which presumably already carries the
   condition code — TODO confirm against elided lines). */
1525 ST_FUNC
int gjmp_cond(int op
, int t
)
1532 op
|=encbranch(r
,t
,1);
/* Append jump 't' to a chain of pending jumps starting at 'n': walk the
   chain via decbranch(), then patch the last branch word in the code
   section to point at 't'.  NOTE(review): loop head and return elided in
   this extraction. */
1537 ST_FUNC
int gjmp_append(int n
, int t
)
1544 p
= decbranch(lp
=p
);
/* lp is the offset of the last branch in the chain; patch it in place. */
1546 x
= (uint32_t *)(cur_text_section
->data
+ lp
);
1548 *x
|= encbranch(lp
,t
,1);
1554 /* generate an integer binary operation */
/* NOTE(review): fragmented extraction — the dispatch switch, most case
   labels and several statements are elided (embedded numbering jumps).
   Code kept byte-identical; comments only. */
1555 void gen_opi(int op
)
1558 uint32_t opc
= 0, r
, fr
;
1559 unsigned short retreg
= REG_IRET
;
/* Carry-aware add/sub variants used for 64-bit arithmetic lowering. */
1567 case TOK_ADDC1
: /* add with carry generation */
1575 case TOK_SUBC1
: /* sub with carry generation */
1579 case TOK_ADDC2
: /* add with carry use */
1583 case TOK_SUBC2
: /* sub with carry use */
1600 gv2(RC_INT
, RC_INT
);
/* mul: 0xE0000090 is the MUL encoding; Rd<<16, Rs<<8, Rm. */
1604 o(0xE0000090|(intr(r
)<<16)|(intr(r
)<<8)|intr(fr
));
/* Division has no ARMv4 instruction; call the EABI helper routines. */
1629 func
=TOK___aeabi_idivmod
;
1638 func
=TOK___aeabi_uidivmod
;
1646 gv2(RC_INT
, RC_INT
);
1647 r
=intr(vtop
[-1].r2
=get_reg(RC_INT
));
1649 vtop
[-1].r
=get_reg_ex(RC_INT
,regmask(c
));
/* umull-style long multiply: result spread over two registers. */
1651 o(0xE0800090|(r
<<16)|(intr(vtop
->r
)<<12)|(intr(c
)<<8)|intr(vtop
[1].r
));
/* Data-processing ops: if the first operand is a constant, try to swap
   operands (sub -> rsb for opcodes 4/5/0xc) so the constant can be the
   immediate. */
1660 if((vtop
[-1].r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
1661 if(opc
== 4 || opc
== 5 || opc
== 0xc) {
1663 opc
|=2; // sub -> rsb
/* Materialize a pending comparison/jump result before using it as operand. */
1666 if ((vtop
->r
& VT_VALMASK
) == VT_CMP
||
1667 (vtop
->r
& (VT_VALMASK
& ~1)) == VT_JMP
)
1672 opc
=0xE0000000|(opc
<<20);
1673 if((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
/* Immediate form: try encoding the constant directly (stuff_const). */
1675 x
=stuff_const(opc
|0x2000000|(c
<<16),vtop
->c
.i
);
1677 r
=intr(vtop
[-1].r
=get_reg_ex(RC_INT
,regmask(vtop
[-1].r
)));
/* Register form. */
1682 fr
=intr(gv(RC_INT
));
1683 if ((vtop
[-1].r
& VT_VALMASK
) >= VT_CONST
) {
1688 r
=intr(vtop
[-1].r
=get_reg_ex(RC_INT
,two2mask(vtop
->r
,vtop
[-1].r
)));
1689 o(opc
|(c
<<16)|(r
<<12)|fr
);
/* Comparison operators set VT_CMP (presumably handled in elided lines). */
1692 if (op
>= TOK_ULT
&& op
<= TOK_GT
)
/* Shifts: 0xE1A00000 is MOV with shifter operand; shift type in opc<<5. */
1696 opc
=0xE1A00000|(opc
<<5);
1697 if ((vtop
->r
& VT_VALMASK
) == VT_CMP
||
1698 (vtop
->r
& (VT_VALMASK
& ~1)) == VT_JMP
)
1703 if ((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) == VT_CONST
) {
/* Shift by constant: amount restricted to 5 bits (0..31). */
1704 fr
=intr(vtop
[-1].r
=get_reg_ex(RC_INT
,regmask(vtop
[-1].r
)));
1705 c
= vtop
->c
.i
& 0x1f;
1706 o(opc
|r
|(c
<<7)|(fr
<<12));
/* Shift by register. */
1708 fr
=intr(gv(RC_INT
));
1709 if ((vtop
[-1].r
& VT_VALMASK
) >= VT_CONST
) {
1714 c
=intr(vtop
[-1].r
=get_reg_ex(RC_INT
,two2mask(vtop
->r
,vtop
[-1].r
)));
1715 o(opc
|r
|(c
<<12)|(fr
<<8)|0x10);
/* Operations lowered to a runtime helper call. */
1720 vpush_helper_func(func
);
1727 tcc_error("gen_opi %i unimplemented!",op
);
/* Return non-zero if the constant float value at vtop[i] is zero.
   Non-constant stack entries are handled first (elided line presumably
   returns 0 — TODO confirm); otherwise compare the float/double/long
   double constant against 0 by type. */
1732 static int is_zero(int i
)
1734 if((vtop
[i
].r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) != VT_CONST
)
1736 if (vtop
[i
].type
.t
== VT_FLOAT
)
1737 return (vtop
[i
].c
.f
== 0.f
);
1738 else if (vtop
[i
].type
.t
== VT_DOUBLE
)
1739 return (vtop
[i
].c
.d
== 0.0);
1740 return (vtop
[i
].c
.ld
== 0.l
);
1743 /* generate a floating point operation 'v = t1 op t2' instruction. The
1744 * two operands are guaranteed to have the same floating point type */
/* VFP variant (the FPA variant appears later in this file).
   NOTE(review): fragmented extraction — the opcode switch and several
   statements are elided; code kept byte-identical, comments only. */
1745 void gen_opf(int op
)
/* Base VFP data-processing encoding; T2CPR selects single vs double. */
1749 x
=0xEE000A00|T2CPR(vtop
->type
.t
);
1767 x
|=0x810000; /* fsubX -> fnegX */
1780 if(op
< TOK_ULT
|| op
> TOK_GT
) {
1781 tcc_error("unknown fp op %x!",op
);
/* Comparison against a zero constant: swap the comparison sense so the
   zero can be the second operand (fcmpz form used below). */
1787 case TOK_LT
: op
=TOK_GT
; break;
1788 case TOK_GE
: op
=TOK_ULE
; break;
1789 case TOK_LE
: op
=TOK_GE
; break;
1790 case TOK_GT
: op
=TOK_ULT
; break;
1793 x
|=0xB40040; /* fcmpX */
1794 if(op
!=TOK_EQ
&& op
!=TOK_NE
)
1795 x
|=0x80; /* fcmpX -> fcmpeX */
1798 o(x
|0x10000|(vfpr(gv(RC_FLOAT
))<<12)); /* fcmp(e)X -> fcmp(e)zX */
1800 gv2(RC_FLOAT
,RC_FLOAT
);
1802 o(x
|(vfpr(vtop
[-1].r
) << 12));
/* Transfer VFP status flags to the ARM CPSR before the conditional use. */
1805 o(0xEEF1FA10); /* fmstat */
/* Map signed comparison tokens onto the condition codes appropriate after
   a VFP compare (unordered-aware). */
1808 case TOK_LE
: op
=TOK_ULE
; break;
1809 case TOK_LT
: op
=TOK_ULT
; break;
1810 case TOK_UGE
: op
=TOK_GE
; break;
1811 case TOK_UGT
: op
=TOK_GT
; break;
/* Non-comparison ops: pick a destination register and emit the op. */
1825 if ((vtop
[-1].r
& VT_VALMASK
) >= VT_CONST
) {
1832 vtop
->r
=get_reg_ex(RC_FLOAT
,r
);
1835 o(x
|(vfpr(vtop
->r
)<<12));
/* If vtop is a float constant representable as an FPA immediate, return
   its encoding; the visible guard rejects non-constant entries, then the
   value is inspected by type (returns elided in this extraction). */
1839 static uint32_t is_fconst()
1843 if((vtop
->r
& (VT_VALMASK
| VT_LVAL
| VT_SYM
)) != VT_CONST
)
1845 if (vtop
->type
.t
== VT_FLOAT
)
1847 else if (vtop
->type
.t
== VT_DOUBLE
)
1877 /* generate a floating point operation 'v = t1 op t2' instruction. The
1878 two operands are guaranteed to have the same floating point type */
/* FPA (legacy ARM floating point accelerator) variant of gen_opf.
   NOTE(review): fragmented extraction — the op switch, the c1/c2
   constant-immediate handling and many statements are elided; code kept
   byte-identical, comments only. */
1879 void gen_opf(int op
)
1881 uint32_t x
, r
, r2
, c1
, c2
;
1882 //fputs("gen_opf\n",stderr);
/* Select precision bits in the FPA encoding from the operand type. */
1888 #if LDOUBLE_SIZE == 8
1889 if ((vtop
->type
.t
& VT_BTYPE
) != VT_FLOAT
)
1892 if ((vtop
->type
.t
& VT_BTYPE
) == VT_DOUBLE
)
1894 else if ((vtop
->type
.t
& VT_BTYPE
) == VT_LDOUBLE
)
/* The repeated pattern below: load operands into FPA registers, preferring
   the immediate form when one operand is an encodable constant (c1/c2,
   set by is_fconst in elided lines — TODO confirm). */
1905 r
=fpr(gv(RC_FLOAT
));
1912 r2
=fpr(gv(RC_FLOAT
));
1913 if ((vtop
[-1].r
& VT_VALMASK
) >= VT_CONST
) {
1915 r
=fpr(gv(RC_FLOAT
));
1926 r
=fpr(gv(RC_FLOAT
));
1928 } else if(c1
&& c1
<=0xf) {
1931 r
=fpr(gv(RC_FLOAT
));
1936 r
=fpr(gv(RC_FLOAT
));
1938 r2
=fpr(gv(RC_FLOAT
));
1939 if ((vtop
[-1].r
& VT_VALMASK
) >= VT_CONST
) {
1941 r
=fpr(gv(RC_FLOAT
));
1952 r
=fpr(gv(RC_FLOAT
));
1957 r2
=fpr(gv(RC_FLOAT
));
1958 if ((vtop
[-1].r
& VT_VALMASK
) >= VT_CONST
) {
1960 r
=fpr(gv(RC_FLOAT
));
1971 r
=fpr(gv(RC_FLOAT
));
1973 } else if(c1
&& c1
<=0xf) {
1976 r
=fpr(gv(RC_FLOAT
));
1981 r
=fpr(gv(RC_FLOAT
));
1983 r2
=fpr(gv(RC_FLOAT
));
1984 if ((vtop
[-1].r
& VT_VALMASK
) >= VT_CONST
) {
1986 r
=fpr(gv(RC_FLOAT
));
/* Comparisons use cmfe (compare with exception on NaN). */
1992 if(op
>= TOK_ULT
&& op
<= TOK_GT
) {
1993 x
|=0xd0f110; // cmfe
1994 /* bug (intention?) in Linux FPU emulator
1995 doesn't set carry if equal */
2001 tcc_error("unsigned comparison on floats?");
2007 op
=TOK_ULE
; /* correct in unordered case only if AC bit in FPSR set */
2011 x
&=~0x400000; // cmfe -> cmf
2033 r
=fpr(gv(RC_FLOAT
));
2040 r2
=fpr(gv(RC_FLOAT
));
2041 if ((vtop
[-1].r
& VT_VALMASK
) >= VT_CONST
) {
2043 r
=fpr(gv(RC_FLOAT
));
2051 tcc_error("unknown fp op %x!",op
);
2055 if(vtop
[-1].r
== VT_CMP
)
/* Non-comparison result: allocate a destination FPA register and emit. */
2061 vtop
[-1].r
=get_reg_ex(RC_FLOAT
,two2mask(vtop
[-1].r
,c1
));
2065 o(x
|(r
<<16)|(c1
<<12)|r2
);
2069 /* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
2070 and 'long long' cases. */
/* NOTE(review): fragmented extraction — #ifdef branches for VFP vs FPA and
   several statements are elided; code kept byte-identical, comments only. */
2071 ST_FUNC
void gen_cvt_itof(int t
)
2075 bt
=vtop
->type
.t
& VT_BTYPE
;
2076 if(bt
== VT_INT
|| bt
== VT_SHORT
|| bt
== VT_BYTE
) {
/* VFP path: move the integer into a VFP single register, then convert
   in place (fuito/fsito selected by signedness). */
2082 r2
=vfpr(vtop
->r
=get_reg(RC_FLOAT
));
2083 o(0xEE000A10|(r
<<12)|(r2
<<16)); /* fmsr */
2085 if(!(vtop
->type
.t
& VT_UNSIGNED
))
2086 r2
|=0x80; /* fuitoX -> fsituX */
2087 o(0xEEB80A40|r2
|T2CPR(t
)); /* fYitoX*/
/* FPA path: flt converts signed only; unsigned int needs the 2^32
   correction sequence below. */
2089 r2
=fpr(vtop
->r
=get_reg(RC_FLOAT
));
2090 if((t
& VT_BTYPE
) != VT_FLOAT
)
2091 dsize
=0x80; /* flts -> fltd */
2092 o(0xEE000110|dsize
|(r2
<<16)|(r
<<12)); /* flts */
2093 if((vtop
->type
.t
& (VT_UNSIGNED
|VT_BTYPE
)) == (VT_UNSIGNED
|VT_INT
)) {
/* If the unsigned value had its sign bit set, the signed conversion is
   off by 2^32: conditionally add the constant 4294967296.0, loading it
   from a cached literal (last_itod_magic) when one is in range. */
2095 o(0xE3500000|(r
<<12)); /* cmp */
2096 r
=fpr(get_reg(RC_FLOAT
));
2097 if(last_itod_magic
) {
2098 off
=ind
+8-last_itod_magic
;
2103 o(0xBD1F0100|(r
<<12)|off
); /* ldflts */
2105 o(0xEA000000); /* b */
2106 last_itod_magic
=ind
;
2107 o(0x4F800000); /* 4294967296.0f */
2109 o(0xBE000100|dsize
|(r2
<<16)|(r2
<<12)|r
); /* adflt */
/* long long -> float: no instruction; call the libgcc-style helper
   selected by target type and signedness. */
2113 } else if(bt
== VT_LLONG
) {
2115 CType
*func_type
= 0;
2116 if((t
& VT_BTYPE
) == VT_FLOAT
) {
2117 func_type
= &func_float_type
;
2118 if(vtop
->type
.t
& VT_UNSIGNED
)
2119 func
=TOK___floatundisf
;
2121 func
=TOK___floatdisf
;
2122 #if LDOUBLE_SIZE != 8
2123 } else if((t
& VT_BTYPE
) == VT_LDOUBLE
) {
2124 func_type
= &func_ldouble_type
;
2125 if(vtop
->type
.t
& VT_UNSIGNED
)
2126 func
=TOK___floatundixf
;
2128 func
=TOK___floatdixf
;
2129 } else if((t
& VT_BTYPE
) == VT_DOUBLE
) {
2131 } else if((t
& VT_BTYPE
) == VT_DOUBLE
|| (t
& VT_BTYPE
) == VT_LDOUBLE
) {
2133 func_type
= &func_double_type
;
2134 if(vtop
->type
.t
& VT_UNSIGNED
)
2135 func
=TOK___floatundidf
;
2137 func
=TOK___floatdidf
;
2140 vpushsym(func_type
, external_helper_sym(func
));
2148 tcc_error("unimplemented gen_cvt_itof %x!",vtop
->type
.t
);
2151 /* convert fp to int 't' type */
/* NOTE(review): fragmented extraction — the signed/unsigned dispatch and
   several branches are elided; code kept byte-identical, comments only.
   r2 first holds the source float's basic type, later an int register. */
2152 void gen_cvt_ftoi(int t
)
2158 r2
=vtop
->type
.t
& VT_BTYPE
;
/* VFP path: ftoXiz converts in a VFP register, then fmrs moves the
   result into an integer register. */
2161 r
=vfpr(gv(RC_FLOAT
));
2163 o(0xEEBC0AC0|(r
<<12)|r
|T2CPR(r2
)|u
); /* ftoXizY */
2164 r2
=intr(vtop
->r
=get_reg(RC_INT
));
2165 o(0xEE100A10|(r
<<16)|(r2
<<12));
/* Unsigned int results need a helper call, chosen by source type. */
2170 func
=TOK___fixunssfsi
;
2171 #if LDOUBLE_SIZE != 8
2172 else if(r2
== VT_LDOUBLE
)
2173 func
=TOK___fixunsxfsi
;
2174 else if(r2
== VT_DOUBLE
)
2176 else if(r2
== VT_LDOUBLE
|| r2
== VT_DOUBLE
)
2178 func
=TOK___fixunsdfsi
;
/* FPA path: fix instruction. */
2180 r
=fpr(gv(RC_FLOAT
));
2181 r2
=intr(vtop
->r
=get_reg(RC_INT
));
2182 o(0xEE100170|(r2
<<12)|r
);
2186 } else if(t
== VT_LLONG
) { // unsigned handled in gen_cvt_ftoi1
2189 #if LDOUBLE_SIZE != 8
2190 else if(r2
== VT_LDOUBLE
)
2192 else if(r2
== VT_DOUBLE
)
2194 else if(r2
== VT_LDOUBLE
|| r2
== VT_DOUBLE
)
2199 vpush_helper_func(func
);
/* 64-bit helper result comes back in the REG_IRET/REG_IRE2 pair. */
2204 vtop
->r2
= REG_IRE2
;
2208 tcc_error("unimplemented gen_cvt_ftoi!");
2211 /* convert from one floating point type to another */
/* VFP: only a float<->double precision change needs an instruction
   (fcvt, encoded below); double<->long double is a no-op since FPA-less
   EABI maps them to the same format (presumably — TODO confirm against
   elided #ifdef structure). */
2212 void gen_cvt_ftof(int t
)
2215 if(((vtop
->type
.t
& VT_BTYPE
) == VT_FLOAT
) != ((t
& VT_BTYPE
) == VT_FLOAT
)) {
2216 uint32_t r
= vfpr(gv(RC_FLOAT
));
2217 o(0xEEB70AC0|(r
<<12)|r
|T2CPR(vtop
->type
.t
));
2220 /* all we have to do on i386 and FPA ARM is to put the float in a register */
2225 /* computed goto support */
2232 /* Save the stack pointer onto the stack and return the location of its address */
/* Builds a VT_LOCAL lvalue at 'addr' and stores sp into it (store emitted
   in elided lines — NOTE(review): extraction fragmentary). */
2233 ST_FUNC
void gen_vla_sp_save(int addr
) {
2236 v
.r
= VT_LOCAL
| VT_LVAL
;
2241 /* Restore the SP from a location on the stack */
/* Mirror of gen_vla_sp_save: loads sp back from the VT_LOCAL slot at
   'addr' (load emitted in elided lines — NOTE(review): extraction
   fragmentary). */
2242 ST_FUNC
void gen_vla_sp_restore(int addr
) {
2245 v
.r
= VT_LOCAL
| VT_LVAL
;
2250 /* Subtract from the stack pointer, and push the resulting value onto the stack */
/* NOTE(review): fragmented extraction — several statements elided; code
   kept byte-identical, comments only. */
2251 ST_FUNC
void gen_vla_alloc(CType
*type
, int align
) {
2253 #if defined(CONFIG_TCC_BCHECK)
2254 if (tcc_state
->do_bounds_check
)
/* Size to allocate is on the value stack; get it into a core register. */
2257 r
= intr(gv(RC_INT
));
2258 #if defined(CONFIG_TCC_BCHECK)
2259 if (tcc_state
->do_bounds_check
)
/* One extra byte so the bound-checked region's end marker fits. */
2260 o(0xe2800001 | (r
<<16)|(r
<<12)); /* add r,r,#1 */
2262 o(0xE04D0000|(r
<<12)|r
); /* sub r, sp, r */
2270 if (align
& (align
- 1))
2271 tcc_error("alignment is not a power of 2: %i", align
);
/* Round the new sp down to the requested power-of-two alignment. */
2272 o(stuff_const(0xE3C0D000|(r
<<16), align
- 1)); /* bic sp, r, #align-1 */
2274 #if defined(CONFIG_TCC_BCHECK)
2275 if (tcc_state
->do_bounds_check
) {
/* Register the freshly allocated region with the bounds checker. */
2278 o(0xe1a0000d | (vtop
->r
<< 12)); // mov r0,sp
2280 vpush_helper_func(TOK___bound_new_region
);
2283 func_bound_add_epilog
= 1;
2288 /* end of ARM code generator */
2289 /*************************************************************/
2291 /*************************************************************/