Make TraceMonkey build on Solaris x86 with Sun Studio 12 (bug 452588, r=danderson).
[wine-gecko.git] js/src/nanojit/Nativei386.cpp
/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: t; tab-width: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Adobe AS3 Team
 *   Mozilla TraceMonkey Team
 *   Asko Tontti <atontti@cc.hut.fi>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#ifdef _MAC
// for MakeDataExecutable
#include <CoreServices/CoreServices.h>
#endif

#if defined AVMPLUS_UNIX
#include <sys/mman.h>
#include <errno.h>
#endif
#include "nanojit.h"
namespace nanojit
{
#ifdef FEATURE_NANOJIT

#ifdef NJ_VERBOSE
    const char *regNames[] = {
#if defined NANOJIT_IA32
        "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
        "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
        "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7"
#elif defined NANOJIT_AMD64
        "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
        "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
        "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
        "xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15"
#endif
    };
#endif
#if defined NANOJIT_IA32
    const Register Assembler::argRegs[] = { ECX, EDX };
    const Register Assembler::retRegs[] = { EAX, EDX };
#elif defined NANOJIT_AMD64
#if defined WIN64
    const Register Assembler::argRegs[] = { R8, R9, RCX, RDX };
#else
    const Register Assembler::argRegs[] = { RDI, RSI, RDX, RCX, R8, R9 };
#endif
    const Register Assembler::retRegs[] = { RAX, RDX };
#endif
    void Assembler::nInit(AvmCore* core)
    {
#if defined NANOJIT_IA32
        sse2 = core->use_sse2();

        // CMOVcc is actually available on most PPro+ chips (except for a few
        // oddballs like Via C3) but for now tie to SSE2 detection
        has_cmov = sse2;
#else
        has_cmov = true;
#endif
        OSDep::getDate();
    }
    NIns* Assembler::genPrologue(RegisterMask needSaving)
    {
        /**
         * Prologue
         */
        uint32_t stackNeeded = STACK_GRANULARITY * _activation.highwatermark;
        uint32_t savingCount = 0;

        for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
            if (needSaving&rmask(i))
                savingCount++;

        // After forcing alignment, we've pushed the pre-alignment SP
        // and savingCount registers.
        uint32_t stackPushed = STACK_GRANULARITY * (1+savingCount);
        uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
        uint32_t amt = aligned - stackPushed;
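        // For example, assuming STACK_GRANULARITY == 4 and NJ_ALIGN_STACK == 16:
        // a highwatermark of 5 with two saved registers gives stackNeeded = 20,
        // stackPushed = 12, aligned = 32 and amt = 20, so the pushes plus the
        // SUB below move SP by a multiple of 16.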
        // Reserve stackNeeded bytes, padded
        // to preserve NJ_ALIGN_STACK-byte alignment.
        if (amt)
        {
#if defined NANOJIT_IA32
            SUBi(SP, amt);
#elif defined NANOJIT_AMD64
            SUBQi(SP, amt);
#endif
        }

        verbose_only( verbose_outputf("        %p:", _nIns); )
        verbose_only( verbose_output("        patch entry:"); )
        NIns *patchEntry = _nIns;
        MR(FP, SP); // Establish our own FP.

        // Save pre-alignment SP value here, where the FP will point,
        // to preserve the illusion of a valid frame chain for
        // functions like MMgc::GetStackTrace.  The 'return address'
        // of this 'frame' will be the last-saved register, but that's
        // fine, because the next-older frame will be legit.
        PUSHr(FP);

        for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
            if (needSaving&rmask(i))
                PUSHr(i);

        // We'd like to be able to use SSE instructions like MOVDQA on
        // stack slots; it requires 16B alignment.  Darwin requires a
        // 16B stack alignment, and Linux GCC seems to intend to
        // establish and preserve the same, but we're told that GCC
        // has not always done this right.  To avoid doubt, do it on
        // all platforms.  The prologue runs only when we enter
        // fragments from the interpreter, so forcing 16B alignment
        // here is cheap.
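        // ANDing SP with -NJ_ALIGN_STACK clears its low bits; e.g. assuming
        // NJ_ALIGN_STACK == 16, 0x0012FF3C & 0xFFFFFFF0 == 0x0012FF30, so SP is
        // 16-byte aligned before the frame is established above it.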
#if defined NANOJIT_IA32
        ANDi(SP, -NJ_ALIGN_STACK);
#elif defined NANOJIT_AMD64
        ANDQi(SP, -NJ_ALIGN_STACK);
#endif
        MR(FP,SP);
        PUSHr(FP); // Save caller's FP.

        return patchEntry;
    }
    void Assembler::nFragExit(LInsp guard)
    {
        SideExit *exit = guard->exit();
        bool trees = _frago->core()->config.tree_opt;
        Fragment *frag = exit->target;
        GuardRecord *lr = 0;
        bool destKnown = (frag && frag->fragEntry);
        if (destKnown && !trees)
        {
            // already exists, emit jump now.  no patching required.
            JMP(frag->fragEntry);
            lr = 0;
        }
        else
        {
            // target doesn't exist yet.  emit jump to epilog, and set up to patch later.
            lr = placeGuardRecord(guard);
#if defined NANOJIT_AMD64
            /* 8 bytes for address, 4 for imm32, 2 for jmp */
            underrunProtect(14);
            _nIns -= 8;
            *(intptr_t *)_nIns = intptr_t(_epilogue);
            lr->jmp = _nIns;
            JMPm_nochk(0);
#else
            JMP_long(_epilogue);
            lr->jmp = _nIns;
#endif
#if 0
            // @todo optimization; is it worth it? It means we can remove the loop over outbound in Fragment.link()
            // for trees we need the patch entry on the incoming fragment so we can unhook it later if needed
            if (trees && destKnown)
                patch(lr);
#endif
        }

        // first restore ESP from EBP, undoing SUBi(SP,amt) from genPrologue
        MR(SP,FP);

#ifdef NJ_VERBOSE
        if (_frago->core()->config.show_stats) {
            // load EDX (arg1) with Fragment *fromFrag, target fragment
            // will make use of this when calling fragenter().
#if defined NANOJIT_IA32
            int fromfrag = int((Fragment*)_thisfrag);
            LDi(argRegs[1], fromfrag);
#elif defined NANOJIT_AMD64
            LDQi(argRegs[1], intptr_t(_thisfrag));
#endif
        }
#endif

        // return value is GuardRecord*
#if defined NANOJIT_IA32
        LDi(EAX, int(lr));
#elif defined NANOJIT_AMD64
        LDQi(RAX, intptr_t(lr));
#endif
    }
    NIns *Assembler::genEpilogue(RegisterMask restore)
    {
        RET();
        POPr(FP); // Restore caller's FP.
        MR(SP,FP); // Undo forced alignment.

        // Restore saved registers.
        for (Register i=UnknownReg; i >= FirstReg; i = prevreg(i))
            if (restore&rmask(i)) { POPr(i); }

        POPr(FP); // Pop the pre-alignment SP.
        return _nIns;
    }
#if defined NANOJIT_IA32
    void Assembler::asm_call(LInsp ins)
    {
        uint32_t fid = ins->fid();
        const CallInfo* call = callInfoFor(fid);
        // must be signed, not unsigned
        const uint32_t iargs = call->count_iargs();
        int32_t fstack = call->count_args() - iargs;

        int32_t extra = 0;

#if defined NJ_NO_FASTCALL
        int32_t istack = iargs;
#else
        int32_t istack = iargs-2;  // first 2 4B args are in registers
        if (istack <= 0)
        {
            istack = 0;
        }
#endif

        const int32_t size = 4*istack + 8*fstack; // actual stack space used
        if (size) {
            // stack re-alignment
            // only pop our adjustment amount since callee pops args in FASTCALL mode
            extra = alignUp(size, NJ_ALIGN_STACK) - (size);
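            // For example, assuming NJ_ALIGN_STACK == 16: three int args and one
            // double give istack = 1 and fstack = 1, so size = 4 + 8 = 12 and
            // extra = alignUp(12, 16) - 12 = 4 bytes of padding.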
#ifndef NJ_NO_FASTCALL
            if (extra > 0)
            {
                ADDi(SP, extra);
            }
#endif
        }

#ifdef NJ_NO_FASTCALL
        // In C calling conventions, callee doesn't pop args.
        ADDi(SP, 4*iargs + 8*fstack + extra);
#endif

        CALL(call);

#ifdef NJ_NO_FASTCALL
        if (iargs >= 1) {
            PUSHr(ECX);
            if (iargs >= 2) {
                PUSHr(EDX);
            }
        }
#endif

        // make sure fpu stack is empty before call (restoreCallerSaved)
        NanoAssert(_allocator.isFree(FST0));
        // note: this code requires that ref arguments (ARGSIZE_Q)
        // be one of the first two arguments
        // pre-assign registers to the first 2 4B args
        const int max_regs = (iargs < 2) ? iargs : 2;
        int n = 0;

        ArgSize sizes[10];
        uint32_t argc = call->get_sizes(sizes);

        for(uint32_t i=0; i < argc; i++)
        {
            uint32_t j = argc-i-1;
            ArgSize sz = sizes[j];
            Register r = UnknownReg;
            if (n < max_regs && sz != ARGSIZE_F)
                r = argRegs[n++]; // tell asm_arg what reg to use
            asm_arg(sz, ins->arg(j), r);
        }

        if (extra > 0)
        {
            SUBi(SP, extra);
        }
    }
#elif defined NANOJIT_AMD64

    void Assembler::asm_call(LInsp ins)
    {
        Register fpu_reg = XMM0;
        uint32_t fid = ins->fid();
        const CallInfo* call = callInfoFor(fid);
        int n = 0;

        CALL(call);

        ArgSize sizes[10];
        uint32_t argc = call->get_sizes(sizes);

        for(uint32_t i=0; i < argc; i++)
        {
            uint32_t j = argc-i-1;
            ArgSize sz = sizes[j];
            Register r = UnknownReg;
            if (sz != ARGSIZE_F) {
                r = argRegs[n++]; // tell asm_arg what reg to use
            } else {
                r = fpu_reg;
                fpu_reg = nextreg(fpu_reg);
            }
            findSpecificRegFor(ins->arg(j), r);
        }
    }
#endif
    void Assembler::nMarkExecute(Page* page, int32_t count, bool enable)
    {
#if defined WIN32 || defined WIN64
        DWORD dwIgnore;
        VirtualProtect(&page->code, count*NJ_PAGE_SIZE, PAGE_EXECUTE_READWRITE, &dwIgnore);
#elif defined AVMPLUS_UNIX
        intptr_t addr = (intptr_t)&page->code;
        addr &= ~((uintptr_t)NJ_PAGE_SIZE - 1);
#if defined SOLARIS
        if (mprotect((char *)addr, count*NJ_PAGE_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) {
#else
        if (mprotect((void *)addr, count*NJ_PAGE_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) {
#endif
            AvmDebugLog(("FATAL ERROR: mprotect(PROT_EXEC) failed\n"));
            abort();
        }
#endif
        (void)enable;
    }
    Register Assembler::nRegisterAllocFromSet(int set)
    {
        Register r;
        RegAlloc &regs = _allocator;
#ifdef WIN32
        _asm
        {
            mov ecx, regs
            bsf eax, set                    // i = first bit set
            btr RegAlloc::free[ecx], eax    // free &= ~rmask(i)
            mov r, eax
        }
#elif defined WIN64
        unsigned long tr, fr;
        _BitScanForward(&tr, set);
        _bittestandreset(&fr, tr);
        regs.free = fr;
        r = tr;
#else
        asm(
            "bsf %1, %%eax\n\t"
            "btr %%eax, %2\n\t"
            "movl %%eax, %0\n\t"
            : "=m"(r) : "m"(set), "m"(regs.free) : "%eax", "memory" );
#endif /* WIN32 */
        return r;
    }
    void Assembler::nRegisterResetAll(RegAlloc& a)
    {
        // add scratch registers to our free list for the allocator
        a.clear();
        a.used = 0;
        a.free = SavedRegs | ScratchRegs;
#if defined NANOJIT_IA32
        if (!sse2)
            a.free &= ~XmmRegs;
#endif
        debug_only( a.managed = a.free; )
    }
    void Assembler::nPatchBranch(NIns* branch, NIns* location)
    {
#if defined NANOJIT_IA32
        intptr_t offset = intptr_t(location) - intptr_t(branch);
        if (branch[0] == JMPc)
            *(uint32_t*)&branch[1] = offset - 5;
        else
            *(uint32_t*)&branch[2] = offset - 6;
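        // JMP rel32 is E9 followed by a 32-bit displacement (5 bytes, displacement
        // at branch+1); Jcc rel32 is 0F 8x followed by a 32-bit displacement
        // (6 bytes, displacement at branch+2).  The displacement is relative to
        // the end of the instruction, hence the -5 / -6 adjustments.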
#else
        if (branch[0] == 0xFF && branch[1] == 0x25) {
            NIns *mem;

            mem = &branch[6] + *(int32_t *)&branch[2];
            *(intptr_t *)mem = intptr_t(location);
        } else {
            NanoAssertMsg(0, "Unknown branch type in nPatchBranch");
        }
#endif
    }
    RegisterMask Assembler::hint(LIns* i, RegisterMask allow)
    {
        uint32_t op = i->opcode();
        int prefer = allow;
        if (op == LIR_call)
#if defined NANOJIT_IA32
            prefer &= rmask(EAX);
#elif defined NANOJIT_AMD64
            prefer &= rmask(RAX);
#endif
        else if (op == LIR_param)
            prefer &= rmask(Register(i->imm8()));
#if defined NANOJIT_IA32
        else if (op == LIR_callh || op == LIR_rsh && i->oprnd1()->opcode()==LIR_callh)
            prefer &= rmask(EDX);
#else
        else if (op == LIR_callh)
            prefer &= rmask(RAX);
#endif
        else if (i->isCmp())
            prefer &= AllowableFlagRegs;
        else if (i->isconst())
            prefer &= ScratchRegs;
        return (_allocator.free & prefer) ? prefer : allow;
    }
    void Assembler::asm_qjoin(LIns *ins)
    {
        int d = findMemFor(ins);
        AvmAssert(d);
        LIns* lo = ins->oprnd1();
        LIns* hi = ins->oprnd2();

        Reservation *resv = getresv(ins);
        Register rr = resv->reg;

        if (rr != UnknownReg && (rmask(rr) & FpRegs))
            evict(rr);

        if (hi->isconst())
        {
            STi(FP, d+4, hi->constval());
        }
        else
        {
            Register r = findRegFor(hi, GpRegs);
            ST(FP, d+4, r);
        }

        if (lo->isconst())
        {
            STi(FP, d, lo->constval());
        }
        else
        {
            // okay if r gets recycled.
            Register r = findRegFor(lo, GpRegs);
            ST(FP, d, r);
        }

        freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
    }
    void Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
    {
        if (i->isconst())
        {
            if (!resv->arIndex) {
                reserveFree(i);
            }
            LDi(r, i->constval());
        }
        else
        {
            int d = findMemFor(i);
            if (rmask(r) & FpRegs)
            {
#if defined NANOJIT_IA32
                if (rmask(r) & XmmRegs) {
#endif
                    SSE_LDQ(r, d, FP);
#if defined NANOJIT_IA32
                } else {
                    FLDQ(d, FP);
                }
#endif
            }
            else
            {
#if defined NANOJIT_AMD64
                LDQ(r, d, FP);
#else
                LD(r, d, FP);
#endif
            }
            verbose_only(if (_verbose) {
                outputf("        restore %s", _thisfrag->lirbuf->names->formatRef(i));
            })
        }
    }
    void Assembler::asm_store32(LIns *value, int dr, LIns *base)
    {
        if (value->isconst())
        {
            Register rb = findRegFor(base, GpRegs);
            int c = value->constval();
            STi(rb, dr, c);
        }
        else
        {
            // make sure the value is in a register
            Reservation *rA, *rB;
            findRegFor2(GpRegs, value, rA, base, rB);
            Register ra = rA->reg;
            Register rb = rB->reg;
            ST(rb, dr, ra);
        }
    }
    void Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
    {
        (void)i;
        int d = disp(resv);
        Register rr = resv->reg;
        if (d)
        {
            // save to spill location
            if (rmask(rr) & FpRegs)
            {
#if defined NANOJIT_IA32
                if (rmask(rr) & XmmRegs) {
#endif
                    SSE_STQ(d, FP, rr);
#if defined NANOJIT_IA32
                } else {
                    FSTQ((pop?1:0), d, FP);
                }
#endif
            }
            else
            {
#if defined NANOJIT_AMD64
                STQ(FP, d, rr);
#else
                ST(FP, d, rr);
#endif
            }
            verbose_only(if (_verbose) {
                outputf("        spill %s", _thisfrag->lirbuf->names->formatRef(i));
            })
        }
#if defined NANOJIT_IA32
        else if (pop && (rmask(rr) & x87Regs))
        {
            // pop the fpu result since it isn't used
            FSTP(FST0);
        }
#endif
    }
    void Assembler::asm_load64(LInsp ins)
    {
        LIns* base = ins->oprnd1();
        int db = ins->oprnd2()->constval();
        Reservation *resv = getresv(ins);
        Register rr = resv->reg;

        if (rr != UnknownReg && rmask(rr) & XmmRegs)
        {
            freeRsrcOf(ins, false);
            Register rb = findRegFor(base, GpRegs);
            SSE_LDQ(rr, db, rb);
        }
#if defined NANOJIT_AMD64
        else if (rr != UnknownReg && rmask(rr) & GpRegs)
        {
            freeRsrcOf(ins, false);
            Register rb = findRegFor(base, GpRegs);
            LDQ(rr, db, rb);
        }
        else
        {
            int d = disp(resv);
            Register rb = findRegFor(base, GpRegs);

            /* We need a temporary register we can move the destination into */
            rr = registerAlloc(GpRegs);

            STQ(FP, d, rr);
            LDQ(rr, db, rb);

            /* Mark as free */
            _allocator.addFree(rr);

            freeRsrcOf(ins, false);
        }
#elif defined NANOJIT_IA32
        else
        {
            int dr = disp(resv);
            Register rb = findRegFor(base, GpRegs);
            resv->reg = UnknownReg;

            // don't use an fpu reg to simply load & store the value.
            if (dr)
                asm_mmq(FP, dr, rb, db);

            freeRsrcOf(ins, false);

            if (rr != UnknownReg)
            {
                NanoAssert(rmask(rr)&FpRegs);
                _allocator.retire(rr);
                FLDQ(db, rb);
            }
        }
#endif
    }
    void Assembler::asm_store64(LInsp value, int dr, LInsp base)
    {
        if (value->isconstq())
        {
            // if a constant 64-bit value just store it now rather than
            // generating a pointless store/load/store sequence
            Register rb = findRegFor(base, GpRegs);
            const int32_t* p = (const int32_t*) (value-2);
            STi(rb, dr+4, p[1]);
            STi(rb, dr, p[0]);
            return;
        }

#if defined NANOJIT_IA32
        if (value->isop(LIR_ldq) || value->isop(LIR_qjoin))
        {
            // value is 64bit struct or int64_t, or maybe a double.
            // it may be live in an FPU reg.  Either way, don't
            // put it in an FPU reg just to load & store it.

            // a) if we know it's not a double, this is right.
            // b) if we guarded that its a double, this store could be on
            // the side exit, copying a non-double.
            // c) maybe its a double just being stored.  oh well.

            if (sse2) {
                Register rv = findRegFor(value, XmmRegs);
                Register rb = findRegFor(base, GpRegs);
                SSE_STQ(dr, rb, rv);
                return;
            }

            int da = findMemFor(value);
            Register rb = findRegFor(base, GpRegs);
            asm_mmq(rb, dr, FP, da);
            return;
        }

        Reservation* rA = getresv(value);
        int pop = !rA || rA->reg==UnknownReg;
        Register rv = findRegFor(value, sse2 ? XmmRegs : FpRegs);
        Register rb = findRegFor(base, GpRegs);

        if (rmask(rv) & XmmRegs) {
            SSE_STQ(dr, rb, rv);
        } else {
            FSTQ(pop, dr, rb);
        }
#elif defined NANOJIT_AMD64
        /* If this is not a float operation, we can use GpRegs instead.
         * We can do this in a few other cases but for now I'll keep it simple.
         */
        Register rb = findRegFor(base, GpRegs);
        Reservation *rV = getresv(value);

        if (rV != NULL && rV->reg != UnknownReg) {
            if (rmask(rV->reg) & GpRegs) {
                STQ(rb, dr, rV->reg);
            } else {
                SSE_STQ(dr, rb, rV->reg);
            }
        } else {
            Register rv;

            /* Try to catch some common patterns.
             * Note: this is a necessity, since in between things like
             * asm_fop() could see the reservation and try to use a non-SSE
             * register for adding.  Same for asm_qbinop in theory.
             * There should probably be asserts to catch more cases.
             */
            if (value->isop(LIR_u2f)
                || value->isop(LIR_i2f)
                || (value->opcode() >= LIR_fneg && value->opcode() <= LIR_fmul)
                || value->opcode() == LIR_fdiv
                || value->opcode() == LIR_fcall) {
                rv = findRegFor(value, XmmRegs);
                SSE_STQ(dr, rb, rv);
            } else {
                rv = findRegFor(value, GpRegs);
                STQ(rb, dr, rv);
            }
        }
#endif
    }
    /**
     * copy 64 bits: (rd+dd) <- (rs+ds)
     */
    void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
    {
        // value is either a 64bit struct or maybe a float
        // that isn't live in an FPU reg.  Either way, don't
        // put it in an FPU reg just to load & store it.
#if defined NANOJIT_IA32
        if (sse2)
        {
#endif
            // use SSE to load+store 64bits
            Register t = registerAlloc(XmmRegs);
            _allocator.addFree(t);
            SSE_STQ(dd, rd, t);
            SSE_LDQ(t, ds, rs);
#if defined NANOJIT_IA32
        }
        else
        {
            // get a scratch reg
            Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs)));
            _allocator.addFree(t);
            ST(rd, dd+4, t);
            LD(t, ds+4, rs);
            ST(rd, dd, t);
            LD(t, ds, rs);
        }
#endif
    }
    void Assembler::asm_quad(LInsp ins)
    {
#if defined NANOJIT_IA32
        Reservation *rR = getresv(ins);
        Register rr = rR->reg;
        if (rr != UnknownReg)
        {
            // @todo -- add special-cases for 0 and 1
            _allocator.retire(rr);
            rR->reg = UnknownReg;
            NanoAssert((rmask(rr) & FpRegs) != 0);

            const double d = ins->constvalf();
            if (rmask(rr) & XmmRegs) {
                if (d == 0.0) {
                    SSE_XORPDr(rr, rr);
                } else if (d == 1.0) {
                    // 1.0 is extremely frequent and worth special-casing!
                    static const double k_ONE = 1.0;
                    LDSDm(rr, &k_ONE);
                } else {
                    findMemFor(ins);
                    const int d = disp(rR);
                    SSE_LDQ(rr, d, FP);
                }
            } else {
                if (d == 0.0) {
                    FLDZ();
                } else if (d == 1.0) {
                    FLD1();
                } else {
                    findMemFor(ins);
                    int d = disp(rR);
                    FLDQ(d,FP);
                }
            }
        }

        // @todo, if we used xor, ldsd, fldz, etc above, we don't need mem here
        int d = disp(rR);
        freeRsrcOf(ins, false);
        if (d)
        {
            const int32_t* p = (const int32_t*) (ins-2);
            STi(FP,d+4,p[1]);
            STi(FP,d,p[0]);
        }
#elif defined NANOJIT_AMD64
        Reservation *rR = getresv(ins);
        int64_t val = *(int64_t *)(ins - 2);

        if (rR->reg != UnknownReg)
        {
            Register rr = rR->reg;
            freeRsrcOf(ins, false);
            if (rmask(rr) & GpRegs)
            {
                LDQi(rr, val);
            }
            else if (rmask(rr) & XmmRegs)
            {
                if (ins->constvalf() == 0.0)
                {
                    SSE_XORPDr(rr, rr);
                }
                else
                {
                    /* Get a short-lived register, not associated with instruction */
                    Register rs = registerAlloc(GpRegs);

                    SSE_MOVD(rr, rs);
                    LDQi(rs, val);

                    _allocator.addFree(rs);
                }
            }
        }
        else
        {
            const int32_t* p = (const int32_t*) (ins-2);
            int dr = disp(rR);
            freeRsrcOf(ins, false);
            STi(FP, dr+4, p[1]);
            STi(FP, dr, p[0]);
        }
#endif
    }
    bool Assembler::asm_qlo(LInsp ins, LInsp q)
    {
#if defined NANOJIT_IA32
        if (!sse2)
        {
            return false;
        }
#endif

        Reservation *resv = getresv(ins);
        Register rr = resv->reg;
        if (rr == UnknownReg) {
            // store quad in spill loc
            int d = disp(resv);
            freeRsrcOf(ins, false);
            Register qr = findRegFor(q, XmmRegs);
            SSE_MOVDm(d, FP, qr);
        } else {
            freeRsrcOf(ins, false);
            Register qr = findRegFor(q, XmmRegs);
            SSE_MOVD(rr,qr);
        }

        return true;
    }
    void Assembler::asm_fneg(LInsp ins)
    {
#if defined NANOJIT_IA32
        if (sse2)
        {
#endif
            LIns *lhs = ins->oprnd1();

            Register rr = prepResultReg(ins, XmmRegs);
            Reservation *rA = getresv(lhs);
            Register ra;

            // if this is last use of lhs in reg, we can re-use result reg
            if (rA == 0 || (ra = rA->reg) == UnknownReg) {
                ra = findSpecificRegFor(lhs, rr);
            } else if ((rmask(ra) & XmmRegs) == 0) {
                /* We need this case on AMD64, because it's possible that
                 * an earlier instruction has done a quadword load and reserved a
                 * GPR.  If so, ask for a new register.
                 */
                ra = findRegFor(lhs, XmmRegs);
            }
            // else, rA already has a register assigned.

#if defined __SUNPRO_CC
            // from Sun Studio C++ Readme: #pragma align inside namespace requires mangled names
            static uint32_t temp[] = {0, 0, 0, 0, 0, 0, 0};
            static uint32_t *negateMask = (uint32_t *)alignUp(temp, 16);
            negateMask[1] = 0x80000000;
#else
            static const AVMPLUS_ALIGN16(uint32_t) negateMask[] = {0,0x80000000,0,0};
#endif
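            // As a 128-bit constant, negateMask has only bit 63 set, so XORPD
            // against it flips the sign bit of the double in the low lane and
            // leaves the high lane untouched.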
            SSE_XORPD(rr, negateMask);

            if (rr != ra)
                SSE_MOVSD(rr, ra);
#if defined NANOJIT_IA32
        }
        else
        {
            Register rr = prepResultReg(ins, FpRegs);

            LIns* lhs = ins->oprnd1();

            // lhs into reg, prefer same reg as result
            Reservation* rA = getresv(lhs);
            // if this is last use of lhs in reg, we can re-use result reg
            if (rA == 0 || rA->reg == UnknownReg)
                findSpecificRegFor(lhs, rr);
            // else, rA already has a different reg assigned

            NanoAssert(getresv(lhs)!=0 && getresv(lhs)->reg==FST0);
            // assume that the lhs is in ST(0) and rhs is on stack
            FCHS();

            // if we had more than one fpu reg, this is where
            // we would move ra into rr if rr != ra.
        }
#endif
    }
    void Assembler::asm_pusharg(LInsp p)
    {
        // arg goes on stack
        Reservation* rA = getresv(p);
        if (rA == 0)
        {
            if (p->isconst())
            {
                // small const we push directly
                PUSHi(p->constval());
            }
            else
            {
                Register ra = findRegFor(p, GpRegs);
                PUSHr(ra);
            }
        }
        else if (rA->reg == UnknownReg)
        {
            PUSHm(disp(rA), FP);
        }
        else
        {
            PUSHr(rA->reg);
        }
    }
    void Assembler::asm_farg(LInsp p)
    {
#if defined NANOJIT_IA32
        Register r = findRegFor(p, FpRegs);
        if (rmask(r) & XmmRegs) {
            SSE_STQ(0, SP, r);
        } else {
            FSTPQ(0, SP);
        }
        PUSHr(ECX); // 2*pushr is smaller than sub
        PUSHr(ECX);
#endif
    }
    void Assembler::asm_fop(LInsp ins)
    {
        LOpcode op = ins->opcode();
#if defined NANOJIT_IA32
        if (sse2)
        {
#endif
            LIns *lhs = ins->oprnd1();
            LIns *rhs = ins->oprnd2();

            RegisterMask allow = XmmRegs;
            Register rb = UnknownReg;
            if (lhs != rhs) {
                rb = findRegFor(rhs,allow);
                allow &= ~rmask(rb);
            }

            Register rr = prepResultReg(ins, allow);
            Reservation *rA = getresv(lhs);
            Register ra;

            // if this is last use of lhs in reg, we can re-use result reg
            if (rA == 0 || (ra = rA->reg) == UnknownReg) {
                ra = findSpecificRegFor(lhs, rr);
            } else if ((rmask(ra) & XmmRegs) == 0) {
                /* We need this case on AMD64, because it's possible that
                 * an earlier instruction has done a quadword load and reserved a
                 * GPR.  If so, ask for a new register.
                 */
                ra = findRegFor(lhs, XmmRegs);
            }
            // else, rA already has a register assigned.

            if (lhs == rhs)
                rb = ra;

            if (op == LIR_fadd)
                SSE_ADDSD(rr, rb);
            else if (op == LIR_fsub)
                SSE_SUBSD(rr, rb);
            else if (op == LIR_fmul)
                SSE_MULSD(rr, rb);
            else //if (op == LIR_fdiv)
                SSE_DIVSD(rr, rb);

            if (rr != ra)
                SSE_MOVSD(rr, ra);
#if defined NANOJIT_IA32
        }
        else
        {
            // we swap lhs/rhs on purpose here, works out better
            // if you only have one fpu reg.  use divr/subr.
            LIns* rhs = ins->oprnd1();
            LIns* lhs = ins->oprnd2();
            Register rr = prepResultReg(ins, rmask(FST0));

            // make sure rhs is in memory
            int db = findMemFor(rhs);

            // lhs into reg, prefer same reg as result
            Reservation* rA = getresv(lhs);
            // last use of lhs in reg, can reuse rr
            if (rA == 0 || rA->reg == UnknownReg)
                findSpecificRegFor(lhs, rr);
            // else, rA already has a different reg assigned

            NanoAssert(getresv(lhs)!=0 && getresv(lhs)->reg==FST0);
            // assume that the lhs is in ST(0) and rhs is on stack
            if (op == LIR_fadd)
                { FADD(db, FP); }
            else if (op == LIR_fsub)
                { FSUBR(db, FP); }
            else if (op == LIR_fmul)
                { FMUL(db, FP); }
            else if (op == LIR_fdiv)
                { FDIVR(db, FP); }
        }
#endif
    }
    void Assembler::asm_i2f(LInsp ins)
    {
        // where our result goes
        Register rr = prepResultReg(ins, FpRegs);
#if defined NANOJIT_IA32
        if (rmask(rr) & XmmRegs)
        {
#endif
            // todo support int value in memory
            Register gr = findRegFor(ins->oprnd1(), GpRegs);
            SSE_CVTSI2SD(rr, gr);
#if defined NANOJIT_IA32
        }
        else
        {
            int d = findMemFor(ins->oprnd1());
            FILD(d, FP);
        }
#endif
    }
    Register Assembler::asm_prep_fcall(Reservation *rR, LInsp ins)
    {
#if defined NANOJIT_IA32
        if (rR) {
            Register rr;
            if ((rr=rR->reg) != UnknownReg && (rmask(rr) & XmmRegs))
                evict(rr);
        }
        return prepResultReg(ins, rmask(FST0));
#elif defined NANOJIT_AMD64
        evict(RAX);
        return prepResultReg(ins, rmask(XMM0));
#endif
    }
    void Assembler::asm_u2f(LInsp ins)
    {
        // where our result goes
        Register rr = prepResultReg(ins, FpRegs);
#if defined NANOJIT_IA32
        if (rmask(rr) & XmmRegs)
        {
#endif
            // don't call findRegFor, we want a reg we can stomp on for a very short time,
            // not a reg that will continue to be associated with the LIns
            Register gr = registerAlloc(GpRegs);

            // technique inspired by gcc disassembly
            // Edwin explains it:
            //
            // gr is 0..2^32-1
            //
            //     sub gr,0x80000000
            //
            // now gr is -2^31..2^31-1, i.e. the range of int, but not the same value
            // as before
            //
            //     cvtsi2sd rr,gr
            //
            // rr is now a double with the int value range
            //
            //     addsd rr, 2147483648.0
            //
            // adding back double(0x80000000) makes the range 0..2^32-1.
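            //
            // For example, gr = 0xFFFFFFFF (4294967295): the subtract gives
            // 0x7FFFFFFF = 2147483647, cvtsi2sd gives 2147483647.0, and adding
            // 2147483648.0 recovers 4294967295.0; gr = 0 maps to -2147483648
            // and then back to 0.0.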
            static const double k_NEGONE = 2147483648.0;
#if defined NANOJIT_IA32
            SSE_ADDSDm(rr, &k_NEGONE);
#elif defined NANOJIT_AMD64
            /* Squirrel the constant at the bottom of the page. */
            if (_dblNegPtr != NULL)
            {
                underrunProtect(10);
            }
            if (_dblNegPtr == NULL)
            {
                underrunProtect(30);
                uint8_t *base, *begin;
                base = (uint8_t *)((intptr_t)_nIns & ~((intptr_t)NJ_PAGE_SIZE-1));
                base += sizeof(PageHeader) + _pageData;
                begin = base;
                /* Make sure we align */
                if ((uintptr_t)base & 0xF) {
                    base = (NIns *)((uintptr_t)base & ~(0xF));
                    base += 16;
                }
                _pageData += (int32_t)(base - begin) + sizeof(double);
                _negOnePtr = (NIns *)base;
                *(double *)_negOnePtr = k_NEGONE;
            }
            SSE_ADDSDm(rr, _negOnePtr);
#endif

            SSE_CVTSI2SD(rr, gr);

            Reservation* resv = getresv(ins->oprnd1());
            Register xr;
            if (resv && (xr = resv->reg) != UnknownReg && (rmask(xr) & GpRegs))
            {
                LEA(gr, 0x80000000, xr);
            }
            else
            {
                const int d = findMemFor(ins->oprnd1());
                SUBi(gr, 0x80000000);
                LD(gr, d, FP);
            }

            // ok, we're done with it
            _allocator.addFree(gr);
#if defined NANOJIT_IA32
        }
        else
        {
            const int disp = -8;
            const Register base = SP;
            Register gr = findRegFor(ins->oprnd1(), GpRegs);
            NanoAssert(rr == FST0);
            FILDQ(disp, base);
            STi(base, disp+4, 0);   // high 32 bits = 0
            ST(base, disp, gr);     // low 32 bits = unsigned value
        }
#endif
    }
    void Assembler::asm_nongp_copy(Register r, Register s)
    {
        if ((rmask(r) & XmmRegs) && (rmask(s) & XmmRegs)) {
            SSE_MOVSD(r, s);
        } else if ((rmask(r) & GpRegs) && (rmask(s) & XmmRegs)) {
            SSE_MOVD(r, s);
        } else {
            if (rmask(r) & XmmRegs) {
                // x87 -> xmm
                NanoAssertMsg(false, "Should not move data from GPR to XMM");
            } else {
                // xmm -> x87
                NanoAssertMsg(false, "Should not move data from GPR/XMM to x87 FPU");
            }
        }
    }
    void Assembler::asm_fcmp(LIns *cond)
    {
        LOpcode condop = cond->opcode();
        NanoAssert(condop >= LIR_feq && condop <= LIR_fge);
        LIns* lhs = cond->oprnd1();
        LIns* rhs = cond->oprnd2();

        int mask;
        if (condop == LIR_feq)
            mask = 0x44;
        else if (condop == LIR_fle)
            mask = 0x41;
        else if (condop == LIR_flt)
            mask = 0x05;
        else if (condop == LIR_fge) {
            // swap, use le
            LIns* t = lhs; lhs = rhs; rhs = t;
            mask = 0x41;
        } else { // if (condop == LIR_fgt)
            // swap, use lt
            LIns* t = lhs; lhs = rhs; rhs = t;
            mask = 0x05;
        }
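        // The masks pick out bits of the flags byte tested below: bit 6, bit 2
        // and bit 0 (ZF/PF/CF after LAHF or PUSHF, C3/C2/C0 after FNSTSW).  So
        // 0x44 tests ZF|PF (equal or unordered), 0x41 tests ZF|CF (less than or
        // equal), and 0x05 tests PF|CF (less than, or unordered).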
#if defined NANOJIT_IA32
        if (sse2)
        {
#endif
            // UNORDERED:    ZF,PF,CF <- 111;
            // GREATER_THAN: ZF,PF,CF <- 000;
            // LESS_THAN:    ZF,PF,CF <- 001;
            // EQUAL:        ZF,PF,CF <- 100;

            if (condop == LIR_feq && lhs == rhs) {
                // nan check
                Register r = findRegFor(lhs, XmmRegs);
                SSE_UCOMISD(r, r);
            } else {
#if defined NANOJIT_IA32
                evict(EAX);
                TEST_AH(mask);
                LAHF();
#elif defined NANOJIT_AMD64
                evict(RAX);
                TEST_AL(mask);
                POPr(RAX);
                PUSHFQ();
#endif
                Reservation *rA, *rB;
                findRegFor2(XmmRegs, lhs, rA, rhs, rB);
                SSE_UCOMISD(rA->reg, rB->reg);
            }
#if defined NANOJIT_IA32
        }
        else
        {
            evict(EAX);
            TEST_AH(mask);
            FNSTSW_AX();
            NanoAssert(lhs->isQuad() && rhs->isQuad());
            Reservation *rA;
            if (lhs != rhs)
            {
                // compare two different numbers
                int d = findMemFor(rhs);
                rA = getresv(lhs);
                int pop = !rA || rA->reg == UnknownReg;
                findSpecificRegFor(lhs, FST0);
                // lhs is in ST(0) and rhs is on stack
                FCOM(pop, d, FP);
            }
            else
            {
                // compare n to itself, this is a NaN test.
                rA = getresv(lhs);
                int pop = !rA || rA->reg == UnknownReg;
                findSpecificRegFor(lhs, FST0);
                // value in ST(0)
                if (pop)
                    FCOMPP();
                else
                    FCOMP();
                FLDr(FST0); // DUP
            }
        }
#endif
    }
    NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
    {
        NIns* was;
#if defined NANOJIT_AMD64
        was = (NIns*)( *(intptr_t*)(at) );
        *(intptr_t *)(at) = intptr_t(target);
#else
        NIns* save = _nIns;
        was = (NIns*)( (intptr_t)*(int32_t*)(at+1)+(intptr_t)(at+5) );
        _nIns = at + 5; // +5 is size of JMP
        intptr_t tt = (intptr_t)target - (intptr_t)_nIns;
        IMM32(tt);
        *(--_nIns) = JMPc;
        _nIns = save;
#endif
        return was;
    }
    void Assembler::nativePageReset()
    {
#if defined NANOJIT_AMD64
        /* We store some stuff at the bottom of the page.
         * We reserve 8-bytes for long jumps just in case we need them.
         */
        _pageData = 0;
        _dblNegPtr = NULL;
        _negOnePtr = NULL;
#endif
    }
    Register Assembler::asm_binop_rhs_reg(LInsp ins)
    {
        LOpcode op = ins->opcode();
        LIns *rhs = ins->oprnd2();

        if (op == LIR_lsh || op == LIR_rsh || op == LIR_ush) {
#if defined NANOJIT_IA32
            return findSpecificRegFor(rhs, ECX);
#elif defined NANOJIT_AMD64
            return findSpecificRegFor(rhs, RCX);
#endif
        }

        return UnknownReg;
    }
#if defined NANOJIT_AMD64
    void Assembler::asm_qbinop(LIns *ins)
    {
        LInsp lhs = ins->oprnd1();
        LInsp rhs = ins->oprnd2();
        LOpcode op = ins->opcode();

        Register rr = prepResultReg(ins, GpRegs);
        Reservation *rA = getresv(lhs);
        Register ra;

        if (rA == NULL || (ra = rA->reg) == UnknownReg) {
            ra = findSpecificRegFor(lhs, rr);
        }

        if (rhs->isconst())
        {
            int c = rhs->constval();

            if (op == LIR_qiadd)
            {
                ADDQi(rr, c);
            } else if (op == LIR_qiand) {
                ANDQi(rr, c);
            } else if (op == LIR_qilsh) {
                SHLQi(rr, c);
            } else if (op == LIR_qior) {
                ORQi(rr, c);
            }
        } else {
            Register rv;

            if (lhs == rhs) {
                rv = ra;
            } else {
                rv = findRegFor(rhs, GpRegs & ~(rmask(rr)));
            }

            if (op == LIR_qiadd) {
                ADDQ(rr, rv);
            } else if (op == LIR_qiand) {
                ANDQ(rr, rv);
            } else if (op == LIR_qior) {
                ORQ(rr, rv);
            } else {
                NanoAssert(rhs->isconst());
            }
        }

        if (rr != ra) {
            MR(rr, ra);
        }
    }
#endif
    void Assembler::nativePageSetup()
    {
        if (!_nIns)     _nIns     = pageAlloc();
        if (!_nExitIns) _nExitIns = pageAlloc(true);
    }
#endif /* FEATURE_NANOJIT */
}