js/src/nanojit/Nativei386.cpp
1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: t; tab-width: 4 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is [Open Source Virtual Machine].
17 * The Initial Developer of the Original Code is
18 * Adobe System Incorporated.
19 * Portions created by the Initial Developer are Copyright (C) 2004-2007
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
23 * Adobe AS3 Team
24 * Mozilla TraceMonkey Team
25 * Asko Tontti <atontti@cc.hut.fi>
27 * Alternatively, the contents of this file may be used under the terms of
28 * either the GNU General Public License Version 2 or later (the "GPL"), or
29 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
39 * ***** END LICENSE BLOCK ***** */
41 #ifdef _MAC
42 // for MakeDataExecutable
43 #include <CoreServices/CoreServices.h>
44 #endif
46 #if defined AVMPLUS_UNIX || defined AVMPLUS_MAC
47 #include <sys/mman.h>
48 #include <errno.h>
49 #include <stdlib.h>
50 #endif
51 #include "nanojit.h"
53 namespace nanojit
55 #ifdef FEATURE_NANOJIT
57 #ifdef NJ_VERBOSE
58 const char *regNames[] = {
59 #if defined NANOJIT_IA32
60 "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
61 "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
62 "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7"
63 #elif defined NANOJIT_AMD64
64 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
65 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
66 "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
67 "xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15"
68 #endif
70 #endif
72 #if defined NANOJIT_IA32
73 const Register Assembler::argRegs[] = { ECX, EDX };
74 const Register Assembler::retRegs[] = { EAX, EDX };
75 const Register Assembler::savedRegs[] = { EBX, ESI, EDI };
76 #elif defined NANOJIT_AMD64
77 #if defined WIN64
78 const Register Assembler::argRegs[] = { R8, R9, RCX, RDX };
79 #else
80 const Register Assembler::argRegs[] = { RDI, RSI, RDX, RCX, R8, R9 };
81 #endif
82 const Register Assembler::retRegs[] = { RAX, RDX };
83 const Register Assembler::savedRegs[] = { R13, R14, R15 };
84 #endif
86 const static uint8_t max_abi_regs[] = {
87 2, /* ABI_FASTCALL */
88 1, /* ABI_THISCALL */
89 0, /* ABI_STDCALL */
90 0 /* ABI_CDECL */
94 void Assembler::nInit(AvmCore* core)
96 (void) core;
97 OSDep::getDate();
100 NIns* Assembler::genPrologue()
103 * Prologue
105 uint32_t stackNeeded = STACK_GRANULARITY * _activation.highwatermark;
107 uint32_t stackPushed =
108 STACK_GRANULARITY + // returnaddr
109 STACK_GRANULARITY; // ebp
111 if (!_thisfrag->lirbuf->explicitSavedRegs)
112 stackPushed += NumSavedRegs * STACK_GRANULARITY;
114 uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
115 uint32_t amt = aligned - stackPushed;
117 // Reserve stackNeeded bytes, padded
118 // to preserve NJ_ALIGN_STACK-byte alignment.
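// Worked example of the padding arithmetic above (all numbers are
// illustrative assumptions, not values taken from this build: suppose
// STACK_GRANULARITY == 4, NJ_ALIGN_STACK == 16, highwatermark == 10,
// and the saved regs are pushed):
//   stackNeeded = 4 * 10           = 40
//   stackPushed = 4 + 4 + 3*4      = 20   (return addr, ebp, 3 saved regs)
//   aligned     = alignUp(60, 16)  = 64
//   amt         = 64 - 20          = 44   -> SUBi(SP, 44)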
119 if (amt)
121 #if defined NANOJIT_IA32
122 SUBi(SP, amt);
123 #elif defined NANOJIT_AMD64
124 SUBQi(SP, amt);
125 #endif
128 verbose_only( outputAddr=true; asm_output("[frag entry]"); )
129 NIns *fragEntry = _nIns;
130 MR(FP, SP); // Establish our own FP.
131 PUSHr(FP); // Save caller's FP.
133 if (!_thisfrag->lirbuf->explicitSavedRegs)
134 for (int i = 0; i < NumSavedRegs; ++i)
135 PUSHr(savedRegs[i]);
137 // align the entry point
138 asm_align_code();
140 return fragEntry;
143 void Assembler::asm_align_code() {
144 static uint8_t nop[][9] = {
145 {0x90},
146 {0x66,0x90},
147 {0x0f,0x1f,0x00},
148 {0x0f,0x1f,0x40,0x00},
149 {0x0f,0x1f,0x44,0x00,0x00},
150 {0x66,0x0f,0x1f,0x44,0x00,0x00},
151 {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00},
152 {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00},
153 {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00},
155 unsigned n;
156 while((n = uintptr_t(_nIns) & 15) != 0) {
157 if (n > 9)
158 n = 9;
159 underrunProtect(n);
160 _nIns -= n;
161 memcpy(_nIns, nop[n-1], n);
162 asm_output("nop%d", n);
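// Example pass through the loop above (assuming the current _nIns sits
// 13 bytes past a 16-byte boundary, i.e. uintptr_t(_nIns) & 15 == 13):
//   1st iteration: n = 13, capped to 9; emit the 9-byte nop; now &15 == 4
//   2nd iteration: n = 4;  emit the 4-byte nop; now &15 == 0, loop exits
// Because code is emitted backwards, the padding fills downwards until
// _nIns lands on the previous 16-byte boundary.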
166 void Assembler::nFragExit(LInsp guard)
168 SideExit *exit = guard->record()->exit;
169 bool trees = _frago->core()->config.tree_opt;
170 Fragment *frag = exit->target;
171 GuardRecord *lr = 0;
172 bool destKnown = (frag && frag->fragEntry);
173 if (destKnown && !trees)
175 // already exists, emit jump now. no patching required.
176 JMP(frag->fragEntry);
177 lr = 0;
179 else
181 // target doesn't exist yet. emit jump to epilogue, and set up to patch later.
182 lr = guard->record();
183 #if defined NANOJIT_AMD64
184 /* 8 bytes for address, 4 for imm32, 2 for jmp */
185 underrunProtect(14);
186 _nIns -= 8;
187 *(intptr_t *)_nIns = intptr_t(_epilogue);
188 lr->jmp = _nIns;
189 JMPm_nochk(0);
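/* Sketch of the bytes the emits above are expected to produce, lowest
 * address first (inferred from underrunProtect(14) and the FF 25 pattern
 * that nPatchBranch() looks for; an assumption, not taken from the
 * original comments):
 *   FF 25 00 00 00 00          ; jmp qword ptr [rip+0]
 *   <8-byte address of _epilogue>
 * The jump reads its target from the quadword that immediately follows
 * it, so patching the exit later only has to rewrite those 8 bytes.
 */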
190 #else
191 JMP_long(_epilogue);
192 lr->jmp = _nIns;
193 #endif
195 // first restore ESP from EBP, undoing SUBi(SP,amt) from genPrologue
196 MR(SP,FP);
198 // return value is GuardRecord*
199 #if defined NANOJIT_IA32
200 LDi(EAX, int(lr));
201 #elif defined NANOJIT_AMD64
202 LDQi(RAX, intptr_t(lr));
203 #endif
206 NIns *Assembler::genEpilogue()
208 RET();
210 if (!_thisfrag->lirbuf->explicitSavedRegs)
211 for (int i = NumSavedRegs - 1; i >= 0; --i)
212 POPr(savedRegs[i]);
214 POPr(FP); // Restore caller's FP.
215 MR(SP,FP); // pop the stack frame
216 return _nIns;
219 #if defined NANOJIT_IA32
220 void Assembler::asm_call(LInsp ins)
222 const CallInfo* call = ins->callInfo();
223 // fargs must be signed (int32_t), not unsigned
224 uint32_t iargs = call->count_iargs();
225 int32_t fargs = call->count_args() - iargs - call->isIndirect();
227 bool imt = call->isInterface();
228 if (imt)
229 iargs --;
231 uint32_t max_regs = max_abi_regs[call->_abi];
232 if (max_regs > iargs)
233 max_regs = iargs;
235 int32_t istack = iargs-max_regs; // first 2 4B args are in registers
236 int32_t extra = 0;
237 const int32_t pushsize = 4*istack + 8*fargs; // actual stack space used
239 #if _MSC_VER
240 // msc is slack, and MIR doesn't do anything extra, so let's use this
241 // call-site alignment to at least have code size parity with MIR.
242 uint32_t align = 4;//NJ_ALIGN_STACK;
243 #else
244 uint32_t align = NJ_ALIGN_STACK;
245 #endif
247 if (pushsize) {
248 // stack re-alignment
249 // only pop our adjustment amount since callee pops args in FASTCALL mode
250 extra = alignUp(pushsize, align) - pushsize;
251 if (call->_abi == ABI_CDECL) {
252 // with CDECL only, caller pops args
253 ADDi(SP, extra+pushsize);
254 } else if (extra > 0) {
255 ADDi(SP, extra);
259 bool indirect = false;
260 if (ins->isop(LIR_call) || ins->isop(LIR_fcall)) {
261 CALL(call);
263 else {
264 // indirect call. x86 Calling conventions don't use EAX as an
265 // argument, and do use EAX as a return value. We need a register
266 // for the address to call, so we use EAX since it will always be
267 // available
268 NanoAssert(ins->isop(LIR_calli) || ins->isop(LIR_fcalli));
269 CALLr(call, EAX);
270 indirect = true;
273 // make sure fpu stack is empty before call (restoreCallerSaved)
274 NanoAssert(_allocator.isFree(FST0));
275 // note: this code requires that ref arguments (ARGSIZE_Q)
276 // be one of the first two arguments
277 // pre-assign registers to the first N 4B args based on the calling convention
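// Illustration of the pre-assignment (a hypothetical signature, assuming
// ABI_FASTCALL so max_abi_regs[] above yields 2):
//   f(int a, int b, int c, double d)
//   a -> ECX, b -> EDX (argRegs[0..1]); c is pushed (4 bytes);
//   d always goes to the stack via asm_farg (8 bytes),
//   so pushsize = 4*1 + 8*1 = 12 above.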
278 uint32_t n = 0;
280 ArgSize sizes[2*MAXARGS];
281 uint32_t argc = call->get_sizes(sizes);
282 if (indirect) {
283 argc--;
284 asm_arg(ARGSIZE_LO, ins->arg(argc), EAX);
287 if (imt) {
288 // interface thunk calling convention: put iid in EDX
289 NanoAssert(call->_abi == ABI_CDECL);
290 argc--;
291 asm_arg(ARGSIZE_LO, ins->arg(argc), EDX);
294 for(uint32_t i=0; i < argc; i++)
296 uint32_t j = argc-i-1;
297 ArgSize sz = sizes[j];
298 Register r = UnknownReg;
299 if (n < max_regs && sz != ARGSIZE_F) {
300 r = argRegs[n++]; // tell asm_arg what reg to use
302 asm_arg(sz, ins->arg(j), r);
305 if (extra > 0)
306 SUBi(SP, extra);
309 #elif defined NANOJIT_AMD64
311 void Assembler::asm_call(LInsp ins)
313 Register fpu_reg = XMM0;
314 const CallInfo* call = ins->callInfo();
315 int n = 0;
317 CALL(call);
319 ArgSize sizes[10];
320 uint32_t argc = call->get_sizes(sizes);
322 for(uint32_t i=0; i < argc; i++)
324 uint32_t j = argc-i-1;
325 ArgSize sz = sizes[j];
326 Register r = UnknownReg;
327 if (sz != ARGSIZE_F) {
328 r = argRegs[n++]; // tell asm_arg what reg to use
329 } else {
330 r = fpu_reg;
331 fpu_reg = nextreg(fpu_reg);
333 findSpecificRegFor(ins->arg(j), r);
336 #endif
338 void Assembler::nMarkExecute(Page* page, int flags)
340 NanoAssert(sizeof(Page) == NJ_PAGE_SIZE);
341 #if defined WIN32 || defined WIN64
342 DWORD dwIgnore;
343 static const DWORD kProtFlags[4] =
345 PAGE_READONLY, // 0
346 PAGE_READWRITE, // PAGE_WRITE
347 PAGE_EXECUTE_READ, // PAGE_EXEC
348 PAGE_EXECUTE_READWRITE // PAGE_EXEC|PAGE_WRITE
350 DWORD prot = kProtFlags[flags & (PAGE_WRITE|PAGE_EXEC)];
351 BOOL res = VirtualProtect(page, NJ_PAGE_SIZE, prot, &dwIgnore);
352 if (!res)
354 // todo: we can't abort or assert here, we have to fail gracefully.
355 NanoAssertMsg(false, "FATAL ERROR: VirtualProtect() failed\n");
357 #elif defined AVMPLUS_UNIX || defined AVMPLUS_MAC
358 static const int kProtFlags[4] =
360 PROT_READ, // 0
361 PROT_READ|PROT_WRITE, // PAGE_WRITE
362 PROT_READ|PROT_EXEC, // PAGE_EXEC
363 PROT_READ|PROT_WRITE|PROT_EXEC // PAGE_EXEC|PAGE_WRITE
365 int prot = kProtFlags[flags & (PAGE_WRITE|PAGE_EXEC)];
366 intptr_t addr = (intptr_t)page;
367 addr &= ~((uintptr_t)NJ_PAGE_SIZE - 1);
368 NanoAssert(addr == (intptr_t)page);
369 #if defined SOLARIS
370 if (mprotect((char *)addr, NJ_PAGE_SIZE, prot) == -1)
371 #else
372 if (mprotect((void *)addr, NJ_PAGE_SIZE, prot) == -1)
373 #endif
375 // todo: we can't abort or assert here, we have to fail gracefully.
376 NanoAssertMsg(false, "FATAL ERROR: mprotect(PROT_EXEC) failed\n");
377 abort();
379 #else
380 (void)page;
381 #endif
384 Register Assembler::nRegisterAllocFromSet(int set)
386 Register r;
387 RegAlloc &regs = _allocator;
388 #ifdef WIN32
389 _asm
391 mov ecx, regs
392 bsf eax, set // i = first bit set
393 btr RegAlloc::free[ecx], eax // free &= ~rmask(i)
394 mov r, eax
396 #elif defined WIN64
397 unsigned long tr, fr;
398 _BitScanForward(&tr, set);
399 _bittestandreset(&fr, tr);
400 regs.free = fr;
401 r = tr;
402 #else
403 asm(
404 "bsf %1, %%eax\n\t"
405 "btr %%eax, %2\n\t"
406 "movl %%eax, %0\n\t"
407 : "=m"(r) : "m"(set), "m"(regs.free) : "%eax", "memory" );
408 #endif /* WIN32 */
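// Example of what the bsf/btr pair computes (an illustrative value, not
// from any real trace): if set == 0b1100, bsf finds the lowest set bit, 2;
// btr then clears bit 2 in regs.free, and Register(2) -- EDX on IA32,
// given the ordering of regNames[] above -- is returned.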
409 return r;
412 void Assembler::nRegisterResetAll(RegAlloc& a)
414 // add scratch registers to our free list for the allocator
415 a.clear();
416 a.used = 0;
417 a.free = SavedRegs | ScratchRegs;
418 #if defined NANOJIT_IA32
419 if (!config.sse2)
420 a.free &= ~XmmRegs;
421 #endif
422 debug_only( a.managed = a.free; )
425 NIns* Assembler::nPatchBranch(NIns* branch, NIns* targ)
427 #if defined NANOJIT_IA32
428 NIns* was = 0;
429 intptr_t offset = intptr_t(targ) - intptr_t(branch);
430 if (branch[0] == JMP32) {
431 was = branch + *(int32_t*)&branch[1] + 5;
432 *(int32_t*)&branch[1] = offset - 5;
433 } else if (branch[0] == JCC32) {
434 was = branch + *(int32_t*)&branch[2] + 6;
435 *(int32_t*)&branch[2] = offset - 6;
436 } else
437 NanoAssertMsg(0, "Unknown branch type in nPatchBranch");
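// The offset arithmetic above, spelled out with invented addresses:
// a 5-byte "jmp rel32" at branch == 0x1000 reaching targ == 0x1234 needs
// rel32 = targ - (branch + 5) = offset - 5 = 0x22f, and the previous
// target is recovered as branch + old_rel32 + 5; the jcc form is 6 bytes
// long with its rel32 at branch[2], hence the -6/+6 in the second case.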
438 #else
439 if (branch[0] == 0xFF && branch[1] == 0x25) {
440 NIns *mem;
441 mem = &branch[6] + *(int32_t *)&branch[2];
442 was = *(intptr_t*)mem;
443 *(intptr_t *)mem = intptr_t(targ);
444 } else {
445 NanoAssertMsg(0, "Unknown branch type in nPatchBranch");
447 #endif
448 return was;
451 RegisterMask Assembler::hint(LIns* i, RegisterMask allow)
453 uint32_t op = i->opcode();
454 int prefer = allow;
455 if (op == LIR_call || op == LIR_calli) {
456 prefer &= rmask(retRegs[0]);
458 else if (op == LIR_fcall || op == LIR_fcalli) {
459 prefer &= rmask(FST0);
461 else if (op == LIR_param) {
462 uint32_t max_regs = max_abi_regs[_thisfrag->lirbuf->abi];
463 if (i->imm8() < max_regs)
464 prefer &= rmask(Register(i->imm8()));
466 else if (op == LIR_callh || op == LIR_rsh && i->oprnd1()->opcode()==LIR_callh) {
467 prefer &= rmask(retRegs[1]);
469 else if (i->isCmp()) {
470 prefer &= AllowableFlagRegs;
472 else if (i->isconst()) {
473 prefer &= ScratchRegs;
475 return (_allocator.free & prefer) ? prefer : allow;
478 void Assembler::asm_qjoin(LIns *ins)
480 int d = findMemFor(ins);
481 AvmAssert(d);
482 LIns* lo = ins->oprnd1();
483 LIns* hi = ins->oprnd2();
485 Reservation *resv = getresv(ins);
486 Register rr = resv->reg;
488 if (rr != UnknownReg && (rmask(rr) & FpRegs))
489 evict(rr);
491 if (hi->isconst())
493 STi(FP, d+4, hi->constval());
495 else
497 Register r = findRegFor(hi, GpRegs);
498 ST(FP, d+4, r);
501 if (lo->isconst())
503 STi(FP, d, lo->constval());
505 else
507 // okay if r gets recycled.
508 Register r = findRegFor(lo, GpRegs);
509 ST(FP, d, r);
512 freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
515 void Assembler::asm_load(int d, Register r)
517 if (rmask(r) & FpRegs)
519 #if defined NANOJIT_IA32
520 if (rmask(r) & XmmRegs) {
521 #endif
522 SSE_LDQ(r, d, FP);
523 #if defined NANOJIT_IA32
524 } else {
525 FLDQ(d, FP);
527 #endif
529 #if defined NANOJIT_AMD64
530 else if (i->opcode() == LIR_param)
532 LDQ(r, d, FP);
534 #endif
535 else
537 LD(r, d, FP);
541 void Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
543 if (i->isop(LIR_alloc)) {
544 verbose_only( if (_verbose) { outputForEOL(" <= remat %s size %d", _thisfrag->lirbuf->names->formatRef(i), i->size()); } )
545 LEA(r, disp(resv), FP);
547 else if (i->isconst()) {
548 if (!resv->arIndex) {
549 reserveFree(i);
551 LDi(r, i->constval());
553 else {
554 int d = findMemFor(i);
555 verbose_only( if (_verbose) { outputForEOL(" <= restore %s", _thisfrag->lirbuf->names->formatRef(i)); } )
556 asm_load(d,r);
560 void Assembler::asm_store32(LIns *value, int dr, LIns *base)
562 if (value->isconst())
564 Register rb = getBaseReg(base, dr, GpRegs);
565 int c = value->constval();
566 STi(rb, dr, c);
568 else
570 // make sure the value is in a register
571 Reservation *rA, *rB;
572 Register ra, rb;
573 if (base->isop(LIR_alloc)) {
574 rb = FP;
575 dr += findMemFor(base);
576 ra = findRegFor(value, GpRegs);
577 } else if (base->isconst()) {
578 // absolute address
579 dr += base->constval();
580 ra = findRegFor(value, GpRegs);
581 rb = UnknownReg;
582 } else {
583 findRegFor2(GpRegs, value, rA, base, rB);
584 ra = rA->reg;
585 rb = rB->reg;
587 ST(rb, dr, ra);
591 void Assembler::asm_spill(Register rr, int d, bool pop, bool quad)
593 (void)quad;
594 if (d)
596 // save to spill location
597 if (rmask(rr) & FpRegs)
599 #if defined NANOJIT_IA32
600 if (rmask(rr) & XmmRegs) {
601 #endif
602 SSE_STQ(d, FP, rr);
603 #if defined NANOJIT_IA32
604 } else {
605 FSTQ((pop?1:0), d, FP);
607 #endif
609 #if defined NANOJIT_AMD64
610 else if (quad)
612 STQ(FP, d, rr);
614 #endif
615 else
617 ST(FP, d, rr);
620 #if defined NANOJIT_IA32
621 else if (pop && (rmask(rr) & x87Regs))
623 // pop the fpu result since it isn't used
624 FSTP(FST0);
626 #endif
629 void Assembler::asm_load64(LInsp ins)
631 LIns* base = ins->oprnd1();
632 int db = ins->oprnd2()->constval();
633 Reservation *resv = getresv(ins);
634 Register rr = resv->reg;
636 if (rr != UnknownReg && rmask(rr) & XmmRegs)
638 freeRsrcOf(ins, false);
639 Register rb = getBaseReg(base, db, GpRegs);
640 SSE_LDQ(rr, db, rb);
642 #if defined NANOJIT_AMD64
643 else if (rr != UnknownReg && rmask(rr) & GpRegs)
645 freeRsrcOf(ins, false);
646 Register rb = findRegFor(base, GpRegs);
647 LDQ(rr, db, rb);
649 else
651 int d = disp(resv);
652 Register rb = findRegFor(base, GpRegs);
654 /* We need a temporary register we can move the destination into */
655 rr = registerAlloc(GpRegs);
657 STQ(FP, d, rr);
658 LDQ(rr, db, rb);
660 /* Mark as free */
661 _allocator.addFree(rr);
663 freeRsrcOf(ins, false);
665 #elif defined NANOJIT_IA32
666 else
668 int dr = disp(resv);
669 Register rb;
670 if (base->isop(LIR_alloc)) {
671 rb = FP;
672 db += findMemFor(base);
673 } else {
674 rb = findRegFor(base, GpRegs);
676 resv->reg = UnknownReg;
678 // don't use an fpu reg to simply load & store the value.
679 if (dr)
680 asm_mmq(FP, dr, rb, db);
682 freeRsrcOf(ins, false);
684 if (rr != UnknownReg)
686 NanoAssert(rmask(rr)&FpRegs);
687 _allocator.retire(rr);
688 FLDQ(db, rb);
691 #endif
694 void Assembler::asm_store64(LInsp value, int dr, LInsp base)
696 if (value->isconstq())
698 // if it's a constant 64-bit value, just store it now rather than
699 // generating a pointless store/load/store sequence
700 Register rb;
701 if (base->isop(LIR_alloc)) {
702 rb = FP;
703 dr += findMemFor(base);
704 } else {
705 rb = findRegFor(base, GpRegs);
707 const int32_t* p = (const int32_t*) (value-2);
708 STi(rb, dr+4, p[1]);
709 STi(rb, dr, p[0]);
710 return;
713 #if defined NANOJIT_IA32
714 if (value->isop(LIR_ldq) || value->isop(LIR_ldqc) || value->isop(LIR_qjoin))
716 // value is 64bit struct or int64_t, or maybe a double.
717 // it may be live in an FPU reg. Either way, don't
718 // put it in an FPU reg just to load & store it.
720 // a) if we know it's not a double, this is right.
721 // b) if we guarded that it's a double, this store could be on
722 // the side exit, copying a non-double.
723 // c) maybe it's a double just being stored. oh well.
725 if (config.sse2) {
726 Register rv = findRegFor(value, XmmRegs);
727 Register rb;
728 if (base->isop(LIR_alloc)) {
729 rb = FP;
730 dr += findMemFor(base);
731 } else {
732 rb = findRegFor(base, GpRegs);
734 SSE_STQ(dr, rb, rv);
735 return;
738 int da = findMemFor(value);
739 Register rb;
740 if (base->isop(LIR_alloc)) {
741 rb = FP;
742 dr += findMemFor(base);
743 } else {
744 rb = findRegFor(base, GpRegs);
746 asm_mmq(rb, dr, FP, da);
747 return;
750 Register rb;
751 if (base->isop(LIR_alloc)) {
752 rb = FP;
753 dr += findMemFor(base);
754 } else {
755 rb = findRegFor(base, GpRegs);
758 // if the value is already in a reg, use that; otherwise
759 // try to get it into XMM regs before FPU regs.
760 Reservation* rA = getresv(value);
761 Register rv;
762 int pop = !rA || rA->reg==UnknownReg;
763 if (pop) {
764 rv = findRegFor(value, config.sse2 ? XmmRegs : FpRegs);
765 } else {
766 rv = rA->reg;
769 if (rmask(rv) & XmmRegs) {
770 SSE_STQ(dr, rb, rv);
771 } else {
772 FSTQ(pop, dr, rb);
774 #elif defined NANOJIT_AMD64
775 /* If this is not a float operation, we can use GpRegs instead.
776 * We can do this in a few other cases but for now I'll keep it simple.
778 Register rb = findRegFor(base, GpRegs);
779 Reservation *rV = getresv(value);
781 if (rV != NULL && rV->reg != UnknownReg) {
782 if (rmask(rV->reg) & GpRegs) {
783 STQ(rb, dr, rV->reg);
784 } else {
785 SSE_STQ(dr, rb, rV->reg);
787 } else {
788 Register rv;
790 /* Try to catch some common patterns.
791 * Note: this is a necessity, since in between things like
792 * asm_fop() could see the reservation and try to use a non-SSE
793 * register for adding. Same for asm_qbinop in theory.
794 * There should probably be asserts to catch more cases.
796 if (value->isop(LIR_u2f)
797 || value->isop(LIR_i2f)
798 || (value->opcode() >= LIR_fneg && value->opcode() <= LIR_fmul)
799 || value->opcode() == LIR_fdiv
800 || value->opcode() == LIR_fcall) {
801 rv = findRegFor(value, XmmRegs);
802 SSE_STQ(dr, rb, rv);
803 } else {
804 rv = findRegFor(value, GpRegs);
805 STQ(rb, dr, rv);
808 #endif
812 * copy 64 bits: (rd+dd) <- (rs+ds)
814 void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
816 // value is either a 64bit struct or maybe a float
817 // that isn't live in an FPU reg. Either way, don't
818 // put it in an FPU reg just to load & store it.
819 #if defined NANOJIT_IA32
820 if (config.sse2)
822 #endif
823 // use SSE to load+store 64bits
824 Register t = registerAlloc(XmmRegs);
825 _allocator.addFree(t);
826 SSE_STQ(dd, rd, t);
827 SSE_LDQ(t, ds, rs);
828 #if defined NANOJIT_IA32
830 else
832 // get a scratch reg
833 Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs)));
834 _allocator.addFree(t);
835 ST(rd, dd+4, t);
836 LD(t, ds+4, rs);
837 ST(rd, dd, t);
838 LD(t, ds, rs);
840 #endif
843 NIns* Assembler::asm_branch(bool branchOnFalse, LInsp cond, NIns* targ, bool isfar)
845 NIns* at = 0;
846 LOpcode condop = cond->opcode();
847 NanoAssert(cond->isCond());
848 #ifndef NJ_SOFTFLOAT
849 if (condop >= LIR_feq && condop <= LIR_fge)
851 return asm_jmpcc(branchOnFalse, cond, targ);
853 #endif
854 // produce the branch
855 if (branchOnFalse)
857 if (condop == LIR_eq)
858 JNE(targ, isfar);
859 else if (condop == LIR_ov)
860 JNO(targ, isfar);
861 else if (condop == LIR_cs)
862 JNC(targ, isfar);
863 else if (condop == LIR_lt)
864 JNL(targ, isfar);
865 else if (condop == LIR_le)
866 JNLE(targ, isfar);
867 else if (condop == LIR_gt)
868 JNG(targ, isfar);
869 else if (condop == LIR_ge)
870 JNGE(targ, isfar);
871 else if (condop == LIR_ult)
872 JNB(targ, isfar);
873 else if (condop == LIR_ule)
874 JNBE(targ, isfar);
875 else if (condop == LIR_ugt)
876 JNA(targ, isfar);
877 else //if (condop == LIR_uge)
878 JNAE(targ, isfar);
880 else // op == LIR_xt
882 if (condop == LIR_eq)
883 JE(targ, isfar);
884 else if (condop == LIR_ov)
885 JO(targ, isfar);
886 else if (condop == LIR_cs)
887 JC(targ, isfar);
888 else if (condop == LIR_lt)
889 JL(targ, isfar);
890 else if (condop == LIR_le)
891 JLE(targ, isfar);
892 else if (condop == LIR_gt)
893 JG(targ, isfar);
894 else if (condop == LIR_ge)
895 JGE(targ, isfar);
896 else if (condop == LIR_ult)
897 JB(targ, isfar);
898 else if (condop == LIR_ule)
899 JBE(targ, isfar);
900 else if (condop == LIR_ugt)
901 JA(targ, isfar);
902 else //if (condop == LIR_uge)
903 JAE(targ, isfar);
905 at = _nIns;
906 asm_cmp(cond);
907 return at;
910 void Assembler::asm_cmp(LIns *cond)
912 LOpcode condop = cond->opcode();
914 // LIR_ov and LIR_cs recycle the flags set by arithmetic ops
915 if ((condop == LIR_ov) || (condop == LIR_cs))
916 return;
918 LInsp lhs = cond->oprnd1();
919 LInsp rhs = cond->oprnd2();
920 Reservation *rA, *rB;
922 NanoAssert((!lhs->isQuad() && !rhs->isQuad()) || (lhs->isQuad() && rhs->isQuad()));
924 // Not supported yet.
925 #if !defined NANOJIT_64BIT
926 NanoAssert(!lhs->isQuad() && !rhs->isQuad());
927 #endif
929 // ready to issue the compare
930 if (rhs->isconst())
932 int c = rhs->constval();
933 if (c == 0 && cond->isop(LIR_eq)) {
934 Register r = findRegFor(lhs, GpRegs);
935 if (rhs->isQuad()) {
936 #if defined NANOJIT_64BIT
937 TESTQ(r, r);
938 #endif
939 } else {
940 TEST(r,r);
942 // No 64-bit immediates so fall-back to below
944 else if (!rhs->isQuad()) {
945 Register r = getBaseReg(lhs, c, GpRegs);
946 CMPi(r, c);
949 else
951 findRegFor2(GpRegs, lhs, rA, rhs, rB);
952 Register ra = rA->reg;
953 Register rb = rB->reg;
954 if (rhs->isQuad()) {
955 #if defined NANOJIT_64BIT
956 CMPQ(ra, rb);
957 #endif
958 } else {
959 CMP(ra, rb);
964 void Assembler::asm_loop(LInsp ins, NInsList& loopJumps)
966 JMP_long(0);
967 loopJumps.add(_nIns);
969 // If the target we are looping to is in a different fragment, we have to restore
970 // SP since we will target fragEntry and not loopEntry.
971 if (ins->record()->exit->target != _thisfrag)
972 MR(SP,FP);
975 void Assembler::asm_fcond(LInsp ins)
977 // only want certain regs
978 Register r = prepResultReg(ins, AllowableFlagRegs);
979 asm_setcc(r, ins);
980 #ifdef NJ_ARM_VFP
981 SETE(r);
982 #else
983 // SETcc only sets low 8 bits, so extend
984 MOVZX8(r,r);
985 SETNP(r);
986 #endif
987 asm_fcmp(ins);
990 void Assembler::asm_cond(LInsp ins)
992 // only want certain regs
993 LOpcode op = ins->opcode();
994 Register r = prepResultReg(ins, AllowableFlagRegs);
995 // SETcc only sets low 8 bits, so extend
996 MOVZX8(r,r);
997 if (op == LIR_eq)
998 SETE(r);
999 else if (op == LIR_ov)
1000 SETO(r);
1001 else if (op == LIR_cs)
1002 SETC(r);
1003 else if (op == LIR_lt)
1004 SETL(r);
1005 else if (op == LIR_le)
1006 SETLE(r);
1007 else if (op == LIR_gt)
1008 SETG(r);
1009 else if (op == LIR_ge)
1010 SETGE(r);
1011 else if (op == LIR_ult)
1012 SETB(r);
1013 else if (op == LIR_ule)
1014 SETBE(r);
1015 else if (op == LIR_ugt)
1016 SETA(r);
1017 else // if (op == LIR_uge)
1018 SETAE(r);
1019 asm_cmp(ins);
1022 void Assembler::asm_arith(LInsp ins)
1024 LOpcode op = ins->opcode();
1025 LInsp lhs = ins->oprnd1();
1026 LInsp rhs = ins->oprnd2();
1028 Register rb = UnknownReg;
1029 RegisterMask allow = GpRegs;
1030 bool forceReg = (op == LIR_mul || !rhs->isconst());
1032 /* Even if lhs == rhs && forceReg, shift instructions require ECX on the rhs. */
1033 if ((lhs != rhs || (op == LIR_lsh || op == LIR_rsh || op == LIR_ush)) && forceReg)
1035 if ((rb = asm_binop_rhs_reg(ins)) == UnknownReg) {
1036 rb = findRegFor(rhs, allow);
1038 allow &= ~rmask(rb);
1040 else if ((op == LIR_add||op == LIR_addp) && lhs->isop(LIR_alloc) && rhs->isconst()) {
1041 // add alloc+const, use lea
1042 Register rr = prepResultReg(ins, allow);
1043 int d = findMemFor(lhs) + rhs->constval();
1044 LEA(rr, d, FP);
1047 Register rr = prepResultReg(ins, allow);
1048 Reservation* rA = getresv(lhs);
1049 Register ra;
1050 // if this is last use of lhs in reg, we can re-use result reg
1051 if (rA == 0 || (ra = rA->reg) == UnknownReg)
1052 ra = findSpecificRegFor(lhs, rr);
1053 // else, rA already has a register assigned.
1055 if (forceReg)
1057 if (lhs == rhs)
1058 rb = ra;
1060 if (op == LIR_add || op == LIR_addp)
1061 ADD(rr, rb);
1062 else if (op == LIR_sub)
1063 SUB(rr, rb);
1064 else if (op == LIR_mul)
1065 MUL(rr, rb);
1066 else if (op == LIR_and)
1067 AND(rr, rb);
1068 else if (op == LIR_or)
1069 OR(rr, rb);
1070 else if (op == LIR_xor)
1071 XOR(rr, rb);
1072 else if (op == LIR_lsh)
1073 SHL(rr, rb);
1074 else if (op == LIR_rsh)
1075 SAR(rr, rb);
1076 else if (op == LIR_ush)
1077 SHR(rr, rb);
1078 else
1079 NanoAssertMsg(0, "Unsupported");
1081 else
1083 int c = rhs->constval();
1084 if (op == LIR_add || op == LIR_addp) {
1085 #ifdef NANOJIT_IA32_TODO
1086 if (ra != rr) {
1087 // this doesn't set cc's, only use it when cc's not required.
1088 LEA(rr, c, ra);
1089 ra = rr; // suppress mov
1090 } else
1091 #endif
1093 ADDi(rr, c);
1095 } else if (op == LIR_sub) {
1096 #ifdef NANOJIT_IA32
1097 if (ra != rr) {
1098 LEA(rr, -c, ra);
1099 ra = rr;
1100 } else
1101 #endif
1103 SUBi(rr, c);
1105 } else if (op == LIR_and)
1106 ANDi(rr, c);
1107 else if (op == LIR_or)
1108 ORi(rr, c);
1109 else if (op == LIR_xor)
1110 XORi(rr, c);
1111 else if (op == LIR_lsh)
1112 SHLi(rr, c);
1113 else if (op == LIR_rsh)
1114 SARi(rr, c);
1115 else if (op == LIR_ush)
1116 SHRi(rr, c);
1117 else
1118 NanoAssertMsg(0, "Unsupported");
1121 if ( rr != ra )
1122 MR(rr,ra);
1125 void Assembler::asm_neg_not(LInsp ins)
1127 LOpcode op = ins->opcode();
1128 Register rr = prepResultReg(ins, GpRegs);
1130 LIns* lhs = ins->oprnd1();
1131 Reservation *rA = getresv(lhs);
1132 // if this is last use of lhs in reg, we can re-use result reg
1133 Register ra;
1134 if (rA == 0 || (ra=rA->reg) == UnknownReg)
1135 ra = findSpecificRegFor(lhs, rr);
1136 // else, rA already has a register assigned.
1138 if (op == LIR_not)
1139 NOT(rr);
1140 else
1141 NEG(rr);
1143 if ( rr != ra )
1144 MR(rr,ra);
1147 void Assembler::asm_ld(LInsp ins)
1149 LOpcode op = ins->opcode();
1150 LIns* base = ins->oprnd1();
1151 LIns* disp = ins->oprnd2();
1152 Register rr = prepResultReg(ins, GpRegs);
1153 int d = disp->constval();
1155 #ifdef NANOJIT_IA32
1156 /* Can't use this on AMD64, no 64-bit immediate addresses. */
1157 if (base->isconst()) {
1158 intptr_t addr = base->constval();
1159 addr += d;
1160 if (op == LIR_ldcb)
1161 LD8Zdm(rr, addr);
1162 else if (op == LIR_ldcs)
1163 LD16Zdm(rr, addr);
1164 else
1165 LDdm(rr, addr);
1166 return;
1169 /* :TODO: Use this on AMD64 as well. */
1170 /* Search for add(X,Y) */
1171 if (base->opcode() == LIR_piadd) {
1172 int scale = 0;
1173 LIns *lhs = base->oprnd1();
1174 LIns *rhs = base->oprnd2();
1176 /* See if we can bypass any SHLs, by searching for
1177 * add(X, shl(Y,Z)) -> mov r, [X+Y*Z]
1179 if (rhs->opcode() == LIR_pilsh && rhs->oprnd2()->isconst()) {
1180 scale = rhs->oprnd2()->constval();
1181 if (scale >= 1 && scale <= 3)
1182 rhs = rhs->oprnd1();
1183 else
1184 scale = 0;
1187 Register rleft;
1188 Reservation *rL = getresv(lhs);
1190 /* Does LHS have a register yet? If not, re-use the result reg.
1191 * :TODO: If LHS is const, we could eliminate a register use.
1193 if (rL == NULL || rL->reg == UnknownReg)
1194 rleft = findSpecificRegFor(lhs, rr);
1195 else
1196 rleft = rL->reg;
1198 Register rright = UnknownReg;
1199 Reservation *rR = getresv(rhs);
1201 /* Does RHS have a register yet? If not, try to re-use the result reg. */
1202 if (rr != rleft && (rR == NULL || rR->reg == UnknownReg))
1203 rright = findSpecificRegFor(rhs, rr);
1204 if (rright == UnknownReg)
1205 rright = findRegFor(rhs, GpRegs & ~(rmask(rleft)));
1207 if (op == LIR_ldcb)
1208 LD8Zsib(rr, d, rleft, rright, scale);
1209 else if (op == LIR_ldcs)
1210 LD16Zsib(rr, d, rleft, rright, scale);
1211 else
1212 LDsib(rr, d, rleft, rright, scale);
1214 return;
1216 #endif
1218 Register ra = getBaseReg(base, d, GpRegs);
1219 if (op == LIR_ldcb)
1220 LD8Z(rr, d, ra);
1221 else if (op == LIR_ldcs)
1222 LD16Z(rr, d, ra);
1223 else
1224 LD(rr, d, ra);
1227 void Assembler::asm_cmov(LInsp ins)
1229 LOpcode op = ins->opcode();
1230 LIns* condval = ins->oprnd1();
1231 NanoAssert(condval->isCmp());
1233 LIns* values = ins->oprnd2();
1235 NanoAssert(values->opcode() == LIR_2);
1236 LIns* iftrue = values->oprnd1();
1237 LIns* iffalse = values->oprnd2();
1239 NanoAssert(op == LIR_qcmov || (!iftrue->isQuad() && !iffalse->isQuad()));
1241 const Register rr = prepResultReg(ins, GpRegs);
1243 // this code assumes that neither LD nor MR nor MRcc set any of the condition flags.
1244 // (This is true on Intel, is it true on all architectures?)
1245 const Register iffalsereg = findRegFor(iffalse, GpRegs & ~rmask(rr));
1246 if (op == LIR_cmov) {
1247 switch (condval->opcode())
1249 // note that these are all opposites...
1250 case LIR_eq: MRNE(rr, iffalsereg); break;
1251 case LIR_ov: MRNO(rr, iffalsereg); break;
1252 case LIR_cs: MRNC(rr, iffalsereg); break;
1253 case LIR_lt: MRGE(rr, iffalsereg); break;
1254 case LIR_le: MRG(rr, iffalsereg); break;
1255 case LIR_gt: MRLE(rr, iffalsereg); break;
1256 case LIR_ge: MRL(rr, iffalsereg); break;
1257 case LIR_ult: MRAE(rr, iffalsereg); break;
1258 case LIR_ule: MRA(rr, iffalsereg); break;
1259 case LIR_ugt: MRBE(rr, iffalsereg); break;
1260 case LIR_uge: MRB(rr, iffalsereg); break;
1261 debug_only( default: NanoAssert(0); break; )
1263 } else if (op == LIR_qcmov) {
1264 #if !defined NANOJIT_64BIT
1265 NanoAssert(0);
1266 #else
1267 switch (condval->opcode())
1269 // note that these are all opposites...
1270 case LIR_eq: MRQNE(rr, iffalsereg); break;
1271 case LIR_ov: MRQNO(rr, iffalsereg); break;
1272 case LIR_cs: MRQNC(rr, iffalsereg); break;
1273 case LIR_lt: MRQGE(rr, iffalsereg); break;
1274 case LIR_le: MRQG(rr, iffalsereg); break;
1275 case LIR_gt: MRQLE(rr, iffalsereg); break;
1276 case LIR_ge: MRQL(rr, iffalsereg); break;
1277 case LIR_ult: MRQAE(rr, iffalsereg); break;
1278 case LIR_ule: MRQA(rr, iffalsereg); break;
1279 case LIR_ugt: MRQBE(rr, iffalsereg); break;
1280 case LIR_uge: MRQB(rr, iffalsereg); break;
1281 debug_only( default: NanoAssert(0); break; )
1283 #endif
1285 /*const Register iftruereg =*/ findSpecificRegFor(iftrue, rr);
1286 asm_cmp(condval);
1289 void Assembler::asm_qhi(LInsp ins)
1291 Register rr = prepResultReg(ins, GpRegs);
1292 LIns *q = ins->oprnd1();
1293 int d = findMemFor(q);
1294 LD(rr, d+4, FP);
1297 void Assembler::asm_param(LInsp ins)
1299 uint32_t a = ins->imm8();
1300 uint32_t kind = ins->imm8b();
1301 if (kind == 0) {
1302 // ordinary param
1303 AbiKind abi = _thisfrag->lirbuf->abi;
1304 uint32_t abi_regcount = max_abi_regs[abi];
1305 if (a < abi_regcount) {
1306 // incoming arg in register
1307 prepResultReg(ins, rmask(argRegs[a]));
1308 } else {
1309 // incoming arg is on stack, and EBP points nearby (see genPrologue)
1310 Register r = prepResultReg(ins, GpRegs);
1311 int d = (a - abi_regcount) * sizeof(intptr_t) + 8;
1312 LD(r, d, FP);
1315 else {
1316 // saved param
1317 prepResultReg(ins, rmask(savedRegs[a]));
1321 void Assembler::asm_short(LInsp ins)
1323 Register rr = prepResultReg(ins, GpRegs);
1324 int32_t val = ins->imm16();
1325 if (val == 0)
1326 XOR(rr,rr);
1327 else
1328 LDi(rr, val);
1331 void Assembler::asm_int(LInsp ins)
1333 Register rr = prepResultReg(ins, GpRegs);
1334 int32_t val = ins->imm32();
1335 if (val == 0)
1336 XOR(rr,rr);
1337 else
1338 LDi(rr, val);
1341 void Assembler::asm_quad(LInsp ins)
1343 #if defined NANOJIT_IA32
1344 Reservation *rR = getresv(ins);
1345 Register rr = rR->reg;
1346 if (rr != UnknownReg)
1348 // @todo -- add special-cases for 0 and 1
1349 _allocator.retire(rr);
1350 rR->reg = UnknownReg;
1351 NanoAssert((rmask(rr) & FpRegs) != 0);
1353 const double d = ins->constvalf();
1354 const uint64_t q = ins->constvalq();
1355 if (rmask(rr) & XmmRegs) {
1356 if (q == 0.0) {
1357 // test (int64)0 since -0.0 == 0.0
1358 SSE_XORPDr(rr, rr);
1359 } else if (d == 1.0) {
1360 // 1.0 is extremely frequent and worth special-casing!
1361 static const double k_ONE = 1.0;
1362 LDSDm(rr, &k_ONE);
1363 } else {
1364 findMemFor(ins);
1365 const int d = disp(rR);
1366 SSE_LDQ(rr, d, FP);
1368 } else {
1369 if (q == 0.0) {
1370 // test (int64)0 since -0.0 == 0.0
1371 FLDZ();
1372 } else if (d == 1.0) {
1373 FLD1();
1374 } else {
1375 findMemFor(ins);
1376 int d = disp(rR);
1377 FLDQ(d,FP);
1382 // @todo, if we used xor, ldsd, fldz, etc above, we don't need mem here
1383 int d = disp(rR);
1384 freeRsrcOf(ins, false);
1385 if (d)
1387 const int32_t* p = (const int32_t*) (ins-2);
1388 STi(FP,d+4,p[1]);
1389 STi(FP,d,p[0]);
1391 #elif defined NANOJIT_AMD64
1392 Reservation *rR = getresv(ins);
1393 int64_t val = *(int64_t *)(ins - 2);
1395 if (rR->reg != UnknownReg)
1397 if (rmask(rR->reg) & GpRegs)
1399 LDQi(rR->reg, val);
1401 else if (rmask(rR->reg) & XmmRegs)
1403 if (ins->constvalf() == 0.0)
1405 SSE_XORPDr(rR->reg, rR->reg);
1407 else
1409 /* Get a short-lived register, not associated with instruction */
1410 Register rd = rR->reg;
1411 Register rs = registerAlloc(GpRegs);
1413 SSE_MOVD(rd, rs);
1414 LDQi(rs, val);
1416 _allocator.addFree(rs);
1420 else
1422 const int32_t* p = (const int32_t*) (ins-2);
1423 int dr = disp(rR);
1424 STi(FP, dr+4, p[1]);
1425 STi(FP, dr, p[0]);
1428 freeRsrcOf(ins, false);
1429 #endif
1432 void Assembler::asm_qlo(LInsp ins)
1434 LIns *q = ins->oprnd1();
1436 #if defined NANOJIT_IA32
1437 if (!config.sse2)
1439 Register rr = prepResultReg(ins, GpRegs);
1440 int d = findMemFor(q);
1441 LD(rr, d, FP);
1443 else
1444 #endif
1446 Reservation *resv = getresv(ins);
1447 Register rr = resv->reg;
1448 if (rr == UnknownReg) {
1449 // store quad in spill loc
1450 int d = disp(resv);
1451 freeRsrcOf(ins, false);
1452 Register qr = findRegFor(q, XmmRegs);
1453 SSE_MOVDm(d, FP, qr);
1454 } else {
1455 freeRsrcOf(ins, false);
1456 Register qr = findRegFor(q, XmmRegs);
1457 SSE_MOVD(rr,qr);
1462 void Assembler::asm_fneg(LInsp ins)
1464 #if defined NANOJIT_IA32
1465 if (config.sse2)
1467 #endif
1468 LIns *lhs = ins->oprnd1();
1470 Register rr = prepResultReg(ins, XmmRegs);
1471 Reservation *rA = getresv(lhs);
1472 Register ra;
1474 // if this is last use of lhs in reg, we can re-use result reg
1475 if (rA == 0 || (ra = rA->reg) == UnknownReg) {
1476 ra = findSpecificRegFor(lhs, rr);
1477 } else if ((rmask(ra) & XmmRegs) == 0) {
1478 /* We need this case on AMD64, because it's possible that
1479 * an earlier instruction has done a quadword load and reserved a
1480 * GPR. If so, ask for a new register.
1482 ra = findRegFor(lhs, XmmRegs);
1484 // else, rA already has a register assigned.
1486 #if defined __SUNPRO_CC
1487 // from Sun Studio C++ Readme: #pragma align inside namespace requires mangled names
1488 static uint32_t temp[] = {0, 0, 0, 0, 0, 0, 0};
1489 static uint32_t *negateMask = (uint32_t *)alignUp(temp, 16);
1490 negateMask[1] = 0x80000000;
1491 #else
1492 static const AVMPLUS_ALIGN16(uint32_t) negateMask[] = {0,0x80000000,0,0};
1493 #endif
1494 SSE_XORPD(rr, negateMask);
1496 if (rr != ra)
1497 SSE_MOVSD(rr, ra);
1498 #if defined NANOJIT_IA32
1500 else
1502 Register rr = prepResultReg(ins, FpRegs);
1504 LIns* lhs = ins->oprnd1();
1506 // lhs into reg, prefer same reg as result
1507 Reservation* rA = getresv(lhs);
1508 // if this is last use of lhs in reg, we can re-use result reg
1509 if (rA == 0 || rA->reg == UnknownReg)
1510 findSpecificRegFor(lhs, rr);
1511 // else, rA already has a different reg assigned
1513 NanoAssert(getresv(lhs)!=0 && getresv(lhs)->reg==FST0);
1514 // assume that the lhs is in ST(0) and rhs is on stack
1515 FCHS();
1517 // if we had more than one fpu reg, this is where
1518 // we would move ra into rr if rr != ra.
1520 #endif
1523 void Assembler::asm_arg(ArgSize sz, LInsp p, Register r)
1525 if (sz == ARGSIZE_Q)
1527 // ref arg - use lea
1528 if (r != UnknownReg)
1530 // arg in specific reg
1531 int da = findMemFor(p);
1532 LEA(r, da, FP);
1534 else
1536 NanoAssert(0); // not supported
1539 else if (sz == ARGSIZE_LO)
1541 if (r != UnknownReg) {
1542 // arg goes in specific register
1543 if (p->isconst()) {
1544 LDi(r, p->constval());
1545 } else {
1546 Reservation* rA = getresv(p);
1547 if (rA) {
1548 if (rA->reg == UnknownReg) {
1549 // load it into the arg reg
1550 int d = findMemFor(p);
1551 if (p->isop(LIR_alloc)) {
1552 LEA(r, d, FP);
1553 } else {
1554 LD(r, d, FP);
1556 } else {
1557 // it must be in a saved reg
1558 MR(r, rA->reg);
1561 else {
1562 // this is the last use, so fine to assign it
1563 // to the scratch reg, it's dead after this point.
1564 findSpecificRegFor(p, r);
1568 else {
1569 asm_pusharg(p);
1572 else
1574 NanoAssert(sz == ARGSIZE_F);
1575 asm_farg(p);
1579 void Assembler::asm_pusharg(LInsp p)
1581 // arg goes on stack
1582 Reservation* rA = getresv(p);
1583 if (rA == 0 && p->isconst())
1585 // small const we push directly
1586 PUSHi(p->constval());
1588 else if (rA == 0 || p->isop(LIR_alloc))
1590 Register ra = findRegFor(p, GpRegs);
1591 PUSHr(ra);
1593 else if (rA->reg == UnknownReg)
1595 PUSHm(disp(rA), FP);
1597 else
1599 PUSHr(rA->reg);
1603 void Assembler::asm_farg(LInsp p)
1605 #if defined NANOJIT_IA32
1606 NanoAssert(p->isQuad());
1607 Register r = findRegFor(p, FpRegs);
1608 if (rmask(r) & XmmRegs) {
1609 SSE_STQ(0, SP, r);
1610 } else {
1611 FSTPQ(0, SP);
1612 /* It's possible that the same LIns* with r=FST0 will appear in the argument list more
1613 * than once. In this case FST0 will not have been evicted and the multiple pop
1614 * actions will unbalance the FPU stack. A quick fix is to always evict FST0 manually.
1616 evict(FST0);
1618 SUBi(ESP,8);
1619 //PUSHr(ECX); // 2*pushr is smaller than sub
1620 //PUSHr(ECX);
1621 #endif
1624 void Assembler::asm_fop(LInsp ins)
1626 LOpcode op = ins->opcode();
1627 #if defined NANOJIT_IA32
1628 if (config.sse2)
1630 #endif
1631 LIns *lhs = ins->oprnd1();
1632 LIns *rhs = ins->oprnd2();
1634 RegisterMask allow = XmmRegs;
1635 Register rb = UnknownReg;
1636 if (lhs != rhs) {
1637 rb = findRegFor(rhs,allow);
1638 allow &= ~rmask(rb);
1641 Register rr = prepResultReg(ins, allow);
1642 Reservation *rA = getresv(lhs);
1643 Register ra;
1645 // if this is last use of lhs in reg, we can re-use result reg
1646 if (rA == 0 || (ra = rA->reg) == UnknownReg) {
1647 ra = findSpecificRegFor(lhs, rr);
1648 } else if ((rmask(ra) & XmmRegs) == 0) {
1649 /* We need this case on AMD64, because it's possible that
1650 * an earlier instruction has done a quadword load and reserved a
1651 * GPR. If so, ask for a new register.
1653 ra = findRegFor(lhs, XmmRegs);
1655 else {
1656 // rA already has a register assigned but maybe not from the allow set
1657 ra = findRegFor(lhs, allow);
1660 if (lhs == rhs)
1661 rb = ra;
1663 if (op == LIR_fadd)
1664 SSE_ADDSD(rr, rb);
1665 else if (op == LIR_fsub)
1666 SSE_SUBSD(rr, rb);
1667 else if (op == LIR_fmul)
1668 SSE_MULSD(rr, rb);
1669 else //if (op == LIR_fdiv)
1670 SSE_DIVSD(rr, rb);
1672 if (rr != ra)
1673 SSE_MOVSD(rr, ra);
1674 #if defined NANOJIT_IA32
1676 else
1678 // we swap lhs/rhs on purpose here, works out better
1679 // if you only have one fpu reg. use divr/subr.
1680 LIns* rhs = ins->oprnd1();
1681 LIns* lhs = ins->oprnd2();
1682 Register rr = prepResultReg(ins, rmask(FST0));
1684 // make sure rhs is in memory
1685 int db = findMemFor(rhs);
1687 // lhs into reg, prefer same reg as result
1688 Reservation* rA = getresv(lhs);
1689 // last use of lhs in reg, can reuse rr
1690 if (rA == 0 || rA->reg == UnknownReg)
1691 findSpecificRegFor(lhs, rr);
1692 // else, rA already has a different reg assigned
1694 NanoAssert(getresv(lhs)!=0 && getresv(lhs)->reg==FST0);
1695 // assume that the lhs is in ST(0) and rhs is on stack
1696 if (op == LIR_fadd)
1697 { FADD(db, FP); }
1698 else if (op == LIR_fsub)
1699 { FSUBR(db, FP); }
1700 else if (op == LIR_fmul)
1701 { FMUL(db, FP); }
1702 else if (op == LIR_fdiv)
1703 { FDIVR(db, FP); }
1705 #endif
1708 void Assembler::asm_i2f(LInsp ins)
1710 // where our result goes
1711 Register rr = prepResultReg(ins, FpRegs);
1712 #if defined NANOJIT_IA32
1713 if (rmask(rr) & XmmRegs)
1715 #endif
1716 // todo support int value in memory
1717 Register gr = findRegFor(ins->oprnd1(), GpRegs);
1718 SSE_CVTSI2SD(rr, gr);
1719 #if defined NANOJIT_IA32
1721 else
1723 int d = findMemFor(ins->oprnd1());
1724 FILD(d, FP);
1726 #endif
1729 Register Assembler::asm_prep_fcall(Reservation *rR, LInsp ins)
1731 #if defined NANOJIT_IA32
1732 if (rR) {
1733 Register rr;
1734 if ((rr=rR->reg) != UnknownReg && (rmask(rr) & XmmRegs))
1735 evict(rr);
1737 return prepResultReg(ins, rmask(FST0));
1738 #elif defined NANOJIT_AMD64
1739 evict(RAX);
1740 return prepResultReg(ins, rmask(XMM0));
1741 #endif
1744 void Assembler::asm_u2f(LInsp ins)
1746 // where our result goes
1747 Register rr = prepResultReg(ins, FpRegs);
1748 #if defined NANOJIT_IA32
1749 if (rmask(rr) & XmmRegs)
1751 #endif
1752 // don't call findRegFor, we want a reg we can stomp on for a very short time,
1753 // not a reg that will continue to be associated with the LIns
1754 Register gr = registerAlloc(GpRegs);
1756 // technique inspired by gcc disassembly
1757 // Edwin explains it:
1759 // gr is 0..2^32-1
1761 // sub gr,0x80000000
1763 // now gr is -2^31..2^31-1, i.e. the range of int, but not the same value
1764 // as before
1766 // cvtsi2sd rr,gr
1768 // rr is now a double with the int value range
1770 // addsd rr, 2147483648.0
1772 // adding back double(0x80000000) makes the range 0..2^32-1.
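// Concrete trace of those steps (input chosen only for illustration):
//   start:              gr = 0xFFFFFFFF      (4294967295 as unsigned)
//   sub gr,0x80000000:  gr = 0x7FFFFFFF      (2147483647 as signed)
//   cvtsi2sd rr,gr:     rr = 2147483647.0
//   addsd rr,2^31:      rr = 4294967295.0 == double(original input)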
1774 static const double k_NEGONE = 2147483648.0;
1775 #if defined NANOJIT_IA32
1776 SSE_ADDSDm(rr, &k_NEGONE);
1777 #elif defined NANOJIT_AMD64
1778 /* Squirrel the constant at the bottom of the page. */
1779 if (_dblNegPtr != NULL)
1781 underrunProtect(10);
1783 if (_dblNegPtr == NULL)
1785 underrunProtect(30);
1786 uint8_t *base, *begin;
1787 base = (uint8_t *)((intptr_t)_nIns & ~((intptr_t)NJ_PAGE_SIZE-1));
1788 base += sizeof(PageHeader) + _pageData;
1789 begin = base;
1790 /* Make sure we align */
1791 if ((uintptr_t)base & 0xF) {
1792 base = (NIns *)((uintptr_t)base & ~(0xF));
1793 base += 16;
1795 _pageData += (int32_t)(base - begin) + sizeof(double);
1796 _negOnePtr = (NIns *)base;
1797 *(double *)_negOnePtr = k_NEGONE;
1799 SSE_ADDSDm(rr, _negOnePtr);
1800 #endif
1802 SSE_CVTSI2SD(rr, gr);
1804 Reservation* resv = getresv(ins->oprnd1());
1805 Register xr;
1806 if (resv && (xr = resv->reg) != UnknownReg && (rmask(xr) & GpRegs))
1808 LEA(gr, 0x80000000, xr);
1810 else
1812 const int d = findMemFor(ins->oprnd1());
1813 SUBi(gr, 0x80000000);
1814 LD(gr, d, FP);
1817 // ok, we're done with it
1818 _allocator.addFree(gr);
1819 #if defined NANOJIT_IA32
1821 else
1823 const int disp = -8;
1824 const Register base = SP;
1825 Register gr = findRegFor(ins->oprnd1(), GpRegs);
1826 NanoAssert(rr == FST0);
1827 FILDQ(disp, base);
1828 STi(base, disp+4, 0); // high 32 bits = 0
1829 ST(base, disp, gr); // low 32 bits = unsigned value
1831 #endif
1834 void Assembler::asm_nongp_copy(Register r, Register s)
1836 if ((rmask(r) & XmmRegs) && (rmask(s) & XmmRegs)) {
1837 SSE_MOVSD(r, s);
1838 } else if ((rmask(r) & GpRegs) && (rmask(s) & XmmRegs)) {
1839 SSE_MOVD(r, s);
1840 } else {
1841 if (rmask(r) & XmmRegs) {
1842 // x87 -> xmm
1843 NanoAssertMsg(false, "Should not move data from GPR to XMM");
1844 } else {
1845 // xmm -> x87
1846 NanoAssertMsg(false, "Should not move data from GPR/XMM to x87 FPU");
1851 NIns * Assembler::asm_jmpcc(bool branchOnFalse, LIns *cond, NIns *targ)
1853 LOpcode c = cond->opcode();
1854 if (config.sse2 && c != LIR_feq) {
1855 LIns *lhs = cond->oprnd1();
1856 LIns *rhs = cond->oprnd2();
1857 if (c == LIR_flt) {
1858 LIns *t = lhs; lhs = rhs; rhs = t;
1859 c = LIR_fgt;
1861 else if (c == LIR_fle) {
1862 LIns *t = lhs; lhs = rhs; rhs = t;
1863 c = LIR_fge;
1866 if (c == LIR_fgt) {
1867 if (branchOnFalse) { JNA(targ, false); } else { JA(targ, false); }
1869 else { // if (c == LIR_fge)
1870 if (branchOnFalse) { JNAE(targ, false); } else { JAE(targ, false); }
1872 NIns *at = _nIns;
1873 Reservation *rA, *rB;
1874 findRegFor2(XmmRegs, lhs, rA, rhs, rB);
1875 SSE_UCOMISD(rA->reg, rB->reg);
1876 return at;
1879 if (branchOnFalse)
1880 JP(targ, false);
1881 else
1882 JNP(targ, false);
1883 NIns *at = _nIns;
1884 asm_fcmp(cond);
1885 return at;
1888 void Assembler::asm_setcc(Register r, LIns *cond)
1890 LOpcode c = cond->opcode();
1891 if (config.sse2 && c != LIR_feq) {
1892 MOVZX8(r,r);
1893 LIns *lhs = cond->oprnd1();
1894 LIns *rhs = cond->oprnd2();
1895 if (c == LIR_flt) {
1896 LIns *t = lhs; lhs = rhs; rhs = t;
1897 SETA(r);
1899 else if (c == LIR_fle) {
1900 LIns *t = lhs; lhs = rhs; rhs = t;
1901 SETAE(r);
1903 else if (c == LIR_fgt) {
1904 SETA(r);
1906 else { // if (c == LIR_fge)
1907 SETAE(r);
1909 Reservation *rA, *rB;
1910 findRegFor2(XmmRegs, lhs, rA, rhs, rB);
1911 SSE_UCOMISD(rA->reg, rB->reg);
1912 return;
1914 // SETcc only sets low 8 bits, so extend
1915 MOVZX8(r,r);
1916 SETNP(r);
1917 asm_fcmp(cond);
1920 void Assembler::asm_fcmp(LIns *cond)
1922 LOpcode condop = cond->opcode();
1923 NanoAssert(condop >= LIR_feq && condop <= LIR_fge);
1924 LIns* lhs = cond->oprnd1();
1925 LIns* rhs = cond->oprnd2();
1927 int mask;
1928 if (condop == LIR_feq)
1929 mask = 0x44;
1930 else if (condop == LIR_fle)
1931 mask = 0x41;
1932 else if (condop == LIR_flt)
1933 mask = 0x05;
1934 else if (condop == LIR_fge) {
1935 // swap, use le
1936 condop = LIR_fle;
1937 LIns* t = lhs; lhs = rhs; rhs = t;
1938 mask = 0x41;
1939 } else { // if (condop == LIR_fgt)
1940 // swap, use lt
1941 condop = LIR_flt;
1942 LIns* t = lhs; lhs = rhs; rhs = t;
1943 mask = 0x05;
1946 #if defined NANOJIT_IA32
1947 if (config.sse2)
1949 #endif
1950 // UNORDERED: ZF,PF,CF <- 111;
1951 // GREATER_THAN: ZF,PF,CF <- 000;
1952 // LESS_THAN: ZF,PF,CF <- 001;
1953 // EQUAL: ZF,PF,CF <- 100;
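// A sketch of how those masks turn into a boolean, assuming the standard
// x86 flag layout (ZF, PF, CF are bits 6, 2, 0 of the flags byte): LAHF
// (or PUSHFQ/POP RAX on AMD64) exposes that byte, and TEST against the
// mask sets PF to "even number of set bits in (flags & mask)".  Callers
// then use SETNP/JNP, so the condition reads as true exactly when
// flags & mask has odd parity.  For LIR_feq (mask 0x44 = ZF|PF):
// equal -> 0x40 (odd -> true); unordered -> 0x44, greater/less -> 0x00
// (even -> false).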
1955 if (condop == LIR_feq && lhs == rhs) {
1956 // nan check
1957 Register r = findRegFor(lhs, XmmRegs);
1958 SSE_UCOMISD(r, r);
1960 else {
1961 #if defined NANOJIT_IA32
1962 evict(EAX);
1963 TEST_AH(mask);
1964 LAHF();
1965 #elif defined NANOJIT_AMD64
1966 evict(RAX);
1967 TEST_AL(mask);
1968 POPr(RAX);
1969 PUSHFQ();
1970 #endif
1971 Reservation *rA, *rB;
1972 findRegFor2(XmmRegs, lhs, rA, rhs, rB);
1973 SSE_UCOMISD(rA->reg, rB->reg);
1975 #if defined NANOJIT_IA32
1977 else
1979 evict(EAX);
1980 TEST_AH(mask);
1981 FNSTSW_AX();
1982 NanoAssert(lhs->isQuad() && rhs->isQuad());
1983 Reservation *rA;
1984 if (lhs != rhs)
1986 // compare two different numbers
1987 int d = findMemFor(rhs);
1988 rA = getresv(lhs);
1989 int pop = !rA || rA->reg == UnknownReg;
1990 findSpecificRegFor(lhs, FST0);
1991 // lhs is in ST(0) and rhs is on stack
1992 FCOM(pop, d, FP);
1994 else
1996 // compare n to itself, this is a NaN test.
1997 rA = getresv(lhs);
1998 int pop = !rA || rA->reg == UnknownReg;
1999 findSpecificRegFor(lhs, FST0);
2000 // value in ST(0)
2001 if (pop)
2002 FCOMPP();
2003 else
2004 FCOMP();
2005 FLDr(FST0); // DUP
2008 #endif
2011 void Assembler::nativePageReset()
2013 #if defined NANOJIT_AMD64
2014 /* We store some stuff at the bottom of the page.
2015 * We reserve 8-bytes for long jumps just in case we need them.
2017 _pageData = 0;
2018 _dblNegPtr = NULL;
2019 _negOnePtr = NULL;
2020 #endif
2023 Register Assembler::asm_binop_rhs_reg(LInsp ins)
2025 LOpcode op = ins->opcode();
2026 LIns *rhs = ins->oprnd2();
2028 if (op == LIR_lsh || op == LIR_rsh || op == LIR_ush) {
2029 #if defined NANOJIT_IA32
2030 return findSpecificRegFor(rhs, ECX);
2031 #elif defined NANOJIT_AMD64
2032 return findSpecificRegFor(rhs, RCX);
2033 #endif
2036 return UnknownReg;
2039 #if defined NANOJIT_AMD64
2040 void Assembler::asm_qbinop(LIns *ins)
2042 LInsp lhs = ins->oprnd1();
2043 LInsp rhs = ins->oprnd2();
2044 LOpcode op = ins->opcode();
2046 Register rr = prepResultReg(ins, GpRegs);
2047 Reservation *rA = getresv(lhs);
2048 Register ra;
2050 if (rA == NULL || (ra = rA->reg) == UnknownReg) {
2051 ra = findSpecificRegFor(lhs, rr);
2054 if (rhs->isconst())
2056 int c = rhs->constval();
2058 if (op == LIR_qiadd)
2060 ADDQi(rr, c);
2061 } else if (op == LIR_qiand) {
2062 ANDQi(rr, c);
2063 } else if (op == LIR_qilsh) {
2064 SHLQi(rr, c);
2065 } else if (op == LIR_qior) {
2066 ORQi(rr, c);
2068 } else {
2069 Register rv;
2071 if (lhs == rhs) {
2072 rv = ra;
2073 } else {
2074 rv = findRegFor(rhs, GpRegs & ~(rmask(rr)));
2077 if (op == LIR_qiadd) {
2078 ADDQ(rr, rv);
2079 } else if (op == LIR_qiand) {
2080 ANDQ(rr, rv);
2081 } else if (op == LIR_qior) {
2082 ORQ(rr, rv);
2083 } else {
2084 NanoAssert(rhs->isconst());
2088 if (rr != ra) {
2089 MR(rr, ra);
2092 #endif
2094 void Assembler::nativePageSetup()
2096 if (!_nIns) _nIns = pageAlloc();
2097 if (!_nExitIns) _nExitIns = pageAlloc(true);
2100 // enough room for n bytes
2101 void Assembler::underrunProtect(int n)
2103 NanoAssertMsg(n<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
2104 NIns *eip = this->_nIns;
2105 Page *p = (Page*)pageTop(eip-1);
2106 NIns *top = (NIns*) &p->code[0];
2107 if (eip - n < top) {
2108 _nIns = pageAlloc(_inExit);
2109 JMP(eip);
2113 #endif /* FEATURE_NANOJIT */