On x86 compilers without fastcall, simulate it when invoking traces and un-simulate...
[wine-gecko.git] / js / src / nanojit / Nativei386.cpp
/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: t; tab-width: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Adobe AS3 Team
 *   Mozilla TraceMonkey Team
 *   Asko Tontti <atontti@cc.hut.fi>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#ifdef _MAC
// for MakeDataExecutable
#include <CoreServices/CoreServices.h>
#endif

#if defined DARWIN || defined LINUX
#include <sys/mman.h>
#include <errno.h>
#endif
#include "nanojit.h"
namespace nanojit
{
    #ifdef FEATURE_NANOJIT

    #ifdef NJ_VERBOSE
        const char *regNames[] = {
#if defined NANOJIT_IA32
            "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
            "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
            "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7"
#elif defined NANOJIT_AMD64
            "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
            "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
            "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
            "xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15"
#endif
        };
    #endif
#if defined NANOJIT_IA32
    const Register Assembler::argRegs[] = { ECX, EDX };
    const Register Assembler::retRegs[] = { EAX, EDX };
#elif defined NANOJIT_AMD64
#if defined WIN64
    const Register Assembler::argRegs[] = { R8, R9, RCX, RDX };
#else
    const Register Assembler::argRegs[] = { RDI, RSI, RDX, RCX, R8, R9 };
#endif
    const Register Assembler::retRegs[] = { RAX, RDX };
#endif
    void Assembler::nInit(AvmCore* core)
    {
#if defined NANOJIT_IA32
        sse2 = core->use_sse2();

        // CMOVcc is actually available on most PPro+ chips (except for a few
        // oddballs like Via C3) but for now tie to SSE2 detection
        has_cmov = sse2;
#else
        has_cmov = true;
#endif
        OSDep::getDate();
    }
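
    // NB: like the other nanojit backends, this assembler generates code
    // backwards: _nIns starts at the high end of the buffer and each emitted
    // instruction is written in front of the previous one (see e.g.
    // "*(--_nIns) = JMPc" in asm_adjustBranch below).  The emit calls in the
    // methods that follow therefore appear in the reverse of execution order;
    // genEpilogue(), for instance, emits RET() first so that it executes last.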
    NIns* Assembler::genPrologue(RegisterMask needSaving)
    {
        /**
         * Prologue
         */
        uint32_t stackNeeded = STACK_GRANULARITY * _activation.highwatermark;
        uint32_t savingCount = 0;

        for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
            if (needSaving&rmask(i))
                savingCount++;

        // After forcing alignment, we've pushed the pre-alignment SP
        // and savingCount registers.
        uint32_t stackPushed = STACK_GRANULARITY * (1+savingCount);
        uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
        uint32_t amt = aligned - stackPushed;
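        // For example (assuming 4-byte STACK_GRANULARITY and 16-byte
        // NJ_ALIGN_STACK on IA32): highwatermark=3 with one saved register
        // gives stackNeeded=12, stackPushed=8, aligned=alignUp(20,16)=32,
        // and therefore amt=24.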
        // Reserve stackNeeded bytes, padded
        // to preserve NJ_ALIGN_STACK-byte alignment.
        if (amt)
        {
#if defined NANOJIT_IA32
            SUBi(SP, amt);
#elif defined NANOJIT_AMD64
            SUBQi(SP, amt);
#endif
        }

        verbose_only( verbose_outputf("        %p:",_nIns); )
        verbose_only( verbose_output("        patch entry:"); )
        NIns *patchEntry = _nIns;
        MR(FP, SP); // Establish our own FP.

        // Save pre-alignment SP value here, where the FP will point,
        // to preserve the illusion of a valid frame chain for
        // functions like MMgc::GetStackTrace.  The 'return address'
        // of this 'frame' will be the last-saved register, but that's
        // fine, because the next-older frame will be legit.
        PUSHr(FP);

        for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
            if (needSaving&rmask(i))
                PUSHr(i);

        // We'd like to be able to use SSE instructions like MOVDQA on
        // stack slots; it requires 16B alignment.  Darwin requires a
        // 16B stack alignment, and Linux GCC seems to intend to
        // establish and preserve the same, but we're told that GCC
        // has not always done this right.  To avoid doubt, do it on
        // all platforms.  The prologue runs only when we enter
        // fragments from the interpreter, so forcing 16B alignment
        // here is cheap.
#if defined NANOJIT_IA32
        ANDi(SP, -NJ_ALIGN_STACK);
#elif defined NANOJIT_AMD64
        ANDQi(SP, -NJ_ALIGN_STACK);
#endif
        MR(FP,SP);
        PUSHr(FP); // Save caller's FP.

        return patchEntry;
    }
    void Assembler::nFragExit(LInsp guard)
    {
        SideExit *exit = guard->exit();
        bool trees = _frago->core()->config.tree_opt;
        Fragment *frag = exit->target;
        GuardRecord *lr = 0;
        bool destKnown = (frag && frag->fragEntry);
        if (destKnown && !trees)
        {
            // target already exists, emit jump now.  no patching required.
            JMP(frag->fragEntry);
            lr = 0;
        }
        else
        {
            // target doesn't exist yet.  emit jump to epilog, and set up to patch later.
            lr = placeGuardRecord(guard);
#if defined NANOJIT_AMD64
            /* 8 bytes for address, 4 for imm32, 2 for jmp */
            underrunProtect(14);
            _nIns -= 8;
            *(intptr_t *)_nIns = intptr_t(_epilogue);
            lr->jmp = _nIns;
            JMPm_nochk(0);
#else
            JMP_long(_epilogue);
            lr->jmp = _nIns;
#endif
#if 0
            // @todo optimization; is it worth it?  It means we can remove the loop over outbound in Fragment.link()
            // for trees we need the patch entry on the incoming fragment so we can unhook it later if needed
            if (trees && destKnown)
                patch(lr);
#endif
        }

        // first restore ESP from EBP, undoing SUBi(SP,amt) from genPrologue
        MR(SP,FP);

        #ifdef NJ_VERBOSE
        if (_frago->core()->config.show_stats)
        {
            // load EDX (arg1) with Fragment *fromFrag; the target fragment
            // will make use of this when calling fragenter().
#if defined NANOJIT_IA32
            int fromfrag = int((Fragment*)_thisfrag);
            LDi(argRegs[1], fromfrag);
#elif defined NANOJIT_AMD64
            LDQi(argRegs[1], intptr_t(_thisfrag));
#endif
        }
        #endif

        // return value is GuardRecord*
#if defined NANOJIT_IA32
        LDi(EAX, int(lr));
#elif defined NANOJIT_AMD64
        LDQi(RAX, intptr_t(lr));
#endif
    }
    NIns *Assembler::genEpilogue(RegisterMask restore)
    {
        RET();
        POPr(FP); // Restore caller's FP.
        MR(SP,FP); // Undo forced alignment.

        // Restore saved registers.
        for (Register i=UnknownReg; i >= FirstReg; i = prevreg(i))
            if (restore&rmask(i)) { POPr(i); }

        POPr(FP); // Pop the pre-alignment SP.
        return _nIns;
    }
    #if defined NANOJIT_IA32
    void Assembler::asm_call(LInsp ins)
    {
        uint32_t fid = ins->fid();
        const CallInfo* call = callInfoFor(fid);
        // must be signed, not unsigned
        const uint32_t iargs = call->count_iargs();
        int32_t fstack = call->count_args() - iargs;

        int32_t extra = 0;

#if defined NJ_NO_FASTCALL
        int32_t istack = iargs;
#else
        int32_t istack = iargs-2;  // first 2 4B args are in registers
        if (istack <= 0)
        {
            istack = 0;
        }
#endif

        const int32_t size = 4*istack + 8*fstack; // actual stack space used
        if (size) {
            // stack re-alignment
            // only pop our adjustment amount since callee pops args in FASTCALL mode
            extra = alignUp(size, NJ_ALIGN_STACK) - (size);
#ifndef NJ_NO_FASTCALL
            if (extra > 0)
            {
                ADDi(SP, extra);
            }
#endif
        }

#ifdef NJ_NO_FASTCALL
        // In C calling conventions, callee doesn't pop args.
        ADDi(SP, 4*iargs + 8*fstack + extra);
#endif

        CALL(call);

#ifdef NJ_NO_FASTCALL
        if (iargs >= 1) {
            PUSHr(ECX);
            if (iargs >= 2) {
                PUSHr(EDX);
            }
        }
#endif

        // make sure fpu stack is empty before call (restoreCallerSaved)
        NanoAssert(_allocator.isFree(FST0));
        // note: this code requires that ref arguments (ARGSIZE_Q)
        // be one of the first two arguments
        // pre-assign registers to the first 2 4B args
        const int max_regs = (iargs < 2) ? iargs : 2;
        int n = 0;

        ArgSize sizes[10];
        uint32_t argc = call->get_sizes(sizes);

        for(uint32_t i=0; i < argc; i++)
        {
            uint32_t j = argc-i-1;
            ArgSize sz = sizes[j];
            Register r = UnknownReg;
            if (n < max_regs && sz != ARGSIZE_F)
                r = argRegs[n++]; // tell asm_arg what reg to use
            asm_arg(sz, ins->arg(j), r);
        }

        if (extra > 0)
        {
            SUBi(SP, extra);
        }
    }
#elif defined NANOJIT_AMD64

    void Assembler::asm_call(LInsp ins)
    {
        Register fpu_reg = XMM0;
        uint32_t fid = ins->fid();
        const CallInfo* call = callInfoFor(fid);
        int n = 0;

        CALL(call);

        ArgSize sizes[10];
        uint32_t argc = call->get_sizes(sizes);

        for(uint32_t i=0; i < argc; i++)
        {
            uint32_t j = argc-i-1;
            ArgSize sz = sizes[j];
            Register r = UnknownReg;
            if (sz != ARGSIZE_F) {
                r = argRegs[n++]; // tell asm_arg what reg to use
            } else {
                r = fpu_reg;
                fpu_reg = nextreg(fpu_reg);
            }
            findSpecificRegFor(ins->arg(j), r);
        }
    }
#endif
    void Assembler::nMarkExecute(Page* page, int32_t count, bool enable)
    {
#if defined WIN32 || defined WIN64
        DWORD dwIgnore;
        VirtualProtect(&page->code, count*NJ_PAGE_SIZE, PAGE_EXECUTE_READWRITE, &dwIgnore);
#elif defined DARWIN || defined AVMPLUS_LINUX
        intptr_t addr = (intptr_t)&page->code;
        addr &= ~((uintptr_t)NJ_PAGE_SIZE - 1);
        if (mprotect((void *)addr, count*NJ_PAGE_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) {
            AvmDebugLog(("FATAL ERROR: mprotect(PROT_EXEC) failed\n"));
            abort();
        }
#endif
        (void)enable;
    }
    Register Assembler::nRegisterAllocFromSet(int set)
    {
        Register r;
        RegAlloc &regs = _allocator;
#ifdef WIN32
        _asm
        {
            mov ecx, regs
            bsf eax, set                    // i = first bit set
            btr RegAlloc::free[ecx], eax    // free &= ~rmask(i)
            mov r, eax
        }
#elif defined WIN64
        unsigned long tr, fr;
        _BitScanForward(&tr, set);
        _bittestandreset(&fr, tr);
        regs.free = fr;
        r = tr;
#else
        asm(
            "bsf    %1, %%eax\n\t"
            "btr    %%eax, %2\n\t"
            "movl   %%eax, %0\n\t"
            : "=m"(r) : "m"(set), "m"(regs.free) : "%eax", "memory" );
#endif /* WIN32 */
        return r;
    }
    void Assembler::nRegisterResetAll(RegAlloc& a)
    {
        // add scratch registers to our free list for the allocator
        a.clear();
        a.used = 0;
        a.free = SavedRegs | ScratchRegs;
#if defined NANOJIT_IA32
        if (!sse2)
            a.free &= ~XmmRegs;
#endif
        debug_only( a.managed = a.free; )
    }
    void Assembler::nPatchBranch(NIns* branch, NIns* location)
    {
#if defined NANOJIT_IA32
        intptr_t offset = intptr_t(location) - intptr_t(branch);
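        // The rel32 operand is relative to the end of the instruction: a
        // direct JMP (opcode JMPc) is 5 bytes with the imm32 at offset 1,
        // while a two-byte Jcc (0x0F 0x8x) is 6 bytes with the imm32 at
        // offset 2 -- hence the -5 / -6 adjustments below.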
        if (branch[0] == JMPc)
            *(uint32_t*)&branch[1] = offset - 5;
        else
            *(uint32_t*)&branch[2] = offset - 6;
#else
        if (branch[0] == 0xFF && branch[1] == 0x25) {
            NIns *mem;

            mem = &branch[6] + *(int32_t *)&branch[2];
            *(intptr_t *)mem = intptr_t(location);
        } else {
            NanoAssertMsg(0, "Unknown branch type in nPatchBranch");
        }
#endif
    }
    RegisterMask Assembler::hint(LIns* i, RegisterMask allow)
    {
        uint32_t op = i->opcode();
        int prefer = allow;
        if (op == LIR_call)
#if defined NANOJIT_IA32
            prefer &= rmask(EAX);
#elif defined NANOJIT_AMD64
            prefer &= rmask(RAX);
#endif
        else if (op == LIR_param)
            prefer &= rmask(Register(i->imm8()));
#if defined NANOJIT_IA32
        else if (op == LIR_callh || op == LIR_rsh && i->oprnd1()->opcode()==LIR_callh)
            prefer &= rmask(EDX);
#else
        else if (op == LIR_callh)
            prefer &= rmask(RAX);
#endif
        else if (i->isCmp())
            prefer &= AllowableFlagRegs;
        else if (i->isconst())
            prefer &= ScratchRegs;
        return (_allocator.free & prefer) ? prefer : allow;
    }
    void Assembler::asm_qjoin(LIns *ins)
    {
        int d = findMemFor(ins);
        AvmAssert(d);
        LIns* lo = ins->oprnd1();
        LIns* hi = ins->oprnd2();

        Reservation *resv = getresv(ins);
        Register rr = resv->reg;

        if (rr != UnknownReg && (rmask(rr) & FpRegs))
            evict(rr);

        if (hi->isconst())
        {
            STi(FP, d+4, hi->constval());
        }
        else
        {
            Register r = findRegFor(hi, GpRegs);
            ST(FP, d+4, r);
        }

        if (lo->isconst())
        {
            STi(FP, d, lo->constval());
        }
        else
        {
            // okay if r gets recycled.
            Register r = findRegFor(lo, GpRegs);
            ST(FP, d, r);
        }

        freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
    }
    void Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
    {
        if (i->isconst())
        {
            if (!resv->arIndex) {
                reserveFree(i);
            }
            LDi(r, i->constval());
        }
        else
        {
            int d = findMemFor(i);
            if (rmask(r) & FpRegs)
            {
#if defined NANOJIT_IA32
                if (rmask(r) & XmmRegs) {
#endif
                    SSE_LDQ(r, d, FP);
#if defined NANOJIT_IA32
                } else {
                    FLDQ(d, FP);
                }
#endif
            }
            else
            {
#if defined NANOJIT_AMD64
                LDQ(r, d, FP);
#else
                LD(r, d, FP);
#endif
            }
            verbose_only(if (_verbose) {
                outputf("        restore %s", _thisfrag->lirbuf->names->formatRef(i));
            })
        }
    }
    void Assembler::asm_store32(LIns *value, int dr, LIns *base)
    {
        if (value->isconst())
        {
            Register rb = findRegFor(base, GpRegs);
            int c = value->constval();
            STi(rb, dr, c);
        }
        else
        {
            // make sure the value is in a register
            Reservation *rA, *rB;
            findRegFor2(GpRegs, value, rA, base, rB);
            Register ra = rA->reg;
            Register rb = rB->reg;
            ST(rb, dr, ra);
        }
    }
    void Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
    {
        (void)i;
        int d = disp(resv);
        Register rr = resv->reg;
        if (d)
        {
            // save to spill location
            if (rmask(rr) & FpRegs)
            {
#if defined NANOJIT_IA32
                if (rmask(rr) & XmmRegs) {
#endif
                    SSE_STQ(d, FP, rr);
#if defined NANOJIT_IA32
                } else {
                    FSTQ((pop?1:0), d, FP);
                }
#endif
            }
            else
            {
#if defined NANOJIT_AMD64
                STQ(FP, d, rr);
#else
                ST(FP, d, rr);
#endif
            }
            verbose_only(if (_verbose) {
                outputf("        spill %s",_thisfrag->lirbuf->names->formatRef(i));
            })
        }
#if defined NANOJIT_IA32
        else if (pop && (rmask(rr) & x87Regs))
        {
            // pop the fpu result since it isn't used
            FSTP(FST0);
        }
#endif
    }
    void Assembler::asm_load64(LInsp ins)
    {
        LIns* base = ins->oprnd1();
        int db = ins->oprnd2()->constval();
        Reservation *resv = getresv(ins);
        Register rr = resv->reg;

        if (rr != UnknownReg && rmask(rr) & XmmRegs)
        {
            freeRsrcOf(ins, false);
            Register rb = findRegFor(base, GpRegs);
            SSE_LDQ(rr, db, rb);
        }
#if defined NANOJIT_AMD64
        else if (rr != UnknownReg && rmask(rr) & GpRegs)
        {
            freeRsrcOf(ins, false);
            Register rb = findRegFor(base, GpRegs);
            LDQ(rr, db, rb);
        }
        else
        {
            int d = disp(resv);
            Register rb = findRegFor(base, GpRegs);

            /* We need a temporary register we can move the destination into */
            rr = registerAlloc(GpRegs);

            STQ(FP, d, rr);
            LDQ(rr, db, rb);

            /* Mark as free */
            _allocator.addFree(rr);

            freeRsrcOf(ins, false);
        }
#elif defined NANOJIT_IA32
        else
        {
            int dr = disp(resv);
            Register rb = findRegFor(base, GpRegs);
            resv->reg = UnknownReg;

            // don't use an fpu reg to simply load & store the value.
            if (dr)
                asm_mmq(FP, dr, rb, db);

            freeRsrcOf(ins, false);

            if (rr != UnknownReg)
            {
                NanoAssert(rmask(rr)&FpRegs);
                _allocator.retire(rr);
                FLDQ(db, rb);
            }
        }
#endif
    }
    void Assembler::asm_store64(LInsp value, int dr, LInsp base)
    {
        if (value->isconstq())
        {
            // if a constant 64-bit value just store it now rather than
            // generating a pointless store/load/store sequence
            Register rb = findRegFor(base, GpRegs);
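            // The 64-bit payload of a quad immediate appears to live in the
            // two 32-bit LIR slots immediately preceding the instruction
            // (compare the matching (ins-2) casts in asm_quad below), so
            // (value-2) points at the low/high words here.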
            const int32_t* p = (const int32_t*) (value-2);
            STi(rb, dr+4, p[1]);
            STi(rb, dr, p[0]);
            return;
        }

#if defined NANOJIT_IA32
        if (value->isop(LIR_ldq) || value->isop(LIR_qjoin))
        {
            // value is 64bit struct or int64_t, or maybe a double.
            // it may be live in an FPU reg.  Either way, don't
            // put it in an FPU reg just to load & store it.

            // a) if we know it's not a double, this is right.
            // b) if we guarded that it's a double, this store could be on
            //    the side exit, copying a non-double.
            // c) maybe it's a double just being stored.  oh well.

            if (sse2) {
                Register rv = findRegFor(value, XmmRegs);
                Register rb = findRegFor(base, GpRegs);
                SSE_STQ(dr, rb, rv);
                return;
            }

            int da = findMemFor(value);
            Register rb = findRegFor(base, GpRegs);
            asm_mmq(rb, dr, FP, da);
            return;
        }

        Reservation* rA = getresv(value);
        int pop = !rA || rA->reg==UnknownReg;
        Register rv = findRegFor(value, sse2 ? XmmRegs : FpRegs);
        Register rb = findRegFor(base, GpRegs);

        if (rmask(rv) & XmmRegs) {
            SSE_STQ(dr, rb, rv);
        } else {
            FSTQ(pop, dr, rb);
        }
#elif defined NANOJIT_AMD64
        /* If this is not a float operation, we can use GpRegs instead.
         * We can do this in a few other cases but for now I'll keep it simple.
         */
        Register rb = findRegFor(base, GpRegs);
        Reservation *rV = getresv(value);

        if (rV != NULL && rV->reg != UnknownReg) {
            if (rmask(rV->reg) & GpRegs) {
                STQ(rb, dr, rV->reg);
            } else {
                SSE_STQ(dr, rb, rV->reg);
            }
        } else {
            Register rv;

            /* Try to catch some common patterns.
             * Note: this is a necessity, since in between things like
             * asm_fop() could see the reservation and try to use a non-SSE
             * register for adding.  Same for asm_qbinop in theory.
             * There should probably be asserts to catch more cases.
             */
            if (value->isop(LIR_u2f)
                || value->isop(LIR_i2f)
                || value->opcode() == LIR_fcall) {
                rv = findRegFor(value, XmmRegs);
                SSE_STQ(dr, rb, rv);
            } else {
                rv = findRegFor(value, GpRegs);
                STQ(rb, dr, rv);
            }
        }
#endif
    }
    /**
     * copy 64 bits: (rd+dd) <- (rs+ds)
     */
    void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
    {
        // value is either a 64bit struct or maybe a float
        // that isn't live in an FPU reg.  Either way, don't
        // put it in an FPU reg just to load & store it.
#if defined NANOJIT_IA32
        if (sse2)
        {
#endif
            // use SSE to load+store 64bits
            Register t = registerAlloc(XmmRegs);
            _allocator.addFree(t);
            SSE_STQ(dd, rd, t);
            SSE_LDQ(t, ds, rs);
#if defined NANOJIT_IA32
        }
        else
        {
            // get a scratch reg
            Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs)));
            _allocator.addFree(t);
            ST(rd, dd+4, t);
            LD(t, ds+4, rs);
            ST(rd, dd, t);
            LD(t, ds, rs);
        }
#endif
    }
    void Assembler::asm_quad(LInsp ins)
    {
#if defined NANOJIT_IA32
        Reservation *rR = getresv(ins);
        Register rr = rR->reg;
        if (rr != UnknownReg)
        {
            // @todo -- add special-cases for 0 and 1
            _allocator.retire(rr);
            rR->reg = UnknownReg;
            NanoAssert((rmask(rr) & FpRegs) != 0);

            const double d = ins->constvalf();
            if (rmask(rr) & XmmRegs) {
                if (d == 0.0) {
                    SSE_XORPDr(rr, rr);
                } else if (d == 1.0) {
                    // 1.0 is extremely frequent and worth special-casing!
                    static const double k_ONE = 1.0;
                    LDSDm(rr, &k_ONE);
                } else {
                    findMemFor(ins);
                    const int d = disp(rR);
                    SSE_LDQ(rr, d, FP);
                }
            } else {
                if (d == 0.0) {
                    FLDZ();
                } else if (d == 1.0) {
                    FLD1();
                } else {
                    findMemFor(ins);
                    int d = disp(rR);
                    FLDQ(d,FP);
                }
            }
        }

        // @todo, if we used xor, ldsd, fldz, etc above, we don't need mem here
        int d = disp(rR);
        freeRsrcOf(ins, false);
        if (d)
        {
            const int32_t* p = (const int32_t*) (ins-2);
            STi(FP,d+4,p[1]);
            STi(FP,d,p[0]);
        }
#elif defined NANOJIT_AMD64
        Reservation *rR = getresv(ins);
        int64_t val = *(int64_t *)(ins - 2);

        if (rR->reg != UnknownReg)
        {
            Register rr = rR->reg;
            freeRsrcOf(ins, false);
            if (rmask(rr) & GpRegs)
            {
                LDQi(rr, val);
            }
            else if (rmask(rr) & XmmRegs)
            {
                if (ins->constvalf() == 0.0)
                {
                    SSE_XORPDr(rr, rr);
                }
                else
                {
                    /* Get a short-lived register, not associated with instruction */
                    Register rs = registerAlloc(GpRegs);

                    SSE_MOVD(rr, rs);
                    LDQi(rs, val);

                    _allocator.addFree(rs);
                }
            }
        }
        else
        {
            const int32_t* p = (const int32_t*) (ins-2);
            int dr = disp(rR);
            freeRsrcOf(ins, false);
            STi(FP, dr+4, p[1]);
            STi(FP, dr, p[0]);
        }
#endif
    }
    bool Assembler::asm_qlo(LInsp ins, LInsp q)
    {
#if defined NANOJIT_IA32
        if (!sse2)
        {
            return false;
        }
#endif

        Reservation *resv = getresv(ins);
        Register rr = resv->reg;
        if (rr == UnknownReg) {
            // store quad in spill loc
            int d = disp(resv);
            freeRsrcOf(ins, false);
            Register qr = findRegFor(q, XmmRegs);
            SSE_MOVDm(d, FP, qr);
        } else {
            freeRsrcOf(ins, false);
            Register qr = findRegFor(q, XmmRegs);
            SSE_MOVD(rr,qr);
        }

        return true;
    }
    void Assembler::asm_fneg(LInsp ins)
    {
#if defined NANOJIT_IA32
        if (sse2)
        {
#endif
            LIns *lhs = ins->oprnd1();

            Register rr = prepResultReg(ins, XmmRegs);
            Reservation *rA = getresv(lhs);
            Register ra;

            // if this is last use of lhs in reg, we can re-use result reg
            if (rA == 0 || (ra = rA->reg) == UnknownReg)
                ra = findSpecificRegFor(lhs, rr);
            // else, rA already has a register assigned.

            static const AVMPLUS_ALIGN16(uint32_t) negateMask[] = {0,0x80000000,0,0};
            SSE_XORPD(rr, negateMask);

            if (rr != ra)
                SSE_MOVSD(rr, ra);
#if defined NANOJIT_IA32
        }
        else
        {
            Register rr = prepResultReg(ins, FpRegs);

            LIns* lhs = ins->oprnd1();

            // lhs into reg, prefer same reg as result
            Reservation* rA = getresv(lhs);
            // if this is last use of lhs in reg, we can re-use result reg
            if (rA == 0 || rA->reg == UnknownReg)
                findSpecificRegFor(lhs, rr);
            // else, rA already has a different reg assigned

            NanoAssert(getresv(lhs)!=0 && getresv(lhs)->reg==FST0);
            // assume that the lhs is in ST(0) and rhs is on stack
            FCHS();

            // if we had more than one fpu reg, this is where
            // we would move ra into rr if rr != ra.
        }
#endif
    }
    void Assembler::asm_pusharg(LInsp p)
    {
        // arg goes on stack
        Reservation* rA = getresv(p);
        if (rA == 0)
        {
            if (p->isconst())
            {
                // small const we push directly
                PUSHi(p->constval());
            }
            else
            {
                Register ra = findRegFor(p, GpRegs);
                PUSHr(ra);
            }
        }
        else if (rA->reg == UnknownReg)
        {
            PUSHm(disp(rA), FP);
        }
        else
        {
            PUSHr(rA->reg);
        }
    }
    void Assembler::asm_farg(LInsp p)
    {
#if defined NANOJIT_IA32
        Register r = findRegFor(p, FpRegs);
        if (rmask(r) & XmmRegs) {
            SSE_STQ(0, SP, r);
        } else {
            FSTPQ(0, SP);
        }
        PUSHr(ECX); // 2*pushr is smaller than sub
        PUSHr(ECX);
#endif
    }
    void Assembler::asm_fop(LInsp ins)
    {
        LOpcode op = ins->opcode();
#if defined NANOJIT_IA32
        if (sse2)
        {
#endif
            LIns *lhs = ins->oprnd1();
            LIns *rhs = ins->oprnd2();

            RegisterMask allow = XmmRegs;
            Register rb = UnknownReg;
            if (lhs != rhs) {
                rb = findRegFor(rhs,allow);
                allow &= ~rmask(rb);
            }

            Register rr = prepResultReg(ins, allow);
            Reservation *rA = getresv(lhs);
            Register ra;

            // if this is last use of lhs in reg, we can re-use result reg
            if (rA == 0 || (ra = rA->reg) == UnknownReg)
                ra = findSpecificRegFor(lhs, rr);
            // else, rA already has a register assigned.

            if (lhs == rhs)
                rb = ra;

            if (op == LIR_fadd)
                SSE_ADDSD(rr, rb);
            else if (op == LIR_fsub)
                SSE_SUBSD(rr, rb);
            else if (op == LIR_fmul)
                SSE_MULSD(rr, rb);
            else //if (op == LIR_fdiv)
                SSE_DIVSD(rr, rb);

            if (rr != ra)
                SSE_MOVSD(rr, ra);
#if defined NANOJIT_IA32
        }
        else
        {
            // we swap lhs/rhs on purpose here, works out better
            // if you only have one fpu reg.  use divr/subr.
            LIns* rhs = ins->oprnd1();
            LIns* lhs = ins->oprnd2();
            Register rr = prepResultReg(ins, rmask(FST0));

            // make sure rhs is in memory
            int db = findMemFor(rhs);

            // lhs into reg, prefer same reg as result
            Reservation* rA = getresv(lhs);
            // last use of lhs in reg, can reuse rr
            if (rA == 0 || rA->reg == UnknownReg)
                findSpecificRegFor(lhs, rr);
            // else, rA already has a different reg assigned

            NanoAssert(getresv(lhs)!=0 && getresv(lhs)->reg==FST0);
            // assume that the lhs is in ST(0) and rhs is on stack
            if (op == LIR_fadd)
                { FADD(db, FP); }
            else if (op == LIR_fsub)
                { FSUBR(db, FP); }
            else if (op == LIR_fmul)
                { FMUL(db, FP); }
            else if (op == LIR_fdiv)
                { FDIVR(db, FP); }
        }
#endif
    }
    void Assembler::asm_i2f(LInsp ins)
    {
        // where our result goes
        Register rr = prepResultReg(ins, FpRegs);
#if defined NANOJIT_IA32
        if (rmask(rr) & XmmRegs)
        {
#endif
            // todo support int value in memory
            Register gr = findRegFor(ins->oprnd1(), GpRegs);
            SSE_CVTSI2SD(rr, gr);
#if defined NANOJIT_IA32
        }
        else
        {
            int d = findMemFor(ins->oprnd1());
            FILD(d, FP);
        }
#endif
    }
    Register Assembler::asm_prep_fcall(Reservation *rR, LInsp ins)
    {
#if defined NANOJIT_IA32
        if (rR) {
            Register rr;
            if ((rr=rR->reg) != UnknownReg && (rmask(rr) & XmmRegs))
                evict(rr);
        }
        return prepResultReg(ins, rmask(FST0));
#elif defined NANOJIT_AMD64
        evict(RAX);
        return prepResultReg(ins, rmask(XMM0));
#endif
    }
    void Assembler::asm_u2f(LInsp ins)
    {
        // where our result goes
        Register rr = prepResultReg(ins, FpRegs);
#if defined NANOJIT_IA32
        if (rmask(rr) & XmmRegs)
        {
#endif
            // don't call findRegFor, we want a reg we can stomp on for a very short time,
            // not a reg that will continue to be associated with the LIns
            Register gr = registerAlloc(GpRegs);

            // technique inspired by gcc disassembly
            // Edwin explains it:
            //
            // gr is 0..2^32-1
            //
            //     sub gr,0x80000000
            //
            // now gr is -2^31..2^31-1, i.e. the range of int, but not the same value
            // as before
            //
            //     cvtsi2sd rr,gr
            //
            // rr is now a double with the int value range
            //
            //     addsd rr, 2147483648.0
            //
            // adding back double(0x80000000) makes the range 0..2^32-1.
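            //
            // (worked example: gr = 0xFFFFFFFF = 4294967295; after the sub,
            //  gr = 0x7FFFFFFF = 2147483647; cvtsi2sd gives 2147483647.0;
            //  adding 2147483648.0 recovers 4294967295.0 exactly.)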
            static const double k_NEGONE = 2147483648.0;
#if defined NANOJIT_IA32
            SSE_ADDSDm(rr, &k_NEGONE);
#elif defined NANOJIT_AMD64
            /* Squirrel the constant at the bottom of the page. */
            if (_dblNegPtr != NULL)
            {
                underrunProtect(10);
            }
            if (_dblNegPtr == NULL)
            {
                underrunProtect(30);
                uint8_t *base, *begin;
                base = (uint8_t *)((intptr_t)_nIns & ~((intptr_t)NJ_PAGE_SIZE-1));
                base += sizeof(PageHeader) + _pageData;
                begin = base;
                /* Make sure we align */
                if ((uintptr_t)base & 0xF) {
                    base = (NIns *)((uintptr_t)base & ~(0xF));
                    base += 16;
                }
                _pageData += (int32_t)(base - begin) + sizeof(double);
                _negOnePtr = (NIns *)base;
                *(double *)_negOnePtr = k_NEGONE;
            }
            SSE_ADDSDm(rr, _negOnePtr);
#endif

            SSE_CVTSI2SD(rr, gr);

            Reservation* resv = getresv(ins->oprnd1());
            Register xr;
            if (resv && (xr = resv->reg) != UnknownReg && (rmask(xr) & GpRegs))
            {
                LEA(gr, 0x80000000, xr);
            }
            else
            {
                const int d = findMemFor(ins->oprnd1());
                SUBi(gr, 0x80000000);
                LD(gr, d, FP);
            }

            // ok, we're done with it
            _allocator.addFree(gr);
#if defined NANOJIT_IA32
        }
        else
        {
            const int disp = -8;
            const Register base = SP;
            Register gr = findRegFor(ins->oprnd1(), GpRegs);
            NanoAssert(rr == FST0);
            FILDQ(disp, base);
            STi(base, disp+4, 0);   // high 32 bits = 0
            ST(base, disp, gr);     // low 32 bits = unsigned value
        }
#endif
    }
    void Assembler::asm_nongp_copy(Register r, Register s)
    {
        if ((rmask(r) & XmmRegs) && (rmask(s) & XmmRegs)) {
            SSE_MOVSD(r, s);
        } else if ((rmask(r) & GpRegs) && (rmask(s) & XmmRegs)) {
            SSE_MOVD(r, s);
        } else {
            if (rmask(r) & XmmRegs) {
                // x87 -> xmm
                NanoAssertMsg(false, "Should not move data from GPR to XMM");
            } else {
                // xmm -> x87
                NanoAssertMsg(false, "Should not move data from GPR/XMM to x87 FPU");
            }
        }
    }
    void Assembler::asm_fcmp(LIns *cond)
    {
        LOpcode condop = cond->opcode();
        NanoAssert(condop >= LIR_feq && condop <= LIR_fge);
        LIns* lhs = cond->oprnd1();
        LIns* rhs = cond->oprnd2();

        int mask;
        if (condop == LIR_feq)
            mask = 0x44;
        else if (condop == LIR_fle)
            mask = 0x41;
        else if (condop == LIR_flt)
            mask = 0x05;
        else if (condop == LIR_fge) {
            // swap, use le
            LIns* t = lhs; lhs = rhs; rhs = t;
            mask = 0x41;
        } else { // if (condop == LIR_fgt)
            // swap, use lt
            LIns* t = lhs; lhs = rhs; rhs = t;
            mask = 0x05;
        }
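
        // These masks are tested against AH (loaded via LAHF or FNSTSW_AX
        // below) or, on AMD64, against AL from the pushed flags.  In that
        // byte CF is bit 0, PF is bit 2, and ZF is bit 6, so 0x44 selects
        // ZF|PF, 0x41 selects ZF|CF, and 0x05 selects PF|CF.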
#if defined NANOJIT_IA32
        if (sse2)
        {
#endif
            // UNORDERED:    ZF,PF,CF <- 111;
            // GREATER_THAN: ZF,PF,CF <- 000;
            // LESS_THAN:    ZF,PF,CF <- 001;
            // EQUAL:        ZF,PF,CF <- 100;

            if (condop == LIR_feq && lhs == rhs) {
                // nan check
                Register r = findRegFor(lhs, XmmRegs);
                SSE_UCOMISD(r, r);
            } else {
#if defined NANOJIT_IA32
                evict(EAX);
                TEST_AH(mask);
                LAHF();
#elif defined NANOJIT_AMD64
                evict(RAX);
                TEST_AL(mask);
                POPr(RAX);
                PUSHFQ();
#endif
                Reservation *rA, *rB;
                findRegFor2(XmmRegs, lhs, rA, rhs, rB);
                SSE_UCOMISD(rA->reg, rB->reg);
            }
#if defined NANOJIT_IA32
        }
        else
        {
            evict(EAX);
            TEST_AH(mask);
            FNSTSW_AX();
            NanoAssert(lhs->isQuad() && rhs->isQuad());
            Reservation *rA;
            if (lhs != rhs)
            {
                // compare two different numbers
                int d = findMemFor(rhs);
                rA = getresv(lhs);
                int pop = !rA || rA->reg == UnknownReg;
                findSpecificRegFor(lhs, FST0);
                // lhs is in ST(0) and rhs is on stack
                FCOM(pop, d, FP);
            }
            else
            {
                // compare n to itself, this is a NaN test.
                rA = getresv(lhs);
                int pop = !rA || rA->reg == UnknownReg;
                findSpecificRegFor(lhs, FST0);
                // value in ST(0)
                if (pop)
                    FCOMPP();
                else
                    FCOMP();
                FLDr(FST0); // DUP
            }
        }
#endif
    }
    NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
    {
        NIns* was;
#if defined NANOJIT_AMD64
        was = (NIns*)( *(intptr_t*)(at) );
        *(intptr_t *)(at) = intptr_t(target);
#else
        NIns* save = _nIns;
        was = (NIns*)( (intptr_t)*(int32_t*)(at+1)+(intptr_t)(at+5) );
        _nIns = at + 5; // +5 is size of JMP
        intptr_t tt = (intptr_t)target - (intptr_t)_nIns;
        IMM32(tt);
        *(--_nIns) = JMPc;
        _nIns = save;
#endif
        return was;
    }
    void Assembler::nativePageReset()
    {
#if defined NANOJIT_AMD64
        /* We store some stuff at the bottom of the page.
         * We reserve 8-bytes for long jumps just in case we need them.
         */
        _pageData = 0;
        _dblNegPtr = NULL;
        _negOnePtr = NULL;
#endif
    }
    Register Assembler::asm_binop_rhs_reg(LInsp ins)
    {
        LOpcode op = ins->opcode();
        LIns *rhs = ins->oprnd2();

        if (op == LIR_lsh || op == LIR_rsh || op == LIR_ush) {
#if defined NANOJIT_IA32
            return findSpecificRegFor(rhs, ECX);
#elif defined NANOJIT_AMD64
            return findSpecificRegFor(rhs, RCX);
#endif
        }

        return UnknownReg;
    }
#if defined NANOJIT_AMD64
    void Assembler::asm_qbinop(LIns *ins)
    {
        LInsp lhs = ins->oprnd1();
        LInsp rhs = ins->oprnd2();
        LOpcode op = ins->opcode();

        Register rr = prepResultReg(ins, GpRegs);
        Reservation *rA = getresv(lhs);
        Register ra;

        if (rA == NULL || (ra = rA->reg) == UnknownReg) {
            ra = findSpecificRegFor(lhs, rr);
        }

        if (rhs->isconst())
        {
            int c = rhs->constval();

            if (op == LIR_qiadd)
            {
                ADDQi(rr, c);
            } else if (op == LIR_qiand) {
                ANDQi(rr, c);
            } else if (op == LIR_qilsh) {
                SHLQi(rr, c);
            } else if (op == LIR_qior) {
                ORQi(rr, c);
            }
        } else {
            Register rv;

            if (lhs == rhs) {
                rv = ra;
            } else {
                rv = findRegFor(rhs, GpRegs & ~(rmask(rr)));
            }

            if (op == LIR_qiadd) {
                ADDQ(rr, rv);
            } else if (op == LIR_qiand) {
                ANDQ(rr, rv);
            } else if (op == LIR_qior) {
                ORQ(rr, rv);
            } else {
                NanoAssert(rhs->isconst());
            }
        }

        if (rr != ra) {
            MR(rr, ra);
        }
    }
#endif
    void Assembler::nativePageSetup()
    {
        if (!_nIns)      _nIns     = pageAlloc();
        if (!_nExitIns)  _nExitIns = pageAlloc(true);
    }
    #endif /* FEATURE_NANOJIT */
}