Make TraceMonkey build on Solaris x86 with Sun Studio 12 (bug 452588, r=danderson).
[wine-gecko.git] js/src/nanojit/Nativei386.cpp
/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: t; tab-width: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Adobe AS3 Team
 *   Mozilla TraceMonkey Team
 *   Asko Tontti <atontti@cc.hut.fi>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#ifdef _MAC
// for MakeDataExecutable
#include <CoreServices/CoreServices.h>
#endif

#if defined AVMPLUS_UNIX
#include <sys/mman.h>
#include <errno.h>
#endif
#include "nanojit.h"
namespace nanojit
{
#ifdef FEATURE_NANOJIT

#ifdef NJ_VERBOSE
    const char *regNames[] = {
#if defined NANOJIT_IA32
        "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
        "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
        "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7"
#elif defined NANOJIT_AMD64
        "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
        "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
        "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
        "xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15"
#endif
    };
#endif
#if defined NANOJIT_IA32
    const Register Assembler::argRegs[] = { ECX, EDX };
    const Register Assembler::retRegs[] = { EAX, EDX };
#elif defined NANOJIT_AMD64
#if defined WIN64
    const Register Assembler::argRegs[] = { R8, R9, RCX, RDX };
#else
    const Register Assembler::argRegs[] = { RDI, RSI, RDX, RCX, R8, R9 };
#endif
    const Register Assembler::retRegs[] = { RAX, RDX };
#endif
    void Assembler::nInit(AvmCore* core)
    {
#if defined NANOJIT_IA32
        sse2 = core->use_sse2();

        // CMOVcc is actually available on most PPro+ chips (except for a few
        // oddballs like Via C3) but for now tie to SSE2 detection
        has_cmov = sse2;
#else
        has_cmov = true;
#endif
        OSDep::getDate();
    }
    NIns* Assembler::genPrologue(RegisterMask needSaving)
    {
        /**
         * Prologue
         */
        uint32_t stackNeeded = STACK_GRANULARITY * _activation.highwatermark;
        uint32_t savingCount = 0;

        for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
            if (needSaving&rmask(i))
                savingCount++;

        // After forcing alignment, we've pushed the pre-alignment SP
        // and savingCount registers.
        uint32_t stackPushed = STACK_GRANULARITY * (1+savingCount);
        uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
        uint32_t amt = aligned - stackPushed;
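        // For example, assuming STACK_GRANULARITY == 4 and NJ_ALIGN_STACK == 16:
        // a highwatermark of 5 with two saved registers gives stackNeeded = 20,
        // stackPushed = 12, aligned = 32 and amt = 20, so the pushes plus the
        // SUB below move SP by a multiple of 16.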
        // Reserve stackNeeded bytes, padded
        // to preserve NJ_ALIGN_STACK-byte alignment.
        if (amt)
        {
#if defined NANOJIT_IA32
            SUBi(SP, amt);
#elif defined NANOJIT_AMD64
            SUBQi(SP, amt);
#endif
        }

        verbose_only( verbose_outputf("        %p:", _nIns); )
        verbose_only( verbose_output("        patch entry:"); )
        NIns *patchEntry = _nIns;
        MR(FP, SP); // Establish our own FP.

        // Save pre-alignment SP value here, where the FP will point,
        // to preserve the illusion of a valid frame chain for
        // functions like MMgc::GetStackTrace.  The 'return address'
        // of this 'frame' will be the last-saved register, but that's
        // fine, because the next-older frame will be legit.
        PUSHr(FP);

        for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
            if (needSaving&rmask(i))
                PUSHr(i);

        // We'd like to be able to use SSE instructions like MOVDQA on
        // stack slots; it requires 16B alignment.  Darwin requires a
        // 16B stack alignment, and Linux GCC seems to intend to
        // establish and preserve the same, but we're told that GCC
        // has not always done this right.  To avoid doubt, do it on
        // all platforms.  The prologue runs only when we enter
        // fragments from the interpreter, so forcing 16B alignment
        // here is cheap.
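        // ANDing SP with -NJ_ALIGN_STACK clears its low bits; e.g. assuming
        // NJ_ALIGN_STACK == 16, 0x0012FF3C & 0xFFFFFFF0 == 0x0012FF30, so SP is
        // 16-byte aligned before the frame is established above it.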
#if defined NANOJIT_IA32
        ANDi(SP, -NJ_ALIGN_STACK);
#elif defined NANOJIT_AMD64
        ANDQi(SP, -NJ_ALIGN_STACK);
#endif
        MR(FP,SP);
        PUSHr(FP); // Save caller's FP.

        return patchEntry;
    }
    void Assembler::nFragExit(LInsp guard)
    {
        SideExit *exit = guard->exit();
        bool trees = _frago->core()->config.tree_opt;
        Fragment *frag = exit->target;
        GuardRecord *lr = 0;
        bool destKnown = (frag && frag->fragEntry);
        if (destKnown && !trees)
        {
            // already exists, emit jump now.  no patching required.
            JMP(frag->fragEntry);
            lr = 0;
        }
        else
        {
            // target doesn't exist yet.  emit jump to epilog, and set up to patch later.
            lr = placeGuardRecord(guard);
#if defined NANOJIT_AMD64
            /* 8 bytes for address, 4 for imm32, 2 for jmp */
            underrunProtect(14);
            _nIns -= 8;
            *(intptr_t *)_nIns = intptr_t(_epilogue);
            lr->jmp = _nIns;
            JMPm_nochk(0);
#else
            JMP_long(_epilogue);
            lr->jmp = _nIns;
#endif
#if 0
            // @todo optimization; is it worth it? It means we can remove the loop over outbound in Fragment.link()
            // for trees we need the patch entry on the incoming fragment so we can unhook it later if needed
            if (trees && destKnown)
                patch(lr);
#endif
        }

        // first restore ESP from EBP, undoing SUBi(SP,amt) from genPrologue
        MR(SP,FP);

#ifdef NJ_VERBOSE
        if (_frago->core()->config.show_stats) {
            // load EDX (arg1) with Fragment *fromFrag, target fragment
            // will make use of this when calling fragenter().
#if defined NANOJIT_IA32
            int fromfrag = int((Fragment*)_thisfrag);
            LDi(argRegs[1], fromfrag);
#elif defined NANOJIT_AMD64
            LDQi(argRegs[1], intptr_t(_thisfrag));
#endif
        }
#endif

        // return value is GuardRecord*
#if defined NANOJIT_IA32
        LDi(EAX, int(lr));
#elif defined NANOJIT_AMD64
        LDQi(RAX, intptr_t(lr));
#endif
    }
    NIns *Assembler::genEpilogue(RegisterMask restore)
    {
        RET();
        POPr(FP); // Restore caller's FP.
        MR(SP,FP); // Undo forced alignment.

        // Restore saved registers.
        for (Register i=UnknownReg; i >= FirstReg; i = prevreg(i))
            if (restore&rmask(i)) { POPr(i); }

        POPr(FP); // Pop the pre-alignment SP.
        return _nIns;
    }
#if defined NANOJIT_IA32
    void Assembler::asm_call(LInsp ins)
    {
        uint32_t fid = ins->fid();
        const CallInfo* call = callInfoFor(fid);
        // must be signed, not unsigned
        const uint32_t iargs = call->count_iargs();
        int32_t fstack = call->count_args() - iargs;

        int32_t extra = 0;

#if defined NJ_NO_FASTCALL
        int32_t istack = iargs;
#else
        int32_t istack = iargs-2;  // first 2 4B args are in registers
        if (istack <= 0)
        {
            istack = 0;
        }
#endif

        const int32_t size = 4*istack + 8*fstack; // actual stack space used
        if (size) {
            // stack re-alignment
            // only pop our adjustment amount since callee pops args in FASTCALL mode
            extra = alignUp(size, NJ_ALIGN_STACK) - (size);
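            // For example, assuming NJ_ALIGN_STACK == 16: three int args and one
            // double give istack = 1 and fstack = 1, so size = 4 + 8 = 12 and
            // extra = alignUp(12, 16) - 12 = 4 bytes of padding.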
#ifndef NJ_NO_FASTCALL
            if (extra > 0)
            {
                ADDi(SP, extra);
            }
#endif
        }

#ifdef NJ_NO_FASTCALL
        // In C calling conventions, callee doesn't pop args.
        ADDi(SP, 4*iargs + 8*fstack + extra);
#endif

        CALL(call);

#ifdef NJ_NO_FASTCALL
        if (iargs >= 1) {
            PUSHr(ECX);
            if (iargs >= 2) {
                PUSHr(EDX);
            }
        }
#endif

        // make sure fpu stack is empty before call (restoreCallerSaved)
        NanoAssert(_allocator.isFree(FST0));
        // note: this code requires that ref arguments (ARGSIZE_Q)
        // be one of the first two arguments
        // pre-assign registers to the first 2 4B args
        const int max_regs = (iargs < 2) ? iargs : 2;
        int n = 0;

        ArgSize sizes[10];
        uint32_t argc = call->get_sizes(sizes);

        for(uint32_t i=0; i < argc; i++)
        {
            uint32_t j = argc-i-1;
            ArgSize sz = sizes[j];
            Register r = UnknownReg;
            if (n < max_regs && sz != ARGSIZE_F)
                r = argRegs[n++]; // tell asm_arg what reg to use
            asm_arg(sz, ins->arg(j), r);
        }

        if (extra > 0)
        {
            SUBi(SP, extra);
        }
    }
#elif defined NANOJIT_AMD64

    void Assembler::asm_call(LInsp ins)
    {
        Register fpu_reg = XMM0;
        uint32_t fid = ins->fid();
        const CallInfo* call = callInfoFor(fid);
        int n = 0;

        CALL(call);

        ArgSize sizes[10];
        uint32_t argc = call->get_sizes(sizes);

        for(uint32_t i=0; i < argc; i++)
        {
            uint32_t j = argc-i-1;
            ArgSize sz = sizes[j];
            Register r = UnknownReg;
            if (sz != ARGSIZE_F) {
                r = argRegs[n++]; // tell asm_arg what reg to use
            } else {
                r = fpu_reg;
                fpu_reg = nextreg(fpu_reg);
            }
            findSpecificRegFor(ins->arg(j), r);
        }
    }
#endif
    void Assembler::nMarkExecute(Page* page, int32_t count, bool enable)
    {
#if defined WIN32 || defined WIN64
        DWORD dwIgnore;
        VirtualProtect(&page->code, count*NJ_PAGE_SIZE, PAGE_EXECUTE_READWRITE, &dwIgnore);
#elif defined AVMPLUS_UNIX
        intptr_t addr = (intptr_t)&page->code;
        addr &= ~((uintptr_t)NJ_PAGE_SIZE - 1);
#if defined SOLARIS
        if (mprotect((char *)addr, count*NJ_PAGE_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) {
#else
        if (mprotect((void *)addr, count*NJ_PAGE_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) {
#endif
            AvmDebugLog(("FATAL ERROR: mprotect(PROT_EXEC) failed\n"));
            abort();
        }
#endif
        (void)enable;
    }
    Register Assembler::nRegisterAllocFromSet(int set)
    {
        Register r;
        RegAlloc &regs = _allocator;
#ifdef WIN32
        _asm
        {
            mov ecx, regs
            bsf eax, set                    // i = first bit set
            btr RegAlloc::free[ecx], eax    // free &= ~rmask(i)
            mov r, eax
        }
#elif defined WIN64
        unsigned long tr, fr;
        _BitScanForward(&tr, set);
        _bittestandreset(&fr, tr);
        regs.free = fr;
        r = tr;
#else
        asm(
            "bsf %1, %%eax\n\t"
            "btr %%eax, %2\n\t"
            "movl %%eax, %0\n\t"
            : "=m"(r) : "m"(set), "m"(regs.free) : "%eax", "memory" );
#endif /* WIN32 */
        return r;
    }
    void Assembler::nRegisterResetAll(RegAlloc& a)
    {
        // add scratch registers to our free list for the allocator
        a.clear();
        a.used = 0;
        a.free = SavedRegs | ScratchRegs;
#if defined NANOJIT_IA32
        if (!sse2)
            a.free &= ~XmmRegs;
#endif
        debug_only( a.managed = a.free; )
    }
    void Assembler::nPatchBranch(NIns* branch, NIns* location)
    {
#if defined NANOJIT_IA32
        intptr_t offset = intptr_t(location) - intptr_t(branch);
        if (branch[0] == JMPc)
            *(uint32_t*)&branch[1] = offset - 5;
        else
            *(uint32_t*)&branch[2] = offset - 6;
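        // JMP rel32 is E9 followed by a 32-bit displacement (5 bytes, displacement
        // at branch+1); Jcc rel32 is 0F 8x followed by a 32-bit displacement
        // (6 bytes, displacement at branch+2).  The displacement is relative to
        // the end of the instruction, hence the -5 / -6 adjustments.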
#else
        if (branch[0] == 0xFF && branch[1] == 0x25) {
            NIns *mem;

            mem = &branch[6] + *(int32_t *)&branch[2];
            *(intptr_t *)mem = intptr_t(location);
        } else {
            NanoAssertMsg(0, "Unknown branch type in nPatchBranch");
        }
#endif
    }
    RegisterMask Assembler::hint(LIns* i, RegisterMask allow)
    {
        uint32_t op = i->opcode();
        int prefer = allow;
        if (op == LIR_call)
#if defined NANOJIT_IA32
            prefer &= rmask(EAX);
#elif defined NANOJIT_AMD64
            prefer &= rmask(RAX);
#endif
        else if (op == LIR_param)
            prefer &= rmask(Register(i->imm8()));
#if defined NANOJIT_IA32
        else if (op == LIR_callh || op == LIR_rsh && i->oprnd1()->opcode()==LIR_callh)
            prefer &= rmask(EDX);
#else
        else if (op == LIR_callh)
            prefer &= rmask(RAX);
#endif
        else if (i->isCmp())
            prefer &= AllowableFlagRegs;
        else if (i->isconst())
            prefer &= ScratchRegs;
        return (_allocator.free & prefer) ? prefer : allow;
    }
    void Assembler::asm_qjoin(LIns *ins)
    {
        int d = findMemFor(ins);
        AvmAssert(d);
        LIns* lo = ins->oprnd1();
        LIns* hi = ins->oprnd2();

        Reservation *resv = getresv(ins);
        Register rr = resv->reg;

        if (rr != UnknownReg && (rmask(rr) & FpRegs))
            evict(rr);

        if (hi->isconst())
        {
            STi(FP, d+4, hi->constval());
        }
        else
        {
            Register r = findRegFor(hi, GpRegs);
            ST(FP, d+4, r);
        }

        if (lo->isconst())
        {
            STi(FP, d, lo->constval());
        }
        else
        {
            // okay if r gets recycled.
            Register r = findRegFor(lo, GpRegs);
            ST(FP, d, r);
        }

        freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
    }
    void Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
    {
        if (i->isconst())
        {
            if (!resv->arIndex) {
                reserveFree(i);
            }
            LDi(r, i->constval());
        }
        else
        {
            int d = findMemFor(i);
            if (rmask(r) & FpRegs)
            {
#if defined NANOJIT_IA32
                if (rmask(r) & XmmRegs) {
#endif
                    SSE_LDQ(r, d, FP);
#if defined NANOJIT_IA32
                } else {
                    FLDQ(d, FP);
                }
#endif
            }
            else
            {
#if defined NANOJIT_AMD64
                LDQ(r, d, FP);
#else
                LD(r, d, FP);
#endif
            }
            verbose_only(if (_verbose) {
                outputf("        restore %s", _thisfrag->lirbuf->names->formatRef(i));
            })
        }
    }
    void Assembler::asm_store32(LIns *value, int dr, LIns *base)
    {
        if (value->isconst())
        {
            Register rb = findRegFor(base, GpRegs);
            int c = value->constval();
            STi(rb, dr, c);
        }
        else
        {
            // make sure the value is in a register
            Reservation *rA, *rB;
            findRegFor2(GpRegs, value, rA, base, rB);
            Register ra = rA->reg;
            Register rb = rB->reg;
            ST(rb, dr, ra);
        }
    }
    void Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
    {
        (void)i;
        int d = disp(resv);
        Register rr = resv->reg;
        if (d)
        {
            // save to spill location
            if (rmask(rr) & FpRegs)
            {
#if defined NANOJIT_IA32
                if (rmask(rr) & XmmRegs) {
#endif
                    SSE_STQ(d, FP, rr);
#if defined NANOJIT_IA32
                } else {
                    FSTQ((pop?1:0), d, FP);
                }
#endif
            }
            else
            {
#if defined NANOJIT_AMD64
                STQ(FP, d, rr);
#else
                ST(FP, d, rr);
#endif
            }
            verbose_only(if (_verbose) {
                outputf("        spill %s", _thisfrag->lirbuf->names->formatRef(i));
            })
        }
#if defined NANOJIT_IA32
        else if (pop && (rmask(rr) & x87Regs))
        {
            // pop the fpu result since it isn't used
            FSTP(FST0);
        }
#endif
    }
    void Assembler::asm_load64(LInsp ins)
    {
        LIns* base = ins->oprnd1();
        int db = ins->oprnd2()->constval();
        Reservation *resv = getresv(ins);
        Register rr = resv->reg;

        if (rr != UnknownReg && rmask(rr) & XmmRegs)
        {
            freeRsrcOf(ins, false);
            Register rb = findRegFor(base, GpRegs);
            SSE_LDQ(rr, db, rb);
        }
#if defined NANOJIT_AMD64
        else if (rr != UnknownReg && rmask(rr) & GpRegs)
        {
            freeRsrcOf(ins, false);
            Register rb = findRegFor(base, GpRegs);
            LDQ(rr, db, rb);
        }
        else
        {
            int d = disp(resv);
            Register rb = findRegFor(base, GpRegs);

            /* We need a temporary register we can move the destination into */
            rr = registerAlloc(GpRegs);

            STQ(FP, d, rr);
            LDQ(rr, db, rb);

            /* Mark as free */
            _allocator.addFree(rr);

            freeRsrcOf(ins, false);
        }
#elif defined NANOJIT_IA32
        else
        {
            int dr = disp(resv);
            Register rb = findRegFor(base, GpRegs);
            resv->reg = UnknownReg;

            // don't use an fpu reg to simply load & store the value.
            if (dr)
                asm_mmq(FP, dr, rb, db);

            freeRsrcOf(ins, false);

            if (rr != UnknownReg)
            {
                NanoAssert(rmask(rr)&FpRegs);
                _allocator.retire(rr);
                FLDQ(db, rb);
            }
        }
#endif
    }
    void Assembler::asm_store64(LInsp value, int dr, LInsp base)
    {
        if (value->isconstq())
        {
            // if a constant 64-bit value just store it now rather than
            // generating a pointless store/load/store sequence
            Register rb = findRegFor(base, GpRegs);
            const int32_t* p = (const int32_t*) (value-2);
            STi(rb, dr+4, p[1]);
            STi(rb, dr, p[0]);
            return;
        }

#if defined NANOJIT_IA32
        if (value->isop(LIR_ldq) || value->isop(LIR_qjoin))
        {
            // value is 64bit struct or int64_t, or maybe a double.
            // it may be live in an FPU reg.  Either way, don't
            // put it in an FPU reg just to load & store it.

            // a) if we know it's not a double, this is right.
            // b) if we guarded that its a double, this store could be on
            // the side exit, copying a non-double.
            // c) maybe its a double just being stored.  oh well.

            if (sse2) {
                Register rv = findRegFor(value, XmmRegs);
                Register rb = findRegFor(base, GpRegs);
                SSE_STQ(dr, rb, rv);
                return;
            }

            int da = findMemFor(value);
            Register rb = findRegFor(base, GpRegs);
            asm_mmq(rb, dr, FP, da);
            return;
        }

        Reservation* rA = getresv(value);
        int pop = !rA || rA->reg==UnknownReg;
        Register rv = findRegFor(value, sse2 ? XmmRegs : FpRegs);
        Register rb = findRegFor(base, GpRegs);

        if (rmask(rv) & XmmRegs) {
            SSE_STQ(dr, rb, rv);
        } else {
            FSTQ(pop, dr, rb);
        }
#elif defined NANOJIT_AMD64
        /* If this is not a float operation, we can use GpRegs instead.
         * We can do this in a few other cases but for now I'll keep it simple.
         */
        Register rb = findRegFor(base, GpRegs);
        Reservation *rV = getresv(value);

        if (rV != NULL && rV->reg != UnknownReg) {
            if (rmask(rV->reg) & GpRegs) {
                STQ(rb, dr, rV->reg);
            } else {
                SSE_STQ(dr, rb, rV->reg);
            }
        } else {
            Register rv;

            /* Try to catch some common patterns.
             * Note: this is a necessity, since in between things like
             * asm_fop() could see the reservation and try to use a non-SSE
             * register for adding.  Same for asm_qbinop in theory.
             * There should probably be asserts to catch more cases.
             */
            if (value->isop(LIR_u2f)
                || value->isop(LIR_i2f)
                || (value->opcode() >= LIR_fneg && value->opcode() <= LIR_fmul)
                || value->opcode() == LIR_fdiv
                || value->opcode() == LIR_fcall) {
                rv = findRegFor(value, XmmRegs);
                SSE_STQ(dr, rb, rv);
            } else {
                rv = findRegFor(value, GpRegs);
                STQ(rb, dr, rv);
            }
        }
#endif
    }
    /**
     * copy 64 bits: (rd+dd) <- (rs+ds)
     */
    void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
    {
        // value is either a 64bit struct or maybe a float
        // that isn't live in an FPU reg.  Either way, don't
        // put it in an FPU reg just to load & store it.
#if defined NANOJIT_IA32
        if (sse2)
        {
#endif
            // use SSE to load+store 64bits
            Register t = registerAlloc(XmmRegs);
            _allocator.addFree(t);
            SSE_STQ(dd, rd, t);
            SSE_LDQ(t, ds, rs);
#if defined NANOJIT_IA32
        }
        else
        {
            // get a scratch reg
            Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs)));
            _allocator.addFree(t);
            ST(rd, dd+4, t);
            LD(t, ds+4, rs);
            ST(rd, dd, t);
            LD(t, ds, rs);
        }
#endif
    }
    void Assembler::asm_quad(LInsp ins)
    {
#if defined NANOJIT_IA32
        Reservation *rR = getresv(ins);
        Register rr = rR->reg;
        if (rr != UnknownReg)
        {
            // @todo -- add special-cases for 0 and 1
            _allocator.retire(rr);
            rR->reg = UnknownReg;
            NanoAssert((rmask(rr) & FpRegs) != 0);

            const double d = ins->constvalf();
            if (rmask(rr) & XmmRegs) {
                if (d == 0.0) {
                    SSE_XORPDr(rr, rr);
                } else if (d == 1.0) {
                    // 1.0 is extremely frequent and worth special-casing!
                    static const double k_ONE = 1.0;
                    LDSDm(rr, &k_ONE);
                } else {
                    findMemFor(ins);
                    const int d = disp(rR);
                    SSE_LDQ(rr, d, FP);
                }
            } else {
                if (d == 0.0) {
                    FLDZ();
                } else if (d == 1.0) {
                    FLD1();
                } else {
                    findMemFor(ins);
                    int d = disp(rR);
                    FLDQ(d,FP);
                }
            }
        }

        // @todo, if we used xor, ldsd, fldz, etc above, we don't need mem here
        int d = disp(rR);
        freeRsrcOf(ins, false);
        if (d)
        {
            const int32_t* p = (const int32_t*) (ins-2);
            STi(FP,d+4,p[1]);
            STi(FP,d,p[0]);
        }
#elif defined NANOJIT_AMD64
        Reservation *rR = getresv(ins);
        int64_t val = *(int64_t *)(ins - 2);

        if (rR->reg != UnknownReg)
        {
            Register rr = rR->reg;
            freeRsrcOf(ins, false);
            if (rmask(rr) & GpRegs)
            {
                LDQi(rr, val);
            }
            else if (rmask(rr) & XmmRegs)
            {
                if (ins->constvalf() == 0.0)
                {
                    SSE_XORPDr(rr, rr);
                }
                else
                {
                    /* Get a short-lived register, not associated with instruction */
                    Register rs = registerAlloc(GpRegs);

                    SSE_MOVD(rr, rs);
                    LDQi(rs, val);

                    _allocator.addFree(rs);
                }
            }
        }
        else
        {
            const int32_t* p = (const int32_t*) (ins-2);
            int dr = disp(rR);
            freeRsrcOf(ins, false);
            STi(FP, dr+4, p[1]);
            STi(FP, dr, p[0]);
        }
#endif
    }
    bool Assembler::asm_qlo(LInsp ins, LInsp q)
    {
#if defined NANOJIT_IA32
        if (!sse2)
        {
            return false;
        }
#endif

        Reservation *resv = getresv(ins);
        Register rr = resv->reg;
        if (rr == UnknownReg) {
            // store quad in spill loc
            int d = disp(resv);
            freeRsrcOf(ins, false);
            Register qr = findRegFor(q, XmmRegs);
            SSE_MOVDm(d, FP, qr);
        } else {
            freeRsrcOf(ins, false);
            Register qr = findRegFor(q, XmmRegs);
            SSE_MOVD(rr,qr);
        }

        return true;
    }
    void Assembler::asm_fneg(LInsp ins)
    {
#if defined NANOJIT_IA32
        if (sse2)
        {
#endif
            LIns *lhs = ins->oprnd1();

            Register rr = prepResultReg(ins, XmmRegs);
            Reservation *rA = getresv(lhs);
            Register ra;

            // if this is last use of lhs in reg, we can re-use result reg
            if (rA == 0 || (ra = rA->reg) == UnknownReg) {
                ra = findSpecificRegFor(lhs, rr);
            } else if ((rmask(ra) & XmmRegs) == 0) {
                /* We need this case on AMD64, because it's possible that
                 * an earlier instruction has done a quadword load and reserved a
                 * GPR.  If so, ask for a new register.
                 */
                ra = findRegFor(lhs, XmmRegs);
            }
            // else, rA already has a register assigned.

#if defined __SUNPRO_CC
            // from Sun Studio C++ Readme: #pragma align inside namespace requires mangled names
            static uint32_t temp[] = {0, 0, 0, 0, 0, 0, 0};
            static uint32_t *negateMask = (uint32_t *)alignUp(temp, 16);
            negateMask[1] = 0x80000000;
#else
            static const AVMPLUS_ALIGN16(uint32_t) negateMask[] = {0,0x80000000,0,0};
#endif
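            // As a 128-bit constant, negateMask has only bit 63 set, so XORPD
            // against it flips the sign bit of the double in the low lane and
            // leaves the high lane untouched.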
            SSE_XORPD(rr, negateMask);

            if (rr != ra)
                SSE_MOVSD(rr, ra);
#if defined NANOJIT_IA32
        }
        else
        {
            Register rr = prepResultReg(ins, FpRegs);

            LIns* lhs = ins->oprnd1();

            // lhs into reg, prefer same reg as result
            Reservation* rA = getresv(lhs);
            // if this is last use of lhs in reg, we can re-use result reg
            if (rA == 0 || rA->reg == UnknownReg)
                findSpecificRegFor(lhs, rr);
            // else, rA already has a different reg assigned

            NanoAssert(getresv(lhs)!=0 && getresv(lhs)->reg==FST0);
            // assume that the lhs is in ST(0) and rhs is on stack
            FCHS();

            // if we had more than one fpu reg, this is where
            // we would move ra into rr if rr != ra.
        }
#endif
    }
    void Assembler::asm_pusharg(LInsp p)
    {
        // arg goes on stack
        Reservation* rA = getresv(p);
        if (rA == 0)
        {
            if (p->isconst())
            {
                // small const we push directly
                PUSHi(p->constval());
            }
            else
            {
                Register ra = findRegFor(p, GpRegs);
                PUSHr(ra);
            }
        }
        else if (rA->reg == UnknownReg)
        {
            PUSHm(disp(rA), FP);
        }
        else
        {
            PUSHr(rA->reg);
        }
    }
    void Assembler::asm_farg(LInsp p)
    {
#if defined NANOJIT_IA32
        Register r = findRegFor(p, FpRegs);
        if (rmask(r) & XmmRegs) {
            SSE_STQ(0, SP, r);
        } else {
            FSTPQ(0, SP);
        }
        PUSHr(ECX); // 2*pushr is smaller than sub
        PUSHr(ECX);
#endif
    }
    void Assembler::asm_fop(LInsp ins)
    {
        LOpcode op = ins->opcode();
#if defined NANOJIT_IA32
        if (sse2)
        {
#endif
            LIns *lhs = ins->oprnd1();
            LIns *rhs = ins->oprnd2();

            RegisterMask allow = XmmRegs;
            Register rb = UnknownReg;
            if (lhs != rhs) {
                rb = findRegFor(rhs,allow);
                allow &= ~rmask(rb);
            }

            Register rr = prepResultReg(ins, allow);
            Reservation *rA = getresv(lhs);
            Register ra;

            // if this is last use of lhs in reg, we can re-use result reg
            if (rA == 0 || (ra = rA->reg) == UnknownReg) {
                ra = findSpecificRegFor(lhs, rr);
            } else if ((rmask(ra) & XmmRegs) == 0) {
                /* We need this case on AMD64, because it's possible that
                 * an earlier instruction has done a quadword load and reserved a
                 * GPR.  If so, ask for a new register.
                 */
                ra = findRegFor(lhs, XmmRegs);
            }
            // else, rA already has a register assigned.

            if (lhs == rhs)
                rb = ra;

            if (op == LIR_fadd)
                SSE_ADDSD(rr, rb);
            else if (op == LIR_fsub)
                SSE_SUBSD(rr, rb);
            else if (op == LIR_fmul)
                SSE_MULSD(rr, rb);
            else //if (op == LIR_fdiv)
                SSE_DIVSD(rr, rb);

            if (rr != ra)
                SSE_MOVSD(rr, ra);
#if defined NANOJIT_IA32
        }
        else
        {
            // we swap lhs/rhs on purpose here, works out better
            // if you only have one fpu reg.  use divr/subr.
            LIns* rhs = ins->oprnd1();
            LIns* lhs = ins->oprnd2();
            Register rr = prepResultReg(ins, rmask(FST0));

            // make sure rhs is in memory
            int db = findMemFor(rhs);

            // lhs into reg, prefer same reg as result
            Reservation* rA = getresv(lhs);
            // last use of lhs in reg, can reuse rr
            if (rA == 0 || rA->reg == UnknownReg)
                findSpecificRegFor(lhs, rr);
            // else, rA already has a different reg assigned

            NanoAssert(getresv(lhs)!=0 && getresv(lhs)->reg==FST0);
            // assume that the lhs is in ST(0) and rhs is on stack
            if (op == LIR_fadd)
                { FADD(db, FP); }
            else if (op == LIR_fsub)
                { FSUBR(db, FP); }
            else if (op == LIR_fmul)
                { FMUL(db, FP); }
            else if (op == LIR_fdiv)
                { FDIVR(db, FP); }
        }
#endif
    }
    void Assembler::asm_i2f(LInsp ins)
    {
        // where our result goes
        Register rr = prepResultReg(ins, FpRegs);
#if defined NANOJIT_IA32
        if (rmask(rr) & XmmRegs)
        {
#endif
            // todo support int value in memory
            Register gr = findRegFor(ins->oprnd1(), GpRegs);
            SSE_CVTSI2SD(rr, gr);
#if defined NANOJIT_IA32
        }
        else
        {
            int d = findMemFor(ins->oprnd1());
            FILD(d, FP);
        }
#endif
    }
    Register Assembler::asm_prep_fcall(Reservation *rR, LInsp ins)
    {
#if defined NANOJIT_IA32
        if (rR) {
            Register rr;
            if ((rr=rR->reg) != UnknownReg && (rmask(rr) & XmmRegs))
                evict(rr);
        }
        return prepResultReg(ins, rmask(FST0));
#elif defined NANOJIT_AMD64
        evict(RAX);
        return prepResultReg(ins, rmask(XMM0));
#endif
    }
    void Assembler::asm_u2f(LInsp ins)
    {
        // where our result goes
        Register rr = prepResultReg(ins, FpRegs);
#if defined NANOJIT_IA32
        if (rmask(rr) & XmmRegs)
        {
#endif
            // don't call findRegFor, we want a reg we can stomp on for a very short time,
            // not a reg that will continue to be associated with the LIns
            Register gr = registerAlloc(GpRegs);

            // technique inspired by gcc disassembly
            // Edwin explains it:
            //
            // gr is 0..2^32-1
            //
            //     sub gr,0x80000000
            //
            // now gr is -2^31..2^31-1, i.e. the range of int, but not the same value
            // as before
            //
            //     cvtsi2sd rr,gr
            //
            // rr is now a double with the int value range
            //
            //     addsd rr, 2147483648.0
            //
            // adding back double(0x80000000) makes the range 0..2^32-1.
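            //
            // For example, gr = 0xFFFFFFFF (4294967295): the subtract gives
            // 0x7FFFFFFF = 2147483647, cvtsi2sd gives 2147483647.0, and adding
            // 2147483648.0 recovers 4294967295.0; gr = 0 maps to -2147483648
            // and then back to 0.0.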
            static const double k_NEGONE = 2147483648.0;
#if defined NANOJIT_IA32
            SSE_ADDSDm(rr, &k_NEGONE);
#elif defined NANOJIT_AMD64
            /* Squirrel the constant at the bottom of the page. */
            if (_dblNegPtr != NULL)
            {
                underrunProtect(10);
            }
            if (_dblNegPtr == NULL)
            {
                underrunProtect(30);
                uint8_t *base, *begin;
                base = (uint8_t *)((intptr_t)_nIns & ~((intptr_t)NJ_PAGE_SIZE-1));
                base += sizeof(PageHeader) + _pageData;
                begin = base;
                /* Make sure we align */
                if ((uintptr_t)base & 0xF) {
                    base = (NIns *)((uintptr_t)base & ~(0xF));
                    base += 16;
                }
                _pageData += (int32_t)(base - begin) + sizeof(double);
                _negOnePtr = (NIns *)base;
                *(double *)_negOnePtr = k_NEGONE;
            }
            SSE_ADDSDm(rr, _negOnePtr);
#endif

            SSE_CVTSI2SD(rr, gr);

            Reservation* resv = getresv(ins->oprnd1());
            Register xr;
            if (resv && (xr = resv->reg) != UnknownReg && (rmask(xr) & GpRegs))
            {
                LEA(gr, 0x80000000, xr);
            }
            else
            {
                const int d = findMemFor(ins->oprnd1());
                SUBi(gr, 0x80000000);
                LD(gr, d, FP);
            }

            // ok, we're done with it
            _allocator.addFree(gr);
#if defined NANOJIT_IA32
        }
        else
        {
            const int disp = -8;
            const Register base = SP;
            Register gr = findRegFor(ins->oprnd1(), GpRegs);
            NanoAssert(rr == FST0);
            FILDQ(disp, base);
            STi(base, disp+4, 0);   // high 32 bits = 0
            ST(base, disp, gr);     // low 32 bits = unsigned value
        }
#endif
    }
    void Assembler::asm_nongp_copy(Register r, Register s)
    {
        if ((rmask(r) & XmmRegs) && (rmask(s) & XmmRegs)) {
            SSE_MOVSD(r, s);
        } else if ((rmask(r) & GpRegs) && (rmask(s) & XmmRegs)) {
            SSE_MOVD(r, s);
        } else {
            if (rmask(r) & XmmRegs) {
                // x87 -> xmm
                NanoAssertMsg(false, "Should not move data from GPR to XMM");
            } else {
                // xmm -> x87
                NanoAssertMsg(false, "Should not move data from GPR/XMM to x87 FPU");
            }
        }
    }
    void Assembler::asm_fcmp(LIns *cond)
    {
        LOpcode condop = cond->opcode();
        NanoAssert(condop >= LIR_feq && condop <= LIR_fge);
        LIns* lhs = cond->oprnd1();
        LIns* rhs = cond->oprnd2();

        int mask;
        if (condop == LIR_feq)
            mask = 0x44;
        else if (condop == LIR_fle)
            mask = 0x41;
        else if (condop == LIR_flt)
            mask = 0x05;
        else if (condop == LIR_fge) {
            // swap, use le
            LIns* t = lhs; lhs = rhs; rhs = t;
            mask = 0x41;
        } else { // if (condop == LIR_fgt)
            // swap, use lt
            LIns* t = lhs; lhs = rhs; rhs = t;
            mask = 0x05;
        }
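        // The masks pick out bits of the flags byte tested below: bit 6, bit 2
        // and bit 0 (ZF/PF/CF after LAHF or PUSHF, C3/C2/C0 after FNSTSW).  So
        // 0x44 tests ZF|PF (equal or unordered), 0x41 tests ZF|CF (less than or
        // equal), and 0x05 tests PF|CF (less than, or unordered).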
#if defined NANOJIT_IA32
        if (sse2)
        {
#endif
            // UNORDERED:    ZF,PF,CF <- 111;
            // GREATER_THAN: ZF,PF,CF <- 000;
            // LESS_THAN:    ZF,PF,CF <- 001;
            // EQUAL:        ZF,PF,CF <- 100;

            if (condop == LIR_feq && lhs == rhs) {
                // nan check
                Register r = findRegFor(lhs, XmmRegs);
                SSE_UCOMISD(r, r);
            } else {
#if defined NANOJIT_IA32
                evict(EAX);
                TEST_AH(mask);
                LAHF();
#elif defined NANOJIT_AMD64
                evict(RAX);
                TEST_AL(mask);
                POPr(RAX);
                PUSHFQ();
#endif
                Reservation *rA, *rB;
                findRegFor2(XmmRegs, lhs, rA, rhs, rB);
                SSE_UCOMISD(rA->reg, rB->reg);
            }
#if defined NANOJIT_IA32
        }
        else
        {
            evict(EAX);
            TEST_AH(mask);
            FNSTSW_AX();
            NanoAssert(lhs->isQuad() && rhs->isQuad());
            Reservation *rA;
            if (lhs != rhs)
            {
                // compare two different numbers
                int d = findMemFor(rhs);
                rA = getresv(lhs);
                int pop = !rA || rA->reg == UnknownReg;
                findSpecificRegFor(lhs, FST0);
                // lhs is in ST(0) and rhs is on stack
                FCOM(pop, d, FP);
            }
            else
            {
                // compare n to itself, this is a NaN test.
                rA = getresv(lhs);
                int pop = !rA || rA->reg == UnknownReg;
                findSpecificRegFor(lhs, FST0);
                // value in ST(0)
                if (pop)
                    FCOMPP();
                else
                    FCOMP();
                FLDr(FST0); // DUP
            }
        }
#endif
    }
    NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
    {
        NIns* was;
#if defined NANOJIT_AMD64
        was = (NIns*)( *(intptr_t*)(at) );
        *(intptr_t *)(at) = intptr_t(target);
#else
        NIns* save = _nIns;
        was = (NIns*)( (intptr_t)*(int32_t*)(at+1)+(intptr_t)(at+5) );
        _nIns = at + 5; // +5 is size of JMP
        intptr_t tt = (intptr_t)target - (intptr_t)_nIns;
        IMM32(tt);
        *(--_nIns) = JMPc;
        _nIns = save;
#endif
        return was;
    }
    void Assembler::nativePageReset()
    {
#if defined NANOJIT_AMD64
        /* We store some stuff at the bottom of the page.
         * We reserve 8-bytes for long jumps just in case we need them.
         */
        _pageData = 0;
        _dblNegPtr = NULL;
        _negOnePtr = NULL;
#endif
    }
    Register Assembler::asm_binop_rhs_reg(LInsp ins)
    {
        LOpcode op = ins->opcode();
        LIns *rhs = ins->oprnd2();

        if (op == LIR_lsh || op == LIR_rsh || op == LIR_ush) {
#if defined NANOJIT_IA32
            return findSpecificRegFor(rhs, ECX);
#elif defined NANOJIT_AMD64
            return findSpecificRegFor(rhs, RCX);
#endif
        }

        return UnknownReg;
    }
#if defined NANOJIT_AMD64
    void Assembler::asm_qbinop(LIns *ins)
    {
        LInsp lhs = ins->oprnd1();
        LInsp rhs = ins->oprnd2();
        LOpcode op = ins->opcode();

        Register rr = prepResultReg(ins, GpRegs);
        Reservation *rA = getresv(lhs);
        Register ra;

        if (rA == NULL || (ra = rA->reg) == UnknownReg) {
            ra = findSpecificRegFor(lhs, rr);
        }

        if (rhs->isconst())
        {
            int c = rhs->constval();

            if (op == LIR_qiadd)
            {
                ADDQi(rr, c);
            } else if (op == LIR_qiand) {
                ANDQi(rr, c);
            } else if (op == LIR_qilsh) {
                SHLQi(rr, c);
            } else if (op == LIR_qior) {
                ORQi(rr, c);
            }
        } else {
            Register rv;

            if (lhs == rhs) {
                rv = ra;
            } else {
                rv = findRegFor(rhs, GpRegs & ~(rmask(rr)));
            }

            if (op == LIR_qiadd) {
                ADDQ(rr, rv);
            } else if (op == LIR_qiand) {
                ANDQ(rr, rv);
            } else if (op == LIR_qior) {
                ORQ(rr, rv);
            } else {
                NanoAssert(rhs->isconst());
            }
        }

        if (rr != ra) {
            MR(rr, ra);
        }
    }
#endif
    void Assembler::nativePageSetup()
    {
        if (!_nIns)     _nIns     = pageAlloc();
        if (!_nExitIns) _nExitIns = pageAlloc(true);
    }
#endif /* FEATURE_NANOJIT */
}