/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: t; tab-width: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Mozilla TraceMonkey Team
 *   Asko Tontti <atontti@cc.hut.fi>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
// for MakeDataExecutable
#include <CoreServices/CoreServices.h>

#if defined AVMPLUS_UNIX

#ifdef FEATURE_NANOJIT
const char *regNames[] = {
#if defined NANOJIT_IA32
    "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
    "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
    "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7"
#elif defined NANOJIT_AMD64
    "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
    "xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15"
#if defined NANOJIT_IA32
const Register Assembler::argRegs[] = { ECX, EDX };
const Register Assembler::retRegs[] = { EAX, EDX };
#elif defined NANOJIT_AMD64
const Register Assembler::argRegs[] = { R8, R9, RCX, RDX };
const Register Assembler::argRegs[] = { RDI, RSI, RDX, RCX, R8, R9 };
const Register Assembler::retRegs[] = { RAX, RDX };
void Assembler::nInit(AvmCore* core)
#if defined NANOJIT_IA32
    sse2 = core->use_sse2();

    // CMOVcc is actually available on most PPro+ chips (except for a few
    // oddballs like Via C3) but for now tie to SSE2 detection
NIns* Assembler::genPrologue(RegisterMask needSaving)
    uint32_t stackNeeded = STACK_GRANULARITY * _activation.highwatermark;
    uint32_t savingCount = 0;

    for (Register i = FirstReg; i <= LastReg; i = nextreg(i))
        if (needSaving & rmask(i))
            savingCount++;

    // After forcing alignment, we've pushed the pre-alignment SP
    // and savingCount registers.
    uint32_t stackPushed = STACK_GRANULARITY * (1 + savingCount);
    uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
    uint32_t amt = aligned - stackPushed;
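
    // Worked example of the sizing above (illustrative values, assuming
    // STACK_GRANULARITY == 4 and NJ_ALIGN_STACK == 16): highwatermark == 5 and
    // savingCount == 3 give stackNeeded == 20 and stackPushed == 4*(1+3) == 16,
    // so aligned == alignUp(36, 16) == 48 and amt == 48 - 16 == 32 bytes are
    // reserved below the pushed words.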
    // Reserve stackNeeded bytes, padded
    // to preserve NJ_ALIGN_STACK-byte alignment.
#if defined NANOJIT_IA32
#elif defined NANOJIT_AMD64
    verbose_only( verbose_outputf(" %p:", _nIns); )
    verbose_only( verbose_output(" patch entry:"); )
    NIns *patchEntry = _nIns;
    MR(FP, SP); // Establish our own FP.

    // Save pre-alignment SP value here, where the FP will point,
    // to preserve the illusion of a valid frame chain for
    // functions like MMgc::GetStackTrace. The 'return address'
    // of this 'frame' will be the last-saved register, but that's
    // fine, because the next-older frame will be legit.

    for (Register i = FirstReg; i <= LastReg; i = nextreg(i))
        if (needSaving & rmask(i))
            PUSHr(i);

    // We'd like to be able to use SSE instructions like MOVDQA on
    // stack slots; it requires 16B alignment. Darwin requires a
    // 16B stack alignment, and Linux GCC seems to intend to
    // establish and preserve the same, but we're told that GCC
    // has not always done this right. To avoid doubt, do it on
    // all platforms. The prologue runs only when we enter
    // fragments from the interpreter, so forcing 16B alignment
    // here is cheap.
#if defined NANOJIT_IA32
    ANDi(SP, -NJ_ALIGN_STACK);
#elif defined NANOJIT_AMD64
    ANDQi(SP, -NJ_ALIGN_STACK);

    PUSHr(FP); // Save caller's FP.
void Assembler::nFragExit(LInsp guard)
    SideExit *exit = guard->exit();
    bool trees = _frago->core()->config.tree_opt;
    Fragment *frag = exit->target;

    bool destKnown = (frag && frag->fragEntry);
    if (destKnown && !trees)
        // already exists, emit jump now. no patching required.
        JMP(frag->fragEntry);

    // target doesn't exist yet. emit jump to epilog, and set up to patch later.
    lr = placeGuardRecord(guard);
#if defined NANOJIT_AMD64
    /* 8 bytes for address, 4 for imm32, 2 for jmp */
    *(intptr_t *)_nIns = intptr_t(_epilogue);

    // @todo optimization; is it worth it? It means we can remove the loop over outbound in Fragment.link()
    // for trees we need the patch entry on the incoming fragment so we can unhook it later if needed
    if (trees && destKnown)
    // first restore ESP from EBP, undoing SUBi(SP,amt) from genPrologue

    if (_frago->core()->config.show_stats) {
        // load EDX (arg1) with Fragment *fromFrag, target fragment
        // will make use of this when calling fragenter().
#if defined NANOJIT_IA32
        int fromfrag = int((Fragment*)_thisfrag);
        LDi(argRegs[1], fromfrag);
#elif defined NANOJIT_AMD64
        LDQi(argRegs[1], intptr_t(_thisfrag));

    // return value is GuardRecord*
#if defined NANOJIT_IA32
#elif defined NANOJIT_AMD64
    LDQi(RAX, intptr_t(lr));
NIns *Assembler::genEpilogue(RegisterMask restore)
    POPr(FP); // Restore caller's FP.
    MR(SP, FP); // Undo forced alignment.

    // Restore saved registers.
    for (Register i = UnknownReg; i >= FirstReg; i = prevreg(i))
        if (restore & rmask(i)) { POPr(i); }

    POPr(FP); // Pop the pre-alignment SP.
#if defined NANOJIT_IA32
void Assembler::asm_call(LInsp ins)
    uint32_t fid = ins->fid();
    const CallInfo* call = callInfoFor(fid);
    // must be signed, not unsigned
    const uint32_t iargs = call->count_iargs();
    int32_t fstack = call->count_args() - iargs;

#if defined NJ_NO_FASTCALL
    int32_t istack = iargs;
    int32_t istack = iargs - 2; // first 2 4B args are in registers

    const int32_t size = 4*istack + 8*fstack; // actual stack space used

    // stack re-alignment
    // only pop our adjustment amount since callee pops args in FASTCALL mode
    extra = alignUp(size, NJ_ALIGN_STACK) - (size);
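
    // Illustrative arithmetic for the re-alignment above (assuming
    // NJ_ALIGN_STACK == 16): with istack == 3 and fstack == 1,
    // size == 4*3 + 8*1 == 20, so extra == alignUp(20, 16) - 20 == 32 - 20 == 12.
    // In FASTCALL mode the callee pops the 20 argument bytes itself, so only
    // those 12 pad bytes need to be popped here.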
#ifndef NJ_NO_FASTCALL
#ifdef NJ_NO_FASTCALL
    // In C calling conventions, callee doesn't pop args.
    ADDi(SP, 4*iargs + 8*fstack + extra);
#ifdef NJ_NO_FASTCALL

    // make sure fpu stack is empty before call (restoreCallerSaved)
    NanoAssert(_allocator.isFree(FST0));
    // note: this code requires that ref arguments (ARGSIZE_Q)
    // be one of the first two arguments
    // pre-assign registers to the first 2 4B args
    const int max_regs = (iargs < 2) ? iargs : 2;

    uint32_t argc = call->get_sizes(sizes);

    for (uint32_t i = 0; i < argc; i++)
        uint32_t j = argc - i - 1;
        ArgSize sz = sizes[j];
        Register r = UnknownReg;
        if (n < max_regs && sz != ARGSIZE_F)
            r = argRegs[n++]; // tell asm_arg what reg to use
        asm_arg(sz, ins->arg(j), r);
#elif defined NANOJIT_AMD64

void Assembler::asm_call(LInsp ins)
    Register fpu_reg = XMM0;
    uint32_t fid = ins->fid();
    const CallInfo* call = callInfoFor(fid);

    uint32_t argc = call->get_sizes(sizes);

    for (uint32_t i = 0; i < argc; i++)
        uint32_t j = argc - i - 1;
        ArgSize sz = sizes[j];
        Register r = UnknownReg;
        if (sz != ARGSIZE_F) {
            r = argRegs[n++]; // tell asm_arg what reg to use
            fpu_reg = nextreg(fpu_reg);
        findSpecificRegFor(ins->arg(j), r);
void Assembler::nMarkExecute(Page* page, int32_t count, bool enable)
#if defined WIN32 || defined WIN64
    VirtualProtect(&page->code, count*NJ_PAGE_SIZE, PAGE_EXECUTE_READWRITE, &dwIgnore);
#elif defined AVMPLUS_UNIX
    intptr_t addr = (intptr_t)&page->code;
    addr &= ~((uintptr_t)NJ_PAGE_SIZE - 1);
    if (mprotect((char *)addr, count*NJ_PAGE_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) {
    if (mprotect((void *)addr, count*NJ_PAGE_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) {
        AvmDebugLog(("FATAL ERROR: mprotect(PROT_EXEC) failed\n"));
Register Assembler::nRegisterAllocFromSet(int set)
    RegAlloc &regs = _allocator;
        bsf     eax, set                    // i = first bit set
        btr     RegAlloc::free[ecx], eax    // free &= ~rmask(i)
    unsigned long tr, fr;
    _BitScanForward(&tr, set);
    _bittestandreset(&fr, tr);
        : "=m"(r) : "m"(set), "m"(regs.free) : "%eax", "memory" );
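
    // Illustrative example of the bit-scan allocation above (the MSVC inline
    // asm, the _BitScanForward/_bittestandreset intrinsics, and the gcc asm
    // variants all implement the same logic): if set == 0x14 (bits 2 and 4
    // set), the forward scan yields i == 2, the bit-test-and-reset clears
    // bit 2 of regs.free, and the register with index 2 is the allocation.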
void Assembler::nRegisterResetAll(RegAlloc& a)
    // add scratch registers to our free list for the allocator
    a.free = SavedRegs | ScratchRegs;
#if defined NANOJIT_IA32
    debug_only( a.managed = a.free; )
void Assembler::nPatchBranch(NIns* branch, NIns* location)
#if defined NANOJIT_IA32
    intptr_t offset = intptr_t(location) - intptr_t(branch);
    if (branch[0] == JMPc)
        *(uint32_t*)&branch[1] = offset - 5;
        *(uint32_t*)&branch[2] = offset - 6;
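
    // Displacement math above, spelled out: on IA32 a direct JMP rel32 is
    // 5 bytes (opcode 0xE9 plus imm32) and a Jcc rel32 is 6 bytes (0x0F 0x8x
    // plus imm32), and the rel32 is measured from the end of the instruction.
    // With offset == location - branch, the stored field is therefore
    // offset - 5 for JMP and offset - 6 for the conditional form.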
    if (branch[0] == 0xFF && branch[1] == 0x25) {
        mem = &branch[6] + *(int32_t *)&branch[2];
        *(intptr_t *)mem = intptr_t(location);
        NanoAssertMsg(0, "Unknown branch type in nPatchBranch");
RegisterMask Assembler::hint(LIns* i, RegisterMask allow)
    uint32_t op = i->opcode();
#if defined NANOJIT_IA32
        prefer &= rmask(EAX);
#elif defined NANOJIT_AMD64
        prefer &= rmask(RAX);
    else if (op == LIR_param)
        prefer &= rmask(Register(i->imm8()));
#if defined NANOJIT_IA32
    else if (op == LIR_callh || op == LIR_rsh && i->oprnd1()->opcode() == LIR_callh)
        prefer &= rmask(EDX);
    else if (op == LIR_callh)
        prefer &= rmask(RAX);
        prefer &= AllowableFlagRegs;
    else if (i->isconst())
        prefer &= ScratchRegs;
    return (_allocator.free & prefer) ? prefer : allow;
void Assembler::asm_qjoin(LIns *ins)
    int d = findMemFor(ins);
    LIns* lo = ins->oprnd1();
    LIns* hi = ins->oprnd2();

    Reservation *resv = getresv(ins);
    Register rr = resv->reg;

    if (rr != UnknownReg && (rmask(rr) & FpRegs))
        STi(FP, d+4, hi->constval());
        Register r = findRegFor(hi, GpRegs);
        STi(FP, d, lo->constval());
        // okay if r gets recycled.
        Register r = findRegFor(lo, GpRegs);
    freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
void Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
        if (!resv->arIndex) {
        LDi(r, i->constval());
        int d = findMemFor(i);
        if (rmask(r) & FpRegs)
#if defined NANOJIT_IA32
            if (rmask(r) & XmmRegs) {
#if defined NANOJIT_IA32
#if defined NANOJIT_AMD64
    verbose_only(if (_verbose) {
        outputf(" restore %s", _thisfrag->lirbuf->names->formatRef(i));
void Assembler::asm_store32(LIns *value, int dr, LIns *base)
    if (value->isconst())
        Register rb = findRegFor(base, GpRegs);
        int c = value->constval();

        // make sure the value is in a register
        Reservation *rA, *rB;
        findRegFor2(GpRegs, value, rA, base, rB);
        Register ra = rA->reg;
        Register rb = rB->reg;
void Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
    Register rr = resv->reg;
        // save to spill location
        if (rmask(rr) & FpRegs)
#if defined NANOJIT_IA32
            if (rmask(rr) & XmmRegs) {
#if defined NANOJIT_IA32
            FSTQ((pop?1:0), d, FP);
#if defined NANOJIT_AMD64
        verbose_only(if (_verbose) {
            outputf(" spill %s", _thisfrag->lirbuf->names->formatRef(i));
#if defined NANOJIT_IA32
    else if (pop && (rmask(rr) & x87Regs))
        // pop the fpu result since it isn't used
void Assembler::asm_load64(LInsp ins)
    LIns* base = ins->oprnd1();
    int db = ins->oprnd2()->constval();
    Reservation *resv = getresv(ins);
    Register rr = resv->reg;

    if (rr != UnknownReg && rmask(rr) & XmmRegs)
        freeRsrcOf(ins, false);
        Register rb = findRegFor(base, GpRegs);
#if defined NANOJIT_AMD64
    else if (rr != UnknownReg && rmask(rr) & GpRegs)
        freeRsrcOf(ins, false);
        Register rb = findRegFor(base, GpRegs);
        Register rb = findRegFor(base, GpRegs);

        /* We need a temporary register we can move the destination into */
        rr = registerAlloc(GpRegs);
        _allocator.addFree(rr);
        freeRsrcOf(ins, false);
#elif defined NANOJIT_IA32
        Register rb = findRegFor(base, GpRegs);
        resv->reg = UnknownReg;

        // don't use an fpu reg to simply load & store the value.
        asm_mmq(FP, dr, rb, db);
        freeRsrcOf(ins, false);
        if (rr != UnknownReg)
            NanoAssert(rmask(rr) & FpRegs);
            _allocator.retire(rr);
void Assembler::asm_store64(LInsp value, int dr, LInsp base)
    if (value->isconstq())
        // if a constant 64-bit value just store it now rather than
        // generating a pointless store/load/store sequence
        Register rb = findRegFor(base, GpRegs);
        const int32_t* p = (const int32_t*) (value-2);

#if defined NANOJIT_IA32
    if (value->isop(LIR_ldq) || value->isop(LIR_qjoin))
        // value is 64bit struct or int64_t, or maybe a double.
        // it may be live in an FPU reg. Either way, don't
        // put it in an FPU reg just to load & store it.

        // a) if we know it's not a double, this is right.
        // b) if we guarded that it's a double, this store could be on
        //    the side exit, copying a non-double.
        // c) maybe it's a double just being stored. oh well.

        Register rv = findRegFor(value, XmmRegs);
        Register rb = findRegFor(base, GpRegs);

        int da = findMemFor(value);
        Register rb = findRegFor(base, GpRegs);
        asm_mmq(rb, dr, FP, da);

    Reservation* rA = getresv(value);
    int pop = !rA || rA->reg == UnknownReg;
    Register rv = findRegFor(value, sse2 ? XmmRegs : FpRegs);
    Register rb = findRegFor(base, GpRegs);

    if (rmask(rv) & XmmRegs) {
#elif defined NANOJIT_AMD64
    /* If this is not a float operation, we can use GpRegs instead.
     * We can do this in a few other cases but for now I'll keep it simple.
     */
    Register rb = findRegFor(base, GpRegs);
    Reservation *rV = getresv(value);

    if (rV != NULL && rV->reg != UnknownReg) {
        if (rmask(rV->reg) & GpRegs) {
            STQ(rb, dr, rV->reg);
            SSE_STQ(dr, rb, rV->reg);

        /* Try to catch some common patterns.
         * Note: this is a necessity, since in between things like
         * asm_fop() could see the reservation and try to use a non-SSE
         * register for adding. Same for asm_qbinop in theory.
         * There should probably be asserts to catch more cases.
         */
        if (value->isop(LIR_u2f)
            || value->isop(LIR_i2f)
            || (value->opcode() >= LIR_fneg && value->opcode() <= LIR_fmul)
            || value->opcode() == LIR_fdiv
            || value->opcode() == LIR_fcall) {
            rv = findRegFor(value, XmmRegs);
            rv = findRegFor(value, GpRegs);
/* copy 64 bits: (rd+dd) <- (rs+ds) */
void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
    // value is either a 64bit struct or maybe a float
    // that isn't live in an FPU reg. Either way, don't
    // put it in an FPU reg just to load & store it.
#if defined NANOJIT_IA32
        // use SSE to load+store 64bits
        Register t = registerAlloc(XmmRegs);
        _allocator.addFree(t);
#if defined NANOJIT_IA32
        Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs)));
        _allocator.addFree(t);
void Assembler::asm_quad(LInsp ins)
#if defined NANOJIT_IA32
    Reservation *rR = getresv(ins);
    Register rr = rR->reg;
    if (rr != UnknownReg)
        // @todo -- add special-cases for 0 and 1
        _allocator.retire(rr);
        rR->reg = UnknownReg;
        NanoAssert((rmask(rr) & FpRegs) != 0);

        const double d = ins->constvalf();
        if (rmask(rr) & XmmRegs) {
        } else if (d == 1.0) {
            // 1.0 is extremely frequent and worth special-casing!
            static const double k_ONE = 1.0;

    const int d = disp(rR);
    } else if (d == 1.0) {

    // @todo, if we used xor, ldsd, fldz, etc above, we don't need mem here
    freeRsrcOf(ins, false);
        const int32_t* p = (const int32_t*) (ins-2);
#elif defined NANOJIT_AMD64
    Reservation *rR = getresv(ins);
    int64_t val = *(int64_t *)(ins - 2);

    if (rR->reg != UnknownReg)
        Register rr = rR->reg;
        freeRsrcOf(ins, false);
        if (rmask(rr) & GpRegs)
        else if (rmask(rr) & XmmRegs)
            if (ins->constvalf() == 0.0)

        /* Get a short-lived register, not associated with instruction */
        Register rs = registerAlloc(GpRegs);
        _allocator.addFree(rs);

        const int32_t* p = (const int32_t*) (ins-2);
        freeRsrcOf(ins, false);
bool Assembler::asm_qlo(LInsp ins, LInsp q)
#if defined NANOJIT_IA32
    Reservation *resv = getresv(ins);
    Register rr = resv->reg;
    if (rr == UnknownReg) {
        // store quad in spill loc
        freeRsrcOf(ins, false);
        Register qr = findRegFor(q, XmmRegs);
        SSE_MOVDm(d, FP, qr);
        freeRsrcOf(ins, false);
        Register qr = findRegFor(q, XmmRegs);
void Assembler::asm_fneg(LInsp ins)
#if defined NANOJIT_IA32
        LIns *lhs = ins->oprnd1();

        Register rr = prepResultReg(ins, XmmRegs);
        Reservation *rA = getresv(lhs);

        // if this is last use of lhs in reg, we can re-use result reg
        if (rA == 0 || (ra = rA->reg) == UnknownReg) {
            ra = findSpecificRegFor(lhs, rr);
        } else if ((rmask(ra) & XmmRegs) == 0) {
            /* We need this case on AMD64, because it's possible that
             * an earlier instruction has done a quadword load and reserved a
             * GPR. If so, ask for a new register.
             */
            ra = findRegFor(lhs, XmmRegs);
        // else, rA already has a register assigned.

#if defined __SUNPRO_CC
        // from Sun Studio C++ Readme: #pragma align inside namespace requires mangled names
        static uint32_t temp[] = {0, 0, 0, 0, 0, 0, 0};
        static uint32_t *negateMask = (uint32_t *)alignUp(temp, 16);
        negateMask[1] = 0x80000000;
        static const AVMPLUS_ALIGN16(uint32_t) negateMask[] = {0,0x80000000,0,0};
        SSE_XORPD(rr, negateMask);
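
        // How the mask works (illustrative): the four dwords {0,0x80000000,0,0}
        // form a 128-bit constant whose low 64 bits are 0x8000000000000000,
        // i.e. only the IEEE-754 sign bit of the low double is set. XORPD with
        // it flips that sign bit: 2.0 (0x4000000000000000) becomes
        // 0xC000000000000000, which is -2.0.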
#if defined NANOJIT_IA32
        Register rr = prepResultReg(ins, FpRegs);

        LIns* lhs = ins->oprnd1();

        // lhs into reg, prefer same reg as result
        Reservation* rA = getresv(lhs);
        // if this is last use of lhs in reg, we can re-use result reg
        if (rA == 0 || rA->reg == UnknownReg)
            findSpecificRegFor(lhs, rr);
        // else, rA already has a different reg assigned

        NanoAssert(getresv(lhs) != 0 && getresv(lhs)->reg == FST0);
        // assume that the lhs is in ST(0) and rhs is on stack

        // if we had more than one fpu reg, this is where
        // we would move ra into rr if rr != ra.
void Assembler::asm_pusharg(LInsp p)
    Reservation* rA = getresv(p);
            // small const we push directly
            PUSHi(p->constval());
            Register ra = findRegFor(p, GpRegs);
    else if (rA->reg == UnknownReg)
void Assembler::asm_farg(LInsp p)
#if defined NANOJIT_IA32
    Register r = findRegFor(p, FpRegs);
    if (rmask(r) & XmmRegs) {
    PUSHr(ECX); // 2*pushr is smaller than sub
void Assembler::asm_fop(LInsp ins)
    LOpcode op = ins->opcode();
#if defined NANOJIT_IA32
        LIns *lhs = ins->oprnd1();
        LIns *rhs = ins->oprnd2();

        RegisterMask allow = XmmRegs;
        Register rb = UnknownReg;
            rb = findRegFor(rhs, allow);

        Register rr = prepResultReg(ins, allow);
        Reservation *rA = getresv(lhs);

        // if this is last use of lhs in reg, we can re-use result reg
        if (rA == 0 || (ra = rA->reg) == UnknownReg) {
            ra = findSpecificRegFor(lhs, rr);
        } else if ((rmask(ra) & XmmRegs) == 0) {
            /* We need this case on AMD64, because it's possible that
             * an earlier instruction has done a quadword load and reserved a
             * GPR. If so, ask for a new register.
             */
            ra = findRegFor(lhs, XmmRegs);
        // else, rA already has a register assigned.

        else if (op == LIR_fsub)
        else if (op == LIR_fmul)
        else //if (op == LIR_fdiv)
#if defined NANOJIT_IA32
        // we swap lhs/rhs on purpose here, works out better
        // if you only have one fpu reg. use divr/subr.
        LIns* rhs = ins->oprnd1();
        LIns* lhs = ins->oprnd2();
        Register rr = prepResultReg(ins, rmask(FST0));

        // make sure rhs is in memory
        int db = findMemFor(rhs);

        // lhs into reg, prefer same reg as result
        Reservation* rA = getresv(lhs);
        // last use of lhs in reg, can reuse rr
        if (rA == 0 || rA->reg == UnknownReg)
            findSpecificRegFor(lhs, rr);
        // else, rA already has a different reg assigned

        NanoAssert(getresv(lhs) != 0 && getresv(lhs)->reg == FST0);
        // assume that the lhs is in ST(0) and rhs is on stack

        else if (op == LIR_fsub)
        else if (op == LIR_fmul)
        else if (op == LIR_fdiv)
void Assembler::asm_i2f(LInsp ins)
    // where our result goes
    Register rr = prepResultReg(ins, FpRegs);
#if defined NANOJIT_IA32
    if (rmask(rr) & XmmRegs)
        // todo support int value in memory
        Register gr = findRegFor(ins->oprnd1(), GpRegs);
        SSE_CVTSI2SD(rr, gr);
#if defined NANOJIT_IA32
        int d = findMemFor(ins->oprnd1());
Register Assembler::asm_prep_fcall(Reservation *rR, LInsp ins)
#if defined NANOJIT_IA32
        if ((rr = rR->reg) != UnknownReg && (rmask(rr) & XmmRegs))
    return prepResultReg(ins, rmask(FST0));
#elif defined NANOJIT_AMD64
    return prepResultReg(ins, rmask(XMM0));
void Assembler::asm_u2f(LInsp ins)
    // where our result goes
    Register rr = prepResultReg(ins, FpRegs);
#if defined NANOJIT_IA32
    if (rmask(rr) & XmmRegs)
        // don't call findRegFor, we want a reg we can stomp on for a very short time,
        // not a reg that will continue to be associated with the LIns
        Register gr = registerAlloc(GpRegs);

        // technique inspired by gcc disassembly
        // Edwin explains it:
        //   sub gr, 0x80000000
        // now gr is -2^31..2^31-1, i.e. the range of int, but not the same value
        // rr is now a double with the int value range
        //   addsd rr, 2147483648.0
        // adding back double(0x80000000) makes the range 0..2^32-1.
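        //
        // Worked example of the trick (illustrative): gr == 0xFFFFFFFF
        // (4294967295). After the subtraction gr reads as the signed value
        // 0x7FFFFFFF == 2147483647; CVTSI2SD gives 2147483647.0, and adding
        // 2147483648.0 yields 4294967295.0, the original unsigned value.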
        static const double k_NEGONE = 2147483648.0;
#if defined NANOJIT_IA32
        SSE_ADDSDm(rr, &k_NEGONE);
#elif defined NANOJIT_AMD64
        /* Squirrel the constant at the bottom of the page. */
        if (_dblNegPtr != NULL)
            underrunProtect(10);
        if (_dblNegPtr == NULL)
            underrunProtect(30);
            uint8_t *base, *begin;
            base = (uint8_t *)((intptr_t)_nIns & ~((intptr_t)NJ_PAGE_SIZE-1));
            base += sizeof(PageHeader) + _pageData;

            /* Make sure we align */
            if ((uintptr_t)base & 0xF) {
                base = (NIns *)((uintptr_t)base & ~(0xF));
            _pageData += (int32_t)(base - begin) + sizeof(double);
            _negOnePtr = (NIns *)base;
            *(double *)_negOnePtr = k_NEGONE;
        SSE_ADDSDm(rr, _negOnePtr);

        SSE_CVTSI2SD(rr, gr);
        Reservation* resv = getresv(ins->oprnd1());
        if (resv && (xr = resv->reg) != UnknownReg && (rmask(xr) & GpRegs))
            LEA(gr, 0x80000000, xr);
            const int d = findMemFor(ins->oprnd1());
            SUBi(gr, 0x80000000);

        // ok, we're done with it
        _allocator.addFree(gr);
#if defined NANOJIT_IA32
        const int disp = -8;
        const Register base = SP;
        Register gr = findRegFor(ins->oprnd1(), GpRegs);
        NanoAssert(rr == FST0);
        STi(base, disp+4, 0);   // high 32 bits = 0
        ST(base, disp, gr);     // low 32 bits = unsigned value
void Assembler::asm_nongp_copy(Register r, Register s)
    if ((rmask(r) & XmmRegs) && (rmask(s) & XmmRegs)) {
    } else if ((rmask(r) & GpRegs) && (rmask(s) & XmmRegs)) {
        if (rmask(r) & XmmRegs) {
            NanoAssertMsg(false, "Should not move data from GPR to XMM");
            NanoAssertMsg(false, "Should not move data from GPR/XMM to x87 FPU");
void Assembler::asm_fcmp(LIns *cond)
    LOpcode condop = cond->opcode();
    NanoAssert(condop >= LIR_feq && condop <= LIR_fge);
    LIns* lhs = cond->oprnd1();
    LIns* rhs = cond->oprnd2();

    if (condop == LIR_feq)
    else if (condop == LIR_fle)
    else if (condop == LIR_flt)
    else if (condop == LIR_fge) {
        LIns* t = lhs; lhs = rhs; rhs = t;
    } else { // if (condop == LIR_fgt)
        LIns* t = lhs; lhs = rhs; rhs = t;

#if defined NANOJIT_IA32
    // UNORDERED:    ZF,PF,CF <- 111;
    // GREATER_THAN: ZF,PF,CF <- 000;
    // LESS_THAN:    ZF,PF,CF <- 001;
    // EQUAL:        ZF,PF,CF <- 100;
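    //
    // Consequence of the flag encoding above (general UCOMISD behaviour, not
    // tied to the elided instruction selection below): ZF alone cannot
    // distinguish EQUAL from UNORDERED, so a floating-point equality test must
    // also check that PF is clear to exclude the NaN case. The lhs == rhs
    // special case below compares a value with itself, which only reports
    // unordered when that value is NaN, i.e. it is a NaN test.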
    if (condop == LIR_feq && lhs == rhs) {
        Register r = findRegFor(lhs, XmmRegs);
#if defined NANOJIT_IA32
#elif defined NANOJIT_AMD64
    Reservation *rA, *rB;
    findRegFor2(XmmRegs, lhs, rA, rhs, rB);
    SSE_UCOMISD(rA->reg, rB->reg);
#if defined NANOJIT_IA32
    NanoAssert(lhs->isQuad() && rhs->isQuad());
        // compare two different numbers
        int d = findMemFor(rhs);
        int pop = !rA || rA->reg == UnknownReg;
        findSpecificRegFor(lhs, FST0);
        // lhs is in ST(0) and rhs is on stack

        // compare n to itself, this is a NaN test.
        int pop = !rA || rA->reg == UnknownReg;
        findSpecificRegFor(lhs, FST0);
NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
#if defined NANOJIT_AMD64
    was = (NIns*)( *(intptr_t*)(at) );
    *(intptr_t *)(at) = intptr_t(target);
    was = (NIns*)( (intptr_t)*(int32_t*)(at+1) + (intptr_t)(at+5) );
    _nIns = at + 5; // +5 is size of JMP
    intptr_t tt = (intptr_t)target - (intptr_t)_nIns;
void Assembler::nativePageReset()
#if defined NANOJIT_AMD64
    /* We store some stuff at the bottom of the page.
     * We reserve 8-bytes for long jumps just in case we need them.
     */
Register Assembler::asm_binop_rhs_reg(LInsp ins)
    LOpcode op = ins->opcode();
    LIns *rhs = ins->oprnd2();

    if (op == LIR_lsh || op == LIR_rsh || op == LIR_ush) {
#if defined NANOJIT_IA32
        return findSpecificRegFor(rhs, ECX);
#elif defined NANOJIT_AMD64
        return findSpecificRegFor(rhs, RCX);
#if defined NANOJIT_AMD64
void Assembler::asm_qbinop(LIns *ins)
    LInsp lhs = ins->oprnd1();
    LInsp rhs = ins->oprnd2();
    LOpcode op = ins->opcode();

    Register rr = prepResultReg(ins, GpRegs);
    Reservation *rA = getresv(lhs);

    if (rA == NULL || (ra = rA->reg) == UnknownReg) {
        ra = findSpecificRegFor(lhs, rr);

        int c = rhs->constval();

        if (op == LIR_qiadd)
        } else if (op == LIR_qiand) {
        } else if (op == LIR_qilsh) {
        } else if (op == LIR_qior) {

        rv = findRegFor(rhs, GpRegs & ~(rmask(rr)));

        if (op == LIR_qiadd) {
        } else if (op == LIR_qiand) {
        } else if (op == LIR_qior) {
        NanoAssert(rhs->isconst());
void Assembler::nativePageSetup()
    if (!_nIns)     _nIns     = pageAlloc();
    if (!_nExitIns) _nExitIns = pageAlloc(true);

#endif /* FEATURE_NANOJIT */