/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: t; tab-width: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Mozilla TraceMonkey Team
 *   Asko Tontti <atontti@cc.hut.fi>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
// for MakeDataExecutable
#include <CoreServices/CoreServices.h>

#if defined DARWIN || defined LINUX
#include <sys/mman.h>	// for mprotect(), used by nMarkExecute() below
#endif

#ifdef FEATURE_NANOJIT
	const char *regNames[] = {
#if defined NANOJIT_IA32
		"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
		"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
		"f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7"
#elif defined NANOJIT_AMD64
		"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
		"r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
		"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
		"xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15"
#endif
	};
#if defined NANOJIT_IA32
	const Register Assembler::argRegs[] = { ECX, EDX };
	const Register Assembler::retRegs[] = { EAX, EDX };
#elif defined NANOJIT_AMD64
#if defined WIN64	/* assumed guard: Win64 passes args in different registers than the SysV ABI */
	const Register Assembler::argRegs[] = { R8, R9, RCX, RDX };
#else
	const Register Assembler::argRegs[] = { RDI, RSI, RDX, RCX, R8, R9 };
#endif
	const Register Assembler::retRegs[] = { RAX, RDX };
#endif
	void Assembler::nInit(AvmCore* core)
	{
#if defined NANOJIT_IA32
		sse2 = core->use_sse2();
		// CMOVcc is actually available on most PPro+ chips (except for a few
		// oddballs like Via C3) but for now tie to SSE2 detection.
#endif
	}
	NIns* Assembler::genPrologue(RegisterMask needSaving)
	{
		uint32_t stackNeeded = STACK_GRANULARITY * _activation.highwatermark;
		uint32_t savingCount = 0;

		for (Register i = FirstReg; i <= LastReg; i = nextreg(i))
			if (needSaving & rmask(i))
				savingCount++;

		// After forcing alignment, we've pushed the pre-alignment SP
		// and savingCount registers.
		uint32_t stackPushed = STACK_GRANULARITY * (1 + savingCount);
		uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
		uint32_t amt = aligned - stackPushed;

		// Reserve stackNeeded bytes, padded
		// to preserve NJ_ALIGN_STACK-byte alignment.
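		//
		// Illustrative arithmetic (not in the original source), assuming
		// STACK_GRANULARITY == 4 and NJ_ALIGN_STACK == 16 as on IA32: with
		// highwatermark == 5 and two registers to save,
		//   stackNeeded = 4*5 = 20, stackPushed = 4*(1+2) = 12,
		//   aligned = alignUp(20+12, 16) = 32, amt = 32 - 12 = 20,
		// so the pushed bytes plus the SUB below displace SP from the aligned
		// boundary by 32, a multiple of NJ_ALIGN_STACK.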
		if (amt)
		{
#if defined NANOJIT_IA32
			SUBi(SP, amt);
#elif defined NANOJIT_AMD64
			SUBQi(SP, amt);
#endif
		}

		verbose_only( verbose_outputf("        %p:", _nIns); )
		verbose_only( verbose_output("        patch entry:"); )
		NIns *patchEntry = _nIns;
		MR(FP, SP); // Establish our own FP.

		// Save the pre-alignment SP value here, where the FP will point,
		// to preserve the illusion of a valid frame chain for
		// functions like MMgc::GetStackTrace. The 'return address'
		// of this 'frame' will be the last-saved register, but that's
		// fine, because the next-older frame will be legit.
		PUSHr(FP);

		for (Register i = FirstReg; i <= LastReg; i = nextreg(i))
			if (needSaving & rmask(i))
				PUSHr(i);

		// We'd like to be able to use SSE instructions like MOVDQA on
		// stack slots; it requires 16B alignment. Darwin requires a
		// 16B stack alignment, and Linux GCC seems to intend to
		// establish and preserve the same, but we're told that GCC
		// has not always done this right. To avoid doubt, do it on
		// all platforms. The prologue runs only when we enter
		// fragments from the interpreter, so forcing 16B alignment
		// here is cheap.
#if defined NANOJIT_IA32
		ANDi(SP, -NJ_ALIGN_STACK);
#elif defined NANOJIT_AMD64
		ANDQi(SP, -NJ_ALIGN_STACK);
#endif
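		// Note on the masking trick (illustrative, not in the original source):
		// -NJ_ALIGN_STACK == ~(NJ_ALIGN_STACK - 1), so with NJ_ALIGN_STACK == 16
		// the AND rounds SP down to a 16-byte boundary, e.g.
		// 0x0012ff58 & ~0xF == 0x0012ff50.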
		PUSHr(FP); // Save caller's FP.

		return patchEntry;
	}
	void Assembler::nFragExit(LInsp guard)
	{
		SideExit *exit = guard->exit();
		bool trees = _frago->core()->config.tree_opt;
		Fragment *frag = exit->target;
		GuardRecord *lr = 0;
		bool destKnown = (frag && frag->fragEntry);
		if (destKnown && !trees)
		{
			// Target already exists; emit the jump now. No patching required.
			JMP(frag->fragEntry);
		}
		else
		{
			// Target doesn't exist yet: emit a jump to the epilogue, and set up to patch it later.
			lr = placeGuardRecord(guard);
#if defined NANOJIT_AMD64
			/* 8 bytes for address, 4 for imm32, 2 for jmp */
			*(intptr_t *)_nIns = intptr_t(_epilogue);
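			// Layout sketch (an assumption based on the byte counts above and on
			// the 0xFF 0x25 check in nPatchBranch below): the exit jump is encoded as
			//   FF 25 <disp32>      ; jmp qword [rip + disp32]
			// with the 8-byte absolute target address placed right after it in the
			// code stream, so retargeting the exit only rewrites those 8 bytes.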
			// @todo optimization; is it worth it? It means we can remove the loop over outbound in Fragment.link().
			// For trees we need the patch entry on the incoming fragment so we can unhook it later if needed.
			if (trees && destKnown)
				patch(lr);
		}

		// First restore ESP from EBP, undoing the SUBi(SP,amt) from genPrologue.
		MR(SP, FP);

		if (_frago->core()->config.show_stats) {
			// Load EDX (arg1) with Fragment *fromFrag; the target fragment
			// will make use of this when calling fragenter().
#if defined NANOJIT_IA32
			int fromfrag = int((Fragment*)_thisfrag);
			LDi(argRegs[1], fromfrag);
#elif defined NANOJIT_AMD64
			LDQi(argRegs[1], intptr_t(_thisfrag));
#endif
		}

		// return value is GuardRecord*
#if defined NANOJIT_IA32
		LDi(EAX, int(lr));
#elif defined NANOJIT_AMD64
		LDQi(RAX, intptr_t(lr));
#endif
	}
	NIns *Assembler::genEpilogue(RegisterMask restore)
	{
		POPr(FP); // Restore caller's FP.
		MR(SP, FP); // Undo forced alignment.

		// Restore saved registers.
		for (Register i = UnknownReg; i >= FirstReg; i = prevreg(i))
			if (restore & rmask(i)) { POPr(i); }

		POPr(FP); // Pop the pre-alignment SP.
		return _nIns;
	}
#if defined NANOJIT_IA32
	void Assembler::asm_call(LInsp ins)
	{
		uint32_t fid = ins->fid();
		const CallInfo* call = callInfoFor(fid);
		// must be signed, not unsigned
		const uint32_t iargs = call->count_iargs();
		int32_t fstack = call->count_args() - iargs;

		int32_t extra = 0;
#if defined NJ_NO_FASTCALL
		int32_t istack = iargs;
#else
		int32_t istack = iargs - 2;	// first 2 4B args are in registers
		if (istack <= 0)
			istack = 0;
#endif

		const int32_t size = 4*istack + 8*fstack; // actual stack space used

		// stack re-alignment
		// only pop our adjustment amount since callee pops args in FASTCALL mode
		extra = alignUp(size, NJ_ALIGN_STACK) - (size);
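		// Worked example (illustrative, not in the original source): a FASTCALL
		// callee taking three int args and one double has iargs == 3, fstack == 1,
		// istack == 3-2 == 1, so size == 4*1 + 8*1 == 12 and, with
		// NJ_ALIGN_STACK == 16, extra == alignUp(12, 16) - 12 == 4 bytes of padding.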
#ifndef NJ_NO_FASTCALL
		if (extra > 0)
			SUBi(SP, extra);
#endif

#ifdef NJ_NO_FASTCALL
		// In C calling conventions, callee doesn't pop args.
		ADDi(SP, 4*iargs + 8*fstack + extra);
#endif

#ifdef NJ_NO_FASTCALL
#endif

		// make sure fpu stack is empty before call (restoreCallerSaved)
		NanoAssert(_allocator.isFree(FST0));
		// note: this code requires that ref arguments (ARGSIZE_Q)
		// be one of the first two arguments
		// pre-assign registers to the first 2 4B args
		const int max_regs = (iargs < 2) ? iargs : 2;
		int n = 0;

		uint32_t argc = call->get_sizes(sizes);

		for (uint32_t i = 0; i < argc; i++)
		{
			uint32_t j = argc - i - 1;
			ArgSize sz = sizes[j];
			Register r = UnknownReg;
			if (n < max_regs && sz != ARGSIZE_F)
				r = argRegs[n++]; // tell asm_arg what reg to use
			asm_arg(sz, ins->arg(j), r);
		}
	}
#elif defined NANOJIT_AMD64

	void Assembler::asm_call(LInsp ins)
	{
		Register fpu_reg = XMM0;
		uint32_t fid = ins->fid();
		const CallInfo* call = callInfoFor(fid);
		int n = 0;

		uint32_t argc = call->get_sizes(sizes);

		for (uint32_t i = 0; i < argc; i++)
		{
			uint32_t j = argc - i - 1;
			ArgSize sz = sizes[j];
			Register r = UnknownReg;
			if (sz != ARGSIZE_F) {
				r = argRegs[n++]; // tell asm_arg what reg to use
			} else {
				r = fpu_reg;
				fpu_reg = nextreg(fpu_reg);
			}
			findSpecificRegFor(ins->arg(j), r);
		}
	}
#endif
	void Assembler::nMarkExecute(Page* page, int32_t count, bool enable)
	{
#if defined WIN32 || defined WIN64
		DWORD dwIgnore;
		VirtualProtect(&page->code, count*NJ_PAGE_SIZE, PAGE_EXECUTE_READWRITE, &dwIgnore);
#elif defined DARWIN || defined AVMPLUS_LINUX
		intptr_t addr = (intptr_t)&page->code;
		addr &= ~((uintptr_t)NJ_PAGE_SIZE - 1);
		if (mprotect((void *)addr, count*NJ_PAGE_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC) == -1) {
			AvmDebugLog(("FATAL ERROR: mprotect(PROT_EXEC) failed\n"));
		}
#endif
	}
	Register Assembler::nRegisterAllocFromSet(int set)
	{
		Register r;
		RegAlloc &regs = _allocator;

		// MSVC inline assembly:
			bsf		eax, set					// i = first bit set
			btr		RegAlloc::free[ecx], eax	// free &= ~rmask(i)

		// Windows compiler intrinsics:
		unsigned long tr, fr;
		_BitScanForward(&tr, set);
		_bittestandreset(&fr, tr);

		// GCC inline assembly (constraints):
			: "=m"(r) : "m"(set), "m"(regs.free) : "%eax", "memory" );

		return r;
	}
	void Assembler::nRegisterResetAll(RegAlloc& a)
	{
		// add scratch registers to our free list for the allocator
		a.free = SavedRegs | ScratchRegs;
#if defined NANOJIT_IA32
#endif
		debug_only( a.managed = a.free; )
	}
	void Assembler::nPatchBranch(NIns* branch, NIns* location)
	{
#if defined NANOJIT_IA32
		intptr_t offset = intptr_t(location) - intptr_t(branch);
		if (branch[0] == JMPc)
			*(uint32_t*)&branch[1] = offset - 5;
		else
			*(uint32_t*)&branch[2] = offset - 6;
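		// Why -5 and -6 (explanatory note, not in the original source): JMP rel32
		// (opcode E9) is 5 bytes long and Jcc rel32 (0F 8x) is 6, and x86 encodes
		// rel32 relative to the end of the instruction, so the stored displacement
		// is location - (branch + length) = offset - length.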
#elif defined NANOJIT_AMD64
		if (branch[0] == 0xFF && branch[1] == 0x25) {
			NIns *mem;
			mem = &branch[6] + *(int32_t *)&branch[2];
			*(intptr_t *)mem = intptr_t(location);
		} else {
			NanoAssertMsg(0, "Unknown branch type in nPatchBranch");
		}
#endif
	}
	RegisterMask Assembler::hint(LIns* i, RegisterMask allow)
	{
		uint32_t op = i->opcode();
		RegisterMask prefer = allow;
		if (op == LIR_call)
#if defined NANOJIT_IA32
			prefer &= rmask(EAX);
#elif defined NANOJIT_AMD64
			prefer &= rmask(RAX);
#endif
		else if (op == LIR_param)
			prefer &= rmask(Register(i->imm8()));
#if defined NANOJIT_IA32
		else if (op == LIR_callh || (op == LIR_rsh && i->oprnd1()->opcode() == LIR_callh))
			prefer &= rmask(EDX);
#else
		else if (op == LIR_callh)
			prefer &= rmask(RAX);
#endif
		else if (i->isCmp())
			prefer &= AllowableFlagRegs;
		else if (i->isconst())
			prefer &= ScratchRegs;
		return (_allocator.free & prefer) ? prefer : allow;
	}
	void Assembler::asm_qjoin(LIns *ins)
	{
		int d = findMemFor(ins);
		LIns* lo = ins->oprnd1();
		LIns* hi = ins->oprnd2();

		Reservation *resv = getresv(ins);
		Register rr = resv->reg;

		if (rr != UnknownReg && (rmask(rr) & FpRegs))
			evict(rr);

		if (hi->isconst())
		{
			STi(FP, d+4, hi->constval());
		}
		else
		{
			Register r = findRegFor(hi, GpRegs);
			ST(FP, d+4, r);
		}

		if (lo->isconst())
		{
			STi(FP, d, lo->constval());
		}
		else
		{
			// okay if r gets recycled.
			Register r = findRegFor(lo, GpRegs);
			ST(FP, d, r);
		}

		freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
	}
	void Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
	{
		if (!resv->arIndex) {
		}
		LDi(r, i->constval());

		int d = findMemFor(i);
		if (rmask(r) & FpRegs)
		{
#if defined NANOJIT_IA32
			if (rmask(r) & XmmRegs) {
#endif
#if defined NANOJIT_IA32
			}
#endif
		}
#if defined NANOJIT_AMD64
#endif

		verbose_only(if (_verbose) {
			outputf("        restore %s", _thisfrag->lirbuf->names->formatRef(i));
		})
	}
	void Assembler::asm_store32(LIns *value, int dr, LIns *base)
	{
		if (value->isconst())
		{
			Register rb = findRegFor(base, GpRegs);
			int c = value->constval();
			STi(rb, dr, c);
		}
		else
		{
			// make sure value is in a register
			Reservation *rA, *rB;
			findRegFor2(GpRegs, value, rA, base, rB);
			Register ra = rA->reg;
			Register rb = rB->reg;
			ST(rb, dr, ra);
		}
	}
	void Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
	{
		Register rr = resv->reg;

		// save to spill location
		if (rmask(rr) & FpRegs)
		{
#if defined NANOJIT_IA32
			if (rmask(rr) & XmmRegs) {
			}
#endif
#if defined NANOJIT_IA32
			FSTQ((pop?1:0), d, FP);
#endif
		}
#if defined NANOJIT_AMD64
#endif

		verbose_only(if (_verbose) {
			outputf("        spill %s", _thisfrag->lirbuf->names->formatRef(i));
		})

#if defined NANOJIT_IA32
		else if (pop && (rmask(rr) & x87Regs))
		{
			// pop the fpu result since it isn't used
		}
#endif
	}
	void Assembler::asm_load64(LInsp ins)
	{
		LIns* base = ins->oprnd1();
		int db = ins->oprnd2()->constval();
		Reservation *resv = getresv(ins);
		Register rr = resv->reg;

		if (rr != UnknownReg && rmask(rr) & XmmRegs)
		{
			freeRsrcOf(ins, false);
			Register rb = findRegFor(base, GpRegs);
		}
#if defined NANOJIT_AMD64
		else if (rr != UnknownReg && rmask(rr) & GpRegs)
		{
			freeRsrcOf(ins, false);
			Register rb = findRegFor(base, GpRegs);
		}
		else
		{
			Register rb = findRegFor(base, GpRegs);

			/* We need a temporary register we can move the destination into */
			rr = registerAlloc(GpRegs);

			_allocator.addFree(rr);

			freeRsrcOf(ins, false);
		}
#elif defined NANOJIT_IA32
		else
		{
			Register rb = findRegFor(base, GpRegs);
			resv->reg = UnknownReg;

			// don't use an fpu reg to simply load & store the value.
			asm_mmq(FP, dr, rb, db);

			freeRsrcOf(ins, false);

			if (rr != UnknownReg)
			{
				NanoAssert(rmask(rr) & FpRegs);
				_allocator.retire(rr);
			}
		}
#endif
	}
	void Assembler::asm_store64(LInsp value, int dr, LInsp base)
	{
		if (value->isconstq())
		{
			// if it's a constant 64-bit value, just store it now rather than
			// generating a pointless store/load/store sequence
			Register rb = findRegFor(base, GpRegs);
			const int32_t* p = (const int32_t*) (value-2);
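			// Note (explanatory, not in the original source): for a quad constant
			// the 64-bit payload lives in the two LIns slots immediately preceding
			// the instruction, so (value-2) points at its low and high 32-bit
			// halves (p[0] and p[1]); the same trick appears in asm_quad() below.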
#if defined NANOJIT_IA32
		if (value->isop(LIR_ldq) || value->isop(LIR_qjoin))
		{
			// value is a 64bit struct or int64_t, or maybe a double.
			// it may be live in an FPU reg. Either way, don't
			// put it in an FPU reg just to load & store it.
			//
			// a) if we know it's not a double, this is right.
			// b) if we guarded that it's a double, this store could be on
			//    the side exit, copying a non-double.
			// c) maybe it's a double just being stored. oh well.

			if (sse2) {
				Register rv = findRegFor(value, XmmRegs);
				Register rb = findRegFor(base, GpRegs);
				SSE_STQ(dr, rb, rv);
				return;
			}

			int da = findMemFor(value);
			Register rb = findRegFor(base, GpRegs);
			asm_mmq(rb, dr, FP, da);
			return;
		}

		Reservation* rA = getresv(value);
		int pop = !rA || rA->reg == UnknownReg;
		Register rv = findRegFor(value, sse2 ? XmmRegs : FpRegs);
		Register rb = findRegFor(base, GpRegs);

		if (rmask(rv) & XmmRegs) {
		}
#elif defined NANOJIT_AMD64
		/* If this is not a float operation, we can use GpRegs instead.
		 * We can do this in a few other cases but for now I'll keep it simple.
		 */
		Register rb = findRegFor(base, GpRegs);
		Reservation *rV = getresv(value);

		if (rV != NULL && rV->reg != UnknownReg) {
			if (rmask(rV->reg) & GpRegs) {
				STQ(rb, dr, rV->reg);
			} else {
				SSE_STQ(dr, rb, rV->reg);
			}
		} else {
			Register rv;

			/* Try to catch some common patterns.
			 * Note: this is a necessity, since in between things like
			 * asm_fop() could see the reservation and try to use a non-SSE
			 * register for adding. Same for asm_qbinop in theory.
			 * There should probably be asserts to catch more cases.
			 */
			if (value->isop(LIR_u2f)
				|| value->isop(LIR_i2f)
				|| value->opcode() == LIR_fcall) {
				rv = findRegFor(value, XmmRegs);
				SSE_STQ(dr, rb, rv);
			} else {
				rv = findRegFor(value, GpRegs);
				STQ(rb, dr, rv);
			}
		}
#endif
	}
	/**
	 * copy 64 bits: (rd+dd) <- (rs+ds)
	 */
	void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
	{
		// value is either a 64bit struct or maybe a float
		// that isn't live in an FPU reg. Either way, don't
		// put it in an FPU reg just to load & store it.
#if defined NANOJIT_IA32
		if (sse2)
		{
#endif
			// use SSE to load+store 64bits
			Register t = registerAlloc(XmmRegs);
			_allocator.addFree(t);
#if defined NANOJIT_IA32
		}
		else
		{
			// copy via a scratch GP register
			Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs)));
			_allocator.addFree(t);
		}
#endif
	}
	void Assembler::asm_quad(LInsp ins)
	{
#if defined NANOJIT_IA32
		Reservation *rR = getresv(ins);
		Register rr = rR->reg;
		if (rr != UnknownReg)
		{
			// @todo -- add special-cases for 0 and 1
			_allocator.retire(rr);
			rR->reg = UnknownReg;
			NanoAssert((rmask(rr) & FpRegs) != 0);

			const double d = ins->constvalf();
			if (rmask(rr) & XmmRegs) {
			} else if (d == 1.0) {
				// 1.0 is extremely frequent and worth special-casing!
				static const double k_ONE = 1.0;
			}
		}
		else
		{
			const int d = disp(rR);
			if (ins->constvalf() == 0.0) {
			} else if (d == 1.0) {
			}

			// @todo, if we used xor, ldsd, fldz, etc above, we don't need mem here
			freeRsrcOf(ins, false);
		}

		const int32_t* p = (const int32_t*) (ins-2);
#elif defined NANOJIT_AMD64
		Reservation *rR = getresv(ins);
		int64_t val = *(int64_t *)(ins - 2);

		if (rR->reg != UnknownReg)
		{
			Register rr = rR->reg;
			freeRsrcOf(ins, false);
			if (rmask(rr) & GpRegs)
			{
			}
			else if (rmask(rr) & XmmRegs)
			{
				if (ins->constvalf() == 0.0)
				{
				}
				/* Get a short-lived register, not associated with the instruction */
				Register rs = registerAlloc(GpRegs);
				_allocator.addFree(rs);
			}
		}
		else
		{
			const int32_t* p = (const int32_t*) (ins-2);
			freeRsrcOf(ins, false);
		}
#endif
	}
	bool Assembler::asm_qlo(LInsp ins, LInsp q)
	{
#if defined NANOJIT_IA32
#endif
		Reservation *resv = getresv(ins);
		Register rr = resv->reg;
		if (rr == UnknownReg) {
			// store quad in spill loc
			freeRsrcOf(ins, false);
			Register qr = findRegFor(q, XmmRegs);
			SSE_MOVDm(d, FP, qr);
		} else {
			freeRsrcOf(ins, false);
			Register qr = findRegFor(q, XmmRegs);
		}
	}
	void Assembler::asm_fneg(LInsp ins)
	{
#if defined NANOJIT_IA32
		if (sse2)
		{
#endif
			LIns *lhs = ins->oprnd1();

			Register rr = prepResultReg(ins, XmmRegs);
			Reservation *rA = getresv(lhs);
			Register ra;

			// if this is last use of lhs in reg, we can re-use result reg
			if (rA == 0 || (ra = rA->reg) == UnknownReg)
				ra = findSpecificRegFor(lhs, rr);
			// else, rA already has a register assigned.

			static const AVMPLUS_ALIGN16(uint32_t) negateMask[] = {0,0x80000000,0,0};
			SSE_XORPD(rr, negateMask);
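			// How the mask works (explanatory note, not in the original source):
			// in the little-endian 128-bit value {0, 0x80000000, 0, 0} the second
			// 32-bit word supplies bit 63, i.e. the IEEE-754 sign bit of the low
			// double, so XORPD flips just that sign bit and leaves the rest of rr
			// unchanged -- a branch-free floating-point negation.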
#if defined NANOJIT_IA32
		}
		else
		{
			Register rr = prepResultReg(ins, FpRegs);

			LIns* lhs = ins->oprnd1();

			// lhs into reg, prefer same reg as result
			Reservation* rA = getresv(lhs);
			// if this is last use of lhs in reg, we can re-use result reg
			if (rA == 0 || rA->reg == UnknownReg)
				findSpecificRegFor(lhs, rr);
			// else, rA already has a different reg assigned

			NanoAssert(getresv(lhs) != 0 && getresv(lhs)->reg == FST0);
			// assume that the lhs is in ST(0) and rhs is on stack

			// if we had more than one fpu reg, this is where
			// we would move ra into rr if rr != ra.
		}
#endif
	}
	void Assembler::asm_pusharg(LInsp p)
	{
		Reservation* rA = getresv(p);
		if (rA == 0)
		{
			if (p->isconst())
			{
				// small const we push directly
				PUSHi(p->constval());
			}
			else
			{
				Register ra = findRegFor(p, GpRegs);
				PUSHr(ra);
			}
		}
		else if (rA->reg == UnknownReg)
		{
		}
	}
	void Assembler::asm_farg(LInsp p)
	{
#if defined NANOJIT_IA32
		Register r = findRegFor(p, FpRegs);
		if (rmask(r) & XmmRegs) {
		}
		PUSHr(ECX); // 2*pushr is smaller than sub
		PUSHr(ECX);
#endif
	}
	void Assembler::asm_fop(LInsp ins)
	{
		LOpcode op = ins->opcode();
#if defined NANOJIT_IA32
		if (sse2)
		{
#endif
			LIns *lhs = ins->oprnd1();
			LIns *rhs = ins->oprnd2();

			RegisterMask allow = XmmRegs;
			Register rb = UnknownReg;
			if (lhs != rhs)
				rb = findRegFor(rhs, allow);

			Register rr = prepResultReg(ins, allow);
			Reservation *rA = getresv(lhs);
			Register ra;

			// if this is last use of lhs in reg, we can re-use result reg
			if (rA == 0 || (ra = rA->reg) == UnknownReg)
				ra = findSpecificRegFor(lhs, rr);
			// else, rA already has a register assigned.

			if (op == LIR_fadd)
				SSE_ADDSD(rr, rb);
			else if (op == LIR_fsub)
				SSE_SUBSD(rr, rb);
			else if (op == LIR_fmul)
				SSE_MULSD(rr, rb);
			else //if (op == LIR_fdiv)
				SSE_DIVSD(rr, rb);
#if defined NANOJIT_IA32
		}
		else
		{
			// we swap lhs/rhs on purpose here; it works out better
			// if you only have one fpu reg. use divr/subr.
			LIns* rhs = ins->oprnd1();
			LIns* lhs = ins->oprnd2();
			Register rr = prepResultReg(ins, rmask(FST0));

			// make sure rhs is in memory
			int db = findMemFor(rhs);

			// lhs into reg, prefer same reg as result
			Reservation* rA = getresv(lhs);
			// last use of lhs in reg, can reuse rr
			if (rA == 0 || rA->reg == UnknownReg)
				findSpecificRegFor(lhs, rr);
			// else, rA already has a different reg assigned

			NanoAssert(getresv(lhs) != 0 && getresv(lhs)->reg == FST0);
			// assume that the lhs is in ST(0) and rhs is on stack
			if (op == LIR_fadd)
				FADD(db, FP);
			else if (op == LIR_fsub)
				FSUBR(db, FP);
			else if (op == LIR_fmul)
				FMUL(db, FP);
			else if (op == LIR_fdiv)
				FDIVR(db, FP);
		}
#endif
	}
	void Assembler::asm_i2f(LInsp ins)
	{
		// where our result goes
		Register rr = prepResultReg(ins, FpRegs);
#if defined NANOJIT_IA32
		if (rmask(rr) & XmmRegs)
		{
#endif
			// todo: support int value in memory
			Register gr = findRegFor(ins->oprnd1(), GpRegs);
			SSE_CVTSI2SD(rr, gr);
#if defined NANOJIT_IA32
		}
		else
		{
			int d = findMemFor(ins->oprnd1());
		}
#endif
	}
	Register Assembler::asm_prep_fcall(Reservation *rR, LInsp ins)
	{
#if defined NANOJIT_IA32
		Register rr;
		if ((rr = rR->reg) != UnknownReg && (rmask(rr) & XmmRegs))
			evict(rr);
		return prepResultReg(ins, rmask(FST0));
#elif defined NANOJIT_AMD64
		return prepResultReg(ins, rmask(XMM0));
#endif
	}
	void Assembler::asm_u2f(LInsp ins)
	{
		// where our result goes
		Register rr = prepResultReg(ins, FpRegs);
#if defined NANOJIT_IA32
		if (rmask(rr) & XmmRegs)
		{
#endif
			// don't call findRegFor; we want a reg we can stomp on for a very short time,
			// not a reg that will continue to be associated with the LIns
			Register gr = registerAlloc(GpRegs);

			// technique inspired by gcc disassembly.
			// Edwin explains it:
			//
			// gr is 0..2^32-1
			//
			//     sub gr, 0x80000000
			//
			// now gr is -2^31..2^31-1, i.e. the range of int, but not the same value
			// as before
			//
			//     cvtsi2sd rr, gr
			//
			// rr is now a double with the int value range
			//
			//     addsd rr, 2147483648.0
			//
			// adding back double(0x80000000) makes the range 0..2^32-1.
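			//
			// Worked example (illustrative, not in the original source): for the
			// input 0xFFFFFFFF (4294967295), the subtract gives 0x7FFFFFFF
			// (2147483647), cvtsi2sd turns that into 2147483647.0, and adding
			// 2147483648.0 yields 4294967295.0 -- the original unsigned value,
			// exactly representable since it is well below 2^53.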

			static const double k_NEGONE = 2147483648.0;
#if defined NANOJIT_IA32
			SSE_ADDSDm(rr, &k_NEGONE);
#elif defined NANOJIT_AMD64
			/* Squirrel the constant at the bottom of the page. */
			if (_dblNegPtr != NULL)
			{
				underrunProtect(10);
			}
			if (_dblNegPtr == NULL)
			{
				underrunProtect(30);
				uint8_t *base, *begin;
				base = (uint8_t *)((intptr_t)_nIns & ~((intptr_t)NJ_PAGE_SIZE - 1));
				base += sizeof(PageHeader) + _pageData;
				begin = base;
				/* Make sure we align */
				if ((uintptr_t)base & 0xF) {
					base = (uint8_t *)((uintptr_t)base & ~(uintptr_t)0xF);
					base += 16;
				}
				_pageData += (int32_t)(base - begin) + sizeof(double);
				_negOnePtr = (NIns *)base;
				*(double *)_negOnePtr = k_NEGONE;
			}
			SSE_ADDSDm(rr, _negOnePtr);
#endif
			SSE_CVTSI2SD(rr, gr);
			Reservation* resv = getresv(ins->oprnd1());
			Register xr;
			if (resv && (xr = resv->reg) != UnknownReg && (rmask(xr) & GpRegs))
			{
				LEA(gr, 0x80000000, xr);
			}
			else
			{
				const int d = findMemFor(ins->oprnd1());
				SUBi(gr, 0x80000000);
			}

			// ok, we're done with it
			_allocator.addFree(gr);
#if defined NANOJIT_IA32
		}
		else
		{
			const int disp = -8;
			const Register base = SP;
			Register gr = findRegFor(ins->oprnd1(), GpRegs);
			NanoAssert(rr == FST0);
			STi(base, disp+4, 0);	// high 32 bits = 0
			ST(base, disp, gr);		// low 32 bits = unsigned value
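			// Explanatory note (not in the original source): the two stores build a
			// 64-bit integer at SP-8 whose high word is zero, so its signed 64-bit
			// value equals the unsigned 32-bit input; loading it with the x87
			// 64-bit integer load (fild qword) therefore converts u32 -> double
			// without any further range adjustment.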
		}
#endif
	}

	void Assembler::asm_nongp_copy(Register r, Register s)
	{
		if ((rmask(r) & XmmRegs) && (rmask(s) & XmmRegs)) {
		} else if ((rmask(r) & GpRegs) && (rmask(s) & XmmRegs)) {
		} else {
			if (rmask(r) & XmmRegs) {
				NanoAssertMsg(false, "Should not move data from GPR to XMM");
			} else {
				NanoAssertMsg(false, "Should not move data from GPR/XMM to x87 FPU");
			}
		}
	}
	void Assembler::asm_fcmp(LIns *cond)
	{
		LOpcode condop = cond->opcode();
		NanoAssert(condop >= LIR_feq && condop <= LIR_fge);
		LIns* lhs = cond->oprnd1();
		LIns* rhs = cond->oprnd2();

		if (condop == LIR_feq)
			;
		else if (condop == LIR_fle)
			;
		else if (condop == LIR_flt)
			;
		else if (condop == LIR_fge) {
			// swap operands and use the 'le' test
			LIns* t = lhs; lhs = rhs; rhs = t;
		} else { // if (condop == LIR_fgt)
			// swap operands and use the 'lt' test
			LIns* t = lhs; lhs = rhs; rhs = t;
		}

#if defined NANOJIT_IA32
		if (sse2)
		{
#endif
			// UCOMISD sets the flags as follows:
			// UNORDERED:    ZF,PF,CF <- 111;
			// GREATER_THAN: ZF,PF,CF <- 000;
			// LESS_THAN:    ZF,PF,CF <- 001;
			// EQUAL:        ZF,PF,CF <- 100;
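			//
			// Reading the table (explanatory note, not in the original source):
			// after UCOMISD, "equal" is ZF=1 with PF=0, and any NaN operand sets
			// PF, so an feq test must check PF as well as ZF to reject unordered
			// results; "less than" shows up as CF=1, which is why the operand
			// swaps above let fgt/fge reuse the flt/fle flag tests.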
			if (condop == LIR_feq && lhs == rhs) {
				// NaN check: compare the register against itself.
				Register r = findRegFor(lhs, XmmRegs);
				SSE_UCOMISD(r, r);
			} else {
#if defined NANOJIT_IA32
#elif defined NANOJIT_AMD64
#endif
				Reservation *rA, *rB;
				findRegFor2(XmmRegs, lhs, rA, rhs, rB);
				SSE_UCOMISD(rA->reg, rB->reg);
			}
#if defined NANOJIT_IA32
		}
		else
		{
			NanoAssert(lhs->isQuad() && rhs->isQuad());

			if (lhs != rhs)
			{
				// compare two different numbers
				int d = findMemFor(rhs);
				Reservation *rA = getresv(lhs);
				int pop = !rA || rA->reg == UnknownReg;
				findSpecificRegFor(lhs, FST0);
				// lhs is in ST(0) and rhs is on stack
			}
			else
			{
				// compare n to itself; this is a NaN test.
				Reservation *rA = getresv(lhs);
				int pop = !rA || rA->reg == UnknownReg;
				findSpecificRegFor(lhs, FST0);
			}
		}
#endif
	}
	NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
	{
		NIns* was;
#if defined NANOJIT_AMD64
		was = (NIns*)( *(intptr_t*)(at) );
		*(intptr_t *)(at) = intptr_t(target);
#else
		was = (NIns*)( (intptr_t)*(int32_t*)(at+1) + (intptr_t)(at+5) );
		_nIns = at + 5; // +5 is size of JMP
		intptr_t tt = (intptr_t)target - (intptr_t)_nIns;
#endif
		return was;
	}
	void Assembler::nativePageReset()
	{
#if defined NANOJIT_AMD64
		/* We store some stuff at the bottom of the page.
		 * We reserve 8 bytes for long jumps just in case we need them.
		 */
#endif
	}
	Register Assembler::asm_binop_rhs_reg(LInsp ins)
	{
		LOpcode op = ins->opcode();
		LIns *rhs = ins->oprnd2();

		if (op == LIR_lsh || op == LIR_rsh || op == LIR_ush) {
#if defined NANOJIT_IA32
			return findSpecificRegFor(rhs, ECX);
#elif defined NANOJIT_AMD64
			return findSpecificRegFor(rhs, RCX);
#endif
		}
		return UnknownReg;
	}
#if defined NANOJIT_AMD64
	void Assembler::asm_qbinop(LIns *ins)
	{
		LInsp lhs = ins->oprnd1();
		LInsp rhs = ins->oprnd2();
		LOpcode op = ins->opcode();

		Register rr = prepResultReg(ins, GpRegs);
		Reservation *rA = getresv(lhs);
		Register ra;

		if (rA == NULL || (ra = rA->reg) == UnknownReg) {
			ra = findSpecificRegFor(lhs, rr);
		}

		if (rhs->isconst())
		{
			int c = rhs->constval();

			if (op == LIR_qiadd) {
			} else if (op == LIR_qiand) {
			} else if (op == LIR_qilsh) {
			} else if (op == LIR_qior) {
			}
		}
		else
		{
			Register rv;
			rv = findRegFor(rhs, GpRegs & ~(rmask(rr)));

			if (op == LIR_qiadd) {
			} else if (op == LIR_qiand) {
			} else if (op == LIR_qior) {
			} else {
				NanoAssert(rhs->isconst());
			}
		}
	}
#endif
	void Assembler::nativePageSetup()
	{
		if (!_nIns)     _nIns     = pageAlloc();
		if (!_nExitIns) _nExitIns = pageAlloc(true);
	}

	#endif /* FEATURE_NANOJIT */