Bug 469739 - Add support for displaying Vista UAC shield icon; r=joe sr=vladimir
[wine-gecko.git] / js / src / nanojit / NativeThumb.cpp
blobc1c53790fa339bc36004e410fb493c256c12b82e
1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: t; tab-width: 4 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is [Open Source Virtual Machine].
17 * The Initial Developer of the Original Code is
18 * Adobe System Incorporated.
19 * Portions created by the Initial Developer are Copyright (C) 2004-2007
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
23 * Adobe AS3 Team
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #include "nanojit.h"
41 #ifdef AVMPLUS_PORTING_API
42 #include "portapi_nanojit.h"
43 #endif
45 #ifdef UNDER_CE
46 #include <cmnintrin.h>
47 #endif
49 #if defined(AVMPLUS_LINUX)
50 #include <asm/unistd.h>
51 #endif
53 namespace nanojit
55 #ifdef FEATURE_NANOJIT
#ifdef NJ_VERBOSE
    // Human-readable register names, indexed by Register number, for
    // verbose/disassembly output only.
    const char* regNames[] = {"r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","r11","IP","SP","LR","PC"};
#endif
    // Registers used for the first four integer arguments (ARM calling convention).
    const Register Assembler::argRegs[] = { R0, R1, R2, R3 };
    // Return-value registers (R1 holds the high word of a 64-bit result).
    const Register Assembler::retRegs[] = { R0, R1 };
    // Callee-saved registers preserved across fragment calls (see genPrologue).
    const Register Assembler::savedRegs[] = { R4, R5, R6, R7 };
    // One-time backend initialization.
    void Assembler::nInit(AvmCore*)
    {
        // Thumb mode does not have conditional move, alas
        has_cmov = false;
    }
    // Emit the fragment prologue: push callee-saved regs + LR, set up the
    // frame pointer, and carve out the aligned stack area for activations.
    // Returns the address callers should be patched to jump to.
    NIns* Assembler::genPrologue(RegisterMask needSaving)
    {
        /**
         * Prologue
         */

        // NJ_RESV_OFFSET is space at the top of the stack for us
        // to use for parameter passing (8 bytes at the moment)
        uint32_t stackNeeded = 4 * _activation.highwatermark + NJ_STACK_OFFSET;
        uint32_t savingCount = 0;

        uint32_t savingMask = 0;
        savingCount = 5; // R4-R7, LR
        savingMask = 0xF0;
        (void)needSaving;   // we always save the same fixed set on Thumb

        // so for alignment purposes we've pushed return addr, fp, and savingCount registers
        uint32_t stackPushed = 4 * (2+savingCount);
        uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
        int32_t amt = aligned - stackPushed;

        // Make room on stack for what we are doing
        if (amt)
        {
            // largest value is 508 (7-bits << 2): Thumb SUB sp,#imm is limited,
            // so split a larger adjustment into multiple SUBs.
            if (amt>508)
            {
                int size = 508;
                while (size>0)
                {
                    SUBi(SP, size);
                    amt -= size;
                    size = amt;
                    if (size>508)
                        size=508;
                }
            }
            else
                SUBi(SP, amt);
        }

        verbose_only( verbose_outputf("   %p:",_nIns); )
        verbose_only( verbose_output("   patch entry"); )
        NIns *patchEntry = _nIns;

        // NB: code is emitted in reverse, so at runtime the PUSH executes
        // first, then the frame-pointer setup, then the SP adjustment above.
        MR(FRAME_PTR, SP);
        PUSH_mask(savingMask|rmask(LR));
        return patchEntry;
    }
    // Emit a side-exit from the fragment described by `guard`: jump to the
    // target fragment if already compiled, otherwise to the epilogue via a
    // patchable branch recorded in a GuardRecord.
    void Assembler::nFragExit(LInsp guard)
    {
        SideExit* exit = guard->exit();
        Fragment *frag = exit->target;
        GuardRecord *lr;
        if (frag && frag->fragEntry)
        {
            JMP(frag->fragEntry);
            lr = 0;
        }
        else
        {
            // target doesn't exit yet. emit jump to epilog, and set up to patch later.
            lr = placeGuardRecord(guard);
            BL(_epilogue);
            lr->jmp = _nIns;    // remember the branch so nPatchBranch can retarget it
        }

        // pop the stack frame first
        MR(SP, FRAME_PTR);

#ifdef NJ_VERBOSE
        if (_frago->core()->config.show_stats) {
            // load R1 with Fragment *fromFrag, target fragment
            // will make use of this when calling fragenter().
            int fromfrag = int((Fragment*)_thisfrag);
            LDi(argRegs[1], fromfrag);
        }
#endif

        // return value is GuardRecord*
        LDi(R2, int(lr));
    }
    // Emit the fragment epilogue: move the GuardRecord* into R0 and pop the
    // saved registers + return address. The interworking (BX) variant is
    // disabled; we always return directly to a Thumb caller.
    NIns* Assembler::genEpilogue(RegisterMask restore)
    {
        (void)restore;
        if (false) {
            // interworking
            BX(R3); // return
            POPr(R3); // POP LR into R3
            POP_mask(0xF0); // {R4-R7}
        } else {
            // return to Thumb caller
            POP_mask(0xF0|rmask(PC));
        }
        MR(R0,R2); // return LinkRecord*
        return _nIns;
    }
    // Emit a call: place arguments in R0-R3 (spilling the rest), honoring the
    // EABI rule that a double must start in an even register pair.
    void Assembler::asm_call(LInsp ins)
    {
        const CallInfo* call = ins->callInfo();
        uint32_t atypes = call->_argtypes;
        uint32_t roffset = 0;

        // we need to detect if we have arg0 as LO followed by arg1 as F;
        // in that case, we need to skip using r1 -- the F needs to be
        // loaded in r2/r3, at least according to the ARM EABI and gcc 4.2's
        // generated code.
        bool arg0IsInt32FollowedByFloat = false;
        while ((atypes & 3) != ARGSIZE_NONE) {
            if (((atypes >> 4) & 3) == ARGSIZE_LO &&
                ((atypes >> 2) & 3) == ARGSIZE_F &&
                ((atypes >> 6) & 3) == ARGSIZE_NONE)
            {
                arg0IsInt32FollowedByFloat = true;
                break;
            }
            atypes >>= 2;
        }

        // reverse emission: the CALL instruction is emitted first so it runs
        // after the argument setup below.
        CALL(call);
        ArgSize sizes[10];
        uint32_t argc = call->get_sizes(sizes);
        for(uint32_t i=0; i < argc; i++)
        {
            uint32_t j = argc - i - 1;
            ArgSize sz = sizes[j];
            NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q);
            // pre-assign registers R0-R3 for arguments (if they fit)
            Register r = (i+roffset) < 4 ? argRegs[i+roffset] : UnknownReg;
            asm_arg(sz, ins->arg(j), r);

            if (i == 0 && arg0IsInt32FollowedByFloat)
                roffset = 1;    // skip R1 so the following double lands in R2/R3
        }
    }
    // Change page protection for generated code (WinCE: VirtualProtect;
    // porting API: delegate to the host embedder).
    void Assembler::nMarkExecute(Page* page, int flags)
    {
        NanoAssert(sizeof(Page) == NJ_PAGE_SIZE);
#ifdef UNDER_CE
        // index by the two protection bits: [exec][write]
        static const DWORD kProtFlags[4] =
        {
            PAGE_READONLY,          // 0
            PAGE_READWRITE,         // PAGE_WRITE
            PAGE_EXECUTE_READ,      // PAGE_EXEC
            PAGE_EXECUTE_READWRITE  // PAGE_EXEC|PAGE_WRITE
        };
        DWORD prot = kProtFlags[flags & (PAGE_WRITE|PAGE_EXEC)];
        DWORD dwOld;
        BOOL res = VirtualProtect(page, NJ_PAGE_SIZE, prot, &dwOld);
        if (!res)
        {
            // todo: we can't abort or assert here, we have to fail gracefully.
            NanoAssertMsg(false, "FATAL ERROR: VirtualProtect() failed\n");
        }
#endif
#ifdef AVMPLUS_PORTING_API
        NanoJIT_PortAPI_MarkExecutable(page, (void*)((char*)page+NJ_PAGE_SIZE), flags);
        // todo, must add error-handling to the portapi
#endif
    }
    // Pick the lowest-numbered register present in `set`, remove it from the
    // allocator's free set, and return it.
    Register Assembler::nRegisterAllocFromSet(int set)
    {
        // need to implement faster way
        int i=0;
        while (!(set & rmask((Register)i)))
            i ++;
        _allocator.free &= ~rmask((Register)i);
        return (Register) i;
    }
    // Reset the register allocator: only R0-R5 are handed out on Thumb
    // (R6/R7 and up are reserved for frame/scratch purposes).
    void Assembler::nRegisterResetAll(RegAlloc& a)
    {
        // add scratch registers to our free list for the allocator
        a.clear();
        a.used = 0;
        a.free = rmask(R0) | rmask(R1) | rmask(R2) | rmask(R3) | rmask(R4) | rmask(R5);
        debug_only(a.managed = a.free);
    }
    // Retarget a previously emitted two-halfword Thumb BL at `branch` so it
    // jumps to `target`.
    void Assembler::nPatchBranch(NIns* branch, NIns* target)
    {
        // Patch the jump in a loop

        // This is ALWAYS going to be a long branch (using the BL instruction)
        // Which is really 2 instructions, so we need to modify both
        // XXX -- this is B, not BL, at least on non-Thumb..

        // branch+2 because PC is always 2 instructions ahead on ARM/Thumb
        int32_t offset = int(target) - int(branch+2);

        //printf("---patching branch at 0x%08x to location 0x%08x (%d-0x%08x)\n", branch, target, offset, offset);

        // BL encodes a signed 22-bit halfword offset split across two halfwords
        NanoAssert(-(1<<21) <= offset && offset < (1<<21));
        *branch++ = (NIns)(0xF000 | (offset>>12)&0x7FF);    // high half: offset[21:12]
        *branch =   (NIns)(0xF800 | (offset>>1)&0x7FF);     // low half:  offset[11:1]
    }
    // Suggest a preferred register set for `i` (e.g. call results in R0,
    // params in their incoming arg register) if one is currently free.
    RegisterMask Assembler::hint(LIns* i, RegisterMask allow /* = ~0 */)
    {
        uint32_t op = i->opcode();
        int prefer = ~0;

        if (op==LIR_call || op==LIR_fcall)
            prefer = rmask(R0);
        else if (op == LIR_callh)
            prefer = rmask(R1);
        else if (op == LIR_param)
            prefer = rmask(imm2register(argRegs[i->imm8()]));

        // only narrow `allow` if the preferred register is actually free
        if (_allocator.free & allow & prefer)
            allow &= prefer;
        return allow;
    }
    // Store the two 32-bit halves of a LIR_qjoin into its stack slot
    // (lo at d, hi at d+4).
    void Assembler::asm_qjoin(LIns *ins)
    {
        int d = findMemFor(ins);
        AvmAssert(d);
        LIns* lo = ins->oprnd1();
        LIns* hi = ins->oprnd2();

        Register r = findRegFor(hi, GpRegs);
        ST(FP, d+4, r);

        // okay if r gets recycled.
        r = findRegFor(lo, GpRegs);
        ST(FP, d, r);
        freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
    }
    // Emit a 32-bit store of `value` to [base + dr].
    void Assembler::asm_store32(LIns *value, int dr, LIns *base)
    {
        // make sure what is in a register
        Reservation *rA, *rB;
        findRegFor2(GpRegs, value, rA, base, rB);
        Register ra = rA->reg;
        Register rb = rB->reg;
        ST(rb, dr, ra);
    }
    // Reload a spilled value from its stack slot into register `r`.
    void Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
    {
        (void)resv;
        int d = findMemFor(i);
        LD(r, d, FP);
        verbose_only(if (_verbose) {
            outputf("        restore %s",_thisfrag->lirbuf->names->formatRef(i));
        })
    }
    // Spill register `rr` to the instruction's stack slot, if it has one.
    void Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
    {
        (void)i;
        (void)pop;
        if (resv->arIndex)
        {
            int d = disp(resv);
            // save to spill location
            Register rr = resv->reg;
            ST(FP, d, rr);
            verbose_only(if (_verbose){
                outputf("        spill %s",_thisfrag->lirbuf->names->formatRef(i));
            })
        }
    }
    // Load a 64-bit value from [base + db] into this instruction's spill
    // slot (no FP registers on Thumb — quads always live in memory).
    void Assembler::asm_load64(LInsp ins)
    {
        LIns* base = ins->oprnd1();
        int db = ins->oprnd2()->constval();
        Reservation *resv = getresv(ins);
        int dr = disp(resv);
        NanoAssert(resv->reg == UnknownReg && dr != 0);

        Register rb = findRegFor(base, GpRegs);
        resv->reg = UnknownReg;
        asm_mmq(FP, dr, rb, db);
        freeRsrcOf(ins, false);
    }
    // Store a 64-bit value from its stack slot to [base + dr].
    void Assembler::asm_store64(LInsp value, int dr, LInsp base)
    {
        int da = findMemFor(value);
        Register rb = findRegFor(base, GpRegs);
        asm_mmq(rb, dr, FP, da);
    }
    // Materialize a 64-bit immediate into the instruction's stack slot by
    // storing its two 32-bit halves.
    // NOTE(review): a second, x86-flavored asm_quad definition appears later
    // in this file — confirm which one is actually compiled in.
    void Assembler::asm_quad(LInsp ins)
    {
        Reservation *rR = getresv(ins);
        int d = disp(rR);
        freeRsrcOf(ins, false);
        if (d)
        {
            // the immediate payload lives in the two words preceding the LIR ins
            const int32_t* p = (const int32_t*) (ins-2);
            STi(FP,d+4,p[1]);
            STi(FP,d,p[0]);
        }
    }
    // Emit a conditional branch for `cond` to `targ`, inverting the condition
    // when branching on false. Returns the branch address for later patching.
    // NOTE(review): uses x86-style Jcc macro names — presumably mapped to
    // Thumb conditional branches by the backend headers; confirm.
    NIns* Assembler::asm_branch(bool branchOnFalse, LInsp cond, NIns* targ, bool isfar)
    {
        NIns* at = 0;
        LOpcode condop = cond->opcode();
        NanoAssert(cond->isCond());
#ifndef NJ_SOFTFLOAT
        if (condop >= LIR_feq && condop <= LIR_fge)
        {
            return asm_jmpcc(branchOnFalse, cond, targ);
        }
#endif
        // produce the branch
        if (branchOnFalse)
        {
            // invert each condition
            if (condop == LIR_eq)
                JNE(targ);
            else if (condop == LIR_ov)
                JNO(targ);
            else if (condop == LIR_cs)
                JNC(targ);
            else if (condop == LIR_lt)
                JNL(targ);
            else if (condop == LIR_le)
                JNLE(targ);
            else if (condop == LIR_gt)
                JNG(targ);
            else if (condop == LIR_ge)
                JNGE(targ);
            else if (condop == LIR_ult)
                JNB(targ);
            else if (condop == LIR_ule)
                JNBE(targ);
            else if (condop == LIR_ugt)
                JNA(targ);
            else //if (condop == LIR_uge)
                JNAE(targ);
        }
        else // op == LIR_xt
        {
            if (condop == LIR_eq)
                JE(targ);
            else if (condop == LIR_ov)
                JO(targ);
            else if (condop == LIR_cs)
                JC(targ);
            else if (condop == LIR_lt)
                JL(targ);
            else if (condop == LIR_le)
                JLE(targ);
            else if (condop == LIR_gt)
                JG(targ);
            else if (condop == LIR_ge)
                JGE(targ);
            else if (condop == LIR_ult)
                JB(targ);
            else if (condop == LIR_ule)
                JBE(targ);
            else if (condop == LIR_ugt)
                JA(targ);
            else //if (condop == LIR_uge)
                JAE(targ);
        }
        at = _nIns;
        // reverse emission: the compare runs before the branch above
        asm_cmp(cond);
        return at;
    }
    // Emit the compare that sets the flags consumed by a conditional
    // branch/set; constants use CMPi, zero-equality uses TEST.
    void Assembler::asm_cmp(LIns *cond)
    {
        LOpcode condop = cond->opcode();

        // LIR_ov and LIR_cs recycle the flags set by arithmetic ops
        if ((condop == LIR_ov) || (condop == LIR_cs))
            return;

        LInsp lhs = cond->oprnd1();
        LInsp rhs = cond->oprnd2();
        Reservation *rA, *rB;

        // Not supported yet.
        NanoAssert(!lhs->isQuad() && !rhs->isQuad());

        // ready to issue the compare
        if (rhs->isconst())
        {
            int c = rhs->constval();
            if (c == 0 && cond->isop(LIR_eq)) {
                Register r = findRegFor(lhs, GpRegs);
                TEST(r,r);
                // No 64-bit immediates so fall-back to below
            }
            else if (!rhs->isQuad()) {
                Register r = getBaseReg(lhs, c, GpRegs);
                CMPi(r, c);
            }
        }
        else
        {
            findRegFor2(GpRegs, lhs, rA, rhs, rB);
            Register ra = rA->reg;
            Register rb = rB->reg;
            CMP(ra, rb);
        }
    }
    // Emit the back-edge of a loop fragment: a placeholder jump to the start
    // of trace (patched later) plus restoration of the incoming parameters.
    void Assembler::asm_loop(LInsp ins, NInsList& loopJumps)
    {
        (void)ins;
        JMP_long_placeholder(); // jump to SOT
        verbose_only( if (_verbose && _outputCache) { _outputCache->removeLast(); outputf("         jmp   SOT"); } );

        loopJumps.add(_nIns);

#ifdef NJ_VERBOSE
        // branching from this frag to ourself.
        if (_frago->core()->config.show_stats)
            LDi(argRegs[1], int((Fragment*)_thisfrag));
#endif

        assignSavedParams();

        // restore first parameter, the only one we use
        LInsp state = _thisfrag->lirbuf->state;
        findSpecificRegFor(state, argRegs[state->imm8()]);
    }
    // Emit a boolean (0/1) result for a floating-point comparison.
    void Assembler::asm_fcond(LInsp ins)
    {
        // only want certain regs
        Register r = prepResultReg(ins, AllowableFlagRegs);
        asm_setcc(r, ins);
#ifdef NJ_ARM_VFP
        SETE(r);
#else
        // SETcc only sets low 8 bits, so extend
        MOVZX8(r,r);
        SETNP(r);
#endif
        asm_fcmp(ins);
    }
    // Emit a boolean (0/1) result for an integer comparison using the
    // appropriate SETcc, then the compare itself (reverse emission order).
    void Assembler::asm_cond(LInsp ins)
    {
        // only want certain regs
        LOpcode op = ins->opcode();
        Register r = prepResultReg(ins, AllowableFlagRegs);
        // SETcc only sets low 8 bits, so extend
        MOVZX8(r,r);
        if (op == LIR_eq)
            SETE(r);
        else if (op == LIR_ov)
            SETO(r);
        else if (op == LIR_cs)
            SETC(r);
        else if (op == LIR_lt)
            SETL(r);
        else if (op == LIR_le)
            SETLE(r);
        else if (op == LIR_gt)
            SETG(r);
        else if (op == LIR_ge)
            SETGE(r);
        else if (op == LIR_ult)
            SETB(r);
        else if (op == LIR_ule)
            SETBE(r);
        else if (op == LIR_ugt)
            SETA(r);
        else // if (op == LIR_uge)
            SETAE(r);
        asm_cmp(ins);
    }
    // Emit a two-operand integer ALU op. Result reuses lhs's register when
    // possible; rhs comes from a register (forceReg) or an immediate form.
    void Assembler::asm_arith(LInsp ins)
    {
        LOpcode op = ins->opcode();
        LInsp lhs = ins->oprnd1();
        LInsp rhs = ins->oprnd2();

        Register rb = UnknownReg;
        RegisterMask allow = GpRegs;
        bool forceReg = (op == LIR_mul || !rhs->isconst());

#ifdef NANOJIT_ARM
        // Arm can't do an immediate op with immediates
        // outside of +/-255 (for AND) r outside of
        // 0..255 for others.
        if (!forceReg)
        {
            if (rhs->isconst() && !isU8(rhs->constval()))
                forceReg = true;
        }
#endif

        if (lhs != rhs && forceReg)
        {
            if ((rb = asm_binop_rhs_reg(ins)) == UnknownReg) {
                rb = findRegFor(rhs, allow);
            }
            allow &= ~rmask(rb);
        }
        else if ((op == LIR_add||op == LIR_addp) && lhs->isop(LIR_alloc) && rhs->isconst()) {
            // add alloc+const, use lea
            Register rr = prepResultReg(ins, allow);
            int d = findMemFor(lhs) + rhs->constval();
            LEA(rr, d, FP);
            // NOTE(review): the i386 version of this function returns here;
            // falling through would call prepResultReg twice — confirm a
            // `return;` was not lost from this copy.
        }

        Register rr = prepResultReg(ins, allow);
        Reservation* rA = getresv(lhs);
        Register ra;
        // if this is last use of lhs in reg, we can re-use result reg
        if (rA == 0 || (ra = rA->reg) == UnknownReg)
            ra = findSpecificRegFor(lhs, rr);
        // else, rA already has a register assigned.

        if (forceReg)
        {
            if (lhs == rhs)
                rb = ra;

            if (op == LIR_add || op == LIR_addp)
                ADD(rr, rb);
            else if (op == LIR_sub)
                SUB(rr, rb);
            else if (op == LIR_mul)
                MUL(rr, rb);
            else if (op == LIR_and)
                AND(rr, rb);
            else if (op == LIR_or)
                OR(rr, rb);
            else if (op == LIR_xor)
                XOR(rr, rb);
            else if (op == LIR_lsh)
                SHL(rr, rb);
            else if (op == LIR_rsh)
                SAR(rr, rb);
            else if (op == LIR_ush)
                SHR(rr, rb);
            else
                NanoAssertMsg(0, "Unsupported");
        }
        else
        {
            int c = rhs->constval();
            if (op == LIR_add || op == LIR_addp) {
                ADDi(rr, c);
            } else if (op == LIR_sub) {
                SUBi(rr, c);
            } else if (op == LIR_and)
                ANDi(rr, c);
            else if (op == LIR_or)
                ORi(rr, c);
            else if (op == LIR_xor)
                XORi(rr, c);
            else if (op == LIR_lsh)
                SHLi(rr, c);
            else if (op == LIR_rsh)
                SARi(rr, c);
            else if (op == LIR_ush)
                SHRi(rr, c);
            else
                NanoAssertMsg(0, "Unsupported");
        }

        // reverse emission: copy lhs into the result reg before the op above
        if ( rr != ra )
            MR(rr,ra);
    }
    // Emit unary negate (LIR_neg) or bitwise-not (LIR_not).
    void Assembler::asm_neg_not(LInsp ins)
    {
        LOpcode op = ins->opcode();
        Register rr = prepResultReg(ins, GpRegs);

        LIns* lhs = ins->oprnd1();
        Reservation *rA = getresv(lhs);
        // if this is last use of lhs in reg, we can re-use result reg
        Register ra;
        if (rA == 0 || (ra=rA->reg) == UnknownReg)
            ra = findSpecificRegFor(lhs, rr);
        // else, rA already has a register assigned.

        if (op == LIR_not)
            NOT(rr);
        else
            NEG(rr);

        // reverse emission: move operand into place before the op above
        if ( rr != ra )
            MR(rr,ra);
    }
    // Emit a load: byte (zero-extended) for LIR_ldcb, otherwise a word.
    void Assembler::asm_ld(LInsp ins)
    {
        LOpcode op = ins->opcode();
        LIns* base = ins->oprnd1();
        LIns* disp = ins->oprnd2();
        Register rr = prepResultReg(ins, GpRegs);
        int d = disp->constval();
        Register ra = getBaseReg(base, d, GpRegs);
        if (op == LIR_ldcb)
            LD8Z(rr, d, ra);
        else
            LD(rr, d, ra);
    }
    // Emit a conditional move: load the true value, then conditionally
    // overwrite with the false value using the inverted condition.
    void Assembler::asm_cmov(LInsp ins)
    {
        LOpcode op = ins->opcode();
        LIns* condval = ins->oprnd1();
        NanoAssert(condval->isCmp());

        LIns* values = ins->oprnd2();

        NanoAssert(values->opcode() == LIR_2);
        LIns* iftrue = values->oprnd1();
        LIns* iffalse = values->oprnd2();

        NanoAssert(op == LIR_qcmov || (!iftrue->isQuad() && !iffalse->isQuad()));

        const Register rr = prepResultReg(ins, GpRegs);

        // this code assumes that neither LD nor MR nor MRcc set any of the condition flags.
        // (This is true on Intel, is it true on all architectures?)
        const Register iffalsereg = findRegFor(iffalse, GpRegs & ~rmask(rr));
        if (op == LIR_cmov) {
            switch (condval->opcode())
            {
                // note that these are all opposites...
                case LIR_eq:    MRNE(rr, iffalsereg);   break;
                case LIR_ov:    MRNO(rr, iffalsereg);   break;
                case LIR_cs:    MRNC(rr, iffalsereg);   break;
                case LIR_lt:    MRGE(rr, iffalsereg);   break;
                case LIR_le:    MRG(rr, iffalsereg);    break;
                case LIR_gt:    MRLE(rr, iffalsereg);   break;
                case LIR_ge:    MRL(rr, iffalsereg);    break;
                case LIR_ult:   MRAE(rr, iffalsereg);   break;
                case LIR_ule:   MRA(rr, iffalsereg);    break;
                case LIR_ugt:   MRBE(rr, iffalsereg);   break;
                case LIR_uge:   MRB(rr, iffalsereg);    break;
                debug_only( default: NanoAssert(0); break; )
            }
        } else if (op == LIR_qcmov) {
            // 64-bit cmov is not supported on this backend
            NanoAssert(0);
        }
        /*const Register iftruereg =*/ findSpecificRegFor(iftrue, rr);
        asm_cmp(condval);
    }
    // Load the high 32 bits of a 64-bit value from its stack slot.
    void Assembler::asm_qhi(LInsp ins)
    {
        Register rr = prepResultReg(ins, GpRegs);
        LIns *q = ins->oprnd1();
        int d = findMemFor(q);
        LD(rr, d+4, FP);
    }
    // Materialize an incoming parameter: register args go straight to their
    // arg register, stack args are loaded relative to FP; kind!=0 selects a
    // callee-saved register parameter.
    void Assembler::asm_param(LInsp ins)
    {
        uint32_t a = ins->imm8();
        uint32_t kind = ins->imm8b();
        if (kind == 0) {
            // ordinary param
            AbiKind abi = _thisfrag->lirbuf->abi;
            uint32_t abi_regcount = abi == ABI_FASTCALL ? 2 : abi == ABI_THISCALL ? 1 : 0;
            if (a < abi_regcount) {
                // incoming arg in register
                prepResultReg(ins, rmask(argRegs[a]));
            } else {
                // incoming arg is on stack, and EBP points nearby (see genPrologue)
                Register r = prepResultReg(ins, GpRegs);
                int d = (a - abi_regcount) * sizeof(intptr_t) + 8;
                LD(r, d, FP);
            }
        }
        else {
            // saved param
            prepResultReg(ins, rmask(savedRegs[a]));
        }
    }
763 void Assembler::asm_short(LInsp ins)
765 Register rr = prepResultReg(ins, GpRegs);
766 int32_t val = ins->imm16();
767 if (val == 0)
768 XOR(rr,rr);
769 else
770 LDi(rr, val);
773 void Assembler::asm_int(LInsp ins)
775 Register rr = prepResultReg(ins, GpRegs);
776 int32_t val = ins->imm32();
777 if (val == 0)
778 XOR(rr,rr);
779 else
780 LDi(rr, val);
    // Materialize a 64-bit float/quad constant.
    // NOTE(review): this body is x86 code (XmmRegs, SSE_*, FLDZ/FLD1) and
    // duplicates the earlier Thumb asm_quad in this file — confirm which
    // definition is compiled for this target.
    void Assembler::asm_quad(LInsp ins)
    {
        Reservation *rR = getresv(ins);
        Register rr = rR->reg;
        if (rr != UnknownReg)
        {
            // @todo -- add special-cases for 0 and 1
            _allocator.retire(rr);
            rR->reg = UnknownReg;
            NanoAssert((rmask(rr) & FpRegs) != 0);

            const double d = ins->constvalf();
            const uint64_t q = ins->constvalq();
            if (rmask(rr) & XmmRegs) {
                if (q == 0.0) {
                    // test (int64)0 since -0.0 == 0.0
                    SSE_XORPDr(rr, rr);
                } else if (d == 1.0) {
                    // 1.0 is extremely frequent and worth special-casing!
                    static const double k_ONE = 1.0;
                    LDSDm(rr, &k_ONE);
                } else {
                    findMemFor(ins);
                    const int d = disp(rR);
                    SSE_LDQ(rr, d, FP);
                }
            } else {
                if (q == 0.0) {
                    // test (int64)0 since -0.0 == 0.0
                    FLDZ();
                } else if (d == 1.0) {
                    FLD1();
                } else {
                    findMemFor(ins);
                    int d = disp(rR);
                    FLDQ(d,FP);
                }
            }
        }

        // @todo, if we used xor, ldsd, fldz, etc above, we don't need mem here
        int d = disp(rR);
        freeRsrcOf(ins, false);
        if (d)
        {
            const int32_t* p = (const int32_t*) (ins-2);
            STi(FP,d+4,p[1]);
            STi(FP,d,p[0]);
        }
    }
    // Extract the low 32 bits of a quad.
    // NOTE(review): x86 code (XmmRegs / SSE_MOVD) in the Thumb backend —
    // confirm whether this path is ever compiled for this target.
    void Assembler::asm_qlo(LInsp ins)
    {
        LIns *q = ins->oprnd1();
        Reservation *resv = getresv(ins);
        Register rr = resv->reg;
        if (rr == UnknownReg) {
            // store quad in spill loc
            int d = disp(resv);
            freeRsrcOf(ins, false);
            Register qr = findRegFor(q, XmmRegs);
            SSE_MOVDm(d, FP, qr);
        } else {
            freeRsrcOf(ins, false);
            Register qr = findRegFor(q, XmmRegs);
            SSE_MOVD(rr,qr);
        }
    }
    // Copy between non-general-purpose registers — not implemented on Thumb.
    void Assembler::asm_nongp_copy(Register r, Register s)
    {
        // we will need this for VFP support
        (void)r; (void)s;
        NanoAssert(false);
    }
    // No special rhs-register constraints on this backend; let the caller
    // allocate normally.
    Register Assembler::asm_binop_rhs_reg(LInsp ins)
    {
        return UnknownReg;
    }
    /**
     * copy 64 bits: (rd+dd) <- (rs+ds)
     */
    void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
    {
        // value is either a 64bit struct or maybe a float
        // that isn't live in an FPU reg. Either way, don't
        // put it in an FPU reg just to load & store it.
        // get a scratch reg
        Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs)));
        _allocator.addFree(t);
        // reverse emission: runtime order is load-lo, store-lo, load-hi, store-hi
        ST(rd, dd+4, t);
        LD(t, ds+4, rs);
        ST(rd, dd, t);
        LD(t, ds, rs);
    }
    // Place an outgoing stack argument at [SP+0], loading from its spill
    // slot via Scratch if it isn't currently in a register.
    void Assembler::asm_pusharg(LInsp p)
    {
        // arg goes on stack
        Reservation* rA = getresv(p);
        if (rA == 0)
        {
            Register ra = findRegFor(p, GpRegs);
            ST(SP,0,ra);
        }
        else if (rA->reg == UnknownReg)
        {
            // reverse emission: LD into Scratch runs before the ST
            ST(SP,0,Scratch);
            LD(Scratch,disp(rA),FP);
        }
        else
        {
            ST(SP,0,rA->reg);
        }
    }
901 void Assembler::nativePageReset()
903 _nPool = 0;
904 _nSlot = 0;
905 _nExitPool = 0;
906 _nExitSlot = 0;
    // Ensure code pages exist for the main and exit streams, and carve the
    // initial constant pool out of the end of the first page.
    void Assembler::nativePageSetup()
    {
        if (!_nIns)      _nIns     = pageAlloc();
        if (!_nExitIns)  _nExitIns = pageAlloc(true);
        //fprintf(stderr, "assemble onto %x exits into %x\n", (int)_nIns, (int)_nExitIns);

        if (!_nPool) {
            _nSlot = _nPool = (int*)_nIns;

            // Make original pool at end of page. Currently
            // we are pointing off the end of the original page,
            // so back up 1+NJ_CPOOL_SIZE
            _nPool = (int*)((int)_nIns - (sizeof(int32_t)*NJ_CPOOL_SIZE));

            // _nSlot points at last slot in pool (fill upwards)
            _nSlot = _nPool + (NJ_CPOOL_SIZE-1);

            // Move _nIns to the top of the pool
            _nIns = (NIns*)_nPool;

            // no branch needed since this follows the epilogue
        }
    }
    // Flush the instruction cache for the range [n1, n2) after code has been
    // written or patched (WinCE API or Linux sys_cacheflush via swi).
    void Assembler::flushCache(NIns* n1, NIns* n2) {
#if defined(UNDER_CE)
        // we changed the code, so we need to do this (sadly)
        FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
#elif defined(AVMPLUS_LINUX)
        // Just need to clear this one page (not even the whole page really)
        //Page *page = (Page*)pageTop(_nIns);
        register unsigned long _beg __asm("a1") = (unsigned long)(n1);
        register unsigned long _end __asm("a2") = (unsigned long)(n2);
        register unsigned long _flg __asm("a3") = 0;
        register unsigned long _swi __asm("r7") = 0xF0002;  // ARM sys_cacheflush number
        __asm __volatile ("swi 0    @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
#endif
    }
    // Re-point the BL at `at` to `target`, flush the icache, and return the
    // branch's previous destination (decoded from the two BL halfwords).
    NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
    {
        NIns* save = _nIns;
        // decode the old 22-bit BL offset: high 11 bits from at[1], low from at[2]
        NIns* was = (NIns*) (((((*(at+2))&0x7ff)<<12) | (((*(at+1))&0x7ff)<<1)) + (at-2+2));

        // temporarily aim the emitter at the branch site and re-emit the BL
        _nIns = at + 2;
        BL(target);

        flushCache(_nIns, _nIns+2);

#ifdef AVMPLUS_PORTING_API
        // XXX save.._nIns+2? really?
        NanoJIT_PortAPI_FlushInstructionCache(save, _nIns+2);
#endif

        _nIns = save;

        return was;
    }
    // Store immediate `v` to [b + d] via the Scratch register.
    // Reverse emission: the LDi runs before the ST at execution time.
    void Assembler::STi(Register b, int32_t d, int32_t v)
    {
        ST(b, d, Scratch);
        LDi(Scratch, v);
    }
    // Would a short Thumb B (signed 11-bit halfword offset) emitted at
    // cur-1 reach `target`? (PC reads two instructions ahead.)
    bool isB11(NIns *target, NIns *cur)
    {
        NIns *br_base = (cur-1)+2;
        int br_off = int(target) - int(br_base);
        return (-(1<<11) <= br_off && br_off < (1<<11));
    }
    // Guarantee `bytes` of emission space; if the current page would
    // underflow, switch to a fresh page and emit a branch from the new code
    // back to the instructions already emitted on the old page.
    void Assembler::underrunProtect(int bytes)
    {
        NanoAssertMsg(bytes<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");

        // perhaps bytes + sizeof(PageHeader)/sizeof(NIns) + 4 ?
        intptr_t u = bytes + 4;
        if (!samepage(_nIns-u, _nIns-1)) {
            NIns* target = _nIns;
            _nIns = pageAlloc(_inExit);
            // might be able to do a B instead of BL (save an instruction)
            if (isB11(target, _nIns))
            {
                NIns *br_base = (_nIns-1)+2;
                int br_off = int(target) - int(br_base);
                *(--_nIns) = (NIns)(0xE000 | ((br_off>>1)&0x7FF));
            }
            else
            {
                // long branch: the two-halfword BL form
                int offset = int(target)-int(_nIns-2+2);
                *(--_nIns) = (NIns)(0xF800 | ((offset>>1)&0x7FF) );
                *(--_nIns) = (NIns)(0xF000 | ((offset>>12)&0x7FF) );
            }
        }
    }
    // Would a Thumb BL pair (signed 22-bit halfword offset) emitted at
    // cur-2 reach `target`?
    bool isB22(NIns *target, NIns *cur)
    {
        int offset = int(target)-int(cur-2+2);
        return (-(1<<22) <= offset && offset < (1<<22));
    }
    // Emit a Thumb BL to `target` (two halfwords: 0xF000 prefix carries
    // offset[21:12], 0xF800 suffix carries offset[11:1]).
    void Assembler::BL(NIns* target)
    {
        underrunProtect(4);
        NanoAssert(isB22(target,_nIns));
        int offset = int(target)-int(_nIns-2+2);
        *(--_nIns) = (NIns)(0xF800 | ((offset>>1)&0x7FF) );
        *(--_nIns) = (NIns)(0xF000 | ((offset>>12)&0x7FF) );
        asm_output("bl %X offset=%d",(int)target, offset);
    }
    // Emit a short unconditional Thumb B (11-bit halfword offset).
    void Assembler::B(NIns *target)
    {
        underrunProtect(2);
        NanoAssert(isB11(target,_nIns));
        NIns *br_base = (_nIns-1)+2;
        int br_off = int(target) - int(br_base);
        NanoAssert(-(1<<11) <= br_off && br_off < (1<<11));
        *(--_nIns) = (NIns)(0xE000 | ((br_off>>1)&0x7FF));
        asm_output("b %X offset=%d", (int)target, br_off);
    }
1034 void Assembler::JMP(NIns *target)
1036 underrunProtect(4);
1037 if (isB11(target,_nIns))
1038 B(target);
1039 else
1040 BL(target);
    // Emit PUSH of a low-register mask; LR is encoded via the R-bit (R8
    // position) of the Thumb PUSH encoding.
    void Assembler::PUSH_mask(RegisterMask mask)
    {
        NanoAssert((mask&(0xff|rmask(LR)))==mask);
        underrunProtect(2);
        if (mask & rmask(LR)) {
            mask &= ~rmask(LR);
            mask |= rmask(R8); // LR
        }
        *(--_nIns) = (NIns)(0xB400 | mask);
        asm_output("push {%x}", mask);
    }
    // Emit POP of a single register; PC is encoded via the R-bit (R8
    // position) of the Thumb POP encoding.
    void Assembler::POPr(Register r)
    {
        underrunProtect(2);
        NanoAssert(((unsigned)r)<8 || r == PC);
        if (r == PC)
            r = R8;
        *(--_nIns) = (NIns)(0xBC00 | (1<<(r)));
        asm_output("pop {%s}",gpn(r));
    }
    // Emit POP of a low-register mask; PC is encoded via the R-bit (R8
    // position) of the Thumb POP encoding.
    void Assembler::POP_mask(RegisterMask mask)
    {
        NanoAssert((mask&(0xff|rmask(PC)))==mask);
        underrunProtect(2);
        if (mask & rmask(PC)) {
            mask &= ~rmask(PC);
            mask |= rmask(R8); // PC
        }
        *(--_nIns) = (NIns)(0xBC00 | mask);
        asm_output("pop {%x}", mask);
    }
    // Emit MOV r,#imm8 (Thumb immediate move; v must fit in 8 bits).
    void Assembler::MOVi(Register r, int32_t v)
    {
        NanoAssert(isU8(v));
        underrunProtect(2);
        *(--_nIns) = (NIns)(0x2000 | r<<8 | v);
        asm_output("mov %s,#%d",gpn(r),v);
    }
    // Load an arbitrary 32-bit immediate: MOV for small values, MOV+NEG for
    // small negatives, otherwise a PC-relative constant-pool load.
    void Assembler::LDi(Register r, int32_t v)
    {
        if (isU8(v)) {
            MOVi(r,v);
        } else if (isU8(-v)) {
            // reverse emission: MOVi runs first, then NEG
            NEG(r);
            MOVi(r,-v);
        } else {
            underrunProtect(2);
            LD32_nochk(r, v);
        }
    }
    // Emit a conditional branch: the short Bcc form (9-bit range) when the
    // target is close, otherwise an inverted-condition skip over a BL.
    void Assembler::B_cond(int c, NIns *target)
    {
#ifdef NJ_VERBOSE
        static const char *ccname[] = { "eq","ne","hs","lo","mi","pl","vs","vc","hi","ls","ge","lt","gt","le","al","nv" };
#endif

        underrunProtect(6);
        int tt = int(target) - int(_nIns-1+2);
        if (tt < (1<<8) && tt >= -(1<<8)) {
            *(--_nIns) = (NIns)(0xD000 | ((c)<<8) | (tt>>1)&0xFF );
            asm_output("b%s %X offset=%d", ccname[c], target, tt);
        } else {
            NIns *skip = _nIns;
            BL(target);
            c ^= 1;     // invert the condition to branch around the BL
            *(--_nIns) = (NIns)(0xD000 | c<<8 | 1 );
            asm_output("b%s %X", ccname[c], skip);
        }
    }
    // Emit STR reg,[SP,#offset] (word-aligned, 8-bit word offset).
    void Assembler::STR_sp(int32_t offset, Register reg)
    {
        NanoAssert((offset&3)==0);// require natural alignment
        int32_t off = offset>>2;
        NanoAssert(isU8(off));
        underrunProtect(2);
        *(--_nIns) = (NIns)(0x9000 | ((reg)<<8) | off );
        asm_output("str %s, %d(%s)", gpn(reg), offset, gpn(SP));
    }
    // Emit STR reg,[base,off] (register-offset addressing).
    void Assembler::STR_index(Register base, Register off, Register reg)
    {
        underrunProtect(2);
        *(--_nIns) = (NIns)(0x5000 | (off<<6) | (base<<3) | (reg));
        asm_output("str %s,(%s+%s)",gpn(reg),gpn(base),gpn(off));
    }
    // Emit STR reg,[base,#offset] (5-bit word offset, 0..124).
    void Assembler::STR_m(Register base, int32_t offset, Register reg)
    {
        NanoAssert(offset >= 0 && offset < 128 && (offset&3)==0);
        underrunProtect(2);
        int32_t off = offset>>2;
        *(--_nIns) = (NIns)(0x6000 | off<<6 | base<<3 | reg);
        asm_output("str %s,%d(%s)", gpn(reg), offset, gpn(base));
    }
    // Emit LDMIA base!,{regs} — load multiple with writeback.
    void Assembler::LDMIA(Register base, RegisterMask regs)
    {
        underrunProtect(2);
        NanoAssert((regs&rmask(base))==0 && isU8(regs));
        *(--_nIns) = (NIns)(0xC800 | base<<8 | regs);
        asm_output("ldmia %s!,{%x}", gpn(base), regs);
    }
    // Emit STMIA base!,{regs} — store multiple with writeback.
    void Assembler::STMIA(Register base, RegisterMask regs)
    {
        underrunProtect(2);
        NanoAssert((regs&rmask(base))==0 && isU8(regs));
        *(--_nIns) = (NIns)(0xC000 | base<<8 | regs);
        asm_output("stmia %s!,{%x}", gpn(base), regs);
    }
    // General word store reg -> [base + offset]: use SP-relative or short
    // immediate forms when possible, otherwise synthesize the offset in
    // Scratch and use register-offset addressing. (Reverse emission: within
    // each branch the MOVi/SHLi run before the STR.)
    void Assembler::ST(Register base, int32_t offset, Register reg)
    {
        NanoAssert((offset&3)==0);// require natural alignment
        int off = offset>>2;
        if (base==SP) {
            STR_sp(offset, reg);
        } else if ((offset)<0) {
            // negative offset: build |offset| in Scratch, negate, index
            STR_index(base, Scratch, reg);
            NEG(Scratch);
            if (offset < -255) {
                NanoAssert(offset >= -1020);
                SHLi(Scratch, 2);
                MOVi(Scratch, -off);
            }
            else {
                MOVi(Scratch, -offset);
            }
        } else {
            underrunProtect(6);
            if (off<32) {
                STR_m(base, offset, reg);
            }
            else {
                STR_index(base, Scratch, reg);
                if (offset > 255) {
                    SHLi(Scratch, 2);
                    MOVi(Scratch, off);
                }
                else {
                    MOVi(Scratch, offset);
                }
            }
        }
    }
    // Emit ADD r,#imm8 (8-bit immediate add).
    void Assembler::ADDi8(Register r, int32_t i)
    {
        underrunProtect(2);
        NanoAssert(isU8(i));
        *(--_nIns) = (NIns)(0x3000 | r<<8 | i);
        asm_output("add %s,#%d", gpn(r), i);
    }
    // General immediate add: negatives delegate to SUBi; SP has its own
    // 9-bit word-aligned form; 0..510 splits into two ADDi8s; anything
    // larger goes through Scratch (LDi runs before the ADD at runtime).
    void Assembler::ADDi(Register r, int32_t i)
    {
        if (i < 0 && i != 0x80000000) {
            SUBi(r, -i);
        }
        else if (r == SP) {
            NanoAssert((i&3)==0 && i >= 0 && i < (1<<9));
            underrunProtect(2);
            *(--_nIns) = (NIns)(0xB000 | i>>2);
            asm_output("add %s,#%d", gpn(SP), i);
        }
        else if (isU8(i)) {
            ADDi8(r,i);
        }
        else if (i >= 0 && i <= (255+255)) {
            ADDi8(r,i-255);
            ADDi8(r,255);
        }
        else {
            ADD(r, Scratch);
            LDi(Scratch, i);
        }
    }
    // Emit SUB r,#imm8 (8-bit immediate subtract).
    void Assembler::SUBi8(Register r, int32_t i)
    {
        underrunProtect(2);
        NanoAssert(isU8(i));
        *(--_nIns) = (NIns)(0x3800 | r<<8 | i);
        asm_output("sub %s,#%d", gpn(r), i);
    }
    // General immediate subtract: mirror of ADDi (negatives delegate to
    // ADDi, SP form, split ADDi8s, or Scratch for large values).
    void Assembler::SUBi(Register r, int32_t i)
    {
        if (i < 0 && i != 0x80000000) {
            ADDi(r, -i);
        }
        else if (r == SP) {
            NanoAssert((i&3)==0 && i >= 0 && i < (1<<9));
            underrunProtect(2);
            *(--_nIns) = (NIns)(0xB080 | i>>2);
            asm_output("sub %s,#%d", gpn(SP), i);
        }
        else if (isU8(i)) {
            SUBi8(r,i);
        }
        else if (i >= 0 && i <= (255+255)) {
            SUBi8(r,i-255);
            SUBi8(r,255);
        }
        else {
            SUB(r, Scratch);
            LDi(Scratch, i);
        }
    }
    // Emit a call to the target in `ci`: a direct BL when in 22-bit range,
    // otherwise a sequence that loads the literal address PC-relative into
    // Scratch, copies it to IP, and does BX IP, with the return BL skipping
    // the inline literal. (Emitted in reverse; runtime order is bottom-up.)
    void Assembler::CALL(const CallInfo *ci)
    {
        intptr_t addr = ci->_address;
        if (isB22((NIns*)addr, _nIns)) {
            int offset = int(addr)-int(_nIns-2+2);
            *(--_nIns) = (NIns)(0xF800 | ((offset>>1)&0x7FF) );
            *(--_nIns) = (NIns)(0xF000 | ((offset>>12)&0x7FF) );
            asm_output("call %08X:%s", addr, ci->_name);
        }
        else
        {
            underrunProtect(2*(10));

            // keep the inline 32-bit literal word-aligned
            if ( (((int(_nIns))&0xFFFF)%4) != 0)
                *(--_nIns) = (NIns)0;

            // BL back over the literal to the BX sequence
            *(--_nIns) = (NIns)(0xF800 | (((-14)&0xFFF)>>1) );
            *(--_nIns) = (NIns)(0xF000 | (((-14)>>12)&0x7FF) );

            *(--_nIns) = (NIns)(0x4600 | (1<<7) | (Scratch<<3) | (IP&7));  // mov IP, Scratch
            *(--_nIns) = (NIns)0;                       // alignment pad
            *(--_nIns) = (short)((addr) >> 16);         // literal: target address
            *(--_nIns) = (short)((addr) & 0xFFFF);
            *(--_nIns) = (NIns)(0x4700 | (IP<<3));      // bx IP
            *(--_nIns) = (NIns)(0xE000 | (4>>1));       // b over the literal
            *(--_nIns) = (NIns)(0x4800 | (Scratch<<8) | (1));  // ldr Scratch,[pc,#4]
            asm_output("call %08X:%s", addr, ci->_name);
        }
    }
    // Emit a PC-relative LDR of a 32-bit immediate from the constant pool,
    // starting a new pool (with a branch around it) when the current one is
    // out of range or full.
    void Assembler::LD32_nochk(Register r, int32_t imm)
    {
        // Can we reach the current slot/pool?
        int offset = (int)(_nSlot) - (int)(_nIns);
        if ((offset>=NJ_MAX_CPOOL_OFFSET || offset<0) ||
            (_nSlot < _nPool))
        {
            // cant reach, or no room
            // need a new pool

            // Make sure we have space for a pool and the LDR
            underrunProtect(sizeof(int32_t)*NJ_CPOOL_SIZE+1);

            NIns* skip = _nIns;

            _nPool = (int*)(((int)_nIns - (sizeof(int32_t)*NJ_CPOOL_SIZE)) &~3);
            _nSlot = _nPool + (NJ_CPOOL_SIZE-1);
            _nIns = (NIns*)_nPool;

            // jump over the pool
            B(skip);
            //*(--_nIns) = (NIns)( COND_AL | (0x5<<25) | (NJ_CPOOL_SIZE-1) );
        }

        // fill the next (downward-growing) slot with the constant
        *(_nSlot--) = (int)imm;

        NIns *data = (NIns*)(_nSlot+1);;

        // LDR offset is relative to the word-aligned PC (PC+4 rounded down)
        int data_off = int(data) - (int(_nIns+1)&~3);
        *(--_nIns) = (NIns)(0x4800 | r<<8 | data_off>>2);
        asm_output("ldr %s,%d(PC) [%X]",gpn(r),data_off,(int)data);
    }
1321 #endif /* FEATURE_NANOJIT */