/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: t; tab-width: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Adobe AS3 Team
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#include "nanojit.h"
#include <stdio.h>
#include <ctype.h>

#ifdef PERFM
#include "../vprof/vprof.h"
#endif /* PERFM */

namespace nanojit
{
    using namespace avmplus;
    #ifdef FEATURE_NANOJIT

    const uint8_t operandCount[] = {
    #define OPDEF(op, number, operands) \
        operands,
    #define OPDEF64(op, number, operands) \
        operands,
    #include "LIRopcode.tbl"
    #undef OPDEF
    #undef OPDEF64
    };

    // LIR verbose specific
    #ifdef NJ_VERBOSE

    const char* lirNames[] = {
    #define OPDEF(op, number, operands) \
        #op,
    #define OPDEF64(op, number, operands) \
        #op,
    #include "LIRopcode.tbl"
    #undef OPDEF
    #undef OPDEF64
        NULL
    };

    #endif /* NJ_VERBOSE */
    // implementation
    #ifdef NJ_PROFILE
    // @todo fixup: move to nanojit.h
    #undef counter_value
    #define counter_value(x) x
    #endif /* NJ_PROFILE */
    //static int32_t buffer_count = 0;

    // LCompressedBuffer
    LirBuffer::LirBuffer(Fragmento* frago, const CallInfo* functions)
        : _frago(frago), _pages(frago->core()->GetGC()), _functions(functions), abi(ABI_FASTCALL)
    {
        clear();
        Page* start = pageAlloc();
        if (start)
            _unused = &start->lir[0];
        //buffer_count++;
        //fprintf(stderr, "LirBuffer %x unused %x\n", (int)this, (int)_unused);
    }

    LirBuffer::~LirBuffer()
    {
        //buffer_count--;
        //fprintf(stderr, "~LirBuffer %x start %x\n", (int)this, (int)_start);
        clear();
        verbose_only(if (names) NJ_DELETE(names);)
        _frago = 0;
    }

    void LirBuffer::clear()
    {
        // free all the memory and clear the stats
        _frago->pagesRelease(_pages);
        NanoAssert(!_pages.size());
        _thresholdPage = 0;
        _unused = 0;
        _stats.lir = 0;
        _noMem = 0;
        for (int i = 0; i < NumSavedRegs; ++i)
            savedRegs[i] = NULL;
        explicitSavedRegs = false;
    }

    int32_t LirBuffer::insCount()
    {
        // doesn't include embedded constants nor the LIR_skip payload
        return _stats.lir;
    }

    int32_t LirBuffer::byteCount()
    {
        return ((_pages.size() ? _pages.size()-1 : 0) * sizeof(Page)) +
               ((int32_t)_unused - (int32_t)pageTop(_unused));
    }

    Page* LirBuffer::pageAlloc()
    {
        Page* page = _frago->pageAlloc();
        if (page)
            _pages.add(page);
        else
            _noMem = 1;
        return page;
    }

    LInsp LirBuffer::next()
    {
        return _unused;
    }

    void LirBufWriter::ensureRoom(uint32_t count)
    {
        LInsp before = _buf->next();
        LInsp after = before+count+LIR_FAR_SLOTS;
        if (!samepage(before,after+LirBuffer::LIR_BUF_THRESHOLD))
        {
            // transition to the next page?
            if (!samepage(before,after))
            {
                NanoAssert(_buf->_thresholdPage);
                _buf->_unused = &_buf->_thresholdPage->lir[0];
                _buf->_thresholdPage = 0;   // pageAlloc() stored it in _pages already

                // link LIR stream back to the prior instruction (careful: insLinkTo relies on _unused)
                insLinkTo(LIR_skip, before-1);
            }
            else if (!_buf->_thresholdPage)
            {
                // within LIR_BUF_THRESHOLD of a new page, so pre-allocate it now; this sets _noMem for early OOM detection
                _buf->_thresholdPage = _buf->pageAlloc();
                NanoAssert(_buf->_thresholdPage || _buf->_noMem);
            }
        }
    }

    LInsp LirBufWriter::insLinkTo(LOpcode op, LInsp to)
    {
        LInsp l = _buf->next();
        NanoAssert(samepage(l,l+LIR_FAR_SLOTS)); // must have called ensureRoom()
        if (can24bReach(l,to))
        {
            l->initOpcode(LOpcode(op-1)); // nearskip or neartramp
            l->setimm24(to-l);
            _buf->commit(1);
            _buf->_stats.lir++;
        }
        else
        {
            l = insLinkToFar(op,to);
        }
        return l;
    }

    LInsp LirBufWriter::insLinkToFar(LOpcode op, LInsp to)
    {
        LirFarIns* ov = (LirFarIns*) _buf->next();
        ov->v = to;
        ov->i.initOpcode(op);
        _buf->commit(LIR_FAR_SLOTS);
        _buf->_stats.lir++;

        NanoAssert( (LInsp)(ov+1) == _buf->next() );
        return &(ov->i);
    }

    void LirBufWriter::makeReachable(LInsp& o, LInsp from)
    {
        if (o && !can8bReach(from,o))
        {
            if (o == _buf->sp && spref && can8bReach(from, spref)) {
                o = spref;
                return;
            }
            if (o == _buf->rp && rpref && can8bReach(from, rpref)) {
                o = rpref;
                return;
            }

            // need a trampoline so that 'from' can reach 'o'
            LInsp tramp = insLinkTo(LIR_tramp, o); // will produce a neartramp if possible
            NanoAssert( tramp->ref() == o && samepage(from,tramp) );
            if (o == _buf->sp)
                spref = tramp;
            else if (o == _buf->rp)
                rpref = tramp;
            o = tramp;
        }
    }
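    // --- Illustrative note (editor's addition, not in the original source) ---
    // LIns operands are stored as small backwards deltas, so an operand must sit
    // within 8-bit reach of its user. When it does not, makeReachable() emits a
    // trampoline: a LIR_tramp (or neartramp) close to the user that points back
    // at the real instruction, and the user references the trampoline instead.
    // The stack- and return-pointer instructions (sp/rp) are referenced so often
    // that the most recent trampoline for each is cached in spref/rpref and reused.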
    void LirBufWriter::prepFor(LInsp& i1, LInsp& i2, LInsp& i3)
    {
        uint32_t i = 0; // count of operands
        i += (i1) ? 1 : 0;
        i += (i2) ? 1 : 0;
        i += (i3) ? 1 : 0;

        uint32_t count = (LIR_FAR_SLOTS*i)+1; // count of LIns needed if every operand requires a tramp
        ensureRoom(count);
        NanoAssert( samepage(_buf->next()+count,_buf->next()) );

        // space is now guaranteed for far tramps, if necessary
        LInsp from = _buf->next()+count;
        makeReachable(i1, from);
        makeReachable(i2, from);
        makeReachable(i3, from);
        NanoAssert(from>i1 && from>i2 && from>i3);
    }

    LInsp LirBuffer::commit(uint32_t count)
    {
        NanoAssertMsg( samepage(_unused, _unused+count), "You need to call ensureRoom first!" );
        return _unused += count;
    }

    uint32_t LIns::reference(LIns *r) const
    {
        int delta = this-r-1;
        NanoAssert(isU8(delta));
        return delta;
    }

    LIns* LIns::deref(int32_t off) const
    {
        LInsp i = (LInsp) this-1 - off;
        while (i && i->isTramp()) {
            i = i->ref();
        }
        return i;
    }
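    // --- Illustrative note (editor's addition, not in the original source) ---
    // reference() and deref() are inverses: a reference is encoded as the
    // distance (minus one) from the user back to the operand, so an operand
    // immediately preceding its user encodes as 0, the one before that as 1,
    // and so on. deref() walks back that distance and then chases any
    // trampolines until it lands on the real instruction.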
    LInsp LirBufWriter::insStore(LInsp val, LInsp base, LInsp off)
    {
        LOpcode op = val->isQuad() ? LIR_stq : LIR_st;
        NanoAssert(val && base && off);
        prepFor(val, base, off);
        LInsp l = _buf->next();
        l->initOpcode(op);
        l->setOprnd1(val);
        l->setOprnd2(base);
        l->setOprnd3(off);
        _buf->commit(1);
        _buf->_stats.lir++;
        return l;
    }

    LInsp LirBufWriter::insStorei(LInsp val, LInsp base, int32_t d)
    {
        LOpcode op = val->isQuad() ? LIR_stqi : LIR_sti;
        NanoAssert(val && base && isS8(d));
        LInsp u3=0;
        prepFor(val, base, u3);
        LInsp l = _buf->next();
        l->initOpcode(op);
        l->setOprnd1(val);
        l->setOprnd2(base);
        l->setDisp(int8_t(d));
        _buf->commit(1);
        _buf->_stats.lir++;
        return l;
    }

    LInsp LirBufWriter::ins0(LOpcode op)
    {
        ensureRoom(1);
        LirBuffer *b = this->_buf;
        LInsp l = b->next();
        l->initOpcode(op);
        b->commit(1);
        b->_stats.lir++;
        return l;
    }

    LInsp LirBufWriter::ins1(LOpcode op, LInsp o1)
    {
        LInsp u2=0,u3=0;
        prepFor(o1,u2,u3);
        LInsp l = _buf->next();
        l->initOpcode(op);
        l->setOprnd1(o1);
        _buf->commit(1);
        _buf->_stats.lir++;
        return l;
    }

    LInsp LirBufWriter::ins2(LOpcode op, LInsp o1, LInsp o2)
    {
        LInsp u3=0;
        prepFor(o1,o2,u3);
        LInsp l = _buf->next();
        l->initOpcode(op);
        l->setOprnd1(o1);
        l->setOprnd2(o2);
        _buf->commit(1);
        _buf->_stats.lir++;
        return l;
    }

    LInsp LirBufWriter::insLoad(LOpcode op, LInsp base, LInsp d)
    {
        return ins2(op,base,d);
    }

    LInsp LirBufWriter::insGuard(LOpcode op, LInsp c, LInsp data)
    {
        return ins2(op, c, data);
    }

    LInsp LirBufWriter::insBranch(LOpcode op, LInsp condition, LInsp toLabel)
    {
        if (!toLabel)
            toLabel = insFar(LIR_tramp,0); // empty tramp
        if (!condition) {
            // unconditional, just point to something
            condition = toLabel;
        }
        return ins2(op,condition,toLabel);
    }

    LInsp LirBufWriter::insAlloc(int32_t size)
    {
        size = (size+3)>>2; // # of required 32bit words
        NanoAssert(isU16(size));
        ensureRoom(1);
        LInsp l = _buf->next();
        l->initOpcode(LIR_alloc);
        l->i.imm16 = uint16_t(size);
        _buf->commit(1);
        _buf->_stats.lir++;
        return l;
    }

    LInsp LirBufWriter::insParam(int32_t arg, int32_t kind)
    {
        ensureRoom(1);
        LirBuffer *b = this->_buf;
        LInsp l = b->next();
        l->initOpcode(LIR_param);
        NanoAssert(isU8(arg) && isU8(kind));
        l->c.imm8a = arg;
        l->c.imm8b = kind;
        if (kind) {
            NanoAssert(arg < NumSavedRegs);
            b->savedRegs[arg] = l;
            b->explicitSavedRegs = true;
        }
        b->commit(1);
        b->_stats.lir++;
        return l;
    }

    LInsp LirBufWriter::insFar(LOpcode op, LInsp target)
    {
        ensureRoom(LIR_FAR_SLOTS); // make room for it
        LInsp l = insLinkToFar(op, target);
        _buf->_stats.lir++;
        return l;
    }

    LInsp LirBufWriter::insImm(int32_t imm)
    {
        if (isS16(imm)) {
            ensureRoom(1);
            LInsp l = _buf->next();
            l->initOpcode(LIR_short);
            l->setimm16(imm);
            _buf->commit(1);
            _buf->_stats.lir++;
            return l;
        } else {
            ensureRoom(LIR_IMM32_SLOTS);
            LirImm32Ins* l = (LirImm32Ins*)_buf->next();
            l->v = imm;
            l->i.initOpcode(LIR_int);
            _buf->commit(LIR_IMM32_SLOTS);
            _buf->_stats.lir++;
            NanoAssert((LInsp)(l+1)==_buf->next());
            return &(l->i);
        }
    }

    LInsp LirBufWriter::insImmq(uint64_t imm)
    {
        ensureRoom(LIR_IMM64_SLOTS);
        LirImm64Ins* l = (LirImm64Ins*)_buf->next();
        l->v[0] = int32_t(imm);
        l->v[1] = int32_t(imm>>32);
        l->i.initOpcode(LIR_quad);
        _buf->commit(LIR_IMM64_SLOTS);
        _buf->_stats.lir++;
        NanoAssert((LInsp)(l+1)==_buf->next());
        return &(l->i);
    }
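    // --- Illustrative note (editor's addition, not in the original source) ---
    // Immediates are stored inline in the instruction stream: a value that fits
    // in 16 bits becomes a single LIR_short, a wider one becomes LIR_int with
    // the payload in the preceding slot(s), and 64-bit values use LIR_quad.
    // A hedged sketch of how a client might emit them, assuming a LirBufWriter `w`:
    //
    //     LInsp a = w.insImm(42);                      // fits in 16 bits -> LIR_short
    //     LInsp b = w.insImm(0x12345678);              // needs LIR_IMM32_SLOTS -> LIR_int
    //     LInsp q = w.insImmq(0x1122334455667788ULL);  // LIR_quad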
    LInsp LirBufWriter::skip(size_t size)
    {
        const uint32_t n = (size+sizeof(LIns)-1)/sizeof(LIns);
        ensureRoom(n); // make room for it
        LInsp last = _buf->next()-1; // safe: next()-1+n is guaranteed to be on the same page
        _buf->commit(n);
        NanoAssert(samepage(last,_buf->next()));
        ensureRoom(LIR_FAR_SLOTS);
        return insLinkTo(LIR_skip, last);
    }

    LInsp LirReader::read()
    {
        LInsp cur = _i;
        if (!cur)
            return 0;
        LIns* i = cur;
        LOpcode iop = i->opcode();
        do
        {
            switch (iop)
            {
                default:
                    i--;
                    break;

    #if defined NANOJIT_64BIT
                case LIR_callh:
    #endif
                case LIR_call:
                case LIR_fcall:
                case LIR_calli:
                case LIR_fcalli:
                    NanoAssert( samepage(i,i+1-i->callInsWords()) );
                    i -= i->callInsWords();
                    break;

                case LIR_skip:
                case LIR_nearskip:
                    NanoAssert(i->ref() != i);
                    i = i->ref();
                    break;

                case LIR_tramp:
                    NanoAssert(samepage(i,i+1-LIR_FAR_SLOTS));
                    i -= LIR_FAR_SLOTS;
                    break;

                case LIR_int:
                    NanoAssert(samepage(i,i+1-LIR_IMM32_SLOTS));
                    i -= LIR_IMM32_SLOTS;
                    break;

                case LIR_quad:
                    NanoAssert(samepage(i,i+1-LIR_IMM64_SLOTS));
                    i -= LIR_IMM64_SLOTS;
                    break;

                case LIR_start:
                    _i = 0; // start of trace
                    return cur;
            }
            iop = i->opcode();
        }
        while (is_trace_skip_tramp(iop)||iop==LIR_2);
        _i = i;
        return cur;
    }
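    // --- Illustrative note (editor's addition, not in the original source) ---
    // LirReader walks the buffer backwards, one instruction per read(), stepping
    // over multi-slot instructions (calls, 32/64-bit immediates, far tramps) and
    // following skip links across page boundaries. A hedged usage sketch,
    // assuming a populated LirBuffer `buf`:
    //
    //     LirReader r(buf);
    //     for (LInsp ins = r.read(); ins != 0; ins = r.read()) {
    //         // visit ins, from the last instruction back to LIR_start
    //     }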
    bool FASTCALL isCmp(LOpcode c) {
        return (c >= LIR_eq && c <= LIR_uge) || (c >= LIR_feq && c <= LIR_fge);
    }

    bool FASTCALL isCond(LOpcode c) {
        return (c == LIR_ov) || (c == LIR_cs) || isCmp(c);
    }

    bool FASTCALL isFloat(LOpcode c) {
        switch (c) {
            default:
                return false;
            case LIR_fadd:
            case LIR_fsub:
            case LIR_fmul:
            case LIR_fdiv:
            case LIR_fneg:
            case LIR_fcall:
            case LIR_fcalli:
            case LIR_i2f:
            case LIR_u2f:
                return true;
        }
    }

    bool LIns::isCmp() const {
        return nanojit::isCmp(u.code);
    }

    bool LIns::isCond() const {
        return nanojit::isCond(u.code);
    }

    bool LIns::isQuad() const {
    #ifdef AVMPLUS_64BIT
        // callh on 64bit cpus means a call that returns an int64 in a single register
        return (u.code & LIR64) != 0 || u.code == LIR_callh;
    #else
        // callh on 32bit cpus means the 32bit MSW of an int64 result in 2 registers
        return (u.code & LIR64) != 0;
    #endif
    }

    bool LIns::isconstval(int32_t val) const
    {
        return isconst() && constval()==val;
    }

    bool LIns::isconstq() const
    {
        return isop(LIR_quad);
    }

    bool LIns::isconstp() const
    {
    #ifdef AVMPLUS_64BIT
        return isconstq();
    #else
        return isconst();
    #endif
    }

    bool FASTCALL isCse(LOpcode op) {
        op = LOpcode(op & ~LIR64);
        return op >= LIR_ldcs && op <= LIR_uge;
    }

    bool LIns::isCse(const CallInfo *functions) const
    {
        return nanojit::isCse(u.code) || (isCall() && callInfo()->_cse);
    }

    void LIns::setimm16(int32_t x)
    {
        NanoAssert(isS16(x));
        i.imm16 = int16_t(x);
    }

    void LIns::setimm24(int32_t x)
    {
        NanoAssert(isS24(x));
        t.imm24 = x;
    }

    void LIns::setresv(uint32_t resv)
    {
        NanoAssert(isU8(resv));
        g.resv = resv;
    }

    void LIns::initOpcode(LOpcode op)
    {
        i.code = op;
        i.imm16 = 0;
        i.resv = 0;
    }

    void LIns::setOprnd1(LInsp r)
    {
        u.oprnd_1 = reference(r);
    }

    void LIns::setOprnd2(LInsp r)
    {
        u.oprnd_2 = reference(r);
    }

    void LIns::setOprnd3(LInsp r)
    {
        u.oprnd_3 = reference(r);
    }

    void LIns::setDisp(int8_t d)
    {
        sti.disp = d;
    }

    LIns **LIns::targetAddr() {
        NanoAssert(isBranch());
        LInsp i = (LInsp) this-1 - u.oprnd_2;
        NanoAssert(i->isTramp());
        LInsp ref;
        while ((ref=i->ref()) != 0 && ref->isTramp())
            i = ref;
        NanoAssert(i->isop(LIR_tramp));
        LirFarIns* ov = (LirFarIns*)(i-LIR_FAR_SLOTS+1);
        return &(ov->v);
    }

    void LIns::target(LInsp label) {
        NanoAssert(label && label->isop(LIR_label));
        *(targetAddr()) = label;
    }

    LInsp LIns::getTarget()
    {
        NanoAssert(isBranch());
        return oprnd2();
    }

    LInsp LIns::oprnd1() const
    {
        return deref(u.oprnd_1);
    }

    LInsp LIns::oprnd2() const
    {
        return deref(u.oprnd_2);
    }

    LInsp LIns::oprnd3() const
    {
        return deref(u.oprnd_3);
    }

    void *LIns::payload() const
    {
        NanoAssert(opcode()==LIR_skip || opcode()==LIR_nearskip);
        return (void*) (ref()+1);
    }

    LIns* LIns::ref() const
    {
        LIns const *r = 0;
        if (t.code&1)
            r = this + t.imm24;
        else
        {
            LirFarIns* l = (LirFarIns*)(this-LIR_FAR_SLOTS+1);
            r = l->v;
        }
        return (const LInsp)r;
    }

    int32_t LIns::imm32() const
    {
        LirImm32Ins* l = (LirImm32Ins*)(this-LIR_IMM32_SLOTS+1);
        return l->v;
    }

    uint64_t LIns::constvalq() const
    {
        LirImm64Ins* l = (LirImm64Ins*)(this-LIR_IMM64_SLOTS+1);
    #ifdef AVMPLUS_UNALIGNED_ACCESS
        int* ptr = (int*)l->v;
        return *(const uint64_t*)ptr;
    #else
        union { uint64_t tmp; int32_t dst[2]; } u;
        u.dst[0] = l->v[0];
        u.dst[1] = l->v[1];
        return u.tmp;
    #endif
    }

    double LIns::constvalf() const
    {
        LirImm64Ins* l = (LirImm64Ins*)(this-LIR_IMM64_SLOTS+1);
        NanoAssert(isconstq());
    #ifdef AVMPLUS_UNALIGNED_ACCESS
        int* ptr = (int*)l->v;
        return *(const double*)ptr;
    #else
        union { uint32_t dst[2]; double tmpf; } u;
        u.dst[0] = l->v[0];
        u.dst[1] = l->v[1];
        return u.tmpf;
    #endif
    }

    size_t LIns::callInsWords() const
    {
        return LIR_CALL_SLOTS + argwords(argc());
    }

    const CallInfo* LIns::callInfo() const
    {
        LirCallIns* l = (LirCallIns*)(this-LIR_CALL_SLOTS+1);
        return l->ci;
    }

    // index args in right-to-left order, i.e. arg(0) is the rightmost arg
    LIns* LIns::arg(uint32_t i)
    {
        NanoAssert(i < argc());
        LirCallIns* l = (LirCallIns*)(this-LIR_CALL_SLOTS+1);
        uint8_t* offs = (uint8_t*)l - (i+1);
        return deref(*offs);
    }

    LIns* LirWriter::ins2i(LOpcode v, LIns* oprnd1, int32_t imm)
    {
        return ins2(v, oprnd1, insImm(imm));
    }

    bool insIsS16(LInsp i)
    {
        if (i->isconst()) {
            int c = i->constval();
            return isS16(c);
        }
        if (i->isop(LIR_cmov) || i->isop(LIR_qcmov)) {
            LInsp vals = i->oprnd2();
            return insIsS16(vals->oprnd1()) && insIsS16(vals->oprnd2());
        }
        if (i->isCmp())
            return true;
        // many other possibilities too.
        return false;
    }

    LIns* ExprFilter::ins1(LOpcode v, LIns* i)
    {
        if (v == LIR_qlo) {
            if (i->isconstq())
                return insImm(int32_t(i->constvalq()));
            if (i->isop(LIR_qjoin))
                return i->oprnd1();
        }
        else if (v == LIR_qhi) {
            if (i->isconstq())
                return insImm(int32_t(i->constvalq()>>32));
            if (i->isop(LIR_qjoin))
                return i->oprnd2();
        }
        else if (i->isconst()) {
            int32_t c = i->constval();
            if (v == LIR_neg)
                return insImm(-c);
            if (v == LIR_not)
                return insImm(~c);
        }
        else if (v == i->opcode() && (v == LIR_not || v == LIR_neg || v == LIR_fneg)) {
            // not(not(x)) = x;  neg(neg(x)) = x;  fneg(fneg(x)) = x
            return i->oprnd1();
        }
        /* [ed 8.27.08] this causes a big slowdown in gameoflife.as. why?
        else if (i->isconst()) {
            if (v == LIR_i2f) {
                return insImmf(i->constval());
            }
            else if (v == LIR_u2f) {
                return insImmf((uint32_t)i->constval());
            }
        }
        */

        // todo
        // -(a-b) = b-a

        return out->ins1(v, i);
    }
    LIns* ExprFilter::ins2(LOpcode v, LIns* oprnd1, LIns* oprnd2)
    {
        NanoAssert(oprnd1 && oprnd2);
        if (v == LIR_cmov || v == LIR_qcmov) {
            if (oprnd2->oprnd1() == oprnd2->oprnd2()) {
                // c ? a : a => a
                return oprnd2->oprnd1();
            }
            if (oprnd1->isconst()) {
                // const ? x : y => return x or y depending on const
                return oprnd1->constval() ? oprnd2->oprnd1() : oprnd2->oprnd2();
            }
        }
        if (oprnd1 == oprnd2)
        {
            if (v == LIR_xor || v == LIR_sub ||
                v == LIR_ult || v == LIR_ugt || v == LIR_gt || v == LIR_lt)
                return insImm(0);
            if (v == LIR_or || v == LIR_and)
                return oprnd1;
            if (v == LIR_le || v == LIR_ule || v == LIR_ge || v == LIR_uge) {
                // x <= x == 1; x >= x == 1
                return insImm(1);
            }
        }
        if (oprnd1->isconst() && oprnd2->isconst())
        {
            int c1 = oprnd1->constval();
            int c2 = oprnd2->constval();
            if (v == LIR_qjoin) {
                uint64_t q = c1 | uint64_t(c2)<<32;
                return insImmq(q);
            }
            if (v == LIR_eq)
                return insImm(c1 == c2);
            if (v == LIR_ov)
                return insImm((c2 != 0) && ((c1 + c2) <= c1));
            if (v == LIR_cs)
                return insImm((c2 != 0) && ((uint32_t(c1) + uint32_t(c2)) <= uint32_t(c1)));
            if (v == LIR_lt)
                return insImm(c1 < c2);
            if (v == LIR_gt)
                return insImm(c1 > c2);
            if (v == LIR_le)
                return insImm(c1 <= c2);
            if (v == LIR_ge)
                return insImm(c1 >= c2);
            if (v == LIR_ult)
                return insImm(uint32_t(c1) < uint32_t(c2));
            if (v == LIR_ugt)
                return insImm(uint32_t(c1) > uint32_t(c2));
            if (v == LIR_ule)
                return insImm(uint32_t(c1) <= uint32_t(c2));
            if (v == LIR_uge)
                return insImm(uint32_t(c1) >= uint32_t(c2));
            if (v == LIR_rsh)
                return insImm(int32_t(c1) >> int32_t(c2));
            if (v == LIR_lsh)
                return insImm(int32_t(c1) << int32_t(c2));
            if (v == LIR_ush)
                return insImm(uint32_t(c1) >> int32_t(c2));
            if (v == LIR_or)
                return insImm(uint32_t(c1) | int32_t(c2));
            if (v == LIR_and)
                return insImm(uint32_t(c1) & int32_t(c2));
            if (v == LIR_xor)
                return insImm(uint32_t(c1) ^ int32_t(c2));
        }
        else if (oprnd1->isconstq() && oprnd2->isconstq())
        {
            double c1 = oprnd1->constvalf();
            double c2 = oprnd2->constvalf();
            if (v == LIR_feq)
                return insImm(c1 == c2);
            if (v == LIR_flt)
                return insImm(c1 < c2);
            if (v == LIR_fgt)
                return insImm(c1 > c2);
            if (v == LIR_fle)
                return insImm(c1 <= c2);
            if (v == LIR_fge)
                return insImm(c1 >= c2);
        }
        else if (oprnd1->isconst() && !oprnd2->isconst())
        {
            if (v == LIR_add || v == LIR_addp || v == LIR_mul ||
                v == LIR_fadd || v == LIR_fmul ||
                v == LIR_xor || v == LIR_or || v == LIR_and ||
                v == LIR_eq) {
                // move const to rhs
                LIns* t = oprnd2;
                oprnd2 = oprnd1;
                oprnd1 = t;
            }
            else if (v >= LIR_lt && v <= LIR_uge) {
                NanoStaticAssert((LIR_lt ^ 1) == LIR_gt);
                NanoStaticAssert((LIR_le ^ 1) == LIR_ge);
                NanoStaticAssert((LIR_ult ^ 1) == LIR_ugt);
                NanoStaticAssert((LIR_ule ^ 1) == LIR_uge);

                // move const to rhs, swap the operator
                LIns *t = oprnd2;
                oprnd2 = oprnd1;
                oprnd1 = t;
                v = LOpcode(v^1);
            }
        }

        if (oprnd2->isconst())
        {
            int c = oprnd2->constval();
            if (v == LIR_add && oprnd1->isop(LIR_add) && oprnd1->oprnd2()->isconst()) {
                // add(add(x,c1),c2) => add(x,c1+c2)
                c += oprnd1->oprnd2()->constval();
                oprnd2 = insImm(c);
                oprnd1 = oprnd1->oprnd1();
            }
            else if (v == LIR_sub && oprnd1->isop(LIR_add) && oprnd1->oprnd2()->isconst()) {
                // sub(add(x,c1),c2) => add(x,c1-c2)
                c = oprnd1->oprnd2()->constval() - c;
                oprnd2 = insImm(c);
                oprnd1 = oprnd1->oprnd1();
                v = LIR_add;
            }
            else if (v == LIR_rsh && c == 16 && oprnd1->isop(LIR_lsh) &&
                     oprnd1->oprnd2()->isconstval(16)) {
                if (insIsS16(oprnd1->oprnd1())) {
                    // rsh(lsh(x,16),16) == x, if x is S16
                    return oprnd1->oprnd1();
                }
            }
            else if (v == LIR_ult) {
                if (oprnd1->isop(LIR_cmov) || oprnd1->isop(LIR_qcmov)) {
                    LInsp a = oprnd1->oprnd2()->oprnd1();
                    LInsp b = oprnd1->oprnd2()->oprnd2();
                    if (a->isconst() && b->isconst()) {
                        bool a_lt = uint32_t(a->constval()) < uint32_t(oprnd2->constval());
                        bool b_lt = uint32_t(b->constval()) < uint32_t(oprnd2->constval());
                        if (a_lt == b_lt)
                            return insImm(a_lt);
                    }
                }
            }

            if (c == 0)
            {
                if (v == LIR_add || v == LIR_addp || v == LIR_or || v == LIR_xor ||
                    v == LIR_sub || v == LIR_lsh || v == LIR_rsh || v == LIR_ush)
                    return oprnd1;
                else if (v == LIR_and || v == LIR_mul)
                    return oprnd2;
                else if (v == LIR_eq && oprnd1->isop(LIR_or) &&
                         oprnd1->oprnd2()->isconst() &&
                         oprnd1->oprnd2()->constval() != 0) {
                    // (x or c) != 0 if c != 0
                    return insImm(0);
                }
            }
            else if (c == -1 || (c == 1 && oprnd1->isCmp())) {
                if (v == LIR_or) {
                    // x | -1 = -1, cmp | 1 = 1
                    return oprnd2;
                }
                else if (v == LIR_and) {
                    // x & -1 = x, cmp & 1 = cmp
                    return oprnd1;
                }
            }
        }

        LInsp i;
        if (v == LIR_qjoin && oprnd1->isop(LIR_qlo) && oprnd2->isop(LIR_qhi)
            && (i = oprnd1->oprnd1()) == oprnd2->oprnd1()) {
            // qjoin(qlo(x),qhi(x)) == x
            return i;
        }

        return out->ins2(v, oprnd1, oprnd2);
    }
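    // --- Illustrative note (editor's addition, not in the original source) ---
    // ExprFilter is a peephole optimizer in the writer pipeline: it folds
    // constant operands, normalizes commutative ops so the constant ends up on
    // the right, and cancels algebraic identities before anything reaches the
    // buffer. For example, add(add(x,5),2) is rewritten to add(x,7), and
    // xor(x,x) becomes the constant 0, so downstream filters (e.g. CSE) see
    // the simplified form.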
    LIns* ExprFilter::insGuard(LOpcode v, LInsp c, LInsp x)
    {
        if (v == LIR_xt || v == LIR_xf) {
            if (c->isconst()) {
                if ((v == LIR_xt && !c->constval()) || (v == LIR_xf && c->constval())) {
                    return 0; // no guard needed
                }
                else {
                    // need a way to EOT now, since this is trace end
    #ifdef JS_TRACER
                    NanoAssertMsg(0, "need a way to EOT now, since this is trace end");
    #endif
                    return out->insGuard(LIR_x, out->insImm(1), x);
                }
            }
            else {
                while (c->isop(LIR_eq) && c->oprnd1()->isCmp() &&
                       c->oprnd2()->isconstval(0)) {
                    // xt(eq(cmp,0)) => xf(cmp) or xf(eq(cmp,0)) => xt(cmp)
                    v = LOpcode(v^1);
                    c = c->oprnd1();
                }
            }
        }
        return out->insGuard(v, c, x);
    }

    LIns* ExprFilter::insBranch(LOpcode v, LIns *c, LIns *t)
    {
        if (v == LIR_jt || v == LIR_jf) {
            while (c->isop(LIR_eq) && c->oprnd1()->isCmp() && c->oprnd2()->isconstval(0)) {
                // jt(eq(cmp,0)) => jf(cmp) or jf(eq(cmp,0)) => jt(cmp)
                v = LOpcode(v ^ 1);
                c = c->oprnd1();
            }
        }
        return out->insBranch(v, c, t);
    }

    LIns* LirWriter::insLoadi(LIns *base, int disp)
    {
        return insLoad(LIR_ld,base,disp);
    }

    LIns* LirWriter::insLoad(LOpcode op, LIns *base, int disp)
    {
        return insLoad(op, base, insImm(disp));
    }

    LIns* LirWriter::store(LInsp value, LInsp base, int32_t d)
    {
        return isS8(d) ? insStorei(value, base, d)
                       : insStore(value, base, insImm(d));
    }

    LIns* LirWriter::ins_eq0(LIns* oprnd1)
    {
        return ins2i(LIR_eq, oprnd1, 0);
    }

    LIns* LirWriter::insImmf(double f)
    {
        union {
            double f;
            uint64_t q;
        } u;
        u.f = f;
        return insImmq(u.q);
    }

    LIns* LirWriter::qjoin(LInsp lo, LInsp hi)
    {
        return ins2(LIR_qjoin, lo, hi);
    }

    LIns* LirWriter::insImmPtr(const void *ptr)
    {
        return sizeof(ptr) == 8 ? insImmq((uintptr_t)ptr) : insImm((intptr_t)ptr);
    }
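    // --- Illustrative note (editor's addition, not in the original source) ---
    // insImmf() type-puns through a union so the raw IEEE-754 bit pattern of
    // the double lands in the 64-bit immediate; constvalf() reverses the trick
    // when reading the constant back. E.g. insImmf(1.0) stores 0x3FF0000000000000.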
    LIns* LirWriter::ins_choose(LIns* cond, LIns* iftrue, LIns* iffalse)
    {
        // if not a conditional, make it implicitly an ==0 test (then flip the results)
        if (!cond->isCmp())
        {
            cond = ins_eq0(cond);
            LInsp tmp = iftrue;
            iftrue = iffalse;
            iffalse = tmp;
        }

        if (true/*avmplus::AvmCore::use_cmov()*/)
        {
            return ins2((iftrue->isQuad() || iffalse->isQuad()) ? LIR_qcmov : LIR_cmov, cond, ins2(LIR_2, iftrue, iffalse));
        }

        // @todo -- it might be better to use a short conditional branch rather than
        // the bit-twiddling on systems that don't provide a conditional move instruction.
        LInsp ncond = ins1(LIR_neg, cond); // cond ? -1 : 0
        return ins2(LIR_or,
                    ins2(LIR_and, iftrue, ncond),
                    ins2(LIR_and, iffalse, ins1(LIR_not, ncond)));
    }
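    // --- Illustrative note (editor's addition, not in the original source) ---
    // The branch-free fallback above works because cond is 0 or 1, so
    // ncond = -cond is either all-ones or all-zeros:
    //   cond = 1:  (iftrue & ~0) | (iffalse &  0) == iftrue
    //   cond = 0:  (iftrue &  0) | (iffalse & ~0) == iffalse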
    LIns* LirBufWriter::insCall(const CallInfo *ci, LInsp args[])
    {
        static const LOpcode k_callmap[]  = { LIR_call,  LIR_fcall,  LIR_call,  LIR_callh };
        static const LOpcode k_callimap[] = { LIR_calli, LIR_fcalli, LIR_calli, LIR_skip };

        uint32_t argt = ci->_argtypes;
        LOpcode op = (ci->isIndirect() ? k_callimap : k_callmap)[argt & 3];
        NanoAssert(op != LIR_skip); // LIR_skip here is just an error condition

        ArgSize sizes[2*MAXARGS];
        int32_t argc = ci->get_sizes(sizes);

    #ifdef NJ_SOFTFLOAT
        if (op == LIR_fcall)
            op = LIR_callh;
        LInsp args2[MAXARGS*2]; // arm could require 2 args per double
        int32_t j = 0;
        int32_t i = 0;
        while (j < argc) {
            argt >>= 2;
            ArgSize a = ArgSize(argt&3);
            if (a == ARGSIZE_F) {
                LInsp q = args[i++];
                args2[j++] = ins1(LIR_qhi, q);
                args2[j++] = ins1(LIR_qlo, q);
            } else {
                args2[j++] = args[i++];
            }
        }
        args = args2;
        NanoAssert(j == argc);
    #endif

        NanoAssert(argc <= (int)MAXARGS);
        uint32_t words = argwords(argc);
        int32_t insSz = words + LIR_CALL_SLOTS; // words needed for offsets + size of the instruction
        ensureRoom(argc+insSz); // argc = # of possible tramps for args
        LInsp from = _buf->next()+argc+words; // assuming all args need a tramp, offsets are written here
        for (int32_t i=0; i < argc; i++)
            makeReachable(args[i], from);

        // skip 'words' needed for call parameters
        LirCallIns *l = (LirCallIns*) (_buf->next()+words);
        l->ci = ci;

        // call parameters are laid out in reverse order
        uint8_t* offs = (uint8_t*)l;
        for (int32_t i=0; i < argc; i++)
            *--offs = (uint8_t) l->i.reference(args[i]);
        NanoAssert((LInsp)offs>=_buf->next());

    #ifndef NANOJIT_64BIT
        l->i.initOpcode(op==LIR_callh ? LIR_call : op);
    #else
        l->i.initOpcode(op);
    #endif
        l->i.c.imm8a = 0;
        l->i.c.imm8b = argc;
        _buf->commit(insSz);
        _buf->_stats.lir++;
        NanoAssert((LInsp)(l+1)==_buf->next());
        return &(l->i);
    }
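    // --- Illustrative note (editor's addition, not in the original source) ---
    // A call is a variable-length record: the fixed LirCallIns slots are
    // preceded by one byte per argument holding the backwards reference to that
    // argument (rightmost argument first), rounded up to whole LIns words by
    // argwords(). Under NJ_SOFTFLOAT, each double argument is split into
    // qhi/qlo halves before being recorded, since a soft-float target passes it
    // in two registers.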
    using namespace avmplus;

    StackFilter::StackFilter(LirFilter *in, GC *gc, LirBuffer *lirbuf, LInsp sp)
        : LirFilter(in), gc(gc), lirbuf(lirbuf), sp(sp), top(0)
    {}

    LInsp StackFilter::read()
    {
        for (;;)
        {
            LInsp i = in->read();
            if (!i)
                return i;
            if (i->isStore())
            {
                LInsp base = i->oprnd2();
                if (base == sp)
                {
                    LInsp v = i->oprnd1();
                    int d = i->immdisp() >> 2;
                    if (d >= top) {
                        continue;
                    } else {
                        d = top - d;
                        if (v->isQuad()) {
                            // storing 8 bytes
                            if (stk.get(d) && stk.get(d-1)) {
                                continue;
                            } else {
                                stk.set(gc, d);
                                stk.set(gc, d-1);
                            }
                        }
                        else {
                            // storing 4 bytes
                            if (stk.get(d))
                                continue;
                            else
                                stk.set(gc, d);
                        }
                    }
                }
            }
            /*
             * NB: If there is a backward branch other than the loop-restart branch, this is
             * going to be wrong. Unfortunately there doesn't seem to be an easy way to detect
             * such branches. Just do not create any.
             */
            else if (i->isGuard())
            {
                stk.reset();
                top = getTop(i) >> 2;
            }
            return i;
        }
    }
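    // --- Illustrative note (editor's addition, not in the original source) ---
    // StackFilter performs dead-store elimination on the native stack while the
    // buffer is read backwards: a store to sp+d is dropped if it lands at or
    // above the current stack top, or if a later (already-seen) store covered
    // the same slot(s), as tracked in the 'stk' bitset. Guards reset the
    // tracking because a side exit may read any slot below its stack top.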
    //
    // inlined/separated version of SuperFastHash.
    // This content is copyrighted by Paul Hsieh. For reference see: http://www.azillionmonkeys.com/qed/hash.html
    //
    inline uint32_t _hash8(uint32_t hash, const uint8_t data)
    {
        hash += data;
        hash ^= hash << 10;
        hash += hash >> 1;
        return hash;
    }

    inline uint32_t _hash32(uint32_t hash, const uint32_t data)
    {
        const uint32_t dlo = data & 0xffff;
        const uint32_t dhi = data >> 16;
        hash += dlo;
        const uint32_t tmp = (dhi << 11) ^ hash;
        hash = (hash << 16) ^ tmp;
        hash += hash >> 11;
        return hash;
    }

    inline uint32_t _hashptr(uint32_t hash, const void* data)
    {
    #ifdef NANOJIT_64BIT
        hash = _hash32(hash, uint32_t(uintptr_t(data) >> 32));
        hash = _hash32(hash, uint32_t(uintptr_t(data)));
        return hash;
    #else
        return _hash32(hash, uint32_t(data));
    #endif
    }

    inline uint32_t _hashfinish(uint32_t hash)
    {
        /* Force "avalanching" of final 127 bits */
        hash ^= hash << 3;
        hash += hash >> 5;
        hash ^= hash << 4;
        hash += hash >> 17;
        hash ^= hash << 25;
        hash += hash >> 6;
        return hash;
    }
    LInsHashSet::LInsHashSet(GC* gc) :
            m_used(0), m_cap(kInitialCap), m_gc(gc)
    {
    #ifdef MEMORY_INFO
    //  m_list.set_meminfo_name("LInsHashSet.list");
    #endif
        LInsp *list = (LInsp*) gc->Alloc(sizeof(LInsp)*m_cap, GC::kZero);
        WB(gc, this, &m_list, list);
    }

    LInsHashSet::~LInsHashSet()
    {
        m_gc->Free(m_list);
    }

    void LInsHashSet::clear() {
        memset(m_list, 0, sizeof(LInsp)*m_cap);
        m_used = 0;
    }

    /*static*/ uint32_t FASTCALL LInsHashSet::hashcode(LInsp i)
    {
        const LOpcode op = i->opcode();
        switch (op)
        {
            case LIR_short:
                return hashimm(i->imm16());
            case LIR_int:
                return hashimm(i->imm32());
            case LIR_quad:
                return hashimmq(i->constvalq());
            case LIR_call:
            case LIR_fcall:
    #if defined NANOJIT_64BIT
            case LIR_callh:
    #endif
            {
                LInsp args[10];
                int32_t argc = i->argc();
                NanoAssert(argc < 10);
                for (int32_t j=0; j < argc; j++)
                    args[j] = i->arg(j);
                return hashcall(i->callInfo(), argc, args);
            }
            default:
                if (operandCount[op] == 2)
                    return hash2(op, i->oprnd1(), i->oprnd2());
                else
                    return hash1(op, i->oprnd1());
        }
    }

    /*static*/ bool FASTCALL LInsHashSet::equals(LInsp a, LInsp b)
    {
        if (a==b)
            return true;
        AvmAssert(a->opcode() == b->opcode());
        const LOpcode op = a->opcode();
        switch (op)
        {
            case LIR_short:
            {
                return a->imm16() == b->imm16();
            }
            case LIR_int:
            {
                return a->imm32() == b->imm32();
            }
            case LIR_quad:
            {
                return a->constvalq() == b->constvalq();
            }
            case LIR_call:
            case LIR_fcall:
    #if defined NANOJIT_64BIT
            case LIR_callh:
    #endif
            {
                if (a->callInfo() != b->callInfo()) return false;
                uint32_t argc=a->argc();
                NanoAssert(argc == b->argc());
                for (uint32_t i=0; i < argc; i++)
                    if (a->arg(i) != b->arg(i))
                        return false;
                return true;
            }
            default:
            {
                const uint32_t count = operandCount[op];
                if ((count >= 1 && a->oprnd1() != b->oprnd1()) ||
                    (count >= 2 && a->oprnd2() != b->oprnd2()))
                    return false;
                return true;
            }
        }
    }

    void FASTCALL LInsHashSet::grow()
    {
        const uint32_t newcap = m_cap << 1;
        LInsp *newlist = (LInsp*) m_gc->Alloc(newcap * sizeof(LInsp), GC::kZero);
        LInsp *list = m_list;
    #ifdef MEMORY_INFO
    //  newlist.set_meminfo_name("LInsHashSet.list");
    #endif
        for (uint32_t i=0, n=m_cap; i < n; i++) {
            LInsp name = list[i];
            if (!name) continue;
            uint32_t j = find(name, hashcode(name), newlist, newcap);
            newlist[j] = name;
        }
        m_cap = newcap;
        m_gc->Free(list);
        WB(m_gc, this, &m_list, newlist);
    }

    uint32_t FASTCALL LInsHashSet::find(LInsp name, uint32_t hash, const LInsp *list, uint32_t cap)
    {
        const uint32_t bitmask = (cap - 1) & ~0x1;

        uint32_t n = 7 << 1;
        hash &= bitmask;
        LInsp k;
        while ((k = list[hash]) != NULL &&
               (!LIns::sameop(k,name) || !equals(k, name)))
        {
            hash = (hash + (n += 2)) & bitmask; // quadratic probe
        }
        return hash;
    }
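    // --- Illustrative note (editor's addition, not in the original source) ---
    // The probe sequence starts at n = 14 and adds n += 2 on each collision, so
    // the k-th probe lands at hash + (16 + 18 + ...) = hash + k*k + 15*k
    // (mod cap), a quadratic probe. Note that the mask clears the low bit and
    // every step is even, so probing stays on even indices of the table.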
    LInsp LInsHashSet::add(LInsp name, uint32_t k)
    {
        // this is relatively short-lived, so let's try a more aggressive load factor
        // in the interest of improving performance
        if (((m_used+1)<<1) >= m_cap) // 0.50
        {
            grow();
            k = find(name, hashcode(name), m_list, m_cap);
        }
        NanoAssert(!m_list[k]);
        m_used++;
        return m_list[k] = name;
    }

    void LInsHashSet::replace(LInsp i)
    {
        LInsp *list = m_list;
        uint32_t k = find(i, hashcode(i), list, m_cap);
        if (list[k]) {
            // already there, so replace it
            list[k] = i;
        } else {
            add(i, k);
        }
    }

    uint32_t LInsHashSet::hashimm(int32_t a) {
        return _hashfinish(_hash32(0,a));
    }

    uint32_t LInsHashSet::hashimmq(uint64_t a) {
        uint32_t hash = _hash32(0, uint32_t(a >> 32));
        return _hashfinish(_hash32(hash, uint32_t(a)));
    }

    uint32_t LInsHashSet::hash1(LOpcode op, LInsp a) {
        uint32_t hash = _hash8(0,uint8_t(op));
        return _hashfinish(_hashptr(hash, a));
    }

    uint32_t LInsHashSet::hash2(LOpcode op, LInsp a, LInsp b) {
        uint32_t hash = _hash8(0,uint8_t(op));
        hash = _hashptr(hash, a);
        return _hashfinish(_hashptr(hash, b));
    }

    uint32_t LInsHashSet::hashcall(const CallInfo *ci, uint32_t argc, LInsp args[]) {
        uint32_t hash = _hashptr(0, ci);
        for (int32_t j=argc-1; j >= 0; j--)
            hash = _hashptr(hash,args[j]);
        return _hashfinish(hash);
    }

    LInsp LInsHashSet::find32(int32_t a, uint32_t &i)
    {
        uint32_t cap = m_cap;
        const LInsp *list = m_list;
        const uint32_t bitmask = (cap - 1) & ~0x1;
        uint32_t hash = hashimm(a) & bitmask;
        uint32_t n = 7 << 1;
        LInsp k;
        while ((k = list[hash]) != NULL &&
               (!k->isconst() || k->constval() != a))
        {
            hash = (hash + (n += 2)) & bitmask; // quadratic probe
        }
        i = hash;
        return k;
    }

    LInsp LInsHashSet::find64(uint64_t a, uint32_t &i)
    {
        uint32_t cap = m_cap;
        const LInsp *list = m_list;
        const uint32_t bitmask = (cap - 1) & ~0x1;
        uint32_t hash = hashimmq(a) & bitmask;
        uint32_t n = 7 << 1;
        LInsp k;
        while ((k = list[hash]) != NULL &&
               (!k->isconstq() || k->constvalq() != a))
        {
            hash = (hash + (n += 2)) & bitmask; // quadratic probe
        }
        i = hash;
        return k;
    }

    LInsp LInsHashSet::find1(LOpcode op, LInsp a, uint32_t &i)
    {
        uint32_t cap = m_cap;
        const LInsp *list = m_list;
        const uint32_t bitmask = (cap - 1) & ~0x1;
        uint32_t hash = hash1(op,a) & bitmask;
        uint32_t n = 7 << 1;
        LInsp k;
        while ((k = list[hash]) != NULL &&
               (k->opcode() != op || k->oprnd1() != a))
        {
            hash = (hash + (n += 2)) & bitmask; // quadratic probe
        }
        i = hash;
        return k;
    }

    LInsp LInsHashSet::find2(LOpcode op, LInsp a, LInsp b, uint32_t &i)
    {
        uint32_t cap = m_cap;
        const LInsp *list = m_list;
        const uint32_t bitmask = (cap - 1) & ~0x1;
        uint32_t hash = hash2(op,a,b) & bitmask;
        uint32_t n = 7 << 1;
        LInsp k;
        while ((k = list[hash]) != NULL &&
               (k->opcode() != op || k->oprnd1() != a || k->oprnd2() != b))
        {
            hash = (hash + (n += 2)) & bitmask; // quadratic probe
        }
        i = hash;
        return k;
    }

    bool argsmatch(LInsp i, uint32_t argc, LInsp args[])
    {
        for (uint32_t j=0; j < argc; j++)
            if (i->arg(j) != args[j])
                return false;
        return true;
    }

    LInsp LInsHashSet::findcall(const CallInfo *ci, uint32_t argc, LInsp args[], uint32_t &i)
    {
        uint32_t cap = m_cap;
        const LInsp *list = m_list;
        const uint32_t bitmask = (cap - 1) & ~0x1;
        uint32_t hash = hashcall(ci, argc, args) & bitmask;
        uint32_t n = 7 << 1;
        LInsp k;
        while ((k = list[hash]) != NULL &&
               (!k->isCall() || k->callInfo() != ci || !argsmatch(k, argc, args)))
        {
            hash = (hash + (n += 2)) & bitmask; // quadratic probe
        }
        i = hash;
        return k;
    }

    GuardRecord *LIns::record()
    {
        NanoAssert(isGuard());
        return (GuardRecord*)oprnd2()->payload();
    }
    #ifdef NJ_VERBOSE
    class RetiredEntry: public GCObject
    {
    public:
        List<LInsp, LIST_NonGCObjects> live;
        LInsp i;
        RetiredEntry(GC *gc): live(gc) {}
    };

    class LiveTable
    {
    public:
        SortedMap<LInsp,LInsp,LIST_NonGCObjects> live;
        List<RetiredEntry*, LIST_GCObjects> retired;
        int maxlive;

        LiveTable(GC *gc) : live(gc), retired(gc), maxlive(0) {}

        ~LiveTable()
        {
            for (size_t i = 0; i < retired.size(); i++) {
                NJ_DELETE(retired.get(i));
            }
        }

        void add(LInsp i, LInsp use) {
            if (!i->isconst() && !i->isconstq() && !live.containsKey(i)) {
                NanoAssert(size_t(i->opcode()) < sizeof(lirNames) / sizeof(lirNames[0]));
                live.put(i,use);
            }
        }

        void retire(LInsp i, GC *gc) {
            RetiredEntry *e = NJ_NEW(gc, RetiredEntry)(gc);
            e->i = i;
            for (int j=0, n=live.size(); j < n; j++) {
                LInsp l = live.keyAt(j);
                if (!l->isStore() && !l->isGuard())
                    e->live.add(l);
            }
            int size=0;
            if ((size = e->live.size()) > maxlive)
                maxlive = size;

            live.remove(i);
            retired.add(e);
        }

        bool contains(LInsp i) {
            return live.containsKey(i);
        }
    };

    void live(GC *gc, LirBuffer *lirbuf)
    {
        // traverse backwards to find live exprs and a few other stats.

        LiveTable live(gc);
        uint32_t exits = 0;
        LirReader br(lirbuf);
        StackFilter sf(&br, gc, lirbuf, lirbuf->sp);
        StackFilter r(&sf, gc, lirbuf, lirbuf->rp);
        int total = 0;
        if (lirbuf->state)
            live.add(lirbuf->state, r.pos());
        for (LInsp i = r.read(); i != 0; i = r.read())
        {
            total++;

            // first handle side-effect instructions
            if (!i->isCse(lirbuf->_functions))
            {
                live.add(i,0);
                if (i->isGuard())
                    exits++;
            }

            // now propagate liveness
            if (live.contains(i))
            {
                live.retire(i,gc);
                NanoAssert(size_t(i->opcode()) < sizeof(operandCount) / sizeof(operandCount[0]));
                if (i->isStore()) {
                    live.add(i->oprnd2(),i); // base
                    live.add(i->oprnd1(),i); // val
                }
                else if (i->isop(LIR_cmov) || i->isop(LIR_qcmov)) {
                    live.add(i->oprnd1(),i);
                    live.add(i->oprnd2()->oprnd1(),i);
                    live.add(i->oprnd2()->oprnd2(),i);
                }
                else if (operandCount[i->opcode()] == 1) {
                    live.add(i->oprnd1(),i);
                }
                else if (operandCount[i->opcode()] == 2) {
                    live.add(i->oprnd1(),i);
                    live.add(i->oprnd2(),i);
                }
                else if (i->isCall()) {
                    for (int j=0, c=i->argc(); j < c; j++)
                        live.add(i->arg(j),i);
                }
            }
        }

        printf("live instruction count %d, total %d, max pressure %d\n",
               live.retired.size(), total, live.maxlive);
        printf("side exits %u\n", exits);

        // print live exprs, going forwards
        LirNameMap *names = lirbuf->names;
        bool newblock = true;
        for (int j=live.retired.size()-1; j >= 0; j--)
        {
            RetiredEntry *e = live.retired[j];
            char livebuf[4000], *s=livebuf;
            *s = 0;
            if (!newblock && e->i->isop(LIR_label)) {
                printf("\n");
            }
            newblock = false;
            for (int k=0,n=e->live.size(); k < n; k++) {
                strcpy(s, names->formatRef(e->live[k]));
                s += strlen(s);
                *s++ = ' '; *s = 0;
                NanoAssert(s < livebuf+sizeof(livebuf));
            }
            printf("%-60s %s\n", livebuf, names->formatIns(e->i));
            if (e->i->isGuard() || e->i->isBranch() || isRet(e->i->opcode())) {
                printf("\n");
                newblock = true;
            }
        }
    }
    LabelMap::Entry::~Entry()
    {
    }

    LirNameMap::Entry::~Entry()
    {
    }

    LirNameMap::~LirNameMap()
    {
        Entry *e;

        while ((e = names.removeLast()) != NULL) {
            labels->core->freeString(e->name);
            NJ_DELETE(e);
        }
    }

    bool LirNameMap::addName(LInsp i, Stringp name) {
        if (!names.containsKey(i)) {
            Entry *e = NJ_NEW(labels->core->gc, Entry)(name);
            names.put(i, e);
            return true;
        }
        return false;
    }

    void LirNameMap::addName(LInsp i, const char *name) {
        Stringp new_name = labels->core->newString(name);
        if (!addName(i, new_name)) {
            labels->core->freeString(new_name);
        }
    }

    void LirNameMap::copyName(LInsp i, const char *s, int suffix) {
        char s2[200];
        if (isdigit(s[strlen(s)-1])) {
            // if s ends with a digit, add '_' to clarify the suffix
            sprintf(s2,"%s_%d", s, suffix);
        } else {
            sprintf(s2,"%s%d", s, suffix);
        }
        addName(i, labels->core->newString(s2));
    }

    void LirNameMap::formatImm(int32_t c, char *buf) {
        if (c >= 10000 || c <= -10000)
            sprintf(buf,"#%s",labels->format((void*)c));
        else
            sprintf(buf,"%d", c);
    }

    const char* LirNameMap::formatRef(LIns *ref)
    {
        char buffer[200], *buf=buffer;
        buf[0]=0;
        GC *gc = labels->core->gc;
        if (names.containsKey(ref)) {
            StringNullTerminatedUTF8 cname(gc, names.get(ref)->name);
            strcat(buf, cname.c_str());
        }
        else if (ref->isconstq()) {
    #if defined NANOJIT_64BIT
            sprintf(buf, "#0x%lx", (nj_printf_ld)ref->constvalq());
    #else
            formatImm(uint32_t(ref->constvalq()>>32), buf);
            buf += strlen(buf);
            *buf++ = ':';
            formatImm(uint32_t(ref->constvalq()), buf);
    #endif
        }
        else if (ref->isconst()) {
            formatImm(ref->constval(), buf);
        }
        else {
            if (ref->isCall()) {
    #if !defined NANOJIT_64BIT
                if (ref->isop(LIR_callh)) {
                    // we've presumably seen the other half already
                    ref = ref->oprnd1();
                } else {
    #endif
                    copyName(ref, ref->callInfo()->_name, funccounts.add(ref->callInfo()));
    #if !defined NANOJIT_64BIT
                }
    #endif
            } else {
                NanoAssert(size_t(ref->opcode()) < sizeof(lirNames) / sizeof(lirNames[0]));
                copyName(ref, lirNames[ref->opcode()], lircounts.add(ref->opcode()));
            }
            StringNullTerminatedUTF8 cname(gc, names.get(ref)->name);
            strcat(buf, cname.c_str());
        }
        return labels->dup(buffer);
    }
    const char* LirNameMap::formatIns(LIns* i)
    {
        char sbuf[200];
        char *s = sbuf;
        LOpcode op = i->opcode();
        switch(op)
        {
            case LIR_short:
            case LIR_int:
            {
                sprintf(s, "%s", formatRef(i));
                break;
            }

            case LIR_alloc: {
                sprintf(s, "%s = %s %d", formatRef(i), lirNames[op], i->size());
                break;
            }

            case LIR_quad:
            {
                int32_t *p = (int32_t*) (i-2);
                sprintf(s, "#%X:%X /* %g */", p[1], p[0], i->constvalf());
                break;
            }

            case LIR_loop:
            case LIR_start:
                sprintf(s, "%s", lirNames[op]);
                break;

    #if defined NANOJIT_64BIT
            case LIR_callh:
    #endif
            case LIR_fcall:
            case LIR_call: {
                sprintf(s, "%s = %s ( ", formatRef(i), i->callInfo()->_name);
                for (int32_t j=i->argc()-1; j >= 0; j--) {
                    s += strlen(s);
                    sprintf(s, "%s ",formatRef(i->arg(j)));
                }
                s += strlen(s);
                sprintf(s, ")");
                break;
            }
            case LIR_fcalli:
            case LIR_calli: {
                int32_t argc = i->argc();
                sprintf(s, "%s = [%s] ( ", formatRef(i), formatRef(i->arg(argc-1)));
                s += strlen(s);
                argc--;
                for (int32_t j=argc-1; j >= 0; j--) {
                    s += strlen(s);
                    sprintf(s, "%s ",formatRef(i->arg(j)));
                }
                s += strlen(s);
                sprintf(s, ")");
                break;
            }

            case LIR_param: {
                uint32_t arg = i->imm8();
                if (!i->imm8b()) {
                    if (arg < sizeof(Assembler::argRegs)/sizeof(Assembler::argRegs[0])) {
                        sprintf(s, "%s = %s %d %s", formatRef(i), lirNames[op],
                                arg, gpn(Assembler::argRegs[arg]));
                    } else {
                        sprintf(s, "%s = %s %d", formatRef(i), lirNames[op], arg);
                    }
                } else {
                    sprintf(s, "%s = %s %d %s", formatRef(i), lirNames[op],
                            arg, gpn(Assembler::savedRegs[arg]));
                }
                break;
            }

            case LIR_label:
                sprintf(s, "%s:", formatRef(i));
                break;

            case LIR_jt:
            case LIR_jf:
                sprintf(s, "%s %s -> %s", lirNames[op], formatRef(i->oprnd1()),
                        i->oprnd2() ? formatRef(i->oprnd2()) : "unpatched");
                break;

            case LIR_j:
                sprintf(s, "%s -> %s", lirNames[op],
                        i->oprnd2() ? formatRef(i->oprnd2()) : "unpatched");
                break;

            case LIR_live:
            case LIR_ret:
            case LIR_fret:
                sprintf(s, "%s %s", lirNames[op], formatRef(i->oprnd1()));
                break;

            case LIR_callh:
            case LIR_neg:
            case LIR_fneg:
            case LIR_i2f:
            case LIR_u2f:
            case LIR_qlo:
            case LIR_qhi:
            case LIR_ov:
            case LIR_cs:
            case LIR_not:
                sprintf(s, "%s = %s %s", formatRef(i), lirNames[op], formatRef(i->oprnd1()));
                break;

            case LIR_x:
            case LIR_xt:
            case LIR_xf:
                formatGuard(i, s);
                break;

            case LIR_add:
            case LIR_addp:
            case LIR_sub:
            case LIR_mul:
            case LIR_fadd:
            case LIR_fsub:
            case LIR_fmul:
            case LIR_fdiv:
            case LIR_and:
            case LIR_or:
            case LIR_xor:
            case LIR_lsh:
            case LIR_rsh:
            case LIR_ush:
            case LIR_eq:
            case LIR_lt:
            case LIR_le:
            case LIR_gt:
            case LIR_ge:
            case LIR_ult:
            case LIR_ule:
            case LIR_ugt:
            case LIR_uge:
            case LIR_feq:
            case LIR_flt:
            case LIR_fle:
            case LIR_fgt:
            case LIR_fge:
            case LIR_qiadd:
            case LIR_qiand:
            case LIR_qilsh:
            case LIR_qior:
                sprintf(s, "%s = %s %s, %s", formatRef(i), lirNames[op],
                        formatRef(i->oprnd1()),
                        formatRef(i->oprnd2()));
                break;

            case LIR_qjoin:
                sprintf(s, "%s (%s), %s", lirNames[op],
                        formatIns(i->oprnd1()),
                        formatRef(i->oprnd2()));
                break;

            case LIR_qcmov:
            case LIR_cmov:
                sprintf(s, "%s = %s %s ? %s : %s", formatRef(i), lirNames[op],
                        formatRef(i->oprnd1()),
                        formatRef(i->oprnd2()->oprnd1()),
                        formatRef(i->oprnd2()->oprnd2()));
                break;

            case LIR_ld:
            case LIR_ldc:
            case LIR_ldq:
            case LIR_ldqc:
            case LIR_ldcb:
            case LIR_ldcs:
                sprintf(s, "%s = %s %s[%s]", formatRef(i), lirNames[op],
                        formatRef(i->oprnd1()),
                        formatRef(i->oprnd2()));
                break;

            case LIR_st:
            case LIR_sti:
            case LIR_stq:
            case LIR_stqi:
                sprintf(s, "%s %s[%d] = %s", lirNames[op],
                        formatRef(i->oprnd2()),
                        i->immdisp(),
                        formatRef(i->oprnd1()));
                break;

            default:
                sprintf(s, "?");
                break;
        }
        return labels->dup(sbuf);
    }
    #endif /* NJ_VERBOSE */

    CseFilter::CseFilter(LirWriter *out, GC *gc)
        : LirWriter(out), exprs(gc) {}

    LIns* CseFilter::insImm(int32_t imm)
    {
        uint32_t k;
        LInsp found = exprs.find32(imm, k);
        if (found)
            return found;
        return exprs.add(out->insImm(imm), k);
    }

    LIns* CseFilter::insImmq(uint64_t q)
    {
        uint32_t k;
        LInsp found = exprs.find64(q, k);
        if (found)
            return found;
        return exprs.add(out->insImmq(q), k);
    }

    LIns* CseFilter::ins0(LOpcode v)
    {
        if (v == LIR_label)
            exprs.clear();
        return out->ins0(v);
    }

    LIns* CseFilter::ins1(LOpcode v, LInsp a)
    {
        if (isCse(v)) {
            NanoAssert(operandCount[v]==1);
            uint32_t k;
            LInsp found = exprs.find1(v, a, k);
            if (found)
                return found;
            return exprs.add(out->ins1(v,a), k);
        }
        return out->ins1(v,a);
    }

    LIns* CseFilter::ins2(LOpcode v, LInsp a, LInsp b)
    {
        if (isCse(v)) {
            NanoAssert(operandCount[v]==2);
            uint32_t k;
            LInsp found = exprs.find2(v, a, b, k);
            if (found)
                return found;
            return exprs.add(out->ins2(v,a,b), k);
        }
        return out->ins2(v,a,b);
    }

    LIns* CseFilter::insLoad(LOpcode v, LInsp base, LInsp disp)
    {
        if (isCse(v)) {
            NanoAssert(operandCount[v]==2);
            uint32_t k;
            LInsp found = exprs.find2(v, base, disp, k);
            if (found)
                return found;
            return exprs.add(out->insLoad(v,base,disp), k);
        }
        return out->insLoad(v,base,disp);
    }

    LInsp CseFilter::insGuard(LOpcode v, LInsp c, LInsp x)
    {
        if (isCse(v)) {
            // conditional guard
            NanoAssert(operandCount[v]==1);
            uint32_t k;
            LInsp found = exprs.find1(v, c, k);
            if (found)
                return 0;
            return exprs.add(out->insGuard(v,c,x), k);
        }
        return out->insGuard(v, c, x);
    }

    LInsp CseFilter::insCall(const CallInfo *ci, LInsp args[])
    {
        if (ci->_cse) {
            uint32_t k;
            uint32_t argc = ci->count_args();
            LInsp found = exprs.findcall(ci, argc, args, k);
            if (found)
                return found;
            return exprs.add(out->insCall(ci, args), k);
        }
        return out->insCall(ci, args);
    }
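    // --- Illustrative note (editor's addition, not in the original source) ---
    // CseFilter implements common-subexpression elimination by hash-consing:
    // before forwarding a pure instruction downstream, it probes the
    // LInsHashSet keyed on (opcode, operands); a hit returns the existing
    // instruction, a miss emits and records it. For instance, requesting
    // ins2(LIR_add, a, b) twice yields the same LInsp both times, and labels
    // clear the table since control flow can merge there.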
    CseReader::CseReader(LirFilter *in, LInsHashSet *exprs, const CallInfo *functions)
        : LirFilter(in), exprs(exprs), functions(functions)
    {}

    LInsp CseReader::read()
    {
        LInsp i = in->read();
        if (i) {
            if (i->isCse(functions))
                exprs->replace(i);
        }
        return i;
    }

    LIns* FASTCALL callArgN(LIns* i, uint32_t n)
    {
        return i->arg(i->argc()-n-1);
    }

    void compile(Assembler* assm, Fragment* triggerFrag)
    {
        Fragmento *frago = triggerFrag->lirbuf->_frago;
        AvmCore *core = frago->core();
        GC *gc = core->gc;

        verbose_only( StringList asmOutput(gc); )
        verbose_only( assm->_outputCache = &asmOutput; )

        verbose_only(if (assm->_verbose && core->config.verbose_live)
            live(gc, triggerFrag->lirbuf);)

        bool treeCompile = core->config.tree_opt && (triggerFrag->kind == BranchTrace);
        RegAllocMap regMap(gc);
        NInsList loopJumps(gc);
    #ifdef MEMORY_INFO
    //  loopJumps.set_meminfo_name("LIR loopjumps");
    #endif
        assm->beginAssembly(triggerFrag, &regMap);
        if (assm->error())
            return;

        //fprintf(stderr, "recompile trigger %X kind %d\n", (int)triggerFrag, triggerFrag->kind);
        Fragment* root = triggerFrag;
        if (treeCompile)
        {
            // recompile the entire tree
            root = triggerFrag->root;
            root->fragEntry = 0;
            root->loopEntry = 0;
            root->releaseCode(frago);

            // do the tree branches
            Fragment* frag = root->treeBranches;
            while (frag)
            {
                // compile until there are no more frags
                if (frag->lastIns)
                {
                    assm->assemble(frag, loopJumps);
                    verbose_only(if (assm->_verbose)
                        assm->outputf("compiling branch %s ip %s",
                            frago->labels->format(frag),
                            frago->labels->format(frag->ip)); )

                    NanoAssert(frag->kind == BranchTrace);
                    RegAlloc* regs = NJ_NEW(gc, RegAlloc)();
                    assm->copyRegisters(regs);
                    assm->releaseRegisters();
                    SideExit* exit = frag->spawnedFrom;
                    regMap.put(exit, regs);
                }
                frag = frag->treeBranches;
            }
        }

        // now do the main trunk
        assm->assemble(root, loopJumps);
        verbose_only(if (assm->_verbose)
            assm->outputf("compiling trunk %s",
                frago->labels->format(root));)
        NanoAssert(!frago->core()->config.tree_opt || root == root->anchor || root->kind == MergeTrace);
        assm->endAssembly(root, loopJumps);

        // reverse output so that assembly is displayed low-to-high
        verbose_only( assm->_outputCache = 0; )
        verbose_only(for(int i=asmOutput.size()-1; i>=0; --i) { assm->outputf("%s",asmOutput.get(i)); } );

        if (assm->error()) {
            root->fragEntry = 0;
            root->loopEntry = 0;
        }
    }
    LInsp LoadFilter::insLoad(LOpcode v, LInsp base, LInsp disp)
    {
        if (base != sp && base != rp && (v == LIR_ld || v == LIR_ldq)) {
            uint32_t k;
            LInsp found = exprs.find2(v, base, disp, k);
            if (found)
                return found;
            return exprs.add(out->insLoad(v,base,disp), k);
        }
        return out->insLoad(v, base, disp);
    }

    void LoadFilter::clear(LInsp p)
    {
        if (p != sp && p != rp)
            exprs.clear();
    }

    LInsp LoadFilter::insStore(LInsp v, LInsp b, LInsp d)
    {
        clear(b);
        return out->insStore(v, b, d);
    }

    LInsp LoadFilter::insStorei(LInsp v, LInsp b, int32_t d)
    {
        clear(b);
        return out->insStorei(v, b, d);
    }

    LInsp LoadFilter::insCall(const CallInfo *ci, LInsp args[])
    {
        if (!ci->_cse)
            exprs.clear();
        return out->insCall(ci, args);
    }

    LInsp LoadFilter::ins0(LOpcode op)
    {
        if (op == LIR_label)
            exprs.clear();
        return out->ins0(op);
    }

    #endif /* FEATURE_NANOJIT */
    #if defined(NJ_VERBOSE)
    LabelMap::LabelMap(AvmCore *core, LabelMap* parent)
        : parent(parent), names(core->gc), addrs(core->config.verbose_addrs), end(buf), core(core)
    {}

    LabelMap::~LabelMap()
    {
        Entry *e;

        while ((e = names.removeLast()) != NULL) {
            core->freeString(e->name);
            NJ_DELETE(e);
        }
    }

    void LabelMap::add(const void *p, size_t size, size_t align, const char *name)
    {
        if (!this || names.containsKey(p))
            return;
        add(p, size, align, core->newString(name));
    }

    void LabelMap::add(const void *p, size_t size, size_t align, Stringp name)
    {
        if (!this || names.containsKey(p))
            return;
        Entry *e = NJ_NEW(core->gc, Entry)(name, size<<align, align);
        names.put(p, e);
    }

    const char *LabelMap::format(const void *p)
    {
        char b[200];
        int i = names.findNear(p);
        if (i >= 0) {
            const void *start = names.keyAt(i);
            Entry *e = names.at(i);
            const void *end = (const char*)start + e->size;
            avmplus::StringNullTerminatedUTF8 cname(core->gc, e->name);
            const char *name = cname.c_str();
            if (p == start) {
                if (addrs)
                    sprintf(b,"%p %s",p,name);
                else
                    strcpy(b, name);
                return dup(b);
            }
            else if (p > start && p < end) {
                int32_t d = int32_t(intptr_t(p)-intptr_t(start)) >> e->align;
                if (addrs)
                    sprintf(b, "%p %s+%d", p, name, d);
                else
                    sprintf(b,"%s+%d", name, d);
                return dup(b);
            }
            else {
                if (parent)
                    return parent->format(p);
                sprintf(b, "%p", p);
                return dup(b);
            }
        }
        if (parent)
            return parent->format(p);
        sprintf(b, "%p", p);
        return dup(b);
    }

    const char *LabelMap::dup(const char *b)
    {
        size_t need = strlen(b)+1;
        char *s = end;
        end += need;
        if (end > buf+sizeof(buf)) {
            s = buf;
            end = s+need;
        }
        strcpy(s, b);
        return s;
    }

    // copy all labels to parent, adding newbase to label addresses
    void LabelMap::promoteAll(const void *newbase)
    {
        for (int i=0, n=names.size(); i < n; i++) {
            void *base = (char*)newbase + (intptr_t)names.keyAt(i);
            parent->names.put(base, names.at(i));
        }
    }
    #endif // NJ_VERBOSE
}