/*--------------------------------------------------------------------*/
/*--- begin                                guest_generic_bb_to_IR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "ir_opt.h"
/*--------------------------------------------------------------*/
/*--- Forwards for fns called by self-checking translations  ---*/
/*--------------------------------------------------------------*/

/* Forwards .. */
VEX_REGPARM(2) static UInt genericg_compute_checksum_4al ( HWord first_w32,
                                                           HWord n_w32s );
VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 );
VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 );
VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 );
VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 );
VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 );
VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 );
VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 );
VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 );
VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 );
VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 );
VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 );
VEX_REGPARM(1) static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 );

VEX_REGPARM(2) static ULong genericg_compute_checksum_8al ( HWord first_w64,
                                                            HWord n_w64s );
VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 );
VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 );
VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 );
VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 );
VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 );
VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 );
VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 );
VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 );
VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 );
VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 );
VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 );
VEX_REGPARM(1) static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 );
/*--------------------------------------------------------------*/
/*--- Creation of self-check IR                              ---*/
/*--------------------------------------------------------------*/

static void create_self_checks_as_needed(
   /*MOD*/ IRSB* irsb,
   /*OUT*/ UInt* n_sc_extents,
   /*MOD*/ VexRegisterUpdates* pxControl,
   /*MOD*/ void* callback_opaque,
   /*IN*/  UInt (*needs_self_check)
                   (void*, /*MB_MOD*/VexRegisterUpdates*,
                    const VexGuestExtents*),
   const VexGuestExtents* vge,
   const VexAbiInfo* abiinfo_both,
   const IRType guest_word_type,
   const Int selfcheck_idx,
   /*IN*/ Int offB_GUEST_CMSTART,
   /*IN*/ Int offB_GUEST_CMLEN,
   /*IN*/ Int offB_GUEST_IP,
   const Addr guest_IP_sbstart
)
{
   /* The scheme is to compute a rather crude checksum of the code
      we're making a translation of, and add to the IR a call to a
      helper routine which recomputes the checksum every time the
      translation is run, and requests a retranslation if it doesn't
      match.  This is obviously very expensive and considerable
      efforts are made to speed it up:

      * the checksum is computed from all the naturally aligned
        host-sized words that overlap the translated code.  That means
        it could depend on up to 7 bytes before and 7 bytes after
        which aren't part of the translated area, and so if those
        change then we'll unnecessarily have to discard and
        retranslate.  This seems like a pretty remote possibility and
        it seems as if the benefit of not having to deal with the ends
        of the range at byte precision far outweighs any possible
        extra translations needed.

      * there's a generic routine and 12 specialised cases, which
        handle lengths of 1 through 12 words respectively.  They seem
        to cover about 90% of the cases that occur in practice.

      We ask the caller, via needs_self_check, which of the 3 vge
      extents needs a check, and only generate check code for those
      that do.
   */
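   /* In outline (a sketch; the concrete constants depend on the
      extent), the seven statements filled in below for each checked
      extent are:
         stmt0:  t_start = <base2check>
         stmt1:  t_len   = <len2check>
         stmt2:  PUT(CMSTART) = t_start
         stmt3:  PUT(CMLEN)   = t_len
         stmt4:  t_sum = CCall checksum_fn(first_hW, ...)
         stmt5:  t_bad = CmpNE(t_sum, expected)
         stmt6:  if (t_bad) exit-InvalICache to guest_IP_sbstart
   */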
   Addr     base2check;
   UInt     len2check;
   HWord    expectedhW;
   IRTemp   tistart_tmp, tilen_tmp, callresult_tmp, exitguard_tmp;
   HWord    VEX_REGPARM(2) (*fn_generic)(HWord, HWord);
   HWord    VEX_REGPARM(1) (*fn_spec)(HWord);
   const HChar* nm_generic;
   const HChar* nm_spec;
   HWord    fn_generic_entry = 0;
   HWord    fn_spec_entry = 0;
   UInt     host_word_szB = sizeof(HWord);
   IRType   host_word_type = Ity_INVALID;

   UInt extents_needing_check
      = needs_self_check(callback_opaque, pxControl, vge);

   if (host_word_szB == 4) host_word_type = Ity_I32;
   if (host_word_szB == 8) host_word_type = Ity_I64;
   vassert(host_word_type != Ity_INVALID);
   vassert(vge->n_used >= 1 && vge->n_used <= 3);

   /* Caller shouldn't claim that nonexistent extents need a
      check. */
   vassert((extents_needing_check >> vge->n_used) == 0);

   /* Guest addresses as IRConsts.  Used in self-checks to specify the
      restart-after-discard point. */
   IRConst* guest_IP_sbstart_IRConst
      = guest_word_type==Ity_I32
           ? IRConst_U32(toUInt(guest_IP_sbstart))
           : IRConst_U64(guest_IP_sbstart);

   const Int n_extent_slots = sizeof(vge->base) / sizeof(vge->base[0]);
   vassert(n_extent_slots == 3);

   vassert(selfcheck_idx + (n_extent_slots - 1) * 7 + 6 < irsb->stmts_used);
   for (Int i = 0; i < vge->n_used; i++) {
      /* Do we need to generate a check for this extent? */
      if ((extents_needing_check & (1 << i)) == 0)
         continue;

      /* Tell the caller */
      (*n_sc_extents)++;

      /* the extent we're generating a check for */
      base2check = vge->base[i];
      len2check  = vge->len[i];

      /* stay sane */
      vassert(len2check < 2000/*arbitrary*/);

      /* Skip the check if the translation involved zero bytes */
      if (len2check == 0)
         continue;

      HWord first_hW = ((HWord)base2check)
                       & ~(HWord)(host_word_szB-1);
      HWord last_hW  = (((HWord)base2check) + len2check - 1)
                       & ~(HWord)(host_word_szB-1);
      vassert(first_hW <= last_hW);
      HWord hW_diff = last_hW - first_hW;
      vassert(0 == (hW_diff & (host_word_szB-1)));
      HWord hWs_to_check = (hW_diff + host_word_szB) / host_word_szB;
      vassert(hWs_to_check > 0
              && hWs_to_check < 2004/*arbitrary*/ / host_word_szB);
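      /* Worked example: with base2check = 0x4003, len2check = 10 and
         host_word_szB = 8, first_hW = 0x4000 and last_hW = 0x4008, so
         hWs_to_check = 2 and the checksum covers bytes 0x4000..0x400F,
         a few of which lie outside the translated area. */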
      /* vex_printf("%lx %lx %ld\n", first_hW, last_hW, hWs_to_check); */

      if (host_word_szB == 8) {
         fn_generic = (VEX_REGPARM(2) HWord(*)(HWord, HWord))
                      genericg_compute_checksum_8al;
         nm_generic = "genericg_compute_checksum_8al";
      } else {
         fn_generic = (VEX_REGPARM(2) HWord(*)(HWord, HWord))
                      genericg_compute_checksum_4al;
         nm_generic = "genericg_compute_checksum_4al";
      }
      fn_spec = NULL;
      nm_spec = NULL;

      if (host_word_szB == 8) {
         const HChar* nm = NULL;
         ULong VEX_REGPARM(1) (*fn)(HWord) = NULL;
         switch (hWs_to_check) {
            case 1:  fn = genericg_compute_checksum_8al_1;
                     nm = "genericg_compute_checksum_8al_1"; break;
            case 2:  fn = genericg_compute_checksum_8al_2;
                     nm = "genericg_compute_checksum_8al_2"; break;
            case 3:  fn = genericg_compute_checksum_8al_3;
                     nm = "genericg_compute_checksum_8al_3"; break;
            case 4:  fn = genericg_compute_checksum_8al_4;
                     nm = "genericg_compute_checksum_8al_4"; break;
            case 5:  fn = genericg_compute_checksum_8al_5;
                     nm = "genericg_compute_checksum_8al_5"; break;
            case 6:  fn = genericg_compute_checksum_8al_6;
                     nm = "genericg_compute_checksum_8al_6"; break;
            case 7:  fn = genericg_compute_checksum_8al_7;
                     nm = "genericg_compute_checksum_8al_7"; break;
            case 8:  fn = genericg_compute_checksum_8al_8;
                     nm = "genericg_compute_checksum_8al_8"; break;
            case 9:  fn = genericg_compute_checksum_8al_9;
                     nm = "genericg_compute_checksum_8al_9"; break;
            case 10: fn = genericg_compute_checksum_8al_10;
                     nm = "genericg_compute_checksum_8al_10"; break;
            case 11: fn = genericg_compute_checksum_8al_11;
                     nm = "genericg_compute_checksum_8al_11"; break;
            case 12: fn = genericg_compute_checksum_8al_12;
                     nm = "genericg_compute_checksum_8al_12"; break;
            default: break;
         }
         fn_spec = (VEX_REGPARM(1) HWord(*)(HWord)) fn;
         nm_spec = nm;
      } else {
         const HChar* nm = NULL;
         UInt VEX_REGPARM(1) (*fn)(HWord) = NULL;
         switch (hWs_to_check) {
            case 1:  fn = genericg_compute_checksum_4al_1;
                     nm = "genericg_compute_checksum_4al_1"; break;
            case 2:  fn = genericg_compute_checksum_4al_2;
                     nm = "genericg_compute_checksum_4al_2"; break;
            case 3:  fn = genericg_compute_checksum_4al_3;
                     nm = "genericg_compute_checksum_4al_3"; break;
            case 4:  fn = genericg_compute_checksum_4al_4;
                     nm = "genericg_compute_checksum_4al_4"; break;
            case 5:  fn = genericg_compute_checksum_4al_5;
                     nm = "genericg_compute_checksum_4al_5"; break;
            case 6:  fn = genericg_compute_checksum_4al_6;
                     nm = "genericg_compute_checksum_4al_6"; break;
            case 7:  fn = genericg_compute_checksum_4al_7;
                     nm = "genericg_compute_checksum_4al_7"; break;
            case 8:  fn = genericg_compute_checksum_4al_8;
                     nm = "genericg_compute_checksum_4al_8"; break;
            case 9:  fn = genericg_compute_checksum_4al_9;
                     nm = "genericg_compute_checksum_4al_9"; break;
            case 10: fn = genericg_compute_checksum_4al_10;
                     nm = "genericg_compute_checksum_4al_10"; break;
            case 11: fn = genericg_compute_checksum_4al_11;
                     nm = "genericg_compute_checksum_4al_11"; break;
            case 12: fn = genericg_compute_checksum_4al_12;
                     nm = "genericg_compute_checksum_4al_12"; break;
            default: break;
         }
         fn_spec = (VEX_REGPARM(1) HWord(*)(HWord)) fn;
         nm_spec = nm;
      }
      expectedhW = fn_generic( first_hW, hWs_to_check );
      /* If we got a specialised version, check it produces the same
         result as the generic version! */
      if (fn_spec) {
         vassert(nm_spec);
         vassert(expectedhW == fn_spec( first_hW ));
      } else {
         vassert(!nm_spec);
      }
      /* Set CMSTART and CMLEN.  These will describe to the despatcher
         the area of guest code to invalidate should we exit with a
         self-check failure. */
      tistart_tmp = newIRTemp(irsb->tyenv, guest_word_type);
      tilen_tmp   = newIRTemp(irsb->tyenv, guest_word_type);

      IRConst* base2check_IRConst
         = guest_word_type==Ity_I32 ? IRConst_U32(toUInt(base2check))
                                    : IRConst_U64(base2check);
      IRConst* len2check_IRConst
         = guest_word_type==Ity_I32 ? IRConst_U32(len2check)
                                    : IRConst_U64(len2check);
      IRStmt** stmt0 = &irsb->stmts[selfcheck_idx + i * 7 + 0];
      IRStmt** stmt1 = &irsb->stmts[selfcheck_idx + i * 7 + 1];
      IRStmt** stmt2 = &irsb->stmts[selfcheck_idx + i * 7 + 2];
      IRStmt** stmt3 = &irsb->stmts[selfcheck_idx + i * 7 + 3];
      IRStmt** stmt4 = &irsb->stmts[selfcheck_idx + i * 7 + 4];
      IRStmt** stmt5 = &irsb->stmts[selfcheck_idx + i * 7 + 5];
      IRStmt** stmt6 = &irsb->stmts[selfcheck_idx + i * 7 + 6];
      vassert((*stmt0)->tag == Ist_NoOp);
      vassert((*stmt1)->tag == Ist_NoOp);
      vassert((*stmt2)->tag == Ist_NoOp);
      vassert((*stmt3)->tag == Ist_NoOp);
      vassert((*stmt4)->tag == Ist_NoOp);
      vassert((*stmt5)->tag == Ist_NoOp);
      vassert((*stmt6)->tag == Ist_NoOp);

      *stmt0 = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(base2check_IRConst) );
      *stmt1 = IRStmt_WrTmp(tilen_tmp, IRExpr_Const(len2check_IRConst) );
      *stmt2 = IRStmt_Put( offB_GUEST_CMSTART, IRExpr_RdTmp(tistart_tmp) );
      *stmt3 = IRStmt_Put( offB_GUEST_CMLEN, IRExpr_RdTmp(tilen_tmp) );
      /* Generate the entry point descriptors */
      if (abiinfo_both->host_ppc_calls_use_fndescrs) {
         HWord* descr = (HWord*)fn_generic;
         fn_generic_entry = descr[0];
         if (fn_spec) {
            descr = (HWord*)fn_spec;
            fn_spec_entry = descr[0];
         } else {
            fn_spec_entry = (HWord)NULL;
         }
      } else {
         fn_generic_entry = (HWord)fn_generic;
         if (fn_spec) {
            fn_spec_entry = (HWord)fn_spec;
         } else {
            fn_spec_entry = (HWord)NULL;
         }
      }
      /* Generate the call to the relevant function, and the comparison of
         the result against the expected value. */
      IRExpr* callexpr = NULL;
      if (fn_spec) {
         callexpr = mkIRExprCCall(
                       host_word_type, 1/*regparms*/,
                       nm_spec, (void*)fn_spec_entry,
                       mkIRExprVec_1(
                          mkIRExpr_HWord( (HWord)first_hW )
                       )
                    );
      } else {
         callexpr = mkIRExprCCall(
                       host_word_type, 2/*regparms*/,
                       nm_generic, (void*)fn_generic_entry,
                       mkIRExprVec_2(
                          mkIRExpr_HWord( (HWord)first_hW ),
                          mkIRExpr_HWord( (HWord)hWs_to_check )
                       )
                    );
      }

      callresult_tmp = newIRTemp(irsb->tyenv, host_word_type);
      *stmt4 = IRStmt_WrTmp(callresult_tmp, callexpr);
      exitguard_tmp = newIRTemp(irsb->tyenv, Ity_I1);
      *stmt5 = IRStmt_WrTmp(
                  exitguard_tmp,
                  IRExpr_Binop(
                     host_word_type==Ity_I64 ? Iop_CmpNE64 : Iop_CmpNE32,
                     IRExpr_RdTmp(callresult_tmp),
                     host_word_type==Ity_I64
                        ? IRExpr_Const(IRConst_U64(expectedhW))
                        : IRExpr_Const(IRConst_U32(expectedhW))));

      *stmt6 = IRStmt_Exit(
                  IRExpr_RdTmp(exitguard_tmp),
                  Ijk_InvalICache,
                  /* Where we must restart if there's a failure: at the
                     first extent, regardless of which extent the failure
                     actually happened in. */
                  guest_IP_sbstart_IRConst,
                  offB_GUEST_IP
               );
   } /* for (i = 0; i < vge->n_used; i++) */
   for (Int i = vge->n_used;
        i < sizeof(vge->base) / sizeof(vge->base[0]); i++) {
      IRStmt* stmt0 = irsb->stmts[selfcheck_idx + i * 7 + 0];
      IRStmt* stmt1 = irsb->stmts[selfcheck_idx + i * 7 + 1];
      IRStmt* stmt2 = irsb->stmts[selfcheck_idx + i * 7 + 2];
      IRStmt* stmt3 = irsb->stmts[selfcheck_idx + i * 7 + 3];
      IRStmt* stmt4 = irsb->stmts[selfcheck_idx + i * 7 + 4];
      IRStmt* stmt5 = irsb->stmts[selfcheck_idx + i * 7 + 5];
      IRStmt* stmt6 = irsb->stmts[selfcheck_idx + i * 7 + 6];
      vassert(stmt0->tag == Ist_NoOp);
      vassert(stmt1->tag == Ist_NoOp);
      vassert(stmt2->tag == Ist_NoOp);
      vassert(stmt3->tag == Ist_NoOp);
      vassert(stmt4->tag == Ist_NoOp);
      vassert(stmt5->tag == Ist_NoOp);
      vassert(stmt6->tag == Ist_NoOp);
   }
}
/*--------------------------------------------------------------*/
/*--- To do with guarding (conditionalisation) of IRStmts    ---*/
/*--------------------------------------------------------------*/

// Is it possible to guard |e|?  Meaning, is it safe (exception-free) to
// compute |e| and ignore the result?  Since |e| is by definition otherwise
// side-effect-free, we don't have to ask about any other effects caused by
// first computing |e| and then ignoring the result.
static Bool expr_is_guardable ( const IRExpr* e )
{
   switch (e->tag) {
      case Iex_Load:
         return False;
      case Iex_Unop:
         return !primopMightTrap(e->Iex.Unop.op);
      case Iex_Binop:
         return !primopMightTrap(e->Iex.Binop.op);
      case Iex_Triop:
         return !primopMightTrap(e->Iex.Triop.details->op);
      case Iex_Qop:
         return !primopMightTrap(e->Iex.Qop.details->op);
      case Iex_ITE:
      case Iex_CCall:
      case Iex_Get:
      case Iex_GetI:
      case Iex_Const:
      case Iex_RdTmp:
         return True;
      default:
         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("expr_is_guardable: unhandled expr");
   }
}
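/* For instance, a WrTmp whose RHS is Add32(t1,t2) is guardable, but one
   whose RHS is a Load is not, since the load may fault even when the
   guard is false and the loaded value would be ignored. */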
// Is it possible to guard |st|?  Meaning, is it possible to replace |st| by
// some other sequence of IRStmts which have the same effect on the
// architected state when the guard is true, but when it is false, have no
// effect on the architected state and are guaranteed not to cause any
// exceptions?
//
// Note that this isn't as aggressive as it could be: it sometimes returns
// False in cases where |st| is actually guardable.  This routine must
// coordinate closely with add_guarded_stmt_to_end_of below, in the sense
// that that routine must be able to handle any |st| for which this routine
// returns True.
static Bool stmt_is_guardable ( const IRStmt* st )
{
   switch (st->tag) {
      // These are easily guarded.
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_Put:
      case Ist_PutI:
         return True;
      // These are definitely not guardable, or at least it's way too much
      // hassle to do so.
      case Ist_CAS:
      case Ist_LLSC:
      case Ist_MBE:
         return False;
      // These could be guarded, with some effort, if really needed, but
      // currently aren't guardable.
      case Ist_LoadG:
      case Ist_Store:
      case Ist_StoreG:
      case Ist_Exit:
      case Ist_Dirty:
         return False;
      // This is probably guardable, but it depends on the RHS of the
      // assignment.
      case Ist_WrTmp:
         return expr_is_guardable(st->Ist.WrTmp.data);
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("stmt_is_guardable: unhandled stmt");
   }
}
// Are all stmts (but not the end dst value) in |bb| guardable, per
// stmt_is_guardable?
static Bool block_is_guardable ( const IRSB* bb )
{
   Int i = bb->stmts_used;
   vassert(i >= 2); // Must have at least: IMark, side Exit (at the end)
   i--;
   vassert(bb->stmts[i]->tag == Ist_Exit);
   i--;
   for (; i >= 0; i--) {
      if (!stmt_is_guardable(bb->stmts[i]))
         return False;
   }
   return True;
}
// Guard |st| with |guard| and add it to |bb|.  This must be able to handle
// any |st| for which stmt_is_guardable returns True.
static void add_guarded_stmt_to_end_of ( /*MOD*/IRSB* bb,
                                         /*IN*/ IRStmt* st, IRTemp guard )
{
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_WrTmp:
         addStmtToIRSB(bb, st);
         break;
      case Ist_Put: {
         // Put(offs, e) ==> Put(offs, ITE(guard, e, Get(offs, sizeof(e))))
         // Which when flattened out is:
         //    t1 = Get(offs, sizeof(e))
         //    t2 = ITE(guard, e, t1)
         //    Put(offs, t2)
         Int offset = st->Ist.Put.offset;
         IRExpr* e = st->Ist.Put.data;
         IRType ty = typeOfIRExpr(bb->tyenv, e);
         IRTemp t1 = newIRTemp(bb->tyenv, ty);
         IRTemp t2 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1, IRExpr_Get(offset, ty)));
         addStmtToIRSB(bb, IRStmt_WrTmp(t2, IRExpr_ITE(IRExpr_RdTmp(guard),
                                                       e, IRExpr_RdTmp(t1))));
         addStmtToIRSB(bb, IRStmt_Put(offset, IRExpr_RdTmp(t2)));
         break;
      }
      case Ist_PutI: {
         // PutI(descr,ix,bias, e)
         //    ==> PutI(descr,ix,bias, ITE(guard, e, GetI(descr,ix,bias)))
         // Which when flattened out is:
         //    t1 = GetI(descr,ix,bias)
         //    t2 = ITE(guard, e, t1)
         //    PutI(descr,ix,bias, t2)
         IRPutI* details = st->Ist.PutI.details;
         IRRegArray* descr = details->descr;
         IRExpr* ix = details->ix;
         Int bias = details->bias;
         IRExpr* e = details->data;
         IRType ty = typeOfIRExpr(bb->tyenv, e);
         IRTemp t1 = newIRTemp(bb->tyenv, ty);
         IRTemp t2 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1, IRExpr_GetI(descr,ix,bias)));
         addStmtToIRSB(bb, IRStmt_WrTmp(t2, IRExpr_ITE(IRExpr_RdTmp(guard),
                                                       e, IRExpr_RdTmp(t1))));
         addStmtToIRSB(bb, IRStmt_PutI(mkIRPutI(descr,ix,bias,
                                                IRExpr_RdTmp(t2))));
         break;
      }
      case Ist_Exit: {
         // Exit(xguard, dst, jk, offsIP)
         //    ==> t1 = And1(xguard, guard)
         //        Exit(t1, dst, jk, offsIP)
         IRExpr* xguard = st->Ist.Exit.guard;
         IRTemp t1 = newIRTemp(bb->tyenv, Ity_I1);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1, IRExpr_Binop(Iop_And1, xguard,
                                                         IRExpr_RdTmp(guard))));
         addStmtToIRSB(bb, IRStmt_Exit(IRExpr_RdTmp(t1), st->Ist.Exit.jk,
                                       st->Ist.Exit.dst, st->Ist.Exit.offsIP));
         break;
      }
      default:
         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("add_guarded_stmt_to_end_of: unhandled stmt");
   }
}
/*--------------------------------------------------------------*/
/*--- Analysis of block ends                                 ---*/
/*--------------------------------------------------------------*/

typedef
   enum {
      Be_Other=1, // Block end isn't of interest to us
      Be_Uncond,  // Unconditional branch to known destination, unassisted
      Be_Cond     // Conditional branch to known destinations, unassisted
   }
   BlockEndTag;

typedef
   struct {
      BlockEndTag tag;
      union {
         struct {
         } Other;
         struct {
            Long delta;
         } Uncond;
         struct {
            IRTemp condSX;
            Long   deltaSX;
            Long   deltaFT;
         } Cond;
      } Be;
   }
   BlockEnd;
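/* Example: a block ending
      if (t14) { PUT(IP) = 0x4002040:I64; exit-Boring }
      PUT(IP) = 0x400200B:I64; exit-Boring
   is described as Be_Cond with condSX = t14,
   deltaSX = 0x4002040 - guest_IP_sbstart and
   deltaFT = 0x400200B - guest_IP_sbstart. */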
static void ppBlockEnd ( const BlockEnd* be )
{
   switch (be->tag) {
      case Be_Other:
         vex_printf("Other");
         break;
      case Be_Uncond:
         vex_printf("Uncond{delta=%lld}", be->Be.Uncond.delta);
         break;
      case Be_Cond:
         vex_printf("Cond{condSX=");
         ppIRTemp(be->Be.Cond.condSX);
         vex_printf(", deltaSX=%lld, deltaFT=%lld}",
                    be->Be.Cond.deltaSX, be->Be.Cond.deltaFT);
         break;
      default:
         vassert(0);
   }
}
// Return True if |be| definitely does not jump to |delta|.  In case of
// doubt, returns False.
static Bool definitely_does_not_jump_to_delta ( const BlockEnd* be,
                                                Long delta )
{
   switch (be->tag) {
      case Be_Other:
         return False;
      case Be_Uncond:
         return be->Be.Uncond.delta != delta;
      case Be_Cond:
         return be->Be.Cond.deltaSX != delta && be->Be.Cond.deltaFT != delta;
      default:
         vassert(0);
   }
}
static Addr irconst_to_Addr ( const IRConst* con, const IRType guest_word_type )
{
   switch (con->tag) {
      case Ico_U32:
         vassert(guest_word_type == Ity_I32);
         return con->Ico.U32;
      case Ico_U64:
         vassert(guest_word_type == Ity_I64);
         return con->Ico.U64;
      default:
         vassert(0);
   }
}
static Bool irconst_to_maybe_delta ( /*OUT*/Long* delta,
                                     const IRConst* known_dst,
                                     const Addr guest_IP_sbstart,
                                     const IRType guest_word_type,
                                     Bool (*chase_into_ok)(void*,Addr),
                                     void* callback_opaque )
{
   vassert(typeOfIRConst(known_dst) == guest_word_type);

   *delta = 0;

   // Extract the destination guest address.
   Addr dst_ga = irconst_to_Addr(known_dst, guest_word_type);

   // Check we're allowed to chase into it.
   if (!chase_into_ok(callback_opaque, dst_ga))
      return False;

   Addr delta_as_Addr = dst_ga - guest_IP_sbstart;
   // Either |delta_as_Addr| is a 64-bit value, in which case copy it
   // directly to |delta|, or it's a 32-bit value, in which case sign
   // extend it.
   *delta = sizeof(Addr) == 8 ? (Long)delta_as_Addr : (Long)(Int)delta_as_Addr;
   return True;
}
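/* Example of the sign extension: on a host where sizeof(Addr) == 4, with
   guest_IP_sbstart = 0x8000 and dst_ga = 0x7FF0, the subtraction wraps to
   0xFFFFFFF0, which the (Long)(Int) cast sign extends to the intended
   delta of -16. */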
static Bool any_overlap ( Int start1, Int len1, Int start2, Int len2 )
{
   vassert(len1 > 0 && len2 > 0);
   vassert(start1 >= 0 && start2 >= 0);
   if (start1 + len1 <= start2) return False;
   if (start2 + len2 <= start1) return False;
   return True;
}
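/* E.g. any_overlap(0,4, 4,4) is False, since [0,4) and [4,8) merely abut,
   while any_overlap(0,4, 3,4) is True, since byte 3 is in both ranges. */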
/* Scan |stmts|, starting at |scan_start| and working backwards, to detect
   the case where there are no IRStmt_Exits before we find the IMark.  In
   other words, it scans backwards through some prefix of an instruction's
   IR to see if there is an exit there.

   It also checks for explicit PUTs to the PC, via Ist_Put, Ist_PutI or
   Ist_Dirty.  I suspect this is ridiculous overkill, but is here for
   safety. */
static Bool insn_has_no_other_exits_or_PUTs_to_PC (
               IRStmt** const stmts, Int scan_start,
               Int offB_GUEST_IP, Int szB_GUEST_IP,
               const IRTypeEnv* tyenv
            )
{
   Bool found_exit = False;
   Bool found_PUT_to_PC = False;
   Int i = scan_start;
   while (True) {
      if (i < 0)
         break;
      const IRStmt* st = stmts[i];
      if (st->tag == Ist_IMark) {
         // We're back at the start of the insn.  Stop searching.
         break;
      }
      if (st->tag == Ist_Exit) {
         found_exit = True;
         break;
      }
      if (st->tag == Ist_Put) {
         Int offB = st->Ist.Put.offset;
         Int szB = sizeofIRType(typeOfIRExpr(tyenv, st->Ist.Put.data));
         if (any_overlap(offB, szB, offB_GUEST_IP, szB_GUEST_IP)) {
            found_PUT_to_PC = True;
            break;
         }
      }
      if (st->tag == Ist_PutI) {
         const IRPutI* details = st->Ist.PutI.details;
         const IRRegArray* descr = details->descr;
         Int offB = descr->base;
         Int szB = descr->nElems * sizeofIRType(descr->elemTy);
         if (any_overlap(offB, szB, offB_GUEST_IP, szB_GUEST_IP)) {
            found_PUT_to_PC = True;
            break;
         }
      }
      if (st->tag == Ist_Dirty) {
         vassert(!found_PUT_to_PC);
         const IRDirty* details = st->Ist.Dirty.details;
         for (Int j = 0; j < details->nFxState; j++) {
            const IREffect fx        = details->fxState[j].fx;
            const Int      offset    = details->fxState[j].offset;
            const Int      size      = details->fxState[j].size;
            const Int      nRepeats  = details->fxState[j].nRepeats;
            const Int      repeatLen = details->fxState[j].repeatLen;
            if (fx == Ifx_Write || fx == Ifx_Modify) {
               for (Int k = 0; k < nRepeats; k++) {
                  Int offB = offset + k * repeatLen;
                  Int szB = size;
                  if (any_overlap(offB, szB, offB_GUEST_IP, szB_GUEST_IP)) {
                     found_PUT_to_PC = True;
                  }
               }
            }
         }
         if (found_PUT_to_PC) {
            break;
         }
      }
      i--;
   }
   // We expect IR for all instructions to start with an IMark.
   vassert(i >= 0);
   return !found_exit && !found_PUT_to_PC;
}
static void analyse_block_end ( /*OUT*/BlockEnd* be, const IRSB* irsb,
                                const Addr guest_IP_sbstart,
                                const IRType guest_word_type,
                                Bool (*chase_into_ok)(void*,Addr),
                                void* callback_opaque,
                                Int offB_GUEST_IP,
                                Int szB_GUEST_IP,
                                Bool debug_print )
{
   vex_bzero(be, sizeof(*be));

   // -- Conditional branch to known destination
   /* In short, detect the following end form:
        ------ IMark(0x4002009, 2, 0) ------
        // Zero or more non-exit statements
        if (t14) { PUT(184) = 0x4002040:I64; exit-Boring }
        PUT(184) = 0x400200B:I64; exit-Boring
      Checks:
      - Both transfers are 'boring'
      - Both dsts are constants
      - The cond is non-constant (an IRExpr_Tmp)
      - There are no other exits in this instruction
      - The client allows chasing into both destinations
   */
   if (irsb->jumpkind == Ijk_Boring && irsb->stmts_used >= 2) {
      const IRStmt* maybe_exit = irsb->stmts[irsb->stmts_used - 1];
      if (maybe_exit->tag == Ist_Exit
          && maybe_exit->Ist.Exit.guard->tag == Iex_RdTmp
          && maybe_exit->Ist.Exit.jk == Ijk_Boring
          && irsb->next->tag == Iex_Const
          && insn_has_no_other_exits_or_PUTs_to_PC(
                irsb->stmts, irsb->stmts_used - 2,
                offB_GUEST_IP, szB_GUEST_IP, irsb->tyenv)) {
         vassert(maybe_exit->Ist.Exit.offsIP == irsb->offsIP);
         IRConst* dst_SX  = maybe_exit->Ist.Exit.dst;
         IRConst* dst_FT  = irsb->next->Iex.Const.con;
         IRTemp   cond_SX = maybe_exit->Ist.Exit.guard->Iex.RdTmp.tmp;
         Long delta_SX = 0;
         Long delta_FT = 0;
         Bool ok_SX
            = irconst_to_maybe_delta(&delta_SX, dst_SX,
                                     guest_IP_sbstart, guest_word_type,
                                     chase_into_ok, callback_opaque);
         Bool ok_FT
            = irconst_to_maybe_delta(&delta_FT, dst_FT,
                                     guest_IP_sbstart, guest_word_type,
                                     chase_into_ok, callback_opaque);
         if (ok_SX && ok_FT) {
            be->tag = Be_Cond;
            be->Be.Cond.condSX  = cond_SX;
            be->Be.Cond.deltaSX = delta_SX;
            be->Be.Cond.deltaFT = delta_FT;
            goto out;
         }
      }
   }

   // -- Unconditional branch/call to known destination
   /* Four checks:
      - The transfer is 'boring' or 'call', so that no assistance is needed
      - The dst is a constant (known at jit time)
      - There are no other exits in this instruction.  In other words, the
        transfer is unconditional.
      - The client allows chasing into the destination.
   */
   if ((irsb->jumpkind == Ijk_Boring || irsb->jumpkind == Ijk_Call)
       && irsb->next->tag == Iex_Const) {
      if (insn_has_no_other_exits_or_PUTs_to_PC(
             irsb->stmts, irsb->stmts_used - 1,
             offB_GUEST_IP, szB_GUEST_IP, irsb->tyenv)) {
         // We've got the right pattern.  Check whether we can chase into
         // the destination, and if so convert that to a delta value.
         const IRConst* known_dst = irsb->next->Iex.Const.con;
         Long delta = 0;
         // This call also checks the type of the dst addr, and that the
         // client allows chasing into it.
         Bool ok = irconst_to_maybe_delta(&delta, known_dst,
                                          guest_IP_sbstart, guest_word_type,
                                          chase_into_ok, callback_opaque);
         if (ok) {
            be->tag = Be_Uncond;
            be->Be.Uncond.delta = delta;
            goto out;
         }
      }
   }

   // Not identified as anything of interest to us.
   be->tag = Be_Other;

  out:
   if (debug_print) {
      vex_printf("\nBlockEnd: ");
      ppBlockEnd(be);
      vex_printf("\n");
   }
}
/*--------------------------------------------------------------*/
/*--- Disassembly of basic (not super) blocks                ---*/
/*--------------------------------------------------------------*/

/* Disassemble instructions, starting at |&guest_code[delta_IN]|, into
   |irbb|, and terminate the block properly.  At most |n_instrs_allowed_IN|
   may be disassembled, and this function may choose to disassemble fewer.

   Also do minimal simplifications on the resulting block, so as to convert
   the end of the block into something that |analyse_block_end| can reliably
   recognise.

   |irbb| will both be modified, and replaced by a new, simplified version,
   which is returned.
*/
static IRSB* disassemble_basic_block_till_stop(
   /*OUT*/ Int*    n_instrs,        // #instrs actually used
   /*OUT*/ Bool*   is_verbose_seen, // did we get a 'verbose' hint?
   /*OUT*/ Addr*   extent_base,     // VexGuestExtents[..].base
   /*OUT*/ UShort* extent_len,      // VexGuestExtents[..].len
   /*MOD*/ IRSB*   irbb,
   const Long      delta_IN,
   const Int       n_instrs_allowed_IN,
   const Addr      guest_IP_sbstart,
   const VexEndness host_endness,
   const Bool      sigill_diag,
   const VexArch   arch_guest,
   const VexArchInfo* archinfo_guest,
   const VexAbiInfo*  abiinfo_both,
   const IRType    guest_word_type,
   const Bool      debug_print,
   const DisOneInstrFn dis_instr_fn,
   const UChar*    guest_code,
   const Int       offB_GUEST_IP
)
{
   /* This is the max instrs we allow in the block.  It starts off at
      |n_instrs_allowed_IN| but we may choose to reduce it in the case where
      the instruction disassembler returns an 'is verbose' hint.  This is so
      as to ensure that the JIT doesn't run out of space.  See bug 375839
      for a motivating example. */
   /* Process instructions. */
   Long delta = delta_IN;
   Int n_instrs_allowed = n_instrs_allowed_IN;

   *n_instrs = 0;
   *is_verbose_seen = False;
   *extent_base = guest_IP_sbstart + delta;
   *extent_len = 0;

   while (True) {
      vassert(*n_instrs < n_instrs_allowed);

      /* This is the IP of the instruction we're just about to deal
         with. */
      Addr guest_IP_curr_instr = guest_IP_sbstart + delta;

      /* This is the irbb statement array index of the first stmt in
         this insn.  That will always be the instruction-mark
         descriptor. */
      Int first_stmt_idx = irbb->stmts_used;
      /* Add an instruction-mark statement.  We won't know until after
         disassembling the instruction how long it is, so just put in a
         zero length and we'll fix it up later.

         On ARM, the least significant bit of the instr address
         distinguishes ARM vs Thumb instructions.  All instructions
         actually start on at least 2-aligned addresses.  So we need
         to ignore the bottom bit of the insn address when forming the
         IMark's address field, but put that bottom bit in the delta
         field, so that comparisons against guest_R15T for Thumb can
         be done correctly.  By inspecting the delta field,
         instruction processors can determine whether the instruction
         was originally Thumb or ARM.  For more details of this
         convention, see comments on definition of guest_R15T in
         libvex_guest_arm.h. */
      if (arch_guest == VexArchARM && (guest_IP_curr_instr & 1)) {
         /* Thumb insn => mask out the T bit, but put it in delta */
         addStmtToIRSB( irbb,
                        IRStmt_IMark(guest_IP_curr_instr & ~(Addr)1,
                                     0, /* len */
                                     1  /* delta */
                        )
         );
      } else {
         /* All other targets: store IP as-is, and set delta to zero. */
         addStmtToIRSB( irbb,
                        IRStmt_IMark(guest_IP_curr_instr,
                                     0, /* len */
                                     0  /* delta */
                        )
         );
      }

      if (debug_print && *n_instrs > 0)
         vex_printf("\n");

      /* Finally, actually disassemble an instruction. */
      vassert(irbb->next == NULL);
      DisResult dres
         = dis_instr_fn ( irbb, guest_code, delta, guest_IP_curr_instr,
                          arch_guest, archinfo_guest, abiinfo_both,
                          host_endness, sigill_diag );
      /* stay sane ... */
      vassert(dres.whatNext == Dis_StopHere || dres.whatNext == Dis_Continue);
      /* ... disassembled insn length is sane ... */
      vassert(dres.len <= 24);

      /* If the disassembly function passed us a hint, take note of it. */
      if (LIKELY(dres.hint == Dis_HintNone)) {
         /* Do nothing */
      } else {
         vassert(dres.hint == Dis_HintVerbose);
         /* The current insn is known to be verbose.  Lower the max insns
            limit if necessary so as to avoid running the JIT out of space
            in the event that we've encountered the start of a long sequence
            of them.  This is expected to be a very rare event.  In any case
            the remaining limit (in the default setting, 30 insns) is still
            so high that most blocks will terminate anyway before then.  So
            this is very unlikely to give a perf hit in practice.  See bug
            375839 for the motivating example. */
         if (!(*is_verbose_seen)) {
            *is_verbose_seen = True;
            // Halve the number of allowed insns, but only above 2
            if (n_instrs_allowed > 2) {
               n_instrs_allowed = ((n_instrs_allowed - 2) / 2) + 2;
               //vassert(*n_instrs <= n_instrs_allowed);
            }
         }
      }
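      /* For example, the halving above reduces an allowance of 60 to
         ((60-2)/2)+2 == 31, and 10 to 6; allowances of 2 or less are
         left unchanged. */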
      /* Fill in the insn-mark length field. */
      vassert(first_stmt_idx >= 0 && first_stmt_idx < irbb->stmts_used);
      IRStmt* imark = irbb->stmts[first_stmt_idx];
      vassert(imark);
      vassert(imark->tag == Ist_IMark);
      vassert(imark->Ist.IMark.len == 0);
      imark->Ist.IMark.len = dres.len;

      /* Print the resulting IR, if needed. */
      if (vex_traceflags & VEX_TRACE_FE) {
         for (Int i = first_stmt_idx; i < irbb->stmts_used; i++) {
            vex_printf("              ");
            ppIRStmt(irbb->stmts[i]);
            vex_printf("\n");
         }
      }

      /* Individual insn disassembly may not mess with irbb->next.
         This function is the only place where it can be set. */
      vassert(irbb->next == NULL);
      vassert(irbb->jumpkind == Ijk_Boring);
      vassert(irbb->offsIP == 0);

      /* Individual insn disassembly must finish the IR for each
         instruction with an assignment to the guest PC. */
      vassert(first_stmt_idx < irbb->stmts_used);
      /* it follows that irbb->stmts_used must be > 0 */
      { IRStmt* st = irbb->stmts[irbb->stmts_used-1];
        vassert(st);
        vassert(st->tag == Ist_Put);
        vassert(st->Ist.Put.offset == offB_GUEST_IP);
        /* Really we should also check that the type of the Put'd data
           == guest_word_type, but that's a bit expensive. */
      }
      /* Update the extents entry that we are constructing. */
      /* Since vex_control.guest_max_insns is required to be < 100 and
         each insn is at most 20 bytes long, this limit of 5000 seems
         reasonable: the maximum possible extent length will be
         100 * 20 == 2000. */
      vassert(*extent_len < 5000);
      (*extent_len) += dres.len;
      (*n_instrs)++;

      /* Advance delta (inconspicuous but very important :-) */
      delta += (Long)dres.len;
      Bool stopNow = False;
      switch (dres.whatNext) {
         case Dis_Continue:
            vassert(dres.jk_StopHere == Ijk_INVALID);
            if (*n_instrs >= n_instrs_allowed) {
               /* We have to stop.  See comment above re irbb field
                  settings here. */
               irbb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
               /* irbb->jumpkind must already be Ijk_Boring */
               irbb->offsIP = offB_GUEST_IP;
               stopNow = True;
            }
            break;
         case Dis_StopHere:
            vassert(dres.jk_StopHere != Ijk_INVALID);
            /* See comment above re irbb field settings here. */
            irbb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
            irbb->jumpkind = dres.jk_StopHere;
            irbb->offsIP = offB_GUEST_IP;
            stopNow = True;
            break;
         default:
            vpanic("bb_to_IR");
      }

      if (stopNow)
         break;
   } /* while (True) */
   /* irbb->next must now be set, since we've finished the block.
      Print it if necessary. */
   vassert(irbb->next != NULL);
   if (debug_print) {
      vex_printf("              ");
      vex_printf( "PUT(%d) = ", irbb->offsIP);
      ppIRExpr( irbb->next );
      vex_printf( "; exit-");
      ppIRJumpKind(irbb->jumpkind);
      vex_printf( "\n");
      vex_printf( "\n");
   }

   /* And clean it up. */
   irbb = do_minimal_initial_iropt_BB ( irbb );
   if (debug_print) {
      ppIRSB(irbb);
   }

   return irbb;
}
/*--------------------------------------------------------------*/
/*--- Disassembly of traces: helper functions                ---*/
/*--------------------------------------------------------------*/

// Swap the side exit and fall through exit for |bb|.  Update |be| so as to
// be consistent.
static void swap_sx_and_ft ( /*MOD*/IRSB* bb, /*MOD*/BlockEnd* be )
{
   vassert(be->tag == Be_Cond);
   vassert(bb->stmts_used >= 2); // Must have at least: IMark, Exit
   IRStmt* exit = bb->stmts[bb->stmts_used - 1];
   vassert(exit->tag == Ist_Exit);
   vassert(exit->Ist.Exit.guard->tag == Iex_RdTmp);
   vassert(exit->Ist.Exit.guard->Iex.RdTmp.tmp == be->Be.Cond.condSX);
   vassert(bb->next->tag == Iex_Const);
   vassert(bb->jumpkind == Ijk_Boring);
   // We need to insert a new stmt, just before the exit, that computes
   // 'Not1' of the guard condition.  Replace |bb->stmts[bb->stmts_used - 1]|
   // by the new stmt, and then place |exit| immediately after it.
   IRTemp invertedGuard = newIRTemp(bb->tyenv, Ity_I1);
   bb->stmts[bb->stmts_used - 1]
      = IRStmt_WrTmp(invertedGuard,
                     IRExpr_Unop(Iop_Not1,
                                 IRExpr_RdTmp(exit->Ist.Exit.guard
                                                  ->Iex.RdTmp.tmp)));
   exit->Ist.Exit.guard->Iex.RdTmp.tmp = invertedGuard;
   addStmtToIRSB(bb, exit);

   // Swap the actual destination constants.
   { IRConst* tmp = exit->Ist.Exit.dst;
     exit->Ist.Exit.dst = bb->next->Iex.Const.con;
     bb->next->Iex.Const.con = tmp;
   }

   // And update |be|.
   { be->Be.Cond.condSX = invertedGuard;
     Long tmp = be->Be.Cond.deltaSX;
     be->Be.Cond.deltaSX = be->Be.Cond.deltaFT;
     be->Be.Cond.deltaFT = tmp;
   }
}
static void update_instr_budget( /*MOD*/Int* instrs_avail,
                                 /*MOD*/Bool* verbose_mode,
                                 const Int bb_instrs_used,
                                 const Bool bb_verbose_seen )
{
   if (0)
      vex_printf("UIB: verbose_mode %d, instrs_avail %d, "
                 "bb_instrs_used %d, bb_verbose_seen %d\n",
                 *verbose_mode ? 1 : 0, *instrs_avail,
                 bb_instrs_used, bb_verbose_seen ? 1 : 0);

   vassert(bb_instrs_used <= *instrs_avail);

   if (bb_verbose_seen && !(*verbose_mode)) {
      *verbose_mode = True;
      // Adjust *instrs_avail so that, when it becomes zero, we haven't used
      // more than 50% of vex_control.guest_max_insns.
      if (bb_instrs_used > vex_control.guest_max_insns / 2) {
         *instrs_avail = 0;
      } else {
         *instrs_avail = vex_control.guest_max_insns / 2;
      }
      vassert(*instrs_avail >= 0);
   }

   // Subtract bb_instrs_used from *instrs_avail, clamping at 0 if necessary.
   if (bb_instrs_used > *instrs_avail) {
      *instrs_avail = 0;
   } else {
      *instrs_avail -= bb_instrs_used;
   }

   vassert(*instrs_avail >= 0);
}
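/* Worked example (assuming vex_control.guest_max_insns == 60, say): a
   first block of 10 insns that carried a verbose hint first resets
   *instrs_avail to 60/2 == 30, then deducts the 10 used, leaving 20. */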
// Add the extent [base, +len) to |vge|.  Asserts if |vge| is already full.
// As an optimisation only, tries to also merge the new extent with the
// previous one, if possible.
static void add_extent ( /*MOD*/VexGuestExtents* vge, Addr base, UShort len )
{
   const UInt limit = sizeof(vge->base) / sizeof(vge->base[0]);
   vassert(limit == 3);
   const UInt i = vge->n_used;
   vassert(i < limit);
   vge->n_used++;
   vge->base[i] = base;
   vge->len[i] = len;
   // Try to merge with the previous extent
   if (i > 0
       && (((UInt)vge->len[i-1]) + ((UInt)len))
          < 200*25 /* say, 200 insns of size 25 bytes, absolute worst case */
       && vge->base[i-1] + vge->len[i-1] == base) {
      vge->len[i-1] += len;
      vge->n_used--;
      //vex_printf("MERGE\n");
   }
}
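/* Example of the merge: add_extent for [0x1000, +12) followed by
   add_extent for [0x100C, +8) leaves a single merged extent
   [0x1000, +20), with vge->n_used still 1. */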
/*--------------------------------------------------------------*/
/*--- Disassembly of traces: main function                   ---*/
/*--------------------------------------------------------------*/

/* Disassemble a complete basic block, starting at guest_IP_start,
   returning a new IRSB.  The disassembler may chase across basic
   block boundaries if it wishes and if chase_into_ok allows it.
   The precise guest address ranges from which code has been taken
   are written into vge.  guest_IP_sbstart is taken to be the IP in
   the guest's address space corresponding to the instruction at
   &guest_code[0].

   dis_instr_fn is the arch-specific fn to disassemble one instruction;
   it is this that does the real work.

   needs_self_check is a callback used to ask the caller which of the
   extents, if any, a self check is required for.  The returned value
   is a bitmask with a 1 in position i indicating that the i'th extent
   needs a check.  Since there can be at most 3 extents, the returned
   values must be between 0 and 7.

   The number of extents which did get a self check (0 to 3) is put in
   n_sc_extents.  The caller already knows this because it told us
   which extents to add checks for, via the needs_self_check callback,
   but we ship the number back out here for the caller's convenience.

   preamble_function is a callback which allows the caller to add
   its own IR preamble (following the self-check, if any).  May be
   NULL.  If non-NULL, the IRSB under construction is handed to
   this function, which presumably adds IR statements to it.  The
   callback may optionally complete the block and direct bb_to_IR
   not to disassemble any instructions into it; this is indicated
   by the callback returning True.

   offB_GUEST_CMSTART and offB_GUEST_CMLEN are the offsets of
   guest_CMSTART and guest_CMLEN.  Since this routine has to work for
   any guest state, without knowing what it is, those offsets have to
   be passed in.

   callback_opaque is a caller-supplied pointer to data which the
   callbacks may want to see.  Vex has no idea what it is.
   (In fact it's a VgInstrumentClosure.)
*/
/* Regarding IP updating.  dis_instr_fn (that does the guest specific
   work of disassembling an individual instruction) must finish the
   resulting IR with "PUT(guest_IP) = ".  Hence in all cases it must
   state the next instruction address.

   If the block is to be ended at that point, then this routine
   (bb_to_IR) will set up the next/jumpkind/offsIP fields so as to
   make a transfer (of the right kind) to "GET(guest_IP)".  Hence if
   dis_instr_fn generates incorrect IP updates we will see it
   immediately (due to jumping to the wrong next guest address).

   However it is also necessary to set this up so it can be optimised
   nicely.  The IRSB exit is defined to update the guest IP, so that
   chaining works -- since the chain_me stubs expect the chain-to
   address to be in the guest state.  Hence what the IRSB next fields
   will contain initially is (implicitly)

   PUT(guest_IP) [implicitly] = GET(guest_IP) [explicit expr on ::next]

   which looks pretty strange at first.  Eg so unconditional branch
   to some address 0x123456 looks like this:

   PUT(guest_IP) = 0x123456;  // dis_instr_fn generates this
   // the exit
   PUT(guest_IP) [implicitly] = GET(guest_IP); exit-Boring

   after redundant-GET and -PUT removal by iropt, we get what we want:

   // the exit
   PUT(guest_IP) [implicitly] = 0x123456; exit-Boring

   This makes the IRSB-end case the same as the side-exit case: update
   IP, then transfer.  There is no redundancy of representation for
   the destination, and we use the destination specified by
   dis_instr_fn, so any errors it makes show up sooner.
*/
IRSB* bb_to_IR (
   /*OUT*/VexGuestExtents* vge,
   /*OUT*/UInt*            n_sc_extents,
   /*OUT*/UInt*            n_guest_instrs, /* stats only */
   /*OUT*/UShort*          n_uncond_in_trace, /* stats only */
   /*OUT*/UShort*          n_cond_in_trace, /* stats only */
   /*MOD*/VexRegisterUpdates* pxControl,
   /*IN*/ void*            callback_opaque,
   /*IN*/ DisOneInstrFn    dis_instr_fn,
   /*IN*/ const UChar*     guest_code,
   /*IN*/ Addr             guest_IP_sbstart,
   /*IN*/ Bool             (*chase_into_ok)(void*,Addr),
   /*IN*/ VexEndness       host_endness,
   /*IN*/ Bool             sigill_diag,
   /*IN*/ VexArch          arch_guest,
   /*IN*/ const VexArchInfo* archinfo_guest,
   /*IN*/ const VexAbiInfo*  abiinfo_both,
   /*IN*/ IRType           guest_word_type,
   /*IN*/ UInt             (*needs_self_check)
                              (void*, /*MB_MOD*/VexRegisterUpdates*,
                               const VexGuestExtents*),
   /*IN*/ Bool             (*preamble_function)(void*,IRSB*),
   /*IN*/ Int              offB_GUEST_CMSTART,
   /*IN*/ Int              offB_GUEST_CMLEN,
   /*IN*/ Int              offB_GUEST_IP,
   /*IN*/ Int              szB_GUEST_IP
)
{
   Bool debug_print = toBool(vex_traceflags & VEX_TRACE_FE);

   /* check sanity .. */
   vassert(sizeof(HWord) == sizeof(void*));
   vassert(vex_control.guest_max_insns >= 1);
   vassert(vex_control.guest_max_insns <= 100);
   vassert(vex_control.guest_chase == False
           || vex_control.guest_chase == True);
   vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64);

   if (guest_word_type == Ity_I32) {
      vassert(szB_GUEST_IP == 4);
      vassert((offB_GUEST_IP % 4) == 0);
   } else {
      vassert(szB_GUEST_IP == 8);
      vassert((offB_GUEST_IP % 8) == 0);
   }
   /* Initialise all return-by-ref state. */
   vge->n_used        = 0;
   *n_sc_extents      = 0;
   *n_guest_instrs    = 0;
   *n_uncond_in_trace = 0;
   *n_cond_in_trace   = 0;

   /* And a new IR superblock to dump the result into. */
   IRSB* irsb = emptyIRSB();

   /* Leave 21 spaces in which to put the check statements for a self
      checking translation (up to 3 extents, and 7 stmts required for
      each).  We won't know until later the extents and checksums of
      the areas, if any, that need to be checked. */
   IRStmt* nop = IRStmt_NoOp();
   Int selfcheck_idx = irsb->stmts_used;
   for (Int i = 0; i < 3 * 7; i++)
      addStmtToIRSB( irsb, nop );

   /* If the caller supplied a function to add its own preamble, use
      it now. */
   if (preamble_function) {
      Bool stopNow = preamble_function( callback_opaque, irsb );
      if (stopNow) {
         /* The callback has completed the IR block without any guest
            insns being disassembled into it, so just return it at
            this point, even if a self-check was requested - as there
            is nothing to self-check.  The 21 self-check no-ops will
            still be in place, but they are harmless. */
         vge->n_used  = 1;
         vge->base[0] = guest_IP_sbstart;
         vge->len[0]  = 0;
         return irsb;
      }
   }
   /* Running state:
      irsb          the SB we are incrementally constructing
      vge           associated extents for irsb
      instrs_used   instrs incorporated in irsb so far
      instrs_avail  number of instrs we have space for
      verbose_mode  did we see an 'is verbose' hint at some point?
   */
   Int  instrs_used  = 0;
   Int  instrs_avail = vex_control.guest_max_insns;
   Bool verbose_mode = False;

   /* Disassemble the initial block until we have to stop. */
   {
      Int    ib_instrs_used = 0;
      Bool   ib_verbose_seen = False;
      Addr   ib_base = 0;
      UShort ib_len = 0;
      irsb = disassemble_basic_block_till_stop(
                /*OUT*/ &ib_instrs_used, &ib_verbose_seen, &ib_base, &ib_len,
                /*MOD*/ irsb,
                /*IN*/  0/*delta for the first block in the trace*/,
                instrs_avail, guest_IP_sbstart, host_endness, sigill_diag,
                arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
                debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
             );
      vassert(ib_instrs_used <= instrs_avail);

      // Update instrs_used, extents, budget.
      instrs_used += ib_instrs_used;
      add_extent(vge, ib_base, ib_len);
      update_instr_budget(&instrs_avail, &verbose_mode,
                          ib_instrs_used, ib_verbose_seen);
   }
   /* Now, see if we can extend the initial block. */
   while (True) {
      const Int n_extent_slots = sizeof(vge->base) / sizeof(vge->base[0]);
      vassert(n_extent_slots == 3);

      // Reasons to give up immediately:
      // User or tool asked us not to chase
      if (!vex_control.guest_chase)
         break;

      // Out of extent slots
      vassert(vge->n_used <= n_extent_slots);
      if (vge->n_used == n_extent_slots)
         break;

      // Almost out of available instructions
      vassert(instrs_avail >= 0);
      if (instrs_avail < 3)
         break;
      // Try for an extend.  What kind we do depends on how the current
      // trace ends.
      /* Regarding the use of |sigill_diag| in the extension logic below.
         This is a Bool which controls whether or not the individual insn
         disassemblers print an error message in the case where they don't
         recognise an instruction.  Generally speaking this is set to True,
         but VEX's client can set it to False if it wants.

         Now that we are speculatively chasing both arms of a conditional
         branch, this can lead to the following problem: one of those arms
         contains an undecodable instruction.  That insn is not reached at
         run time, because the branch itself tests some CPU hwcaps info (or
         whatever) and execution goes down the other path.  However, it has
         the bad side effect that the speculative disassembly will
         nevertheless produce an error message when |sigill_diag| is True.

         To avoid this, in calls to |disassemble_basic_block_till_stop| for
         speculative code, we pass False instead of |sigill_diag|.  Note
         that any (unconditional-chase) call to
         |disassemble_basic_block_till_stop| that happens after a
         conditional chase that results in recovery of an &&-idiom, is
         still really non-speculative, because the &&-idiom translation can
         only happen when both paths lead to the same continuation point.
         The result is that we know that the initial BB, and BBs recovered
         via chasing an unconditional branch, are sure to be executed, even
         if that unconditional branch follows a conditional branch which
         got folded into an &&-idiom.  So we don't need to change the
         |sigill_diag| value used for them.  It's only for the
         conditional-branch SX and FT disassembly that it must be set to
         |False|.
      */
      BlockEnd irsb_be;
      analyse_block_end(&irsb_be, irsb, guest_IP_sbstart, guest_word_type,
                        chase_into_ok, callback_opaque,
                        offB_GUEST_IP, szB_GUEST_IP, debug_print);
      // Try for an extend based on an unconditional branch or call to a
      // known destination.
      if (irsb_be.tag == Be_Uncond) {
         if (debug_print) {
            vex_printf("\n-+-+ Unconditional follow (ext# %d) to 0x%llx "
                       "-+-+\n\n",
                       (Int)vge->n_used,
                       (ULong)((Long)guest_IP_sbstart
                               + irsb_be.Be.Uncond.delta));
         }
         Int    bb_instrs_used = 0;
         Bool   bb_verbose_seen = False;
         Addr   bb_base = 0;
         UShort bb_len = 0;
         IRSB* bb
            = disassemble_basic_block_till_stop(
                 /*OUT*/ &bb_instrs_used, &bb_verbose_seen, &bb_base, &bb_len,
                 /*MOD*/ emptyIRSB(),
                 /*IN*/  irsb_be.Be.Uncond.delta,
                 instrs_avail, guest_IP_sbstart, host_endness, sigill_diag,
                 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
                 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
              );
         vassert(bb_instrs_used <= instrs_avail);

         /* Now we have to append 'bb' to 'irsb'. */
         concatenate_irsbs(irsb, bb);

         // Update instrs_used, extents, budget.
         instrs_used += bb_instrs_used;
         add_extent(vge, bb_base, bb_len);
         update_instr_budget(&instrs_avail, &verbose_mode,
                             bb_instrs_used, bb_verbose_seen);
         *n_uncond_in_trace += 1;
      } // if (be.tag == Be_Uncond)
      // Try for an extend based on a conditional branch, specifically in
      // the hope of identifying and recovering an "A && B" condition
      // spread across two basic blocks.
      if (irsb_be.tag == Be_Cond) {
         if (debug_print) {
            vex_printf("\n-+-+ (ext# %d) Considering cbranch to"
                       " SX=0x%llx FT=0x%llx -+-+\n\n",
                       (Int)vge->n_used,
                       (ULong)((Long)guest_IP_sbstart
                               + irsb_be.Be.Cond.deltaSX),
                       (ULong)((Long)guest_IP_sbstart
                               + irsb_be.Be.Cond.deltaFT));
         }
         const Int instrs_avail_spec = 3;

         if (debug_print) {
            vex_printf("-+-+ SPEC side exit -+-+\n\n");
         }
         Int    sx_instrs_used = 0;
         Bool   sx_verbose_seen = False;
         Addr   sx_base = 0;
         UShort sx_len = 0;
         IRSB* sx_bb
            = disassemble_basic_block_till_stop(
                 /*OUT*/ &sx_instrs_used, &sx_verbose_seen, &sx_base, &sx_len,
                 /*MOD*/ emptyIRSB(),
                 /*IN*/  irsb_be.Be.Cond.deltaSX,
                 instrs_avail_spec, guest_IP_sbstart, host_endness,
                 /*sigill_diag=*/False, // See comment above
                 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
                 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
              );
         vassert(sx_instrs_used <= instrs_avail_spec);
         BlockEnd sx_be;
         analyse_block_end(&sx_be, sx_bb, guest_IP_sbstart, guest_word_type,
                           chase_into_ok, callback_opaque,
                           offB_GUEST_IP, szB_GUEST_IP, debug_print);

         if (debug_print) {
            vex_printf("\n-+-+ SPEC fall through -+-+\n\n");
         }
         Int    ft_instrs_used = 0;
         Bool   ft_verbose_seen = False;
         Addr   ft_base = 0;
         UShort ft_len = 0;
         IRSB* ft_bb
            = disassemble_basic_block_till_stop(
                 /*OUT*/ &ft_instrs_used, &ft_verbose_seen, &ft_base, &ft_len,
                 /*MOD*/ emptyIRSB(),
                 /*IN*/  irsb_be.Be.Cond.deltaFT,
                 instrs_avail_spec, guest_IP_sbstart, host_endness,
                 /*sigill_diag=*/False, // See comment above
                 arch_guest, archinfo_guest, abiinfo_both, guest_word_type,
                 debug_print, dis_instr_fn, guest_code, offB_GUEST_IP
              );
         vassert(ft_instrs_used <= instrs_avail_spec);
         BlockEnd ft_be;
         analyse_block_end(&ft_be, ft_bb, guest_IP_sbstart, guest_word_type,
                           chase_into_ok, callback_opaque,
                           offB_GUEST_IP, szB_GUEST_IP, debug_print);
         /* In order for the transformation to be remotely valid, we need:
            - At least one of sx_bb or ft_bb to have a Be_Cond end.
            - sx_bb and ft_bb definitely don't form a loop.
         */
         Bool ok = sx_be.tag == Be_Cond || ft_be.tag == Be_Cond;
         if (ok) {
            ok = definitely_does_not_jump_to_delta(&sx_be,
                                                   irsb_be.Be.Cond.deltaFT)
                 || definitely_does_not_jump_to_delta(&ft_be,
                                                      irsb_be.Be.Cond.deltaSX);
         }

         // Check for other mutancy:
         //   irsb ft == sx, or the same for ft itself or sx itself
         if (ok) {
            if (irsb_be.Be.Cond.deltaSX == irsb_be.Be.Cond.deltaFT
                || (sx_be.tag == Be_Cond
                    && sx_be.Be.Cond.deltaSX == sx_be.Be.Cond.deltaFT)
                || (ft_be.tag == Be_Cond
                    && ft_be.Be.Cond.deltaSX == ft_be.Be.Cond.deltaFT)) {
               ok = False;
            }
         }
         /* Now let's see if any of our four cases actually holds (viz, is
            this really an && idiom?) */
         UInt idiom = 4;
         if (ok) {
            vassert(irsb_be.tag == Be_Cond);
            UInt iom1 = 4/*invalid*/;
            if (sx_be.tag == Be_Cond) {
               /**/ if (sx_be.Be.Cond.deltaFT == irsb_be.Be.Cond.deltaFT)
                  iom1 = 0;
               else if (sx_be.Be.Cond.deltaSX == irsb_be.Be.Cond.deltaFT)
                  iom1 = 1;
            }
            UInt iom2 = 4/*invalid*/;
            if (ft_be.tag == Be_Cond) {
               /**/ if (ft_be.Be.Cond.deltaFT == irsb_be.Be.Cond.deltaSX)
                  iom2 = 2;
               else if (ft_be.Be.Cond.deltaSX == irsb_be.Be.Cond.deltaSX)
                  iom2 = 3;
            }
            /* We should only have identified at most one of the four
               idioms. */
            vassert(iom1 == 4 || iom2 == 4);
            idiom = (iom1 < 4) ? iom1 : (iom2 < 4 ? iom2 : 4);
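            /* Reading of the four cases, as a sketch: idiom 0 means sx_bb
               falls through to the same place irsb does; idiom 1 means
               sx_bb's side exit goes there instead; idioms 2 and 3 are the
               mirror images via ft_bb and irsb's side exit.  The
               normalisation below reduces all four to the idiom-0 shape. */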
            if (idiom == 4) {
               ok = False;
               if (debug_print) {
                  vex_printf("\n-+-+ &&-idiom not recognised, "
                             "giving up. -+-+\n\n");
               }
            }
         }

         if (ok) {
            vassert(idiom < 4);
            // "Normalise" the data so as to ensure we only have one of the
            // four idioms to transform.
            if (idiom == 2 || idiom == 3) {
               swap_sx_and_ft(irsb, &irsb_be);
#              define SWAP(_ty, _aa, _bb) \
                  do { _ty _tmp = _aa; _aa = _bb; _bb = _tmp; } while (0)
               SWAP(Int,      sx_instrs_used,  ft_instrs_used);
               SWAP(Bool,     sx_verbose_seen, ft_verbose_seen);
               SWAP(Addr,     sx_base,         ft_base);
               SWAP(UShort,   sx_len,          ft_len);
               SWAP(IRSB*,    sx_bb,           ft_bb);
               SWAP(BlockEnd, sx_be,           ft_be);
#              undef SWAP
            }
            if (idiom == 1 || idiom == 3) {
               swap_sx_and_ft(sx_bb, &sx_be);
            }
            vassert(sx_be.tag == Be_Cond);
            vassert(sx_be.Be.Cond.deltaFT == irsb_be.Be.Cond.deltaFT);
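            // After normalisation we are always in the idiom-0 shape:
            // sx_bb is the arm to be guarded, and its fall-through target
            // coincides with irsb's own fall-through target.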

            if (debug_print) {
               vex_printf("\n-+-+ After normalisation (idiom=%u) -+-+\n",
                          idiom);
               vex_printf("\n-+-+ IRSB -+-+\n");
               ppIRSB(irsb);
               ppBlockEnd(&irsb_be);
               vex_printf("\n\n-+-+ SX -+-+\n");
               ppIRSB(sx_bb);
               ppBlockEnd(&sx_be);
               vex_printf("\n");
            }

            // Finally, check the sx block actually is guardable.
            ok = block_is_guardable(sx_bb);
            if (!ok && debug_print) {
               vex_printf("\n-+-+ SX not guardable, giving up. -+-+\n\n");
            }
         }

         if (ok) {
            if (0 || debug_print) {
               vex_printf("\n-+-+ DOING &&-TRANSFORM -+-+\n");
            }
            // Finally, actually do the transformation:
            // 0. Remove the last Exit on irsb.
            // 1. Add irsb->tyenv->types_used to all the tmps in sx_bb,
            //    by calling deltaIRStmt on all stmts.
            // 2. Guard all stmts in sx_bb on irsb_be.Be.Cond.condSX,
            //    **including** the last stmt (which must be an Exit).  It's
            //    here that the And1 is generated.
            // 3. Copy all guarded stmts to the end of irsb.
            vassert(irsb->stmts_used >= 2);
            irsb->stmts_used--;
            Int delta = irsb->tyenv->types_used;

            // Append sx_bb's tyenv to irsb's
            for (Int i = 0; i < sx_bb->tyenv->types_used; i++) {
               (void)newIRTemp(irsb->tyenv, sx_bb->tyenv->types[i]);
            }

            for (Int i = 0; i < sx_bb->stmts_used; i++) {
               IRStmt* st = deepCopyIRStmt(sx_bb->stmts[i]);
               deltaIRStmt(st, delta);
               add_guarded_stmt_to_end_of(irsb, st, irsb_be.Be.Cond.condSX);
            }
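            // Every statement of sx_bb has now been renumbered into irsb's
            // temp space and predicated on condSX, so it takes effect only
            // when irsb's (removed) side exit would have been taken.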

            if (debug_print) {
               vex_printf("\n-+-+ FINAL RESULT -+-+\n\n");
               ppIRSB(irsb);
               vex_printf("\n");
            }

            // Update instrs_used, extents, budget.
            instrs_used += sx_instrs_used;
            add_extent(vge, sx_base, sx_len);
            update_instr_budget(&instrs_avail, &verbose_mode,
                                sx_instrs_used, sx_verbose_seen);
            *n_cond_in_trace += 1;
         }

         break;
      } // if (be.tag == Be_Cond)

      // We don't know any other way to extend the block.  Give up.
      else {
         break;
      }

   } // while (True)

   /* We're almost done.  The only thing that might need attending to is
      that a self-checking preamble may need to be created.  If so, it gets
      placed in the 21 slots reserved above. */
   create_self_checks_as_needed(
      irsb, n_sc_extents, pxControl, callback_opaque, needs_self_check,
      vge, abiinfo_both, guest_word_type, selfcheck_idx, offB_GUEST_CMSTART,
      offB_GUEST_CMLEN, offB_GUEST_IP, guest_IP_sbstart
   );

   *n_guest_instrs = instrs_used;
   return irsb;
}


/*--------------------------------------------------------------*/
/*--- Functions called by self-checking translations         ---*/
/*--------------------------------------------------------------*/

/* All of these are CLEAN HELPERs */
/* All of these are CALLED FROM GENERATED CODE */

/* Compute a checksum of host memory at [addr .. addr+len-1], as fast
   as possible.  All _4al versions assume that the supplied address is
   4 aligned.  All length values are in 4-byte chunks.  These fns are
   called once for every use of a self-checking translation, so they
   need to be as fast as possible. */
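
/* All of these functions use the same two-accumulator scheme: for each
   word w, sum1 = ROL(sum1 ^ w, width-1) mixes in an order-sensitive way,
   while sum2 += w sums order-insensitively; sum1 ^= sum2 folds the two
   together after each group of four words, and after every word in the
   tail.  The checksum returned is sum1 + sum2.  (Loosely Fletcher-like
   in spirit; this note summarises the code below, it is not a normative
   spec.) */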

/* --- 32-bit versions, used only on 32-bit hosts --- */

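/* Rotate left by n.  Callers below only ever pass n == 31, so the
   (32-n) shift is always by a valid amount; n == 0 would be undefined
   behaviour. */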
static inline UInt ROL32 ( UInt w, Int n ) {
   w = (w << n) | (w >> (32-n));
   return w;
}

VEX_REGPARM(2)
static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   /* unrolled */
   while (n_w32s >= 4) {
      UInt w;
      w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
      w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
      w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
      w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
      p += 4;
      n_w32s -= 4;
      sum1 ^= sum2;
   }
   while (n_w32s >= 1) {
      UInt w;
      w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
      p += 1;
      n_w32s -= 1;
      sum1 ^= sum2;
   }
   return sum1 + sum2;
}
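
#if 0
/* Illustrative sketch only, not part of the original code: the same
   computation as genericg_compute_checksum_4al, written as plain loops,
   to make the fold cadence explicit -- fold once per group of four
   words, then after every remaining tail word. */
static UInt genericg_compute_checksum_4al_ref ( HWord first_w32,
                                                HWord n_w32s )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   while (n_w32s >= 4) {
      for (Int j = 0; j < 4; j++) {
         UInt w = p[j];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
      }
      p += 4;
      n_w32s -= 4;
      sum1 ^= sum2;   /* fold once per group of four */
   }
   while (n_w32s >= 1) {
      UInt w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
      p += 1;
      n_w32s -= 1;
      sum1 ^= sum2;   /* fold after every tail word */
   }
   return sum1 + sum2;
}
#endif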

/* Specialised versions of the above function */
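
/* Each _N variant below is the general routine fully unrolled for a
   fixed length of N words, and computes the same checksum the general
   routine would for that length.  Baking the length in lets generated
   code use a one-argument VEX_REGPARM(1) call with no loop overhead. */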

VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[9];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[9];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[10];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 )
{
   UInt  sum1 = 0, sum2 = 0;
   UInt* p = (UInt*)first_w32;
   UInt  w;
   w = p[0];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[1];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[2];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[3];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[5];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[6];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[7];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   w = p[8];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[9];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[10];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   w = p[11];  sum1 = ROL32(sum1 ^ w, 31);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}


/* --- 64-bit versions, used only on 64-bit hosts --- */
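
/* Same scheme as the 32-bit versions above, but with 64-bit words and a
   rotate amount of 63 (again the only amount ever passed, so the (64-n)
   shift is always valid). */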
static inline ULong ROL64 ( ULong w, Int n ) {
   w = (w << n) | (w >> (64-n));
   return w;
}

VEX_REGPARM(2)
static ULong genericg_compute_checksum_8al ( HWord first_w64, HWord n_w64s )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   /* unrolled */
   while (n_w64s >= 4) {
      ULong w;
      w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
      w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
      w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
      w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
      p += 4;
      n_w64s -= 4;
      sum1 ^= sum2;
   }
   while (n_w64s >= 1) {
      ULong w;
      w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
      p += 1;
      n_w64s -= 1;
      sum1 ^= sum2;
   }
   return sum1 + sum2;
}

/* Specialised versions of the above function */

VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[6];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[6];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[7];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[6];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[7];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[8];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[6];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[7];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[8];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[9];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[6];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[7];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[8];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[9];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[10];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

VEX_REGPARM(1)
static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   ULong  w;
   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[6];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[7];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   w = p[8];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[9];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[10];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   w = p[11];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
   sum1 ^= sum2;
   return sum1 + sum2;
}

/*--------------------------------------------------------------------*/
/*--- end                                 guest_generic_bb_to_IR.c ---*/
/*--------------------------------------------------------------------*/