/*---------------------------------------------------------------*/
/*--- begin                               guest_ppc_helpers.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex_emnote.h"
#include "libvex_guest_ppc32.h"
#include "libvex_guest_ppc64.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_ppc_defs.h"
/* This file contains helper functions for ppc32 and ppc64 guest code.
   Calls to these functions are generated by the back end.  These
   calls are of course in the host machine code and this file will be
   compiled to host machine code, so that all makes sense.

   Only change the signatures of these helper functions very
   carefully.  If you change the signature here, you'll have to change
   the parameters passed to it in the IR calls constructed by
   guest-ppc/toIR.c.
*/
/*---------------------------------------------------------------*/
/*--- Misc integer helpers.                                   ---*/
/*---------------------------------------------------------------*/
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially-transparent) */
/* Horrible hack.  On non-ppc platforms, return 1. */
/* Reads a complete, consistent 64-bit TB value. */
ULong ppcg_dirtyhelper_MFTB ( void )
{
#  if defined(__powerpc__)
   ULong res;
   UInt  lo, hi1, hi2;
   while (1) {
      __asm__ __volatile__ ("\n"
         "\tmftbu %0\n"
         "\tmftb %1\n"
         "\tmftbu %2\n"
         : "=r" (hi1), "=r" (lo), "=r" (hi2)
      );
      if (hi1 == hi2) break;
   }
   res = ((ULong)hi1) << 32;
   res |= (ULong)lo;
   return res;
#  else
   return 1ULL;
#  endif
}
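
/* Example of the torn read the hi/lo/hi loop above guards against
   (illustrative, not from the ISA text): suppose TBU:TBL is
   0x00000001:0xFFFFFFFF and TBL wraps between the two mftb reads.
   We would read TBU=1 then TBL=0x00000000 and combine them into the
   bogus value 0x1_00000000; re-reading TBU then gives 2 != 1, so the
   loop retries until both TBU samples agree and the combined 64-bit
   value is consistent. */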
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (non-referentially transparent) */
UInt ppc32g_dirtyhelper_MFSPR_268_269 ( UInt r269 )
{
#  if defined(__powerpc__)
   UInt spr;
   if (r269) {
      __asm__ __volatile__("mfspr %0,269" : "=b"(spr));
   } else {
      __asm__ __volatile__("mfspr %0,268" : "=b"(spr));
   }
   return spr;
#  else
   return 0;
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (I'm not really sure what the side effects are) */
UInt ppc32g_dirtyhelper_MFSPR_287 ( void )
{
#  if defined(__powerpc__)
   UInt spr;
   __asm__ __volatile__("mfspr %0,287" : "=b"(spr));
   return spr;
#  else
   return 0;
#  endif
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void ppc32g_dirtyhelper_LVS ( VexGuestPPC32State* gst,
                              UInt vD_off, UInt sh, UInt shift_right )
{
   static
   UChar ref[32] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                     0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
                     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
                     0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F };
   U128* pU128_src;
   U128* pU128_dst;

   vassert( vD_off      <= sizeof(VexGuestPPC32State)-8 );
   vassert( sh          <= 15 );
   vassert( shift_right <= 1 );

   if (shift_right)
      sh = 16-sh;
   /* else shift left */

   pU128_src = (U128*)&ref[sh];
   pU128_dst = (U128*)( ((UChar*)gst) + vD_off );

   (*pU128_dst)[0] = (*pU128_src)[0];
   (*pU128_dst)[1] = (*pU128_src)[1];
   (*pU128_dst)[2] = (*pU128_src)[2];
   (*pU128_dst)[3] = (*pU128_src)[3];
}
/* CALLED FROM GENERATED CODE */
/* DIRTY HELPER (reads guest state, writes guest mem) */
void ppc64g_dirtyhelper_LVS ( VexGuestPPC64State* gst,
                              UInt vD_off, UInt sh, UInt shift_right,
                              UInt endness )
{
   UChar ref[32];
   ULong i;
   Int k;
   /* ref[] used to be a static const array, but this doesn't work on
      ppc64 because VEX doesn't load the TOC pointer for the call here,
      and so we wind up picking up some totally random other data.
      (It's a wonder we don't segfault.)  So, just to be clear, this
      "fix" (vex r2073) is really a kludgearound for the fact that
      VEX's 64-bit ppc code generation doesn't provide a valid TOC
      pointer for helper function calls.  Ick.  (Bug 250038) */
   for (i = 0; i < 32; i++) ref[i] = i;

   U128* pU128_src;
   U128* pU128_dst;

   vassert( vD_off      <= sizeof(VexGuestPPC64State)-8 );
   vassert( sh          <= 15 );
   vassert( shift_right <= 1 );

   if (shift_right)
      sh = 16-sh;
   /* else shift left */

   pU128_src = (U128*)&ref[sh];
   pU128_dst = (U128*)( ((UChar*)gst) + vD_off );

   if ((0x1 & endness) == 0x0) {
      /* Little endian */
      unsigned char *srcp, *dstp;
      srcp = (unsigned char *)pU128_src;
      dstp = (unsigned char *)pU128_dst;
      for (k = 15; k >= 0; k--, srcp++)
         dstp[k] = *srcp;
   } else {
      (*pU128_dst)[0] = (*pU128_src)[0];
      (*pU128_dst)[1] = (*pU128_src)[1];
      (*pU128_dst)[2] = (*pU128_src)[2];
      (*pU128_dst)[3] = (*pU128_src)[3];
   }
}
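
/* Worked example (illustrative): for an lvsl with sh == 3 and
   shift_right == 0, pU128_src points at ref[3], so the permute
   control vector written to vD is the byte sequence
   0x03 0x04 ... 0x12 -- exactly the left-shift-by-3 selector that
   lvsl is defined to produce; lvsr uses the 16-sh mirror image. */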
/* Helper-function specialiser. */

IRExpr* guest_ppc32_spechelper ( const HChar* function_name,
                                 IRExpr** args,
                                 IRStmt** precedingStmts,
                                 Int n_precedingStmts )
{
   return NULL;
}

IRExpr* guest_ppc64_spechelper ( const HChar* function_name,
                                 IRExpr** args,
                                 IRStmt** precedingStmts,
                                 Int n_precedingStmts )
{
   return NULL;
}
/* 16-bit floating point number is stored in the lower 16-bits of the
   32-bit value */
#define I16_EXP_MASK       0x7C00
#define I16_FRACTION_MASK  0x03FF
#define I32_EXP_MASK       0x7F800000
#define I32_FRACTION_MASK  0x007FFFFF
#define I64_EXP_MASK       0x7FF0000000000000ULL
#define I64_FRACTION_MASK  0x000FFFFFFFFFFFFFULL
#define V128_EXP_MASK      0x7FFF000000000000ULL
#define V128_FRACTION_MASK 0x0000FFFFFFFFFFFFULL  /* upper 64-bit fractional mask */
ULong generate_C_FPCC_helper( ULong irType, ULong src_hi, ULong src )
{
   UInt NaN, inf, zero, norm, dnorm, pos;
   UInt bit0, bit1, bit2, bit3;
   UInt sign_bit = 0;
   ULong exp_mask = 0, exp_part = 0, frac_part = 0;
   ULong fpcc, c;

   if ( irType == Ity_I16 ) {
      frac_part = I16_FRACTION_MASK & src;
      exp_mask = I16_EXP_MASK;
      exp_part = exp_mask & src;
      sign_bit = src >> 15;

   } else if ( irType == Ity_I32 ) {
      frac_part = I32_FRACTION_MASK & src;
      exp_mask = I32_EXP_MASK;
      exp_part = exp_mask & src;
      sign_bit = src >> 31;

   } else if ( irType == Ity_I64 ) {
      frac_part = I64_FRACTION_MASK & src;
      exp_mask = I64_EXP_MASK;
      exp_part = exp_mask & src;
      sign_bit = src >> 63;

   } else if ( irType == Ity_F128 ) {
      /* only care if the frac part is zero or non-zero */
      frac_part = (V128_FRACTION_MASK & src_hi) | src;
      exp_mask = V128_EXP_MASK;
      exp_part = exp_mask & src_hi;
      sign_bit = src_hi >> 63;
   } else {
      vassert(0);  // Unknown value of irType
   }

   /* NaN: exponent is all ones, fractional part not zero */
   if ((exp_part == exp_mask) && (frac_part != 0))
      NaN = 1;
   else
      NaN = 0;

   /* inf: exponent all 1's, fraction part is zero */
   if ((exp_part == exp_mask) && (frac_part == 0))
      inf = 1;
   else
      inf = 0;

   /* zero: exponent is 0, fraction part is zero */
   if ((exp_part == 0) && (frac_part == 0))
      zero = 1;
   else
      zero = 0;

   /* norm: exponent is not 0, exponent is not all 1's */
   if ((exp_part != 0) && (exp_part != exp_mask))
      norm = 1;
   else
      norm = 0;

   /* dnorm: exponent is all 0's, fraction is not 0 */
   if ((exp_part == 0) && (frac_part != 0))
      dnorm = 1;
   else
      dnorm = 0;

   /* pos: sign bit (MSB) is 0 */
   if (sign_bit == 0)
      pos = 1;
   else
      pos = 0;

   /* calculate FPCC */
   /* If the result is NaN then must force bits 1, 2 and 3 to zero
    * to get the correct result.
    */
   bit0 = NaN | inf;

   bit1 = (!NaN) & zero;
   bit2 = (!NaN) & ((pos & dnorm) | (pos & norm) | (pos & inf))
      & ((!zero) & (!NaN));
   bit3 = (!NaN) & (((!pos) & dnorm) | ((!pos) & norm) | ((!pos) & inf))
      & ((!zero) & (!NaN));

   fpcc = (bit3 << 3) | (bit2 << 2) | (bit1 << 1) | bit0;

   /* calculate C */
   c = NaN | ((!pos) & dnorm) | ((!pos) & zero) | (pos & dnorm);

   /* return C in the upper 32-bits and FPCC in the lower 32 bits */
   return (c << 32) | fpcc;
}
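
/* Worked example (illustrative): for irType == Ity_I64 and
   src == 0x3FF0000000000000 (the double 1.0), exp_part is nonzero
   and not all-ones, so norm = 1 and pos = 1.  That gives
   bit0..bit3 = 0,0,1,0, i.e. FPCC = 0b0100, and C = 0, so the
   helper returns 0x0000000000000004. */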
UInt generate_DFP_FPRF_value_helper( UInt gfield,
                                     ULong exponent,
                                     UInt exponent_bias,
                                     Int min_norm_exp,
                                     UInt sign,
                                     UInt T_value_is_zero )
{
   UInt gfield_5_bit_mask = 0xF8000000;
   UInt gfield_upper_5_bits = (gfield & gfield_5_bit_mask) >> (32 - 5);
   UInt gfield_6_bit_mask = 0xFC000000;
   UInt gfield_upper_6_bits = (gfield & gfield_6_bit_mask) >> (32 - 6);
   UInt fprf_value = 0;
   Int  unbiased_exponent = exponent - exponent_bias;

   /* The assumption is the gfield bits are left justified.  Mask off
      the most significant 5 (or 6) bits in the 32-bit wide field. */
   if ( T_value_is_zero == 1) {
      if (sign == 0)
         fprf_value = 0b00010;   // positive zero
      else
         fprf_value = 0b10010;   // negative zero
   } else if ( unbiased_exponent < min_norm_exp ) {
      if (sign == 0)
         fprf_value = 0b10100;   // positive subnormal
      else
         fprf_value = 0b11000;   // negative subnormal

   } else if ( gfield_upper_5_bits == 0b11110 ) {   // infinity
      if (sign == 0)
         fprf_value = 0b00101;   // positive infinity
      else
         fprf_value = 0b01001;   // negative infinity

   } else if ( gfield_upper_6_bits == 0b111110 ) {
      fprf_value = 0b10001;      // Quiet NaN

   } else if ( gfield_upper_6_bits == 0b111111 ) {
      fprf_value = 0b10001;      // Signaling NaN

   } else {
      if (sign == 0)
         fprf_value = 0b00100;   // positive normal
      else
         fprf_value = 0b01000;   // negative normal
   }

   return fprf_value;
}
/*---------------------------------------------------------------*/
/*--- Misc BCD clean helpers.                                 ---*/
/*---------------------------------------------------------------*/

/* NOTE, the clean and dirty helpers need to be called using the
 * fnptr_to_fnentry() function wrapper to handle the Big Endian
 * pointer-to-function ABI and the Little Endian ABI.
 */

/* This C-helper takes a 128-bit BCD value as two 64-bit pieces.
 * It checks the string to see if it is a valid 128-bit BCD value.
 * A valid BCD value has a sign value in bits [3:0] between 0xA
 * and 0xF inclusive.  Each of the BCD digits, represented as a 4-bit
 * hex number in bits BCD value[128:4], must be between 0 and 9
 * inclusive.  Returns an unsigned 64-bit value if valid.
 */
ULong is_BCDstring128_helper( ULong Signed, ULong bcd_string_hi,
                              ULong bcd_string_low ) {
   Int i;
   ULong valid_bcd, sign_valid = False;
   ULong digit;
   UInt  sign;

   if ( Signed == True ) {
      sign = bcd_string_low & 0xF;
      if( ( sign >= 0xA ) && ( sign <= 0xF ) )
         sign_valid = True;

      /* Change the sign digit to a zero
       * so the for loop below works the same
       * for signed and unsigned BCD strings
       */
      bcd_string_low &= 0xFFFFFFFFFFFFFFF0ULL;

   } else {
      sign_valid = True;   /* set sign to True so result is only
                              based on the validity of the digits */
   }

   valid_bcd = True;   // Assume true to start
   for( i = 0; i < 32; i++ ) {
      /* check high and low 64-bit strings in parallel */
      digit = bcd_string_low & 0xF;
      if ( digit > 0x9 )
         valid_bcd = False;
      bcd_string_low = bcd_string_low >> 4;

      digit = bcd_string_hi & 0xF;
      if ( digit > 0x9 )
         valid_bcd = False;
      bcd_string_hi = bcd_string_hi >> 4;
   }

   return valid_bcd & sign_valid;
}
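
/* Example (illustrative): with Signed == True, bcd_string_hi == 0x0
   and bcd_string_low == 0x123D is valid (sign nibble 0xD, digits
   1,2,3), so the helper returns 1.  Changing the low word to 0x12AD
   makes digit 0xA invalid and the result 0. */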
/* This clean helper takes a signed 32-bit BCD value and a carry in
 * and adds 1 to the value of the BCD value.  The BCD value is passed
 * in as a single 64-bit value.  The incremented value is returned in
 * the lower 32 bits of the result.  If the input was signed, the sign of
 * the result is the same as the input.  The carry out is returned in
 * bits [35:32] of the result.
 */
ULong increment_BCDstring32_helper( ULong Signed,
                                    ULong bcd_string, ULong carry_in ) {
   UInt i, num_digits = 8;
   ULong bcd_value, result = 0;
   ULong carry, digit, new_digit;

   carry = carry_in;

   if ( Signed == True ) {
      bcd_value = bcd_string >> 4;   /* remove sign */
      num_digits = num_digits - 1;
   } else {
      bcd_value = bcd_string;
   }

   for( i = 0; i < num_digits; i++ ) {
      digit = bcd_value & 0xF;
      bcd_value = bcd_value >> 4;
      new_digit = digit + carry;

      if ( new_digit >= 10 ) {
         carry = 1;
         new_digit = new_digit - 10;

      } else {
         carry = 0;
      }
      result = result | ( new_digit << (i*4) );
   }

   if ( Signed == True ) {
      result = ( carry << 32 ) | ( result << 4 ) | ( bcd_string & 0xF );
   } else {
      result = ( carry << 32 ) | result;
   }

   return result;
}
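
/* Example (illustrative): Signed == True, bcd_string == 0x19C
   (digits "19", positive sign nibble 0xC), carry_in == 1.  Digit
   9+1 wraps to 0 with a carry, digit 1 becomes 2, giving digits
   "20"; the sign nibble is re-attached, so the helper returns
   0x20C with a zero carry-out in bits [35:32]. */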
/*---------------------------------------------------------------*/
/*--- Misc packed decimal clean helpers.                      ---*/
/*---------------------------------------------------------------*/

/* This C-helper takes a 64-bit packed decimal value stored in a
 * 64-bit value.  It converts it to the zoned decimal format.  The lower
 * byte may contain a sign value; set it to zero.  If return_upper
 * is zero, return the lower 64 bits of the result, otherwise return
 * the upper 64 bits of the result.
 */
ULong convert_to_zoned_helper( ULong src_hi, ULong src_low,
                               ULong upper_byte, ULong return_upper ) {
   UInt i, sh;
   ULong tmp = 0, new_value;

   /* Remove the sign from the source.  Put in the upper byte of result.
    * Sign inserted later.
    */
   if ( return_upper == 0 ) {   /* return lower 64-bit result */
      for( i = 0; i < 7; i++ ) {
         sh = ( 8 - i ) * 4;
         new_value = ( ( src_low >> sh ) & 0xf ) | upper_byte;
         tmp = tmp | ( new_value << ( ( 7 - i ) * 8 ) );
      }

   } else {
      /* Byte for i=0 is in upper 64-bit of the source, do it separately */
      new_value = ( src_hi & 0xf ) | upper_byte;
      tmp = tmp | new_value << 56;

      for( i = 1; i < 8; i++ ) {
         sh = ( 16 - i ) * 4;
         new_value = ( ( src_low >> sh ) & 0xf ) | upper_byte;
         tmp = tmp | ( new_value << ( ( 7 - i ) * 8 ) );
      }
   }
   return tmp;
}
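
/* Example (illustrative): with upper_byte == 0x30 (an ASCII-style
   zone), each 4-bit packed digit d becomes the zoned byte 0x30|d,
   so packed digits 1,2,3,4 come out as the bytes '1' '2' '3' '4'
   of the zoned representation; the sign byte is inserted by the
   caller afterwards. */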
/* This C-helper takes the lower 64-bits of the 128-bit packed decimal
 * src value.  It converts the src value to a 128-bit national format.
 * If return_upper is zero, the helper returns the lower 64 bits of the
 * result, otherwise it returns the upper 64-bits of the result.
 */
ULong convert_to_national_helper( ULong src, ULong return_upper ) {

   UInt i;
   UInt sh = 3, max = 4, min = 0;   /* initialize max, min for return upper */
   ULong tmp = 0, new_value;

   if ( return_upper == 0 ) {   /* return lower 64-bit result */
      min = 4;
      max = 7;
      sh  = 7;
   }

   for( i = min; i < max; i++ ) {
      new_value = ( ( src >> ( ( 7 - i ) * 4 ) ) & 0xf ) | 0x0030;
      tmp = tmp | ( new_value << ( ( sh - i ) * 16 ) );
   }
   return tmp;
}
/* This C-helper takes a 128-bit zoned value stored in a 128-bit
 * value.  It converts it to the packed 64-bit decimal format without
 * a sign value.  The sign is supposed to be in bits [3:0] and the packed
 * value in bits [67:4].  This helper leaves it to the caller to put the
 * result into a V128, shift the returned value over and put the sign
 * in.
 */
ULong convert_from_zoned_helper( ULong src_hi, ULong src_low ) {
   UInt i;
   ULong tmp = 0, nibble;

   /* Unroll the i = 0 iteration so the sizes of the loop for the upper
    * and lower extraction match.  Skip sign in least significant byte.
    */
   nibble = ( src_hi >> 56 ) & 0xF;
   tmp = tmp | ( nibble << 60 );

   for( i = 1; i < 8; i++ ) {
      /* get the high nibbles, put into result */
      nibble = ( src_hi >> ( ( 7 - i ) * 8 ) ) & 0xF;
      tmp = tmp | ( nibble << ( ( 15 - i ) * 4 ) );

      /* get the low nibbles, put into result */
      nibble = ( src_low >> ( ( 8 - i ) * 8 ) ) & 0xF;
      tmp = tmp | ( nibble << ( ( 8 - i ) * 4 ) );
   }
   return tmp;
}
/* This C-helper takes a 128-bit national value stored in a 128-bit
 * value.  It converts it to a signless packed 64-bit decimal format.
 */
ULong convert_from_national_helper( ULong src_hi, ULong src_low ) {
   UInt i;
   ULong tmp = 0, hword;

   src_low = src_low & 0xFFFFFFFFFFFFFFF0ULL;   /* remove the sign */

   for( i = 0; i < 4; i++ ) {
      /* get the high half-word, put into result */
      hword = ( src_hi >> ( ( 3 - i ) * 16 ) ) & 0xF;
      tmp = tmp | ( hword << ( ( 7 - i ) * 4 ) );

      /* get the low half-word, put into result */
      hword = ( src_low >> ( ( 3 - i ) * 16 ) ) & 0xF;
      tmp = tmp | ( hword << ( ( 3 - i ) * 4 ) );
   }
   return tmp;
}
/*------------------------------------------------*/
/*--- Population count  --------------------------*/
/*------------------------------------------------*/
ULong population_count64_helper( ULong src ) {
   /* Fast population count based on the algorithm in "Hacker's Delight"
      by Henry S. Warren. */
   src = (src & 0x5555555555555555) + ((src >> 1) & 0x5555555555555555);
   src = (src & 0x3333333333333333) + ((src >> 2) & 0x3333333333333333);
   src = (src & 0x0F0F0F0F0F0F0F0F) + ((src >> 4) & 0x0F0F0F0F0F0F0F0F);
   src = (src & 0x00FF00FF00FF00FF) + ((src >> 8) & 0x00FF00FF00FF00FF);
   src = (src & 0x0000FFFF0000FFFF) + ((src >> 16) & 0x0000FFFF0000FFFF);
   src = (src & 0x00000000FFFFFFFF) + ((src >> 32) & 0x00000000FFFFFFFF);
   return src & 0x7F;   /* count can be up to 64, which needs 7 bits */
}
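
/* Illustrative trace of the first fold on the byte 0b11010110: the
   bit pairs (11)(01)(01)(10) become the 2-bit sums 10,01,01,01,
   i.e. each pair now holds its own popcount.  Five more folds
   double the field width each time until the whole 64-bit count
   (at most 64, hence the 7-bit final mask) sits in the low byte. */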
/*------------------------------------------------*/
/*---- Extract/Deposit bits under mask helpers ---*/
/*------------------------------------------------*/
ULong extract_bits_under_mask_helper( ULong src, ULong mask, UInt flag ) {

   UInt i;
   ULong ones, zeros, mask_bit, bit_src;

   zeros = 0;
   ones = 0;

   for (i = 0; i < 64; i++) {
      mask_bit = 0x1 & (mask >> (63-i));
      bit_src  = 0x1 & (src >> (63-i));

      ones = ones << mask_bit;
      ones = ones | (mask_bit & bit_src);

      zeros = zeros << (1^mask_bit);
      zeros = zeros | ((1^mask_bit) & bit_src);
   }

   if (flag == 1)
      return ones;
   else
      return zeros;
}
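
/* Example (illustrative): src == 0xA5, mask == 0xF0.  The four src
   bits under the mask (0xA) are gathered, right-justified, into
   "ones", so flag == 1 returns 0xA.  The remaining src bits (the
   low nibble 0x5 plus the zero bits above) are gathered into
   "zeros", so flag == 0 returns 0x5.  This is the pextd-style
   extract behaviour the helper models. */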
UInt count_bits_under_mask_helper( ULong src, ULong mask, UInt flag ) {

   UInt i, count_extracted_1, count_extracted_0;
   ULong mask_bit;

   count_extracted_1 = 0;
   count_extracted_0 = 0;

   for (i = 0; i < 64; i++) {
      mask_bit = 0x1 & (mask >> (63-i));

      if (mask_bit == 1)
         count_extracted_1++;

      if ((1^mask_bit) == 1)
         count_extracted_0++;
   }

   if (flag == 1)
      return count_extracted_1;
   else
      return count_extracted_0;
}
ULong deposit_bits_under_mask_helper( ULong src, ULong mask ) {

   UInt i, src_bit_pos;
   ULong result, mask_bit, bit_src;

   result = 0;
   src_bit_pos = 0;

   for (i = 0; i < 64; i++) {
      mask_bit = 0x1 & (mask >> i);

      if (mask_bit == 1) {
         bit_src = 0x1 & (src >> src_bit_pos);
         result = result | (bit_src << i);
         src_bit_pos++;
      }
   }
   return result;
}
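
/* Example (illustrative): src == 0xA (low bits 1010), mask == 0xF0.
   The four low src bits are scattered into the four mask positions,
   LSB first, giving result == 0xA0 -- the pdepd-style inverse of
   the extract helper above. */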
/*----------------------------------------------*/
/*--- Vector Evaluate Inst helper --------------*/
/*----------------------------------------------*/
/* This is a 64-bit version of the VSX Vector Evaluate
   instruction xxeval. */

ULong vector_evaluate64_helper( ULong srcA, ULong srcB, ULong srcC,
                                ULong IMM ) {
#define MAX_BITS 64
#define MAX_IMM_BITS 8
   UInt i, select;
   ULong bitA, bitB, bitC, result;
   ULong bitIMM;

   result = 0;

   for (i = 0; i < MAX_BITS; i++) {
      bitA = 0x1 & (srcA >> i);
      bitB = 0x1 & (srcB >> i);
      bitC = 0x1 & (srcC >> i);

      /* The value of select is IBM numbering based, i.e. MSB is bit 0 */
      select = (bitA << 2) | (bitB << 1) | bitC;
      bitIMM = (IMM >> (MAX_IMM_BITS - 1 - select)) & 0x1;
      result = result | (bitIMM << i);
   }
   return result;
#undef MAX_BITS
#undef MAX_IMM_BITS
}
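
/* Example (illustrative): IMM is an 8-entry truth table indexed by
   (bitA,bitB,bitC) in IBM bit order.  With IMM == 0x01 only the
   select == 7 entry (all three bits set) reads a 1, so the helper
   computes srcA & srcB & srcC; IMM == 0x7F (every entry except
   select == 0) similarly yields srcA | srcB | srcC. */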
/*---------------------------------------------------------------*/
/*--- Clean helper for vbpermq instruction                    ---*/
/*---------------------------------------------------------------*/
UInt vbpermq_clean_helper( ULong vA_high, ULong vA_low, ULong vB ) {
   ULong bit, result = 0x0;
   UInt i, index;

   /* IBM numbering: bit 0 is the MSB, bit 63 is the LSB */
   for ( i = 0; i < 8; i++) {
      index = 0xFFULL & (vB >> (56 - 8*i) );

      if (index < 64) {
         bit = 0x1 & (vA_high >> (63 - index));

      } else if (index < 128) {
         bit = 0x1 & (vA_low >> (127 - index));

      } else
         bit = 0;

      result |= bit << (7 - i);
   }
   return result;
}
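
/* Example (illustrative): if the top byte of vB is 0, the selected
   bit is vA bit 0 in IBM numbering (the MSB of vA_high) and it lands
   in result bit 7; any index of 128 or more contributes a 0 bit. */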
/*--------------------------------------------------*/
/*---- VSX Vector Generate PCV from Mask helpers ---*/
/*--------------------------------------------------*/
static void write_VSX_entry (VexGuestPPC64State* gst, UInt reg_offset,
                             ULong *vsx_entry)
{
   U128* pU128_dst;
   pU128_dst = (U128*) (((UChar*) gst) + reg_offset);

   /* The U128 type is defined as an array of unsigned integers. */
   /* Writing in LE order */
   (*pU128_dst)[0] = (UInt)(vsx_entry[1] & 0xFFFFFFFF);
   (*pU128_dst)[1] = (UInt)(vsx_entry[1] >> 32);
   (*pU128_dst)[2] = (UInt)(vsx_entry[0] & 0xFFFFFFFF);
   (*pU128_dst)[3] = (UInt)(vsx_entry[0] >> 32);
   return;
}
/* CALLED FROM GENERATED CODE */
void vector_gen_pvc_byte_mask_dirty_helper( VexGuestPPC64State* gst,
                                            ULong src_hi, ULong src_lo,
                                            UInt reg_offset, UInt imm ) {
   /* The function computes the 128-bit result then writes it directly
      into the guest state VSX register. */

   UInt  i, shift_by, sel_shift_by, half_sel;
   ULong index, src, result[2];
   ULong j;

   result[0] = 0;
   result[1] = 0;
   j = 0;

   /* The algorithm in the ISA is written with IBM numbering, zero on the
      left and N-1 on the right.  The loop index is converted to "i" to
      match the algorithm, for clarity of matching the C code to the
      algorithm in the ISA. */

   if (imm == 0b00) {    // big endian expansion
      for( index = 0; index < 16; index++) {
         i = 15 - index;

         shift_by = i*8;

         if ( i >= 8) {
            src = src_hi;
            shift_by = shift_by - 64;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = shift_by + 7;

         if ( ((src >> sel_shift_by) & 0x1) == 1) {
            result[half_sel] |= j << shift_by;
            j++;
         } else {
            result[half_sel] |= (index + (unsigned long long)0x10) << shift_by;
         }
      }

   } else if (imm == 0b01) {    // big endian compression
      /* If IMM=0b00001, let pcv be the permute control vector required to
         enable a left-indexed permute (vperm or xxperm) to implement a
         compression of the sparse byte elements in a source vector
         specified by the byte-element mask in VSR[VRB+32] into the
         leftmost byte elements of a result vector. */
      for( index = 0; index < 16; index++) {
         i = 15 - index;
         shift_by = i*8;

         if ( i >= 8) {
            src = src_hi;
            shift_by = shift_by - 64;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = shift_by + 7;

         if ( ((src >> sel_shift_by) & 0x1) == 1) {
            if (j >= 8)
               result[1] |= (index) << (15 - j)*8;
            else
               result[0] |= (index) << (7 - j)*8;
            j++;
         }
      }
      /* The algorithm says set to undefined, leave as 0
      for( index = 3 - j; index < 4; index++) {
         result |= (0 << (index*8));
      }
      */

   } else if (imm == 0b10) {    //little-endian expansion
      /* If IMM=0b00010, let pcv be the permute control vector required to
         enable a right-indexed permute (vpermr or xxpermr) to implement an
         expansion of the rightmost byte elements of a source vector into
         the byte elements of a result vector specified by the byte-element
         mask in VSR[VRB+32]. */
      for( index = 0; index < 16; index++) {
         i = index;

         shift_by = i*8;

         if ( i >= 8) {
            src = src_hi;
            shift_by = shift_by - 64;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = shift_by + 7;

         /* mod shift amount by 8 since src is either the upper or lower
            64-bits. */
         if ( ((src >> sel_shift_by) & 0x1) == 1) {
            result[half_sel] |= j << shift_by;
            j++;
         } else {
            result[half_sel] |= (index + (unsigned long long)0x10) << shift_by;
         }
      }

   } else if (imm == 0b11) {    //little-endian compression
      /* If IMM=0b00011, let pcv be the permute control vector required to
         enable a right-indexed permute (vpermr or xxpermr) to implement a
         compression of the sparse byte elements in a source vector
         specified by the byte-element mask in VSR[VRB+32] into the
         rightmost byte elements of a result vector. */
      for( index = 0; index < 16; index++) {
         i = index;

         shift_by = i*8;

         if ( i >= 8) {
            src = src_hi;
            shift_by = shift_by - 64;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = shift_by + 7;

         if ( ((src >> sel_shift_by) & 0x1) == 1) {
            if (j >= 8)
               result[0] |= (index) << (j-8)*8;
            else
               result[1] |= (index) << j*8;
            j++;
         }
      }
      /* The algorithm says set to undefined, leave as 0
      for( index = 3 - j; index < 4; index++) {
         result |= (0 << (index*8));
      }
      */

   } else {
      vex_printf("ERROR, vector_gen_pvc_byte_mask_dirty_helper, imm value %u not supported.\n",
                 imm);
      vassert(0);
   }
   write_VSX_entry( gst, reg_offset, result);
}
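
/* Example (illustrative): for imm == 0b00 (big-endian expansion)
   with only byte element 0 selected (MSB of src_hi set), the pcv
   comes out as the bytes 0x00,0x11,0x12,...,0x1F: element 0 takes
   source byte 0 and every unselected element indexes byte
   0x10+index, i.e. a byte of the second permute source. */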
/* CALLED FROM GENERATED CODE */
void vector_gen_pvc_hword_mask_dirty_helper( VexGuestPPC64State* gst,
                                             ULong src_hi, ULong src_lo,
                                             UInt reg_offset,
                                             UInt imm ) {
   /* The function computes the 128-bit result then writes it directly
      into the guest state VSX register. */
   UInt  i, shift_by, sel_shift_by, half_sel;
   ULong index, src, result[2];
   ULong j;

   result[0] = 0;
   result[1] = 0;
   j = 0;

   /* The algorithm in the ISA is written with IBM numbering, zero on the
      left and N-1 on the right.  The loop index is converted to "i" to
      match the algorithm, for clarity of matching the C code to the
      algorithm in the ISA. */

   if (imm == 0b00) {    // big endian expansion
      /* If IMM=0b00000, let pcv be the permute control vector required to
         enable a left-indexed permute (vperm or xxperm) to implement an
         expansion of the leftmost halfword elements of a source vector
         into the halfword elements of a result vector specified by the
         halfword-element mask in VSR[VRB+32]. */
      for( index = 0; index < 8; index++) {
         i = 7 - index;

         shift_by = i*16;

         if ( i >= 4) {
            src = src_hi;
            shift_by = shift_by - 64;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = shift_by + 15;

         if ( ((src >> sel_shift_by) & 0x1) == 1) {
            // half-word i, byte 0
            result[half_sel] |= (2*j + 0x0) << (shift_by+8);
            // half-word i, byte 1
            result[half_sel] |= (2*j + 0x1) << shift_by;
            j++;
         } else {
            result[half_sel] |= (2*index + 0x10) << (shift_by+8);
            result[half_sel] |= (2*index + 0x11) << shift_by;
         }
      }

   } else if (imm == 0b01) {    // big endian compression
      /* If IMM=0b00001, let pcv be the permute control vector required to
         enable a left-indexed permute (vperm or xxperm) to implement a
         compression of the sparse halfword elements in a source vector
         specified by the halfword-element mask in VSR[VRB+32] into the
         leftmost halfword elements of a result vector. */
      for( index = 0; index < 8; index++) {
         i = 7 - index;

         shift_by = i*16;

         if ( i >= 4) {
            src = src_hi;
            shift_by = shift_by - 64;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = shift_by + 15;

         if ( ((src >> sel_shift_by) & 0x1) == 1) {
            if (j >= 4) {
               // half-word i, byte 0
               result[1] |= (2*index + 0x0) << ((7 - j)*16 + 8);
               // half-word i, byte 1
               result[1] |= (2*index + 0x1) << ((7 - j)*16);
            } else {
               // half-word i, byte 0
               result[0] |= (2*index + 0x0) << ((3 - j)*16 + 8);
               // half-word i, byte 1
               result[0] |= (2*index + 0x1) << ((3 - j)*16);
            }
            j++;
         }
      }

   } else if (imm == 0b10) {    //little-endian expansion
      /* If IMM=0b00010, let pcv be the permute control vector required to
         enable a right-indexed permute (vpermr or xxpermr) to implement an
         expansion of the rightmost halfword elements of a source vector
         into the halfword elements of a result vector specified by the
         halfword-element mask in VSR[VRB+32]. */
      for( index = 0; index < 8; index++) {
         i = index;
         shift_by = i*16;

         if ( i >= 4) {
            src = src_hi;
            shift_by = shift_by - 64;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = shift_by + 15;

         if ( ((src >> sel_shift_by) & 0x1) == 1) {
            // half-word i, byte 0
            result[half_sel] |= (2*j + 0x00) << shift_by;
            // half-word i, byte 1
            result[half_sel] |= (2*j + 0x01) << (shift_by+8);
            j++;

         } else {
            // half-word i, byte 0
            result[half_sel] |= (2*index + 0x10) << shift_by;
            // half-word i, byte 1
            result[half_sel] |= (2*index + 0x11) << (shift_by+8);
         }
      }

   } else if (imm == 0b11) {    //little-endian compression
      /* If IMM=0b00011, let pcv be the permute control vector required to
         enable a right-indexed permute (vpermr or xxpermr) to implement a
         compression of the sparse halfword elements in a source vector
         specified by the halfword-element mask in VSR[VRB+32] into the
         rightmost halfword elements of a result vector. */
      for( index = 0; index < 8; index++) {
         i = index;
         shift_by = i*16;

         if ( i >= 4) {
            src = src_hi;
            shift_by = shift_by - 64;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = shift_by + 15;

         if ( ((src >> sel_shift_by) & 0x1) == 1) {
            if (j >= 4) {
               // half-word j, byte 0
               result[0] |= (2*index + 0x0) << ((j-4)*16);
               // half-word j, byte 1
               result[0] |= (2*index + 0x1) << ((j-4)*16+8);
            } else {
               // half-word j, byte 0
               result[1] |= (2*index + 0x0) << (j*16);
               // half-word j, byte 1
               result[1] |= (2*index + 0x1) << ((j*16)+8);
            }
            j++;
         }
      }

   } else {
      vex_printf("ERROR, vector_gen_pvc_hword_dirty_mask_helper, imm value %u not supported.\n",
                 imm);
      vassert(0);
   }
   write_VSX_entry( gst, reg_offset, result);
}
/* CALLED FROM GENERATED CODE */
void vector_gen_pvc_word_mask_dirty_helper( VexGuestPPC64State* gst,
                                            ULong src_hi, ULong src_lo,
                                            UInt reg_offset, UInt imm ) {
   /* The function computes the 128-bit result then writes it directly
      into the guest state VSX register. */
   UInt  i, shift_by, sel_shift_by, half_sel;
   ULong index, src, result[2];
   ULong j;

   result[0] = 0;
   result[1] = 0;
   j = 0;

   /* The algorithm in the ISA is written with IBM numbering, zero on the
      left and N-1 on the right.  The loop index is converted to "i" to
      match the algorithm, for clarity of matching the C code to the
      algorithm in the ISA. */

   if (imm == 0b00) {    // big endian expansion
      /* If IMM=0b00000, let pcv be the permute control vector required to
         enable a left-indexed permute (vperm or xxperm) to implement an
         expansion of the leftmost word elements of a source vector into the
         word elements of a result vector specified by the word-element mask
         in VSR[VRB+32]. */
      for( index = 0; index < 4; index++) {
         i = 3 - index;

         shift_by = i*32;

         if ( i >= 2) {
            src = src_hi;
            shift_by = shift_by - 64;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = shift_by + 31;

         if ( ((src >> sel_shift_by) & 0x1) == 1) {
            result[half_sel] |= (4*j+0) << (shift_by+24);   // word i, byte 0
            result[half_sel] |= (4*j+1) << (shift_by+16);   // word i, byte 1
            result[half_sel] |= (4*j+2) << (shift_by+8);    // word i, byte 2
            result[half_sel] |= (4*j+3) << shift_by;        // word i, byte 3
            j++;
         } else {
            result[half_sel] |= (4*index + 0x10) << (shift_by+24);
            result[half_sel] |= (4*index + 0x11) << (shift_by+16);
            result[half_sel] |= (4*index + 0x12) << (shift_by+8);
            result[half_sel] |= (4*index + 0x13) << shift_by;
         }
      }

   } else if (imm == 0b01) {    // big endian compression
      /* If IMM=0b00001, let pcv be the permute control vector required to
         enable a left-indexed permute (vperm or xxperm) to implement a
         compression of the sparse word elements in a source vector
         specified by the word-element mask in VSR[VRB+32] into the leftmost
         word elements of a result vector. */
      for( index = 0; index < 4; index++) {
         i = 3 - index;

         shift_by = i*32;

         if ( i >= 2) {
            src = src_hi;
            shift_by = shift_by - 64;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = shift_by + 31;

         if (((src >> sel_shift_by) & 0x1) == 1) {
            if (j >= 2) {
               // word j, byte 0
               result[1] |= (4*index+0) << ((3 - j)*32 + 24);
               // word j, byte 1
               result[1] |= (4*index+1) << ((3 - j)*32 + 16);
               // word j, byte 2
               result[1] |= (4*index+2) << ((3 - j)*32 + 8);
               // word j, byte 3
               result[1] |= (4*index+3) << ((3 - j)*32 + 0);
            } else {
               result[0] |= (4*index+0) << ((1 - j)*32 + 24);
               result[0] |= (4*index+1) << ((1 - j)*32 + 16);
               result[0] |= (4*index+2) << ((1 - j)*32 + 8);
               result[0] |= (4*index+3) << ((1 - j)*32 + 0);
            }
            j++;
         }
      }

   } else if (imm == 0b10) {    //little-endian expansion
      /* If IMM=0b00010, let pcv be the permute control vector required to
         enable a right-indexed permute (vpermr or xxpermr) to implement an
         expansion of the rightmost word elements of a source vector into the
         word elements of a result vector specified by the word-element mask
         in VSR[VRB+32]. */
      for( index = 0; index < 4; index++) {
         i = index;

         shift_by = i*32;

         if ( i >= 2) {
            src = src_hi;
            shift_by = shift_by - 64;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = shift_by + 31;

         if (((src >> sel_shift_by) & 0x1) == 1) {
            result[half_sel] |= (4*j+0) << (shift_by + 0);    // word j, byte 0
            result[half_sel] |= (4*j+1) << (shift_by + 8);    // word j, byte 1
            result[half_sel] |= (4*j+2) << (shift_by + 16);   // word j, byte 2
            result[half_sel] |= (4*j+3) << (shift_by + 24);   // word j, byte 3
            j++;
         } else {
            result[half_sel] |= (4*index + 0x10) << (shift_by + 0);
            result[half_sel] |= (4*index + 0x11) << (shift_by + 8);
            result[half_sel] |= (4*index + 0x12) << (shift_by + 16);
            result[half_sel] |= (4*index + 0x13) << (shift_by + 24);
         }
      }

   } else if (imm == 0b11) {    //little-endian compression
      /* If IMM=0b00011, let pcv be the permute control vector required to
         enable a right-indexed permute (vpermr or xxpermr) to implement a
         compression of the sparse word elements in a source vector
         specified by the word-element mask in VSR[VRB+32] into the rightmost
         word elements of a result vector. */
      for( index = 0; index < 4; index++) {
         i = index;

         shift_by = i*32;

         if ( i >= 2) {
            src = src_hi;
            shift_by = shift_by - 64;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = shift_by + 31;

         if (((src >> sel_shift_by) & 0x1) == 1) {
            if (j >= 2) {
               // word j, byte 0
               result[0] |= (4*index + 0x0) << ((j-2)*32+0);
               // word j, byte 1
               result[0] |= (4*index + 0x1) << ((j-2)*32+8);
               // word j, byte 2
               result[0] |= (4*index + 0x2) << ((j-2)*32+16);
               // word j, byte 3
               result[0] |= (4*index + 0x3) << ((j-2)*32+24);
            } else {
               result[1] |= (4*index + 0x0) << (j*32+0);
               result[1] |= (4*index + 0x1) << (j*32+8);
               result[1] |= (4*index + 0x2) << (j*32+16);
               result[1] |= (4*index + 0x3) << (j*32+24);
            }
            j++;
         }
      }

   } else {
      vex_printf("ERROR, vector_gen_pvc_word_mask_dirty_helper, imm value %u not supported.\n",
                 imm);
      vassert(0);
   }
   write_VSX_entry( gst, reg_offset, result);
}
/* CALLED FROM GENERATED CODE */
void vector_gen_pvc_dword_mask_dirty_helper( VexGuestPPC64State* gst,
                                             ULong src_hi, ULong src_lo,
                                             UInt reg_offset, UInt imm ) {
   /* The function computes the 128-bit result then writes it directly
      into the guest state VSX register. */
   UInt  sel_shift_by, half_sel;
   ULong index, src, result[2];
   ULong j, i;

   result[0] = 0;
   result[1] = 0;
   j = 0;

   /* The algorithm in the ISA is written with IBM numbering, zero on the
      left and N-1 on the right.  The loop index is converted to "i" to
      match the algorithm, for clarity of matching the C code to the
      algorithm in the ISA. */

   if (imm == 0b00) {    // big endian expansion
      /* If IMM=0b00000, let pcv be the permute control vector required to
         enable a left-indexed permute (vperm or xxperm) to implement an
         expansion of the leftmost doubleword elements of a source vector
         into the doubleword elements of a result vector specified by the
         doubleword-element mask in VSR[VRB+32]. */
      for( index = 0; index < 2; index++) {
         i = 1 - index;

         if ( i == 1) {
            src = src_hi;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = 63;

         if ( ((src >> sel_shift_by) & 0x1) == 1) {
            result[half_sel] |= (8*j + 0x0) << 56;   // dword i, byte 0
            result[half_sel] |= (8*j + 0x1) << 48;   // dword i, byte 1
            result[half_sel] |= (8*j + 0x2) << 40;   // dword i, byte 2
            result[half_sel] |= (8*j + 0x3) << 32;   // dword i, byte 3
            result[half_sel] |= (8*j + 0x4) << 24;   // dword i, byte 4
            result[half_sel] |= (8*j + 0x5) << 16;   // dword i, byte 5
            result[half_sel] |= (8*j + 0x6) << 8;    // dword i, byte 6
            result[half_sel] |= (8*j + 0x7) << 0;    // dword i, byte 7
            j++;
         } else {
            result[half_sel] |= (8*index + 0x10) << 56;
            result[half_sel] |= (8*index + 0x11) << 48;
            result[half_sel] |= (8*index + 0x12) << 40;
            result[half_sel] |= (8*index + 0x13) << 32;
            result[half_sel] |= (8*index + 0x14) << 24;
            result[half_sel] |= (8*index + 0x15) << 16;
            result[half_sel] |= (8*index + 0x16) << 8;
            result[half_sel] |= (8*index + 0x17) << 0;
         }
      }

   } else if (imm == 0b01) {    // big endian compression
      /* If IMM=0b00001, let pcv be the permute control vector required to
         enable a left-indexed permute (vperm or xxperm) to implement a
         compression of the sparse doubleword elements in a source vector
         specified by the doubleword-element mask in VSR[VRB+32] into the
         leftmost doubleword elements of a result vector. */
      for( index = 0; index < 2; index++) {
         i = 1 - index;

         if ( i == 1) {
            src = src_hi;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = 63;

         if ( ((src >> sel_shift_by) & 0x1) == 1) {
            if (j == 1) {
               result[1] |= (8*index + 0x0) << 56;   // double-word j, byte 0
               result[1] |= (8*index + 0x1) << 48;   // double-word j, byte 1
               result[1] |= (8*index + 0x2) << 40;   // double-word j, byte 2
               result[1] |= (8*index + 0x3) << 32;   // double-word j, byte 3
               result[1] |= (8*index + 0x4) << 24;   // double-word j, byte 4
               result[1] |= (8*index + 0x5) << 16;   // double-word j, byte 5
               result[1] |= (8*index + 0x6) << 8;    // double-word j, byte 6
               result[1] |= (8*index + 0x7) << 0;    // double-word j, byte 7
            } else {
               result[0] |= (8*index + 0x0) << 56;   // double-word j, byte 0
               result[0] |= (8*index + 0x1) << 48;   // double-word j, byte 1
               result[0] |= (8*index + 0x2) << 40;   // double-word j, byte 2
               result[0] |= (8*index + 0x3) << 32;   // double-word j, byte 3
               result[0] |= (8*index + 0x4) << 24;   // double-word j, byte 4
               result[0] |= (8*index + 0x5) << 16;   // double-word j, byte 5
               result[0] |= (8*index + 0x6) << 8;    // double-word j, byte 6
               result[0] |= (8*index + 0x7) << 0;    // double-word j, byte 7
            }
            j++;
         }
      }

   } else if (imm == 0b10) {    //little-endian expansion
      /* If IMM=0b00010, let pcv be the permute control vector required to
         enable a right-indexed permute (vpermr or xxpermr) to implement an
         expansion of the rightmost doubleword elements of a source vector
         into the doubleword elements of a result vector specified by the
         doubleword-element mask in VSR[VRB+32]. */
      for( index = 0; index < 2; index++) {
         i = index;

         if ( i == 1) {
            src = src_hi;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = 63;

         if ( ((src >> sel_shift_by) & 0x1) == 1) {
            result[half_sel] |= (8*j+0) << 0;    // double-word i, byte 0
            result[half_sel] |= (8*j+1) << 8;    // double-word i, byte 1
            result[half_sel] |= (8*j+2) << 16;   // double-word i, byte 2
            result[half_sel] |= (8*j+3) << 24;   // double-word i, byte 3
            result[half_sel] |= (8*j+4) << 32;   // double-word i, byte 4
            result[half_sel] |= (8*j+5) << 40;   // double-word i, byte 5
            result[half_sel] |= (8*j+6) << 48;   // double-word i, byte 6
            result[half_sel] |= (8*j+7) << 56;   // double-word i, byte 7
            j++;
         } else {
            result[half_sel] |= (8*index + 0x10) << 0;
            result[half_sel] |= (8*index + 0x11) << 8;
            result[half_sel] |= (8*index + 0x12) << 16;
            result[half_sel] |= (8*index + 0x13) << 24;
            result[half_sel] |= (8*index + 0x14) << 32;
            result[half_sel] |= (8*index + 0x15) << 40;
            result[half_sel] |= (8*index + 0x16) << 48;
            result[half_sel] |= (8*index + 0x17) << 56;
         }
      }

   } else if (imm == 0b11) {    //little-endian compression
      /* If IMM=0b00011, let pcv be the permute control vector required to
         enable a right-indexed permute (vpermr or xxpermr) to implement a
         compression of the sparse doubleword elements in a source vector
         specified by the doubleword-element mask in VSR[VRB+32] into the
         rightmost doubleword elements of a result vector. */
      for( index = 0; index < 2; index++) {
         i = index;

         if ( i == 1) {
            src = src_hi;
            half_sel = 0;
         } else {
            src = src_lo;
            half_sel = 1;
         }

         sel_shift_by = 63;

         if (((src >> sel_shift_by) & 0x1) == 1) {
            if (j == 1) {
               result[0] |= (8*index + 0x0) << 0;    // double-word j, byte 0
               result[0] |= (8*index + 0x1) << 8;    // double-word j, byte 1
               result[0] |= (8*index + 0x2) << 16;   // double-word j, byte 2
               result[0] |= (8*index + 0x3) << 24;   // double-word j, byte 3
               result[0] |= (8*index + 0x4) << 32;   // double-word j, byte 4
               result[0] |= (8*index + 0x5) << 40;   // double-word j, byte 5
               result[0] |= (8*index + 0x6) << 48;   // double-word j, byte 6
               result[0] |= (8*index + 0x7) << 56;   // double-word j, byte 7
            } else {
               result[1] |= (8*index + 0x0) << 0;
               result[1] |= (8*index + 0x1) << 8;
               result[1] |= (8*index + 0x2) << 16;
               result[1] |= (8*index + 0x3) << 24;
               result[1] |= (8*index + 0x4) << 32;
               result[1] |= (8*index + 0x5) << 40;
               result[1] |= (8*index + 0x6) << 48;
               result[1] |= (8*index + 0x7) << 56;
            }
            j++;
         }
      }

   } else {
      vex_printf("ERROR, vector_gen_pvc_dword_mask_helper, imm value %u not supported.\n",
                 imm);
      vassert(0);
   }
   write_VSX_entry( gst, reg_offset, result);
}
/*------------------------------------------------*/
/*---- VSX Matrix signed integer GER functions ---*/
/*------------------------------------------------*/
static UInt exts4( UInt src )
{
   /* Input is a 4-bit value.  Extend bit 3 to bits [31:4] */
   if (( src >> 3 ) & 0x1)
      return src | 0xFFFFFFF0;   /* sign bit is a 1, extend */
   else
      return src & 0xF;          /* make sure high order bits are zero */
}

static ULong exts8( UInt src )
{
   /* Input is an 8-bit value.  Extend bit 7 to bits [63:8] */
   if (( src >> 7 ) & 0x1)
      return src | 0xFFFFFFFFFFFFFF00ULL;   /* sign bit is a 1, extend */
   else
      return src & 0xFF;         /* make sure high order bits are zero */
}

static ULong extz8( UInt src )
{
   /* Input is an 8-bit value.  Extend src on the left with zeros. */
   return src & 0xFF;            /* make sure high order bits are zero */
}

static ULong exts16to64( UInt src )
{
   /* Input is a 16-bit value.  Extend bit 15 to bits [63:16] */
   if (( src >> 15 ) & 0x1)
      return ((ULong) src) | 0xFFFFFFFFFFFF0000ULL;   /* sign is 1, extend */
   else
      /* make sure high order bits are zero */
      return ((ULong) src) & 0xFFFFULL;
}

static UInt chop64to32( Long src ) {
   /* Take a 64-bit input, return the lower 32-bits */
   return (UInt)(0xFFFFFFFF & src);
}

static UInt clampS64toS32( Long src ) {
   /* Take a 64-bit signed input; clamp positive values to 2^31-1 and
      negative values to -(2^31-1).  Return the result in an
      unsigned 32-bit value. */
   Long max_val = 2147483647;   // 2^31-1
   if ( src > max_val)
      return (UInt)max_val;

   if (src < -max_val)
      return (UInt)-max_val;

   return (UInt)src;
}
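
/* Example (illustrative): clampS64toS32(0x100000000LL) saturates to
   0x7FFFFFFF, while clampS64toS32(-5) falls inside the clamp range
   and passes through as the 32-bit two's complement pattern
   0xFFFFFFFB. */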
void write_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, UInt reg,
                      UInt *acc_word)
{
   U128* pU128_dst;

   vassert(acc < 8);
   vassert(reg < 4);

   pU128_dst = (U128*) (((UChar*)gst) + offset + acc*4*sizeof(U128)
                        + reg*sizeof(U128));

   /* The U128 type is defined as an array of unsigned integers. */
   (*pU128_dst)[0] = acc_word[0];
   (*pU128_dst)[1] = acc_word[1];
   (*pU128_dst)[2] = acc_word[2];
   (*pU128_dst)[3] = acc_word[3];
   return;
}

void get_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, UInt reg,
                    UInt *acc_word)
{
   U128* pU128_src;

   acc_word[3] = 0xDEAD;
   acc_word[2] = 0xBEEF;
   acc_word[1] = 0xBAD;
   acc_word[0] = 0xBEEF;

   vassert(acc < 8);
   vassert(reg < 4);

   pU128_src = (U128*) (((UChar*)gst) + offset + acc*4*sizeof(U128)
                        + reg*sizeof(U128));

   /* The U128 type is defined as an array of unsigned integers. */
   acc_word[0] = (*pU128_src)[0];
   acc_word[1] = (*pU128_src)[1];
   acc_word[2] = (*pU128_src)[2];
   acc_word[3] = (*pU128_src)[3];
   return;
}
void vsx_matrix_4bit_ger_dirty_helper ( VexGuestPPC64State* gst,
                                        UInt offset_ACC,
                                        ULong srcA_hi, ULong srcA_lo,
                                        ULong srcB_hi, ULong srcB_lo,
                                        UInt masks_inst )
{
   /* This helper calculates the result for one of the four ACC entries.
      It is called twice, to get the hi and then the low 64-bit of the
      128-bit result. */
   UInt i, j, mask, sum, inst, acc_entry, prefix_inst;

   UInt srcA_nibbles[4][8];   /* word, nibble */
   UInt srcB_nibbles[4][8];   /* word, nibble */
   UInt acc_word[4];
   UInt prod0, prod1, prod2, prod3, prod4, prod5, prod6, prod7;
   UInt result[4];
   UInt pmsk = 0;
   UInt xmsk = 0;
   UInt ymsk = 0;

   mask = 0xF;
   inst = (masks_inst >> 5) & 0xFF;
   prefix_inst = (masks_inst >> 13) & 0x1;
   acc_entry = masks_inst & 0xF;

   /* LE word numbering */
   if ( prefix_inst == 0 ) {
      /* Set the masks for non-prefix instructions */
      pmsk = 0b11111111;
      xmsk = 0b1111;
      ymsk = 0b1111;

   } else {
      pmsk = (masks_inst >> 22) & 0xFF;
      xmsk = (masks_inst >> 18) & 0xF;
      ymsk = (masks_inst >> 14) & 0xF;
   }

   /* Address nibbles using IBM numbering */
   for( i = 0; i < 4; i++) {
      /* Get the ACC contents directly from the PPC64 state */
      get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word);

      // input is in double words
      for( j = 0; j < 8; j++) {
         srcA_nibbles[3][j] = (srcA_hi >> (60-4*j)) & mask;   // hi bits [63:32]
         srcA_nibbles[2][j] = (srcA_hi >> (28-4*j)) & mask;   // hi bits [31:0]
         srcA_nibbles[1][j] = (srcA_lo >> (60-4*j)) & mask;   // lo bits [63:32]
         srcA_nibbles[0][j] = (srcA_lo >> (28-4*j)) & mask;   // lo bits [31:0]

         srcB_nibbles[3][j] = (srcB_hi >> (60-4*j)) & mask;
         srcB_nibbles[2][j] = (srcB_hi >> (28-4*j)) & mask;
         srcB_nibbles[1][j] = (srcB_lo >> (60-4*j)) & mask;
         srcB_nibbles[0][j] = (srcB_lo >> (28-4*j)) & mask;
      }

      for( j = 0; j < 4; j++) {
         if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) {
            if (((pmsk >> 7) & 0x1) == 0)
               prod0 = 0;
            else
               prod0 = exts4( srcA_nibbles[i][0] )
                  * exts4( srcB_nibbles[j][0] );

            if (((pmsk >> 6) & 0x1) == 0)
               prod1 = 0;
            else
               prod1 = exts4( srcA_nibbles[i][1] )
                  * exts4( srcB_nibbles[j][1] );

            if (((pmsk >> 5) & 0x1) == 0)
               prod2 = 0;
            else
               prod2 = exts4( srcA_nibbles[i][2] )
                  * exts4( srcB_nibbles[j][2] );

            if (((pmsk >> 4) & 0x1) == 0)
               prod3 = 0;
            else
               prod3 = exts4( srcA_nibbles[i][3] )
                  * exts4( srcB_nibbles[j][3] );

            if (((pmsk >> 3) & 0x1) == 0)
               prod4 = 0;
            else
               prod4 = exts4( srcA_nibbles[i][4] )
                  * exts4( srcB_nibbles[j][4] );

            if (((pmsk >> 2) & 0x1) == 0)
               prod5 = 0;
            else
               prod5 = exts4( srcA_nibbles[i][5] )
                  * exts4( srcB_nibbles[j][5] );

            if (((pmsk >> 1) & 0x1) == 0)
               prod6 = 0;
            else
               prod6 = exts4( srcA_nibbles[i][6] )
                  * exts4( srcB_nibbles[j][6] );

            if ((pmsk & 0x1) == 0)
               prod7 = 0;
            else
               prod7 = exts4( srcA_nibbles[i][7] )
                  * exts4( srcB_nibbles[j][7] );

            /* sum is UInt so the result is chopped to 32-bits */
            sum = prod0 + prod1 + prod2 + prod3 + prod4
               + prod5 + prod6 + prod7;

            if ( inst == XVI4GER8 )
               result[j] = sum;

            else if ( inst == XVI4GER8PP )
               result[j] = sum + acc_word[j];

         } else {
            result[j] = 0;
         }
      }
      write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result);
   }
}
void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst,
                                       UInt offset_ACC,
                                       ULong srcA_hi, ULong srcA_lo,
                                       ULong srcB_hi, ULong srcB_lo,
                                       UInt masks_inst )
{
   UInt i, j, mask, inst, acc_entry, prefix_inst;

   UInt srcA_bytes[4][4];   /* word, byte */
   UInt srcB_bytes[4][4];   /* word, byte */
   UInt acc_word[4];
   ULong prod0, prod1, prod2, prod3, sum;
   UInt result[4];
   UInt pmsk = 0;
   UInt xmsk = 0;
   UInt ymsk = 0;

   mask = 0xFF;
   inst = (masks_inst >> 5) & 0xFF;
   prefix_inst = (masks_inst >> 13) & 0x1;
   acc_entry = masks_inst & 0xF;

   /* LE word numbering */
   if ( prefix_inst == 0 ) {
      /* Set the masks */
      pmsk = 0b1111;
      xmsk = 0b1111;
      ymsk = 0b1111;

   } else {
      pmsk = (masks_inst >> 26) & 0xF;
      xmsk = (masks_inst >> 18) & 0xF;
      ymsk = (masks_inst >> 14) & 0xF;
   }

   /* Address bytes using IBM numbering */
   for( i = 0; i < 4; i++) {
      /* Get the ACC contents directly from the PPC64 state */
      get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word);

      for( j = 0; j < 4; j++) {
         srcA_bytes[3][j] = (srcA_hi >> (56-8*j)) & mask;
         srcA_bytes[2][j] = (srcA_hi >> (24-8*j)) & mask;
         srcA_bytes[1][j] = (srcA_lo >> (56-8*j)) & mask;
         srcA_bytes[0][j] = (srcA_lo >> (24-8*j)) & mask;

         srcB_bytes[3][j] = (srcB_hi >> (56-8*j)) & mask;
         srcB_bytes[2][j] = (srcB_hi >> (24-8*j)) & mask;
         srcB_bytes[1][j] = (srcB_lo >> (56-8*j)) & mask;
         srcB_bytes[0][j] = (srcB_lo >> (24-8*j)) & mask;
      }

      for( j = 0; j < 4; j++) {
         if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) {
            if (((pmsk >> 3) & 0x1) == 0)
               prod0 = 0;
            else
               prod0 =
                  exts8( srcA_bytes[i][0] )
                  * extz8( srcB_bytes[j][0] );

            if (((pmsk >> 2) & 0x1) == 0)
               prod1 = 0;
            else
               prod1 =
                  exts8( srcA_bytes[i][1] )
                  * extz8( srcB_bytes[j][1] );

            if (((pmsk >> 1) & 0x1) == 0)
               prod2 = 0;
            else
               prod2 =
                  exts8( srcA_bytes[i][2] )
                  * extz8( srcB_bytes[j][2] );

            if (((pmsk >> 0) & 0x1) == 0)
               prod3 = 0;
            else
               prod3 =
                  exts8( srcA_bytes[i][3] )
                  * extz8( srcB_bytes[j][3] );

            /* sum is ULong; the result is reduced to 32-bits by
               chop64to32/clampS64toS32 below */
            sum = prod0 + prod1 + prod2 + prod3;

            if ( inst == XVI8GER4 )
               result[j] = chop64to32( sum );

            else if ( inst == XVI8GER4PP )
               result[j] = chop64to32( sum + acc_word[j] );

            else if ( inst == XVI8GER4SPP )
               result[j] = clampS64toS32( sum + acc_word[j] );

            // @todo PJF Coverity complains that if none of the above ifs
            // are true then result gets used uninitialized
         } else {
            result[j] = 0;
         }
      }
      write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result);
   }
}
void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst,
                                        UInt offset_ACC,
                                        ULong srcA_hi, ULong srcA_lo,
                                        ULong srcB_hi, ULong srcB_lo,
                                        UInt masks_inst )
{
   UInt i, j, mask, inst, acc_entry, prefix_inst;

   ULong sum;
   UInt srcA_word[4][2];   /* word, hword */
   UInt srcB_word[4][2];   /* word, hword */
   UInt acc_word[4];
   ULong prod0, prod1;
   UInt result[4];
   UInt pmsk = 0;
   UInt xmsk = 0;
   UInt ymsk = 0;

   mask = 0xFFFF;
   inst = (masks_inst >> 5) & 0xFF;
   prefix_inst = (masks_inst >> 13) & 0x1;
   acc_entry = masks_inst & 0xF;

   /* LE word numbering */
   if ( prefix_inst == 0 ) {
      /* Set the masks for non-prefix instructions */
      pmsk = 0b11;
      xmsk = 0b1111;
      ymsk = 0b1111;

   } else {
      pmsk = (masks_inst >> 28) & 0x3;
      xmsk = (masks_inst >> 18) & 0xF;
      ymsk = (masks_inst >> 14) & 0xF;
   }

   /* Address half-words using IBM numbering */
   for( i = 0; i < 4; i++) {
      /* Get the ACC contents directly from the PPC64 state */
      get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word);

      for( j = 0; j < 2; j++) {
         srcA_word[3][j] = (srcA_hi >> (48-16*j)) & mask;
         srcA_word[2][j] = (srcA_hi >> (16-16*j)) & mask;
         srcA_word[1][j] = (srcA_lo >> (48-16*j)) & mask;
         srcA_word[0][j] = (srcA_lo >> (16-16*j)) & mask;

         srcB_word[3][j] = (srcB_hi >> (48-16*j)) & mask;
         srcB_word[2][j] = (srcB_hi >> (16-16*j)) & mask;
         srcB_word[1][j] = (srcB_lo >> (48-16*j)) & mask;
         srcB_word[0][j] = (srcB_lo >> (16-16*j)) & mask;
      }

      for( j = 0; j < 4; j++) {
         if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) {
            if (((pmsk >> 1) & 0x1) == 0)
               prod0 = 0;

            else
               prod0 = exts16to64( srcA_word[i][0] )
                  * exts16to64( srcB_word[j][0] );

            if (((pmsk >> 0) & 0x1) == 0)
               prod1 = 0;
            else
               prod1 = exts16to64( srcA_word[i][1] )
                  * exts16to64( srcB_word[j][1] );

            sum = prod0 + prod1;

            if ( inst == XVI16GER2 )
               result[j] = chop64to32( sum );

            else if ( inst == XVI16GER2S )
               result[j] = clampS64toS32( sum );

            else if ( inst == XVI16GER2PP )
               result[j] = chop64to32( sum + acc_word[j] );

            else if ( inst == XVI16GER2SPP )
               result[j] = clampS64toS32( sum + acc_word[j] );

         } else {
            result[j] = 0;
         }
      }
      write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result);
   }
}
//matrix 16 float stuff
union convert_t {
   UInt   u32;
   ULong  u64;
   Float  f;
   Double d;
};

static Float reinterpret_int_as_float( UInt input )
{
   /* Reinterpret the bit pattern of an int as a float. */
   __attribute__ ((aligned (128))) union convert_t conv;

   conv.u32 = input;
   return conv.f;
}

static UInt reinterpret_float_as_int( Float input )
{
   /* Reinterpret the bit pattern of a float as an int. */
   __attribute__ ((aligned (128))) union convert_t conv;

   conv.f = input;
   return conv.u32;
}

static Double reinterpret_long_as_double( ULong input )
{
   /* Reinterpret the bit pattern of a long as a double. */
   __attribute__ ((aligned (128))) union convert_t conv;

   conv.u64 = input;
   return conv.d;
}

static ULong reinterpret_double_as_long( Double input )
{
   /* Reinterpret the bit pattern of a double as a long. */
   __attribute__ ((aligned (128))) union convert_t conv;

   conv.d = input;
   return conv.u64;
}
static Double conv_f16_to_double( ULong input )
{
#  if defined (HAS_XSCVHPDP)
   // This all seems to be very alignment sensitive??
   __attribute__ ((aligned (64))) ULong src;
   __attribute__ ((aligned (64))) Double result;
   src = input;
   __asm__ __volatile__ (".machine push;\n" ".machine power9;\n" \
                         "xscvhpdp %x0,%x1 ;\n .machine pop" \
                         : "=wa" (result) : "wa" (src) );
   return result;
#  else
   return 0.0;
#  endif
}
#define BF16_SIGN_MASK        0x8000
#define BF16_EXP_MASK         0x7F80
#define BF16_FRAC_MASK        0x007F
#define BF16_BIAS             127
#define BF16_MAX_UNBIASED_EXP 127
#define BF16_MIN_UNBIASED_EXP -126
#define FLOAT_SIGN_MASK       0x80000000
#define FLOAT_EXP_MASK        0x7F800000
#define FLOAT_FRAC_MASK       0x007FFFFF
#define FLOAT_FRAC_BIT8       0x00008000
#define FLOAT_BIAS            127
1951 static Float conv_bf16_to_float( UInt input )
1953 /* input is 16-bit bfloat.
1954 bias +127, exponent 8-bits, fraction 7-bits
1956 output is 32-bit float.
1957 bias +127, exponent 8-bits, fraction 23-bits
1960 UInt input_exp, input_fraction, unbiased_exp;
1961 UInt output_exp, output_fraction;
1962 UInt sign;
1963 union convert_t conv;
1965 sign = (UInt)(input & BF16_SIGN_MASK);
1966 input_exp = input & BF16_EXP_MASK;
1967 unbiased_exp = (input_exp >> 7) - (UInt)BF16_BIAS;
1968 input_fraction = input & BF16_FRAC_MASK;
1970 if (((input_exp & BF16_EXP_MASK) == BF16_EXP_MASK) &&
1971 (input_fraction != 0)) {
1972 /* input is NaN or SNaN, exp all 1's, fraction != 0 */
1973 output_exp = FLOAT_EXP_MASK;
1974 output_fraction = input_fraction;
1976 } else if(((input_exp & BF16_EXP_MASK) == BF16_EXP_MASK) &&
1977 ( input_fraction == 0)) {
1978 /* input is infinity, exp all 1's, fraction = 0 */
1979 output_exp = FLOAT_EXP_MASK;
1980 output_fraction = 0;
1982 } else if((input_exp == 0) && (input_fraction == 0)) {
1983 /* input is zero */
1984 output_exp = 0;
1985 output_fraction = 0;
1987 } else if((input_exp == 0) && (input_fraction != 0)) {
1988 /* input is denormal */
1989 output_fraction = input_fraction;
1990 output_exp = (-(Int)BF16_BIAS + (Int)FLOAT_BIAS ) << 23;
1992 } else {
1993 /* result is normal */
1994 output_exp = (unbiased_exp + FLOAT_BIAS) << 23;
1995 output_fraction = input_fraction;
1998 conv.u32 = sign << (31 - 15) | output_exp | (output_fraction << (23-7));
1999 return conv.f;
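/* Illustrative sketch, not part of the build: bfloat16 is exactly the
   top half of an IEEE binary32, so for representable inputs the
   conversion above produces the same bits as shifting the 16-bit
   value left by 16. A minimal self-check: */
#if 0
static void example_bf16_expand ( void )
{
   UInt bf16 = 0x3F80;   /* bfloat16 encoding of 1.0 */
   Float f = conv_bf16_to_float( bf16 );
   vassert( reinterpret_float_as_int( f ) == (bf16 << 16) );
   vassert( f == 1.0f );
}
#endif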
2002 static UInt conv_float_to_bf16( UInt input )
2004 /* input is 32-bit float stored as unsigned 32-bit.
2005 bias +127, exponent 8-bits, fraction 23-bits
2007 output is 16-bit bfloat.
2008 bias +127, exponent 8-bits, fraction 7-bits
2010 If the unbiased exponent of the input is greater than the maximum
2011 unbiased exponent representable in the 16-bit format, the result
2012 is infinity.
2015 UInt input_exp, input_fraction;
2016 UInt output_exp, output_fraction;
2017 UInt result, sign;
2019 sign = input & FLOAT_SIGN_MASK;
2020 input_exp = input & FLOAT_EXP_MASK;
2021 input_fraction = input & FLOAT_FRAC_MASK;
2023 if (((input_exp & FLOAT_EXP_MASK) == FLOAT_EXP_MASK) &&
2024 (input_fraction != 0)) {
2025 /* input is NaN or SNaN, exp all 1's, fraction != 0 */
2026 output_exp = BF16_EXP_MASK;
2027 output_fraction = (ULong)input_fraction >> (23 - 7);
2028 } else if (((input_exp & FLOAT_EXP_MASK) == FLOAT_EXP_MASK) &&
2029 ( input_fraction == 0)) {
2030 /* input is infinity, exp all 1's, fraction = 0 */
2031 output_exp = BF16_EXP_MASK;
2032 output_fraction = 0;
2033 } else if ((input_exp == 0) && (input_fraction == 0)) {
2034 /* input is zero */
2035 output_exp = 0;
2036 output_fraction = 0;
2037 } else if ((input_exp == 0) && (input_fraction != 0)) {
2038 /* input is denormal */
2039 output_exp = 0;
2040 output_fraction = (ULong)input_fraction >> (23 - 7);
2041 } else {
2042 /* result is normal */
2043 output_exp = (input_exp - BF16_BIAS + FLOAT_BIAS) >> (23 - 7);
2044 output_fraction = (ULong)input_fraction >> (23 - 7);
2046 /* Round the result. Look at the 8th bit of the 32-bit floating
2047 point fraction. The bfloat16 fraction is only 7 bits wide, so if
2048 the 8th bit of the F32 fraction is a 1 we need to round up by
2049 adding 1 to the output fraction. */
2050 if ((input_fraction & FLOAT_FRAC_BIT8) == FLOAT_FRAC_BIT8)
2051 /* Round the F16 fraction up by 1 */
2052 output_fraction = output_fraction + 1;
2055 result = sign >> (31 - 15) | output_exp | output_fraction;
2056 return result;
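/* Illustrative sketch, not part of the build: the rounding step above
   rounds half-way cases up rather than to nearest-even. Two assumed
   example encodings: */
#if 0
static void example_float_to_bf16_rounding ( void )
{
   /* 0x3F808000 is 1.00390625; fraction bit 8 (0x8000) is set, so the
      7-bit bfloat16 fraction is rounded up: 0x3F80 -> 0x3F81. */
   vassert( conv_float_to_bf16( 0x3F808000 ) == 0x3F81 );
   /* 0x3F807FFF is just below the rounding threshold; the low fraction
      bits are simply dropped, giving the bfloat16 encoding of 1.0. */
   vassert( conv_float_to_bf16( 0x3F807FFF ) == 0x3F80 );
}
#endif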
2059 static Float conv_double_to_float( Double src )
2061 return (float) src ;
2065 static Double negate_double( Double input )
2067 /* Don't negate a NaN value. A NaN has an exponent
2068 of all 1's and a non-zero fraction. */
2069 __attribute__ ((aligned (128))) union convert_t conv;
2071 conv.d = input;
2073 if ( ( ( conv.u64 & I64_EXP_MASK) == I64_EXP_MASK )
2074 && ( ( conv.u64 & I64_FRACTION_MASK ) != 0 ) )
2075 return input;
2076 else
2077 return -input;
2080 static Float negate_float( Float input )
2083 /* Don't negate a NaN value. A NaN has an exponent
2084 of all 1's and a non-zero fraction. */
2084 __attribute__ ((aligned (128))) union convert_t conv;
2086 conv.f = input;
2088 if ( ( ( conv.u32 & I32_EXP_MASK) == I32_EXP_MASK )
2089 && ( ( conv.u32 & I32_FRACTION_MASK ) != 0 ) )
2090 return input;
2091 else
2092 return -input;
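/* Illustrative sketch, not part of the build: negate_float and
   negate_double flip the sign of ordinary values but pass NaNs
   through unchanged. */
#if 0
static void example_negate_float_nan ( void )
{
   Float nan = reinterpret_int_as_float( 0x7FC00000 );  /* quiet NaN */
   vassert( reinterpret_float_as_int( negate_float( nan ) ) == 0x7FC00000 );
   vassert( negate_float( 2.0f ) == -2.0f );
}
#endif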
2095 /* This C-helper takes a vector of two 32-bit floating point values
2096 * and returns a vector containing two 16-bit bfloats.
2097 input: word0 word1
2098 output: 0x0 hword1 0x0 hword3
2099 Called from generated code.
2101 ULong convert_from_floattobf16_helper( ULong src ) {
2102 ULong resultHi, resultLo;
2104 resultHi = (ULong)conv_float_to_bf16( (UInt)(src >> 32));
2105 resultLo = (ULong)conv_float_to_bf16( (UInt)(src & 0xFFFFFFFF));
2106 return (resultHi << 32) | resultLo;
2110 /* This C-helper takes a vector of two 16-bit bfloat values
2111 * and returns a vector containing two 32-bit floats.
2112 input: 0x0 hword1 0x0 hword3
2113 output: word0 word1
2115 ULong convert_from_bf16tofloat_helper( ULong src ) {
2116 ULong result;
2117 union convert_t conv;
2118 conv.f = conv_bf16_to_float( (UInt)(src >> 32) );
2119 result = (ULong) conv.u32;
2120 conv.f = conv_bf16_to_float( (UInt)(src & 0xFFFFFFFF));
2121 result = (result << 32) | (ULong) conv.u32;
2122 return result;
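/* Illustrative sketch, not part of the build: a round trip through the
   two packed-lane helpers above, using values that are exactly
   representable in bfloat16. */
#if 0
static void example_bf16_pack_round_trip ( void )
{
   /* Pack 1.0f (0x3F800000) and 2.0f (0x40000000) into one doubleword,
      narrow both lanes to bfloat16, then widen them again. */
   ULong two_floats = ((ULong)0x3F800000 << 32) | 0x40000000;
   ULong two_bf16 = convert_from_floattobf16_helper( two_floats );
   vassert( two_bf16 == (((ULong)0x3F80 << 32) | 0x4000) );
   vassert( convert_from_bf16tofloat_helper( two_bf16 ) == two_floats );
}
#endif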
2125 void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst,
2126 UInt offset_ACC,
2127 ULong srcA_hi, ULong srcA_lo,
2128 ULong srcB_hi, ULong srcB_lo,
2129 UInt masks_inst )
2131 UInt i, j, mask, inst, acc_entry, prefix_inst;
2133 UInt srcA_word[4][2]; /* word, hword */
2134 UInt srcB_word[4][2]; /* word, hword */
2135 Double src10, src11, src20, src21;
2136 UInt acc_word_input[4];
2137 Float acc_word[4];
2138 Double prod;
2139 Double msum;
2140 UInt result[4];
2141 UInt pmsk = 0;
2142 UInt xmsk = 0;
2143 UInt ymsk = 0;
2145 mask = 0xFFFF;
2146 inst = (masks_inst >> 5) & 0xFF;
2147 prefix_inst = (masks_inst >> 13) & 0x1;
2148 acc_entry = masks_inst & 0xF;
2150 if ( prefix_inst == 0 ) {
2151 /* Set the masks for non-prefix instructions */
2152 pmsk = 0b11;
2153 xmsk = 0b1111;
2154 ymsk = 0b1111;
2156 } else {
2157 /* Use mask supplied with prefix inst */
2158 pmsk = (masks_inst >> 28) & 0x3;
2159 xmsk = (masks_inst >> 18) & 0xF;
2160 ymsk = (masks_inst >> 14) & 0xF;
2163 /* Address half-words using IBM numbering */
2164 for( i = 0; i < 4; i++) {
2165 /* Get the ACC contents directly from the PPC64 state */
2166 get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word_input);
2168 acc_word[3] = reinterpret_int_as_float( acc_word_input[3] );
2169 acc_word[2] = reinterpret_int_as_float( acc_word_input[2] );
2170 acc_word[1] = reinterpret_int_as_float( acc_word_input[1] );
2171 acc_word[0] = reinterpret_int_as_float( acc_word_input[0] );
2173 for( j = 0; j < 2; j++) { // input is in double words
2174 srcA_word[3][j] = (UInt)((srcA_hi >> (48-16*j)) & mask);
2175 srcA_word[2][j] = (UInt)((srcA_hi >> (16-16*j)) & mask);
2176 srcA_word[1][j] = (UInt)((srcA_lo >> (48-16*j)) & mask);
2177 srcA_word[0][j] = (UInt)((srcA_lo >> (16-16*j)) & mask);
2179 srcB_word[3][j] = (UInt)((srcB_hi >> (48-16*j)) & mask);
2180 srcB_word[2][j] = (UInt)((srcB_hi >> (16-16*j)) & mask);
2181 srcB_word[1][j] = (UInt)((srcB_lo >> (48-16*j)) & mask);
2182 srcB_word[0][j] = (UInt)((srcB_lo >> (16-16*j)) & mask);
2185 /* Note: the ISA is not consistent in its source naming. We use the
2186 names src10, src11, src20, src21 from the xvf16ger2 descriptions.
2188 for( j = 0; j < 4; j++) {
2189 if (((pmsk >> 1) & 0x1) == 0) {
2190 src10 = 0;
2191 src20 = 0;
2192 } else {
2193 if (( inst == XVF16GER2 ) || ( inst == XVF16GER2PP )
2194 || ( inst == XVF16GER2PN ) || ( inst == XVF16GER2NP )
2195 || ( inst == XVF16GER2NN )) {
2196 src10 = conv_f16_to_double((ULong)srcA_word[i][0]);
2197 src20 = conv_f16_to_double((ULong)srcB_word[j][0]);
2198 } else {
2199 /* Input is in bfloat format, result is stored in the
2200 "traditional" 64-bit float format. */
2201 src10 = (double)conv_bf16_to_float((ULong)srcA_word[i][0]);
2202 src20 = (double)conv_bf16_to_float((ULong)srcB_word[j][0]);
2206 if ((pmsk & 0x1) == 0) {
2207 src11 = 0;
2208 src21 = 0;
2209 } else {
2210 if (( inst == XVF16GER2 ) || ( inst == XVF16GER2PP )
2211 || ( inst == XVF16GER2PN ) || ( inst == XVF16GER2NP )
2212 || ( inst == XVF16GER2NN )) {
2213 src11 = conv_f16_to_double((ULong)srcA_word[i][1]);
2214 src21 = conv_f16_to_double((ULong)srcB_word[j][1]);
2215 } else {
2216 /* Input is in bfloat format, result is stored in the
2217 "traditional" 64-bit float format. */
2218 src11 = (double)conv_bf16_to_float((ULong)srcA_word[i][1]);
2219 src21 = (double)conv_bf16_to_float((ULong)srcB_word[j][1]);
2223 prod = src10 * src20;
2224 msum = prod + src11 * src21;
2226 if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) {
2227 /* Note, we do not track the exception handling bits
2228 ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR. */
2230 if (( inst == XVF16GER2 ) || ( inst == XVBF16GER2 ) )
2231 result[j] = reinterpret_float_as_int(
2232 conv_double_to_float(msum) );
2234 else if (( inst == XVF16GER2PP ) || (inst == XVBF16GER2PP ))
2235 result[j] = reinterpret_float_as_int(
2236 conv_double_to_float(msum)
2237 + acc_word[j] );
2239 else if (( inst == XVF16GER2PN ) || ( inst == XVBF16GER2PN ))
2240 result[j] = reinterpret_float_as_int(
2241 conv_double_to_float(msum)
2242 + negate_float( acc_word[j] ) );
2244 else if (( inst == XVF16GER2NP ) || ( inst == XVBF16GER2NP ))
2245 result[j] = reinterpret_float_as_int(
2246 conv_double_to_float( negate_double( msum ) )
2247 + acc_word[j] );
2249 else if (( inst == XVF16GER2NN ) || ( inst == XVBF16GER2NN ))
2250 result[j] = reinterpret_float_as_int(
2251 conv_double_to_float( negate_double( msum ) )
2252 + negate_float( acc_word[j] ) );
2253 } else {
2254 result[j] = 0;
2257 write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result);
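/* Illustrative sketch, not part of the build: the masks_inst argument
   of the 16-bit and 32-bit GER helpers packs several fields, recovered
   by the shift/mask decode at the top of each helper. (The 64-bit
   helper below uses different offsets.) The layout, and a hypothetical
   encoder for the non-prefixed case:

      masks_inst[29:28]  pmsk        (prefixed 16-bit forms only)
      masks_inst[21:18]  xmsk
      masks_inst[17:14]  ymsk
      masks_inst[13]     prefix_inst (1 = prefixed form; masks valid)
      masks_inst[12:5]   inst        (e.g. XVF16GER2)
      masks_inst[3:0]    acc_entry   (which of the 8 ACC entries)     */
#if 0
static UInt example_pack_masks_inst ( UInt inst, UInt acc_entry )
{
   return ((inst & 0xFF) << 5) | (acc_entry & 0xF);
}
#endif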
2261 void vsx_matrix_32bit_float_ger_dirty_helper( VexGuestPPC64State* gst,
2262 UInt offset_ACC,
2263 ULong srcA_hi, ULong srcA_lo,
2264 ULong srcB_hi, ULong srcB_lo,
2265 UInt masks_inst )
2267 UInt i, j, mask, inst, acc_entry, prefix_inst;
2269 Float srcA_word[4];
2270 Float srcB_word[4];
2271 UInt acc_word_input[4];
2272 Float acc_word[4];
2273 UInt result[4];
2274 UInt xmsk = 0;
2275 UInt ymsk = 0;
2276 Float src1, src2, acc;
2278 mask = 0xFFFFFFFF;
2279 inst = (masks_inst >> 5) & 0xFF;
2280 prefix_inst = (masks_inst >> 13) & 0x1;
2281 acc_entry = masks_inst & 0xF;
2283 if ( prefix_inst == 0 ) {
2284 /* Set the masks for non-prefix instructions */
2285 xmsk = 0b1111;
2286 ymsk = 0b1111;
2288 } else {
2289 xmsk = (masks_inst >> 18) & 0xF;
2290 ymsk = (masks_inst >> 14) & 0xF;
2293 srcA_word[3] = reinterpret_int_as_float( (srcA_hi >> 32) & mask );
2294 srcA_word[2] = reinterpret_int_as_float( srcA_hi & mask );
2295 srcA_word[1] = reinterpret_int_as_float( (srcA_lo >> 32) & mask );
2296 srcA_word[0] = reinterpret_int_as_float( srcA_lo & mask );
2298 srcB_word[3] = reinterpret_int_as_float( (srcB_hi >> 32) & mask );
2299 srcB_word[2] = reinterpret_int_as_float( srcB_hi & mask );
2300 srcB_word[1] = reinterpret_int_as_float( (srcB_lo >> 32) & mask );
2301 srcB_word[0] = reinterpret_int_as_float( srcB_lo & mask );
2303 /* Address words using IBM numbering */
2304 for( i = 0; i < 4; i++) {
2305 /* Get the ACC contents directly from the PPC64 state */
2306 get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word_input);
2308 acc_word[3] = reinterpret_int_as_float( acc_word_input[3] );
2309 acc_word[2] = reinterpret_int_as_float( acc_word_input[2] );
2310 acc_word[1] = reinterpret_int_as_float( acc_word_input[1] );
2311 acc_word[0] = reinterpret_int_as_float( acc_word_input[0] );
2313 for( j = 0; j < 4; j++) {
2315 if ((((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) == 0x1) {
2316 /* Note, we do not track the exception handling bits
2317 ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR. */
2319 src1 = srcA_word[i];
2320 src2 = srcB_word[j];
2321 acc = acc_word[j];
2323 if ( inst == XVF32GER )
2324 result[j] = reinterpret_float_as_int( src1 * src2 );
2326 else if ( inst == XVF32GERPP )
2327 result[j] = reinterpret_float_as_int( ( src1 * src2 ) + acc );
2329 else if ( inst == XVF32GERPN )
2330 result[j] = reinterpret_float_as_int( ( src1 * src2 )
2331 + negate_float( acc ) );
2333 else if ( inst == XVF32GERNP )
2334 result[j] = reinterpret_float_as_int(
2335 negate_float( src1 * src2 ) + acc );
2337 else if ( inst == XVF32GERNN )
2338 result[j] = reinterpret_float_as_int(
2339 negate_float( src1 * src2 ) + negate_float( acc ) );
2340 } else {
2341 result[j] = 0;
2344 write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result);
2348 void vsx_matrix_64bit_float_ger_dirty_helper( VexGuestPPC64State* gst,
2349 UInt offset_ACC,
2350 ULong srcX_hi, ULong srcX_lo,
2351 ULong srcY_hi, ULong srcY_lo,
2352 UInt masks_inst )
2354 /* Computes two of the four rows of one ACC entry; start_i selects the pair. */
2355 UInt i, j, inst, acc_entry, prefix_inst;
2357 Double srcX_dword[4];
2358 Double srcY_dword[2];
2359 Double result[2];
2360 UInt result_uint[4];
2361 ULong result_ulong[2];
2362 Double acc_dword[4];
2363 ULong acc_word_ulong[2];
2364 UInt acc_word_input[4];
2365 UInt xmsk = 0;
2366 UInt ymsk = 0;
2367 UInt start_i;
2368 Double src1, src2, acc;
2370 inst = (masks_inst >> 8) & 0xFF;
2371 prefix_inst = (masks_inst >> 16) & 0x1;
2372 start_i = (masks_inst >> 4) & 0xF;
2373 acc_entry = masks_inst & 0xF;
2375 if ( prefix_inst == 0 ) {
2376 /* Set the masks for non-prefix instructions */
2377 xmsk = 0b1111;
2378 ymsk = 0b11;
2380 } else {
2381 xmsk = (masks_inst >> 21) & 0xF;
2382 ymsk = (masks_inst >> 19) & 0x3;
2385 /* Need to store the srcX_dword in the correct index for the following
2386 for loop. */
2387 srcX_dword[1+start_i] = reinterpret_long_as_double( srcX_lo);
2388 srcX_dword[0+start_i] = reinterpret_long_as_double( srcX_hi );
2389 srcY_dword[1] = reinterpret_long_as_double( srcY_lo );
2390 srcY_dword[0] = reinterpret_long_as_double( srcY_hi );
2392 for( i = start_i; i < start_i+2; i++) {
2393 /* Get the ACC contents directly from the PPC64 state */
2394 get_ACC_entry (gst, offset_ACC, acc_entry, 3 - i,
2395 acc_word_input);
2397 acc_word_ulong[1] = acc_word_input[3];
2398 acc_word_ulong[1] = (acc_word_ulong[1] << 32) | acc_word_input[2];
2399 acc_word_ulong[0] = acc_word_input[1];
2400 acc_word_ulong[0] = (acc_word_ulong[0] << 32) | acc_word_input[0];
2401 acc_dword[0] = reinterpret_long_as_double( acc_word_ulong[0] );
2402 acc_dword[1] = reinterpret_long_as_double( acc_word_ulong[1]);
2404 for( j = 0; j < 2; j++) {
2406 if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) {
2407 /* Note, we do not track the exception handling bits
2408 ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR. */
2410 src1 = srcX_dword[i];
2411 src2 = srcY_dword[j];
2412 acc = acc_dword[j];
2414 if ( inst == XVF64GER )
2415 result[j] = src1 * src2;
2417 else if ( inst == XVF64GERPP )
2418 result[j] = ( src1 * src2 ) + acc;
2420 else if ( inst == XVF64GERPN )
2421 result[j] = ( src1 * src2 ) + negate_double( acc );
2423 else if ( inst == XVF64GERNP )
2424 result[j] = negate_double( src1 * src2 ) + acc;
2426 else if ( inst == XVF64GERNN )
2427 result[j] = negate_double( src1 * src2 ) + negate_double( acc );
2429 } else {
2430 result[j] = 0;
2434 /* Split the two double results into four 32-bit unsigned ints in
2435 order to store them to the ACC. */
2436 result_ulong[0] = reinterpret_double_as_long ( result[0] );
2437 result_ulong[1] = reinterpret_double_as_long ( result[1] );
2439 result_uint[0] = result_ulong[0] & 0xFFFFFFFF;
2440 result_uint[1] = (result_ulong[0] >> 32) & 0xFFFFFFFF;
2441 result_uint[2] = result_ulong[1] & 0xFFFFFFFF;
2442 result_uint[3] = (result_ulong[1] >> 32) & 0xFFFFFFFF;
2444 write_ACC_entry (gst, offset_ACC, acc_entry, 3 - i,
2445 result_uint);
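/* Illustrative sketch, not part of the build: a double is stored in
   the ACC as two 32-bit words, low word first, matching the split
   performed at the end of the helper above. */
#if 0
static void example_split_double_for_ACC ( void )
{
   ULong bits = reinterpret_double_as_long( 1.0 ); /* 0x3FF0000000000000 */
   UInt lo = bits & 0xFFFFFFFF;                    /* 0x00000000 */
   UInt hi = (bits >> 32) & 0xFFFFFFFF;            /* 0x3FF00000 */
   vassert( reinterpret_long_as_double( ((ULong)hi << 32) | lo ) == 1.0 );
}
#endif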
2449 /* CALLED FROM GENERATED CODE */
2450 /* DIRTY HELPER: uses inline assembly to execute the darn (deliver a
2451 random number) instruction on the host machine and returns the value
2452 the host produced. The helper does not change the guest state or
2453 guest memory. */
2454 ULong darn_dirty_helper ( UInt L )
2456 ULong val = 0xFFFFFFFFFFFFFFFFULL; /* error */
2458 # if defined (HAS_DARN)
2459 if ( L == 0)
2460 __asm__ __volatile__(".machine push; .machine power9;" \
2461 "darn %0,0; .machine pop;" : "=r"(val));
2462 else if (L == 1)
2463 __asm__ __volatile__(".machine push; .machine power9;" \
2464 "darn %0,1; .machine pop;" : "=r"(val));
2465 else if (L == 2)
2466 __asm__ __volatile__(".machine push; .machine power9;"
2467 "darn %0,2; .machine pop;" : "=r"(val));
2468 # endif
2470 return val;
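/* Usage sketch, not part of the build: per ISA 3.0 the L argument
   selects the darn format: L=0 a conditioned 32-bit value, L=1 a
   conditioned 64-bit value, L=2 a raw 64-bit value. An all-ones
   result signals that the hardware could not deliver a random
   number. */
#if 0
static Bool example_darn_ok ( ULong* out )
{
   *out = darn_dirty_helper( 1 );
   return *out != 0xFFFFFFFFFFFFFFFFULL;
}
#endif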
2473 /*----------------------------------------------*/
2474 /*--- The exported fns .. ---*/
2475 /*----------------------------------------------*/
2477 /* VISIBLE TO LIBVEX CLIENT */
2478 UInt LibVEX_GuestPPC32_get_CR ( /*IN*/const VexGuestPPC32State* vex_state )
2480 # define FIELD(_n) \
2481 ( ( (UInt) \
2482 ( (vex_state->guest_CR##_n##_321 & (7<<1)) \
2483 | (vex_state->guest_CR##_n##_0 & 1) \
2486 << (4 * (7-(_n))) \
2489 return
2490 FIELD(0) | FIELD(1) | FIELD(2) | FIELD(3)
2491 | FIELD(4) | FIELD(5) | FIELD(6) | FIELD(7);
2493 # undef FIELD
2497 /* VISIBLE TO LIBVEX CLIENT */
2498 /* Note: %CR is 32 bits even for ppc64 */
2499 UInt LibVEX_GuestPPC64_get_CR ( /*IN*/const VexGuestPPC64State* vex_state )
2501 # define FIELD(_n) \
2502 ( ( (UInt) \
2503 ( (vex_state->guest_CR##_n##_321 & (7<<1)) \
2504 | (vex_state->guest_CR##_n##_0 & 1) \
2507 << (4 * (7-(_n))) \
2510 return
2511 FIELD(0) | FIELD(1) | FIELD(2) | FIELD(3)
2512 | FIELD(4) | FIELD(5) | FIELD(6) | FIELD(7);
2514 # undef FIELD
2518 /* VISIBLE TO LIBVEX CLIENT */
2519 void LibVEX_GuestPPC32_put_CR ( UInt cr_native,
2520 /*OUT*/VexGuestPPC32State* vex_state )
2522 UInt t;
2524 # define FIELD(_n) \
2525 do { \
2526 t = cr_native >> (4*(7-(_n))); \
2527 vex_state->guest_CR##_n##_0 = toUChar(t & 1); \
2528 vex_state->guest_CR##_n##_321 = toUChar(t & (7<<1)); \
2529 } while (0)
2531 FIELD(0);
2532 FIELD(1);
2533 FIELD(2);
2534 FIELD(3);
2535 FIELD(4);
2536 FIELD(5);
2537 FIELD(6);
2538 FIELD(7);
2540 # undef FIELD
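/* Illustrative sketch, not part of the build: the eight 4-bit CR
   fields pack into the 32-bit CR with CR0 in the top nibble and CR7
   in the bottom nibble, so a put followed by a get is exact. */
#if 0
static void example_CR_round_trip ( VexGuestPPC32State* st )
{
   LibVEX_GuestPPC32_put_CR( 0x80000002, st );
   vassert( st->guest_CR0_0 == 0 );          /* CR0 = 0b1000 (LT set) */
   vassert( st->guest_CR0_321 == (4 << 1) ); /* CR0 bits 3:1 */
   vassert( LibVEX_GuestPPC32_get_CR( st ) == 0x80000002 );
}
#endif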
2544 /* VISIBLE TO LIBVEX CLIENT */
2545 /* Note: %CR is 32 bits even for ppc64 */
2546 void LibVEX_GuestPPC64_put_CR ( UInt cr_native,
2547 /*OUT*/VexGuestPPC64State* vex_state )
2549 UInt t;
2551 # define FIELD(_n) \
2552 do { \
2553 t = cr_native >> (4*(7-(_n))); \
2554 vex_state->guest_CR##_n##_0 = toUChar(t & 1); \
2555 vex_state->guest_CR##_n##_321 = toUChar(t & (7<<1)); \
2556 } while (0)
2558 FIELD(0);
2559 FIELD(1);
2560 FIELD(2);
2561 FIELD(3);
2562 FIELD(4);
2563 FIELD(5);
2564 FIELD(6);
2565 FIELD(7);
2567 # undef FIELD
2571 /* VISIBLE TO LIBVEX CLIENT */
2572 UInt LibVEX_GuestPPC32_get_XER ( /*IN*/const VexGuestPPC32State* vex_state )
2574 UInt w = 0;
2575 w |= ( ((UInt)vex_state->guest_XER_BC) & 0xFF );
2576 w |= ( (((UInt)vex_state->guest_XER_SO) & 0x1) << 31 );
2577 w |= ( (((UInt)vex_state->guest_XER_OV) & 0x1) << 30 );
2578 w |= ( (((UInt)vex_state->guest_XER_CA) & 0x1) << 29 );
2579 w |= ( (((UInt)vex_state->guest_XER_OV32) & 0x1) << 19 );
2580 w |= ( (((UInt)vex_state->guest_XER_CA32) & 0x1) << 18 );
2581 return w;
2585 /* VISIBLE TO LIBVEX CLIENT */
2586 /* Note: %XER is 32 bits even for ppc64 */
2587 UInt LibVEX_GuestPPC64_get_XER ( /*IN*/const VexGuestPPC64State* vex_state )
2589 UInt w = 0;
2590 w |= ( ((UInt)vex_state->guest_XER_BC) & 0xFF );
2591 w |= ( (((UInt)vex_state->guest_XER_SO) & 0x1) << 31 );
2592 w |= ( (((UInt)vex_state->guest_XER_OV) & 0x1) << 30 );
2593 w |= ( (((UInt)vex_state->guest_XER_CA) & 0x1) << 29 );
2594 w |= ( (((UInt)vex_state->guest_XER_OV32) & 0x1) << 19 );
2595 w |= ( (((UInt)vex_state->guest_XER_CA32) & 0x1) << 18 );
2596 return w;
2600 /* VISIBLE TO LIBVEX CLIENT */
2601 void LibVEX_GuestPPC32_put_XER ( UInt xer_native,
2602 /*OUT*/VexGuestPPC32State* vex_state )
2604 vex_state->guest_XER_BC = toUChar(xer_native & 0xFF);
2605 vex_state->guest_XER_SO = toUChar((xer_native >> 31) & 0x1);
2606 vex_state->guest_XER_OV = toUChar((xer_native >> 30) & 0x1);
2607 vex_state->guest_XER_CA = toUChar((xer_native >> 29) & 0x1);
2608 vex_state->guest_XER_OV32 = toUChar((xer_native >> 19) & 0x1);
2609 vex_state->guest_XER_CA32 = toUChar((xer_native >> 18) & 0x1);
2612 /* VISIBLE TO LIBVEX CLIENT */
2613 /* Note: %XER is 32 bits even for ppc64 */
2614 void LibVEX_GuestPPC64_put_XER ( UInt xer_native,
2615 /*OUT*/VexGuestPPC64State* vex_state )
2617 vex_state->guest_XER_BC = toUChar(xer_native & 0xFF);
2618 vex_state->guest_XER_SO = toUChar((xer_native >> 31) & 0x1);
2619 vex_state->guest_XER_OV = toUChar((xer_native >> 30) & 0x1);
2620 vex_state->guest_XER_CA = toUChar((xer_native >> 29) & 0x1);
2621 vex_state->guest_XER_OV32 = toUChar((xer_native >> 19) & 0x1);
2622 vex_state->guest_XER_CA32 = toUChar((xer_native >> 18) & 0x1);
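/* Illustrative sketch, not part of the build: the packed 32-bit XER
   layout used by the get/put pairs above is SO=bit 31, OV=bit 30,
   CA=bit 29, OV32=bit 19, CA32=bit 18 and the byte count in bits
   7:0. */
#if 0
static void example_XER_round_trip ( VexGuestPPC64State* st )
{
   UInt xer = (1u << 31) | (1u << 29) | 0x10;  /* SO, CA, BC=16 */
   LibVEX_GuestPPC64_put_XER( xer, st );
   vassert( st->guest_XER_SO == 1 );
   vassert( st->guest_XER_CA == 1 );
   vassert( st->guest_XER_BC == 0x10 );
   vassert( LibVEX_GuestPPC64_get_XER( st ) == xer );
}
#endif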
2625 /* VISIBLE TO LIBVEX CLIENT */
2626 void LibVEX_GuestPPC32_initialise ( /*OUT*/VexGuestPPC32State* vex_state )
2628 Int i;
2629 vex_state->host_EvC_FAILADDR = 0;
2630 vex_state->host_EvC_COUNTER = 0;
2631 vex_state->pad3 = 0;
2632 vex_state->pad4 = 0;
2634 vex_state->guest_GPR0 = 0;
2635 vex_state->guest_GPR1 = 0;
2636 vex_state->guest_GPR2 = 0;
2637 vex_state->guest_GPR3 = 0;
2638 vex_state->guest_GPR4 = 0;
2639 vex_state->guest_GPR5 = 0;
2640 vex_state->guest_GPR6 = 0;
2641 vex_state->guest_GPR7 = 0;
2642 vex_state->guest_GPR8 = 0;
2643 vex_state->guest_GPR9 = 0;
2644 vex_state->guest_GPR10 = 0;
2645 vex_state->guest_GPR11 = 0;
2646 vex_state->guest_GPR12 = 0;
2647 vex_state->guest_GPR13 = 0;
2648 vex_state->guest_GPR14 = 0;
2649 vex_state->guest_GPR15 = 0;
2650 vex_state->guest_GPR16 = 0;
2651 vex_state->guest_GPR17 = 0;
2652 vex_state->guest_GPR18 = 0;
2653 vex_state->guest_GPR19 = 0;
2654 vex_state->guest_GPR20 = 0;
2655 vex_state->guest_GPR21 = 0;
2656 vex_state->guest_GPR22 = 0;
2657 vex_state->guest_GPR23 = 0;
2658 vex_state->guest_GPR24 = 0;
2659 vex_state->guest_GPR25 = 0;
2660 vex_state->guest_GPR26 = 0;
2661 vex_state->guest_GPR27 = 0;
2662 vex_state->guest_GPR28 = 0;
2663 vex_state->guest_GPR29 = 0;
2664 vex_state->guest_GPR30 = 0;
2665 vex_state->guest_GPR31 = 0;
2667 /* Initialise the vector state. */
2668 # define VECZERO(_vr) _vr[0]=_vr[1]=_vr[2]=_vr[3] = 0;
2670 VECZERO(vex_state->guest_VSR0 );
2671 VECZERO(vex_state->guest_VSR1 );
2672 VECZERO(vex_state->guest_VSR2 );
2673 VECZERO(vex_state->guest_VSR3 );
2674 VECZERO(vex_state->guest_VSR4 );
2675 VECZERO(vex_state->guest_VSR5 );
2676 VECZERO(vex_state->guest_VSR6 );
2677 VECZERO(vex_state->guest_VSR7 );
2678 VECZERO(vex_state->guest_VSR8 );
2679 VECZERO(vex_state->guest_VSR9 );
2680 VECZERO(vex_state->guest_VSR10);
2681 VECZERO(vex_state->guest_VSR11);
2682 VECZERO(vex_state->guest_VSR12);
2683 VECZERO(vex_state->guest_VSR13);
2684 VECZERO(vex_state->guest_VSR14);
2685 VECZERO(vex_state->guest_VSR15);
2686 VECZERO(vex_state->guest_VSR16);
2687 VECZERO(vex_state->guest_VSR17);
2688 VECZERO(vex_state->guest_VSR18);
2689 VECZERO(vex_state->guest_VSR19);
2690 VECZERO(vex_state->guest_VSR20);
2691 VECZERO(vex_state->guest_VSR21);
2692 VECZERO(vex_state->guest_VSR22);
2693 VECZERO(vex_state->guest_VSR23);
2694 VECZERO(vex_state->guest_VSR24);
2695 VECZERO(vex_state->guest_VSR25);
2696 VECZERO(vex_state->guest_VSR26);
2697 VECZERO(vex_state->guest_VSR27);
2698 VECZERO(vex_state->guest_VSR28);
2699 VECZERO(vex_state->guest_VSR29);
2700 VECZERO(vex_state->guest_VSR30);
2701 VECZERO(vex_state->guest_VSR31);
2702 VECZERO(vex_state->guest_VSR32);
2703 VECZERO(vex_state->guest_VSR33);
2704 VECZERO(vex_state->guest_VSR34);
2705 VECZERO(vex_state->guest_VSR35);
2706 VECZERO(vex_state->guest_VSR36);
2707 VECZERO(vex_state->guest_VSR37);
2708 VECZERO(vex_state->guest_VSR38);
2709 VECZERO(vex_state->guest_VSR39);
2710 VECZERO(vex_state->guest_VSR40);
2711 VECZERO(vex_state->guest_VSR41);
2712 VECZERO(vex_state->guest_VSR42);
2713 VECZERO(vex_state->guest_VSR43);
2714 VECZERO(vex_state->guest_VSR44);
2715 VECZERO(vex_state->guest_VSR45);
2716 VECZERO(vex_state->guest_VSR46);
2717 VECZERO(vex_state->guest_VSR47);
2718 VECZERO(vex_state->guest_VSR48);
2719 VECZERO(vex_state->guest_VSR49);
2720 VECZERO(vex_state->guest_VSR50);
2721 VECZERO(vex_state->guest_VSR51);
2722 VECZERO(vex_state->guest_VSR52);
2723 VECZERO(vex_state->guest_VSR53);
2724 VECZERO(vex_state->guest_VSR54);
2725 VECZERO(vex_state->guest_VSR55);
2726 VECZERO(vex_state->guest_VSR56);
2727 VECZERO(vex_state->guest_VSR57);
2728 VECZERO(vex_state->guest_VSR58);
2729 VECZERO(vex_state->guest_VSR59);
2730 VECZERO(vex_state->guest_VSR60);
2731 VECZERO(vex_state->guest_VSR61);
2732 VECZERO(vex_state->guest_VSR62);
2733 VECZERO(vex_state->guest_VSR63);
2735 VECZERO( vex_state->guest_ACC_0_r0 );
2736 VECZERO( vex_state->guest_ACC_0_r1 );
2737 VECZERO( vex_state->guest_ACC_0_r2 );
2738 VECZERO( vex_state->guest_ACC_0_r3 );
2739 VECZERO( vex_state->guest_ACC_1_r0 );
2740 VECZERO( vex_state->guest_ACC_1_r1 );
2741 VECZERO( vex_state->guest_ACC_1_r2 );
2742 VECZERO( vex_state->guest_ACC_1_r3 );
2743 VECZERO( vex_state->guest_ACC_2_r0 );
2744 VECZERO( vex_state->guest_ACC_2_r1 );
2745 VECZERO( vex_state->guest_ACC_2_r2 );
2746 VECZERO( vex_state->guest_ACC_2_r3 );
2747 VECZERO( vex_state->guest_ACC_3_r0 );
2748 VECZERO( vex_state->guest_ACC_3_r1 );
2749 VECZERO( vex_state->guest_ACC_3_r2 );
2750 VECZERO( vex_state->guest_ACC_3_r3 );
2751 VECZERO( vex_state->guest_ACC_4_r0 );
2752 VECZERO( vex_state->guest_ACC_4_r1 );
2753 VECZERO( vex_state->guest_ACC_4_r2 );
2754 VECZERO( vex_state->guest_ACC_4_r3 );
2755 VECZERO( vex_state->guest_ACC_5_r0 );
2756 VECZERO( vex_state->guest_ACC_5_r1 );
2757 VECZERO( vex_state->guest_ACC_5_r2 );
2758 VECZERO( vex_state->guest_ACC_5_r3 );
2759 VECZERO( vex_state->guest_ACC_6_r0 );
2760 VECZERO( vex_state->guest_ACC_6_r1 );
2761 VECZERO( vex_state->guest_ACC_6_r2 );
2762 VECZERO( vex_state->guest_ACC_6_r3 );
2763 VECZERO( vex_state->guest_ACC_7_r0 );
2764 VECZERO( vex_state->guest_ACC_7_r1 );
2765 VECZERO( vex_state->guest_ACC_7_r2 );
2766 VECZERO( vex_state->guest_ACC_7_r3 );
2768 # undef VECZERO
2770 vex_state->guest_CIA = 0;
2771 vex_state->guest_LR = 0;
2772 vex_state->guest_CTR = 0;
2774 vex_state->guest_XER_SO = 0;
2775 vex_state->guest_XER_OV = 0;
2776 vex_state->guest_XER_CA = 0;
2777 vex_state->guest_XER_BC = 0;
2779 vex_state->guest_XER_OV32 = 0;
2780 vex_state->guest_XER_CA32 = 0;
2782 vex_state->guest_CR0_321 = 0;
2783 vex_state->guest_CR0_0 = 0;
2784 vex_state->guest_CR1_321 = 0;
2785 vex_state->guest_CR1_0 = 0;
2786 vex_state->guest_CR2_321 = 0;
2787 vex_state->guest_CR2_0 = 0;
2788 vex_state->guest_CR3_321 = 0;
2789 vex_state->guest_CR3_0 = 0;
2790 vex_state->guest_CR4_321 = 0;
2791 vex_state->guest_CR4_0 = 0;
2792 vex_state->guest_CR5_321 = 0;
2793 vex_state->guest_CR5_0 = 0;
2794 vex_state->guest_CR6_321 = 0;
2795 vex_state->guest_CR6_0 = 0;
2796 vex_state->guest_CR7_321 = 0;
2797 vex_state->guest_CR7_0 = 0;
2799 vex_state->guest_FPROUND = PPCrm_NEAREST;
2800 vex_state->guest_DFPROUND = PPCrm_NEAREST;
2801 vex_state->guest_C_FPCC = 0;
2802 vex_state->pad2 = 0;
2804 vex_state->guest_VRSAVE = 0;
2806 # if defined(VGP_ppc64be_linux)
2807 /* By default, the HW for BE sets the VSCR[NJ] bit to 1.
2808 VSR is a 128-bit register, NJ bit is bit 111 (IBM numbering).
2809 However, VSCR is modeled as a 64-bit register. */
2810 vex_state->guest_VSCR = 0x1 << (127 - 111);
2811 # else
2812 /* LE API requires NJ be set to 0. */
2813 vex_state->guest_VSCR = 0x0;
2814 #endif
2816 vex_state->guest_EMNOTE = EmNote_NONE;
2818 vex_state->guest_CMSTART = 0;
2819 vex_state->guest_CMLEN = 0;
2821 vex_state->guest_NRADDR = 0;
2822 vex_state->guest_NRADDR_GPR2 = 0;
2824 vex_state->guest_REDIR_SP = -1;
2825 for (i = 0; i < VEX_GUEST_PPC32_REDIR_STACK_SIZE; i++)
2826 vex_state->guest_REDIR_STACK[i] = 0;
2828 vex_state->guest_IP_AT_SYSCALL = 0;
2829 vex_state->guest_SPRG3_RO = 0;
2830 vex_state->guest_PPR = 0x4ULL << 50; // medium priority
2831 vex_state->guest_PSPB = 0x100; // an arbitrary non-zero value to start with
2833 vex_state->padding1 = 0;
2834 /* vex_state->padding2 = 0; currently not used */
2838 /* VISIBLE TO LIBVEX CLIENT */
2839 void LibVEX_GuestPPC64_initialise ( /*OUT*/VexGuestPPC64State* vex_state )
2841 Int i;
2842 vex_state->host_EvC_FAILADDR = 0;
2843 vex_state->host_EvC_COUNTER = 0;
2844 vex_state->pad0 = 0;
2845 vex_state->guest_GPR0 = 0;
2846 vex_state->guest_GPR1 = 0;
2847 vex_state->guest_GPR2 = 0;
2848 vex_state->guest_GPR3 = 0;
2849 vex_state->guest_GPR4 = 0;
2850 vex_state->guest_GPR5 = 0;
2851 vex_state->guest_GPR6 = 0;
2852 vex_state->guest_GPR7 = 0;
2853 vex_state->guest_GPR8 = 0;
2854 vex_state->guest_GPR9 = 0;
2855 vex_state->guest_GPR10 = 0;
2856 vex_state->guest_GPR11 = 0;
2857 vex_state->guest_GPR12 = 0;
2858 vex_state->guest_GPR13 = 0;
2859 vex_state->guest_GPR14 = 0;
2860 vex_state->guest_GPR15 = 0;
2861 vex_state->guest_GPR16 = 0;
2862 vex_state->guest_GPR17 = 0;
2863 vex_state->guest_GPR18 = 0;
2864 vex_state->guest_GPR19 = 0;
2865 vex_state->guest_GPR20 = 0;
2866 vex_state->guest_GPR21 = 0;
2867 vex_state->guest_GPR22 = 0;
2868 vex_state->guest_GPR23 = 0;
2869 vex_state->guest_GPR24 = 0;
2870 vex_state->guest_GPR25 = 0;
2871 vex_state->guest_GPR26 = 0;
2872 vex_state->guest_GPR27 = 0;
2873 vex_state->guest_GPR28 = 0;
2874 vex_state->guest_GPR29 = 0;
2875 vex_state->guest_GPR30 = 0;
2876 vex_state->guest_GPR31 = 0;
2878 /* Initialise the vector state. */
2879 # define VECZERO(_vr) _vr[0]=_vr[1]=_vr[2]=_vr[3] = 0;
2881 VECZERO(vex_state->guest_VSR0 );
2882 VECZERO(vex_state->guest_VSR1 );
2883 VECZERO(vex_state->guest_VSR2 );
2884 VECZERO(vex_state->guest_VSR3 );
2885 VECZERO(vex_state->guest_VSR4 );
2886 VECZERO(vex_state->guest_VSR5 );
2887 VECZERO(vex_state->guest_VSR6 );
2888 VECZERO(vex_state->guest_VSR7 );
2889 VECZERO(vex_state->guest_VSR8 );
2890 VECZERO(vex_state->guest_VSR9 );
2891 VECZERO(vex_state->guest_VSR10);
2892 VECZERO(vex_state->guest_VSR11);
2893 VECZERO(vex_state->guest_VSR12);
2894 VECZERO(vex_state->guest_VSR13);
2895 VECZERO(vex_state->guest_VSR14);
2896 VECZERO(vex_state->guest_VSR15);
2897 VECZERO(vex_state->guest_VSR16);
2898 VECZERO(vex_state->guest_VSR17);
2899 VECZERO(vex_state->guest_VSR18);
2900 VECZERO(vex_state->guest_VSR19);
2901 VECZERO(vex_state->guest_VSR20);
2902 VECZERO(vex_state->guest_VSR21);
2903 VECZERO(vex_state->guest_VSR22);
2904 VECZERO(vex_state->guest_VSR23);
2905 VECZERO(vex_state->guest_VSR24);
2906 VECZERO(vex_state->guest_VSR25);
2907 VECZERO(vex_state->guest_VSR26);
2908 VECZERO(vex_state->guest_VSR27);
2909 VECZERO(vex_state->guest_VSR28);
2910 VECZERO(vex_state->guest_VSR29);
2911 VECZERO(vex_state->guest_VSR30);
2912 VECZERO(vex_state->guest_VSR31);
2913 VECZERO(vex_state->guest_VSR32);
2914 VECZERO(vex_state->guest_VSR33);
2915 VECZERO(vex_state->guest_VSR34);
2916 VECZERO(vex_state->guest_VSR35);
2917 VECZERO(vex_state->guest_VSR36);
2918 VECZERO(vex_state->guest_VSR37);
2919 VECZERO(vex_state->guest_VSR38);
2920 VECZERO(vex_state->guest_VSR39);
2921 VECZERO(vex_state->guest_VSR40);
2922 VECZERO(vex_state->guest_VSR41);
2923 VECZERO(vex_state->guest_VSR42);
2924 VECZERO(vex_state->guest_VSR43);
2925 VECZERO(vex_state->guest_VSR44);
2926 VECZERO(vex_state->guest_VSR45);
2927 VECZERO(vex_state->guest_VSR46);
2928 VECZERO(vex_state->guest_VSR47);
2929 VECZERO(vex_state->guest_VSR48);
2930 VECZERO(vex_state->guest_VSR49);
2931 VECZERO(vex_state->guest_VSR50);
2932 VECZERO(vex_state->guest_VSR51);
2933 VECZERO(vex_state->guest_VSR52);
2934 VECZERO(vex_state->guest_VSR53);
2935 VECZERO(vex_state->guest_VSR54);
2936 VECZERO(vex_state->guest_VSR55);
2937 VECZERO(vex_state->guest_VSR56);
2938 VECZERO(vex_state->guest_VSR57);
2939 VECZERO(vex_state->guest_VSR58);
2940 VECZERO(vex_state->guest_VSR59);
2941 VECZERO(vex_state->guest_VSR60);
2942 VECZERO(vex_state->guest_VSR61);
2943 VECZERO(vex_state->guest_VSR62);
2944 VECZERO(vex_state->guest_VSR63);
2946 # undef VECZERO
2948 vex_state->guest_CIA = 0;
2949 vex_state->guest_LR = 0;
2950 vex_state->guest_CTR = 0;
2952 vex_state->guest_XER_SO = 0;
2953 vex_state->guest_XER_OV = 0;
2954 vex_state->guest_XER_CA = 0;
2955 vex_state->guest_XER_BC = 0;
2957 vex_state->guest_CR0_321 = 0;
2958 vex_state->guest_CR0_0 = 0;
2959 vex_state->guest_CR1_321 = 0;
2960 vex_state->guest_CR1_0 = 0;
2961 vex_state->guest_CR2_321 = 0;
2962 vex_state->guest_CR2_0 = 0;
2963 vex_state->guest_CR3_321 = 0;
2964 vex_state->guest_CR3_0 = 0;
2965 vex_state->guest_CR4_321 = 0;
2966 vex_state->guest_CR4_0 = 0;
2967 vex_state->guest_CR5_321 = 0;
2968 vex_state->guest_CR5_0 = 0;
2969 vex_state->guest_CR6_321 = 0;
2970 vex_state->guest_CR6_0 = 0;
2971 vex_state->guest_CR7_321 = 0;
2972 vex_state->guest_CR7_0 = 0;
2974 vex_state->guest_FPROUND = PPCrm_NEAREST;
2975 vex_state->guest_DFPROUND = PPCrm_NEAREST;
2976 vex_state->guest_C_FPCC = 0;
2977 vex_state->pad2 = 0;
2979 vex_state->guest_VRSAVE = 0;
2981 # if defined(VGP_ppc64be_linux)
2982 /* By default, the HW for BE sets the VSCR[NJ] bit to 1.
2983 VSR is a 128-bit register, NJ bit is bit 111 (IBM numbering).
2984 However, VSCR is modeled as a 64-bit register. */
2985 vex_state->guest_VSCR = 0x1 << (127 - 111);
2986 # else
2987 /* LE API requires NJ be set to 0. */
2988 vex_state->guest_VSCR = 0x0;
2989 #endif
2991 vex_state->guest_EMNOTE = EmNote_NONE;
2993 vex_state->padding = 0;
2995 vex_state->guest_CMSTART = 0;
2996 vex_state->guest_CMLEN = 0;
2998 vex_state->guest_NRADDR = 0;
2999 vex_state->guest_NRADDR_GPR2 = 0;
3001 vex_state->guest_REDIR_SP = -1;
3002 for (i = 0; i < VEX_GUEST_PPC64_REDIR_STACK_SIZE; i++)
3003 vex_state->guest_REDIR_STACK[i] = 0;
3005 vex_state->guest_IP_AT_SYSCALL = 0;
3006 vex_state->guest_SPRG3_RO = 0;
3007 vex_state->guest_TFHAR = 0;
3008 vex_state->guest_TFIAR = 0;
3009 vex_state->guest_TEXASR = 0;
3010 vex_state->guest_PPR = 0x4ULL << 50; // medium priority
3011 vex_state->guest_PSPB = 0x100; // an arbitrary non-zero value to start with
3012 vex_state->guest_DSCR = 0;
3017 /*-----------------------------------------------------------*/
3018 /*--- Describing the ppc guest state, for the benefit ---*/
3019 /*--- of iropt and instrumenters. ---*/
3020 /*-----------------------------------------------------------*/
3022 /* Figure out if any part of the guest state contained in minoff
3023 .. maxoff requires precise memory exceptions. If in doubt return
3024 True (but this generates significantly slower code).
3026 By default we enforce precise exns for guest R1 (stack pointer),
3027 CIA (current insn address) and LR (link register). These are the
3028 minimum needed to extract correct stack backtraces from ppc
3029 code. [[NB: not sure if keeping LR up to date is actually
3030 necessary.]]
3032 Only R1 is needed in mode VexRegUpdSpAtMemAccess.
3034 Bool guest_ppc32_state_requires_precise_mem_exns (
3035 Int minoff, Int maxoff, VexRegisterUpdates pxControl
3038 Int lr_min = offsetof(VexGuestPPC32State, guest_LR);
3039 Int lr_max = lr_min + 4 - 1;
3040 Int r1_min = offsetof(VexGuestPPC32State, guest_GPR1);
3041 Int r1_max = r1_min + 4 - 1;
3042 Int cia_min = offsetof(VexGuestPPC32State, guest_CIA);
3043 Int cia_max = cia_min + 4 - 1;
3045 if (maxoff < r1_min || minoff > r1_max) {
3046 /* no overlap with R1 */
3047 if (pxControl == VexRegUpdSpAtMemAccess)
3048 return False; // We only need to check stack pointer.
3049 } else {
3050 return True;
3053 if (maxoff < lr_min || minoff > lr_max) {
3054 /* no overlap with LR */
3055 } else {
3056 return True;
3059 if (maxoff < cia_min || minoff > cia_max) {
3060 /* no overlap with CIA */
3061 } else {
3062 return True;
3065 return False;
3068 Bool guest_ppc64_state_requires_precise_mem_exns (
3069 Int minoff, Int maxoff, VexRegisterUpdates pxControl
3072 /* Given that R2 is a Big Deal in the ELF ppc64 ABI, it seems
3073 prudent to be conservative with it, even though thus far there
3074 is no evidence to suggest that it actually needs to be kept up
3075 to date wrt possible exceptions. */
3076 Int lr_min = offsetof(VexGuestPPC64State, guest_LR);
3077 Int lr_max = lr_min + 8 - 1;
3078 Int r1_min = offsetof(VexGuestPPC64State, guest_GPR1);
3079 Int r1_max = r1_min + 8 - 1;
3080 Int r2_min = offsetof(VexGuestPPC64State, guest_GPR2);
3081 Int r2_max = r2_min + 8 - 1;
3082 Int cia_min = offsetof(VexGuestPPC64State, guest_CIA);
3083 Int cia_max = cia_min + 8 - 1;
3085 if (maxoff < r1_min || minoff > r1_max) {
3086 /* no overlap with R1 */
3087 if (pxControl == VexRegUpdSpAtMemAccess)
3088 return False; // We only need to check stack pointer.
3089 } else {
3090 return True;
3093 if (maxoff < lr_min || minoff > lr_max) {
3094 /* no overlap with LR */
3095 } else {
3096 return True;
3099 if (maxoff < r2_min || minoff > r2_max) {
3100 /* no overlap with R2 */
3101 } else {
3102 return True;
3105 if (maxoff < cia_min || minoff > cia_max) {
3106 /* no overlap with CIA */
3107 } else {
3108 return True;
3111 return False;
3115 #define ALWAYSDEFD32(field) \
3116 { offsetof(VexGuestPPC32State, field), \
3117 (sizeof ((VexGuestPPC32State*)0)->field) }
3119 VexGuestLayout
3120 ppc32Guest_layout
3121 = {
3122 /* Total size of the guest state, in bytes. */
3123 .total_sizeB = sizeof(VexGuestPPC32State),
3125 /* Describe the stack pointer. */
3126 .offset_SP = offsetof(VexGuestPPC32State,guest_GPR1),
3127 .sizeof_SP = 4,
3129 /* Describe the frame pointer. */
3130 .offset_FP = offsetof(VexGuestPPC32State,guest_GPR1),
3131 .sizeof_FP = 4,
3133 /* Describe the instruction pointer. */
3134 .offset_IP = offsetof(VexGuestPPC32State,guest_CIA),
3135 .sizeof_IP = 4,
3137 /* Describe any sections to be regarded by Memcheck as
3138 'always-defined'. */
3139 .n_alwaysDefd = 12,
3141 .alwaysDefd
3142 = { /* 0 */ ALWAYSDEFD32(guest_CIA),
3143 /* 1 */ ALWAYSDEFD32(guest_EMNOTE),
3144 /* 2 */ ALWAYSDEFD32(guest_CMSTART),
3145 /* 3 */ ALWAYSDEFD32(guest_CMLEN),
3146 /* 4 */ ALWAYSDEFD32(guest_VSCR),
3147 /* 5 */ ALWAYSDEFD32(guest_FPROUND),
3148 /* 6 */ ALWAYSDEFD32(guest_NRADDR),
3149 /* 7 */ ALWAYSDEFD32(guest_NRADDR_GPR2),
3150 /* 8 */ ALWAYSDEFD32(guest_REDIR_SP),
3151 /* 9 */ ALWAYSDEFD32(guest_REDIR_STACK),
3152 /* 10 */ ALWAYSDEFD32(guest_IP_AT_SYSCALL),
3153 /* 11 */ ALWAYSDEFD32(guest_C_FPCC)
3157 #define ALWAYSDEFD64(field) \
3158 { offsetof(VexGuestPPC64State, field), \
3159 (sizeof ((VexGuestPPC64State*)0)->field) }
3161 VexGuestLayout
3162 ppc64Guest_layout
3163 = {
3164 /* Total size of the guest state, in bytes. */
3165 .total_sizeB = sizeof(VexGuestPPC64State),
3167 /* Describe the stack pointer. */
3168 .offset_SP = offsetof(VexGuestPPC64State,guest_GPR1),
3169 .sizeof_SP = 8,
3171 /* Describe the frame pointer. */
3172 .offset_FP = offsetof(VexGuestPPC64State,guest_GPR1),
3173 .sizeof_FP = 8,
3175 /* Describe the instruction pointer. */
3176 .offset_IP = offsetof(VexGuestPPC64State,guest_CIA),
3177 .sizeof_IP = 8,
3179 /* Describe any sections to be regarded by Memcheck as
3180 'always-defined'. */
3181 .n_alwaysDefd = 12,
3183 .alwaysDefd
3184 = { /* 0 */ ALWAYSDEFD64(guest_CIA),
3185 /* 1 */ ALWAYSDEFD64(guest_EMNOTE),
3186 /* 2 */ ALWAYSDEFD64(guest_CMSTART),
3187 /* 3 */ ALWAYSDEFD64(guest_CMLEN),
3188 /* 4 */ ALWAYSDEFD64(guest_VSCR),
3189 /* 5 */ ALWAYSDEFD64(guest_FPROUND),
3190 /* 6 */ ALWAYSDEFD64(guest_NRADDR),
3191 /* 7 */ ALWAYSDEFD64(guest_NRADDR_GPR2),
3192 /* 8 */ ALWAYSDEFD64(guest_REDIR_SP),
3193 /* 9 */ ALWAYSDEFD64(guest_REDIR_STACK),
3194 /* 10 */ ALWAYSDEFD64(guest_IP_AT_SYSCALL),
3195 /* 11 */ ALWAYSDEFD64(guest_C_FPCC)
3199 UInt copy_paste_abort_dirty_helper(UInt addr, UInt op) {
3200 # if defined(__powerpc__) && defined(HAS_ISA_3_00)
3201 /* The copy, paste. and cpabort instructions were introduced in ISA 3.0. */
3202 ULong ret;
3203 UInt cr;
3205 if (op == COPY_INST)
3206 __asm__ __volatile__ (".machine push;\n"
3207 ".machine power9;\n"
3208 "copy 0,%0;\n"
3209 ".machine pop" :: "r" (addr));
3211 else if (op == PASTE_INST)
3212 __asm__ __volatile__ (".machine push;\n"
3213 ".machine power9;\n"
3214 "paste. 0,%0\n"
3215 ".machine pop" :: "r" (addr));
3217 else if (op == CPABORT_INST)
3218 __asm__ __volatile__ (".machine push;\n"
3219 ".machine power9;\n"
3220 "cpabort\n"
3221 ".machine pop");
3223 else
3224 /* Unknown operation */
3225 vassert(0);
3227 /* Return the CR0 value. Contains status for the paste instruction. */
3228 __asm__ __volatile__ ("mfocrf %0,128" : "=r" (cr));
3229 __asm__ __volatile__ ("srawi %0,%1,28" : "=r" (ret) : "r" (cr));
3230 /* Make sure the upper bits of the return value are zero per the hack
3231 described in function dis_copy_paste(). */
3232 return 0xFF & ret;
3233 # else
3234 return 0;
3235 # endif
3238 /*---------------------------------------------------------------*/
3239 /*--- end guest_ppc_helpers.c ---*/
3240 /*---------------------------------------------------------------*/