/* -*- mode: C; c-basic-offset: 3; -*- */

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2012-2017  Florian Krohm

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/

#include <stdio.h>    // fprintf
#include <assert.h>   // assert
#if defined(__APPLE__)
#include <machine/endian.h>
#define __BYTE_ORDER    BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#elif defined(__sun)
#define __LITTLE_ENDIAN 1234
#define __BIG_ENDIAN 4321
#  if defined(_LITTLE_ENDIAN)
#  define __BYTE_ORDER    __LITTLE_ENDIAN
#  else
#  define __BYTE_ORDER    __BIG_ENDIAN
#  endif
#elif defined(__linux__)
#include <endian.h>
#else
#include <sys/endian.h>
#define __BYTE_ORDER    BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#endif
#include <inttypes.h>
#include "vbits.h"
#include "vtest.h"

#include "memcheck.h"  // VALGRIND_MAKE_MEM_DEFINED

/* Return the bits of V if they fit into 64 bits.  If V has fewer than
   64 bits, the bit pattern is zero-extended to the left. */
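/* For example, a 16-bit vbits value 0xabcd is returned as the 64-bit
   pattern 0x000000000000abcd. */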
static uint64_t
get_bits64(vbits_t v)
{
   switch (v.num_bits) {
   case 1:  return v.bits.u32;
   case 8:  return v.bits.u8;
   case 16: return v.bits.u16;
   case 32: return v.bits.u32;
   case 64: return v.bits.u64;
   case 128:
   case 256:
      /* fall through */
   default:
      panic(__func__);
   }
}

void
print_vbits(FILE *fp, vbits_t v)
{
   switch (v.num_bits) {
   case 1:  fprintf(fp, "%08x", v.bits.u32); break;
   case 8:  fprintf(fp, "%02x", v.bits.u8);  break;
   case 16: fprintf(fp, "%04x", v.bits.u16); break;
   case 32: fprintf(fp, "%08x", v.bits.u32); break;
   case 64: fprintf(fp, "%016"PRIx64, v.bits.u64); break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         fprintf(fp, "%016"PRIx64, v.bits.u128[1]);
         fprintf(fp, "%016"PRIx64, v.bits.u128[0]);
      } else {
         fprintf(fp, "%016"PRIx64, v.bits.u128[0]);
         fprintf(fp, "%016"PRIx64, v.bits.u128[1]);
      }
      break;
   case 256:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         fprintf(fp, "%016"PRIx64, v.bits.u256[3]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[2]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[1]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[0]);
      } else {
         fprintf(fp, "%016"PRIx64, v.bits.u256[0]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[1]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[2]);
         fprintf(fp, "%016"PRIx64, v.bits.u256[3]);
      }
      break;
   default:
      panic(__func__);
   }
}

/* Return a value where all bits are set to undefined. */
vbits_t
undefined_vbits(unsigned num_bits)
{
   vbits_t new = { .num_bits = num_bits };

   switch (num_bits) {
   case   1: new.bits.u32 = 0x01;   break;
   case   8: new.bits.u8  = 0xff;   break;
   case  16: new.bits.u16 = 0xffff; break;
   case  32: new.bits.u32 = ~0;     break;
   case  64: new.bits.u64 = ~0ull;  break;
   case 128: new.bits.u128[0] = ~0ull;
             new.bits.u128[1] = ~0ull;
             break;
   case 256: new.bits.u256[0] = ~0ull;
             new.bits.u256[1] = ~0ull;
             new.bits.u256[2] = ~0ull;
             new.bits.u256[3] = ~0ull;
             break;
   default:
      panic(__func__);
   }
   return new;
}

/* The following routines named undefined_vbits_BxE() return a 128-bit
 * vector with E elements, each 'bits' wide.  If any of the bits in an
 * element is undefined, a value is returned where all bits in that
 * element are undefined.
 */
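/* For example, with bits = 16 and elements = 8, an input whose low 64 bits
 * are 0x0000000100008000 yields 0x0000FFFF0000FFFF there: every 16-bit
 * element containing at least one undefined bit becomes all ones.
 */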
vbits_t
undefined_vbits_BxE(unsigned int bits, unsigned int elements, vbits_t v)
{
   vbits_t new = { .num_bits = v.num_bits };
   uint64_t mask = ~0ull >> (64 - bits);
   int i, j;

   assert ((elements % 2) == 0);
   assert (bits <= 64);

   for (i = 0; i < 2; i++) {
      new.bits.u128[i] = 0ull;

      for (j = 0; j < elements/2; j++) {
         if ((v.bits.u128[i] & (mask << (j*bits))) != 0)
            new.bits.u128[i] |= (mask << (j*bits));
      }
   }
   return new;
}

/* The following routines named undefined_vbits_BxE_rotate() return a 128-bit
 * vector with E elements, each 'bits' wide.  The bits in v are rotated
 * left by the amounts in the corresponding element of val.  The rotate
 * amount field is assumed to be at most 8 bits wide.
 */
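/* The rotate amount is read as an 8-bit field and interpreted as a signed
 * char, so values >= 0x80 act as negative amounts, i.e. right rotates.
 * For example, with bits = 8, an element 0x01 rotated by 2 gives 0x04,
 * and rotated by 0xff (-1) gives 0x80.
 */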
vbits_t
undefined_vbits_BxE_rotate(unsigned int bits, unsigned int elements,
                           vbits_t v, value_t val)
{
   vbits_t new = { .num_bits = v.num_bits };
   uint64_t mask = ~0ull >> (64 - bits);
   uint64_t const shift_mask = 0xFF;
   uint64_t element;
   int i, j;
   signed char shift;
   assert ((elements % 2) == 0);
   assert (bits <= 64);

   for (i = 0; i < 2; i++) {
      new.bits.u128[i] = 0ull;

      for (j = 0; j < elements/2; j++) {
         element = (v.bits.u128[i] >> (j*bits)) & mask;
         shift = (int)((val.u128[i] >> (j*bits)) & shift_mask);

         if (shift < 0) {
            /* right shift */
            new.bits.u128[i] = element >> -shift;

            /* OR in the bits shifted out into the top of the element */
            new.bits.u128[i] |= element << (bits + shift);

         } else {
            /* left shift */
            /* upper bits from shift */
            new.bits.u128[i] = element << shift;

            /* OR in the bits shifted out into the bottom of the element */
            new.bits.u128[i] |= element >> (bits - shift);
         }
      }
   }
   return new;
}

/* Only the even elements of the input are used by the Iop. */
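/* For example, with bits = 32 and elements = 4, the vbits of elements 0 and 2
 * (the low half of each 64-bit word) are kept and elements 1 and 3 are
 * cleared.
 */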
vbits_t
undefined_vbits_128_even_element(unsigned int bits, unsigned int elements,
                                 vbits_t v)
{
   int i;
   uint64_t mask;
   unsigned int const element_width = 128/elements;
   vbits_t new = { .num_bits = v.num_bits };

   assert ((elements % 2) == 0);
   assert (bits <= 64);

   /* Create a 128-bit mask in which the bits of the even-numbered
    * elements are all ones.
    */
   mask = ~0ull >> (64 - bits);

   for (i = 2; i < elements/2; i=i+2) {
      mask |= mask << (i * element_width);
   }

   new.bits.u128[0] = mask & v.bits.u128[0];
   new.bits.u128[1] = mask & v.bits.u128[1];

   return new;
}

/* Concatenate bit i from each byte j.  Place the concatenated 8-bit value into
 * byte i of the result.  Do this for all i from 0 to 7 and j from 0 to 7 of
 * each 64-bit element.
 */
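/* For example, if only bit 3 of byte 5 is set in an input element, then only
 * bit 5 of byte 3 is set in the corresponding output element.
 */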
vbits_t
undefined_vbits_64x2_transpose(vbits_t v)
{
   vbits_t new = { .num_bits = v.num_bits };
   unsigned int bit, byte, element;
   uint64_t value, new_value, select_bit;

   for (element = 0; element < 2; element++) {
      value = v.bits.u128[element];
      new_value = 0;
      for (byte = 0; byte < 8; byte++) {
         for (bit = 0; bit < 8; bit++) {
            select_bit = 1ULL & (value >> (bit + 8*byte));
            new_value |= select_bit << (bit*8 + byte);
         }
      }
      new.bits.u128[element] = new_value;
   }
   return new;
}

/* The routine takes a 256-bit vector value stored across the two 128-bit
 * source operands src1 and src2.  The size of each element in the input is
 * src_num_bits.  The elements are narrowed to result_num_bits and packed
 * into the result.  If saturate is True, then all the result bits are
 * set to 1 if the source element cannot be represented in result_num_bits.
 */
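/* For the non-saturating 64-bit to 32-bit case (the only combination noted
 * below as tested), each source element whose vbits are non-zero contributes
 * its low 32 vbits to the matching 32-bit slot of the result; src2 fills the
 * low 64 bits of the result and src1 the high 64 bits (on little endian).
 */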
vbits_t
undefined_vbits_Narrow256_AtoB(unsigned int src_num_bits,
                               unsigned int result_num_bits,
                               vbits_t src1_v, value_t src1_value,
                               vbits_t src2_v, value_t src2_value,
                               bool saturate)
{
   vbits_t new = { .num_bits = src1_v.num_bits };
   unsigned int i;
   uint64_t vbits, new_value;
   uint64_t const src_mask = ~0x0ULL >> (64 - src_num_bits);
   uint64_t const result_mask = ~0x0ULL >> (64 - result_num_bits);
   unsigned int num_elements_per_64_bits = src_num_bits/64;
   unsigned int shift;

   /*
    * NOTE: POWER PPC
    * The saturated value is 0xFFFF if the vbit is in one of the lower
    * 32 bits of the source.  The saturated result is 0xFFFF0000 if the
    * vbit is in the upper 32 bits of the source.  Not sure what
    * the saturated result is in general for a B-bit result.
    *
    * ONLY TESTED FOR 64 bit input, 32 bit result
    */
   uint64_t const saturated_result = 0xFFFFULL;

   /* Source elements are split between the two source operands */

   assert(src_num_bits <= 64);
   assert(result_num_bits < 64);
   assert(result_num_bits < src_num_bits);

   /* Narrow the elements from src1 to the upper 64 bits of the result.
    * Do each of the 64-bit values that make up a u128.
    */
   new_value = 0;
   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src1_v.bits.u128[0] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i;
      if (vbits) {
         if (saturate) {
            /* Value will not fit in B-bits, saturate the result as needed. */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);
            else
               new_value |= saturated_result << shift;

         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }

   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src1_v.bits.u128[1] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i + (num_elements_per_64_bits
                                     * result_num_bits);
      if (vbits) {
         if (saturate) {
            /* Value will not fit in result_num_bits, saturate the result
             * as needed.
             */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);

            else
               new_value |= saturated_result << shift;

         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }

   if (__BYTE_ORDER == __LITTLE_ENDIAN)
      new.bits.u128[1] = new_value;
   else
      /* Big endian, swap the upper and lower 32 bits of new_value */
      new.bits.u128[0] = (new_value << 32) | (new_value >> 32);

   new_value = 0;
   /* Narrow the elements from src2 to the lower 64 bits of the result.
    * Do each of the 64-bit values that make up a u128.
    */
   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src2_v.bits.u128[0] >> (i * src_num_bits);
      vbits &= src_mask;

      shift = result_num_bits * i;
      if (vbits) {
         if (saturate) {
            /* Value will not fit in the result, saturate the result as needed. */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (shift + result_num_bits/2);
            else
               new_value |= saturated_result << shift;

         } else {
            new_value |= (vbits & result_mask) << shift;
         }
      }
   }

   for (i = 0; i < num_elements_per_64_bits; i++) {
      vbits = src2_v.bits.u128[1] >> (i * src_num_bits);
      vbits &= src_mask;

      if (vbits) {
         if (saturate) {
            /* Value will not fit in result_num_bits, saturate the result
             * as needed.
             */
            if (vbits >> (src_num_bits/2))
               /* vbit is in the upper half of the source */
               new_value |= saturated_result << (result_num_bits * i
                                                 + result_num_bits/2
                                                 + (num_elements_per_64_bits
                                                    * result_num_bits));
            else
               new_value |= saturated_result << (result_num_bits * i
                                                 + (num_elements_per_64_bits
                                                    * result_num_bits));

         } else {
            new_value |= (vbits & result_mask) << (result_num_bits * i
                                                   + (num_elements_per_64_bits
                                                      * result_num_bits));
         }
      }
   }

   if (__BYTE_ORDER == __LITTLE_ENDIAN)
      new.bits.u128[0] = new_value;
   else
      /* Big endian, swap the upper and lower 32 bits of new_value */
      new.bits.u128[1] = (new_value << 32) | (new_value >> 32);

   return new;
}

/* Return a value where all bits are set to defined. */
vbits_t
defined_vbits(unsigned num_bits)
{
   vbits_t new = { .num_bits = num_bits };

   switch (num_bits) {
   case   1: new.bits.u32 = 0x0; break;
   case   8: new.bits.u8  = 0x0; break;
   case  16: new.bits.u16 = 0x0; break;
   case  32: new.bits.u32 = 0x0; break;
   case  64: new.bits.u64 = 0x0; break;
   case 128: new.bits.u128[0] = 0x0;
             new.bits.u128[1] = 0x0;
             break;
   case 256: new.bits.u256[0] = 0x0;
             new.bits.u256[1] = 0x0;
             new.bits.u256[2] = 0x0;
             new.bits.u256[3] = 0x0;
             break;
   default:
      panic(__func__);
   }
   return new;
}

/* Return 1 if equal. */
int
equal_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   switch (v1.num_bits) {
   case 1:   return v1.bits.u32 == v2.bits.u32;
   case 8:   return v1.bits.u8  == v2.bits.u8;
   case 16:  return v1.bits.u16 == v2.bits.u16;
   case 32:  return v1.bits.u32 == v2.bits.u32;
   case 64:  return v1.bits.u64 == v2.bits.u64;
   case 128: return v1.bits.u128[0] == v2.bits.u128[0] &&
                    v1.bits.u128[1] == v2.bits.u128[1];
   case 256: return v1.bits.u256[0] == v2.bits.u256[0] &&
                    v1.bits.u256[1] == v2.bits.u256[1] &&
                    v1.bits.u256[2] == v2.bits.u256[2] &&
                    v1.bits.u256[3] == v2.bits.u256[3];
   default:
      panic(__func__);
   }
}

/* Truncate the bit pattern in V to NUM_BITS bits */
vbits_t
truncate_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits <= v.num_bits);

   if (num_bits == v.num_bits) return v;

   vbits_t new = { .num_bits = num_bits };

   if (num_bits <= 64) {
      uint64_t bits;

      if (v.num_bits <= 64)
         bits = get_bits64(v);
      else if (v.num_bits == 128)
         if (__BYTE_ORDER == __LITTLE_ENDIAN)
            bits = v.bits.u128[0];
         else
            bits = v.bits.u128[1];
      else if (v.num_bits == 256)
         if (__BYTE_ORDER == __LITTLE_ENDIAN)
            bits = v.bits.u256[0];
         else
            bits = v.bits.u256[3];
      else
         panic(__func__);

      switch (num_bits) {
      case 1:  new.bits.u32 = bits & 0x01;   break;
      case 8:  new.bits.u8  = bits & 0xff;   break;
      case 16: new.bits.u16 = bits & 0xffff; break;
      case 32: new.bits.u32 = bits & ~0u;    break;
      case 64: new.bits.u64 = bits & ~0ll;   break;
      default:
         panic(__func__);
      }
      return new;
   }

   if (num_bits == 128) {
      assert(v.num_bits == 256);
      /* From 256 bits to 128 */
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u128[0] = v.bits.u256[0];
         new.bits.u128[1] = v.bits.u256[1];
      } else {
         new.bits.u128[0] = v.bits.u256[2];
         new.bits.u128[1] = v.bits.u256[3];
      }
      return new;
   }

   /* Cannot truncate to 256 bits from something larger */
   panic(__func__);
}

/* Helper function to compute left_vbits */
static uint64_t
left64(uint64_t x)
{
   // left(x) = x | -x
   return x | (~x + 1);
}

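/* For example, left64(0x0010) == 0xfffffffffffffff0: the lowest set bit and
   every bit above it become 1, while the bits below it stay 0. */
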
vbits_t
left_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits >= v.num_bits);

   vbits_t new = { .num_bits = num_bits };

   if (v.num_bits <= 64) {
      uint64_t bits = left64(get_bits64(v));

      switch (num_bits) {
      case 8:  new.bits.u8  = bits & 0xff;   break;
      case 16: new.bits.u16 = bits & 0xffff; break;
      case 32: new.bits.u32 = bits & ~0u;    break;
      case 64: new.bits.u64 = bits & ~0ll;   break;
      case 128:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u128[0] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u128[1] = ~0ull;
            } else {
               new.bits.u128[1] = 0;
            }
         } else {
            new.bits.u128[1] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u128[0] = ~0ull;
            } else {
               new.bits.u128[0] = 0;
            }
         }
         break;
      case 256:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u256[0] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u256[1] = ~0ull;
               new.bits.u256[2] = ~0ull;
               new.bits.u256[3] = ~0ull;
            } else {
               new.bits.u256[1] = 0;
               new.bits.u256[2] = 0;
               new.bits.u256[3] = 0;
            }
         } else {
            new.bits.u256[3] = bits;
            if (bits & (1ull << 63)) {  // MSB is set
               new.bits.u256[0] = ~0ull;
               new.bits.u256[1] = ~0ull;
               new.bits.u256[2] = ~0ull;
            } else {
               new.bits.u256[0] = 0;
               new.bits.u256[1] = 0;
               new.bits.u256[2] = 0;
            }
         }
         break;
      default:
         panic(__func__);
      }
      return new;
   }

   if (v.num_bits == 128) {
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         if (v.bits.u128[1] != 0) {
            new.bits.u128[0] = v.bits.u128[0];
            new.bits.u128[1] = left64(v.bits.u128[1]);
         } else {
            new.bits.u128[0] = left64(v.bits.u128[0]);
            if (new.bits.u128[0] & (1ull << 63)) {  // MSB is set
               new.bits.u128[1] = ~0ull;
            } else {
               new.bits.u128[1] = 0;
            }
         }
      } else {
         if (v.bits.u128[0] != 0) {
            new.bits.u128[0] = left64(v.bits.u128[0]);
            new.bits.u128[1] = v.bits.u128[1];
         } else {
            new.bits.u128[1] = left64(v.bits.u128[1]);
            if (new.bits.u128[1] & (1ull << 63)) {  // MSB is set
               new.bits.u128[0] = ~0ull;
            } else {
               new.bits.u128[0] = 0;
            }
         }
      }

      if (num_bits == 128) return new;

      assert(num_bits == 256);

      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         uint64_t b1 = new.bits.u128[1];
         uint64_t b0 = new.bits.u128[0];

         new.bits.u256[0] = b0;
         new.bits.u256[1] = b1;

         if (new.bits.u256[1] & (1ull << 63)) {  // MSB is set
            new.bits.u256[2] = ~0ull;
            new.bits.u256[3] = ~0ull;
         } else {
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         }
      } else {
         uint64_t b1 = new.bits.u128[0];
         uint64_t b0 = new.bits.u128[1];

         new.bits.u256[2] = b0;
         new.bits.u256[3] = b1;

         if (new.bits.u256[2] & (1ull << 63)) {  // MSB is set
            new.bits.u256[0] = ~0ull;
            new.bits.u256[1] = ~0ull;
         } else {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
         }
      }
      return new;
   }

   panic(__func__);
}

vbits_t
or_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   vbits_t new = { .num_bits = v1.num_bits };

   switch (v1.num_bits) {
   case 1:   new.bits.u32 = (v1.bits.u32 | v2.bits.u32) & 1; break;
   case 8:   new.bits.u8  = v1.bits.u8  | v2.bits.u8;  break;
   case 16:  new.bits.u16 = v1.bits.u16 | v2.bits.u16; break;
   case 32:  new.bits.u32 = v1.bits.u32 | v2.bits.u32; break;
   case 64:  new.bits.u64 = v1.bits.u64 | v2.bits.u64; break;
   case 128: new.bits.u128[0] = v1.bits.u128[0] | v2.bits.u128[0];
             new.bits.u128[1] = v1.bits.u128[1] | v2.bits.u128[1];
             break;
   case 256: new.bits.u256[0] = v1.bits.u256[0] | v2.bits.u256[0];
             new.bits.u256[1] = v1.bits.u256[1] | v2.bits.u256[1];
             new.bits.u256[2] = v1.bits.u256[2] | v2.bits.u256[2];
             new.bits.u256[3] = v1.bits.u256[3] | v2.bits.u256[3];
             break;
   default:
      panic(__func__);
   }

   return new;
}

vbits_t
and_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   vbits_t new = { .num_bits = v1.num_bits };

   switch (v1.num_bits) {
   case 1:   new.bits.u32 = (v1.bits.u32 & v2.bits.u32) & 1; break;
   case 8:   new.bits.u8  = v1.bits.u8  & v2.bits.u8;  break;
   case 16:  new.bits.u16 = v1.bits.u16 & v2.bits.u16; break;
   case 32:  new.bits.u32 = v1.bits.u32 & v2.bits.u32; break;
   case 64:  new.bits.u64 = v1.bits.u64 & v2.bits.u64; break;
   case 128: new.bits.u128[0] = v1.bits.u128[0] & v2.bits.u128[0];
             new.bits.u128[1] = v1.bits.u128[1] & v2.bits.u128[1];
             break;
   case 256: new.bits.u256[0] = v1.bits.u256[0] & v2.bits.u256[0];
             new.bits.u256[1] = v1.bits.u256[1] & v2.bits.u256[1];
             new.bits.u256[2] = v1.bits.u256[2] & v2.bits.u256[2];
             new.bits.u256[3] = v1.bits.u256[3] & v2.bits.u256[3];
             break;
   default:
      panic(__func__);
   }

   return new;
}

vbits_t
concat_vbits(vbits_t v1, vbits_t v2)
{
   assert(v1.num_bits == v2.num_bits);

   vbits_t new = { .num_bits = v1.num_bits * 2 };

   switch (v1.num_bits) {
   case 8:   new.bits.u16 = v1.bits.u8;
             new.bits.u16 = (new.bits.u16 << 8)  | v2.bits.u8;  break;
   case 16:  new.bits.u32 = v1.bits.u16;
             new.bits.u32 = (new.bits.u32 << 16) | v2.bits.u16; break;
   case 32:  new.bits.u64 = v1.bits.u32;
             new.bits.u64 = (new.bits.u64 << 32) | v2.bits.u32; break;
   case 64:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u128[0] = v2.bits.u64;
         new.bits.u128[1] = v1.bits.u64;
      } else {
         new.bits.u128[0] = v1.bits.u64;
         new.bits.u128[1] = v2.bits.u64;
      }
      break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u256[0] = v2.bits.u128[0];
         new.bits.u256[1] = v2.bits.u128[1];
         new.bits.u256[2] = v1.bits.u128[0];
         new.bits.u256[3] = v1.bits.u128[1];
      } else {
         new.bits.u256[0] = v1.bits.u128[0];
         new.bits.u256[1] = v1.bits.u128[1];
         new.bits.u256[2] = v2.bits.u128[0];
         new.bits.u256[3] = v2.bits.u128[1];
      }
      break;
   case 256: /* Fall through */
   default:
      panic(__func__);
   }

   return new;
}

vbits_t
upper_vbits(vbits_t v)
{
   vbits_t new = { .num_bits = v.num_bits / 2 };

   switch (v.num_bits) {
   case 16: new.bits.u8  = v.bits.u16 >> 8;  break;
   case 32: new.bits.u16 = v.bits.u32 >> 16; break;
   case 64: new.bits.u32 = v.bits.u64 >> 32; break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN)
         new.bits.u64 = v.bits.u128[1];
      else
         new.bits.u64 = v.bits.u128[0];
      break;
   case 256:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u128[0] = v.bits.u256[2];
         new.bits.u128[1] = v.bits.u256[3];
      } else {
         new.bits.u128[0] = v.bits.u256[0];
         new.bits.u128[1] = v.bits.u256[1];
      }
      break;
   case 8:
   default:
      panic(__func__);
   }

   return new;
}

vbits_t
zextend_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits >= v.num_bits);

   if (num_bits == v.num_bits) return v;

   vbits_t new = { .num_bits = num_bits };

   if (v.num_bits <= 64) {
      uint64_t bits = get_bits64(v);

      switch (num_bits) {
      case 8:  new.bits.u8  = bits; break;
      case 16: new.bits.u16 = bits; break;
      case 32: new.bits.u32 = bits; break;
      case 64: new.bits.u64 = bits; break;
      case 128:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u128[0] = bits;
            new.bits.u128[1] = 0;
         } else {
            new.bits.u128[0] = 0;
            new.bits.u128[1] = bits;
         }
         break;
      case 256:
         if (__BYTE_ORDER == __LITTLE_ENDIAN) {
            new.bits.u256[0] = bits;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = bits;
         }
         break;
      default:
         panic(__func__);
      }
      return new;
   }

   if (v.num_bits == 128) {
      assert(num_bits == 256);

      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         new.bits.u256[0] = v.bits.u128[0];
         new.bits.u256[1] = v.bits.u128[1];
         new.bits.u256[2] = 0;
         new.bits.u256[3] = 0;
      } else {
         new.bits.u256[0] = 0;
         new.bits.u256[1] = 0;
         new.bits.u256[2] = v.bits.u128[1];
         new.bits.u256[3] = v.bits.u128[0];
      }
      return new;
   }

   /* Cannot zero-extend a 256-bit value to something larger */
   panic(__func__);
}

vbits_t
sextend_vbits(vbits_t v, unsigned num_bits)
{
   assert(num_bits >= v.num_bits);

   int sextend = 0;

   switch (v.num_bits) {
   case 8:   if (v.bits.u8  == 0x80)             sextend = 1; break;
   case 16:  if (v.bits.u16 == 0x8000)           sextend = 1; break;
   case 32:  if (v.bits.u32 == 0x80000000)       sextend = 1; break;
   case 64:  if (v.bits.u64 == (1ull << 63))     sextend = 1; break;
   case 128: if (v.bits.u128[1] == (1ull << 63)) sextend = 1; break;
   case 256: if (v.bits.u256[3] == (1ull << 63)) sextend = 1; break;

   default:
      panic(__func__);
   }

   return sextend ? left_vbits(v, num_bits) : zextend_vbits(v, num_bits);
}

vbits_t
onehot_vbits(unsigned bitno, unsigned num_bits)
{
   assert(bitno < num_bits);

   vbits_t new = { .num_bits = num_bits };

   switch (num_bits) {
   case 1:  new.bits.u32 = 1    << bitno; break;
   case 8:  new.bits.u8  = 1    << bitno; break;
   case 16: new.bits.u16 = 1    << bitno; break;
   case 32: new.bits.u32 = 1u   << bitno; break;
   case 64: new.bits.u64 = 1ull << bitno; break;
   case 128:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         if (bitno < 64) {
            new.bits.u128[0] = 1ull << bitno;
            new.bits.u128[1] = 0;
         } else {
            new.bits.u128[0] = 0;
            new.bits.u128[1] = 1ull << (bitno - 64);
         }
      } else {
         if (bitno < 64) {
            new.bits.u128[0] = 0;
            new.bits.u128[1] = 1ull << bitno;
         } else {
            new.bits.u128[0] = 1ull << (bitno - 64);
            new.bits.u128[1] = 0;
         }
      }
      break;
   case 256:
      if (__BYTE_ORDER == __LITTLE_ENDIAN) {
         if (bitno < 64) {
            new.bits.u256[0] = 1ull << bitno;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else if (bitno < 128) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 1ull << (bitno - 64);
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else if (bitno < 192) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 1ull << (bitno - 128);
            new.bits.u256[3] = 0;
         } else {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 1ull << (bitno - 192);
         }
      } else {
         if (bitno < 64) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 1ull << bitno;
         } else if (bitno < 128) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 1ull << (bitno - 64);
            new.bits.u256[3] = 0;
         } else if (bitno < 192) {
            new.bits.u256[0] = 0;
            new.bits.u256[1] = 1ull << (bitno - 128);
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         } else {
            new.bits.u256[0] = 1ull << (bitno - 192);
            new.bits.u256[1] = 0;
            new.bits.u256[2] = 0;
            new.bits.u256[3] = 0;
         }
      }
      break;
   default:
      panic(__func__);
   }
   return new;
}

int
completely_defined_vbits(vbits_t v)
{
   return equal_vbits(v, defined_vbits(v.num_bits));
}

vbits_t
shl_vbits(vbits_t v, unsigned shift_amount)
{
   assert(shift_amount < v.num_bits);

   vbits_t new = v;

   switch (v.num_bits) {
   case 8:  new.bits.u8  <<= shift_amount; break;
   case 16: new.bits.u16 <<= shift_amount; break;
   case 32: new.bits.u32 <<= shift_amount; break;
   case 64: new.bits.u64 <<= shift_amount; break;
   case 128: /* fall through */
   case 256: /* fall through */
   default:
      panic(__func__);
   }

   return new;
}

vbits_t
shr_vbits(vbits_t v, unsigned shift_amount)
{
   assert(shift_amount < v.num_bits);

   vbits_t new = v;

   switch (v.num_bits) {
   case 8:  new.bits.u8  >>= shift_amount; break;
   case 16: new.bits.u16 >>= shift_amount; break;
   case 32: new.bits.u32 >>= shift_amount; break;
   case 64: new.bits.u64 >>= shift_amount; break;
   case 128: /* fall through */
   case 256: /* fall through */
   default:
      panic(__func__);
   }

   return new;
}

vbits_t
sar_vbits(vbits_t v, unsigned shift_amount)
{
   assert(shift_amount < v.num_bits);

   vbits_t new = v;
   int msb;

   switch (v.num_bits) {
   case 8:
      new.bits.u8 >>= shift_amount;
      msb = (v.bits.u8 & 0x80) != 0;
      break;
   case 16:
      new.bits.u16 >>= shift_amount;
      msb = (v.bits.u16 & 0x8000) != 0;
      break;
   case 32:
      new.bits.u32 >>= shift_amount;
      msb = (v.bits.u32 & (1u << 31)) != 0;
      break;
   case 64:
      new.bits.u64 >>= shift_amount;
      msb = (v.bits.u64 & (1ull << 63)) != 0;
      break;
   case 128: /* fall through */
   case 256: /* fall through */
   default:
      panic(__func__);
   }

   if (msb)
      new = left_vbits(new, new.num_bits);
   return new;
}

/* Return a value for the POWER Iop_CmpORD class iops */
vbits_t
cmpord_vbits(unsigned v1_num_bits, unsigned v2_num_bits)
{
   vbits_t new = { .num_bits = v1_num_bits };

   /* Size of values being compared must be the same */
   assert(v1_num_bits == v2_num_bits);

   /* The comparison only produces a 32-bit or 64-bit value where
    * the lower 3 bits are set to indicate less than, equal and greater than.
    */
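   /* 0xE == 0b1110: presumably this marks the three flag bits that the
    * CmpORD result can set (bits 3:1 on POWER) as undefined, while bit 0,
    * which the operation never sets, stays defined.
    */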
   switch (v1_num_bits) {
   case 32:
      new.bits.u32 = 0xE;
      break;

   case 64:
      new.bits.u64 = 0xE;
      break;

   default:
      panic(__func__);
   }

   return new;
}

/* Deal with precise integer EQ and NE.  Needs some helpers.  The helpers
   compute the result for 64-bit inputs, but can also be used for the
   32/16/8 bit cases, because we can zero extend both the vbits and values
   out to 64 bits and still get the correct result. */
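/* The widened positions are harmless for EQ/NE: there both values are 0
   (hence equal) and both vbits are 0 (hence defined), so they can neither
   introduce undefinedness nor act as a "defined and different" witness;
   the outcome is decided entirely by the original narrow bits. */
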
/* Get both vbits and values for a binary operation that has args of the
   same size (type?), namely 8, 16, 32 or 64 bit.  Unused bits are set to
   zero in both vbit_ and val_ cases. */
static
void get_binary_vbits_and_vals64 ( /*OUT*/uint64_t* varg1,
                                   /*OUT*/uint64_t* arg1,
                                   /*OUT*/uint64_t* varg2,
                                   /*OUT*/uint64_t* arg2,
                                   vbits_t vbits1, vbits_t vbits2,
                                   value_t val1, value_t val2)
{
   assert(vbits1.num_bits == vbits2.num_bits);

   *varg1 = *arg1 = *varg2 = *arg2 = 0;

   switch (vbits1.num_bits) {
      case 8:  *arg1 = (uint64_t)val1.u8;  *arg2 = (uint64_t)val2.u8;  break;
      case 16: *arg1 = (uint64_t)val1.u16; *arg2 = (uint64_t)val2.u16; break;
      case 32: *arg1 = (uint64_t)val1.u32; *arg2 = (uint64_t)val2.u32; break;
      case 64: *arg1 = val1.u64;           *arg2 = val2.u64;           break;
      default: panic(__func__);
   }

   *varg1 = get_bits64(vbits1);
   *varg2 = get_bits64(vbits2);
}

static uint64_t uifu64 ( uint64_t x, uint64_t y ) { return x | y; }

/* Returns 0 (defined) or 1 (undefined). */
static uint32_t ref_CmpEQ64_with_vbits ( uint64_t arg1, uint64_t varg1,
                                         uint64_t arg2, uint64_t varg2 )
{
   uint64_t naive = uifu64(varg1, varg2);
   if (naive == 0) {
      return 0; /* defined */
   }

   // Mark the two actual arguments as fully defined, else Memcheck will
   // complain about undefinedness in them, which is correct but confusing
   // (and pollutes the output of this test program.)
   VALGRIND_MAKE_MEM_DEFINED(&arg1, sizeof(arg1));
   VALGRIND_MAKE_MEM_DEFINED(&arg2, sizeof(arg2));

   // if any bit in naive is 1, then the result is undefined.  Except,
   // if we can find two corresponding bits in arg1 and arg2 such that they
   // are different but both defined, then the overall result is defined
   // (because the two bits guarantee that the bit vectors arg1 and arg2
   // are different.)
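   // For example, arg1 = 0x5/varg1 = 0x0 and arg2 = 0x1/varg2 = 0x8 compare
   // as defined (bit 2 differs and is defined on both sides), whereas
   // arg1 = 0x5/varg1 = 0x0 and arg2 = 0x5/varg2 = 0x8 compare as undefined
   // (equality hinges on the unknown bit 3 of arg2).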
   UInt i;
   for (i = 0; i < 64; i++) {
      if ((arg1 & 1) != (arg2 & 1) && (varg1 & 1) == 0 && (varg2 & 1) == 0) {
         return 0; /* defined */
      }
      arg1 >>= 1; arg2 >>= 1; varg1 >>= 1; varg2 >>= 1;
   }

   return 1; /* undefined */
}

vbits_t
cmp_eq_ne_vbits(vbits_t vbits1, vbits_t vbits2, value_t val1, value_t val2)
{
   uint64_t varg1 = 0, arg1 = 0, varg2 = 0, arg2 = 0;
   get_binary_vbits_and_vals64(&varg1, &arg1, &varg2, &arg2,
                               vbits1, vbits2, val1, val2);

   vbits_t res = { .num_bits = 1 };
   res.bits.u32 = ref_CmpEQ64_with_vbits(arg1, varg1, arg2, varg2);

   return res;
}

/* Given unsigned vbits and value, return the minimum possible value. */
uint64_t min_vbits(uint64_t vbits, uint64_t value)
{
   // This is derived from expensiveAddSub() in mc_translate.c.
   return value & ~vbits;
}

/* Given unsigned vbits and value, return the maximum possible value. */
uint64_t max_vbits(uint64_t vbits, uint64_t value)
{
   // This is derived from expensiveAddSub() in mc_translate.c.
   return value | vbits;
}
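
/* For example, value = 0xa with vbits = 0x6 (bits 1 and 2 unknown) can be
   anything from min_vbits = 0x8 to max_vbits = 0xe. */
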
/* Deal with precise integer ADD and SUB. */
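/* A result bit is marked undefined if it is undefined in either operand, or
 * if it differs between the smallest and largest sums (or differences) the
 * operands could possibly produce.  For example, adding a defined 1 to a
 * value 4 whose bit 1 is undefined (so it may be 4 or 6) gives possible sums
 * 5..7, and only bit 1 of the result is flagged undefined.
 */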
vbits_t
int_add_or_sub_vbits(int isAdd,
                     vbits_t vbits1, vbits_t vbits2, value_t val1, value_t val2)
{
   uint64_t vaa = 0, aa = 0, vbb = 0, bb = 0;
   get_binary_vbits_and_vals64(&vaa, &aa, &vbb, &bb,
                               vbits1, vbits2, val1, val2);

   uint64_t a_min = min_vbits(vaa, aa);
   uint64_t b_min = min_vbits(vbb, bb);
   uint64_t a_max = max_vbits(vaa, aa);
   uint64_t b_max = max_vbits(vbb, bb);

   uint64_t result;
   if (isAdd) {
      result = (vaa | vbb) | ((a_min + b_min) ^ (a_max + b_max));
   } else {
      result = (vaa | vbb) | ((a_min - b_max) ^ (a_max - b_min));
   }

   vbits_t res = { .num_bits = vbits1.num_bits };
   switch (res.num_bits) {
      case 8:  res.bits.u8  = (uint8_t)result;  break;
      case 16: res.bits.u16 = (uint16_t)result; break;
      case 32: res.bits.u32 = (uint32_t)result; break;
      case 64: res.bits.u64 = (uint64_t)result; break;
      default: panic(__func__);
   }

   return res;
}

/* Deal with precise CmpGTsbxe.
 *
 * b is the number of bits per element and e is the number of elements.  x is
 * either S for signed or U for unsigned.
 */
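/* For example, with unsigned 32-bit elements, an element1 of 10 whose two low
 * vbits are undefined (so it lies in 8..11) compared against a fully defined
 * element2 of 20 gives a defined all-zero result lane (11 <= 20, so element1
 * is certainly not greater); against a defined element2 of 9 the outcome is
 * uncertain and the whole result lane is marked undefined.
 */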
vbits_t
cmp_gt_vbits(int is_signed, int bits_per_element, int element_count,
             vbits_t vbits1, vbits_t vbits2, value_t val1, value_t val2) {
   assert(vbits1.num_bits == vbits2.num_bits);
   assert(bits_per_element*element_count == vbits1.num_bits);
   assert(vbits1.num_bits == 128); // All the known variants are 128-bit.

   vbits_t res = { .num_bits = vbits1.num_bits, .bits.u128 = {0, 0} };
   for (int word = 0; word < 2; word++) {
      for (int element_in_word = 0; element_in_word < element_count/2; element_in_word++) {
         // We don't have to worry about little-endian vs big-endian because the
         // max bits_per_element is 64 and fits in a word.  Extract a word.
         uint64_t element1 = (val1.u128[word] >> (bits_per_element*element_in_word)) & (((uint64_t) -1) >> (64 - bits_per_element));
         uint64_t element2 = (val2.u128[word] >> (bits_per_element*element_in_word)) & (((uint64_t) -1) >> (64 - bits_per_element));
         uint64_t velement1 = (vbits1.bits.u128[word] >> (bits_per_element*element_in_word)) & (((uint64_t) -1) >> (64 - bits_per_element));
         uint64_t velement2 = (vbits2.bits.u128[word] >> (bits_per_element*element_in_word)) & (((uint64_t) -1) >> (64 - bits_per_element));

         // If we are doing a signed comparison then we add one to the MSB of
         // the element.  This converts the signed value into an unsigned value
         // in such a way that the greater-than operation continues to return
         // the same value when done in unsigned math.  We don't want the
         // addition to overflow so we just use XOR instead.
         if (is_signed) {
            element1 ^= (((uint64_t) 1) << (bits_per_element-1));
            element2 ^= (((uint64_t) 1) << (bits_per_element-1));
         }

         uint64_t min1 = min_vbits(velement1, element1);
         uint64_t min2 = min_vbits(velement2, element2);
         uint64_t max1 = max_vbits(velement1, element1);
         uint64_t max2 = max_vbits(velement2, element2);

         // If the minimum possible value of element1 is greater than the
         // maximum possible value of element2 then element1 is surely greater
         // than element2.
         int is_definitely_greater = min1 > max2;
         // If the maximum value of element1 is less than or equal to the minimum
         // value of element2 then there is no way that element1 is greater than
         // element2.
         int is_definitely_not_greater = max1 <= min2;
         int is_definite = is_definitely_greater || is_definitely_not_greater;
         // If the answer is definite then the vbits should indicate that all
         // bits are known, so 0.  Otherwise, all 1s.
         if (!is_definite) {
            res.bits.u128[word] |= (((uint64_t) -1) >> (64 - bits_per_element)) << (bits_per_element*element_in_word);
         }
      }
   }
   return res;
}