/*
 * Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "exec/helper-proto.h"
#include "crypto/clmul.h"

target_ulong helper_array8(target_ulong rs1, target_ulong rs2)
{
    /*
     * From Oracle SPARC Architecture 2015:
     * Architecturally, an illegal R[rs2] value (>5) causes the array
     * instructions to produce undefined results. For historic reference,
     * past implementations of these instructions have ignored R[rs2]{63:3}
     * and have treated R[rs2] values of 6 and 7 as if they were 5.
     */
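
    /*
     * The ARRAY instructions convert the three packed fixed-point
     * coordinates in R[rs1] into a "blocked" array address: the low
     * bits of x, y and z are interleaved ahead of the middle and
     * upper bits (see the return expression below), so that elements
     * that are close in 3D space fall into nearby cache lines.
     * R[rs2], clamped above, controls how many upper bits of x and y
     * take part.
     */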

    target_ulong n = MIN(rs2 & 7, 5);

    target_ulong x_int = (rs1 >> 11) & 0x7ff;
    target_ulong y_int = (rs1 >> 33) & 0x7ff;
    target_ulong z_int = rs1 >> 55;

    target_ulong lower_x = x_int & 3;
    target_ulong lower_y = y_int & 3;
    target_ulong lower_z = z_int & 1;

    target_ulong middle_x = (x_int >> 2) & 15;
    target_ulong middle_y = (y_int >> 2) & 15;
    target_ulong middle_z = (z_int >> 1) & 15;

    target_ulong upper_x = (x_int >> 6) & ((1 << n) - 1);
    target_ulong upper_y = (y_int >> 6) & ((1 << n) - 1);
    target_ulong upper_z = z_int >> 5;

    return (upper_z << (17 + 2 * n))
         | (upper_y << (17 + n))
         | (upper_x << 17)
         | (middle_z << 13)
         | (middle_y << 9)
         | (middle_x << 5)
         | (lower_z << 4)
         | (lower_y << 2)
         | lower_x;
}

#if HOST_BIG_ENDIAN
#define VIS_B64(n) b[7 - (n)]
#define VIS_SB64(n) sb[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
#define VIS_SL64(n) sl[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#else
#define VIS_B64(n) b[n]
#define VIS_SB64(n) sb[n]
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
#define VIS_SL64(n) sl[n]
#define VIS_B32(n) b[n]
#define VIS_W32(n) w[n]
#endif

/* Lane views of 64-bit and 32-bit quantities, indexed by the macros above. */
typedef union {
    uint8_t b[8];
    int8_t sb[8];
    uint16_t w[4];
    int16_t sw[4];
    uint32_t l[2];
    int32_t sl[2];
    uint64_t ll;
} VIS64;

typedef union {
    uint8_t b[4];
    uint16_t w[2];
    uint32_t l;
} VIS32;

uint64_t helper_fpmerge(uint32_t src1, uint32_t src2)
{
    VIS32 s1, s2;
    VIS64 d;

    s1.l = src1;
    s2.l = src2;

    d.VIS_B64(7) = s1.VIS_B32(3);
    d.VIS_B64(6) = s2.VIS_B32(3);
    d.VIS_B64(5) = s1.VIS_B32(2);
    d.VIS_B64(4) = s2.VIS_B32(2);
    d.VIS_B64(3) = s1.VIS_B32(1);
    d.VIS_B64(2) = s2.VIS_B32(1);
    d.VIS_B64(1) = s1.VIS_B32(0);
    d.VIS_B64(0) = s2.VIS_B32(0);

    return d.ll;
}
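
/*
 * Common kernel of the fmul8x16 family below: multiply an unsigned
 * 8-bit component by a signed 16-bit value and round the product to
 * 16 bits; the +0x80 rounds to nearest before the low eight bits are
 * discarded.
 */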
static inline int do_ms16b(int x, int y)
{
    return ((x * y) + 0x80) >> 8;
}

uint64_t helper_fmul8x16(uint32_t src1, uint64_t src2)
{
    VIS32 s;
    VIS64 d;

    s.l = src1;
    d.ll = src2;

    d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), d.VIS_SW64(0));
    d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), d.VIS_SW64(1));
    d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), d.VIS_SW64(2));
    d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), d.VIS_SW64(3));

    return d.ll;
}

uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2)
{
    VIS32 s;
    VIS64 d;

    s.l = src1;

    d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), src2);
    d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), src2);
    d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), src2);
    d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), src2);

    return d.ll;
}

uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;

    s.ll = src1;
    d.ll = src2;

    d.VIS_W64(0) = do_ms16b(s.VIS_SB64(1), d.VIS_SW64(0));
    d.VIS_W64(1) = do_ms16b(s.VIS_SB64(3), d.VIS_SW64(1));
    d.VIS_W64(2) = do_ms16b(s.VIS_SB64(5), d.VIS_SW64(2));
    d.VIS_W64(3) = do_ms16b(s.VIS_SB64(7), d.VIS_SW64(3));

    return d.ll;
}

uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;

    s.ll = src1;
    d.ll = src2;

    d.VIS_W64(0) = (s.VIS_B64(0) * d.VIS_SW64(0) + 0x8000) >> 16;
    d.VIS_W64(1) = (s.VIS_B64(2) * d.VIS_SW64(1) + 0x8000) >> 16;
    d.VIS_W64(2) = (s.VIS_B64(4) * d.VIS_SW64(2) + 0x8000) >> 16;
    d.VIS_W64(3) = (s.VIS_B64(6) * d.VIS_SW64(3) + 0x8000) >> 16;

    return d.ll;
}

uint64_t helper_fexpand(uint32_t src2)
{
    VIS32 s;
    VIS64 d;

    s.l = src2;

    d.VIS_W64(0) = s.VIS_B32(0) << 4;
    d.VIS_W64(1) = s.VIS_B32(1) << 4;
    d.VIS_W64(2) = s.VIS_B32(2) << 4;
    d.VIS_W64(3) = s.VIS_B32(3) << 4;

    return d.ll;
}

uint64_t helper_fcmpeq8(uint64_t src1, uint64_t src2)
{
    uint64_t a = src1 ^ src2;
    uint64_t m = 0x7f7f7f7f7f7f7f7fULL;
    uint64_t c = ~(((a & m) + m) | a | m);
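
    /*
     * This is the usual zero-byte detection trick: ((a & m) + m)
     * carries into a byte's top bit iff its low seven bits are
     * non-zero, and "| a" accounts for the top bit itself, so after
     * the complement c has 0x80 exactly in those bytes where src1
     * and src2 are equal, and 0x00 elsewhere.
     */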

    /* a.......b.......c.......d.......e.......f.......g.......h....... */
    c |= c << 7;
    /* ab......bc......cd......de......ef......fg......gh......h....... */
    c |= c << 14;
    /* abcd....bcde....cdef....defg....efgh....fgh.....gh......h....... */
    c |= c << 28;
    /* abcdefghbcdefgh.cdefgh..defgh...efgh....fgh.....gh......h....... */
    return c >> 56;
}

uint64_t helper_fcmpne8(uint64_t src1, uint64_t src2)
{
    return helper_fcmpeq8(src1, src2) ^ 0xff;
}

uint64_t helper_fcmple8(uint64_t src1, uint64_t src2)
{
    VIS64 s1, s2;
    uint64_t r = 0;

    s1.ll = src1;
    s2.ll = src2;

    for (int i = 0; i < 8; ++i) {
        r |= (s1.VIS_SB64(i) <= s2.VIS_SB64(i)) << i;
    }
    return r;
}

uint64_t helper_fcmpgt8(uint64_t src1, uint64_t src2)
{
    return helper_fcmple8(src1, src2) ^ 0xff;
}

uint64_t helper_fcmpule8(uint64_t src1, uint64_t src2)
{
    VIS64 s1, s2;
    uint64_t r = 0;

    s1.ll = src1;
    s2.ll = src2;

    for (int i = 0; i < 8; ++i) {
        r |= (s1.VIS_B64(i) <= s2.VIS_B64(i)) << i;
    }
    return r;
}

uint64_t helper_fcmpugt8(uint64_t src1, uint64_t src2)
{
    return helper_fcmpule8(src1, src2) ^ 0xff;
}

uint64_t helper_fcmpeq16(uint64_t src1, uint64_t src2)
{
    uint64_t a = src1 ^ src2;
    uint64_t m = 0x7fff7fff7fff7fffULL;
    uint64_t c = ~(((a & m) + m) | a | m);
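
    /*
     * Same zero-detection trick as in helper_fcmpeq8, but on 16-bit
     * lanes: c now has 0x8000 in each lane where src1 == src2.
     */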

    /* a...............b...............c...............d............... */
    c |= c << 15;
    /* ab..............bc..............cd..............d............... */
    c |= c << 30;
    /* abcd............bcd.............cd..............d............... */
    return c >> 60;
}

uint64_t helper_fcmpne16(uint64_t src1, uint64_t src2)
{
    return helper_fcmpeq16(src1, src2) ^ 0xf;
}

uint64_t helper_fcmple16(uint64_t src1, uint64_t src2)
{
    VIS64 s1, s2;
    uint64_t r = 0;

    s1.ll = src1;
    s2.ll = src2;

    for (int i = 0; i < 4; ++i) {
        r |= (s1.VIS_SW64(i) <= s2.VIS_SW64(i)) << i;
    }
    return r;
}

uint64_t helper_fcmpgt16(uint64_t src1, uint64_t src2)
{
    return helper_fcmple16(src1, src2) ^ 0xf;
}

uint64_t helper_fcmpule16(uint64_t src1, uint64_t src2)
{
    VIS64 s1, s2;
    uint64_t r = 0;

    s1.ll = src1;
    s2.ll = src2;

    for (int i = 0; i < 4; ++i) {
        r |= (s1.VIS_W64(i) <= s2.VIS_W64(i)) << i;
    }
    return r;
}

uint64_t helper_fcmpugt16(uint64_t src1, uint64_t src2)
{
    return helper_fcmpule16(src1, src2) ^ 0xf;
}

uint64_t helper_fcmpeq32(uint64_t src1, uint64_t src2)
{
    uint64_t a = src1 ^ src2;
    return ((uint32_t)a == 0) | (a >> 32 ? 0 : 2);
}
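
/*
 * With only two 32-bit lanes there is no need for the carry trick:
 * each half of src1 ^ src2 is tested for zero directly to form the
 * two-bit lane mask.
 */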

uint64_t helper_fcmpne32(uint64_t src1, uint64_t src2)
{
    uint64_t a = src1 ^ src2;
    return ((uint32_t)a != 0) | (a >> 32 ? 2 : 0);
}

uint64_t helper_fcmple32(uint64_t src1, uint64_t src2)
{
    VIS64 s1, s2;
    uint64_t r = 0;

    s1.ll = src1;
    s2.ll = src2;

    for (int i = 0; i < 2; ++i) {
        r |= (s1.VIS_SL64(i) <= s2.VIS_SL64(i)) << i;
    }
    return r;
}

uint64_t helper_fcmpgt32(uint64_t src1, uint64_t src2)
{
    return helper_fcmple32(src1, src2) ^ 3;
}

uint64_t helper_fcmpule32(uint64_t src1, uint64_t src2)
{
    VIS64 s1, s2;
    uint64_t r = 0;

    s1.ll = src1;
    s2.ll = src2;

    for (int i = 0; i < 2; ++i) {
        r |= (s1.VIS_L64(i) <= s2.VIS_L64(i)) << i;
    }
    return r;
}

uint64_t helper_fcmpugt32(uint64_t src1, uint64_t src2)
{
    return helper_fcmpule32(src1, src2) ^ 3;
}
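
/*
 * helper_pdist accumulates the sum of absolute byte differences of
 * src1 and src2 into the running total "sum", the inner step of
 * sum-of-absolute-differences (SAD) loops in motion estimation.
 */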
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int i;

    for (i = 0; i < 8; i++) {
        int s1, s2;

        s1 = (src1 >> (56 - (i * 8))) & 0xff;
        s2 = (src2 >> (56 - (i * 8))) & 0xff;

        /* Absolute value of difference. */
        s1 -= s2;
        if (s1 < 0) {
            s1 = -s1;
        }

        sum += s1;
    }

    return sum;
}

uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0xf;
    uint32_t ret = 0;
    int byte;

    for (byte = 0; byte < 4; byte++) {
        uint32_t val;
        int16_t src = rs2 >> (byte * 16);
        int32_t scaled = src << scale;
        int32_t from_fixed = scaled >> 7;

        val = (from_fixed < 0 ? 0 :
               from_fixed > 255 ? 255 : from_fixed);

        ret |= val << (8 * byte);
    }

    return ret;
}
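
/*
 * GSR.scale positions the binary point of the 16-bit fixed-point
 * inputs: e.g. with scale == 3, the input 0x0200 becomes
 * (0x0200 << 3) >> 7 = 32 before the clamp to [0, 255].
 */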

uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint64_t ret;
    int word;

    ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
    for (word = 0; word < 2; word++) {
        uint64_t val;
        int32_t src = rs2 >> (word * 32);
        int64_t scaled = (int64_t)src << scale;
        int64_t from_fixed = scaled >> 23;

        val = (from_fixed < 0 ? 0 :
               (from_fixed > 255) ? 255 : from_fixed);

        ret |= val << (32 * word);
    }

    return ret;
}

uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint32_t ret = 0;
    int word;

    for (word = 0; word < 2; word++) {
        int32_t val;
        int32_t src = rs2 >> (word * 32);
        int64_t scaled = (int64_t)src << scale;
        int64_t from_fixed = scaled >> 16;

        val = (from_fixed < -32768 ? -32768 :
               from_fixed > 32767 ? 32767 : from_fixed);

        ret |= (val & 0xffff) << (word * 16);
    }

    return ret;
}
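
/*
 * Unlike fpack16, fpackfix produces signed 16-bit results: each
 * scaled 32-bit input is saturated to [-32768, 32767] and the two
 * halves are packed into one 32-bit value.
 */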

uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } s;
    VIS64 r;
    uint32_t i, mask, host;

    /* Set up S such that we can index across all of the bytes. */
#if HOST_BIG_ENDIAN
    s.ll[0] = src1;
    s.ll[1] = src2;
    host = 0;
#else
    s.ll[0] = src2;
    s.ll[1] = src1;
    host = 15;
#endif
    mask = gsr >> 32;

    for (i = 0; i < 8; ++i) {
        unsigned e = (mask >> (28 - i * 4)) & 0xf;
        r.VIS_B64(i) = s.b[e ^ host];
    }

    return r.ll;
}
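
/*
 * The upper 32 bits of GSR hold eight 4-bit indices, one per
 * destination byte; each selects a byte from the 16-byte
 * concatenation of src1 and src2.  The XOR with "host" converts the
 * mask's big-endian byte numbering into host byte order.
 */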

uint64_t helper_cmask8(uint64_t gsr, uint64_t src)
{
    uint32_t mask = 0;

    mask |= (src & 0x01 ? 0x00000007 : 0x0000000f);
    mask |= (src & 0x02 ? 0x00000060 : 0x000000e0);
    mask |= (src & 0x04 ? 0x00000500 : 0x00000d00);
    mask |= (src & 0x08 ? 0x00004000 : 0x0000c000);
    mask |= (src & 0x10 ? 0x00030000 : 0x000b0000);
    mask |= (src & 0x20 ? 0x00200000 : 0x00a00000);
    mask |= (src & 0x40 ? 0x01000000 : 0x09000000);
    mask |= (src & 0x80 ? 0x00000000 : 0x80000000);

    return deposit64(gsr, 32, 32, mask);
}
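
/*
 * The cmask helpers build a GSR.mask for a subsequent BSHUFFLE:
 * each condition bit in src selects between the corresponding lane
 * of the first source operand (byte indices 0-7) and of the second
 * (indices 8-15); deposit64 places the mask in GSR bits 63:32.
 */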

uint64_t helper_cmask16(uint64_t gsr, uint64_t src)
{
    uint32_t mask = 0;

    mask |= (src & 0x1 ? 0x00000067 : 0x000000ef);
    mask |= (src & 0x2 ? 0x00004500 : 0x0000cd00);
    mask |= (src & 0x4 ? 0x00230000 : 0x00ab0000);
    mask |= (src & 0x8 ? 0x01000000 : 0x89000000);

    return deposit64(gsr, 32, 32, mask);
}

uint64_t helper_cmask32(uint64_t gsr, uint64_t src)
{
    uint32_t mask = 0;

    mask |= (src & 0x1 ? 0x00004567 : 0x0000cdef);
    mask |= (src & 0x2 ? 0x01230000 : 0x89ab0000);

    return deposit64(gsr, 32, 32, mask);
}

static inline uint16_t do_fchksm16(uint16_t src1, uint16_t src2)
{
    uint16_t a = src1 + src2;
    uint16_t c = a < src1;
    return a + c;
}
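
/*
 * do_fchksm16 above is a 16-bit ones'-complement style add: the
 * carry out of the 16-bit sum is folded back into the low bit
 * (end-around carry), as in the Internet checksum.
 */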

uint64_t helper_fchksm16(uint64_t src1, uint64_t src2)
{
    VIS64 r, s1, s2;

    s1.ll = src1;
    s2.ll = src2;

    r.VIS_W64(0) = do_fchksm16(s1.VIS_W64(0), s2.VIS_W64(0));
    r.VIS_W64(1) = do_fchksm16(s1.VIS_W64(1), s2.VIS_W64(1));
    r.VIS_W64(2) = do_fchksm16(s1.VIS_W64(2), s2.VIS_W64(2));
    r.VIS_W64(3) = do_fchksm16(s1.VIS_W64(3), s2.VIS_W64(3));

    return r.ll;
}

static inline int16_t do_fmean16(int16_t src1, int16_t src2)
{
    return (src1 + src2 + 1) / 2;
}
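
/*
 * do_fmean16 above averages two signed 16-bit values with a +1
 * rounding bias; the sum is formed in int, so it cannot overflow.
 */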

uint64_t helper_fmean16(uint64_t src1, uint64_t src2)
{
    VIS64 r, s1, s2;

    s1.ll = src1;
    s2.ll = src2;

    r.VIS_SW64(0) = do_fmean16(s1.VIS_SW64(0), s2.VIS_SW64(0));
    r.VIS_SW64(1) = do_fmean16(s1.VIS_SW64(1), s2.VIS_SW64(1));
    r.VIS_SW64(2) = do_fmean16(s1.VIS_SW64(2), s2.VIS_SW64(2));
    r.VIS_SW64(3) = do_fmean16(s1.VIS_SW64(3), s2.VIS_SW64(3));

    return r.ll;
}

uint64_t helper_fslas16(uint64_t src1, uint64_t src2)
{
    VIS64 r, s1, s2;

    s1.ll = src1;
    s2.ll = src2;

    for (int i = 0; i < 4; ++i) {
        int t = s1.VIS_SW64(i) << (s2.VIS_W64(i) % 16);
        t = MIN(t, INT16_MAX);
        t = MAX(t, INT16_MIN);
        r.VIS_W64(i) = t;
    }

    return r.ll;
}
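
/*
 * The shift in helper_fslas16 is performed at full int width, so the
 * MIN/MAX clamp sees the unwrapped value before it is saturated to
 * the int16 range.  helper_fslas32 below does the same at 32 bits,
 * widening to int64_t first.
 */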

uint64_t helper_fslas32(uint64_t src1, uint64_t src2)
{
    VIS64 r, s1, s2;

    s1.ll = src1;
    s2.ll = src2;

    for (int i = 0; i < 2; ++i) {
        int64_t t = (int64_t)(int32_t)s1.VIS_L64(i) << (s2.VIS_L64(i) % 32);
        t = MIN(t, INT32_MAX);
        t = MAX(t, INT32_MIN);
        r.VIS_L64(i) = t;
    }

    return r.ll;
}

uint64_t helper_xmulx(uint64_t src1, uint64_t src2)
{
    return int128_getlo(clmul_64(src1, src2));
}

uint64_t helper_xmulxhi(uint64_t src1, uint64_t src2)
{
    return int128_gethi(clmul_64(src1, src2));
}
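
/*
 * XMULX/XMULXHI perform a 64x64 -> 128 bit carry-less (GF(2))
 * multiplication; partial products are XORed rather than added.
 * For example, clmul(0b11, 0b11) = 0b101, since
 * (x + 1) * (x + 1) = x^2 + 1 over GF(2).
 */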