4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
24 * Copyright (c) 2019-2022 Samuel Neves and Matthew Krupcale
25 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
27 * This is converted assembly: SSE2 -> POWER8 PPC64 Little Endian
28 * Used tools: SIMDe https://github.com/simd-everywhere/simde
/*
 * Target guard: the code up to the matching #endif is assembled only when
 * the preprocessor defines both __PPC64__ and __LITTLE_ENDIAN__.
 */
31 #if (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
/*
 * Allocatable, mergeable ("aM") read-only section with 16-byte entries.
 * Presumably holds the .LCPI0_* constants addressed by the function
 * below -- confirm against the constant definitions.
 */
34 .section .rodata.cst16,"aM",@progbits,16
/*
 * zfs_blake3_compress_in_place_sse2 -- exported function symbol.
 *
 * NOTE(review): judging by the name and the file header this is the BLAKE3
 * single-block compression routine converted from the SSE2 version; the
 * argument layout cannot be read off the instructions annotated here --
 * confirm against the C prototype.
 *
 * Register numbering used below: the operands of xxsldwi are VSX register
 * numbers, where 32..63 alias the vector registers v0..v31 (45 is v13,
 * 47 is v15, ...). Plain integers in addis/addi are general purpose
 * registers.
 */
256 .globl zfs_blake3_compress_in_place_sse2
258 .type zfs_blake3_compress_in_place_sse2,@function
259 zfs_blake3_compress_in_place_sse2:
/*
 * Global entry: r2 = r12 + (.TOC. - .Lfunc_gep0), built from the
 * high-adjusted (@ha) and low (@l) halves of that distance, so r2 becomes
 * the TOC base used by every @toc reference that follows.
 */
263 addis 2, 12, .TOC.-.Lfunc_gep0@ha
264 addi 2, 2, .TOC.-.Lfunc_gep0@l
/* Record the offset of the local entry point (.Lfunc_lep0) in the symbol. */
266 .localentry zfs_blake3_compress_in_place_sse2, .Lfunc_lep0-.Lfunc_gep0
/*
 * Constant addressing: each addis/addi pair forms the address of one
 * .LCPI0_n constant. addis adds the high-adjusted half of the TOC-relative
 * offset to r2, addi then adds the low half. Only r4, r6, r7 and r8 are
 * used as address registers in this function.
 */
281 addis 7, 2, .LCPI0_2@toc@ha
284 addi 7, 7, .LCPI0_2@toc@l
294 addis 8, 2, .LCPI0_5@toc@ha
296 addi 8, 8, .LCPI0_5@toc@l
304 addis 6, 2, .LCPI0_0@toc@ha
305 addi 6, 6, .LCPI0_0@toc@l
310 addis 7, 2, .LCPI0_8@toc@ha
311 addi 7, 7, .LCPI0_8@toc@l
320 addis 6, 2, .LCPI0_1@toc@ha
321 addi 6, 6, .LCPI0_1@toc@l
/* Two pairs interleaved: r4 = &.LCPI0_3 and r6 = &.LCPI0_7. */
326 addis 4, 2, .LCPI0_3@toc@ha
327 addis 6, 2, .LCPI0_7@toc@ha
330 addi 4, 4, .LCPI0_3@toc@l
331 addi 6, 6, .LCPI0_7@toc@l
347 addis 4, 2, .LCPI0_4@toc@ha
348 addi 4, 4, .LCPI0_4@toc@l
/*
 * xxsldwi T, A, A, n with both sources equal rotates the four 32-bit words
 * of A by n word positions and writes the result to T. Rotations by 1 and
 * by 3 undo each other (1 + 3 = 4 words).
 * Here: v18 = v13 rotated by 1, v11 rotated in place by 3, v1 by 1.
 */
352 xxsldwi 50, 45, 45, 1
354 xxsldwi 43, 43, 43, 3
355 xxsldwi 33, 33, 33, 1
/* r4 is reused as the address register for successive constants. */
364 addis 4, 2, .LCPI0_6@toc@ha
365 addi 4, 4, .LCPI0_6@toc@l
368 addis 4, 2, .LCPI0_9@toc@ha
370 addi 4, 4, .LCPI0_9@toc@l
379 xxsldwi 40, 40, 40, 1
384 xxsldwi 46, 46, 46, 3
385 xxsldwi 32, 32, 32, 3
/*
 * Split pair: the high half of .LCPI0_10 is parked in r6 while r4 is used
 * for .LCPI0_11; the later "addi 4, 6, ..." completes &.LCPI0_10 into r4.
 */
391 addis 6, 2, .LCPI0_10@toc@ha
398 addis 4, 2, .LCPI0_11@toc@ha
399 addi 4, 4, .LCPI0_11@toc@l
404 addi 4, 6, .LCPI0_10@toc@l
406 addis 4, 2, .LCPI0_12@toc@ha
/*
 * From here on v15 (VSX 47) alternates between rotate-by-1 and
 * rotate-by-3. Presumably these are the diagonalize / undiagonalize row
 * rotations of the BLAKE3 rounds (pshufd in the SSE2 original) -- confirm.
 */
408 xxsldwi 47, 47, 47, 1
409 addi 4, 4, .LCPI0_12@toc@l
418 addis 4, 2, .LCPI0_13@toc@ha
420 addi 4, 4, .LCPI0_13@toc@l
422 xxsldwi 51, 51, 51, 3
447 xxsldwi 47, 47, 47, 3
450 xxsldwi 51, 51, 51, 1
461 addis 4, 2, .LCPI0_14@toc@ha
462 addi 4, 4, .LCPI0_14@toc@l
464 xxsldwi 47, 47, 47, 1
/* Rotations into a different destination: v18 <- v19, then v19 <- v31. */
472 xxsldwi 50, 51, 51, 3
473 xxsldwi 51, 63, 63, 3
491 xxsldwi 63, 63, 63, 3
493 xxsldwi 47, 47, 47, 3
497 xxsldwi 50, 50, 50, 1
513 xxsldwi 47, 47, 47, 1
521 xxsldwi 48, 50, 50, 3
538 xxsldwi 47, 47, 47, 3
542 xxsldwi 48, 48, 48, 1
558 xxsldwi 47, 47, 47, 1
563 xxsldwi 51, 62, 62, 3
569 xxsldwi 48, 48, 48, 3
582 xxsldwi 47, 47, 47, 3
586 xxsldwi 43, 43, 43, 1
604 xxsldwi 47, 47, 47, 1
/* v31 <- v30 rotated by 3, v14 <- v29 rotated by 3. */
610 xxsldwi 63, 62, 62, 3
617 xxsldwi 46, 61, 61, 3
633 xxsldwi 47, 47, 47, 3
639 xxsldwi 46, 46, 46, 1
652 xxsldwi 32, 32, 32, 1
658 xxsldwi 38, 40, 40, 3
/* Symbol size is the distance between .Lfunc_begin0 and .Lfunc_end0. */
696 .size zfs_blake3_compress_in_place_sse2, .Lfunc_end0-.Lfunc_begin0
/*
 * Allocatable, mergeable ("aM") read-only section with 16-byte entries.
 * Presumably holds the .LCPI1_* constants addressed by the function
 * below -- confirm against the constant definitions.
 */
699 .section .rodata.cst16,"aM",@progbits,16
/*
 * zfs_blake3_compress_xof_sse2 -- exported function symbol.
 *
 * NOTE(review): judging by the name this is the extendable-output variant
 * of the BLAKE3 compression routine; the argument layout cannot be read
 * off the instructions annotated here -- confirm against the C prototype.
 *
 * Register numbering used below: xxsldwi and xxsel take VSX register
 * numbers, where 32..63 alias the vector registers v0..v31. vperm takes
 * plain vector register numbers 0..31. Plain integers in addis/addi are
 * general purpose registers.
 */
921 .globl zfs_blake3_compress_xof_sse2
923 .type zfs_blake3_compress_xof_sse2,@function
924 zfs_blake3_compress_xof_sse2:
/*
 * Global entry: r2 = r12 + (.TOC. - .Lfunc_gep1), giving the TOC base used
 * by every @toc reference that follows.
 */
928 addis 2, 12, .TOC.-.Lfunc_gep1@ha
929 addi 2, 2, .TOC.-.Lfunc_gep1@l
/* Record the offset of the local entry point (.Lfunc_lep1) in the symbol. */
931 .localentry zfs_blake3_compress_xof_sse2, .Lfunc_lep1-.Lfunc_gep1
/*
 * Constant addressing: addis adds the high-adjusted (@ha) half of a
 * constant's TOC-relative offset to r2, the matching addi adds the low
 * (@l) half. Four high halves are issued up front (r10, r12, r30, r11).
 * r12 is overwritten here, after its use as the entry address above.
 *
 * NOTE(review): r30 is a nonvolatile register in the ELFv2 ABI, so it has
 * to be saved before this write and restored before returning. That
 * save/restore is not visible next to this code -- confirm it exists.
 */
936 addis 10, 2, .LCPI1_2@toc@ha
939 addis 12, 2, .LCPI1_8@toc@ha
940 addis 30, 2, .LCPI1_5@toc@ha
941 addis 11, 2, .LCPI1_7@toc@ha
946 addi 10, 10, .LCPI1_2@toc@l
947 addi 12, 12, .LCPI1_8@toc@l
948 addi 11, 11, .LCPI1_7@toc@l
973 addis 6, 2, .LCPI1_0@toc@ha
974 addi 6, 6, .LCPI1_0@toc@l
986 addis 6, 2, .LCPI1_1@toc@ha
987 addi 6, 6, .LCPI1_1@toc@l
992 addis 4, 2, .LCPI1_3@toc@ha
995 addi 4, 4, .LCPI1_3@toc@l
1012 addis 4, 2, .LCPI1_4@toc@ha
1013 addi 4, 4, .LCPI1_4@toc@l
/*
 * xxsldwi T, A, A, n with both sources equal rotates the four 32-bit words
 * of A by n word positions and writes the result to T. Rotations by 1 and
 * by 3 undo each other (1 + 3 = 4 words).
 */
1017 xxsldwi 50, 45, 45, 1
1019 xxsldwi 43, 43, 43, 3
1020 xxsldwi 33, 33, 33, 1
1029 addis 4, 2, .LCPI1_6@toc@ha
1030 addi 4, 4, .LCPI1_6@toc@l
/* Completes &.LCPI1_5 into r4 from the high half parked in r30 earlier. */
1033 addi 4, 30, .LCPI1_5@toc@l
1042 addis 4, 2, .LCPI1_9@toc@ha
1043 addi 4, 4, .LCPI1_9@toc@l
1045 xxsldwi 40, 40, 40, 1
1050 xxsldwi 46, 46, 46, 3
1051 xxsldwi 32, 32, 32, 3
/* r11 now carries the high half of .LCPI1_10 until the "addi 4, 11" below. */
1057 addis 11, 2, .LCPI1_10@toc@ha
/*
 * xxsel T, A, B, C is a bitwise select: each result bit comes from B where
 * the corresponding bit of C is 1, otherwise from A. Every xxsel in this
 * function uses VSX 32 (v0) as the selector C.
 */
1061 xxsel 45, 39, 45, 32
1064 addis 4, 2, .LCPI1_11@toc@ha
1065 addi 4, 4, .LCPI1_11@toc@l
1070 addi 4, 11, .LCPI1_10@toc@l
1072 addis 4, 2, .LCPI1_12@toc@ha
/*
 * From here on v15 (VSX 47) alternates between rotate-by-1 and
 * rotate-by-3. Presumably these are the diagonalize / undiagonalize row
 * rotations of the BLAKE3 rounds (pshufd in the SSE2 original) -- confirm.
 */
1074 xxsldwi 47, 47, 47, 1
1075 addi 4, 4, .LCPI1_12@toc@l
/*
 * vperm T, A, B, C: each result byte is picked from the 32-byte
 * concatenation of A and B using the corresponding control byte of C.
 * The control register here is v17 itself; most later vperm instructions
 * use v10, v11 or v12 as control. Presumably those hold permutation masks
 * loaded from the .LCPI1_* constants -- confirm.
 */
1080 vperm 17, 30, 12, 17
1084 addis 4, 2, .LCPI1_13@toc@ha
1086 addi 4, 4, .LCPI1_13@toc@l
1088 xxsldwi 51, 51, 51, 3
1093 vperm 18, 12, 18, 10
1107 vperm 31, 13, 14, 11
1113 xxsldwi 47, 47, 47, 3
1114 xxsel 46, 46, 62, 32
1116 xxsldwi 51, 51, 51, 1
1127 addis 4, 2, .LCPI1_14@toc@ha
1128 addi 4, 4, .LCPI1_14@toc@l
1130 xxsldwi 47, 47, 47, 1
1131 vperm 30, 13, 18, 12
1136 vperm 16, 13, 16, 10
/* Rotations into a different destination: v18 <- v19, then v19 <- v31. */
1138 xxsldwi 50, 51, 51, 3
1139 xxsldwi 51, 63, 63, 3
1143 vperm 31, 14, 19, 11
1157 xxsldwi 63, 63, 63, 3
1159 xxsldwi 47, 47, 47, 3
1163 xxsldwi 50, 50, 50, 1
1171 xxsel 45, 51, 62, 32
1173 vperm 30, 14, 16, 12
1179 xxsldwi 47, 47, 47, 1
1187 xxsldwi 48, 50, 50, 3
1188 vperm 18, 14, 29, 10
1197 vperm 30, 13, 31, 11
1204 xxsldwi 47, 47, 47, 3
1208 xxsldwi 48, 48, 48, 1
1216 xxsel 46, 63, 61, 32
1218 vperm 29, 13, 18, 12
1224 xxsldwi 47, 47, 47, 1
1228 vperm 13, 13, 19, 10
1229 xxsldwi 51, 62, 62, 3
1231 vperm 30, 14, 19, 11
1235 xxsldwi 48, 48, 48, 3
1248 xxsldwi 47, 47, 47, 3
1252 xxsldwi 43, 43, 43, 1
1262 xxsel 43, 50, 51, 32
1264 vperm 19, 14, 13, 12
1270 xxsldwi 47, 47, 47, 1
1275 vperm 13, 13, 31, 10
1276 xxsldwi 63, 62, 62, 3
/* This vperm takes its control bytes from v19 and overwrites v19. */
1278 vperm 19, 11, 31, 19
1283 xxsldwi 46, 61, 61, 3
/*
 * Last select: the destination is VSX 32, i.e. the selector register v0
 * itself, so the select mask is no longer available after this point.
 */
1286 xxsel 32, 33, 38, 32
1299 xxsldwi 47, 47, 47, 3
1305 xxsldwi 46, 46, 46, 1
1318 xxsldwi 32, 32, 32, 1
1324 xxsldwi 38, 40, 40, 3
/*
 * These two write VSX registers 0 and 1 (outside the 32..63 vector range):
 * VSX0 = v0 rotated by 3 words, VSX1 = v4 rotated by 1 word.
 */
1337 xxsldwi 0, 32, 32, 3
1342 xxsldwi 1, 36, 36, 1
/* Symbol size is the distance between .Lfunc_begin1 and .Lfunc_end1. */
1375 .size zfs_blake3_compress_xof_sse2, .Lfunc_end1-.Lfunc_begin1
/*
 * zfs_blake3_hash_many_sse2 -- exported function symbol.
 *
 * Non-leaf routine: it calls blake3_hash4_sse2 and
 * zfs_blake3_compress_in_place_sse2, both defined in this file.
 * NOTE(review): presumably inputs are processed four at a time through
 * blake3_hash4_sse2 and any remainder one at a time through
 * zfs_blake3_compress_in_place_sse2 -- confirm against the control flow
 * and the C prototype.
 */
1378 .globl zfs_blake3_hash_many_sse2
1380 .type zfs_blake3_hash_many_sse2,@function
1381 zfs_blake3_hash_many_sse2:
/*
 * Global entry: r2 = r12 + (.TOC. - .Lfunc_gep2), giving the TOC base.
 */
1385 addis 2, 12, .TOC.-.Lfunc_gep2@ha
1386 addi 2, 2, .TOC.-.Lfunc_gep2@l
/* Record the offset of the local entry point (.Lfunc_lep2) in the symbol. */
1388 .localentry zfs_blake3_hash_many_sse2, .Lfunc_lep2-.Lfunc_gep2
/*
 * Unwind information. The CFA is declared at offset 256 from the CFA
 * register. The .cfi_offset lines give the save slots of r17..r30
 * relative to the CFA: fourteen 8-byte slots from CFA-120 (r17) up to
 * CFA-16 (r30).
 */
1394 .cfi_def_cfa_offset 256
1396 .cfi_offset r17, -120
1397 .cfi_offset r18, -112
1398 .cfi_offset r19, -104
1399 .cfi_offset r20, -96
1400 .cfi_offset r21, -88
1401 .cfi_offset r22, -80
1402 .cfi_offset r23, -72
1403 .cfi_offset r24, -64
1404 .cfi_offset r25, -56
1405 .cfi_offset r26, -48
1406 .cfi_offset r27, -40
1407 .cfi_offset r28, -32
1408 .cfi_offset r29, -24
1409 .cfi_offset r30, -16
/* Direct call to the file-local helper blake3_hash4_sse2. */
1454 bl blake3_hash4_sse2
/* Direct call to zfs_blake3_compress_in_place_sse2. */
1493 bl zfs_blake3_compress_in_place_sse2
/* Symbol size is the distance between .Lfunc_begin2 and .Lfunc_end2. */
1533 .size zfs_blake3_hash_many_sse2, .Lfunc_end2-.Lfunc_begin2
/*
 * Allocatable, mergeable ("aM") read-only section with 16-byte entries.
 * Presumably holds the .LCPI3_* constants addressed by the function
 * below -- confirm against the constant definitions.
 */
1536 .section .rodata.cst16,"aM",@progbits,16
/*
 * blake3_hash4_sse2 -- function symbol called from
 * zfs_blake3_hash_many_sse2 via "bl". No .globl directive for it appears
 * alongside the .type directive, unlike the zfs_* entry points.
 * NOTE(review): presumably the four-inputs-at-a-time hashing kernel --
 * confirm against the caller.
 */
1580 .type blake3_hash4_sse2,@function
/*
 * Global entry: r2 = r12 + (.TOC. - .Lfunc_gep3), giving the TOC base.
 */
1585 addis 2, 12, .TOC.-.Lfunc_gep3@ha
1586 addi 2, 2, .TOC.-.Lfunc_gep3@l
/* Record the offset of the local entry point (.Lfunc_lep3) in the symbol. */
1588 .localentry blake3_hash4_sse2, .Lfunc_lep3-.Lfunc_gep3
/*
 * Unwind information. The CFA is declared at offset 400 from the CFA
 * register. Save slots relative to the CFA:
 *   r22..r30  nine 8-byte slots,    CFA-152 .. CFA-88
 *   f23..f30  eight 8-byte slots,   CFA-72  .. CFA-16
 *   v20..v31  twelve 16-byte slots, CFA-352 .. CFA-176
 */
1590 .cfi_def_cfa_offset 400
1591 .cfi_offset r22, -152
1592 .cfi_offset r23, -144
1593 .cfi_offset r24, -136
1594 .cfi_offset r25, -128
1595 .cfi_offset r26, -120
1596 .cfi_offset r27, -112
1597 .cfi_offset r28, -104
1598 .cfi_offset r29, -96
1599 .cfi_offset r30, -88
1600 .cfi_offset f23, -72
1601 .cfi_offset f24, -64
1602 .cfi_offset f25, -56
1603 .cfi_offset f26, -48
1604 .cfi_offset f27, -40
1605 .cfi_offset f28, -32
1606 .cfi_offset f29, -24
1607 .cfi_offset f30, -16
1609 .cfi_offset v20, -352
1610 .cfi_offset v21, -336
1611 .cfi_offset v22, -320
1612 .cfi_offset v23, -304
1613 .cfi_offset v24, -288
1614 .cfi_offset v25, -272
1615 .cfi_offset v26, -256
1616 .cfi_offset v27, -240
1617 .cfi_offset v28, -224
1618 .cfi_offset v29, -208
1619 .cfi_offset v30, -192
1620 .cfi_offset v31, -176
/*
 * Constant addressing. The addis instructions place the high-adjusted
 * (@ha) half of each .LCPI3_n TOC-relative offset, added to r2, into
 * r30, r28, r27, r26, r25 and r7. The addi instructions then add the low
 * (@l) halves, but not always into the same register:
 *   r0  = &.LCPI3_0, later &.LCPI3_1, later &.LCPI3_2 (reused each time)
 *   r28 = &.LCPI3_3  (built from the high half held in r27)
 *   r27 = &.LCPI3_4  (built from the high half held in r26)
 *   r26 = &.LCPI3_5  (built from the high half held in r25)
 * r0 is valid as the addi destination; only a base operand of 0 is
 * special in addi (it reads as the literal value zero).
 * NOTE(review): each value placed in r0 must be consumed before the next
 * "addi 0, ..." overwrites it -- confirm against the intervening loads.
 */
1689 addis 30, 2, .LCPI3_0@toc@ha
1692 addis 28, 2, .LCPI3_2@toc@ha
1693 addis 27, 2, .LCPI3_3@toc@ha
1694 addis 26, 2, .LCPI3_4@toc@ha
1695 addis 25, 2, .LCPI3_5@toc@ha
1697 addi 0, 30, .LCPI3_0@toc@l
1699 addis 7, 2, .LCPI3_1@toc@ha
1704 addi 0, 7, .LCPI3_1@toc@l
1708 addi 0, 28, .LCPI3_2@toc@l
1709 addi 28, 27, .LCPI3_3@toc@l
1710 addi 27, 26, .LCPI3_4@toc@l
1711 addi 26, 25, .LCPI3_5@toc@l
/* Symbol size is the distance between .Lfunc_begin3 and .Lfunc_end3. */
2820 .size blake3_hash4_sse2, .Lfunc_end3-.Lfunc_begin3
/*
 * Empty .note.GNU-stack section: marks this object as not requiring an
 * executable stack when linked with GNU tools.
 */
2822 .section ".note.GNU-stack","",@progbits