4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
24 * Copyright (c) 2019-2022 Samuel Neves
25 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
27 * This is converted assembly: SSE4.1 -> POWER8 PPC64 Little Endian
28 * Used tools: SIMDe https://github.com/simd-everywhere/simde
/*
 * Assemble the rest of this file only when targeting 64-bit little-endian
 * PowerPC. The matching #endif is not visible in this view.
 */
31 #if (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
/*
 * Allocatable, mergeable read-only section with 16-byte entries ("aM",
 * entsize 16). Presumably holds the .LCPI0_* vector constants referenced
 * below; their definitions are not visible in this view -- TODO confirm.
 */
34 .section .rodata.cst16,"aM",@progbits,16
/*
 * zfs_blake3_compress_in_place_sse41 -- exported (.globl) function symbol.
 * Per the file header this is SSE4.1 code converted to POWER8 PPC64 LE,
 * which is where the _sse41 suffix comes from.
 * NOTE(review): the arguments are not described anywhere in the visible
 * text; presumably this mirrors upstream BLAKE3 blake3_compress_in_place()
 * -- confirm against the C prototype.
 */
280 .globl zfs_blake3_compress_in_place_sse41
282 .type zfs_blake3_compress_in_place_sse41,@function
283 zfs_blake3_compress_in_place_sse41:
/*
 * Global entry: r2 = r12 + (.TOC. - .Lfunc_gep0), added as a high-adjusted
 * half (addis) followed by the low half (addi). This yields the TOC pointer
 * provided r12 holds the address of .Lfunc_gep0 on entry (the label itself
 * is not visible here).
 */
287 addis 2, 12, .TOC.-.Lfunc_gep0@ha
288 addi 2, 2, .TOC.-.Lfunc_gep0@l
/* Record the local entry point as an offset from the global entry point. */
290 .localentry zfs_blake3_compress_in_place_sse41, .Lfunc_lep0-.Lfunc_gep0
/*
 * Form TOC-relative addresses of the .LCPI0_* constants: addis adds the
 * high-adjusted half of the offset to r2, addi then adds the low half.
 * r6 and r7 are the scratch address registers, and the pairs belonging to
 * different constants are interleaved, so match each addi to its addis by
 * symbol name rather than by position. The instructions that consume
 * these addresses are not visible in this view.
 */
301 addis 7, 2, .LCPI0_0@toc@ha
311 addi 6, 7, .LCPI0_0@toc@l
312 addis 7, 2, .LCPI0_2@toc@ha
317 addis 6, 2, .LCPI0_1@toc@ha
318 addi 7, 7, .LCPI0_2@toc@l
320 addi 6, 6, .LCPI0_1@toc@l
331 addis 7, 2, .LCPI0_4@toc@ha
332 addi 7, 7, .LCPI0_4@toc@l
335 addis 7, 2, .LCPI0_6@toc@ha
336 addi 7, 7, .LCPI0_6@toc@l
339 addis 6, 2, .LCPI0_3@toc@ha
340 addi 6, 6, .LCPI0_3@toc@l
346 addis 6, 2, .LCPI0_5@toc@ha
349 addi 6, 6, .LCPI0_5@toc@l
/* From here on r4 is also overwritten and used as an address scratch. */
362 addis 4, 2, .LCPI0_7@toc@ha
363 addis 6, 2, .LCPI0_9@toc@ha
366 addi 4, 4, .LCPI0_7@toc@l
367 addi 6, 6, .LCPI0_9@toc@l
375 addis 4, 2, .LCPI0_8@toc@ha
378 addi 4, 4, .LCPI0_8@toc@l
/*
 * Every xxsldwi in this function names the same register for both source
 * operands, which makes it a rotation of the four 32-bit words of that
 * register by the immediate (1 or 3 words); a few write the rotated value
 * to a different destination register. VSX registers 32-63 overlay the
 * vector registers v0-v31.
 * NOTE(review): these presumably correspond to the lane shuffles of the
 * BLAKE3 round function in the SSE4.1 original -- confirm.
 */
380 xxsldwi 48, 48, 48, 1
382 xxsldwi 40, 40, 40, 3
383 xxsldwi 39, 39, 39, 1
398 addis 4, 2, .LCPI0_10@toc@ha
399 addi 4, 4, .LCPI0_10@toc@l
404 xxsldwi 45, 45, 45, 3
406 xxsldwi 47, 47, 47, 1
407 xxsldwi 39, 39, 39, 3
412 addis 4, 2, .LCPI0_11@toc@ha
413 addi 4, 4, .LCPI0_11@toc@l
418 addis 4, 2, .LCPI0_12@toc@ha
420 addi 4, 4, .LCPI0_12@toc@l
431 xxsldwi 49, 49, 49, 1
435 addis 4, 2, .LCPI0_13@toc@ha
436 addi 4, 4, .LCPI0_13@toc@l
438 addis 4, 2, .LCPI0_14@toc@ha
440 addi 4, 4, .LCPI0_14@toc@l
/* No further constant addresses are formed in the visible lines below. */
447 xxsldwi 47, 47, 47, 3
465 xxsldwi 49, 49, 49, 3
472 xxsldwi 47, 47, 47, 1
487 xxsldwi 49, 49, 49, 1
494 xxsldwi 46, 47, 47, 3
501 xxsldwi 50, 51, 51, 3
511 xxsldwi 49, 49, 49, 3
516 xxsldwi 46, 46, 46, 1
532 xxsldwi 49, 49, 49, 1
539 xxsldwi 45, 63, 63, 3
549 xxsldwi 49, 51, 51, 3
559 xxsldwi 45, 45, 45, 1
562 xxsldwi 44, 44, 44, 3
578 xxsldwi 50, 51, 51, 3
581 xxsldwi 48, 48, 48, 1
593 xxsldwi 45, 45, 45, 3
613 xxsldwi 51, 51, 51, 3
614 xxsldwi 45, 45, 45, 3
620 xxsldwi 50, 63, 63, 1
635 xxsldwi 49, 49, 49, 1
642 xxsldwi 46, 50, 50, 3
656 xxsldwi 38, 38, 38, 3
661 xxsldwi 46, 46, 46, 1
673 xxsldwi 38, 38, 38, 1
680 xxsldwi 41, 46, 46, 3
/* Symbol size = distance between the .Lfunc_begin0 and .Lfunc_end0 labels. */
720 .size zfs_blake3_compress_in_place_sse41, .Lfunc_end0-.Lfunc_begin0
/*
 * Allocatable, mergeable read-only section with 16-byte entries ("aM",
 * entsize 16). Presumably holds the .LCPI1_* vector constants referenced
 * below; their definitions are not visible in this view -- TODO confirm.
 */
723 .section .rodata.cst16,"aM",@progbits,16
/*
 * zfs_blake3_compress_xof_sse41 -- exported (.globl) function symbol.
 * NOTE(review): the arguments are not described anywhere in the visible
 * text; presumably this mirrors upstream BLAKE3 blake3_compress_xof()
 * -- confirm against the C prototype.
 */
969 .globl zfs_blake3_compress_xof_sse41
971 .type zfs_blake3_compress_xof_sse41,@function
972 zfs_blake3_compress_xof_sse41:
/*
 * Global entry: r2 = r12 + (.TOC. - .Lfunc_gep1), added as a high-adjusted
 * half (addis) followed by the low half (addi). This yields the TOC pointer
 * provided r12 holds the address of .Lfunc_gep1 on entry (the label itself
 * is not visible here).
 */
976 addis 2, 12, .TOC.-.Lfunc_gep1@ha
977 addi 2, 2, .TOC.-.Lfunc_gep1@l
/* Record the local entry point as an offset from the global entry point. */
979 .localentry zfs_blake3_compress_xof_sse41, .Lfunc_lep1-.Lfunc_gep1
/*
 * The high half of the .LCPI1_9 address is parked in r11 here; the
 * matching low-half addi (into r4) only appears much further down.
 */
985 addis 11, 2, .LCPI1_9@toc@ha
/*
 * Form TOC-relative addresses of the .LCPI1_* constants: addis adds the
 * high-adjusted half of the offset to r2, addi then adds the low half.
 * r6 and r7 are the scratch address registers, and the pairs belonging to
 * different constants are interleaved, so match each addi to its addis by
 * symbol name rather than by position. The instructions that consume
 * these addresses are not visible in this view.
 */
991 addis 7, 2, .LCPI1_0@toc@ha
1001 addi 6, 7, .LCPI1_0@toc@l
1002 addis 7, 2, .LCPI1_2@toc@ha
1008 addis 6, 2, .LCPI1_1@toc@ha
1009 addi 7, 7, .LCPI1_2@toc@l
1012 addi 6, 6, .LCPI1_1@toc@l
1021 addis 7, 2, .LCPI1_4@toc@ha
1022 addi 7, 7, .LCPI1_4@toc@l
1025 addis 7, 2, .LCPI1_6@toc@ha
1026 addi 7, 7, .LCPI1_6@toc@l
1029 addis 6, 2, .LCPI1_3@toc@ha
1030 addi 6, 6, .LCPI1_3@toc@l
1036 addis 6, 2, .LCPI1_5@toc@ha
1039 addi 6, 6, .LCPI1_5@toc@l
/* From here on r4 is also overwritten and used as an address scratch. */
1052 addis 4, 2, .LCPI1_7@toc@ha
1054 addi 4, 4, .LCPI1_7@toc@l
1063 addis 4, 2, .LCPI1_8@toc@ha
1066 addi 4, 4, .LCPI1_8@toc@l
/*
 * Every xxsldwi in this function names the same register for both source
 * operands, which makes it a rotation of the four 32-bit words of that
 * register by the immediate (1 or 3 words); a few write the rotated value
 * to a different destination register. VSX registers 32-63 overlay the
 * vector registers v0-v31.
 * NOTE(review): these presumably correspond to the lane shuffles of the
 * BLAKE3 round function in the SSE4.1 original -- confirm.
 */
1068 xxsldwi 40, 40, 40, 3
1070 xxsldwi 48, 48, 48, 1
1072 xxsldwi 39, 39, 39, 1
/*
 * vperm picks bytes out of the concatenation of its two source vectors
 * under control of the vector named last. Here v14 is both the control
 * vector and the destination, so the control value is overwritten.
 */
1073 vperm 14, 10, 12, 14
1086 addis 4, 2, .LCPI1_10@toc@ha
1087 addi 4, 4, .LCPI1_10@toc@l
1092 xxsldwi 45, 45, 45, 3
1094 xxsldwi 47, 47, 47, 1
1095 xxsldwi 39, 39, 39, 3
/* Complete the .LCPI1_9 address using the high half parked in r11. */
1100 addi 4, 11, .LCPI1_9@toc@l
1103 addis 4, 2, .LCPI1_11@toc@ha
1105 addi 4, 4, .LCPI1_11@toc@l
1108 addis 4, 2, .LCPI1_12@toc@ha
1110 addi 4, 4, .LCPI1_12@toc@l
1120 xxsldwi 49, 49, 49, 1
1124 addis 4, 2, .LCPI1_13@toc@ha
1125 addi 4, 4, .LCPI1_13@toc@l
1127 addis 4, 2, .LCPI1_14@toc@ha
/*
 * Most of the vperm instructions from here on use v10 or v11 as the
 * control vector.
 * NOTE(review): v10/v11 presumably hold permutation masks loaded from the
 * .LCPI1_* constants; the loads are not visible here -- confirm.
 */
1128 vperm 31, 16, 31, 10
1129 addi 4, 4, .LCPI1_14@toc@l
1130 vperm 14, 14, 16, 11
1136 xxsldwi 47, 47, 47, 3
1154 xxsldwi 49, 49, 49, 3
1161 xxsldwi 47, 47, 47, 1
1163 vperm 30, 31, 30, 10
1164 vperm 12, 12, 31, 11
1176 xxsldwi 49, 49, 49, 1
1183 xxsldwi 46, 47, 47, 3
1190 xxsldwi 50, 51, 51, 3
1200 xxsldwi 49, 49, 49, 3
1205 xxsldwi 46, 46, 46, 1
1217 vperm 18, 30, 18, 10
1221 xxsldwi 49, 49, 49, 1
1228 xxsldwi 45, 63, 63, 3
1229 vperm 31, 12, 30, 11
1238 xxsldwi 49, 51, 51, 3
1248 xxsldwi 45, 45, 45, 1
1251 xxsldwi 44, 44, 44, 3
1265 vperm 30, 18, 30, 10
1266 vperm 14, 14, 18, 11
1267 xxsldwi 50, 51, 51, 3
1270 xxsldwi 48, 48, 48, 1
1276 vperm 28, 30, 28, 10
1280 vperm 12, 12, 30, 11
1282 xxsldwi 45, 45, 45, 3
1302 xxsldwi 51, 51, 51, 3
1303 xxsldwi 45, 45, 45, 3
1309 xxsldwi 50, 63, 63, 1
/* As with the first vperm, the control vector (v16) is also the target. */
1310 vperm 16, 14, 30, 16
1324 xxsldwi 49, 49, 49, 1
1331 xxsldwi 46, 50, 50, 3
1345 xxsldwi 38, 38, 38, 3
1350 xxsldwi 46, 46, 46, 1
1362 xxsldwi 38, 38, 38, 1
1369 xxsldwi 41, 46, 46, 3
/*
 * These two rotations write to vs0 and vs1, i.e. the lower half of the VSX
 * register file (the half that overlays the floating-point registers),
 * rather than to one of the vector registers.
 */
1383 xxsldwi 0, 38, 38, 3
1389 xxsldwi 1, 35, 35, 1
/* Symbol size = distance between the .Lfunc_begin1 and .Lfunc_end1 labels. */
1421 .size zfs_blake3_compress_xof_sse41, .Lfunc_end1-.Lfunc_begin1
/*
 * zfs_blake3_hash_many_sse41 -- exported (.globl) function symbol.
 * In the visible lines it calls blake3_hash4_sse41 and
 * zfs_blake3_compress_in_place_sse41; the surrounding control flow is not
 * visible in this view.
 * NOTE(review): the arguments are not described anywhere in the visible
 * text; presumably this mirrors upstream BLAKE3 blake3_hash_many()
 * -- confirm against the C prototype.
 */
1424 .globl zfs_blake3_hash_many_sse41
1426 .type zfs_blake3_hash_many_sse41,@function
1427 zfs_blake3_hash_many_sse41:
/*
 * Global entry: r2 = r12 + (.TOC. - .Lfunc_gep2), added as a high-adjusted
 * half (addis) followed by the low half (addi). This yields the TOC pointer
 * provided r12 holds the address of .Lfunc_gep2 on entry (the label itself
 * is not visible here).
 */
1431 addis 2, 12, .TOC.-.Lfunc_gep2@ha
1432 addi 2, 2, .TOC.-.Lfunc_gep2@l
/* Record the local entry point as an offset from the global entry point. */
1434 .localentry zfs_blake3_hash_many_sse41, .Lfunc_lep2-.Lfunc_gep2
/*
 * Unwind information: the CFA sits 256 bytes above the stack pointer, and
 * r17 through r30 are recorded in consecutive 8-byte slots from CFA-120 up
 * to CFA-16. The store instructions that fill those slots are not visible
 * in this view.
 */
1440 .cfi_def_cfa_offset 256
1442 .cfi_offset r17, -120
1443 .cfi_offset r18, -112
1444 .cfi_offset r19, -104
1445 .cfi_offset r20, -96
1446 .cfi_offset r21, -88
1447 .cfi_offset r22, -80
1448 .cfi_offset r23, -72
1449 .cfi_offset r24, -64
1450 .cfi_offset r25, -56
1451 .cfi_offset r26, -48
1452 .cfi_offset r27, -40
1453 .cfi_offset r28, -32
1454 .cfi_offset r29, -24
1455 .cfi_offset r30, -16
/*
 * Call the helper whose .type/.size directives appear later in this file.
 * NOTE(review): judging by its name it processes four inputs per call
 * -- confirm.
 */
1500 bl blake3_hash4_sse41
/*
 * Call the compress-in-place routine defined earlier in this file.
 * NOTE(review): presumably used for inputs left over once fewer than four
 * remain; the loop structure is not visible here -- confirm.
 */
1539 bl zfs_blake3_compress_in_place_sse41
/* Symbol size = distance between the .Lfunc_begin2 and .Lfunc_end2 labels. */
1579 .size zfs_blake3_hash_many_sse41, .Lfunc_end2-.Lfunc_begin2
/*
 * Allocatable, mergeable read-only section with 16-byte entries ("aM",
 * entsize 16). Presumably holds the .LCPI3_* vector constants referenced
 * below; their definitions are not visible in this view -- TODO confirm.
 */
1582 .section .rodata.cst16,"aM",@progbits,16
/*
 * blake3_hash4_sse41 -- helper called with bl from
 * zfs_blake3_hash_many_sse41. No .globl directive for it is visible, so it
 * is presumably file-local -- TODO confirm. Its entry label is also not
 * visible in this view.
 */
1677 .type blake3_hash4_sse41,@function
/*
 * Global entry: r2 = r12 + (.TOC. - .Lfunc_gep3), added as a high-adjusted
 * half (addis) followed by the low half (addi). This yields the TOC pointer
 * provided r12 holds the address of .Lfunc_gep3 on entry (the label itself
 * is not visible here).
 */
1682 addis 2, 12, .TOC.-.Lfunc_gep3@ha
1683 addi 2, 2, .TOC.-.Lfunc_gep3@l
/* Record the local entry point as an offset from the global entry point. */
1685 .localentry blake3_hash4_sse41, .Lfunc_lep3-.Lfunc_gep3
/*
 * Unwind information: the CFA sits 416 bytes above the stack pointer.
 * Three groups of registers are recorded as saved in the frame:
 *   r22-r30  8-byte slots,  CFA-176 up to CFA-112
 *   f20-f30  8-byte slots,  CFA-96  up to CFA-16
 *   v20-v31  16-byte slots, CFA-368 up to CFA-192
 * The store instructions that fill those slots are not visible in this
 * view.
 */
1687 .cfi_def_cfa_offset 416
1688 .cfi_offset r22, -176
1689 .cfi_offset r23, -168
1690 .cfi_offset r24, -160
1691 .cfi_offset r25, -152
1692 .cfi_offset r26, -144
1693 .cfi_offset r27, -136
1694 .cfi_offset r28, -128
1695 .cfi_offset r29, -120
1696 .cfi_offset r30, -112
1697 .cfi_offset f20, -96
1698 .cfi_offset f21, -88
1699 .cfi_offset f22, -80
1700 .cfi_offset f23, -72
1701 .cfi_offset f24, -64
1702 .cfi_offset f25, -56
1703 .cfi_offset f26, -48
1704 .cfi_offset f27, -40
1705 .cfi_offset f28, -32
1706 .cfi_offset f29, -24
1707 .cfi_offset f30, -16
1709 .cfi_offset v20, -368
1710 .cfi_offset v21, -352
1711 .cfi_offset v22, -336
1712 .cfi_offset v23, -320
1713 .cfi_offset v24, -304
1714 .cfi_offset v25, -288
1715 .cfi_offset v26, -272
1716 .cfi_offset v27, -256
1717 .cfi_offset v28, -240
1718 .cfi_offset v29, -224
1719 .cfi_offset v30, -208
1720 .cfi_offset v31, -192
/*
 * Form TOC-relative addresses of the .LCPI3_* constants: addis adds the
 * high-adjusted half of the offset to r2, addi then adds the low half.
 * The high halves for several constants are issued first into r25-r30 and
 * r7, and the low halves are added later, often into a different
 * destination register (for example r29 -> r24, r27 -> r28, r26 -> r27,
 * r25 -> r26), so match each addi to its addis by symbol name.
 * The instructions that consume these addresses are not visible here.
 */
1792 addis 30, 2, .LCPI3_0@toc@ha
1795 addis 28, 2, .LCPI3_5@toc@ha
1796 addis 27, 2, .LCPI3_6@toc@ha
1797 addis 26, 2, .LCPI3_7@toc@ha
1798 addis 29, 2, .LCPI3_4@toc@ha
1799 addis 25, 2, .LCPI3_8@toc@ha
/*
 * r0 is only the destination here, which is fine: it reads as literal zero
 * only when it appears as the base (second) operand of addi. r0 receives
 * three different addresses in this sequence (.LCPI3_0, .LCPI3_3,
 * .LCPI3_5); each is presumably consumed before the next overwrite
 * -- TODO confirm.
 */
1800 addi 0, 30, .LCPI3_0@toc@l
1802 addis 7, 2, .LCPI3_1@toc@ha
/* r30 is recycled for the high half of .LCPI3_3 once .LCPI3_0 is formed. */
1803 addis 30, 2, .LCPI3_3@toc@ha
1804 addi 24, 29, .LCPI3_4@toc@l
1809 addi 0, 30, .LCPI3_3@toc@l
1815 addi 6, 7, .LCPI3_1@toc@l
1816 addis 7, 2, .LCPI3_2@toc@ha
1823 addi 6, 7, .LCPI3_2@toc@l
1833 addi 0, 28, .LCPI3_5@toc@l
/*
 * Each addi below reads a high half from one register and writes the
 * finished address to the next register up, whose own high half was used
 * by the preceding instruction.
 */
1834 addi 28, 27, .LCPI3_6@toc@l
1835 addi 27, 26, .LCPI3_7@toc@l
1836 addi 26, 25, .LCPI3_8@toc@l
/* Symbol size = distance between the .Lfunc_begin3 and .Lfunc_end3 labels. */
3061 .size blake3_hash4_sse41, .Lfunc_end3-.Lfunc_begin3
/*
 * Empty .note.GNU-stack section: by GNU toolchain convention its presence
 * (without the executable flag) marks this object as not requiring an
 * executable stack.
 */
3063 .section ".note.GNU-stack","",@progbits