1 /*===--------------- sha512intrin.h - SHA512 intrinsics -----------------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
11 #error "Never use <sha512intrin.h> directly; include <immintrin.h> instead."
12 #endif // __IMMINTRIN_H
14 #ifndef __SHA512INTRIN_H
15 #define __SHA512INTRIN_H
// Attribute set applied to the intrinsics defined below: force inlining,
// suppress debug info for the wrapper, require the "sha512" target feature,
// and declare a minimum vector width of 256 bits.
17 #define __DEFAULT_FN_ATTRS256 \
18 __attribute__((__always_inline__, __nodebug__, __target__("sha512"), \
19 __min_vector_width__(256)))
21 /// This intrinsic is one of the two SHA512 message scheduling instructions.
22 /// The intrinsic performs an intermediate calculation for the next four
23 /// SHA512 message qwords. The calculated results are stored in \a dst.
25 /// \headerfile <immintrin.h>
28 /// __m256i _mm256_sha512msg1_epi64(__m256i __A, __m128i __B)
31 /// This intrinsic corresponds to the \c VSHA512MSG1 instruction.
34 /// A 256-bit vector of [4 x long long].
36 /// A 128-bit vector of [2 x long long].
38 /// A 256-bit vector of [4 x long long].
41 /// DEFINE ROR64(qword, n) {
43 /// dest := (qword >> count) | (qword << (64 - count))
46 /// DEFINE SHR64(qword, n) {
50 /// RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7)
52 /// W[4] := __B.qword[0]
53 /// W[3] := __A.qword[3]
54 /// W[2] := __A.qword[2]
55 /// W[1] := __A.qword[1]
56 /// W[0] := __A.qword[0]
57 /// dst.qword[3] := W[3] + s0(W[4])
58 /// dst.qword[2] := W[2] + s0(W[3])
59 /// dst.qword[1] := W[1] + s0(W[2])
60 /// dst.qword[0] := W[0] + s0(W[1])
63 static __inline__ __m256i __DEFAULT_FN_ATTRS256
64 _mm256_sha512msg1_epi64(__m256i __A
, __m128i __B
) {
// Thin wrapper: __A is cast to __v4du and __B to __v2du before being handed
// to the vsha512msg1 builtin; the builtin's result is cast back to __m256i.
65 return (__m256i
)__builtin_ia32_vsha512msg1((__v4du
)__A
, (__v2du
)__B
);
68 /// This intrinsic is one of the two SHA512 message scheduling instructions.
69 /// The intrinsic performs the final calculation for the next four SHA512
70 /// message qwords. The calculated results are stored in \a dst.
72 /// \headerfile <immintrin.h>
75 /// __m256i _mm256_sha512msg2_epi64(__m256i __A, __m256i __B)
78 /// This intrinsic corresponds to the \c VSHA512MSG2 instruction.
81 /// A 256-bit vector of [4 x long long].
83 /// A 256-bit vector of [4 x long long].
85 /// A 256-bit vector of [4 x long long].
88 /// DEFINE ROR64(qword, n) {
90 /// dest := (qword >> count) | (qword << (64 - count))
93 /// DEFINE SHR64(qword, n) {
96 /// DEFINE s1(qword) {
97 /// RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6)
99 /// W[14] := __B.qword[2]
100 /// W[15] := __B.qword[3]
101 /// W[16] := __A.qword[0] + s1(W[14])
102 /// W[17] := __A.qword[1] + s1(W[15])
103 /// W[18] := __A.qword[2] + s1(W[16])
104 /// W[19] := __A.qword[3] + s1(W[17])
105 /// dst.qword[3] := W[19]
106 /// dst.qword[2] := W[18]
107 /// dst.qword[1] := W[17]
108 /// dst.qword[0] := W[16]
109 /// dst[MAX:256] := 0
111 static __inline__ __m256i __DEFAULT_FN_ATTRS256
112 _mm256_sha512msg2_epi64(__m256i __A
, __m256i __B
) {
// Thin wrapper: both operands are cast to __v4du before being handed to the
// vsha512msg2 builtin; the builtin's result is cast back to __m256i.
113 return (__m256i
)__builtin_ia32_vsha512msg2((__v4du
)__A
, (__v4du
)__B
);
116 /// This intrinsic performs two rounds of SHA512 operation using initial SHA512
117 /// state (C,D,G,H) from \a __A, an initial SHA512 state (A,B,E,F) from
118 /// \a __B, and a pre-computed sum of the next two round message qwords and
119 /// the corresponding round constants from \a __C (only the two lower qwords
120 /// of the third operand). The updated SHA512 state (A,B,E,F) is written to
121 /// \a dst, and \a __B can be used as the updated state (C,D,G,H) in later
124 /// \headerfile <immintrin.h>
127 /// __m256i _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C)
130 /// This intrinsic corresponds to the \c VSHA512RNDS2 instruction.
133 /// A 256-bit vector of [4 x long long].
135 /// A 256-bit vector of [4 x long long].
137 /// A 128-bit vector of [2 x long long].
139 /// A 256-bit vector of [4 x long long].
141 /// \code{.operation}
142 /// DEFINE ROR64(qword, n) {
144 /// dest := (qword >> count) | (qword << (64 - count))
147 /// DEFINE SHR64(qword, n) {
148 /// RETURN qword >> n
150 /// DEFINE cap_sigma0(qword) {
151 /// RETURN ROR64(qword,28) ^ ROR64(qword, 34) ^ ROR64(qword, 39)
153 /// DEFINE cap_sigma1(qword) {
154 /// RETURN ROR64(qword,14) ^ ROR64(qword, 18) ^ ROR64(qword, 41)
156 /// DEFINE MAJ(a,b,c) {
157 /// RETURN (a & b) ^ (a & c) ^ (b & c)
159 /// DEFINE CH(e,f,g) {
160 /// RETURN (e & f) ^ (g & ~e)
162 /// A[0] := __B.qword[3]
163 /// B[0] := __B.qword[2]
164 /// C[0] := __A.qword[3]
165 /// D[0] := __A.qword[2]
166 /// E[0] := __B.qword[1]
167 /// F[0] := __B.qword[0]
168 /// G[0] := __A.qword[1]
169 /// H[0] := __A.qword[0]
170 /// WK[0]:= __C.qword[0]
171 /// WK[1]:= __C.qword[1]
173 /// A[i+1] := CH(E[i], F[i], G[i]) +
174 /// cap_sigma1(E[i]) + WK[i] + H[i] +
175 /// MAJ(A[i], B[i], C[i]) +
180 /// E[i+1] := CH(E[i], F[i], G[i]) +
181 /// cap_sigma1(E[i]) + WK[i] + H[i] + D[i]
186 /// dst.qword[3] := A[2]
187 /// dst.qword[2] := B[2]
188 /// dst.qword[1] := E[2]
189 /// dst.qword[0] := F[2]
190 /// dst[MAX:256] := 0
192 static __inline__ __m256i __DEFAULT_FN_ATTRS256
193 _mm256_sha512rnds2_epi64(__m256i __A
, __m256i __B
, __m128i __C
) {
194 return (__m256i
)__builtin_ia32_vsha512rnds2((__v4du
)__A
, (__v4du
)__B
,
198 #undef __DEFAULT_FN_ATTRS256
200 #endif // __SHA512INTRIN_H