2 * Intel SHA Extensions optimized implementation of a SHA-256 update function
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
9 * Copyright(c) 2015 Intel Corporation.
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * Contact Information:
21 * Sean Gulley <sean.m.gulley@intel.com>
22 * Tim Chen <tim.c.chen@linux.intel.com>
26 * Copyright(c) 2015 Intel Corporation.
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
32 * * Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * * Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in
36 * the documentation and/or other materials provided with the
38 * * Neither the name of Intel Corporation nor the names of its
39 * contributors may be used to endorse or promote products derived
40 * from this software without specific prior written permission.
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
56 #include <linux/linkage.h>
/*
 * Symbolic register names used by sha256_ni_transform.
 *
 * DIGEST_PTR, DATA_PTR and NUM_BLKS name the registers carrying the three
 * function arguments (%rdi, %rsi, %rdx - the System V AMD64 integer
 * argument order). SHA256CONSTANTS, SHUF_MASK, ABEF_SAVE and CDGH_SAVE are
 * working registers set up inside the function.
 *
 * NOTE(review): the function also uses MSG, STATE0, STATE1 and MSGTMP0-4,
 * whose #defines are not visible in this excerpt - confirm their register
 * assignments against the complete file.
 */
58 #define DIGEST_PTR %rdi /* 1st arg */
59 #define DATA_PTR %rsi /* 2nd arg */
60 #define NUM_BLKS %rdx /* 3rd arg */
/* Base address of the K256 round-constant table (loaded with lea) */
62 #define SHA256CONSTANTS %rax
/* Holds PSHUFFLE_BYTE_FLIP_MASK, the pshufb control used on each data load */
73 #define SHUF_MASK %xmm8
/* Copies of the state taken before the rounds and added back after them */
75 #define ABEF_SAVE %xmm9
76 #define CDGH_SAVE %xmm10
79 * Intel SHA Extensions optimized implementation of a SHA-256 update function
81 * The function takes a pointer to the current hash values, a pointer to the
82 * input data, and a number of 64-byte blocks to process. Once all blocks have
83 * been processed, the digest buffer is updated with the resulting hash value.
84 * The function only processes complete blocks; there is no functionality to
85 * store partial blocks. All message padding and hash value initialization must
86 * be done outside the update function.
88 * The indented lines in the loop are instructions related to rounds processing.
89 * The non-indented lines are instructions related to the message schedule.
91 * void sha256_ni_transform(uint32_t *digest, const void *data,
93 * digest : pointer to digest
94 * data: pointer to input data
95 * numBlocks: Number of blocks to process
/*
 * sha256_ni_transform: run the SHA-256 rounds over a run of 64-byte blocks
 * using the SHA extension instructions sha256rnds2, sha256msg1 and
 * sha256msg2.
 *
 * In:   DIGEST_PTR - 32 bytes of hash state, read at entry and written back
 *                    at the end
 *       DATA_PTR   - input data, read 16 bytes at a time
 *       NUM_BLKS   - number of 64-byte blocks; turned into an end-of-data
 *                    pointer below
 * Uses: SHA256CONSTANTS, SHUF_MASK, ABEF_SAVE, CDGH_SAVE, MSG, STATE0,
 *       STATE1, MSGTMP0-4 and the flags.
 *
 * Each sha256rnds2 performs two rounds and takes its two "message word +
 * round constant" inputs implicitly from the low two dwords of %xmm0
 * (MSG is presumably %xmm0 - TODO confirm, its #define is not visible here).
 *
 * NOTE(review): this excerpt appears to be missing lines of the original
 * (the embedded original line numbers skip): the loop label, the branches,
 * the copies between MSG and MSGTMPn, the data-pointer increment and the
 * return are not visible. The comments below describe only what is shown.
 */
100 SYM_FUNC_START(sha256_ni_transform)
102 shl $6, NUM_BLKS /* convert to bytes */
104 add DATA_PTR, NUM_BLKS /* pointer to end of data */
/* NUM_BLKS is now the address compared against DATA_PTR after each block */
107 * load initial hash values
108 * Need to reorder these appropriately
109 * DCBA, HGFE -> ABEF, CDGH
/* movdqu: the digest buffer does not have to be 16-byte aligned */
111 movdqu 0*16(DIGEST_PTR), STATE0
112 movdqu 1*16(DIGEST_PTR), STATE1
/* MSGTMP4 serves as scratch for the shuffled STATE0 during the reorder */
114 pshufd $0xB1, STATE0, STATE0 /* CDAB */
115 pshufd $0x1B, STATE1, STATE1 /* EFGH */
116 movdqa STATE0, MSGTMP4
117 palignr $8, STATE1, STATE0 /* ABEF */
118 pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */
/*
 * Load the pshufb byte-swap mask (movdqa: the mask must be 16-byte aligned)
 * and point SHA256CONSTANTS at the K256 round-constant table.
 */
120 movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
121 lea K256(%rip), SHA256CONSTANTS
124 /* Save hash values for addition after rounds */
125 movdqa STATE0, ABEF_SAVE
126 movdqa STATE1, CDGH_SAVE
/*
 * Rounds 0-3: load 16 bytes of data, byte-swap each 32-bit word with pshufb,
 * and add the first four round constants.
 *
 * sha256rnds2 STATE0, STATE1 writes the new ABEF into STATE1; the old ABEF
 * left in STATE0 is exactly the new CDGH. The second sha256rnds2 therefore
 * swaps its operands, leaving ABEF in STATE0 and CDGH in STATE1 again after
 * four rounds. pshufd $0x0E moves the upper two dwords of MSG into the low
 * two, where the second sha256rnds2 reads them.
 */
129 movdqu 0*16(DATA_PTR), MSG
130 pshufb SHUF_MASK, MSG
132 paddd 0*16(SHA256CONSTANTS), MSG
133 sha256rnds2 STATE0, STATE1
134 pshufd $0x0E, MSG, MSG
135 sha256rnds2 STATE1, STATE0
/*
 * Rounds 4-7. sha256msg1 starts the message schedule for the next four
 * words by adding the small-sigma0 term to MSGTMP0.
 * NOTE(review): MSGTMP0-3 presumably hold copies of the byte-swapped data
 * words; the instructions that copy MSG into them are not visible here.
 */
138 movdqu 1*16(DATA_PTR), MSG
139 pshufb SHUF_MASK, MSG
141 paddd 1*16(SHA256CONSTANTS), MSG
142 sha256rnds2 STATE0, STATE1
143 pshufd $0x0E, MSG, MSG
144 sha256rnds2 STATE1, STATE0
145 sha256msg1 MSGTMP1, MSGTMP0
/* Rounds 8-11 */
148 movdqu 2*16(DATA_PTR), MSG
149 pshufb SHUF_MASK, MSG
151 paddd 2*16(SHA256CONSTANTS), MSG
152 sha256rnds2 STATE0, STATE1
153 pshufd $0x0E, MSG, MSG
154 sha256rnds2 STATE1, STATE0
155 sha256msg1 MSGTMP2, MSGTMP1
/*
 * Rounds 12-15. First full message-schedule step, repeated with rotating
 * registers in every later group:
 *   movdqa + palignr $4 - build in MSGTMP4 the four words that straddle the
 *                         two most recent schedule registers (the W[t-7] term)
 *   paddd               - add them to the sha256msg1 partial result
 *   sha256msg2          - add the small-sigma1 term, completing four new
 *                         schedule words
 */
158 movdqu 3*16(DATA_PTR), MSG
159 pshufb SHUF_MASK, MSG
161 paddd 3*16(SHA256CONSTANTS), MSG
162 sha256rnds2 STATE0, STATE1
163 movdqa MSGTMP3, MSGTMP4
164 palignr $4, MSGTMP2, MSGTMP4
165 paddd MSGTMP4, MSGTMP0
166 sha256msg2 MSGTMP3, MSGTMP0
167 pshufd $0x0E, MSG, MSG
168 sha256rnds2 STATE1, STATE0
169 sha256msg1 MSGTMP3, MSGTMP2
/*
 * Rounds 16-19. All 64 bytes of the block have been loaded; from here the
 * message words come from the schedule registers.
 * NOTE(review): MSG is presumably reloaded from the newest MSGTMPn before
 * each paddd below; that copy is not visible in this excerpt.
 */
173 paddd 4*16(SHA256CONSTANTS), MSG
174 sha256rnds2 STATE0, STATE1
175 movdqa MSGTMP0, MSGTMP4
176 palignr $4, MSGTMP3, MSGTMP4
177 paddd MSGTMP4, MSGTMP1
178 sha256msg2 MSGTMP0, MSGTMP1
179 pshufd $0x0E, MSG, MSG
180 sha256rnds2 STATE1, STATE0
181 sha256msg1 MSGTMP0, MSGTMP3
/* Rounds 20-23 */
185 paddd 5*16(SHA256CONSTANTS), MSG
186 sha256rnds2 STATE0, STATE1
187 movdqa MSGTMP1, MSGTMP4
188 palignr $4, MSGTMP0, MSGTMP4
189 paddd MSGTMP4, MSGTMP2
190 sha256msg2 MSGTMP1, MSGTMP2
191 pshufd $0x0E, MSG, MSG
192 sha256rnds2 STATE1, STATE0
193 sha256msg1 MSGTMP1, MSGTMP0
/* Rounds 24-27 */
197 paddd 6*16(SHA256CONSTANTS), MSG
198 sha256rnds2 STATE0, STATE1
199 movdqa MSGTMP2, MSGTMP4
200 palignr $4, MSGTMP1, MSGTMP4
201 paddd MSGTMP4, MSGTMP3
202 sha256msg2 MSGTMP2, MSGTMP3
203 pshufd $0x0E, MSG, MSG
204 sha256rnds2 STATE1, STATE0
205 sha256msg1 MSGTMP2, MSGTMP1
/* Rounds 28-31 */
209 paddd 7*16(SHA256CONSTANTS), MSG
210 sha256rnds2 STATE0, STATE1
211 movdqa MSGTMP3, MSGTMP4
212 palignr $4, MSGTMP2, MSGTMP4
213 paddd MSGTMP4, MSGTMP0
214 sha256msg2 MSGTMP3, MSGTMP0
215 pshufd $0x0E, MSG, MSG
216 sha256rnds2 STATE1, STATE0
217 sha256msg1 MSGTMP3, MSGTMP2
/* Rounds 32-35 */
221 paddd 8*16(SHA256CONSTANTS), MSG
222 sha256rnds2 STATE0, STATE1
223 movdqa MSGTMP0, MSGTMP4
224 palignr $4, MSGTMP3, MSGTMP4
225 paddd MSGTMP4, MSGTMP1
226 sha256msg2 MSGTMP0, MSGTMP1
227 pshufd $0x0E, MSG, MSG
228 sha256rnds2 STATE1, STATE0
229 sha256msg1 MSGTMP0, MSGTMP3
/* Rounds 36-39 */
233 paddd 9*16(SHA256CONSTANTS), MSG
234 sha256rnds2 STATE0, STATE1
235 movdqa MSGTMP1, MSGTMP4
236 palignr $4, MSGTMP0, MSGTMP4
237 paddd MSGTMP4, MSGTMP2
238 sha256msg2 MSGTMP1, MSGTMP2
239 pshufd $0x0E, MSG, MSG
240 sha256rnds2 STATE1, STATE0
241 sha256msg1 MSGTMP1, MSGTMP0
/* Rounds 40-43 */
245 paddd 10*16(SHA256CONSTANTS), MSG
246 sha256rnds2 STATE0, STATE1
247 movdqa MSGTMP2, MSGTMP4
248 palignr $4, MSGTMP1, MSGTMP4
249 paddd MSGTMP4, MSGTMP3
250 sha256msg2 MSGTMP2, MSGTMP3
251 pshufd $0x0E, MSG, MSG
252 sha256rnds2 STATE1, STATE0
253 sha256msg1 MSGTMP2, MSGTMP1
/* Rounds 44-47 */
257 paddd 11*16(SHA256CONSTANTS), MSG
258 sha256rnds2 STATE0, STATE1
259 movdqa MSGTMP3, MSGTMP4
260 palignr $4, MSGTMP2, MSGTMP4
261 paddd MSGTMP4, MSGTMP0
262 sha256msg2 MSGTMP3, MSGTMP0
263 pshufd $0x0E, MSG, MSG
264 sha256rnds2 STATE1, STATE0
265 sha256msg1 MSGTMP3, MSGTMP2
/* Rounds 48-51: last group that starts a new schedule step with sha256msg1 */
269 paddd 12*16(SHA256CONSTANTS), MSG
270 sha256rnds2 STATE0, STATE1
271 movdqa MSGTMP0, MSGTMP4
272 palignr $4, MSGTMP3, MSGTMP4
273 paddd MSGTMP4, MSGTMP1
274 sha256msg2 MSGTMP0, MSGTMP1
275 pshufd $0x0E, MSG, MSG
276 sha256rnds2 STATE1, STATE0
277 sha256msg1 MSGTMP0, MSGTMP3
/* Rounds 52-55: only the finishing half of the schedule step remains */
281 paddd 13*16(SHA256CONSTANTS), MSG
282 sha256rnds2 STATE0, STATE1
283 movdqa MSGTMP1, MSGTMP4
284 palignr $4, MSGTMP0, MSGTMP4
285 paddd MSGTMP4, MSGTMP2
286 sha256msg2 MSGTMP1, MSGTMP2
287 pshufd $0x0E, MSG, MSG
288 sha256rnds2 STATE1, STATE0
/* Rounds 56-59: completes the last four schedule words in MSGTMP3 */
292 paddd 14*16(SHA256CONSTANTS), MSG
293 sha256rnds2 STATE0, STATE1
294 movdqa MSGTMP2, MSGTMP4
295 palignr $4, MSGTMP1, MSGTMP4
296 paddd MSGTMP4, MSGTMP3
297 sha256msg2 MSGTMP2, MSGTMP3
298 pshufd $0x0E, MSG, MSG
299 sha256rnds2 STATE1, STATE0
/* Rounds 60-63: no message-schedule work left */
303 paddd 15*16(SHA256CONSTANTS), MSG
304 sha256rnds2 STATE0, STATE1
305 pshufd $0x0E, MSG, MSG
306 sha256rnds2 STATE1, STATE0
308 /* Add current hash values with previously saved */
309 paddd ABEF_SAVE, STATE0
310 paddd CDGH_SAVE, STATE1
312 /* Increment data pointer and loop if more to process */
/*
 * Sets the flags from DATA_PTR versus the end-of-data pointer in NUM_BLKS.
 * NOTE(review): the pointer increment and the branch that consumes these
 * flags are not visible in this excerpt.
 */
314 cmp NUM_BLKS, DATA_PTR
317 /* Write hash values back in the correct order */
/* Inverse of the entry shuffle; MSGTMP4 is again scratch for STATE0 */
318 pshufd $0x1B, STATE0, STATE0 /* FEBA */
319 pshufd $0xB1, STATE1, STATE1 /* DCHG */
320 movdqa STATE0, MSGTMP4
321 pblendw $0xF0, STATE1, STATE0 /* DCBA */
322 palignr $8, MSGTMP4, STATE1 /* HGFE */
/* Unaligned stores of the updated state back to the caller's digest buffer */
324 movdqu STATE0, 0*16(DIGEST_PTR)
325 movdqu STATE1, 1*16(DIGEST_PTR)
330 SYM_FUNC_END(sha256_ni_transform)
/*
 * SHA-256 round constants: 16 rows of four 32-bit words (64 words, 256
 * bytes). sha256_ni_transform reads row N as N*16(SHA256CONSTANTS), one row
 * per group of four rounds. The section is allocatable and mergeable ("aM")
 * with a 256-byte entity size, so the whole table is one mergeable constant.
 *
 * NOTE(review): the K256 label referenced by the code, and any alignment
 * directive, are not visible in this excerpt - presumably they sit between
 * the .section directive and the first .long; confirm against the full file.
 */
332 .section .rodata.cst256.K256, "aM", @progbits, 256
335 .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
336 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
337 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
338 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
339 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
340 .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
341 .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
342 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
343 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
344 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
345 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
346 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
347 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
348 .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
349 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
350 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
/*
 * pshufb control mask that reverses the byte order inside each 32-bit lane
 * (selector bytes 3,2,1,0, 7,6,5,4, ...), converting the big-endian message
 * words of the input to the CPU's little-endian dword layout. It is loaded
 * with movdqa, which requires the 16-byte value to be 16-byte aligned.
 * NOTE(review): no alignment directive is visible in this excerpt - confirm
 * one exists in the full file.
 */
352 .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
354 PSHUFFLE_BYTE_FLIP_MASK:
355 .octa 0x0c0d0e0f08090a0b0405060700010203