2 * Intel SHA Extensions optimized implementation of a SHA-1 update function
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
9 * Copyright(c) 2015 Intel Corporation.
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of version 2 of the GNU General Public License as
13 * published by the Free Software Foundation.
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * Contact Information:
21 * Sean Gulley <sean.m.gulley@intel.com>
22 * Tim Chen <tim.c.chen@linux.intel.com>
26 * Copyright(c) 2015 Intel Corporation.
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions are met:
32 * * Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 * * Redistributions in binary form must reproduce the above copyright
35 * notice, this list of conditions and the following disclaimer in
36 * the documentation and/or other materials provided with the distribution.
38 * * Neither the name of Intel Corporation nor the names of its
39 * contributors may be used to endorse or promote products derived
40 * from this software without specific prior written permission.
42 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
56 #include <linux/linkage.h>
/*
 * Register and stack-frame aliases used by sha1_ni_transform.
 * The three pointer/count aliases map to the function's 1st, 2nd and 3rd
 * arguments, as the per-line comments state.
 */
58 #define DIGEST_PTR %rdi /* 1st arg */
59 #define DATA_PTR %rsi /* 2nd arg */
60 #define NUM_BLKS %rdx /* 3rd arg */
/* Stack scratch area: one 16-byte slot each for the saved E0 and ABCD. */
65 #define FRAME_SIZE 32 /* space for 2x16 bytes */
/* E0 alternates with E1 as the E operand of successive sha1rnds4 steps. */
68 #define E0 %xmm1 /* Need two E's b/c they ping pong */
/* Holds the pshufb control loaded from PSHUFFLE_BYTE_FLIP_MASK. */
74 #define SHUF_MASK %xmm7
/*
 * NOTE(review): ABCD, E1 and MSG0-MSG3 are used by sha1_ni_transform but are
 * not defined in the lines shown here -- presumably they are defined on
 * omitted lines; confirm against the complete file.
 */
78 * Intel SHA Extensions optimized implementation of a SHA-1 update function
80 * The function takes a pointer to the current hash values, a pointer to the
81 * input data, and a number of 64 byte blocks to process. Once all blocks have
82 * been processed, the buffer at the digest pointer is updated with the resulting hash value.
83 * The function only processes complete blocks, there is no functionality to
84 * store partial blocks. All message padding and hash value initialization must
85 * be done outside the update function.
87 * The indented lines in the loop are instructions related to rounds processing.
88 * The non-indented lines are instructions related to the message schedule.
90 * void sha1_ni_transform(uint32_t *digest, const void *data, uint32_t numBlocks);
92 * digest : pointer to digest
93 * data: pointer to input data
94 * numBlocks: Number of blocks to process
98 SYM_FUNC_START(sha1_ni_transform)
/*
 * Runs the SHA-1 compression rounds over 64-byte blocks read from DATA_PTR
 * using the SHA extension instructions, then stores the resulting state back
 * through DIGEST_PTR (16 bytes at offset 0 plus one dword at offset 16).
 *
 * NOTE(review): the numbers embedded at the start of each line skip values,
 * so some instructions are presumably on lines not shown here (e.g. stack
 * alignment and restore, the loop label and branch, the message-schedule and
 * E-register updates between rounds, the return). Confirm against the
 * complete file before changing anything in this routine.
 */
/* Reserve FRAME_SIZE bytes of stack for the two 16-byte saved-state slots. */
100 sub $FRAME_SIZE, %rsp
/* A shift by 6 multiplies the block count by 64, the block size in bytes. */
103 shl $6, NUM_BLKS /* convert to bytes */
/* From here on NUM_BLKS is an address (start of data + total bytes). */
105 add DATA_PTR, NUM_BLKS /* pointer to end of data */
107 /* load initial hash values */
/*
 * pinsrd places the dword at digest + 16 into the highest dword of E0 and
 * leaves the other three dwords of E0 unchanged; the pand below clears them.
 * NOTE(review): assumes UPPER_WORD_MASK is the 0xFFFFFFFF00...00 constant at
 * the end of the file -- its label is not visible here, confirm.
 */
108 pinsrd $3, 1*16(DIGEST_PTR), E0
/* Unaligned load of the first 16 bytes of the digest. */
109 movdqu 0*16(DIGEST_PTR), ABCD
110 pand UPPER_WORD_MASK(%rip), E0
/*
 * Shuffle control 0x1B reverses the order of the four dwords in ABCD. The
 * same shuffle is applied again before the state is written back.
 */
111 pshufd $0x1B, ABCD, ABCD
/* Load the pshufb control that reverses all 16 bytes of a register. */
113 movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
116 /* Save hash values for addition after rounds */
/*
 * movdqa needs a 16-byte aligned address.
 * NOTE(review): the instruction that aligns %rsp is not visible here --
 * confirm it exists in the complete file.
 */
117 movdqa E0, (0*16)(%rsp)
118 movdqa ABCD, (1*16)(%rsp)
/*
 * Each 16-byte quarter of the 64-byte block is loaded unaligned and then
 * byte-reversed with SHUF_MASK before use.
 *
 * Each sha1rnds4 performs four SHA-1 rounds; the immediate ($0..$3) selects
 * the round function and constant for rounds 0-19, 20-39, 40-59 and 60-79
 * respectively. Twenty sha1rnds4 steps follow (five per immediate), with the
 * E operand alternating between E0 and E1.
 */
121 movdqu 0*16(DATA_PTR), MSG0
122 pshufb SHUF_MASK, MSG0
125 sha1rnds4 $0, E0, ABCD
128 movdqu 1*16(DATA_PTR), MSG1
129 pshufb SHUF_MASK, MSG1
132 sha1rnds4 $0, E1, ABCD
136 movdqu 2*16(DATA_PTR), MSG2
137 pshufb SHUF_MASK, MSG2
140 sha1rnds4 $0, E0, ABCD
145 movdqu 3*16(DATA_PTR), MSG3
146 pshufb SHUF_MASK, MSG3
150 sha1rnds4 $0, E1, ABCD
158 sha1rnds4 $0, E0, ABCD
/* Immediate $1: second group of five sha1rnds4 steps. */
166 sha1rnds4 $1, E1, ABCD
174 sha1rnds4 $1, E0, ABCD
182 sha1rnds4 $1, E1, ABCD
190 sha1rnds4 $1, E0, ABCD
198 sha1rnds4 $1, E1, ABCD
/* Immediate $2: third group of five sha1rnds4 steps. */
206 sha1rnds4 $2, E0, ABCD
214 sha1rnds4 $2, E1, ABCD
222 sha1rnds4 $2, E0, ABCD
230 sha1rnds4 $2, E1, ABCD
238 sha1rnds4 $2, E0, ABCD
/* Immediate $3: final group of five sha1rnds4 steps. */
246 sha1rnds4 $3, E1, ABCD
254 sha1rnds4 $3, E0, ABCD
262 sha1rnds4 $3, E1, ABCD
269 sha1rnds4 $3, E0, ABCD
274 sha1rnds4 $3, E1, ABCD
276 /* Add current hash values with previously saved */
/*
 * The operands are the two stack slots written before the rounds: the saved
 * E value at 0*16(%rsp) and the saved ABCD at 1*16(%rsp). paddd adds the
 * four dwords independently.
 */
277 sha1nexte (0*16)(%rsp), E0
278 paddd (1*16)(%rsp), ABCD
280 /* Increment data pointer and loop if more to process */
/*
 * Compares DATA_PTR with the end-of-data address held in NUM_BLKS.
 * NOTE(review): the pointer increment and the conditional branch that use
 * this comparison are not visible here -- confirm in the complete file.
 */
282 cmp NUM_BLKS, DATA_PTR
285 /* Write hash values back in the correct order */
/* Undo the dword reversal applied when the state was loaded. */
286 pshufd $0x1B, ABCD, ABCD
287 movdqu ABCD, 0*16(DIGEST_PTR)
/* The highest dword of E0 is stored at digest + 16. */
288 pextrd $3, E0, 1*16(DIGEST_PTR)
294 SYM_FUNC_END(sha1_ni_transform)
/*
 * 16-byte read-only constants used by sha1_ni_transform. Each lives in its
 * own section flagged "aM" (allocatable, mergeable) with an entry size of 16.
 */
296 .section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
/*
 * pshufb control: stored little-endian, byte i of the constant holds the
 * index 15 - i, so the shuffle reverses all 16 bytes of its destination.
 */
298 PSHUFFLE_BYTE_FLIP_MASK:
299 .octa 0x000102030405060708090a0b0c0d0e0f
301 .section .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16
/*
 * All ones in the highest 32 bits, zero elsewhere: and-ing with this keeps
 * only the top dword of a 128-bit register.
 * NOTE(review): no label is visible for this constant in the lines shown,
 * although the function references UPPER_WORD_MASK(%rip) -- presumably the
 * label (and any alignment directive) is on an omitted line; confirm.
 */
304 .octa 0xFFFFFFFF000000000000000000000000