/* $OpenBSD: gcm128.c,v 1.20 2017/09/03 13:07:34 inoguchi Exp $ */
/* ====================================================================
 * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 */
#define OPENSSL_FIPSAPI

#include <openssl/crypto.h>
#include "modes_lcl.h"

#if defined(BSWAP4) && defined(__STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
#undef  GETU32
#define GETU32(p)       BSWAP4(*(const u32 *)(p))
#undef  PUTU32
#define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#endif

#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   \
    do { \
        if (sizeof(size_t)==8) { \
            u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
            V.lo  = (V.hi<<63)|(V.lo>>1); \
            V.hi  = (V.hi>>1 )^T; \
        } else { \
            u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
            V.lo  = (V.hi<<63)|(V.lo>>1); \
            V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
    } while(0)
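/*
 * REDUCE1BIT(V) divides the 128-bit value V.hi:V.lo by x in GF(2^128):
 * V is shifted right by one bit and, if the shifted-out bit was set,
 * the constant 0xe1 (the bit-reflected form of the GCM reduction
 * polynomial x^128 + x^7 + x^2 + x + 1) is folded into the top byte.
 * The 0-(V.lo&1) mask applies that conditional XOR without a branch.
 */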
/*
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
 * never be set to 8.  8 is effectively reserved for testing purposes.
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
 * "Shoup's" in the GCM specification.  In other words OpenSSL does not
 * cover the whole spectrum of possible table-driven implementations.
 * Why?  In the non-"Shoup's" case the memory access pattern is segmented
 * in such a manner that it's trivial to see that cache-timing information
 * can reveal a fair portion of the intermediate hash value.  Given that
 * the ciphertext is always available to an attacker, it's possible to
 * attempt to deduce the secret parameter H and, if successful, tamper
 * with messages [which is nothing but trivial in CTR mode].  In the
 * "Shoup's" case it's not as trivial, but there is no reason to believe
 * that it's resistant to cache-timing attacks.  The thing about the
 * "8-bit" implementation is that it consumes 16 (sixteen) times more
 * memory, 4KB per individual key + 1KB shared.  On the plus side it
 * should be twice as fast as the "4-bit" version.  And for gcc-generated
 * x86[_64] code the "8-bit" version was observed to run ~75% faster,
 * closer to 100% for commercial compilers...  Yet the "4-bit" procedure
 * is preferred, because it's believed to provide a better
 * security-performance balance and adequate all-round performance.
 * "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free()
 *   results in VM working-set trimming, meaning that a subsequent
 *   malloc() would immediately incur working-set expansion);
 * - a larger table has a larger cache footprint, which can affect the
 *   performance of other code paths (not necessarily even from the same
 *   thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
 */
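/*
 * Concretely: the "4-bit" Htable is 16 u128 entries, i.e. 256 bytes per
 * key (plus the small shared rem_4bit table), whereas the "8-bit" Htable
 * is 256 entries, i.e. 4KB per key; that is the sixteen-fold memory cost
 * noted above.
 */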
#if     TABLE_BITS==8

static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    int  i, j;
    u128 V;

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

    for (Htable[128]=V, i=64; i>0; i>>=1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i=2; i<256; i<<=1) {
        u128 *Hi = Htable+i, H0 = *Hi;
        for (j=1; j<i; ++j) {
            Hi[j].hi = H0.hi^Htable[j].hi;
            Hi[j].lo = H0.lo^Htable[j].lo;
        }
    }
}
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 Z = { 0, 0 };
    const u8 *xi = (const u8 *)Xi+15;
    size_t rem, n = *xi;
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
    while (1) {
        Z.hi ^= Htable[n].hi;
        Z.lo ^= Htable[n].lo;

        if ((u8 *)Xi==xi)   break;

        n = *(--xi);

        rem  = (size_t)Z.lo&0xff;
        Z.lo = (Z.hi<<56)|(Z.lo>>8);
        Z.hi = (Z.hi>>8);
#if SIZE_MAX == 0xffffffffffffffff
        Z.hi ^= rem_8bit[rem];
#else
        Z.hi ^= (u64)rem_8bit[rem]<<32;
#endif
    }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP8
    Xi[0] = BSWAP8(Z.hi);
    Xi[1] = BSWAP8(Z.lo);
#else
    u8 *p = (u8 *)Xi;
    u32 v;
    v = (u32)(Z.hi>>32); PUTU32(p,v);
    v = (u32)(Z.hi);     PUTU32(p+4,v);
    v = (u32)(Z.lo>>32); PUTU32(p+8,v);
    v = (u32)(Z.lo);     PUTU32(p+12,v);
#endif
#else /* BIG_ENDIAN */
    Xi[0] = Z.hi;
    Xi[1] = Z.lo;
#endif
}

#define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
#elif   TABLE_BITS==4

static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
#if defined(OPENSSL_SMALL_FOOTPRINT)
    int  i;
#endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

#if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8]=V, i=4; i>0; i>>=1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i=2; i<16; i<<=1) {
        u128 *Hi = Htable+i;
        int   j;
        for (V=*Hi, j=1; j<i; ++j) {
            Hi[j].hi = V.hi^Htable[j].hi;
            Hi[j].lo = V.lo^Htable[j].lo;
        }
    }
#else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi  = V.hi^Htable[2].hi,  Htable[3].lo  = V.lo^Htable[2].lo;
    V = Htable[4];
    Htable[5].hi  = V.hi^Htable[1].hi,  Htable[5].lo  = V.lo^Htable[1].lo;
    Htable[6].hi  = V.hi^Htable[2].hi,  Htable[6].lo  = V.lo^Htable[2].lo;
    Htable[7].hi  = V.hi^Htable[3].hi,  Htable[7].lo  = V.lo^Htable[3].lo;
    V = Htable[8];
    Htable[9].hi  = V.hi^Htable[1].hi,  Htable[9].lo  = V.lo^Htable[1].lo;
    Htable[10].hi = V.hi^Htable[2].hi,  Htable[10].lo = V.lo^Htable[2].lo;
    Htable[11].hi = V.hi^Htable[3].hi,  Htable[11].lo = V.lo^Htable[3].lo;
    Htable[12].hi = V.hi^Htable[4].hi,  Htable[12].lo = V.lo^Htable[4].lo;
    Htable[13].hi = V.hi^Htable[5].hi,  Htable[13].lo = V.lo^Htable[5].lo;
    Htable[14].hi = V.hi^Htable[6].hi,  Htable[14].lo = V.lo^Htable[6].lo;
    Htable[15].hi = V.hi^Htable[7].hi,  Htable[15].lo = V.lo^Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
#if BYTE_ORDER == LITTLE_ENDIAN
        for (j=0; j<16; ++j) {
            V = Htable[j];
            Htable[j].hi = V.lo;
            Htable[j].lo = V.hi;
        }
#else /* BIG_ENDIAN */
        for (j=0; j<16; ++j) {
            V = Htable[j];
            Htable[j].hi = V.lo<<32|V.lo>>32;
            Htable[j].lo = V.hi<<32|V.hi>>32;
        }
#endif
    }
#endif
}
#ifndef GHASH_ASM
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;

    nlo  = ((const u8 *)Xi)[15];
    nhi  = nlo>>4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem  = (size_t)Z.lo&0xf;
        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);
#if SIZE_MAX == 0xffffffffffffffff
        Z.hi ^= rem_4bit[rem];
#else
        Z.hi ^= (u64)rem_4bit[rem]<<32;
#endif
        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt<0)    break;

        nlo  = ((const u8 *)Xi)[cnt];
        nhi  = nlo>>4;
        nlo &= 0xf;

        rem  = (size_t)Z.lo&0xf;
        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);
#if SIZE_MAX == 0xffffffffffffffff
        Z.hi ^= rem_4bit[rem];
#else
        Z.hi ^= (u64)rem_4bit[rem]<<32;
#endif
        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP8
    Xi[0] = BSWAP8(Z.hi);
    Xi[1] = BSWAP8(Z.lo);
#else
    u8 *p = (u8 *)Xi;
    u32 v;
    v = (u32)(Z.hi>>32); PUTU32(p,v);
    v = (u32)(Z.hi);     PUTU32(p+4,v);
    v = (u32)(Z.lo>>32); PUTU32(p+8,v);
    v = (u32)(Z.lo);     PUTU32(p+12,v);
#endif
#else /* BIG_ENDIAN */
    Xi[0] = Z.hi;
    Xi[1] = Z.lo;
#endif
}
#if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
    const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;

#if 1
    do {
        cnt  = 15;
        nlo  = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem  = (size_t)Z.lo&0xf;
            Z.lo = (Z.hi<<60)|(Z.lo>>4);
            Z.hi = (Z.hi>>4);
#if SIZE_MAX == 0xffffffffffffffff
            Z.hi ^= rem_4bit[rem];
#else
            Z.hi ^= (u64)rem_4bit[rem]<<32;
#endif
            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt<0)    break;

            nlo  = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi  = nlo>>4;
            nlo &= 0xf;

            rem  = (size_t)Z.lo&0xf;
            Z.lo = (Z.hi<<60)|(Z.lo>>4);
            Z.hi = (Z.hi>>4);
#if SIZE_MAX == 0xffffffffffffffff
            Z.hi ^= rem_4bit[rem];
#else
            Z.hi ^= (u64)rem_4bit[rem]<<32;
#endif
            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
#else
    /*
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];  /* Htable shifted right by 4 bits */
    u8   Hshl4[16];  /* Htable shifted left by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
    /*
     * This pre-processing phase slows down procedure by approximately
     * same time as it makes each loop spin faster. In other words
     * single block performance is approximately same as straightforward
     * "4-bit" implementation, and then it goes only faster...
     */
    for (cnt=0; cnt<16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
        Hshr4[cnt].hi = (Z.hi>>4);
        Hshl4[cnt]    = (u8)(Z.lo<<4);
    }

    do {
        for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
            nlo  = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi  = nlo>>4;
            nlo &= 0xf;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;

            rem = (size_t)Z.lo&0xff;

            Z.lo = (Z.hi<<56)|(Z.lo>>8);
            Z.hi = (Z.hi>>8);

            Z.hi ^= Hshr4[nhi].hi;
            Z.lo ^= Hshr4[nhi].lo;
            Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
        }

        nlo  = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo&0xf;

        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
#endif

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi>>32); PUTU32(p,v);
        v = (u32)(Z.hi);     PUTU32(p+4,v);
        v = (u32)(Z.lo>>32); PUTU32(p+8,v);
        v = (u32)(Z.lo);     PUTU32(p+12,v);
#endif
#else /* BIG_ENDIAN */
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
#endif
    } while (inp+=16, len-=16);
}
#endif  /* !OPENSSL_SMALL_FOOTPRINT */
#else   /* GHASH_ASM */
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#endif

#define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/* GHASH_CHUNK is a "stride parameter" meant to mitigate cache-thrashing
 * effects. In other words, the idea is to hash data while it's still in
 * the L1 cache after the encryption pass... */
#define GHASH_CHUNK       (3*1024)
#endif
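/*
 * 3*1024 bytes is 192 16-byte blocks per GHASH_CHUNK pass, so a chunk of
 * freshly encrypted output is hashed while it can still be expected to
 * sit in the L1 data cache.
 */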
#else   /* TABLE_BITS */

static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
{
    u128 V, Z = { 0, 0 };
    long X;
    int  i, j;
    const long *xi = (const long *)Xi;

    V.hi = H[0];    /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    for (j=0; j<16/sizeof(long); ++j) {
#if BYTE_ORDER == LITTLE_ENDIAN
#if SIZE_MAX == 0xffffffffffffffff
#ifdef BSWAP8
        X = (long)(BSWAP8(xi[j]));
#else
        const u8 *p = (const u8 *)(xi+j);
        X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
#endif
#else
        const u8 *p = (const u8 *)(xi+j);
        X = (long)GETU32(p);
#endif
#else /* BIG_ENDIAN */
        X = xi[j];
#endif

        for (i=0; i<8*sizeof(long); ++i, X<<=1) {
            u64 M = (u64)(X>>(8*sizeof(long)-1));
            Z.hi ^= V.hi&M;
            Z.lo ^= V.lo&M;

            REDUCE1BIT(V);
        }
    }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP8
    Xi[0] = BSWAP8(Z.hi);
    Xi[1] = BSWAP8(Z.lo);
#else
    u8 *p = (u8 *)Xi;
    u32 v;
    v = (u32)(Z.hi>>32); PUTU32(p,v);
    v = (u32)(Z.hi);     PUTU32(p+4,v);
    v = (u32)(Z.lo>>32); PUTU32(p+8,v);
    v = (u32)(Z.lo);     PUTU32(p+12,v);
#endif
#else /* BIG_ENDIAN */
    Xi[0] = Z.hi;
    Xi[1] = Z.lo;
#endif
}

#define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)

#endif  /* TABLE_BITS */
#if     defined(GHASH_ASM) && \
        (defined(__i386)   || defined(__i386__)   || \
         defined(__x86_64) || defined(__x86_64__) || \
         defined(_M_IX86)  || defined(_M_AMD64)   || defined(_M_X64))
#include "x86_arch.h"
#endif
#if     TABLE_BITS==4 && defined(GHASH_ASM)
# if    (defined(__i386)   || defined(__i386__)   || \
         defined(__x86_64) || defined(__x86_64__) || \
         defined(_M_IX86)  || defined(_M_AMD64)   || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);

#  if   defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#  endif
# elif defined(__arm__) || defined(__arm)
#  include "arm_arch.h"
#  if __ARM_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
#  endif
# endif
#endif

#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx,Xi)    (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
# endif
#endif
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key   = key;

    (*block)(ctx->H.c, ctx->H.c, key);

#if BYTE_ORDER == LITTLE_ENDIAN
    /* H is stored in host byte order */
#ifdef BSWAP8
    ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
    ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
    {
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p)  <<32|GETU32(p+4);
        lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
    }
#endif
#endif

#if     TABLE_BITS==8
    gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif   TABLE_BITS==4
# if    defined(GHASH_ASM_X86_OR_64)
#  if   !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    /* check FXSR and PCLMULQDQ bits */
    if ((OPENSSL_cpu_caps() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) ==
        (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) {
        gcm_init_clmul(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_clmul;
        ctx->ghash = gcm_ghash_clmul;
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable, ctx->H.u);
#  if   defined(GHASH_ASM_X86)          /* x86 only */
#   if  defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) {  /* check SSE bit */
#   else
    if (OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) {  /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        ctx->ghash = gcm_ghash_4bit_x86;
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
#  endif
# elif  defined(GHASH_ASM_ARM)
    if (OPENSSL_armcap_P & ARMV7_NEON) {
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
    }
# else
    gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
#endif
}
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv, size_t len)
{
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->Yi.u[0]  = 0;
    ctx->Yi.u[1]  = 0;
    ctx->Xi.u[0]  = 0;
    ctx->Xi.u[1]  = 0;
    ctx->len.u[0] = 0;  /* AAD length */
    ctx->len.u[1] = 0;  /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len==12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        while (len>=16) {
            for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx,Yi);
            iv  += 16;
            len -= 16;
        }
        if (len) {
            for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx,Yi);
        }
        len0 <<= 3;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP8
        ctx->Yi.u[1]  ^= BSWAP8(len0);
#else
        ctx->Yi.c[8]  ^= (u8)(len0>>56);
        ctx->Yi.c[9]  ^= (u8)(len0>>48);
        ctx->Yi.c[10] ^= (u8)(len0>>40);
        ctx->Yi.c[11] ^= (u8)(len0>>32);
        ctx->Yi.c[12] ^= (u8)(len0>>24);
        ctx->Yi.c[13] ^= (u8)(len0>>16);
        ctx->Yi.c[14] ^= (u8)(len0>>8);
        ctx->Yi.c[15] ^= (u8)(len0);
#endif
#else /* BIG_ENDIAN */
        ctx->Yi.u[1]  ^= len0;
#endif

        GCM_MUL(ctx,Yi);

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c+12);
#endif
#else /* BIG_ENDIAN */
        ctr = ctx->Yi.d[3];
#endif
    }

    (*ctx->block)(ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
    ctx->Yi.d[3] = BSWAP4(ctr);
#else
    PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
    ctx->Yi.d[3] = ctr;
#endif
}
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad, size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
        const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1]) return -2;

    alen += len;
    if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL(ctx,Xi);
        else {
            ctx->ares = n;
            return 0;
        }
    }

#ifdef GHASH
    if ((i = (len&(size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len>=16) {
        for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx,Xi);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
    const unsigned char *in, unsigned char *out,
    size_t len)
{
    unsigned int n, ctr;
    size_t i;
    u64        mlen  = ctx->len.u[1];
    block128_f block = ctx->block;
    void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
        const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
    ctr = BSWAP4(ctx->Yi.d[3]);
#else
    ctr = GETU32(ctx->Yi.c+12);
#endif
#else /* BIG_ENDIAN */
    ctr = ctx->Yi.d[3];
#endif

    n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16%sizeof(size_t) == 0) do {    /* always true actually */
        if (n) {
            while (n && len) {
                ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
                --len;
                n = (n+1)%16;
            }
            if (n==0) GCM_MUL(ctx,Xi);
            else {
                ctx->mres = n;
                return 0;
            }
        }
#ifdef __STRICT_ALIGNMENT
        if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
            break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
        while (len>=GHASH_CHUNK) {
            size_t j = GHASH_CHUNK;

            while (j) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block)(ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
                ctx->Yi.d[3] = ctr;
#endif
                for (i=0; i<16/sizeof(size_t); ++i)
                    out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                out += 16;
                in  += 16;
                j   -= 16;
            }
            GHASH(ctx, out-GHASH_CHUNK, GHASH_CHUNK);
            len -= GHASH_CHUNK;
        }
        if ((i = (len&(size_t)-16))) {
            size_t j = i;

            while (len>=16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block)(ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
                ctx->Yi.d[3] = ctr;
#endif
                for (i=0; i<16/sizeof(size_t); ++i)
                    out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                out += 16;
                in  += 16;
                len -= 16;
            }
            GHASH(ctx, out-j, j);
        }
#else
        while (len>=16) {
            size_t *out_t = (size_t *)out;
            const size_t *in_t = (const size_t *)in;

            (*block)(ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
            ctx->Yi.d[3] = ctr;
#endif
            for (i=0; i<16/sizeof(size_t); ++i)
                ctx->Xi.t[i] ^=
                out_t[i] = in_t[i]^ctx->EKi.t[i];
            GCM_MUL(ctx,Xi);
            out += 16;
            in  += 16;
            len -= 16;
        }
#endif
        if (len) {
            (*block)(ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
            ctx->Yi.d[3] = ctr;
#endif
            while (len--) {
                ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
                ++n;
            }
        }

        ctx->mres = n;
        return 0;
    } while(0);
#endif
    for (i=0; i<len; ++i) {
        if (n==0) {
            (*block)(ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
            ctx->Yi.d[3] = ctr;
#endif
        }
        ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
        n = (n+1)%16;
        if (n==0)
            GCM_MUL(ctx,Xi);
    }

    ctx->mres = n;
    return 0;
}
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
    const unsigned char *in, unsigned char *out,
    size_t len)
{
    unsigned int n, ctr;
    size_t i;
    u64        mlen  = ctx->len.u[1];
    block128_f block = ctx->block;
    void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
        const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
    ctr = BSWAP4(ctx->Yi.d[3]);
#else
    ctr = GETU32(ctx->Yi.c+12);
#endif
#else /* BIG_ENDIAN */
    ctr = ctx->Yi.d[3];
#endif

    n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16%sizeof(size_t) == 0) do {    /* always true actually */
        if (n) {
            while (n && len) {
                u8 c = *(in++);
                *(out++) = c^ctx->EKi.c[n];
                ctx->Xi.c[n] ^= c;
                --len;
                n = (n+1)%16;
            }
            if (n==0) GCM_MUL (ctx,Xi);
            else {
                ctx->mres = n;
                return 0;
            }
        }
#ifdef __STRICT_ALIGNMENT
        if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
            break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
        while (len>=GHASH_CHUNK) {
            size_t j = GHASH_CHUNK;

            GHASH(ctx, in, GHASH_CHUNK);
            while (j) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block)(ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
                ctx->Yi.d[3] = ctr;
#endif
                for (i=0; i<16/sizeof(size_t); ++i)
                    out_t[i] = in_t[i]^ctx->EKi.t[i];
                out += 16;
                in  += 16;
                j   -= 16;
            }
            len -= GHASH_CHUNK;
        }
        if ((i = (len&(size_t)-16))) {
            GHASH(ctx, in, i);
            while (len>=16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block)(ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
                ctx->Yi.d[3] = ctr;
#endif
                for (i=0; i<16/sizeof(size_t); ++i)
                    out_t[i] = in_t[i]^ctx->EKi.t[i];
                out += 16;
                in  += 16;
                len -= 16;
            }
        }
#else
        while (len>=16) {
            size_t *out_t = (size_t *)out;
            const size_t *in_t = (const size_t *)in;

            (*block)(ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
            ctx->Yi.d[3] = ctr;
#endif
            for (i=0; i<16/sizeof(size_t); ++i) {
                size_t c = in[i];
                out[i] = c^ctx->EKi.t[i];
                ctx->Xi.t[i] ^= c;
            }
            GCM_MUL(ctx,Xi);
            out += 16;
            in  += 16;
            len -= 16;
        }
#endif
        if (len) {
            (*block)(ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
            ctx->Yi.d[3] = ctr;
#endif
            while (len--) {
                u8 c = in[n];
                ctx->Xi.c[n] ^= c;
                out[n] = c^ctx->EKi.c[n];
                ++n;
            }
        }

        ctx->mres = n;
        return 0;
    } while(0);
#endif
    for (i=0; i<len; ++i) {
        u8 c;
        if (n==0) {
            (*block)(ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
            ctx->Yi.d[3] = ctr;
#endif
        }
        c = in[i];
        out[i] = c^ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        n = (n+1)%16;
        if (n==0)
            GCM_MUL(ctx,Xi);
    }

    ctx->mres = n;
    return 0;
}
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
    const unsigned char *in, unsigned char *out,
    size_t len, ctr128_f stream)
{
    unsigned int n, ctr;
    size_t i;
    u64   mlen = ctx->len.u[1];
    void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
        const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
    ctr = BSWAP4(ctx->Yi.d[3]);
#else
    ctr = GETU32(ctx->Yi.c+12);
#endif
#else /* BIG_ENDIAN */
    ctr = ctx->Yi.d[3];
#endif

    n = ctx->mres;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL(ctx,Xi);
        else {
            ctx->mres = n;
            return 0;
        }
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    while (len>=GHASH_CHUNK) {
        (*stream)(in, out, GHASH_CHUNK/16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK/16;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
        ctx->Yi.d[3] = ctr;
#endif
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in  += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    if ((i = (len&(size_t)-16))) {
        size_t j = i/16;

        (*stream)(in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
        ctx->Yi.d[3] = ctr;
#endif
        in  += i;
        len -= i;
#if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
#else
        while (j--) {
            for (i=0; i<16; ++i) ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx,Xi);
            out += 16;
        }
#endif
    }
    if (len) {
        (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
        ctx->Yi.d[3] = ctr;
#endif
        while (len--) {
            ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
    const unsigned char *in, unsigned char *out,
    size_t len, ctr128_f stream)
{
    unsigned int n, ctr;
    size_t i;
    u64   mlen = ctx->len.u[1];
    void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2], const u128 Htable[16],
        const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
    ctr = BSWAP4(ctx->Yi.d[3]);
#else
    ctr = GETU32(ctx->Yi.c+12);
#endif
#else /* BIG_ENDIAN */
    ctr = ctx->Yi.d[3];
#endif

    n = ctx->mres;
    if (n) {
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c^ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL (ctx,Xi);
        else {
            ctx->mres = n;
            return 0;
        }
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    while (len>=GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream)(in, out, GHASH_CHUNK/16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK/16;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
        ctx->Yi.d[3] = ctr;
#endif
        out += GHASH_CHUNK;
        in  += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    if ((i = (len&(size_t)-16))) {
        size_t j = i/16;

#if defined(GHASH)
        GHASH(ctx, in, i);
#else
        while (j--) {
            size_t k;
            for (k=0; k<16; ++k) ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx,Xi);
            in += 16;
        }
        j   = i/16;
        in -= i;
#endif
        (*stream)(in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
        ctx->Yi.d[3] = ctr;
#endif
        out += i;
        in  += i;
        len -= i;
    }
    if (len) {
        (*ctx->block)(ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c+12, ctr);
#endif
#else /* BIG_ENDIAN */
        ctx->Yi.d[3] = ctr;
#endif
        while (len--) {
            u8 c = in[n];
            ctx->Xi.c[n] ^= c;
            out[n] = c^ctx->EKi.c[n];
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
    size_t len)
{
    u64 alen = ctx->len.u[0]<<3;
    u64 clen = ctx->len.u[1]<<3;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx,Xi);

#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef BSWAP8
    alen = BSWAP8(alen);
    clen = BSWAP8(clen);
#else
    {
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p)  <<32|GETU32(p+4);
        clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
    }
#endif
#endif

    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx,Xi);

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len<=sizeof(ctx->Xi))
        return memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
}

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = malloc(sizeof(GCM128_CONTEXT))))
        CRYPTO_gcm128_init(ret, key, block);

    return ret;
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    freezero(ctx, sizeof(*ctx));
}