/* $OpenBSD: chacha-merged.c,v 1.8 2017/08/13 16:55:31 jsing Exp $ */
/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

#include <sys/types.h>

#include <stdint.h>

#define CHACHA_MINKEYLEN	16
#define CHACHA_NONCELEN		8
#define CHACHA_CTRLEN		8
#define CHACHA_STATELEN		(CHACHA_NONCELEN+CHACHA_CTRLEN)
#define CHACHA_BLOCKLEN		64

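/*
 * Cipher context: the 16-word ChaCha state (constants, key, counter,
 * nonce) plus the most recent keystream block, so a caller can consume
 * the "unused" tail bytes left over after a partial-block encryption.
 */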
struct chacha_ctx {
	u_int input[16];
	uint8_t ks[CHACHA_BLOCKLEN];
	uint8_t unused;
};

static inline void chacha_keysetup(struct chacha_ctx *x, const u_char *k,
    u_int kbits)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_MINKEYLEN)));
static inline void chacha_ivsetup(struct chacha_ctx *x, const u_char *iv,
    const u_char *ctr)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_NONCELEN)))
    __attribute__((__bounded__(__minbytes__, 3, CHACHA_CTRLEN)));
static inline void chacha_encrypt_bytes(struct chacha_ctx *x, const u_char *m,
    u_char *c, u_int bytes)
    __attribute__((__bounded__(__buffer__, 2, 4)))
    __attribute__((__bounded__(__buffer__, 3, 4)));
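/*
 * Intended use: chacha_keysetup() once per key, chacha_ivsetup() per
 * nonce/counter pair, then chacha_encrypt_bytes() over the data.
 */
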
typedef unsigned char u8;
typedef unsigned int u32;

typedef struct chacha_ctx chacha_ctx;

#define U8C(v) (v##U)
#define U32C(v) (v##U)

#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

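/*
 * Rotate and endian helpers: the byte-at-a-time loads and stores give
 * little-endian behaviour regardless of host byte order.
 */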
#define ROTL32(v, n) \
	(U32V((v) << (n)) | ((v) >> (32 - (n))))

#define U8TO32_LITTLE(p) \
	(((u32)((p)[0])) | \
	((u32)((p)[1]) << 8) | \
	((u32)((p)[2]) << 16) | \
	((u32)((p)[3]) << 24))

#define U32TO8_LITTLE(p, v) \
	do { \
		(p)[0] = U8V((v)); \
		(p)[1] = U8V((v) >> 8); \
		(p)[2] = U8V((v) >> 16); \
		(p)[3] = U8V((v) >> 24); \
	} while (0)

#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

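/*
 * The ChaCha quarter round: four add/xor/rotate steps with rotation
 * counts 16, 12, 8 and 7.
 */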
#define QUARTERROUND(a,b,c,d) \
	a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
	a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/* Initialise with "expand 32-byte k". */
static const char sigma[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x33,
	0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

/* Initialise with "expand 16-byte k". */
static const char tau[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x31,
	0x36, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

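/*
 * Load the key into state words 4-11; words 0-3 get "sigma" for
 * 256-bit keys or "tau" for 128-bit keys.
 */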
static inline void
chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits)
{
	const char *constants;

	x->input[4] = U8TO32_LITTLE(k + 0);
	x->input[5] = U8TO32_LITTLE(k + 4);
	x->input[6] = U8TO32_LITTLE(k + 8);
	x->input[7] = U8TO32_LITTLE(k + 12);
	if (kbits == 256) { /* recommended */
		k += 16;
		constants = sigma;
	} else { /* kbits == 128 */
		constants = tau;
	}
	x->input[8] = U8TO32_LITTLE(k + 0);
	x->input[9] = U8TO32_LITTLE(k + 4);
	x->input[10] = U8TO32_LITTLE(k + 8);
	x->input[11] = U8TO32_LITTLE(k + 12);
	x->input[0] = U8TO32_LITTLE(constants + 0);
	x->input[1] = U8TO32_LITTLE(constants + 4);
	x->input[2] = U8TO32_LITTLE(constants + 8);
	x->input[3] = U8TO32_LITTLE(constants + 12);
}

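/*
 * Words 12-13 hold the 64-bit block counter (zeroed when counter is
 * NULL), words 14-15 the 64-bit nonce.
 */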
static inline void
chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
{
	x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
	x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
	x->input[14] = U8TO32_LITTLE(iv + 0);
	x->input[15] = U8TO32_LITTLE(iv + 4);
}

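/*
 * XOR the keystream into m, writing the result to c. For a trailing
 * partial block the whole keystream block is kept in x->ks, with
 * x->unused counting the bytes not yet consumed.
 */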
static inline void
chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
{
	u32 x0, x1, x2, x3, x4, x5, x6, x7;
	u32 x8, x9, x10, x11, x12, x13, x14, x15;
	u32 j0, j1, j2, j3, j4, j5, j6, j7;
	u32 j8, j9, j10, j11, j12, j13, j14, j15;
	u8 *ctarget = NULL;
	u8 tmp[64];
	u_int i;

	if (!bytes)
		return;

	j0 = x->input[0];
	j1 = x->input[1];
	j2 = x->input[2];
	j3 = x->input[3];
	j4 = x->input[4];
	j5 = x->input[5];
	j6 = x->input[6];
	j7 = x->input[7];
	j8 = x->input[8];
	j9 = x->input[9];
	j10 = x->input[10];
	j11 = x->input[11];
	j12 = x->input[12];
	j13 = x->input[13];
	j14 = x->input[14];
	j15 = x->input[15];

	for (;;) {
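		/*
		 * Final short block: work on a copy of the input in tmp,
		 * redirect the output to tmp as well, and remember the
		 * real destination in ctarget.
		 */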
		if (bytes < 64) {
			for (i = 0; i < bytes; ++i)
				tmp[i] = m[i];
			m = tmp;
			ctarget = c;
			c = tmp;
		}
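		/* Work on a fresh copy of the saved state. */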
		x0 = j0;
		x1 = j1;
		x2 = j2;
		x3 = j3;
		x4 = j4;
		x5 = j5;
		x6 = j6;
		x7 = j7;
		x8 = j8;
		x9 = j9;
		x10 = j10;
		x11 = j11;
		x12 = j12;
		x13 = j13;
		x14 = j14;
		x15 = j15;
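		/*
		 * 20 rounds: each loop iteration is one column round
		 * followed by one diagonal round.
		 */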
		for (i = 20; i > 0; i -= 2) {
			QUARTERROUND(x0, x4, x8, x12)
			QUARTERROUND(x1, x5, x9, x13)
			QUARTERROUND(x2, x6, x10, x14)
			QUARTERROUND(x3, x7, x11, x15)
			QUARTERROUND(x0, x5, x10, x15)
			QUARTERROUND(x1, x6, x11, x12)
			QUARTERROUND(x2, x7, x8, x13)
			QUARTERROUND(x3, x4, x9, x14)
		}
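		/* Feed-forward: add the input state to form the keystream. */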
		x0 = PLUS(x0, j0);
		x1 = PLUS(x1, j1);
		x2 = PLUS(x2, j2);
		x3 = PLUS(x3, j3);
		x4 = PLUS(x4, j4);
		x5 = PLUS(x5, j5);
		x6 = PLUS(x6, j6);
		x7 = PLUS(x7, j7);
		x8 = PLUS(x8, j8);
		x9 = PLUS(x9, j9);
		x10 = PLUS(x10, j10);
		x11 = PLUS(x11, j11);
		x12 = PLUS(x12, j12);
		x13 = PLUS(x13, j13);
		x14 = PLUS(x14, j14);
		x15 = PLUS(x15, j15);

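		/*
		 * Short final block: stash the whole keystream block in
		 * x->ks so the leftover bytes remain available.
		 */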
		if (bytes < 64) {
			U32TO8_LITTLE(x->ks + 0, x0);
			U32TO8_LITTLE(x->ks + 4, x1);
			U32TO8_LITTLE(x->ks + 8, x2);
			U32TO8_LITTLE(x->ks + 12, x3);
			U32TO8_LITTLE(x->ks + 16, x4);
			U32TO8_LITTLE(x->ks + 20, x5);
			U32TO8_LITTLE(x->ks + 24, x6);
			U32TO8_LITTLE(x->ks + 28, x7);
			U32TO8_LITTLE(x->ks + 32, x8);
			U32TO8_LITTLE(x->ks + 36, x9);
			U32TO8_LITTLE(x->ks + 40, x10);
			U32TO8_LITTLE(x->ks + 44, x11);
			U32TO8_LITTLE(x->ks + 48, x12);
			U32TO8_LITTLE(x->ks + 52, x13);
			U32TO8_LITTLE(x->ks + 56, x14);
			U32TO8_LITTLE(x->ks + 60, x15);
		}

		x0 = XOR(x0, U8TO32_LITTLE(m + 0));
		x1 = XOR(x1, U8TO32_LITTLE(m + 4));
		x2 = XOR(x2, U8TO32_LITTLE(m + 8));
		x3 = XOR(x3, U8TO32_LITTLE(m + 12));
		x4 = XOR(x4, U8TO32_LITTLE(m + 16));
		x5 = XOR(x5, U8TO32_LITTLE(m + 20));
		x6 = XOR(x6, U8TO32_LITTLE(m + 24));
		x7 = XOR(x7, U8TO32_LITTLE(m + 28));
		x8 = XOR(x8, U8TO32_LITTLE(m + 32));
		x9 = XOR(x9, U8TO32_LITTLE(m + 36));
		x10 = XOR(x10, U8TO32_LITTLE(m + 40));
		x11 = XOR(x11, U8TO32_LITTLE(m + 44));
		x12 = XOR(x12, U8TO32_LITTLE(m + 48));
		x13 = XOR(x13, U8TO32_LITTLE(m + 52));
		x14 = XOR(x14, U8TO32_LITTLE(m + 56));
		x15 = XOR(x15, U8TO32_LITTLE(m + 60));

		/* Increment the 64-bit block counter. */
		j12 = PLUSONE(j12);
		if (!j12) {
			j13 = PLUSONE(j13);
			/*
			 * Stopping at 2^70 bytes per nonce is the user's
			 * responsibility.
			 */
		}

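		/* Write out the 64-byte block (to tmp for a short block). */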
		U32TO8_LITTLE(c + 0, x0);
		U32TO8_LITTLE(c + 4, x1);
		U32TO8_LITTLE(c + 8, x2);
		U32TO8_LITTLE(c + 12, x3);
		U32TO8_LITTLE(c + 16, x4);
		U32TO8_LITTLE(c + 20, x5);
		U32TO8_LITTLE(c + 24, x6);
		U32TO8_LITTLE(c + 28, x7);
		U32TO8_LITTLE(c + 32, x8);
		U32TO8_LITTLE(c + 36, x9);
		U32TO8_LITTLE(c + 40, x10);
		U32TO8_LITTLE(c + 44, x11);
		U32TO8_LITTLE(c + 48, x12);
		U32TO8_LITTLE(c + 52, x13);
		U32TO8_LITTLE(c + 56, x14);
		U32TO8_LITTLE(c + 60, x15);

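		/*
		 * Last block: copy a short result from tmp to the real
		 * destination, save the counter and the count of unused
		 * keystream bytes, and return.
		 */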
		if (bytes <= 64) {
			if (bytes < 64) {
				for (i = 0; i < bytes; ++i)
					ctarget[i] = c[i];
			}
			x->input[12] = j12;
			x->input[13] = j13;
			x->unused = 64 - bytes;
			return;
		}
		bytes -= 64;
		c += 64;
		m += 64;
	}
}