1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright (C) 2015-2016 The fiat-crypto Authors.
4 * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
6 * This is a machine-generated formally verified implementation of Curve25519
7 * ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally
8 * machine generated, it has been tweaked to be suitable for use in the kernel.
9 * It is optimized for 32-bit machines and machines that cannot work efficiently
10 * with 128-bit integer types.
13 #include <asm/unaligned.h>
14 #include <crypto/curve25519.h>
15 #include <linux/string.h>
17 /* fe means field element. Here the field is \Z/(2^255-19). An element t,
18 * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
19 * t[3]+2^102 t[4]+...+2^230 t[9].
20 * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
21 * Multiplication and carrying produce fe from fe_loose.
23 typedef struct fe
{ u32 v
[10]; } fe
;
25 /* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc
26 * Addition and subtraction produce fe_loose from (fe, fe).
28 typedef struct fe_loose
{ u32 v
[10]; } fe_loose
;
30 static __always_inline
void fe_frombytes_impl(u32 h
[10], const u8
*s
)
32 /* Ignores top bit of s. */
33 u32 a0
= get_unaligned_le32(s
);
34 u32 a1
= get_unaligned_le32(s
+4);
35 u32 a2
= get_unaligned_le32(s
+8);
36 u32 a3
= get_unaligned_le32(s
+12);
37 u32 a4
= get_unaligned_le32(s
+16);
38 u32 a5
= get_unaligned_le32(s
+20);
39 u32 a6
= get_unaligned_le32(s
+24);
40 u32 a7
= get_unaligned_le32(s
+28);
41 h
[0] = a0
&((1<<26)-1); /* 26 used, 32-26 left. 26 */
42 h
[1] = (a0
>>26) | ((a1
&((1<<19)-1))<< 6); /* (32-26) + 19 = 6+19 = 25 */
43 h
[2] = (a1
>>19) | ((a2
&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */
44 h
[3] = (a2
>>13) | ((a3
&((1<< 6)-1))<<19); /* (32-13) + 6 = 19+ 6 = 25 */
45 h
[4] = (a3
>> 6); /* (32- 6) = 26 */
46 h
[5] = a4
&((1<<25)-1); /* 25 */
47 h
[6] = (a4
>>25) | ((a5
&((1<<19)-1))<< 7); /* (32-25) + 19 = 7+19 = 26 */
48 h
[7] = (a5
>>19) | ((a6
&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */
49 h
[8] = (a6
>>12) | ((a7
&((1<< 6)-1))<<20); /* (32-12) + 6 = 20+ 6 = 26 */
50 h
[9] = (a7
>> 6)&((1<<25)-1); /* 25 */
53 static __always_inline
void fe_frombytes(fe
*h
, const u8
*s
)
55 fe_frombytes_impl(h
->v
, s
);
58 static __always_inline u8
/*bool*/
59 addcarryx_u25(u8
/*bool*/ c
, u32 a
, u32 b
, u32
*low
)
61 /* This function extracts 25 bits of result and 1 bit of carry
62 * (26 total), so a 32-bit intermediate is sufficient.
65 *low
= x
& ((1 << 25) - 1);
69 static __always_inline u8
/*bool*/
70 addcarryx_u26(u8
/*bool*/ c
, u32 a
, u32 b
, u32
*low
)
72 /* This function extracts 26 bits of result and 1 bit of carry
73 * (27 total), so a 32-bit intermediate is sufficient.
76 *low
= x
& ((1 << 26) - 1);
80 static __always_inline u8
/*bool*/
81 subborrow_u25(u8
/*bool*/ c
, u32 a
, u32 b
, u32
*low
)
83 /* This function extracts 25 bits of result and 1 bit of borrow
84 * (26 total), so a 32-bit intermediate is sufficient.
87 *low
= x
& ((1 << 25) - 1);
91 static __always_inline u8
/*bool*/
92 subborrow_u26(u8
/*bool*/ c
, u32 a
, u32 b
, u32
*low
)
94 /* This function extracts 26 bits of result and 1 bit of borrow
95 *(27 total), so a 32-bit intermediate is sufficient.
98 *low
= x
& ((1 << 26) - 1);
102 static __always_inline u32
cmovznz32(u32 t
, u32 z
, u32 nz
)
104 t
= -!!t
; /* all set if nonzero, 0 if 0 */
105 return (t
&nz
) | ((~t
)&z
);
108 static __always_inline
void fe_freeze(u32 out
[10], const u32 in1
[10])
110 { const u32 x17
= in1
[9];
111 { const u32 x18
= in1
[8];
112 { const u32 x16
= in1
[7];
113 { const u32 x14
= in1
[6];
114 { const u32 x12
= in1
[5];
115 { const u32 x10
= in1
[4];
116 { const u32 x8
= in1
[3];
117 { const u32 x6
= in1
[2];
118 { const u32 x4
= in1
[1];
119 { const u32 x2
= in1
[0];
120 { u32 x20
; u8
/*bool*/ x21
= subborrow_u26(0x0, x2
, 0x3ffffed, &x20
);
121 { u32 x23
; u8
/*bool*/ x24
= subborrow_u25(x21
, x4
, 0x1ffffff, &x23
);
122 { u32 x26
; u8
/*bool*/ x27
= subborrow_u26(x24
, x6
, 0x3ffffff, &x26
);
123 { u32 x29
; u8
/*bool*/ x30
= subborrow_u25(x27
, x8
, 0x1ffffff, &x29
);
124 { u32 x32
; u8
/*bool*/ x33
= subborrow_u26(x30
, x10
, 0x3ffffff, &x32
);
125 { u32 x35
; u8
/*bool*/ x36
= subborrow_u25(x33
, x12
, 0x1ffffff, &x35
);
126 { u32 x38
; u8
/*bool*/ x39
= subborrow_u26(x36
, x14
, 0x3ffffff, &x38
);
127 { u32 x41
; u8
/*bool*/ x42
= subborrow_u25(x39
, x16
, 0x1ffffff, &x41
);
128 { u32 x44
; u8
/*bool*/ x45
= subborrow_u26(x42
, x18
, 0x3ffffff, &x44
);
129 { u32 x47
; u8
/*bool*/ x48
= subborrow_u25(x45
, x17
, 0x1ffffff, &x47
);
130 { u32 x49
= cmovznz32(x48
, 0x0, 0xffffffff);
131 { u32 x50
= (x49
& 0x3ffffed);
132 { u32 x52
; u8
/*bool*/ x53
= addcarryx_u26(0x0, x20
, x50
, &x52
);
133 { u32 x54
= (x49
& 0x1ffffff);
134 { u32 x56
; u8
/*bool*/ x57
= addcarryx_u25(x53
, x23
, x54
, &x56
);
135 { u32 x58
= (x49
& 0x3ffffff);
136 { u32 x60
; u8
/*bool*/ x61
= addcarryx_u26(x57
, x26
, x58
, &x60
);
137 { u32 x62
= (x49
& 0x1ffffff);
138 { u32 x64
; u8
/*bool*/ x65
= addcarryx_u25(x61
, x29
, x62
, &x64
);
139 { u32 x66
= (x49
& 0x3ffffff);
140 { u32 x68
; u8
/*bool*/ x69
= addcarryx_u26(x65
, x32
, x66
, &x68
);
141 { u32 x70
= (x49
& 0x1ffffff);
142 { u32 x72
; u8
/*bool*/ x73
= addcarryx_u25(x69
, x35
, x70
, &x72
);
143 { u32 x74
= (x49
& 0x3ffffff);
144 { u32 x76
; u8
/*bool*/ x77
= addcarryx_u26(x73
, x38
, x74
, &x76
);
145 { u32 x78
= (x49
& 0x1ffffff);
146 { u32 x80
; u8
/*bool*/ x81
= addcarryx_u25(x77
, x41
, x78
, &x80
);
147 { u32 x82
= (x49
& 0x3ffffff);
148 { u32 x84
; u8
/*bool*/ x85
= addcarryx_u26(x81
, x44
, x82
, &x84
);
149 { u32 x86
= (x49
& 0x1ffffff);
150 { u32 x88
; addcarryx_u25(x85
, x47
, x86
, &x88
);
161 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
164 static __always_inline
void fe_tobytes(u8 s
[32], const fe
*f
)
171 s
[3] = (h
[0] >> 24) | (h
[1] << 2);
174 s
[6] = (h
[1] >> 22) | (h
[2] << 3);
177 s
[9] = (h
[2] >> 21) | (h
[3] << 5);
180 s
[12] = (h
[3] >> 19) | (h
[4] << 6);
187 s
[19] = (h
[5] >> 24) | (h
[6] << 1);
190 s
[22] = (h
[6] >> 23) | (h
[7] << 3);
193 s
[25] = (h
[7] >> 21) | (h
[8] << 4);
196 s
[28] = (h
[8] >> 20) | (h
[9] << 6);
203 static __always_inline
void fe_copy(fe
*h
, const fe
*f
)
205 memmove(h
, f
, sizeof(u32
) * 10);
208 static __always_inline
void fe_copy_lt(fe_loose
*h
, const fe
*f
)
210 memmove(h
, f
, sizeof(u32
) * 10);
214 static __always_inline
void fe_0(fe
*h
)
216 memset(h
, 0, sizeof(u32
) * 10);
220 static __always_inline
void fe_1(fe
*h
)
222 memset(h
, 0, sizeof(u32
) * 10);
226 static noinline
void fe_add_impl(u32 out
[10], const u32 in1
[10], const u32 in2
[10])
228 { const u32 x20
= in1
[9];
229 { const u32 x21
= in1
[8];
230 { const u32 x19
= in1
[7];
231 { const u32 x17
= in1
[6];
232 { const u32 x15
= in1
[5];
233 { const u32 x13
= in1
[4];
234 { const u32 x11
= in1
[3];
235 { const u32 x9
= in1
[2];
236 { const u32 x7
= in1
[1];
237 { const u32 x5
= in1
[0];
238 { const u32 x38
= in2
[9];
239 { const u32 x39
= in2
[8];
240 { const u32 x37
= in2
[7];
241 { const u32 x35
= in2
[6];
242 { const u32 x33
= in2
[5];
243 { const u32 x31
= in2
[4];
244 { const u32 x29
= in2
[3];
245 { const u32 x27
= in2
[2];
246 { const u32 x25
= in2
[1];
247 { const u32 x23
= in2
[0];
251 out
[3] = (x11
+ x29
);
252 out
[4] = (x13
+ x31
);
253 out
[5] = (x15
+ x33
);
254 out
[6] = (x17
+ x35
);
255 out
[7] = (x19
+ x37
);
256 out
[8] = (x21
+ x39
);
257 out
[9] = (x20
+ x38
);
262 * Can overlap h with f or g.
264 static __always_inline
void fe_add(fe_loose
*h
, const fe
*f
, const fe
*g
)
266 fe_add_impl(h
->v
, f
->v
, g
->v
);
269 static noinline
void fe_sub_impl(u32 out
[10], const u32 in1
[10], const u32 in2
[10])
271 { const u32 x20
= in1
[9];
272 { const u32 x21
= in1
[8];
273 { const u32 x19
= in1
[7];
274 { const u32 x17
= in1
[6];
275 { const u32 x15
= in1
[5];
276 { const u32 x13
= in1
[4];
277 { const u32 x11
= in1
[3];
278 { const u32 x9
= in1
[2];
279 { const u32 x7
= in1
[1];
280 { const u32 x5
= in1
[0];
281 { const u32 x38
= in2
[9];
282 { const u32 x39
= in2
[8];
283 { const u32 x37
= in2
[7];
284 { const u32 x35
= in2
[6];
285 { const u32 x33
= in2
[5];
286 { const u32 x31
= in2
[4];
287 { const u32 x29
= in2
[3];
288 { const u32 x27
= in2
[2];
289 { const u32 x25
= in2
[1];
290 { const u32 x23
= in2
[0];
291 out
[0] = ((0x7ffffda + x5
) - x23
);
292 out
[1] = ((0x3fffffe + x7
) - x25
);
293 out
[2] = ((0x7fffffe + x9
) - x27
);
294 out
[3] = ((0x3fffffe + x11
) - x29
);
295 out
[4] = ((0x7fffffe + x13
) - x31
);
296 out
[5] = ((0x3fffffe + x15
) - x33
);
297 out
[6] = ((0x7fffffe + x17
) - x35
);
298 out
[7] = ((0x3fffffe + x19
) - x37
);
299 out
[8] = ((0x7fffffe + x21
) - x39
);
300 out
[9] = ((0x3fffffe + x20
) - x38
);
305 * Can overlap h with f or g.
307 static __always_inline
void fe_sub(fe_loose
*h
, const fe
*f
, const fe
*g
)
309 fe_sub_impl(h
->v
, f
->v
, g
->v
);
312 static noinline
void fe_mul_impl(u32 out
[10], const u32 in1
[10], const u32 in2
[10])
314 { const u32 x20
= in1
[9];
315 { const u32 x21
= in1
[8];
316 { const u32 x19
= in1
[7];
317 { const u32 x17
= in1
[6];
318 { const u32 x15
= in1
[5];
319 { const u32 x13
= in1
[4];
320 { const u32 x11
= in1
[3];
321 { const u32 x9
= in1
[2];
322 { const u32 x7
= in1
[1];
323 { const u32 x5
= in1
[0];
324 { const u32 x38
= in2
[9];
325 { const u32 x39
= in2
[8];
326 { const u32 x37
= in2
[7];
327 { const u32 x35
= in2
[6];
328 { const u32 x33
= in2
[5];
329 { const u32 x31
= in2
[4];
330 { const u32 x29
= in2
[3];
331 { const u32 x27
= in2
[2];
332 { const u32 x25
= in2
[1];
333 { const u32 x23
= in2
[0];
334 { u64 x40
= ((u64
)x23
* x5
);
335 { u64 x41
= (((u64
)x23
* x7
) + ((u64
)x25
* x5
));
336 { u64 x42
= ((((u64
)(0x2 * x25
) * x7
) + ((u64
)x23
* x9
)) + ((u64
)x27
* x5
));
337 { u64 x43
= (((((u64
)x25
* x9
) + ((u64
)x27
* x7
)) + ((u64
)x23
* x11
)) + ((u64
)x29
* x5
));
338 { u64 x44
= (((((u64
)x27
* x9
) + (0x2 * (((u64
)x25
* x11
) + ((u64
)x29
* x7
)))) + ((u64
)x23
* x13
)) + ((u64
)x31
* x5
));
339 { u64 x45
= (((((((u64
)x27
* x11
) + ((u64
)x29
* x9
)) + ((u64
)x25
* x13
)) + ((u64
)x31
* x7
)) + ((u64
)x23
* x15
)) + ((u64
)x33
* x5
));
340 { u64 x46
= (((((0x2 * ((((u64
)x29
* x11
) + ((u64
)x25
* x15
)) + ((u64
)x33
* x7
))) + ((u64
)x27
* x13
)) + ((u64
)x31
* x9
)) + ((u64
)x23
* x17
)) + ((u64
)x35
* x5
));
341 { u64 x47
= (((((((((u64
)x29
* x13
) + ((u64
)x31
* x11
)) + ((u64
)x27
* x15
)) + ((u64
)x33
* x9
)) + ((u64
)x25
* x17
)) + ((u64
)x35
* x7
)) + ((u64
)x23
* x19
)) + ((u64
)x37
* x5
));
342 { u64 x48
= (((((((u64
)x31
* x13
) + (0x2 * (((((u64
)x29
* x15
) + ((u64
)x33
* x11
)) + ((u64
)x25
* x19
)) + ((u64
)x37
* x7
)))) + ((u64
)x27
* x17
)) + ((u64
)x35
* x9
)) + ((u64
)x23
* x21
)) + ((u64
)x39
* x5
));
343 { u64 x49
= (((((((((((u64
)x31
* x15
) + ((u64
)x33
* x13
)) + ((u64
)x29
* x17
)) + ((u64
)x35
* x11
)) + ((u64
)x27
* x19
)) + ((u64
)x37
* x9
)) + ((u64
)x25
* x21
)) + ((u64
)x39
* x7
)) + ((u64
)x23
* x20
)) + ((u64
)x38
* x5
));
344 { u64 x50
= (((((0x2 * ((((((u64
)x33
* x15
) + ((u64
)x29
* x19
)) + ((u64
)x37
* x11
)) + ((u64
)x25
* x20
)) + ((u64
)x38
* x7
))) + ((u64
)x31
* x17
)) + ((u64
)x35
* x13
)) + ((u64
)x27
* x21
)) + ((u64
)x39
* x9
));
345 { u64 x51
= (((((((((u64
)x33
* x17
) + ((u64
)x35
* x15
)) + ((u64
)x31
* x19
)) + ((u64
)x37
* x13
)) + ((u64
)x29
* x21
)) + ((u64
)x39
* x11
)) + ((u64
)x27
* x20
)) + ((u64
)x38
* x9
));
346 { u64 x52
= (((((u64
)x35
* x17
) + (0x2 * (((((u64
)x33
* x19
) + ((u64
)x37
* x15
)) + ((u64
)x29
* x20
)) + ((u64
)x38
* x11
)))) + ((u64
)x31
* x21
)) + ((u64
)x39
* x13
));
347 { u64 x53
= (((((((u64
)x35
* x19
) + ((u64
)x37
* x17
)) + ((u64
)x33
* x21
)) + ((u64
)x39
* x15
)) + ((u64
)x31
* x20
)) + ((u64
)x38
* x13
));
348 { u64 x54
= (((0x2 * ((((u64
)x37
* x19
) + ((u64
)x33
* x20
)) + ((u64
)x38
* x15
))) + ((u64
)x35
* x21
)) + ((u64
)x39
* x17
));
349 { u64 x55
= (((((u64
)x37
* x21
) + ((u64
)x39
* x19
)) + ((u64
)x35
* x20
)) + ((u64
)x38
* x17
));
350 { u64 x56
= (((u64
)x39
* x21
) + (0x2 * (((u64
)x37
* x20
) + ((u64
)x38
* x19
))));
351 { u64 x57
= (((u64
)x39
* x20
) + ((u64
)x38
* x21
));
352 { u64 x58
= ((u64
)(0x2 * x38
) * x20
);
353 { u64 x59
= (x48
+ (x58
<< 0x4));
354 { u64 x60
= (x59
+ (x58
<< 0x1));
355 { u64 x61
= (x60
+ x58
);
356 { u64 x62
= (x47
+ (x57
<< 0x4));
357 { u64 x63
= (x62
+ (x57
<< 0x1));
358 { u64 x64
= (x63
+ x57
);
359 { u64 x65
= (x46
+ (x56
<< 0x4));
360 { u64 x66
= (x65
+ (x56
<< 0x1));
361 { u64 x67
= (x66
+ x56
);
362 { u64 x68
= (x45
+ (x55
<< 0x4));
363 { u64 x69
= (x68
+ (x55
<< 0x1));
364 { u64 x70
= (x69
+ x55
);
365 { u64 x71
= (x44
+ (x54
<< 0x4));
366 { u64 x72
= (x71
+ (x54
<< 0x1));
367 { u64 x73
= (x72
+ x54
);
368 { u64 x74
= (x43
+ (x53
<< 0x4));
369 { u64 x75
= (x74
+ (x53
<< 0x1));
370 { u64 x76
= (x75
+ x53
);
371 { u64 x77
= (x42
+ (x52
<< 0x4));
372 { u64 x78
= (x77
+ (x52
<< 0x1));
373 { u64 x79
= (x78
+ x52
);
374 { u64 x80
= (x41
+ (x51
<< 0x4));
375 { u64 x81
= (x80
+ (x51
<< 0x1));
376 { u64 x82
= (x81
+ x51
);
377 { u64 x83
= (x40
+ (x50
<< 0x4));
378 { u64 x84
= (x83
+ (x50
<< 0x1));
379 { u64 x85
= (x84
+ x50
);
380 { u64 x86
= (x85
>> 0x1a);
381 { u32 x87
= ((u32
)x85
& 0x3ffffff);
382 { u64 x88
= (x86
+ x82
);
383 { u64 x89
= (x88
>> 0x19);
384 { u32 x90
= ((u32
)x88
& 0x1ffffff);
385 { u64 x91
= (x89
+ x79
);
386 { u64 x92
= (x91
>> 0x1a);
387 { u32 x93
= ((u32
)x91
& 0x3ffffff);
388 { u64 x94
= (x92
+ x76
);
389 { u64 x95
= (x94
>> 0x19);
390 { u32 x96
= ((u32
)x94
& 0x1ffffff);
391 { u64 x97
= (x95
+ x73
);
392 { u64 x98
= (x97
>> 0x1a);
393 { u32 x99
= ((u32
)x97
& 0x3ffffff);
394 { u64 x100
= (x98
+ x70
);
395 { u64 x101
= (x100
>> 0x19);
396 { u32 x102
= ((u32
)x100
& 0x1ffffff);
397 { u64 x103
= (x101
+ x67
);
398 { u64 x104
= (x103
>> 0x1a);
399 { u32 x105
= ((u32
)x103
& 0x3ffffff);
400 { u64 x106
= (x104
+ x64
);
401 { u64 x107
= (x106
>> 0x19);
402 { u32 x108
= ((u32
)x106
& 0x1ffffff);
403 { u64 x109
= (x107
+ x61
);
404 { u64 x110
= (x109
>> 0x1a);
405 { u32 x111
= ((u32
)x109
& 0x3ffffff);
406 { u64 x112
= (x110
+ x49
);
407 { u64 x113
= (x112
>> 0x19);
408 { u32 x114
= ((u32
)x112
& 0x1ffffff);
409 { u64 x115
= (x87
+ (0x13 * x113
));
410 { u32 x116
= (u32
) (x115
>> 0x1a);
411 { u32 x117
= ((u32
)x115
& 0x3ffffff);
412 { u32 x118
= (x116
+ x90
);
413 { u32 x119
= (x118
>> 0x19);
414 { u32 x120
= (x118
& 0x1ffffff);
417 out
[2] = (x119
+ x93
);
425 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
428 static __always_inline
void fe_mul_ttt(fe
*h
, const fe
*f
, const fe
*g
)
430 fe_mul_impl(h
->v
, f
->v
, g
->v
);
433 static __always_inline
void fe_mul_tlt(fe
*h
, const fe_loose
*f
, const fe
*g
)
435 fe_mul_impl(h
->v
, f
->v
, g
->v
);
438 static __always_inline
void
439 fe_mul_tll(fe
*h
, const fe_loose
*f
, const fe_loose
*g
)
441 fe_mul_impl(h
->v
, f
->v
, g
->v
);
444 static noinline
void fe_sqr_impl(u32 out
[10], const u32 in1
[10])
446 { const u32 x17
= in1
[9];
447 { const u32 x18
= in1
[8];
448 { const u32 x16
= in1
[7];
449 { const u32 x14
= in1
[6];
450 { const u32 x12
= in1
[5];
451 { const u32 x10
= in1
[4];
452 { const u32 x8
= in1
[3];
453 { const u32 x6
= in1
[2];
454 { const u32 x4
= in1
[1];
455 { const u32 x2
= in1
[0];
456 { u64 x19
= ((u64
)x2
* x2
);
457 { u64 x20
= ((u64
)(0x2 * x2
) * x4
);
458 { u64 x21
= (0x2 * (((u64
)x4
* x4
) + ((u64
)x2
* x6
)));
459 { u64 x22
= (0x2 * (((u64
)x4
* x6
) + ((u64
)x2
* x8
)));
460 { u64 x23
= ((((u64
)x6
* x6
) + ((u64
)(0x4 * x4
) * x8
)) + ((u64
)(0x2 * x2
) * x10
));
461 { u64 x24
= (0x2 * ((((u64
)x6
* x8
) + ((u64
)x4
* x10
)) + ((u64
)x2
* x12
)));
462 { u64 x25
= (0x2 * (((((u64
)x8
* x8
) + ((u64
)x6
* x10
)) + ((u64
)x2
* x14
)) + ((u64
)(0x2 * x4
) * x12
)));
463 { u64 x26
= (0x2 * (((((u64
)x8
* x10
) + ((u64
)x6
* x12
)) + ((u64
)x4
* x14
)) + ((u64
)x2
* x16
)));
464 { u64 x27
= (((u64
)x10
* x10
) + (0x2 * ((((u64
)x6
* x14
) + ((u64
)x2
* x18
)) + (0x2 * (((u64
)x4
* x16
) + ((u64
)x8
* x12
))))));
465 { u64 x28
= (0x2 * ((((((u64
)x10
* x12
) + ((u64
)x8
* x14
)) + ((u64
)x6
* x16
)) + ((u64
)x4
* x18
)) + ((u64
)x2
* x17
)));
466 { u64 x29
= (0x2 * (((((u64
)x12
* x12
) + ((u64
)x10
* x14
)) + ((u64
)x6
* x18
)) + (0x2 * (((u64
)x8
* x16
) + ((u64
)x4
* x17
)))));
467 { u64 x30
= (0x2 * (((((u64
)x12
* x14
) + ((u64
)x10
* x16
)) + ((u64
)x8
* x18
)) + ((u64
)x6
* x17
)));
468 { u64 x31
= (((u64
)x14
* x14
) + (0x2 * (((u64
)x10
* x18
) + (0x2 * (((u64
)x12
* x16
) + ((u64
)x8
* x17
))))));
469 { u64 x32
= (0x2 * ((((u64
)x14
* x16
) + ((u64
)x12
* x18
)) + ((u64
)x10
* x17
)));
470 { u64 x33
= (0x2 * ((((u64
)x16
* x16
) + ((u64
)x14
* x18
)) + ((u64
)(0x2 * x12
) * x17
)));
471 { u64 x34
= (0x2 * (((u64
)x16
* x18
) + ((u64
)x14
* x17
)));
472 { u64 x35
= (((u64
)x18
* x18
) + ((u64
)(0x4 * x16
) * x17
));
473 { u64 x36
= ((u64
)(0x2 * x18
) * x17
);
474 { u64 x37
= ((u64
)(0x2 * x17
) * x17
);
475 { u64 x38
= (x27
+ (x37
<< 0x4));
476 { u64 x39
= (x38
+ (x37
<< 0x1));
477 { u64 x40
= (x39
+ x37
);
478 { u64 x41
= (x26
+ (x36
<< 0x4));
479 { u64 x42
= (x41
+ (x36
<< 0x1));
480 { u64 x43
= (x42
+ x36
);
481 { u64 x44
= (x25
+ (x35
<< 0x4));
482 { u64 x45
= (x44
+ (x35
<< 0x1));
483 { u64 x46
= (x45
+ x35
);
484 { u64 x47
= (x24
+ (x34
<< 0x4));
485 { u64 x48
= (x47
+ (x34
<< 0x1));
486 { u64 x49
= (x48
+ x34
);
487 { u64 x50
= (x23
+ (x33
<< 0x4));
488 { u64 x51
= (x50
+ (x33
<< 0x1));
489 { u64 x52
= (x51
+ x33
);
490 { u64 x53
= (x22
+ (x32
<< 0x4));
491 { u64 x54
= (x53
+ (x32
<< 0x1));
492 { u64 x55
= (x54
+ x32
);
493 { u64 x56
= (x21
+ (x31
<< 0x4));
494 { u64 x57
= (x56
+ (x31
<< 0x1));
495 { u64 x58
= (x57
+ x31
);
496 { u64 x59
= (x20
+ (x30
<< 0x4));
497 { u64 x60
= (x59
+ (x30
<< 0x1));
498 { u64 x61
= (x60
+ x30
);
499 { u64 x62
= (x19
+ (x29
<< 0x4));
500 { u64 x63
= (x62
+ (x29
<< 0x1));
501 { u64 x64
= (x63
+ x29
);
502 { u64 x65
= (x64
>> 0x1a);
503 { u32 x66
= ((u32
)x64
& 0x3ffffff);
504 { u64 x67
= (x65
+ x61
);
505 { u64 x68
= (x67
>> 0x19);
506 { u32 x69
= ((u32
)x67
& 0x1ffffff);
507 { u64 x70
= (x68
+ x58
);
508 { u64 x71
= (x70
>> 0x1a);
509 { u32 x72
= ((u32
)x70
& 0x3ffffff);
510 { u64 x73
= (x71
+ x55
);
511 { u64 x74
= (x73
>> 0x19);
512 { u32 x75
= ((u32
)x73
& 0x1ffffff);
513 { u64 x76
= (x74
+ x52
);
514 { u64 x77
= (x76
>> 0x1a);
515 { u32 x78
= ((u32
)x76
& 0x3ffffff);
516 { u64 x79
= (x77
+ x49
);
517 { u64 x80
= (x79
>> 0x19);
518 { u32 x81
= ((u32
)x79
& 0x1ffffff);
519 { u64 x82
= (x80
+ x46
);
520 { u64 x83
= (x82
>> 0x1a);
521 { u32 x84
= ((u32
)x82
& 0x3ffffff);
522 { u64 x85
= (x83
+ x43
);
523 { u64 x86
= (x85
>> 0x19);
524 { u32 x87
= ((u32
)x85
& 0x1ffffff);
525 { u64 x88
= (x86
+ x40
);
526 { u64 x89
= (x88
>> 0x1a);
527 { u32 x90
= ((u32
)x88
& 0x3ffffff);
528 { u64 x91
= (x89
+ x28
);
529 { u64 x92
= (x91
>> 0x19);
530 { u32 x93
= ((u32
)x91
& 0x1ffffff);
531 { u64 x94
= (x66
+ (0x13 * x92
));
532 { u32 x95
= (u32
) (x94
>> 0x1a);
533 { u32 x96
= ((u32
)x94
& 0x3ffffff);
534 { u32 x97
= (x95
+ x69
);
535 { u32 x98
= (x97
>> 0x19);
536 { u32 x99
= (x97
& 0x1ffffff);
539 out
[2] = (x98
+ x72
);
547 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
550 static __always_inline
void fe_sq_tl(fe
*h
, const fe_loose
*f
)
552 fe_sqr_impl(h
->v
, f
->v
);
555 static __always_inline
void fe_sq_tt(fe
*h
, const fe
*f
)
557 fe_sqr_impl(h
->v
, f
->v
);
560 static __always_inline
void fe_loose_invert(fe
*out
, const fe_loose
*z
)
570 for (i
= 1; i
< 2; ++i
)
572 fe_mul_tlt(&t1
, z
, &t1
);
573 fe_mul_ttt(&t0
, &t0
, &t1
);
575 fe_mul_ttt(&t1
, &t1
, &t2
);
577 for (i
= 1; i
< 5; ++i
)
579 fe_mul_ttt(&t1
, &t2
, &t1
);
581 for (i
= 1; i
< 10; ++i
)
583 fe_mul_ttt(&t2
, &t2
, &t1
);
585 for (i
= 1; i
< 20; ++i
)
587 fe_mul_ttt(&t2
, &t3
, &t2
);
589 for (i
= 1; i
< 10; ++i
)
591 fe_mul_ttt(&t1
, &t2
, &t1
);
593 for (i
= 1; i
< 50; ++i
)
595 fe_mul_ttt(&t2
, &t2
, &t1
);
597 for (i
= 1; i
< 100; ++i
)
599 fe_mul_ttt(&t2
, &t3
, &t2
);
601 for (i
= 1; i
< 50; ++i
)
603 fe_mul_ttt(&t1
, &t2
, &t1
);
605 for (i
= 1; i
< 5; ++i
)
607 fe_mul_ttt(out
, &t1
, &t0
);
610 static __always_inline
void fe_invert(fe
*out
, const fe
*z
)
614 fe_loose_invert(out
, &l
);
617 /* Replace (f,g) with (g,f) if b == 1;
618 * replace (f,g) with (f,g) if b == 0.
620 * Preconditions: b in {0,1}
622 static noinline
void fe_cswap(fe
*f
, fe
*g
, unsigned int b
)
626 for (i
= 0; i
< 10; i
++) {
627 u32 x
= f
->v
[i
] ^ g
->v
[i
];
634 /* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/
635 static __always_inline
void fe_mul_121666_impl(u32 out
[10], const u32 in1
[10])
637 { const u32 x20
= in1
[9];
638 { const u32 x21
= in1
[8];
639 { const u32 x19
= in1
[7];
640 { const u32 x17
= in1
[6];
641 { const u32 x15
= in1
[5];
642 { const u32 x13
= in1
[4];
643 { const u32 x11
= in1
[3];
644 { const u32 x9
= in1
[2];
645 { const u32 x7
= in1
[1];
646 { const u32 x5
= in1
[0];
656 { const u32 x23
= 121666;
657 { u64 x40
= ((u64
)x23
* x5
);
658 { u64 x41
= (((u64
)x23
* x7
) + ((u64
)x25
* x5
));
659 { u64 x42
= ((((u64
)(0x2 * x25
) * x7
) + ((u64
)x23
* x9
)) + ((u64
)x27
* x5
));
660 { u64 x43
= (((((u64
)x25
* x9
) + ((u64
)x27
* x7
)) + ((u64
)x23
* x11
)) + ((u64
)x29
* x5
));
661 { u64 x44
= (((((u64
)x27
* x9
) + (0x2 * (((u64
)x25
* x11
) + ((u64
)x29
* x7
)))) + ((u64
)x23
* x13
)) + ((u64
)x31
* x5
));
662 { u64 x45
= (((((((u64
)x27
* x11
) + ((u64
)x29
* x9
)) + ((u64
)x25
* x13
)) + ((u64
)x31
* x7
)) + ((u64
)x23
* x15
)) + ((u64
)x33
* x5
));
663 { u64 x46
= (((((0x2 * ((((u64
)x29
* x11
) + ((u64
)x25
* x15
)) + ((u64
)x33
* x7
))) + ((u64
)x27
* x13
)) + ((u64
)x31
* x9
)) + ((u64
)x23
* x17
)) + ((u64
)x35
* x5
));
664 { u64 x47
= (((((((((u64
)x29
* x13
) + ((u64
)x31
* x11
)) + ((u64
)x27
* x15
)) + ((u64
)x33
* x9
)) + ((u64
)x25
* x17
)) + ((u64
)x35
* x7
)) + ((u64
)x23
* x19
)) + ((u64
)x37
* x5
));
665 { u64 x48
= (((((((u64
)x31
* x13
) + (0x2 * (((((u64
)x29
* x15
) + ((u64
)x33
* x11
)) + ((u64
)x25
* x19
)) + ((u64
)x37
* x7
)))) + ((u64
)x27
* x17
)) + ((u64
)x35
* x9
)) + ((u64
)x23
* x21
)) + ((u64
)x39
* x5
));
666 { u64 x49
= (((((((((((u64
)x31
* x15
) + ((u64
)x33
* x13
)) + ((u64
)x29
* x17
)) + ((u64
)x35
* x11
)) + ((u64
)x27
* x19
)) + ((u64
)x37
* x9
)) + ((u64
)x25
* x21
)) + ((u64
)x39
* x7
)) + ((u64
)x23
* x20
)) + ((u64
)x38
* x5
));
667 { u64 x50
= (((((0x2 * ((((((u64
)x33
* x15
) + ((u64
)x29
* x19
)) + ((u64
)x37
* x11
)) + ((u64
)x25
* x20
)) + ((u64
)x38
* x7
))) + ((u64
)x31
* x17
)) + ((u64
)x35
* x13
)) + ((u64
)x27
* x21
)) + ((u64
)x39
* x9
));
668 { u64 x51
= (((((((((u64
)x33
* x17
) + ((u64
)x35
* x15
)) + ((u64
)x31
* x19
)) + ((u64
)x37
* x13
)) + ((u64
)x29
* x21
)) + ((u64
)x39
* x11
)) + ((u64
)x27
* x20
)) + ((u64
)x38
* x9
));
669 { u64 x52
= (((((u64
)x35
* x17
) + (0x2 * (((((u64
)x33
* x19
) + ((u64
)x37
* x15
)) + ((u64
)x29
* x20
)) + ((u64
)x38
* x11
)))) + ((u64
)x31
* x21
)) + ((u64
)x39
* x13
));
670 { u64 x53
= (((((((u64
)x35
* x19
) + ((u64
)x37
* x17
)) + ((u64
)x33
* x21
)) + ((u64
)x39
* x15
)) + ((u64
)x31
* x20
)) + ((u64
)x38
* x13
));
671 { u64 x54
= (((0x2 * ((((u64
)x37
* x19
) + ((u64
)x33
* x20
)) + ((u64
)x38
* x15
))) + ((u64
)x35
* x21
)) + ((u64
)x39
* x17
));
672 { u64 x55
= (((((u64
)x37
* x21
) + ((u64
)x39
* x19
)) + ((u64
)x35
* x20
)) + ((u64
)x38
* x17
));
673 { u64 x56
= (((u64
)x39
* x21
) + (0x2 * (((u64
)x37
* x20
) + ((u64
)x38
* x19
))));
674 { u64 x57
= (((u64
)x39
* x20
) + ((u64
)x38
* x21
));
675 { u64 x58
= ((u64
)(0x2 * x38
) * x20
);
676 { u64 x59
= (x48
+ (x58
<< 0x4));
677 { u64 x60
= (x59
+ (x58
<< 0x1));
678 { u64 x61
= (x60
+ x58
);
679 { u64 x62
= (x47
+ (x57
<< 0x4));
680 { u64 x63
= (x62
+ (x57
<< 0x1));
681 { u64 x64
= (x63
+ x57
);
682 { u64 x65
= (x46
+ (x56
<< 0x4));
683 { u64 x66
= (x65
+ (x56
<< 0x1));
684 { u64 x67
= (x66
+ x56
);
685 { u64 x68
= (x45
+ (x55
<< 0x4));
686 { u64 x69
= (x68
+ (x55
<< 0x1));
687 { u64 x70
= (x69
+ x55
);
688 { u64 x71
= (x44
+ (x54
<< 0x4));
689 { u64 x72
= (x71
+ (x54
<< 0x1));
690 { u64 x73
= (x72
+ x54
);
691 { u64 x74
= (x43
+ (x53
<< 0x4));
692 { u64 x75
= (x74
+ (x53
<< 0x1));
693 { u64 x76
= (x75
+ x53
);
694 { u64 x77
= (x42
+ (x52
<< 0x4));
695 { u64 x78
= (x77
+ (x52
<< 0x1));
696 { u64 x79
= (x78
+ x52
);
697 { u64 x80
= (x41
+ (x51
<< 0x4));
698 { u64 x81
= (x80
+ (x51
<< 0x1));
699 { u64 x82
= (x81
+ x51
);
700 { u64 x83
= (x40
+ (x50
<< 0x4));
701 { u64 x84
= (x83
+ (x50
<< 0x1));
702 { u64 x85
= (x84
+ x50
);
703 { u64 x86
= (x85
>> 0x1a);
704 { u32 x87
= ((u32
)x85
& 0x3ffffff);
705 { u64 x88
= (x86
+ x82
);
706 { u64 x89
= (x88
>> 0x19);
707 { u32 x90
= ((u32
)x88
& 0x1ffffff);
708 { u64 x91
= (x89
+ x79
);
709 { u64 x92
= (x91
>> 0x1a);
710 { u32 x93
= ((u32
)x91
& 0x3ffffff);
711 { u64 x94
= (x92
+ x76
);
712 { u64 x95
= (x94
>> 0x19);
713 { u32 x96
= ((u32
)x94
& 0x1ffffff);
714 { u64 x97
= (x95
+ x73
);
715 { u64 x98
= (x97
>> 0x1a);
716 { u32 x99
= ((u32
)x97
& 0x3ffffff);
717 { u64 x100
= (x98
+ x70
);
718 { u64 x101
= (x100
>> 0x19);
719 { u32 x102
= ((u32
)x100
& 0x1ffffff);
720 { u64 x103
= (x101
+ x67
);
721 { u64 x104
= (x103
>> 0x1a);
722 { u32 x105
= ((u32
)x103
& 0x3ffffff);
723 { u64 x106
= (x104
+ x64
);
724 { u64 x107
= (x106
>> 0x19);
725 { u32 x108
= ((u32
)x106
& 0x1ffffff);
726 { u64 x109
= (x107
+ x61
);
727 { u64 x110
= (x109
>> 0x1a);
728 { u32 x111
= ((u32
)x109
& 0x3ffffff);
729 { u64 x112
= (x110
+ x49
);
730 { u64 x113
= (x112
>> 0x19);
731 { u32 x114
= ((u32
)x112
& 0x1ffffff);
732 { u64 x115
= (x87
+ (0x13 * x113
));
733 { u32 x116
= (u32
) (x115
>> 0x1a);
734 { u32 x117
= ((u32
)x115
& 0x3ffffff);
735 { u32 x118
= (x116
+ x90
);
736 { u32 x119
= (x118
>> 0x19);
737 { u32 x120
= (x118
& 0x1ffffff);
740 out
[2] = (x119
+ x93
);
748 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
751 static __always_inline
void fe_mul121666(fe
*h
, const fe_loose
*f
)
753 fe_mul_121666_impl(h
->v
, f
->v
);
756 void curve25519_generic(u8 out
[CURVE25519_KEY_SIZE
],
757 const u8 scalar
[CURVE25519_KEY_SIZE
],
758 const u8 point
[CURVE25519_KEY_SIZE
])
760 fe x1
, x2
, z2
, x3
, z3
;
761 fe_loose x2l
, z2l
, x3l
;
766 memcpy(e
, scalar
, 32);
767 curve25519_clamp_secret(e
);
769 /* The following implementation was transcribed to Coq and proven to
770 * correspond to unary scalar multiplication in affine coordinates given
771 * that x1 != 0 is the x coordinate of some point on the curve. It was
772 * also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives
773 * z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was
774 * quantified over the underlying field, so it applies to Curve25519
775 * itself and the quadratic twist of Curve25519. It was not proven in
776 * Coq that prime-field arithmetic correctly simulates extension-field
777 * arithmetic on prime-field values. The decoding of the byte array
778 * representation of e was not considered.
780 * Specification of Montgomery curves in affine coordinates:
781 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
783 * Proof that these form a group that is isomorphic to a Weierstrass
785 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
787 * Coq transcription and correctness proof of the loop
788 * (where scalarbits=255):
789 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
790 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
791 * preconditions: 0 <= e < 2^255 (not necessarily e < order),
794 fe_frombytes(&x1
, point
);
800 for (pos
= 254; pos
>= 0; --pos
) {
802 fe_loose tmp0l
, tmp1l
;
803 /* loop invariant as of right before the test, for the case
805 * pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3
807 * let r := e >> (pos+1) in the following equalities of
809 * to_xz (r*P) === if swap then (x3, z3) else (x2, z2)
810 * to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
811 * x1 is the nonzero x coordinate of the nonzero
812 * point (r*P-(r+1)*P)
814 unsigned b
= 1 & (e
[pos
/ 8] >> (pos
& 7));
816 fe_cswap(&x2
, &x3
, swap
);
817 fe_cswap(&z2
, &z3
, swap
);
819 /* Coq transcription of ladderstep formula (called from
821 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
822 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
823 * x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
824 * x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
826 fe_sub(&tmp0l
, &x3
, &z3
);
827 fe_sub(&tmp1l
, &x2
, &z2
);
828 fe_add(&x2l
, &x2
, &z2
);
829 fe_add(&z2l
, &x3
, &z3
);
830 fe_mul_tll(&z3
, &tmp0l
, &x2l
);
831 fe_mul_tll(&z2
, &z2l
, &tmp1l
);
832 fe_sq_tl(&tmp0
, &tmp1l
);
833 fe_sq_tl(&tmp1
, &x2l
);
834 fe_add(&x3l
, &z3
, &z2
);
835 fe_sub(&z2l
, &z3
, &z2
);
836 fe_mul_ttt(&x2
, &tmp1
, &tmp0
);
837 fe_sub(&tmp1l
, &tmp1
, &tmp0
);
839 fe_mul121666(&z3
, &tmp1l
);
841 fe_add(&tmp0l
, &tmp0
, &z3
);
842 fe_mul_ttt(&z3
, &x1
, &z2
);
843 fe_mul_tll(&z2
, &tmp1l
, &tmp0l
);
845 /* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3)
848 fe_cswap(&x2
, &x3
, swap
);
849 fe_cswap(&z2
, &z3
, swap
);
852 fe_mul_ttt(&x2
, &x2
, &z2
);
853 fe_tobytes(out
, &x2
);
855 memzero_explicit(&x1
, sizeof(x1
));
856 memzero_explicit(&x2
, sizeof(x2
));
857 memzero_explicit(&z2
, sizeof(z2
));
858 memzero_explicit(&x3
, sizeof(x3
));
859 memzero_explicit(&z3
, sizeof(z3
));
860 memzero_explicit(&x2l
, sizeof(x2l
));
861 memzero_explicit(&z2l
, sizeof(z2l
));
862 memzero_explicit(&x3l
, sizeof(x3l
));
863 memzero_explicit(&e
, sizeof(e
));