1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
3 * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved.
4 * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
5 * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
8 #include <crypto/curve25519.h>
9 #include <crypto/internal/kpp.h>
11 #include <linux/types.h>
12 #include <linux/jump_label.h>
13 #include <linux/kernel.h>
14 #include <linux/module.h>
16 #include <asm/cpufeature.h>
17 #include <asm/processor.h>
/*
 * Static-branch key, defined initially false by DEFINE_STATIC_KEY_FALSE and
 * annotated __ro_after_init.
 * NOTE(review): presumably switched on during init when the CPU reports BMI2,
 * to select the *_bmi2 routines -- the code that enables and tests this key is
 * outside this view; confirm.
 */
19 static __ro_after_init
DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2
);
/*
 * Static-branch key, defined initially false by DEFINE_STATIC_KEY_FALSE and
 * annotated __ro_after_init.
 * NOTE(review): presumably switched on during init when the CPU reports ADX,
 * to select the *_adx routines -- the code that enables and tests this key is
 * outside this view; confirm.
 */
20 static __ro_after_init
DEFINE_STATIC_KEY_FALSE(curve25519_use_adx
);
/*
 * Number of u64 words in one eltfp25519_1w array (4 x 64 bits = 256 bits).
 * Also used to size eltfp25519_1w_buffer and table_ladder_8k.
 */
22 enum { NUM_WORDS_ELTFP25519
= 4 };
/*
 * eltfp25519_1w: array of NUM_WORDS_ELTFP25519 (4) u64 words, declared with
 * 32-byte alignment.
 * NOTE(review): going by the name, one element of the field GF(2^255 - 19);
 * the word order/encoding is not established by anything in this view.
 */
23 typedef __aligned(32) u64 eltfp25519_1w
[NUM_WORDS_ELTFP25519
];
/*
 * eltfp25519_1w_buffer: array of 2 * NUM_WORDS_ELTFP25519 (8) u64 words,
 * declared with 32-byte alignment -- twice the size of eltfp25519_1w.
 * NOTE(review): presumably the scratch area for an unreduced double-width
 * product (the *_1w macros pass m.buffer from the integer multiply/square
 * routine to the reduction routine); the declaration of m is not visible here,
 * so confirm that m.buffer has this type.
 */
24 typedef __aligned(32) u64 eltfp25519_1w_buffer
[2 * NUM_WORDS_ELTFP25519
];
/*
 * mul_eltfp25519_1w_adx(c, a, b): calls mul_256x256_integer_adx(m.buffer, a, b)
 * and then red_eltfp25519_1w_adx(c, m.buffer), so the final result lands in c.
 * Relies on an object named m (with a .buffer member) already being in scope
 * at the expansion site; m is not a macro parameter.
 * NOTE(review): presumably c = a * b reduced mod 2^255 - 19 -- both helpers
 * are defined outside this view; confirm.
 */
26 #define mul_eltfp25519_1w_adx(c, a, b) do { \
27 mul_256x256_integer_adx(m.buffer, a, b); \
28 red_eltfp25519_1w_adx(c, m.buffer); \
/*
 * mul_eltfp25519_1w_bmi2(c, a, b): same shape as the _adx variant, but calls
 * mul_256x256_integer_bmi2(m.buffer, a, b) followed by
 * red_eltfp25519_1w_bmi2(c, m.buffer). Result is written to c.
 * Relies on an object named m (with a .buffer member) being in scope at the
 * expansion site.
 * NOTE(review): presumably the BMI2-only code path for c = a * b mod
 * 2^255 - 19; helpers are outside this view -- confirm.
 */
31 #define mul_eltfp25519_1w_bmi2(c, a, b) do { \
32 mul_256x256_integer_bmi2(m.buffer, a, b); \
33 red_eltfp25519_1w_bmi2(c, m.buffer); \
/*
 * sqr_eltfp25519_1w_adx(a): in-place operation on a. Calls
 * sqr_256x256_integer_adx(m.buffer, a) and then
 * red_eltfp25519_1w_adx(a, m.buffer), i.e. the reduced result overwrites a.
 * The argument a is expanded twice, so it should be free of side effects.
 * Relies on an object named m (with a .buffer member) being in scope.
 * NOTE(review): presumably a = a^2 mod 2^255 - 19; helpers are outside this
 * view -- confirm.
 */
36 #define sqr_eltfp25519_1w_adx(a) do { \
37 sqr_256x256_integer_adx(m.buffer, a); \
38 red_eltfp25519_1w_adx(a, m.buffer); \
/*
 * sqr_eltfp25519_1w_bmi2(a): in-place operation on a. Calls
 * sqr_256x256_integer_bmi2(m.buffer, a) and then
 * red_eltfp25519_1w_bmi2(a, m.buffer), i.e. the reduced result overwrites a.
 * The argument a is expanded twice, so it should be free of side effects.
 * Relies on an object named m (with a .buffer member) being in scope.
 * NOTE(review): presumably the BMI2-only path for a = a^2 mod 2^255 - 19;
 * helpers are outside this view -- confirm.
 */
41 #define sqr_eltfp25519_1w_bmi2(a) do { \
42 sqr_256x256_integer_bmi2(m.buffer, a); \
43 red_eltfp25519_1w_bmi2(a, m.buffer); \
/*
 * mul_eltfp25519_2w_adx(c, a, b): calls mul2_256x256_integer_adx(m.buffer, a, b)
 * and then red_eltfp25519_2w_adx(c, m.buffer); the result is written to c.
 * The comment on mul2_256x256_integer_adx later in this file describes a and b
 * as two 256-bit integers each and its output as two 512-bit products, so this
 * macro handles a pair of multiplications per invocation.
 * Relies on an object named m (with a .buffer member) being in scope.
 * NOTE(review): red_eltfp25519_2w_adx is outside this view; presumably it
 * reduces both products mod 2^255 - 19 -- confirm.
 */
46 #define mul_eltfp25519_2w_adx(c, a, b) do { \
47 mul2_256x256_integer_adx(m.buffer, a, b); \
48 red_eltfp25519_2w_adx(c, m.buffer); \
/*
 * mul_eltfp25519_2w_bmi2(c, a, b): calls
 * mul2_256x256_integer_bmi2(m.buffer, a, b) and then
 * red_eltfp25519_2w_bmi2(c, m.buffer); the result is written to c.
 * Relies on an object named m (with a .buffer member) being in scope.
 * NOTE(review): presumably the BMI2-only counterpart of
 * mul_eltfp25519_2w_adx (two multiplications per invocation); both helpers
 * are outside this view -- confirm.
 */
51 #define mul_eltfp25519_2w_bmi2(c, a, b) do { \
52 mul2_256x256_integer_bmi2(m.buffer, a, b); \
53 red_eltfp25519_2w_bmi2(c, m.buffer); \
/*
 * sqr_eltfp25519_2w_adx(a): in-place operation on a. Calls
 * sqr2_256x256_integer_adx(m.buffer, a) and then
 * red_eltfp25519_2w_adx(a, m.buffer), so the reduced result overwrites a.
 * The argument a is expanded twice, so it should be free of side effects.
 * Relies on an object named m (with a .buffer member) being in scope.
 * NOTE(review): presumably squares a pair of elements at once ("2w");
 * helpers are outside this view -- confirm.
 */
56 #define sqr_eltfp25519_2w_adx(a) do { \
57 sqr2_256x256_integer_adx(m.buffer, a); \
58 red_eltfp25519_2w_adx(a, m.buffer); \
/*
 * sqr_eltfp25519_2w_bmi2(a): in-place operation on a. Calls
 * sqr2_256x256_integer_bmi2(m.buffer, a) and then
 * red_eltfp25519_2w_bmi2(a, m.buffer), so the reduced result overwrites a.
 * The argument a is expanded twice, so it should be free of side effects.
 * Relies on an object named m (with a .buffer member) being in scope.
 * NOTE(review): presumably the BMI2-only path that squares a pair of
 * elements at once; helpers are outside this view -- confirm.
 */
61 #define sqr_eltfp25519_2w_bmi2(a) do { \
62 sqr2_256x256_integer_bmi2(m.buffer, a); \
63 red_eltfp25519_2w_bmi2(a, m.buffer); \
/*
 * sqrn_eltfp25519_1w_adx(a, times): applies sqr_eltfp25519_1w_adx(a)
 * repeatedly, `times` times in a row, updating a in place each round.
 * `times` is evaluated once into a local int; a value <= 0 results in no
 * squarings at all. The argument a is re-expanded on every iteration (and
 * twice inside each squaring), so it should be free of side effects.
 * Inherits the requirement that an object named m (with a .buffer member)
 * is in scope at the expansion site.
 */
66 #define sqrn_eltfp25519_1w_adx(a, times) do { \
67 int ____counter = (times); \
68 while (____counter-- > 0) \
69 sqr_eltfp25519_1w_adx(a); \
/*
 * sqrn_eltfp25519_1w_bmi2(a, times): applies sqr_eltfp25519_1w_bmi2(a)
 * repeatedly, `times` times in a row, updating a in place each round.
 * `times` is evaluated once into a local int; a value <= 0 results in no
 * squarings at all. The argument a is re-expanded on every iteration (and
 * twice inside each squaring), so it should be free of side effects.
 * Inherits the requirement that an object named m (with a .buffer member)
 * is in scope at the expansion site.
 */
72 #define sqrn_eltfp25519_1w_bmi2(a, times) do { \
73 int ____counter = (times); \
74 while (____counter-- > 0) \
75 sqr_eltfp25519_1w_bmi2(a); \
/*
 * copy_eltfp25519_1w(C, A): only the opening line of this macro is present in
 * this view, so its body cannot be described from here.
 * NOTE(review): going by the name alone, presumably copies element A into
 * element C -- confirm against the full definition.
 */
78 #define copy_eltfp25519_1w(C, A) do { \
/*
 * setzero_eltfp25519_1w(C): only the opening line of this macro is present in
 * this view, so its body cannot be described from here.
 * NOTE(review): going by the name alone, presumably sets every word of
 * element C to zero -- confirm against the full definition.
 */
85 #define setzero_eltfp25519_1w(C) do { \
/*
 * table_ladder_8k: read-only, 32-byte-aligned table of
 * 252 * NUM_WORDS_ELTFP25519 = 1008 u64 words (8064 bytes, hence "8k").
 * The numbered comments in the initializer label entries 1..252; each entry
 * is four consecutive u64 words.
 * NOTE(review): presumably precomputed constants consumed by a fixed-base
 * Montgomery ladder -- the code that indexes this table is outside this view,
 * so the meaning and word order of each entry should be confirmed there.
 * The values must not be edited by hand.
 */
92 __aligned(32) static const u64 table_ladder_8k
[252 * NUM_WORDS_ELTFP25519
] = {
93 /* 1 */ 0xfffffffffffffff3UL
, 0xffffffffffffffffUL
,
94 0xffffffffffffffffUL
, 0x5fffffffffffffffUL
,
95 /* 2 */ 0x6b8220f416aafe96UL
, 0x82ebeb2b4f566a34UL
,
96 0xd5a9a5b075a5950fUL
, 0x5142b2cf4b2488f4UL
,
97 /* 3 */ 0x6aaebc750069680cUL
, 0x89cf7820a0f99c41UL
,
98 0x2a58d9183b56d0f4UL
, 0x4b5aca80e36011a4UL
,
99 /* 4 */ 0x329132348c29745dUL
, 0xf4a2e616e1642fd7UL
,
100 0x1e45bb03ff67bc34UL
, 0x306912d0f42a9b4aUL
,
101 /* 5 */ 0xff886507e6af7154UL
, 0x04f50e13dfeec82fUL
,
102 0xaa512fe82abab5ceUL
, 0x174e251a68d5f222UL
,
103 /* 6 */ 0xcf96700d82028898UL
, 0x1743e3370a2c02c5UL
,
104 0x379eec98b4e86eaaUL
, 0x0c59888a51e0482eUL
,
105 /* 7 */ 0xfbcbf1d699b5d189UL
, 0xacaef0d58e9fdc84UL
,
106 0xc1c20d06231f7614UL
, 0x2938218da274f972UL
,
107 /* 8 */ 0xf6af49beff1d7f18UL
, 0xcc541c22387ac9c2UL
,
108 0x96fcc9ef4015c56bUL
, 0x69c1627c690913a9UL
,
109 /* 9 */ 0x7a86fd2f4733db0eUL
, 0xfdb8c4f29e087de9UL
,
110 0x095e4b1a8ea2a229UL
, 0x1ad7a7c829b37a79UL
,
111 /* 10 */ 0x342d89cad17ea0c0UL
, 0x67bedda6cced2051UL
,
112 0x19ca31bf2bb42f74UL
, 0x3df7b4c84980acbbUL
,
113 /* 11 */ 0xa8c6444dc80ad883UL
, 0xb91e440366e3ab85UL
,
114 0xc215cda00164f6d8UL
, 0x3d867c6ef247e668UL
,
115 /* 12 */ 0xc7dd582bcc3e658cUL
, 0xfd2c4748ee0e5528UL
,
116 0xa0fd9b95cc9f4f71UL
, 0x7529d871b0675ddfUL
,
117 /* 13 */ 0xb8f568b42d3cbd78UL
, 0x1233011b91f3da82UL
,
118 0x2dce6ccd4a7c3b62UL
, 0x75e7fc8e9e498603UL
,
119 /* 14 */ 0x2f4f13f1fcd0b6ecUL
, 0xf1a8ca1f29ff7a45UL
,
120 0xc249c1a72981e29bUL
, 0x6ebe0dbb8c83b56aUL
,
121 /* 15 */ 0x7114fa8d170bb222UL
, 0x65a2dcd5bf93935fUL
,
122 0xbdc41f68b59c979aUL
, 0x2f0eef79a2ce9289UL
,
123 /* 16 */ 0x42ecbf0c083c37ceUL
, 0x2930bc09ec496322UL
,
124 0xf294b0c19cfeac0dUL
, 0x3780aa4bedfabb80UL
,
125 /* 17 */ 0x56c17d3e7cead929UL
, 0xe7cb4beb2e5722c5UL
,
126 0x0ce931732dbfe15aUL
, 0x41b883c7621052f8UL
,
127 /* 18 */ 0xdbf75ca0c3d25350UL
, 0x2936be086eb1e351UL
,
128 0xc936e03cb4a9b212UL
, 0x1d45bf82322225aaUL
,
129 /* 19 */ 0xe81ab1036a024cc5UL
, 0xe212201c304c9a72UL
,
130 0xc5d73fba6832b1fcUL
, 0x20ffdb5a4d839581UL
,
131 /* 20 */ 0xa283d367be5d0fadUL
, 0x6c2b25ca8b164475UL
,
132 0x9d4935467caaf22eUL
, 0x5166408eee85ff49UL
,
133 /* 21 */ 0x3c67baa2fab4e361UL
, 0xb3e433c67ef35cefUL
,
134 0x5259729241159b1cUL
, 0x6a621892d5b0ab33UL
,
135 /* 22 */ 0x20b74a387555cdcbUL
, 0x532aa10e1208923fUL
,
136 0xeaa17b7762281dd1UL
, 0x61ab3443f05c44bfUL
,
137 /* 23 */ 0x257a6c422324def8UL
, 0x131c6c1017e3cf7fUL
,
138 0x23758739f630a257UL
, 0x295a407a01a78580UL
,
139 /* 24 */ 0xf8c443246d5da8d9UL
, 0x19d775450c52fa5dUL
,
140 0x2afcfc92731bf83dUL
, 0x7d10c8e81b2b4700UL
,
141 /* 25 */ 0xc8e0271f70baa20bUL
, 0x993748867ca63957UL
,
142 0x5412efb3cb7ed4bbUL
, 0x3196d36173e62975UL
,
143 /* 26 */ 0xde5bcad141c7dffcUL
, 0x47cc8cd2b395c848UL
,
144 0xa34cd942e11af3cbUL
, 0x0256dbf2d04ecec2UL
,
145 /* 27 */ 0x875ab7e94b0e667fUL
, 0xcad4dd83c0850d10UL
,
146 0x47f12e8f4e72c79fUL
, 0x5f1a87bb8c85b19bUL
,
147 /* 28 */ 0x7ae9d0b6437f51b8UL
, 0x12c7ce5518879065UL
,
148 0x2ade09fe5cf77aeeUL
, 0x23a05a2f7d2c5627UL
,
149 /* 29 */ 0x5908e128f17c169aUL
, 0xf77498dd8ad0852dUL
,
150 0x74b4c4ceab102f64UL
, 0x183abadd10139845UL
,
151 /* 30 */ 0xb165ba8daa92aaacUL
, 0xd5c5ef9599386705UL
,
152 0xbe2f8f0cf8fc40d1UL
, 0x2701e635ee204514UL
,
153 /* 31 */ 0x629fa80020156514UL
, 0xf223868764a8c1ceUL
,
154 0x5b894fff0b3f060eUL
, 0x60d9944cf708a3faUL
,
155 /* 32 */ 0xaeea001a1c7a201fUL
, 0xebf16a633ee2ce63UL
,
156 0x6f7709594c7a07e1UL
, 0x79b958150d0208cbUL
,
157 /* 33 */ 0x24b55e5301d410e7UL
, 0xe3a34edff3fdc84dUL
,
158 0xd88768e4904032d8UL
, 0x131384427b3aaeecUL
,
159 /* 34 */ 0x8405e51286234f14UL
, 0x14dc4739adb4c529UL
,
160 0xb8a2b5b250634ffdUL
, 0x2fe2a94ad8a7ff93UL
,
161 /* 35 */ 0xec5c57efe843faddUL
, 0x2843ce40f0bb9918UL
,
162 0xa4b561d6cf3d6305UL
, 0x743629bde8fb777eUL
,
163 /* 36 */ 0x343edd46bbaf738fUL
, 0xed981828b101a651UL
,
164 0xa401760b882c797aUL
, 0x1fc223e28dc88730UL
,
165 /* 37 */ 0x48604e91fc0fba0eUL
, 0xb637f78f052c6fa4UL
,
166 0x91ccac3d09e9239cUL
, 0x23f7eed4437a687cUL
,
167 /* 38 */ 0x5173b1118d9bd800UL
, 0x29d641b63189d4a7UL
,
168 0xfdbf177988bbc586UL
, 0x2959894fcad81df5UL
,
169 /* 39 */ 0xaebc8ef3b4bbc899UL
, 0x4148995ab26992b9UL
,
170 0x24e20b0134f92cfbUL
, 0x40d158894a05dee8UL
,
171 /* 40 */ 0x46b00b1185af76f6UL
, 0x26bac77873187a79UL
,
172 0x3dc0bf95ab8fff5fUL
, 0x2a608bd8945524d7UL
,
173 /* 41 */ 0x26449588bd446302UL
, 0x7c4bc21c0388439cUL
,
174 0x8e98a4f383bd11b2UL
, 0x26218d7bc9d876b9UL
,
175 /* 42 */ 0xe3081542997c178aUL
, 0x3c2d29a86fb6606fUL
,
176 0x5c217736fa279374UL
, 0x7dde05734afeb1faUL
,
177 /* 43 */ 0x3bf10e3906d42babUL
, 0xe4f7803e1980649cUL
,
178 0xe6053bf89595bf7aUL
, 0x394faf38da245530UL
,
179 /* 44 */ 0x7a8efb58896928f4UL
, 0xfbc778e9cc6a113cUL
,
180 0x72670ce330af596fUL
, 0x48f222a81d3d6cf7UL
,
181 /* 45 */ 0xf01fce410d72caa7UL
, 0x5a20ecc7213b5595UL
,
182 0x7bc21165c1fa1483UL
, 0x07f89ae31da8a741UL
,
183 /* 46 */ 0x05d2c2b4c6830ff9UL
, 0xd43e330fc6316293UL
,
184 0xa5a5590a96d3a904UL
, 0x705edb91a65333b6UL
,
185 /* 47 */ 0x048ee15e0bb9a5f7UL
, 0x3240cfca9e0aaf5dUL
,
186 0x8f4b71ceedc4a40bUL
, 0x621c0da3de544a6dUL
,
187 /* 48 */ 0x92872836a08c4091UL
, 0xce8375b010c91445UL
,
188 0x8a72eb524f276394UL
, 0x2667fcfa7ec83635UL
,
189 /* 49 */ 0x7f4c173345e8752aUL
, 0x061b47feee7079a5UL
,
190 0x25dd9afa9f86ff34UL
, 0x3780cef5425dc89cUL
,
191 /* 50 */ 0x1a46035a513bb4e9UL
, 0x3e1ef379ac575adaUL
,
192 0xc78c5f1c5fa24b50UL
, 0x321a967634fd9f22UL
,
193 /* 51 */ 0x946707b8826e27faUL
, 0x3dca84d64c506fd0UL
,
194 0xc189218075e91436UL
, 0x6d9284169b3b8484UL
,
195 /* 52 */ 0x3a67e840383f2ddfUL
, 0x33eec9a30c4f9b75UL
,
196 0x3ec7c86fa783ef47UL
, 0x26ec449fbac9fbc4UL
,
197 /* 53 */ 0x5c0f38cba09b9e7dUL
, 0x81168cc762a3478cUL
,
198 0x3e23b0d306fc121cUL
, 0x5a238aa0a5efdcddUL
,
199 /* 54 */ 0x1ba26121c4ea43ffUL
, 0x36f8c77f7c8832b5UL
,
200 0x88fbea0b0adcf99aUL
, 0x5ca9938ec25bebf9UL
,
201 /* 55 */ 0xd5436a5e51fccda0UL
, 0x1dbc4797c2cd893bUL
,
202 0x19346a65d3224a08UL
, 0x0f5034e49b9af466UL
,
203 /* 56 */ 0xf23c3967a1e0b96eUL
, 0xe58b08fa867a4d88UL
,
204 0xfb2fabc6a7341679UL
, 0x2a75381eb6026946UL
,
205 /* 57 */ 0xc80a3be4c19420acUL
, 0x66b1f6c681f2b6dcUL
,
206 0x7cf7036761e93388UL
, 0x25abbbd8a660a4c4UL
,
207 /* 58 */ 0x91ea12ba14fd5198UL
, 0x684950fc4a3cffa9UL
,
208 0xf826842130f5ad28UL
, 0x3ea988f75301a441UL
,
209 /* 59 */ 0xc978109a695f8c6fUL
, 0x1746eb4a0530c3f3UL
,
210 0x444d6d77b4459995UL
, 0x75952b8c054e5cc7UL
,
211 /* 60 */ 0xa3703f7915f4d6aaUL
, 0x66c346202f2647d8UL
,
212 0xd01469df811d644bUL
, 0x77fea47d81a5d71fUL
,
213 /* 61 */ 0xc5e9529ef57ca381UL
, 0x6eeeb4b9ce2f881aUL
,
214 0xb6e91a28e8009bd6UL
, 0x4b80be3e9afc3fecUL
,
215 /* 62 */ 0x7e3773c526aed2c5UL
, 0x1b4afcb453c9a49dUL
,
216 0xa920bdd7baffb24dUL
, 0x7c54699f122d400eUL
,
217 /* 63 */ 0xef46c8e14fa94bc8UL
, 0xe0b074ce2952ed5eUL
,
218 0xbea450e1dbd885d5UL
, 0x61b68649320f712cUL
,
219 /* 64 */ 0x8a485f7309ccbdd1UL
, 0xbd06320d7d4d1a2dUL
,
220 0x25232973322dbef4UL
, 0x445dc4758c17f770UL
,
221 /* 65 */ 0xdb0434177cc8933cUL
, 0xed6fe82175ea059fUL
,
222 0x1efebefdc053db34UL
, 0x4adbe867c65daf99UL
,
223 /* 66 */ 0x3acd71a2a90609dfUL
, 0xe5e991856dd04050UL
,
224 0x1ec69b688157c23cUL
, 0x697427f6885cfe4dUL
,
225 /* 67 */ 0xd7be7b9b65e1a851UL
, 0xa03d28d522c536ddUL
,
226 0x28399d658fd2b645UL
, 0x49e5b7e17c2641e1UL
,
227 /* 68 */ 0x6f8c3a98700457a4UL
, 0x5078f0a25ebb6778UL
,
228 0xd13c3ccbc382960fUL
, 0x2e003258a7df84b1UL
,
229 /* 69 */ 0x8ad1f39be6296a1cUL
, 0xc1eeaa652a5fbfb2UL
,
230 0x33ee0673fd26f3cbUL
, 0x59256173a69d2cccUL
,
231 /* 70 */ 0x41ea07aa4e18fc41UL
, 0xd9fc19527c87a51eUL
,
232 0xbdaacb805831ca6fUL
, 0x445b652dc916694fUL
,
233 /* 71 */ 0xce92a3a7f2172315UL
, 0x1edc282de11b9964UL
,
234 0xa1823aafe04c314aUL
, 0x790a2d94437cf586UL
,
235 /* 72 */ 0x71c447fb93f6e009UL
, 0x8922a56722845276UL
,
236 0xbf70903b204f5169UL
, 0x2f7a89891ba319feUL
,
237 /* 73 */ 0x02a08eb577e2140cUL
, 0xed9a4ed4427bdcf4UL
,
238 0x5253ec44e4323cd1UL
, 0x3e88363c14e9355bUL
,
239 /* 74 */ 0xaa66c14277110b8cUL
, 0x1ae0391610a23390UL
,
240 0x2030bd12c93fc2a2UL
, 0x3ee141579555c7abUL
,
241 /* 75 */ 0x9214de3a6d6e7d41UL
, 0x3ccdd88607f17efeUL
,
242 0x674f1288f8e11217UL
, 0x5682250f329f93d0UL
,
243 /* 76 */ 0x6cf00b136d2e396eUL
, 0x6e4cf86f1014debfUL
,
244 0x5930b1b5bfcc4e83UL
, 0x047069b48aba16b6UL
,
245 /* 77 */ 0x0d4ce4ab69b20793UL
, 0xb24db91a97d0fb9eUL
,
246 0xcdfa50f54e00d01dUL
, 0x221b1085368bddb5UL
,
247 /* 78 */ 0xe7e59468b1e3d8d2UL
, 0x53c56563bd122f93UL
,
248 0xeee8a903e0663f09UL
, 0x61efa662cbbe3d42UL
,
249 /* 79 */ 0x2cf8ddddde6eab2aUL
, 0x9bf80ad51435f231UL
,
250 0x5deadacec9f04973UL
, 0x29275b5d41d29b27UL
,
251 /* 80 */ 0xcfde0f0895ebf14fUL
, 0xb9aab96b054905a7UL
,
252 0xcae80dd9a1c420fdUL
, 0x0a63bf2f1673bbc7UL
,
253 /* 81 */ 0x092f6e11958fbc8cUL
, 0x672a81e804822fadUL
,
254 0xcac8351560d52517UL
, 0x6f3f7722c8f192f8UL
,
255 /* 82 */ 0xf8ba90ccc2e894b7UL
, 0x2c7557a438ff9f0dUL
,
256 0x894d1d855ae52359UL
, 0x68e122157b743d69UL
,
257 /* 83 */ 0xd87e5570cfb919f3UL
, 0x3f2cdecd95798db9UL
,
258 0x2121154710c0a2ceUL
, 0x3c66a115246dc5b2UL
,
259 /* 84 */ 0xcbedc562294ecb72UL
, 0xba7143c36a280b16UL
,
260 0x9610c2efd4078b67UL
, 0x6144735d946a4b1eUL
,
261 /* 85 */ 0x536f111ed75b3350UL
, 0x0211db8c2041d81bUL
,
262 0xf93cb1000e10413cUL
, 0x149dfd3c039e8876UL
,
263 /* 86 */ 0xd479dde46b63155bUL
, 0xb66e15e93c837976UL
,
264 0xdafde43b1f13e038UL
, 0x5fafda1a2e4b0b35UL
,
265 /* 87 */ 0x3600bbdf17197581UL
, 0x3972050bbe3cd2c2UL
,
266 0x5938906dbdd5be86UL
, 0x34fce5e43f9b860fUL
,
267 /* 88 */ 0x75a8a4cd42d14d02UL
, 0x828dabc53441df65UL
,
268 0x33dcabedd2e131d3UL
, 0x3ebad76fb814d25fUL
,
269 /* 89 */ 0xd4906f566f70e10fUL
, 0x5d12f7aa51690f5aUL
,
270 0x45adb16e76cefcf2UL
, 0x01f768aead232999UL
,
271 /* 90 */ 0x2b6cc77b6248febdUL
, 0x3cd30628ec3aaffdUL
,
272 0xce1c0b80d4ef486aUL
, 0x4c3bff2ea6f66c23UL
,
273 /* 91 */ 0x3f2ec4094aeaeb5fUL
, 0x61b19b286e372ca7UL
,
274 0x5eefa966de2a701dUL
, 0x23b20565de55e3efUL
,
275 /* 92 */ 0xe301ca5279d58557UL
, 0x07b2d4ce27c2874fUL
,
276 0xa532cd8a9dcf1d67UL
, 0x2a52fee23f2bff56UL
,
277 /* 93 */ 0x8624efb37cd8663dUL
, 0xbbc7ac20ffbd7594UL
,
278 0x57b85e9c82d37445UL
, 0x7b3052cb86a6ec66UL
,
279 /* 94 */ 0x3482f0ad2525e91eUL
, 0x2cb68043d28edca0UL
,
280 0xaf4f6d052e1b003aUL
, 0x185f8c2529781b0aUL
,
281 /* 95 */ 0xaa41de5bd80ce0d6UL
, 0x9407b2416853e9d6UL
,
282 0x563ec36e357f4c3aUL
, 0x4cc4b8dd0e297bceUL
,
283 /* 96 */ 0xa2fc1a52ffb8730eUL
, 0x1811f16e67058e37UL
,
284 0x10f9a366cddf4ee1UL
, 0x72f4a0c4a0b9f099UL
,
285 /* 97 */ 0x8c16c06f663f4ea7UL
, 0x693b3af74e970fbaUL
,
286 0x2102e7f1d69ec345UL
, 0x0ba53cbc968a8089UL
,
287 /* 98 */ 0xca3d9dc7fea15537UL
, 0x4c6824bb51536493UL
,
288 0xb9886314844006b1UL
, 0x40d2a72ab454cc60UL
,
289 /* 99 */ 0x5936a1b712570975UL
, 0x91b9d648debda657UL
,
290 0x3344094bb64330eaUL
, 0x006ba10d12ee51d0UL
,
291 /* 100 */ 0x19228468f5de5d58UL
, 0x0eb12f4c38cc05b0UL
,
292 0xa1039f9dd5601990UL
, 0x4502d4ce4fff0e0bUL
,
293 /* 101 */ 0xeb2054106837c189UL
, 0xd0f6544c6dd3b93cUL
,
294 0x40727064c416d74fUL
, 0x6e15c6114b502ef0UL
,
295 /* 102 */ 0x4df2a398cfb1a76bUL
, 0x11256c7419f2f6b1UL
,
296 0x4a497962066e6043UL
, 0x705b3aab41355b44UL
,
297 /* 103 */ 0x365ef536d797b1d8UL
, 0x00076bd622ddf0dbUL
,
298 0x3bbf33b0e0575a88UL
, 0x3777aa05c8e4ca4dUL
,
299 /* 104 */ 0x392745c85578db5fUL
, 0x6fda4149dbae5ae2UL
,
300 0xb1f0b00b8adc9867UL
, 0x09963437d36f1da3UL
,
301 /* 105 */ 0x7e824e90a5dc3853UL
, 0xccb5f6641f135cbdUL
,
302 0x6736d86c87ce8fccUL
, 0x625f3ce26604249fUL
,
303 /* 106 */ 0xaf8ac8059502f63fUL
, 0x0c05e70a2e351469UL
,
304 0x35292e9c764b6305UL
, 0x1a394360c7e23ac3UL
,
305 /* 107 */ 0xd5c6d53251183264UL
, 0x62065abd43c2b74fUL
,
306 0xb5fbf5d03b973f9bUL
, 0x13a3da3661206e5eUL
,
307 /* 108 */ 0xc6bd5837725d94e5UL
, 0x18e30912205016c5UL
,
308 0x2088ce1570033c68UL
, 0x7fba1f495c837987UL
,
309 /* 109 */ 0x5a8c7423f2f9079dUL
, 0x1735157b34023fc5UL
,
310 0xe4f9b49ad2fab351UL
, 0x6691ff72c878e33cUL
,
311 /* 110 */ 0x122c2adedc5eff3eUL
, 0xf8dd4bf1d8956cf4UL
,
312 0xeb86205d9e9e5bdaUL
, 0x049b92b9d975c743UL
,
313 /* 111 */ 0xa5379730b0f6c05aUL
, 0x72a0ffacc6f3a553UL
,
314 0xb0032c34b20dcd6dUL
, 0x470e9dbc88d5164aUL
,
315 /* 112 */ 0xb19cf10ca237c047UL
, 0xb65466711f6c81a2UL
,
316 0xb3321bd16dd80b43UL
, 0x48c14f600c5fbe8eUL
,
317 /* 113 */ 0x66451c264aa6c803UL
, 0xb66e3904a4fa7da6UL
,
318 0xd45f19b0b3128395UL
, 0x31602627c3c9bc10UL
,
319 /* 114 */ 0x3120dc4832e4e10dUL
, 0xeb20c46756c717f7UL
,
320 0x00f52e3f67280294UL
, 0x566d4fc14730c509UL
,
321 /* 115 */ 0x7e3a5d40fd837206UL
, 0xc1e926dc7159547aUL
,
322 0x216730fba68d6095UL
, 0x22e8c3843f69cea7UL
,
323 /* 116 */ 0x33d074e8930e4b2bUL
, 0xb6e4350e84d15816UL
,
324 0x5534c26ad6ba2365UL
, 0x7773c12f89f1f3f3UL
,
325 /* 117 */ 0x8cba404da57962aaUL
, 0x5b9897a81999ce56UL
,
326 0x508e862f121692fcUL
, 0x3a81907fa093c291UL
,
327 /* 118 */ 0x0dded0ff4725a510UL
, 0x10d8cc10673fc503UL
,
328 0x5b9d151c9f1f4e89UL
, 0x32a5c1d5cb09a44cUL
,
329 /* 119 */ 0x1e0aa442b90541fbUL
, 0x5f85eb7cc1b485dbUL
,
330 0xbee595ce8a9df2e5UL
, 0x25e496c722422236UL
,
331 /* 120 */ 0x5edf3c46cd0fe5b9UL
, 0x34e75a7ed2a43388UL
,
332 0xe488de11d761e352UL
, 0x0e878a01a085545cUL
,
333 /* 121 */ 0xba493c77e021bb04UL
, 0x2b4d1843c7df899aUL
,
334 0x9ea37a487ae80d67UL
, 0x67a9958011e41794UL
,
335 /* 122 */ 0x4b58051a6697b065UL
, 0x47e33f7d8d6ba6d4UL
,
336 0xbb4da8d483ca46c1UL
, 0x68becaa181c2db0dUL
,
337 /* 123 */ 0x8d8980e90b989aa5UL
, 0xf95eb14a2c93c99bUL
,
338 0x51c6c7c4796e73a2UL
, 0x6e228363b5efb569UL
,
339 /* 124 */ 0xc6bbc0b02dd624c8UL
, 0x777eb47dec8170eeUL
,
340 0x3cde15a004cfafa9UL
, 0x1dc6bc087160bf9bUL
,
341 /* 125 */ 0x2e07e043eec34002UL
, 0x18e9fc677a68dc7fUL
,
342 0xd8da03188bd15b9aUL
, 0x48fbc3bb00568253UL
,
343 /* 126 */ 0x57547d4cfb654ce1UL
, 0xd3565b82a058e2adUL
,
344 0xf63eaf0bbf154478UL
, 0x47531ef114dfbb18UL
,
345 /* 127 */ 0xe1ec630a4278c587UL
, 0x5507d546ca8e83f3UL
,
346 0x85e135c63adc0c2bUL
, 0x0aa7efa85682844eUL
,
347 /* 128 */ 0x72691ba8b3e1f615UL
, 0x32b4e9701fbe3ffaUL
,
348 0x97b6d92e39bb7868UL
, 0x2cfe53dea02e39e8UL
,
349 /* 129 */ 0x687392cd85cd52b0UL
, 0x27ff66c910e29831UL
,
350 0x97134556a9832d06UL
, 0x269bb0360a84f8a0UL
,
351 /* 130 */ 0x706e55457643f85cUL
, 0x3734a48c9b597d1bUL
,
352 0x7aee91e8c6efa472UL
, 0x5cd6abc198a9d9e0UL
,
353 /* 131 */ 0x0e04de06cb3ce41aUL
, 0xd8c6eb893402e138UL
,
354 0x904659bb686e3772UL
, 0x7215c371746ba8c8UL
,
355 /* 132 */ 0xfd12a97eeae4a2d9UL
, 0x9514b7516394f2c5UL
,
356 0x266fd5809208f294UL
, 0x5c847085619a26b9UL
,
357 /* 133 */ 0x52985410fed694eaUL
, 0x3c905b934a2ed254UL
,
358 0x10bb47692d3be467UL
, 0x063b3d2d69e5e9e1UL
,
359 /* 134 */ 0x472726eedda57debUL
, 0xefb6c4ae10f41891UL
,
360 0x2b1641917b307614UL
, 0x117c554fc4f45b7cUL
,
361 /* 135 */ 0xc07cf3118f9d8812UL
, 0x01dbd82050017939UL
,
362 0xd7e803f4171b2827UL
, 0x1015e87487d225eaUL
,
363 /* 136 */ 0xc58de3fed23acc4dUL
, 0x50db91c294a7be2dUL
,
364 0x0b94d43d1c9cf457UL
, 0x6b1640fa6e37524aUL
,
365 /* 137 */ 0x692f346c5fda0d09UL
, 0x200b1c59fa4d3151UL
,
366 0xb8c46f760777a296UL
, 0x4b38395f3ffdfbcfUL
,
367 /* 138 */ 0x18d25e00be54d671UL
, 0x60d50582bec8aba6UL
,
368 0x87ad8f263b78b982UL
, 0x50fdf64e9cda0432UL
,
369 /* 139 */ 0x90f567aac578dcf0UL
, 0xef1e9b0ef2a3133bUL
,
370 0x0eebba9242d9de71UL
, 0x15473c9bf03101c7UL
,
371 /* 140 */ 0x7c77e8ae56b78095UL
, 0xb678e7666e6f078eUL
,
372 0x2da0b9615348ba1fUL
, 0x7cf931c1ff733f0bUL
,
373 /* 141 */ 0x26b357f50a0a366cUL
, 0xe9708cf42b87d732UL
,
374 0xc13aeea5f91cb2c0UL
, 0x35d90c991143bb4cUL
,
375 /* 142 */ 0x47c1c404a9a0d9dcUL
, 0x659e58451972d251UL
,
376 0x3875a8c473b38c31UL
, 0x1fbd9ed379561f24UL
,
377 /* 143 */ 0x11fabc6fd41ec28dUL
, 0x7ef8dfe3cd2a2dcaUL
,
378 0x72e73b5d8c404595UL
, 0x6135fa4954b72f27UL
,
379 /* 144 */ 0xccfc32a2de24b69cUL
, 0x3f55698c1f095d88UL
,
380 0xbe3350ed5ac3f929UL
, 0x5e9bf806ca477eebUL
,
381 /* 145 */ 0xe9ce8fb63c309f68UL
, 0x5376f63565e1f9f4UL
,
382 0xd1afcfb35a6393f1UL
, 0x6632a1ede5623506UL
,
383 /* 146 */ 0x0b7d6c390c2ded4cUL
, 0x56cb3281df04cb1fUL
,
384 0x66305a1249ecc3c7UL
, 0x5d588b60a38ca72aUL
,
385 /* 147 */ 0xa6ecbf78e8e5f42dUL
, 0x86eeb44b3c8a3eecUL
,
386 0xec219c48fbd21604UL
, 0x1aaf1af517c36731UL
,
387 /* 148 */ 0xc306a2836769bde7UL
, 0x208280622b1e2adbUL
,
388 0x8027f51ffbff94a6UL
, 0x76cfa1ce1124f26bUL
,
389 /* 149 */ 0x18eb00562422abb6UL
, 0xf377c4d58f8c29c3UL
,
390 0x4dbbc207f531561aUL
, 0x0253b7f082128a27UL
,
391 /* 150 */ 0x3d1f091cb62c17e0UL
, 0x4860e1abd64628a9UL
,
392 0x52d17436309d4253UL
, 0x356f97e13efae576UL
,
393 /* 151 */ 0xd351e11aa150535bUL
, 0x3e6b45bb1dd878ccUL
,
394 0x0c776128bed92c98UL
, 0x1d34ae93032885b8UL
,
395 /* 152 */ 0x4ba0488ca85ba4c3UL
, 0x985348c33c9ce6ceUL
,
396 0x66124c6f97bda770UL
, 0x0f81a0290654124aUL
,
397 /* 153 */ 0x9ed09ca6569b86fdUL
, 0x811009fd18af9a2dUL
,
398 0xff08d03f93d8c20aUL
, 0x52a148199faef26bUL
,
399 /* 154 */ 0x3e03f9dc2d8d1b73UL
, 0x4205801873961a70UL
,
400 0xc0d987f041a35970UL
, 0x07aa1f15a1c0d549UL
,
401 /* 155 */ 0xdfd46ce08cd27224UL
, 0x6d0a024f934e4239UL
,
402 0x808a7a6399897b59UL
, 0x0a4556e9e13d95a2UL
,
403 /* 156 */ 0xd21a991fe9c13045UL
, 0x9b0e8548fe7751b8UL
,
404 0x5da643cb4bf30035UL
, 0x77db28d63940f721UL
,
405 /* 157 */ 0xfc5eeb614adc9011UL
, 0x5229419ae8c411ebUL
,
406 0x9ec3e7787d1dcf74UL
, 0x340d053e216e4cb5UL
,
407 /* 158 */ 0xcac7af39b48df2b4UL
, 0xc0faec2871a10a94UL
,
408 0x140a69245ca575edUL
, 0x0cf1c37134273a4cUL
,
409 /* 159 */ 0xc8ee306ac224b8a5UL
, 0x57eaee7ccb4930b0UL
,
410 0xa1e806bdaacbe74fUL
, 0x7d9a62742eeb657dUL
,
411 /* 160 */ 0x9eb6b6ef546c4830UL
, 0x885cca1fddb36e2eUL
,
412 0xe6b9f383ef0d7105UL
, 0x58654fef9d2e0412UL
,
413 /* 161 */ 0xa905c4ffbe0e8e26UL
, 0x942de5df9b31816eUL
,
414 0x497d723f802e88e1UL
, 0x30684dea602f408dUL
,
415 /* 162 */ 0x21e5a278a3e6cb34UL
, 0xaefb6e6f5b151dc4UL
,
416 0xb30b8e049d77ca15UL
, 0x28c3c9cf53b98981UL
,
417 /* 163 */ 0x287fb721556cdd2aUL
, 0x0d317ca897022274UL
,
418 0x7468c7423a543258UL
, 0x4a7f11464eb5642fUL
,
419 /* 164 */ 0xa237a4774d193aa6UL
, 0xd865986ea92129a1UL
,
420 0x24c515ecf87c1a88UL
, 0x604003575f39f5ebUL
,
421 /* 165 */ 0x47b9f189570a9b27UL
, 0x2b98cede465e4b78UL
,
422 0x026df551dbb85c20UL
, 0x74fcd91047e21901UL
,
423 /* 166 */ 0x13e2a90a23c1bfa3UL
, 0x0cb0074e478519f6UL
,
424 0x5ff1cbbe3af6cf44UL
, 0x67fe5438be812dbeUL
,
425 /* 167 */ 0xd13cf64fa40f05b0UL
, 0x054dfb2f32283787UL
,
426 0x4173915b7f0d2aeaUL
, 0x482f144f1f610d4eUL
,
427 /* 168 */ 0xf6210201b47f8234UL
, 0x5d0ae1929e70b990UL
,
428 0xdcd7f455b049567cUL
, 0x7e93d0f1f0916f01UL
,
429 /* 169 */ 0xdd79cbf18a7db4faUL
, 0xbe8391bf6f74c62fUL
,
430 0x027145d14b8291bdUL
, 0x585a73ea2cbf1705UL
,
431 /* 170 */ 0x485ca03e928a0db2UL
, 0x10fc01a5742857e7UL
,
432 0x2f482edbd6d551a7UL
, 0x0f0433b5048fdb8aUL
,
433 /* 171 */ 0x60da2e8dd7dc6247UL
, 0x88b4c9d38cd4819aUL
,
434 0x13033ac001f66697UL
, 0x273b24fe3b367d75UL
,
435 /* 172 */ 0xc6e8f66a31b3b9d4UL
, 0x281514a494df49d5UL
,
436 0xd1726fdfc8b23da7UL
, 0x4b3ae7d103dee548UL
,
437 /* 173 */ 0xc6256e19ce4b9d7eUL
, 0xff5c5cf186e3c61cUL
,
438 0xacc63ca34b8ec145UL
, 0x74621888fee66574UL
,
439 /* 174 */ 0x956f409645290a1eUL
, 0xef0bf8e3263a962eUL
,
440 0xed6a50eb5ec2647bUL
, 0x0694283a9dca7502UL
,
441 /* 175 */ 0x769b963643a2dcd1UL
, 0x42b7c8ea09fc5353UL
,
442 0x4f002aee13397eabUL
, 0x63005e2c19b7d63aUL
,
443 /* 176 */ 0xca6736da63023beaUL
, 0x966c7f6db12a99b7UL
,
444 0xace09390c537c5e1UL
, 0x0b696063a1aa89eeUL
,
445 /* 177 */ 0xebb03e97288c56e5UL
, 0x432a9f9f938c8be8UL
,
446 0xa6a5a93d5b717f71UL
, 0x1a5fb4c3e18f9d97UL
,
447 /* 178 */ 0x1c94e7ad1c60cdceUL
, 0xee202a43fc02c4a0UL
,
448 0x8dafe4d867c46a20UL
, 0x0a10263c8ac27b58UL
,
449 /* 179 */ 0xd0dea9dfe4432a4aUL
, 0x856af87bbe9277c5UL
,
450 0xce8472acc212c71aUL
, 0x6f151b6d9bbb1e91UL
,
451 /* 180 */ 0x26776c527ceed56aUL
, 0x7d211cb7fbf8faecUL
,
452 0x37ae66a6fd4609ccUL
, 0x1f81b702d2770c42UL
,
453 /* 181 */ 0x2fb0b057eac58392UL
, 0xe1dd89fe29744e9dUL
,
454 0xc964f8eb17beb4f8UL
, 0x29571073c9a2d41eUL
,
455 /* 182 */ 0xa948a18981c0e254UL
, 0x2df6369b65b22830UL
,
456 0xa33eb2d75fcfd3c6UL
, 0x078cd6ec4199a01fUL
,
457 /* 183 */ 0x4a584a41ad900d2fUL
, 0x32142b78e2c74c52UL
,
458 0x68c4e8338431c978UL
, 0x7f69ea9008689fc2UL
,
459 /* 184 */ 0x52f2c81e46a38265UL
, 0xfd78072d04a832fdUL
,
460 0x8cd7d5fa25359e94UL
, 0x4de71b7454cc29d2UL
,
461 /* 185 */ 0x42eb60ad1eda6ac9UL
, 0x0aad37dfdbc09c3aUL
,
462 0x81004b71e33cc191UL
, 0x44e6be345122803cUL
,
463 /* 186 */ 0x03fe8388ba1920dbUL
, 0xf5d57c32150db008UL
,
464 0x49c8c4281af60c29UL
, 0x21edb518de701aeeUL
,
465 /* 187 */ 0x7fb63e418f06dc99UL
, 0xa4460d99c166d7b8UL
,
466 0x24dd5248ce520a83UL
, 0x5ec3ad712b928358UL
,
467 /* 188 */ 0x15022a5fbd17930fUL
, 0xa4f64a77d82570e3UL
,
468 0x12bc8d6915783712UL
, 0x498194c0fc620abbUL
,
469 /* 189 */ 0x38a2d9d255686c82UL
, 0x785c6bd9193e21f0UL
,
470 0xe4d5c81ab24a5484UL
, 0x56307860b2e20989UL
,
471 /* 190 */ 0x429d55f78b4d74c4UL
, 0x22f1834643350131UL
,
472 0x1e60c24598c71fffUL
, 0x59f2f014979983efUL
,
473 /* 191 */ 0x46a47d56eb494a44UL
, 0x3e22a854d636a18eUL
,
474 0xb346e15274491c3bUL
, 0x2ceafd4e5390cde7UL
,
475 /* 192 */ 0xba8a8538be0d6675UL
, 0x4b9074bb50818e23UL
,
476 0xcbdab89085d304c3UL
, 0x61a24fe0e56192c4UL
,
477 /* 193 */ 0xcb7615e6db525bcbUL
, 0xdd7d8c35a567e4caUL
,
478 0xe6b4153acafcdd69UL
, 0x2d668e097f3c9766UL
,
479 /* 194 */ 0xa57e7e265ce55ef0UL
, 0x5d9f4e527cd4b967UL
,
480 0xfbc83606492fd1e5UL
, 0x090d52beb7c3f7aeUL
,
481 /* 195 */ 0x09b9515a1e7b4d7cUL
, 0x1f266a2599da44c0UL
,
482 0xa1c49548e2c55504UL
, 0x7ef04287126f15ccUL
,
483 /* 196 */ 0xfed1659dbd30ef15UL
, 0x8b4ab9eec4e0277bUL
,
484 0x884d6236a5df3291UL
, 0x1fd96ea6bf5cf788UL
,
485 /* 197 */ 0x42a161981f190d9aUL
, 0x61d849507e6052c1UL
,
486 0x9fe113bf285a2cd5UL
, 0x7c22d676dbad85d8UL
,
487 /* 198 */ 0x82e770ed2bfbd27dUL
, 0x4c05b2ece996f5a5UL
,
488 0xcd40a9c2b0900150UL
, 0x5895319213d9bf64UL
,
489 /* 199 */ 0xe7cc5d703fea2e08UL
, 0xb50c491258e2188cUL
,
490 0xcce30baa48205bf0UL
, 0x537c659ccfa32d62UL
,
491 /* 200 */ 0x37b6623a98cfc088UL
, 0xfe9bed1fa4d6aca4UL
,
492 0x04d29b8e56a8d1b0UL
, 0x725f71c40b519575UL
,
493 /* 201 */ 0x28c7f89cd0339ce6UL
, 0x8367b14469ddc18bUL
,
494 0x883ada83a6a1652cUL
, 0x585f1974034d6c17UL
,
495 /* 202 */ 0x89cfb266f1b19188UL
, 0xe63b4863e7c35217UL
,
496 0xd88c9da6b4c0526aUL
, 0x3e035c9df0954635UL
,
497 /* 203 */ 0xdd9d5412fb45de9dUL
, 0xdd684532e4cff40dUL
,
498 0x4b5c999b151d671cUL
, 0x2d8c2cc811e7f690UL
,
499 /* 204 */ 0x7f54be1d90055d40UL
, 0xa464c5df464aaf40UL
,
500 0x33979624f0e917beUL
, 0x2c018dc527356b30UL
,
501 /* 205 */ 0xa5415024e330b3d4UL
, 0x73ff3d96691652d3UL
,
502 0x94ec42c4ef9b59f1UL
, 0x0747201618d08e5aUL
,
503 /* 206 */ 0x4d6ca48aca411c53UL
, 0x66415f2fcfa66119UL
,
504 0x9c4dd40051e227ffUL
, 0x59810bc09a02f7ebUL
,
505 /* 207 */ 0x2a7eb171b3dc101dUL
, 0x441c5ab99ffef68eUL
,
506 0x32025c9b93b359eaUL
, 0x5e8ce0a71e9d112fUL
,
507 /* 208 */ 0xbfcccb92429503fdUL
, 0xd271ba752f095d55UL
,
508 0x345ead5e972d091eUL
, 0x18c8df11a83103baUL
,
509 /* 209 */ 0x90cd949a9aed0f4cUL
, 0xc5d1f4cb6660e37eUL
,
510 0xb8cac52d56c52e0bUL
, 0x6e42e400c5808e0dUL
,
511 /* 210 */ 0xa3b46966eeaefd23UL
, 0x0c4f1f0be39ecdcaUL
,
512 0x189dc8c9d683a51dUL
, 0x51f27f054c09351bUL
,
513 /* 211 */ 0x4c487ccd2a320682UL
, 0x587ea95bb3df1c96UL
,
514 0xc8ccf79e555cb8e8UL
, 0x547dc829a206d73dUL
,
515 /* 212 */ 0xb822a6cd80c39b06UL
, 0xe96d54732000d4c6UL
,
516 0x28535b6f91463b4dUL
, 0x228f4660e2486e1dUL
,
517 /* 213 */ 0x98799538de8d3abfUL
, 0x8cd8330045ebca6eUL
,
518 0x79952a008221e738UL
, 0x4322e1a7535cd2bbUL
,
519 /* 214 */ 0xb114c11819d1801cUL
, 0x2016e4d84f3f5ec7UL
,
520 0xdd0e2df409260f4cUL
, 0x5ec362c0ae5f7266UL
,
521 /* 215 */ 0xc0462b18b8b2b4eeUL
, 0x7cc8d950274d1afbUL
,
522 0xf25f7105436b02d2UL
, 0x43bbf8dcbff9ccd3UL
,
523 /* 216 */ 0xb6ad1767a039e9dfUL
, 0xb0714da8f69d3583UL
,
524 0x5e55fa18b42931f5UL
, 0x4ed5558f33c60961UL
,
525 /* 217 */ 0x1fe37901c647a5ddUL
, 0x593ddf1f8081d357UL
,
526 0x0249a4fd813fd7a6UL
, 0x69acca274e9caf61UL
,
527 /* 218 */ 0x047ba3ea330721c9UL
, 0x83423fc20e7e1ea0UL
,
528 0x1df4c0af01314a60UL
, 0x09a62dab89289527UL
,
529 /* 219 */ 0xa5b325a49cc6cb00UL
, 0xe94b5dc654b56cb6UL
,
530 0x3be28779adc994a0UL
, 0x4296e8f8ba3a4aadUL
,
531 /* 220 */ 0x328689761e451eabUL
, 0x2e4d598bff59594aUL
,
532 0x49b96853d7a7084aUL
, 0x4980a319601420a8UL
,
533 /* 221 */ 0x9565b9e12f552c42UL
, 0x8a5318db7100fe96UL
,
534 0x05c90b4d43add0d7UL
, 0x538b4cd66a5d4edaUL
,
535 /* 222 */ 0xf4e94fc3e89f039fUL
, 0x592c9af26f618045UL
,
536 0x08a36eb5fd4b9550UL
, 0x25fffaf6c2ed1419UL
,
537 /* 223 */ 0x34434459cc79d354UL
, 0xeeecbfb4b1d5476bUL
,
538 0xddeb34a061615d99UL
, 0x5129cecceb64b773UL
,
539 /* 224 */ 0xee43215894993520UL
, 0x772f9c7cf14c0b3bUL
,
540 0xd2e2fce306bedad5UL
, 0x715f42b546f06a97UL
,
541 /* 225 */ 0x434ecdceda5b5f1aUL
, 0x0da17115a49741a9UL
,
542 0x680bd77c73edad2eUL
, 0x487c02354edd9041UL
,
543 /* 226 */ 0xb8efeff3a70ed9c4UL
, 0x56a32aa3e857e302UL
,
544 0xdf3a68bd48a2a5a0UL
, 0x07f650b73176c444UL
,
545 /* 227 */ 0xe38b9b1626e0ccb1UL
, 0x79e053c18b09fb36UL
,
546 0x56d90319c9f94964UL
, 0x1ca941e7ac9ff5c4UL
,
547 /* 228 */ 0x49c4df29162fa0bbUL
, 0x8488cf3282b33305UL
,
548 0x95dfda14cabb437dUL
, 0x3391f78264d5ad86UL
,
549 /* 229 */ 0x729ae06ae2b5095dUL
, 0xd58a58d73259a946UL
,
550 0xe9834262d13921edUL
, 0x27fedafaa54bb592UL
,
551 /* 230 */ 0xa99dc5b829ad48bbUL
, 0x5f025742499ee260UL
,
552 0x802c8ecd5d7513fdUL
, 0x78ceb3ef3f6dd938UL
,
553 /* 231 */ 0xc342f44f8a135d94UL
, 0x7b9edb44828cdda3UL
,
554 0x9436d11a0537cfe7UL
, 0x5064b164ec1ab4c8UL
,
555 /* 232 */ 0x7020eccfd37eb2fcUL
, 0x1f31ea3ed90d25fcUL
,
556 0x1b930d7bdfa1bb34UL
, 0x5344467a48113044UL
,
557 /* 233 */ 0x70073170f25e6dfbUL
, 0xe385dc1a50114cc8UL
,
558 0x2348698ac8fc4f00UL
, 0x2a77a55284dd40d8UL
,
559 /* 234 */ 0xfe06afe0c98c6ce4UL
, 0xc235df96dddfd6e4UL
,
560 0x1428d01e33bf1ed3UL
, 0x785768ec9300bdafUL
,
561 /* 235 */ 0x9702e57a91deb63bUL
, 0x61bdb8bfe5ce8b80UL
,
562 0x645b426f3d1d58acUL
, 0x4804a82227a557bcUL
,
563 /* 236 */ 0x8e57048ab44d2601UL
, 0x68d6501a4b3a6935UL
,
564 0xc39c9ec3f9e1c293UL
, 0x4172f257d4de63e2UL
,
565 /* 237 */ 0xd368b450330c6401UL
, 0x040d3017418f2391UL
,
566 0x2c34bb6090b7d90dUL
, 0x16f649228fdfd51fUL
,
567 /* 238 */ 0xbea6818e2b928ef5UL
, 0xe28ccf91cdc11e72UL
,
568 0x594aaa68e77a36cdUL
, 0x313034806c7ffd0fUL
,
569 /* 239 */ 0x8a9d27ac2249bd65UL
, 0x19a3b464018e9512UL
,
570 0xc26ccff352b37ec7UL
, 0x056f68341d797b21UL
,
571 /* 240 */ 0x5e79d6757efd2327UL
, 0xfabdbcb6553afe15UL
,
572 0xd3e7222c6eaf5a60UL
, 0x7046c76d4dae743bUL
,
573 /* 241 */ 0x660be872b18d4a55UL
, 0x19992518574e1496UL
,
574 0xc103053a302bdcbbUL
, 0x3ed8e9800b218e8eUL
,
575 /* 242 */ 0x7b0b9239fa75e03eUL
, 0xefe9fb684633c083UL
,
576 0x98a35fbe391a7793UL
, 0x6065510fe2d0fe34UL
,
577 /* 243 */ 0x55cb668548abad0cUL
, 0xb4584548da87e527UL
,
578 0x2c43ecea0107c1ddUL
, 0x526028809372de35UL
,
579 /* 244 */ 0x3415c56af9213b1fUL
, 0x5bee1a4d017e98dbUL
,
580 0x13f6b105b5cf709bUL
, 0x5ff20e3482b29ab6UL
,
581 /* 245 */ 0x0aa29c75cc2e6c90UL
, 0xfc7d73ca3a70e206UL
,
582 0x899fc38fc4b5c515UL
, 0x250386b124ffc207UL
,
583 /* 246 */ 0x54ea28d5ae3d2b56UL
, 0x9913149dd6de60ceUL
,
584 0x16694fc58f06d6c1UL
, 0x46b23975eb018fc7UL
,
585 /* 247 */ 0x470a6a0fb4b7b4e2UL
, 0x5d92475a8f7253deUL
,
586 0xabeee5b52fbd3adbUL
, 0x7fa20801a0806968UL
,
587 /* 248 */ 0x76f3faf19f7714d2UL
, 0xb3e840c12f4660c3UL
,
588 0x0fb4cd8df212744eUL
, 0x4b065a251d3a2dd2UL
,
589 /* 249 */ 0x5cebde383d77cd4aUL
, 0x6adf39df882c9cb1UL
,
590 0xa2dd242eb09af759UL
, 0x3147c0e50e5f6422UL
,
591 /* 250 */ 0x164ca5101d1350dbUL
, 0xf8d13479c33fc962UL
,
592 0xe640ce4d13e5da08UL
, 0x4bdee0c45061f8baUL
,
593 /* 251 */ 0xd7c46dc1a4edb1c9UL
, 0x5514d7b6437fd98aUL
,
594 0x58942f6bb2a1c00bUL
, 0x2dffb2ab1d70710eUL
,
595 /* 252 */ 0xccdfcf2fc18b6d68UL
, 0xa8ebcba8b7806167UL
,
596 0x980697f95e2937e3UL
, 0x02fbba1cd0126e8cUL
599 /* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7]
600 * a is two 256-bit integers: a0[0:3] and a1[4:7]
601 * b is two 256-bit integers: b0[0:3] and b1[4:7]
 */
603 static void mul2_256x256_integer_adx(u64
*const c
, const u64
*const a
,
607 "xorl %%r14d, %%r14d ;"
608 "movq (%1), %%rdx; " /* A[0] */
609 "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
610 "xorl %%r10d, %%r10d ;"
612 "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
613 "adox %%r10, %%r15 ;"
614 "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
616 "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
617 "adox %%r10, %%rbx ;"
618 /******************************************/
619 "adox %%r14, %%rcx ;"
621 "movq 8(%1), %%rdx; " /* A[1] */
622 "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
625 "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
628 "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
630 "adcx %%r11, %%rbx ;"
631 "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
632 "adox %%r10, %%r13 ;"
633 "adcx %%r13, %%rcx ;"
634 /******************************************/
635 "adox %%r14, %%r15 ;"
636 "adcx %%r14, %%r15 ;"
638 "movq 16(%1), %%rdx; " /* A[2] */
639 "xorl %%r10d, %%r10d ;"
640 "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
642 "movq %%r8, 16(%0) ;"
643 "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
646 "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
648 "adcx %%r11, %%rcx ;"
649 "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
650 "adox %%r10, %%r13 ;"
651 "adcx %%r13, %%r15 ;"
652 /******************************************/
653 "adox %%r14, %%rax ;"
654 "adcx %%r14, %%rax ;"
656 "movq 24(%1), %%rdx; " /* A[3] */
657 "xorl %%r10d, %%r10d ;"
658 "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
660 "movq %%r8, 24(%0) ;"
661 "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
664 "movq %%rcx, 32(%0) ;"
665 "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
667 "adcx %%r11, %%r15 ;"
668 "movq %%r15, 40(%0) ;"
669 "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
670 "adox %%r10, %%r13 ;"
671 "adcx %%r13, %%rax ;"
672 "movq %%rax, 48(%0) ;"
673 /******************************************/
674 "adox %%r14, %%rbx ;"
675 "adcx %%r14, %%rbx ;"
676 "movq %%rbx, 56(%0) ;"
678 "movq 32(%1), %%rdx; " /* C[0] */
679 "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
680 "xorl %%r10d, %%r10d ;"
682 "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
683 "adox %%r10, %%r15 ;"
684 "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
686 "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
687 "adox %%r10, %%rbx ;"
688 /******************************************/
689 "adox %%r14, %%rcx ;"
691 "movq 40(%1), %%rdx; " /* C[1] */
692 "xorl %%r10d, %%r10d ;"
693 "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
696 "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
699 "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
701 "adcx %%r11, %%rbx ;"
702 "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
703 "adox %%r10, %%r13 ;"
704 "adcx %%r13, %%rcx ;"
705 /******************************************/
706 "adox %%r14, %%r15 ;"
707 "adcx %%r14, %%r15 ;"
709 "movq 48(%1), %%rdx; " /* C[2] */
710 "xorl %%r10d, %%r10d ;"
711 "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
714 "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
717 "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
719 "adcx %%r11, %%rcx ;"
720 "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
721 "adox %%r10, %%r13 ;"
722 "adcx %%r13, %%r15 ;"
723 /******************************************/
724 "adox %%r14, %%rax ;"
725 "adcx %%r14, %%rax ;"
727 "movq 56(%1), %%rdx; " /* C[3] */
728 "xorl %%r10d, %%r10d ;"
729 "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
732 "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
735 "movq %%rcx, 96(%0) ;"
736 "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
738 "adcx %%r11, %%r15 ;"
739 "movq %%r15, 104(%0) ;"
740 "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
741 "adox %%r10, %%r13 ;"
742 "adcx %%r13, %%rax ;"
743 "movq %%rax, 112(%0) ;"
744 /******************************************/
745 "adox %%r14, %%rbx ;"
746 "adcx %%r14, %%rbx ;"
747 "movq %%rbx, 120(%0) ;"
749 : "r"(c
), "r"(a
), "r"(b
)
750 : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
751 "%r10", "%r11", "%r13", "%r14", "%r15");
754 static void mul2_256x256_integer_bmi2(u64
*const c
, const u64
*const a
,
758 "movq (%1), %%rdx; " /* A[0] */
759 "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
761 "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
762 "addq %%r10, %%r15 ;"
763 "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
765 "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
766 "adcq %%r10, %%rbx ;"
767 /******************************************/
770 "movq 8(%1), %%rdx; " /* A[1] */
771 "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
774 "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
776 "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
778 "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
779 "adcq %%r10, %%r13 ;"
780 /******************************************/
784 "adcq %%r11, %%rbx ;"
785 "adcq %%r13, %%rcx ;"
788 "movq 16(%1), %%rdx; " /* A[2] */
789 "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
791 "movq %%r8, 16(%0) ;"
792 "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
794 "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
796 "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
797 "adcq %%r10, %%r13 ;"
798 /******************************************/
802 "adcq %%r11, %%rcx ;"
803 "adcq %%r13, %%r15 ;"
806 "movq 24(%1), %%rdx; " /* A[3] */
807 "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
809 "movq %%r8, 24(%0) ;"
810 "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
812 "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
814 "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
815 "adcq %%r10, %%r13 ;"
816 /******************************************/
820 "movq %%rcx, 32(%0) ;"
821 "adcq %%r11, %%r15 ;"
822 "movq %%r15, 40(%0) ;"
823 "adcq %%r13, %%rax ;"
824 "movq %%rax, 48(%0) ;"
826 "movq %%rbx, 56(%0) ;"
828 "movq 32(%1), %%rdx; " /* C[0] */
829 "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
830 "movq %%r8, 64(%0) ;"
831 "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
832 "addq %%r10, %%r15 ;"
833 "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
835 "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
836 "adcq %%r10, %%rbx ;"
837 /******************************************/
840 "movq 40(%1), %%rdx; " /* C[1] */
841 "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
843 "movq %%r8, 72(%0) ;"
844 "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
846 "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
848 "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
849 "adcq %%r10, %%r13 ;"
850 /******************************************/
854 "adcq %%r11, %%rbx ;"
855 "adcq %%r13, %%rcx ;"
858 "movq 48(%1), %%rdx; " /* C[2] */
859 "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
861 "movq %%r8, 80(%0) ;"
862 "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
864 "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
866 "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
867 "adcq %%r10, %%r13 ;"
868 /******************************************/
872 "adcq %%r11, %%rcx ;"
873 "adcq %%r13, %%r15 ;"
876 "movq 56(%1), %%rdx; " /* C[3] */
877 "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
879 "movq %%r8, 88(%0) ;"
880 "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
882 "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
884 "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
885 "adcq %%r10, %%r13 ;"
886 /******************************************/
890 "movq %%rcx, 96(%0) ;"
891 "adcq %%r11, %%r15 ;"
892 "movq %%r15, 104(%0) ;"
893 "adcq %%r13, %%rax ;"
894 "movq %%rax, 112(%0) ;"
896 "movq %%rbx, 120(%0) ;"
898 : "r"(c
), "r"(a
), "r"(b
)
899 : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
900 "%r10", "%r11", "%r13", "%r15");
903 static void sqr2_256x256_integer_adx(u64
*const c
, const u64
*const a
)
906 "movq (%1), %%rdx ;" /* A[0] */
907 "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
908 "xorl %%r15d, %%r15d;"
909 "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
911 "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
912 "adcx %%rax, %%r10 ;"
913 "movq 24(%1), %%rdx ;" /* A[3] */
914 "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
915 "adcx %%rcx, %%r11 ;"
916 "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
917 "adcx %%rax, %%rbx ;"
918 "movq 8(%1), %%rdx ;" /* A[1] */
919 "adcx %%r15, %%r13 ;"
920 "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
922 /******************************************/
923 "adcx %%r15, %%r14 ;"
925 "xorl %%r15d, %%r15d;"
926 "adox %%rax, %%r10 ;"
928 "adox %%rcx, %%r11 ;"
930 "adox %%r15, %%rbx ;"
931 "adcx %%r10, %%r10 ;"
932 "adox %%r15, %%r13 ;"
933 "adcx %%r11, %%r11 ;"
934 "adox %%r15, %%r14 ;"
935 "adcx %%rbx, %%rbx ;"
936 "adcx %%r13, %%r13 ;"
937 "adcx %%r14, %%r14 ;"
940 "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
941 /*******************/
942 "movq %%rax, 0(%0) ;"
945 "movq 8(%1), %%rdx ;"
946 "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
948 "movq %%r9, 16(%0) ;"
949 "adcq %%rcx, %%r10 ;"
950 "movq %%r10, 24(%0) ;"
951 "movq 16(%1), %%rdx ;"
952 "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
953 "adcq %%rax, %%r11 ;"
954 "movq %%r11, 32(%0) ;"
955 "adcq %%rcx, %%rbx ;"
956 "movq %%rbx, 40(%0) ;"
957 "movq 24(%1), %%rdx ;"
958 "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
959 "adcq %%rax, %%r13 ;"
960 "movq %%r13, 48(%0) ;"
961 "adcq %%rcx, %%r14 ;"
962 "movq %%r14, 56(%0) ;"
965 "movq 32(%1), %%rdx ;" /* B[0] */
966 "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */
967 "xorl %%r15d, %%r15d;"
968 "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */
970 "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */
971 "adcx %%rax, %%r10 ;"
972 "movq 56(%1), %%rdx ;" /* B[3] */
973 "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */
974 "adcx %%rcx, %%r11 ;"
975 "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */
976 "adcx %%rax, %%rbx ;"
977 "movq 40(%1), %%rdx ;" /* B[1] */
978 "adcx %%r15, %%r13 ;"
979 "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */
981 /******************************************/
982 "adcx %%r15, %%r14 ;"
984 "xorl %%r15d, %%r15d;"
985 "adox %%rax, %%r10 ;"
987 "adox %%rcx, %%r11 ;"
989 "adox %%r15, %%rbx ;"
990 "adcx %%r10, %%r10 ;"
991 "adox %%r15, %%r13 ;"
992 "adcx %%r11, %%r11 ;"
993 "adox %%r15, %%r14 ;"
994 "adcx %%rbx, %%rbx ;"
995 "adcx %%r13, %%r13 ;"
996 "adcx %%r14, %%r14 ;"
998 "movq 32(%1), %%rdx ;"
999 "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */
1000 /*******************/
1001 "movq %%rax, 64(%0) ;"
1002 "addq %%rcx, %%r8 ;"
1003 "movq %%r8, 72(%0) ;"
1004 "movq 40(%1), %%rdx ;"
1005 "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */
1006 "adcq %%rax, %%r9 ;"
1007 "movq %%r9, 80(%0) ;"
1008 "adcq %%rcx, %%r10 ;"
1009 "movq %%r10, 88(%0) ;"
1010 "movq 48(%1), %%rdx ;"
1011 "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */
1012 "adcq %%rax, %%r11 ;"
1013 "movq %%r11, 96(%0) ;"
1014 "adcq %%rcx, %%rbx ;"
1015 "movq %%rbx, 104(%0) ;"
1016 "movq 56(%1), %%rdx ;"
1017 "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */
1018 "adcq %%rax, %%r13 ;"
1019 "movq %%r13, 112(%0) ;"
1020 "adcq %%rcx, %%r14 ;"
1021 "movq %%r14, 120(%0) ;"
1024 : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
1025 "%r10", "%r11", "%r13", "%r14", "%r15");
1028 static void sqr2_256x256_integer_bmi2(u64
*const c
, const u64
*const a
)
1031 "movq 8(%1), %%rdx ;" /* A[1] */
1032 "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
1033 "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
1034 "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
1036 "movq 16(%1), %%rdx ;" /* A[2] */
1037 "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
1038 "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
1040 "addq %%rax, %%r9 ;"
1041 "adcq %%rdx, %%r10 ;"
1042 "adcq %%rcx, %%r11 ;"
1043 "adcq %%r14, %%r15 ;"
1048 "movq (%1), %%rdx ;" /* A[0] */
1049 "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
1051 "addq %%rax, %%r10 ;"
1052 "adcq %%rcx, %%r11 ;"
1057 "shldq $1, %%r13, %%r14 ;"
1058 "shldq $1, %%r15, %%r13 ;"
1059 "shldq $1, %%r11, %%r15 ;"
1060 "shldq $1, %%r10, %%r11 ;"
1061 "shldq $1, %%r9, %%r10 ;"
1062 "shldq $1, %%r8, %%r9 ;"
1065 /*******************/
1066 "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */
1067 /*******************/
1068 "movq %%rax, 0(%0) ;"
1069 "addq %%rcx, %%r8 ;"
1070 "movq %%r8, 8(%0) ;"
1071 "movq 8(%1), %%rdx ;"
1072 "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */
1073 "adcq %%rax, %%r9 ;"
1074 "movq %%r9, 16(%0) ;"
1075 "adcq %%rcx, %%r10 ;"
1076 "movq %%r10, 24(%0) ;"
1077 "movq 16(%1), %%rdx ;"
1078 "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */
1079 "adcq %%rax, %%r11 ;"
1080 "movq %%r11, 32(%0) ;"
1081 "adcq %%rcx, %%r15 ;"
1082 "movq %%r15, 40(%0) ;"
1083 "movq 24(%1), %%rdx ;"
1084 "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */
1085 "adcq %%rax, %%r13 ;"
1086 "movq %%r13, 48(%0) ;"
1087 "adcq %%rcx, %%r14 ;"
1088 "movq %%r14, 56(%0) ;"
1090 "movq 40(%1), %%rdx ;" /* B[1] */
1091 "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */
1092 "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */
1093 "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */
1095 "movq 48(%1), %%rdx ;" /* B[2] */
1096 "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */
1097 "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */
1099 "addq %%rax, %%r9 ;"
1100 "adcq %%rdx, %%r10 ;"
1101 "adcq %%rcx, %%r11 ;"
1102 "adcq %%r14, %%r15 ;"
1107 "movq 32(%1), %%rdx ;" /* B[0] */
1108 "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */
1110 "addq %%rax, %%r10 ;"
1111 "adcq %%rcx, %%r11 ;"
1116 "shldq $1, %%r13, %%r14 ;"
1117 "shldq $1, %%r15, %%r13 ;"
1118 "shldq $1, %%r11, %%r15 ;"
1119 "shldq $1, %%r10, %%r11 ;"
1120 "shldq $1, %%r9, %%r10 ;"
1121 "shldq $1, %%r8, %%r9 ;"
1124 /*******************/
1125 "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */
1126 /*******************/
1127 "movq %%rax, 64(%0) ;"
1128 "addq %%rcx, %%r8 ;"
1129 "movq %%r8, 72(%0) ;"
1130 "movq 40(%1), %%rdx ;"
1131 "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */
1132 "adcq %%rax, %%r9 ;"
1133 "movq %%r9, 80(%0) ;"
1134 "adcq %%rcx, %%r10 ;"
1135 "movq %%r10, 88(%0) ;"
1136 "movq 48(%1), %%rdx ;"
1137 "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */
1138 "adcq %%rax, %%r11 ;"
1139 "movq %%r11, 96(%0) ;"
1140 "adcq %%rcx, %%r15 ;"
1141 "movq %%r15, 104(%0) ;"
1142 "movq 56(%1), %%rdx ;"
1143 "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */
1144 "adcq %%rax, %%r13 ;"
1145 "movq %%r13, 112(%0) ;"
1146 "adcq %%rcx, %%r14 ;"
1147 "movq %%r14, 120(%0) ;"
1150 : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
1151 "%r11", "%r13", "%r14", "%r15");
1154 static void red_eltfp25519_2w_adx(u64
*const c
, const u64
*const a
)
1157 "movl $38, %%edx; " /* 2*c = 38 = 2^256 */
1158 "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */
1159 "xorl %%ebx, %%ebx ;"
1161 "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */
1162 "adcx %%r10, %%r9 ;"
1163 "adox 8(%1), %%r9 ;"
1164 "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */
1165 "adcx %%r11, %%r10 ;"
1166 "adox 16(%1), %%r10 ;"
1167 "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */
1168 "adcx %%rax, %%r11 ;"
1169 "adox 24(%1), %%r11 ;"
1170 /***************************************/
1171 "adcx %%rbx, %%rcx ;"
1172 "adox %%rbx, %%rcx ;"
1173 "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
1174 "adcx %%rcx, %%r8 ;"
1175 "adcx %%rbx, %%r9 ;"
1176 "movq %%r9, 8(%0) ;"
1177 "adcx %%rbx, %%r10 ;"
1178 "movq %%r10, 16(%0) ;"
1179 "adcx %%rbx, %%r11 ;"
1180 "movq %%r11, 24(%0) ;"
1182 "cmovc %%edx, %%ecx ;"
1183 "addq %%rcx, %%r8 ;"
1186 "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */
1187 "xorl %%ebx, %%ebx ;"
1188 "adox 64(%1), %%r8 ;"
1189 "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */
1190 "adcx %%r10, %%r9 ;"
1191 "adox 72(%1), %%r9 ;"
1192 "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */
1193 "adcx %%r11, %%r10 ;"
1194 "adox 80(%1), %%r10 ;"
1195 "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */
1196 "adcx %%rax, %%r11 ;"
1197 "adox 88(%1), %%r11 ;"
1198 /****************************************/
1199 "adcx %%rbx, %%rcx ;"
1200 "adox %%rbx, %%rcx ;"
1201 "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
1202 "adcx %%rcx, %%r8 ;"
1203 "adcx %%rbx, %%r9 ;"
1204 "movq %%r9, 40(%0) ;"
1205 "adcx %%rbx, %%r10 ;"
1206 "movq %%r10, 48(%0) ;"
1207 "adcx %%rbx, %%r11 ;"
1208 "movq %%r11, 56(%0) ;"
1210 "cmovc %%edx, %%ecx ;"
1211 "addq %%rcx, %%r8 ;"
1212 "movq %%r8, 32(%0) ;"
1215 : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
1219 static void red_eltfp25519_2w_bmi2(u64
*const c
, const u64
*const a
)
1222 "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */
1223 "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
1224 "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
1225 "addq %%r10, %%r9 ;"
1226 "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
1227 "adcq %%r11, %%r10 ;"
1228 "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
1229 "adcq %%rax, %%r11 ;"
1230 /***************************************/
1233 "adcq 8(%1), %%r9 ;"
1234 "adcq 16(%1), %%r10 ;"
1235 "adcq 24(%1), %%r11 ;"
1237 "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
1238 "addq %%rcx, %%r8 ;"
1240 "movq %%r9, 8(%0) ;"
1242 "movq %%r10, 16(%0) ;"
1244 "movq %%r11, 24(%0) ;"
1246 "cmovc %%edx, %%ecx ;"
1247 "addq %%rcx, %%r8 ;"
1250 "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */
1251 "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */
1252 "addq %%r10, %%r9 ;"
1253 "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */
1254 "adcq %%r11, %%r10 ;"
1255 "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */
1256 "adcq %%rax, %%r11 ;"
1257 /****************************************/
1259 "addq 64(%1), %%r8 ;"
1260 "adcq 72(%1), %%r9 ;"
1261 "adcq 80(%1), %%r10 ;"
1262 "adcq 88(%1), %%r11 ;"
1264 "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
1265 "addq %%rcx, %%r8 ;"
1267 "movq %%r9, 40(%0) ;"
1269 "movq %%r10, 48(%0) ;"
1271 "movq %%r11, 56(%0) ;"
1273 "cmovc %%edx, %%ecx ;"
1274 "addq %%rcx, %%r8 ;"
1275 "movq %%r8, 32(%0) ;"
1278 : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
1282 static void mul_256x256_integer_adx(u64
*const c
, const u64
*const a
,
1286 "movq (%1), %%rdx; " /* A[0] */
1287 "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */
1288 "xorl %%r10d, %%r10d ;"
1290 "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */
1291 "adox %%r9, %%r10 ;"
1292 "movq %%r10, 8(%0) ;"
1293 "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */
1294 "adox %%r11, %%r15 ;"
1295 "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */
1296 "adox %%r13, %%r14 ;"
1298 /******************************************/
1299 "adox %%rdx, %%rax ;"
1301 "movq 8(%1), %%rdx; " /* A[1] */
1302 "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
1303 "xorl %%r10d, %%r10d ;"
1304 "adcx 8(%0), %%r8 ;"
1305 "movq %%r8, 8(%0) ;"
1306 "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
1307 "adox %%r9, %%r10 ;"
1308 "adcx %%r15, %%r10 ;"
1309 "movq %%r10, 16(%0) ;"
1310 "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */
1311 "adox %%r11, %%r15 ;"
1312 "adcx %%r14, %%r15 ;"
1314 "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */
1315 "adox %%r13, %%r14 ;"
1316 "adcx %%rax, %%r14 ;"
1318 /******************************************/
1319 "adox %%rdx, %%rax ;"
1320 "adcx %%r8, %%rax ;"
1322 "movq 16(%1), %%rdx; " /* A[2] */
1323 "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
1324 "xorl %%r10d, %%r10d ;"
1325 "adcx 16(%0), %%r8 ;"
1326 "movq %%r8, 16(%0) ;"
1327 "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
1328 "adox %%r9, %%r10 ;"
1329 "adcx %%r15, %%r10 ;"
1330 "movq %%r10, 24(%0) ;"
1331 "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */
1332 "adox %%r11, %%r15 ;"
1333 "adcx %%r14, %%r15 ;"
1335 "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */
1336 "adox %%r13, %%r14 ;"
1337 "adcx %%rax, %%r14 ;"
1339 /******************************************/
1340 "adox %%rdx, %%rax ;"
1341 "adcx %%r8, %%rax ;"
1343 "movq 24(%1), %%rdx; " /* A[3] */
1344 "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
1345 "xorl %%r10d, %%r10d ;"
1346 "adcx 24(%0), %%r8 ;"
1347 "movq %%r8, 24(%0) ;"
1348 "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
1349 "adox %%r9, %%r10 ;"
1350 "adcx %%r15, %%r10 ;"
1351 "movq %%r10, 32(%0) ;"
1352 "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */
1353 "adox %%r11, %%r15 ;"
1354 "adcx %%r14, %%r15 ;"
1355 "movq %%r15, 40(%0) ;"
1357 "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */
1358 "adox %%r13, %%r14 ;"
1359 "adcx %%rax, %%r14 ;"
1360 "movq %%r14, 48(%0) ;"
1362 /******************************************/
1363 "adox %%rdx, %%rax ;"
1364 "adcx %%r8, %%rax ;"
1365 "movq %%rax, 56(%0) ;"
1367 : "r"(c
), "r"(a
), "r"(b
)
1368 : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11",
1369 "%r13", "%r14", "%r15");
1372 static void mul_256x256_integer_bmi2(u64
*const c
, const u64
*const a
,
1376 "movq (%1), %%rdx; " /* A[0] */
1377 "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
1379 "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
1380 "addq %%r10, %%r15 ;"
1381 "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
1382 "adcq %%r8, %%rax ;"
1383 "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
1384 "adcq %%r10, %%rbx ;"
1385 /******************************************/
1388 "movq 8(%1), %%rdx; " /* A[1] */
1389 "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
1390 "addq %%r15, %%r8 ;"
1391 "movq %%r8, 8(%0) ;"
1392 "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
1393 "adcq %%r10, %%r9 ;"
1394 "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
1395 "adcq %%r8, %%r11 ;"
1396 "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
1397 "adcq %%r10, %%r13 ;"
1398 /******************************************/
1401 "addq %%r9, %%rax ;"
1402 "adcq %%r11, %%rbx ;"
1403 "adcq %%r13, %%rcx ;"
1406 "movq 16(%1), %%rdx; " /* A[2] */
1407 "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
1408 "addq %%rax, %%r8 ;"
1409 "movq %%r8, 16(%0) ;"
1410 "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
1411 "adcq %%r10, %%r9 ;"
1412 "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
1413 "adcq %%r8, %%r11 ;"
1414 "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
1415 "adcq %%r10, %%r13 ;"
1416 /******************************************/
1419 "addq %%r9, %%rbx ;"
1420 "adcq %%r11, %%rcx ;"
1421 "adcq %%r13, %%r15 ;"
1424 "movq 24(%1), %%rdx; " /* A[3] */
1425 "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
1426 "addq %%rbx, %%r8 ;"
1427 "movq %%r8, 24(%0) ;"
1428 "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
1429 "adcq %%r10, %%r9 ;"
1430 "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
1431 "adcq %%r8, %%r11 ;"
1432 "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
1433 "adcq %%r10, %%r13 ;"
1434 /******************************************/
1437 "addq %%r9, %%rcx ;"
1438 "movq %%rcx, 32(%0) ;"
1439 "adcq %%r11, %%r15 ;"
1440 "movq %%r15, 40(%0) ;"
1441 "adcq %%r13, %%rax ;"
1442 "movq %%rax, 48(%0) ;"
1444 "movq %%rbx, 56(%0) ;"
1446 : "r"(c
), "r"(a
), "r"(b
)
1447 : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
1448 "%r10", "%r11", "%r13", "%r15");
1451 static void sqr_256x256_integer_adx(u64
*const c
, const u64
*const a
)
1454 "movq (%1), %%rdx ;" /* A[0] */
1455 "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
1456 "xorl %%r15d, %%r15d;"
1457 "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
1458 "adcx %%r14, %%r9 ;"
1459 "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
1460 "adcx %%rax, %%r10 ;"
1461 "movq 24(%1), %%rdx ;" /* A[3] */
1462 "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
1463 "adcx %%rcx, %%r11 ;"
1464 "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
1465 "adcx %%rax, %%rbx ;"
1466 "movq 8(%1), %%rdx ;" /* A[1] */
1467 "adcx %%r15, %%r13 ;"
1468 "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
1470 /******************************************/
1471 "adcx %%r15, %%r14 ;"
1473 "xorl %%r15d, %%r15d;"
1474 "adox %%rax, %%r10 ;"
1476 "adox %%rcx, %%r11 ;"
1478 "adox %%r15, %%rbx ;"
1479 "adcx %%r10, %%r10 ;"
1480 "adox %%r15, %%r13 ;"
1481 "adcx %%r11, %%r11 ;"
1482 "adox %%r15, %%r14 ;"
1483 "adcx %%rbx, %%rbx ;"
1484 "adcx %%r13, %%r13 ;"
1485 "adcx %%r14, %%r14 ;"
1487 "movq (%1), %%rdx ;"
1488 "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
1489 /*******************/
1490 "movq %%rax, 0(%0) ;"
1491 "addq %%rcx, %%r8 ;"
1492 "movq %%r8, 8(%0) ;"
1493 "movq 8(%1), %%rdx ;"
1494 "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
1495 "adcq %%rax, %%r9 ;"
1496 "movq %%r9, 16(%0) ;"
1497 "adcq %%rcx, %%r10 ;"
1498 "movq %%r10, 24(%0) ;"
1499 "movq 16(%1), %%rdx ;"
1500 "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
1501 "adcq %%rax, %%r11 ;"
1502 "movq %%r11, 32(%0) ;"
1503 "adcq %%rcx, %%rbx ;"
1504 "movq %%rbx, 40(%0) ;"
1505 "movq 24(%1), %%rdx ;"
1506 "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
1507 "adcq %%rax, %%r13 ;"
1508 "movq %%r13, 48(%0) ;"
1509 "adcq %%rcx, %%r14 ;"
1510 "movq %%r14, 56(%0) ;"
1513 : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
1514 "%r10", "%r11", "%r13", "%r14", "%r15");
1517 static void sqr_256x256_integer_bmi2(u64
*const c
, const u64
*const a
)
1520 "movq 8(%1), %%rdx ;" /* A[1] */
1521 "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
1522 "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
1523 "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
1525 "movq 16(%1), %%rdx ;" /* A[2] */
1526 "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
1527 "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
1529 "addq %%rax, %%r9 ;"
1530 "adcq %%rdx, %%r10 ;"
1531 "adcq %%rcx, %%r11 ;"
1532 "adcq %%r14, %%r15 ;"
1537 "movq (%1), %%rdx ;" /* A[0] */
1538 "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
1540 "addq %%rax, %%r10 ;"
1541 "adcq %%rcx, %%r11 ;"
1546 "shldq $1, %%r13, %%r14 ;"
1547 "shldq $1, %%r15, %%r13 ;"
1548 "shldq $1, %%r11, %%r15 ;"
1549 "shldq $1, %%r10, %%r11 ;"
1550 "shldq $1, %%r9, %%r10 ;"
1551 "shldq $1, %%r8, %%r9 ;"
1554 /*******************/
1555 "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
1556 /*******************/
1557 "movq %%rax, 0(%0) ;"
1558 "addq %%rcx, %%r8 ;"
1559 "movq %%r8, 8(%0) ;"
1560 "movq 8(%1), %%rdx ;"
1561 "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
1562 "adcq %%rax, %%r9 ;"
1563 "movq %%r9, 16(%0) ;"
1564 "adcq %%rcx, %%r10 ;"
1565 "movq %%r10, 24(%0) ;"
1566 "movq 16(%1), %%rdx ;"
1567 "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
1568 "adcq %%rax, %%r11 ;"
1569 "movq %%r11, 32(%0) ;"
1570 "adcq %%rcx, %%r15 ;"
1571 "movq %%r15, 40(%0) ;"
1572 "movq 24(%1), %%rdx ;"
1573 "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
1574 "adcq %%rax, %%r13 ;"
1575 "movq %%r13, 48(%0) ;"
1576 "adcq %%rcx, %%r14 ;"
1577 "movq %%r14, 56(%0) ;"
1580 : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
1581 "%r11", "%r13", "%r14", "%r15");
1584 static void red_eltfp25519_1w_adx(u64
*const c
, const u64
*const a
)
1587 "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
1588 "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
1589 "xorl %%ebx, %%ebx ;"
1591 "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
1592 "adcx %%r10, %%r9 ;"
1593 "adox 8(%1), %%r9 ;"
1594 "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
1595 "adcx %%r11, %%r10 ;"
1596 "adox 16(%1), %%r10 ;"
1597 "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
1598 "adcx %%rax, %%r11 ;"
1599 "adox 24(%1), %%r11 ;"
1600 /***************************************/
1601 "adcx %%rbx, %%rcx ;"
1602 "adox %%rbx, %%rcx ;"
1603 "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
1604 "adcx %%rcx, %%r8 ;"
1605 "adcx %%rbx, %%r9 ;"
1606 "movq %%r9, 8(%0) ;"
1607 "adcx %%rbx, %%r10 ;"
1608 "movq %%r10, 16(%0) ;"
1609 "adcx %%rbx, %%r11 ;"
1610 "movq %%r11, 24(%0) ;"
1612 "cmovc %%edx, %%ecx ;"
1613 "addq %%rcx, %%r8 ;"
1617 : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
1621 static void red_eltfp25519_1w_bmi2(u64
*const c
, const u64
*const a
)
1624 "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
1625 "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
1626 "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
1627 "addq %%r10, %%r9 ;"
1628 "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
1629 "adcq %%r11, %%r10 ;"
1630 "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
1631 "adcq %%rax, %%r11 ;"
1632 /***************************************/
1635 "adcq 8(%1), %%r9 ;"
1636 "adcq 16(%1), %%r10 ;"
1637 "adcq 24(%1), %%r11 ;"
1639 "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
1640 "addq %%rcx, %%r8 ;"
1642 "movq %%r9, 8(%0) ;"
1644 "movq %%r10, 16(%0) ;"
1646 "movq %%r11, 24(%0) ;"
1648 "cmovc %%edx, %%ecx ;"
1649 "addq %%rcx, %%r8 ;"
1653 : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
1657 static __always_inline
void
1658 add_eltfp25519_1w_adx(u64
*const c
, const u64
*const a
, const u64
*const b
)
1662 "xorl %%ecx, %%ecx ;"
1665 "movq 8(%2), %%r9 ;"
1666 "adcx 8(%1), %%r9 ;"
1667 "movq 16(%2), %%r10 ;"
1668 "adcx 16(%1), %%r10 ;"
1669 "movq 24(%2), %%r11 ;"
1670 "adcx 24(%1), %%r11 ;"
1671 "cmovc %%eax, %%ecx ;"
1672 "xorl %%eax, %%eax ;"
1673 "adcx %%rcx, %%r8 ;"
1674 "adcx %%rax, %%r9 ;"
1675 "movq %%r9, 8(%0) ;"
1676 "adcx %%rax, %%r10 ;"
1677 "movq %%r10, 16(%0) ;"
1678 "adcx %%rax, %%r11 ;"
1679 "movq %%r11, 24(%0) ;"
1681 "cmovc %%ecx, %%eax ;"
1682 "addq %%rax, %%r8 ;"
1685 : "r"(c
), "r"(a
), "r"(b
)
1686 : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
1689 static __always_inline
void
1690 add_eltfp25519_1w_bmi2(u64
*const c
, const u64
*const a
, const u64
*const b
)
1696 "movq 8(%2), %%r9 ;"
1697 "adcq 8(%1), %%r9 ;"
1698 "movq 16(%2), %%r10 ;"
1699 "adcq 16(%1), %%r10 ;"
1700 "movq 24(%2), %%r11 ;"
1701 "adcq 24(%1), %%r11 ;"
1703 "cmovc %%eax, %%ecx ;"
1704 "addq %%rcx, %%r8 ;"
1706 "movq %%r9, 8(%0) ;"
1708 "movq %%r10, 16(%0) ;"
1710 "movq %%r11, 24(%0) ;"
1712 "cmovc %%eax, %%ecx ;"
1713 "addq %%rcx, %%r8 ;"
1716 : "r"(c
), "r"(a
), "r"(b
)
1717 : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
1720 static __always_inline
void
1721 sub_eltfp25519_1w(u64
*const c
, const u64
*const a
, const u64
*const b
)
1727 "movq 8(%1), %%r9 ;"
1728 "sbbq 8(%2), %%r9 ;"
1729 "movq 16(%1), %%r10 ;"
1730 "sbbq 16(%2), %%r10 ;"
1731 "movq 24(%1), %%r11 ;"
1732 "sbbq 24(%2), %%r11 ;"
1734 "cmovc %%eax, %%ecx ;"
1735 "subq %%rcx, %%r8 ;"
1737 "movq %%r9, 8(%0) ;"
1739 "movq %%r10, 16(%0) ;"
1741 "movq %%r11, 24(%0) ;"
1743 "cmovc %%eax, %%ecx ;"
1744 "subq %%rcx, %%r8 ;"
1747 : "r"(c
), "r"(a
), "r"(b
)
1748 : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
1751 /* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */
1752 static __always_inline
void
1753 mul_a24_eltfp25519_1w(u64
*const c
, const u64
*const a
)
1755 const u64 a24
= 121666;
1758 "mulx (%1), %%r8, %%r10 ;"
1759 "mulx 8(%1), %%r9, %%r11 ;"
1760 "addq %%r10, %%r9 ;"
1761 "mulx 16(%1), %%r10, %%rax ;"
1762 "adcq %%r11, %%r10 ;"
1763 "mulx 24(%1), %%r11, %%rcx ;"
1764 "adcq %%rax, %%r11 ;"
1765 /**************************/
1767 "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/
1768 "imul %%rdx, %%rcx ;"
1769 "addq %%rcx, %%r8 ;"
1771 "movq %%r9, 8(%0) ;"
1773 "movq %%r10, 16(%0) ;"
1775 "movq %%r11, 24(%0) ;"
1777 "cmovc %%edx, %%ecx ;"
1778 "addq %%rcx, %%r8 ;"
1781 : "r"(c
), "r"(a
), "r"(a24
)
1782 : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
1786 static void inv_eltfp25519_1w_adx(u64
*const c
, const u64
*const a
)
1789 eltfp25519_1w_buffer buffer
;
1790 eltfp25519_1w x0
, x1
, x2
;
1795 T
[1] = c
; /* x^(-1) */
1799 copy_eltfp25519_1w(T
[1], a
);
1800 sqrn_eltfp25519_1w_adx(T
[1], 1);
1801 copy_eltfp25519_1w(T
[2], T
[1]);
1802 sqrn_eltfp25519_1w_adx(T
[2], 2);
1803 mul_eltfp25519_1w_adx(T
[0], a
, T
[2]);
1804 mul_eltfp25519_1w_adx(T
[1], T
[1], T
[0]);
1805 copy_eltfp25519_1w(T
[2], T
[1]);
1806 sqrn_eltfp25519_1w_adx(T
[2], 1);
1807 mul_eltfp25519_1w_adx(T
[0], T
[0], T
[2]);
1808 copy_eltfp25519_1w(T
[2], T
[0]);
1809 sqrn_eltfp25519_1w_adx(T
[2], 5);
1810 mul_eltfp25519_1w_adx(T
[0], T
[0], T
[2]);
1811 copy_eltfp25519_1w(T
[2], T
[0]);
1812 sqrn_eltfp25519_1w_adx(T
[2], 10);
1813 mul_eltfp25519_1w_adx(T
[2], T
[2], T
[0]);
1814 copy_eltfp25519_1w(T
[3], T
[2]);
1815 sqrn_eltfp25519_1w_adx(T
[3], 20);
1816 mul_eltfp25519_1w_adx(T
[3], T
[3], T
[2]);
1817 sqrn_eltfp25519_1w_adx(T
[3], 10);
1818 mul_eltfp25519_1w_adx(T
[3], T
[3], T
[0]);
1819 copy_eltfp25519_1w(T
[0], T
[3]);
1820 sqrn_eltfp25519_1w_adx(T
[0], 50);
1821 mul_eltfp25519_1w_adx(T
[0], T
[0], T
[3]);
1822 copy_eltfp25519_1w(T
[2], T
[0]);
1823 sqrn_eltfp25519_1w_adx(T
[2], 100);
1824 mul_eltfp25519_1w_adx(T
[2], T
[2], T
[0]);
1825 sqrn_eltfp25519_1w_adx(T
[2], 50);
1826 mul_eltfp25519_1w_adx(T
[2], T
[2], T
[3]);
1827 sqrn_eltfp25519_1w_adx(T
[2], 5);
1828 mul_eltfp25519_1w_adx(T
[1], T
[1], T
[2]);
1830 memzero_explicit(&m
, sizeof(m
));
1833 static void inv_eltfp25519_1w_bmi2(u64
*const c
, const u64
*const a
)
1836 eltfp25519_1w_buffer buffer
;
1837 eltfp25519_1w x0
, x1
, x2
;
1842 T
[1] = c
; /* x^(-1) */
1846 copy_eltfp25519_1w(T
[1], a
);
1847 sqrn_eltfp25519_1w_bmi2(T
[1], 1);
1848 copy_eltfp25519_1w(T
[2], T
[1]);
1849 sqrn_eltfp25519_1w_bmi2(T
[2], 2);
1850 mul_eltfp25519_1w_bmi2(T
[0], a
, T
[2]);
1851 mul_eltfp25519_1w_bmi2(T
[1], T
[1], T
[0]);
1852 copy_eltfp25519_1w(T
[2], T
[1]);
1853 sqrn_eltfp25519_1w_bmi2(T
[2], 1);
1854 mul_eltfp25519_1w_bmi2(T
[0], T
[0], T
[2]);
1855 copy_eltfp25519_1w(T
[2], T
[0]);
1856 sqrn_eltfp25519_1w_bmi2(T
[2], 5);
1857 mul_eltfp25519_1w_bmi2(T
[0], T
[0], T
[2]);
1858 copy_eltfp25519_1w(T
[2], T
[0]);
1859 sqrn_eltfp25519_1w_bmi2(T
[2], 10);
1860 mul_eltfp25519_1w_bmi2(T
[2], T
[2], T
[0]);
1861 copy_eltfp25519_1w(T
[3], T
[2]);
1862 sqrn_eltfp25519_1w_bmi2(T
[3], 20);
1863 mul_eltfp25519_1w_bmi2(T
[3], T
[3], T
[2]);
1864 sqrn_eltfp25519_1w_bmi2(T
[3], 10);
1865 mul_eltfp25519_1w_bmi2(T
[3], T
[3], T
[0]);
1866 copy_eltfp25519_1w(T
[0], T
[3]);
1867 sqrn_eltfp25519_1w_bmi2(T
[0], 50);
1868 mul_eltfp25519_1w_bmi2(T
[0], T
[0], T
[3]);
1869 copy_eltfp25519_1w(T
[2], T
[0]);
1870 sqrn_eltfp25519_1w_bmi2(T
[2], 100);
1871 mul_eltfp25519_1w_bmi2(T
[2], T
[2], T
[0]);
1872 sqrn_eltfp25519_1w_bmi2(T
[2], 50);
1873 mul_eltfp25519_1w_bmi2(T
[2], T
[2], T
[3]);
1874 sqrn_eltfp25519_1w_bmi2(T
[2], 5);
1875 mul_eltfp25519_1w_bmi2(T
[1], T
[1], T
[2]);
1877 memzero_explicit(&m
, sizeof(m
));
1880 /* Given c, a 256-bit number, fred_eltfp25519_1w updates c
1881 * with a number such that 0 <= C < 2**255-19. */
1883 static __always_inline
void fred_eltfp25519_1w(u64
*const c
)
1885 u64 tmp0
= 38, tmp1
= 19;
1887 "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */
1888 "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */
1890 /* Add either 19 or 38 to c */
1896 /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */
1898 "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */
1899 "btrq $63, %3 ;" /* Clear bit 255 */
1901 /* Subtract 19 if necessary */
1907 : "+r"(c
[0]), "+r"(c
[1]), "+r"(c
[2]), "+r"(c
[3]), "+r"(tmp0
),
1913 static __always_inline
void cswap(u8 bit
, u64
*const px
, u64
*const py
)
1930 : "+r"(px
[0]), "+r"(px
[1]), "+r"(px
[2]), "+r"(px
[3]),
1931 "+r"(py
[0]), "+r"(py
[1]), "+r"(py
[2]), "+r"(py
[3]),
/*
 * cselect - conditional select between two field elements.
 * @bit: selector
 * @px:  destination element, four u64 limbs, read and written
 * @py:  source element, four u64 limbs, only read
 *
 * The asm body is not visible in this view.
 * NOTE(review): presumably px takes the value of py when bit is set and
 * is left unchanged otherwise - confirm against the full asm.
 */
1938 static __always_inline
void cselect(u8 bit
, u64
*const px
, const u64
*const py
)
/* Outputs: the limbs of px, read-write in registers. */
1946 : "+r"(px
[0]), "+r"(px
[1]), "+r"(px
[2]), "+r"(px
[3])
/* Inputs: bit in a register; each limb of py in a register or in memory. */
1947 : "r"(bit
), "rm"(py
[0]), "rm"(py
[1]), "rm"(py
[2]), "rm"(py
[3])
/*
 * curve25519_adx - Curve25519 scalar multiplication with a caller-supplied
 * point, using the *_adx field helpers.
 * @shared:      output, CURVE25519_KEY_SIZE bytes
 * @private_key: input scalar; copied into local scratch and clamped there
 * @session_key: input point; copied into local scratch, top bit masked
 *
 * All working state lives in the scratch aggregate m, which is wiped with
 * memzero_explicit() before returning.  The ladder walks the key words
 * from key[3] down to key[0]; the result Qx/Qz is converted with one
 * inversion and one multiplication and then fully reduced.
 */
1952 static void curve25519_adx(u8 shared
[CURVE25519_KEY_SIZE
],
1953 const u8 private_key
[CURVE25519_KEY_SIZE
],
1954 const u8 session_key
[CURVE25519_KEY_SIZE
])
/* Members of the scratch aggregate m (its declaration line is not visible here). */
1957 u64 buffer
[4 * NUM_WORDS_ELTFP25519
];
1958 u64 coordinates
[4 * NUM_WORDS_ELTFP25519
];
1959 u64 workspace
[6 * NUM_WORDS_ELTFP25519
];
1960 u8 session
[CURVE25519_KEY_SIZE
];
1961 u8
private[CURVE25519_KEY_SIZE
];
/* X1 and key view the two byte buffers as arrays of u64 limbs. */
1966 u64
*const X1
= (u64
*)m
.session
;
1967 u64
*const key
= (u64
*)m
.private;
/* P = (Px, Pz) and Q = (Qx, Qz): four limbs each inside m.coordinates. */
1968 u64
*const Px
= m
.coordinates
+ 0;
1969 u64
*const Pz
= m
.coordinates
+ 4;
1970 u64
*const Qx
= m
.coordinates
+ 8;
1971 u64
*const Qz
= m
.coordinates
+ 12;
/* Two-element views: X2Z2 starts at Qx, X3Z3 starts at Px. */
1976 u64
*const X2Z2
= Qx
;
1977 u64
*const X3Z3
= Px
;
/* Workspace slots; note that D (offset 8) sits before C (offset 12). */
1979 u64
*const A
= m
.workspace
+ 0;
1980 u64
*const B
= m
.workspace
+ 4;
1981 u64
*const D
= m
.workspace
+ 8;
1982 u64
*const C
= m
.workspace
+ 12;
1983 u64
*const DA
= m
.workspace
+ 16;
1984 u64
*const CB
= m
.workspace
+ 20;
/* DACB covers the adjacent DA and CB slots. */
1987 u64
*const DACB
= DA
;
/* Work on private copies; the caller buffers are const and stay untouched. */
1989 memcpy(m
.private, private_key
, sizeof(m
.private));
1990 memcpy(m
.session
, session_key
, sizeof(m
.session
));
1992 curve25519_clamp_secret(m
.private);
1995 * When receiving such an array, implementations of curve25519
1996 * MUST mask the most-significant bit in the final byte. This
1997 * is done to preserve compatibility with point formats which
1998 * reserve the sign bit for use in other protocols and to
1999 * increase resistance to implementation fingerprinting
/* (1 << (255 % 8)) - 1 == 0x7f: clears bit 7 of the last byte. */
2001 m
.session
[CURVE25519_KEY_SIZE
- 1] &= (1 << (255 % 8)) - 1;
/*
 * Px starts as X1; Pz, Qx and Qz are zeroed here.  Any further
 * initialisation happens on lines that are not visible in this view.
 */
2003 copy_eltfp25519_1w(Px
, X1
);
2004 setzero_eltfp25519_1w(Pz
);
2005 setzero_eltfp25519_1w(Qx
);
2006 setzero_eltfp25519_1w(Qz
);
/* Ladder over the key words, key[3] first; bit j of key[i] drives a step. */
2014 for (i
= 3; i
>= 0; --i
) {
2016 u64 bit
= (key
[i
] >> j
) & 0x1;
/* swap is 1 exactly when this bit differs from the previous one. */
2017 u64 swap
= bit
^ prev
;
2020 add_eltfp25519_1w_adx(A
, X2
, Z2
); /* A = (X2+Z2) */
2021 sub_eltfp25519_1w(B
, X2
, Z2
); /* B = (X2-Z2) */
2022 add_eltfp25519_1w_adx(C
, X3
, Z3
); /* C = (X3+Z3) */
2023 sub_eltfp25519_1w(D
, X3
, Z3
); /* D = (X3-Z3) */
2024 mul_eltfp25519_2w_adx(DACB
, AB
, DC
); /* [DA|CB] = [A|B]*[D|C] */
/* Selection between (A, B) and (C, D) is driven by swap via cselect(). */
2026 cselect(swap
, A
, C
);
2027 cselect(swap
, B
, D
);
2029 sqr_eltfp25519_2w_adx(AB
); /* [AA|BB] = [A^2|B^2] */
2030 add_eltfp25519_1w_adx(X3
, DA
, CB
); /* X3 = (DA+CB) */
2031 sub_eltfp25519_1w(Z3
, DA
, CB
); /* Z3 = (DA-CB) */
2032 sqr_eltfp25519_2w_adx(X3Z3
); /* [X3|Z3] = [(DA+CB)|(DA-CB)]^2 */
2034 copy_eltfp25519_1w(X2
, B
); /* X2 = B^2 */
2035 sub_eltfp25519_1w(Z2
, A
, B
); /* Z2 = E = AA-BB */
2037 mul_a24_eltfp25519_1w(B
, Z2
); /* B = a24*E */
2038 add_eltfp25519_1w_adx(B
, B
, X2
); /* B = a24*E+B */
2039 mul_eltfp25519_2w_adx(X2Z2
, X2Z2
, AB
); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
2040 mul_eltfp25519_1w_adx(Z3
, Z3
, X1
); /* Z3 = Z3*X1 */
/* shared = Qx * inv(Qz), followed by the final reduction. */
2046 inv_eltfp25519_1w_adx(A
, Qz
);
2047 mul_eltfp25519_1w_adx((u64
*)shared
, Qx
, A
);
2048 fred_eltfp25519_1w((u64
*)shared
);
/* Wipe the key copies and every intermediate held in m. */
2050 memzero_explicit(&m
, sizeof(m
));
/*
 * curve25519_adx_base - scalar multiplication driven by the precomputed
 * table table_ladder_8k, using the *_adx field helpers.  It is the ADX
 * choice of curve25519_base_arch().
 * @session_key: output, CURVE25519_KEY_SIZE bytes
 * @private_key: input scalar; copied into local scratch and clamped there
 *
 * Two phases are visible: a table-driven loop over the key bits, bounded
 * per word by ite[], followed by q further iterations that only use
 * add/sub/square/multiply on (Ur1, Zr1).  Ur1/Zr1 is then converted to
 * affine form, fully reduced, and the scratch aggregate m is wiped.
 */
2053 static void curve25519_adx_base(u8 session_key
[CURVE25519_KEY_SIZE
],
2054 const u8 private_key
[CURVE25519_KEY_SIZE
])
/* Members of the scratch aggregate m (its declaration line is not visible here). */
2057 u64 buffer
[4 * NUM_WORDS_ELTFP25519
];
2058 u64 coordinates
[4 * NUM_WORDS_ELTFP25519
];
2059 u64 workspace
[4 * NUM_WORDS_ELTFP25519
];
2060 u8
private[CURVE25519_KEY_SIZE
];
/* Upper bound for the bit index j within each of the four key words. */
2063 const int ite
[4] = { 64, 64, 64, 63 };
2067 int i
= 0, j
= 0, k
= 0;
/* key views the clamped secret as u64 limbs. */
2068 u64
*const key
= (u64
*)m
.private;
/* Two points (Ur1, Zr1) and (Ur2, Zr2): four limbs each in m.coordinates. */
2069 u64
*const Ur1
= m
.coordinates
+ 0;
2070 u64
*const Zr1
= m
.coordinates
+ 4;
2071 u64
*const Ur2
= m
.coordinates
+ 8;
2072 u64
*const Zr2
= m
.coordinates
+ 12;
/* Two-element views starting at offsets 0 and 8 of m.coordinates. */
2074 u64
*const UZr1
= m
.coordinates
+ 0;
2075 u64
*const ZUr2
= m
.coordinates
+ 8;
/* Workspace slots A, B, C, D and the two-element views AB and CD. */
2077 u64
*const A
= m
.workspace
+ 0;
2078 u64
*const B
= m
.workspace
+ 4;
2079 u64
*const C
= m
.workspace
+ 8;
2080 u64
*const D
= m
.workspace
+ 12;
2082 u64
*const AB
= m
.workspace
+ 0;
2083 u64
*const CD
= m
.workspace
+ 8;
/* P is the precomputed table; entry k occupies P[4 * k] .. P[4 * k + 3]. */
2085 const u64
*const P
= table_ladder_8k
;
/* Clamp a private copy; the caller buffer is const and stays untouched. */
2087 memcpy(m
.private, private_key
, sizeof(m
.private));
2089 curve25519_clamp_secret(m
.private);
2091 setzero_eltfp25519_1w(Ur1
);
2092 setzero_eltfp25519_1w(Zr1
);
2093 setzero_eltfp25519_1w(Zr2
);
/* Ur2 is loaded with a fixed 256-bit constant, limb 3 down to limb 0. */
2099 Ur2
[3] = 0x1eaecdeee27cab34UL
;
2100 Ur2
[2] = 0xadc7a0b9235d48e2UL
;
2101 Ur2
[1] = 0xbbf095ae14b2edf8UL
;
2102 Ur2
[0] = 0x7e94e1fec82faabdUL
;
/* Table-driven phase: one step per key bit, lowest word first. */
2106 for (i
= 0; i
< NUM_WORDS_ELTFP25519
; ++i
) {
2107 while (j
< ite
[i
]) {
2108 u64 bit
= (key
[i
] >> j
) & 0x1;
/* k is the table index for global bit position 64 * i + j, offset by q. */
2109 k
= (64 * i
+ j
- q
);
2111 cswap(swap
, Ur1
, Ur2
);
2112 cswap(swap
, Zr1
, Zr2
);
2115 sub_eltfp25519_1w(B
, Ur1
, Zr1
); /* B = Ur1-Zr1 */
2116 add_eltfp25519_1w_adx(A
, Ur1
, Zr1
); /* A = Ur1+Zr1 */
2117 mul_eltfp25519_1w_adx(C
, &P
[4 * k
], B
); /* C = M*B */
2118 sub_eltfp25519_1w(B
, A
, C
); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
2119 add_eltfp25519_1w_adx(A
, A
, C
); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
2120 sqr_eltfp25519_2w_adx(AB
); /* A = A^2 | B = B^2 */
2121 mul_eltfp25519_2w_adx(UZr1
, ZUr2
, AB
); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
/* Second phase: q iterations that use no table entry and no key bit. */
2128 for (i
= 0; i
< q
; ++i
) {
2129 add_eltfp25519_1w_adx(A
, Ur1
, Zr1
); /* A = Ur1+Zr1 */
2130 sub_eltfp25519_1w(B
, Ur1
, Zr1
); /* B = Ur1-Zr1 */
2131 sqr_eltfp25519_2w_adx(AB
); /* A = A**2 B = B**2 */
2132 copy_eltfp25519_1w(C
, B
); /* C = B */
2133 sub_eltfp25519_1w(B
, A
, B
); /* B = A-B */
2134 mul_a24_eltfp25519_1w(D
, B
); /* D = my_a24*B */
2135 add_eltfp25519_1w_adx(D
, D
, C
); /* D = D+C */
2136 mul_eltfp25519_2w_adx(UZr1
, AB
, CD
); /* [Ur1|Zr1] = [A|B]*[C|D] */
2139 /* Convert to affine coordinates */
2140 inv_eltfp25519_1w_adx(A
, Zr1
);
2141 mul_eltfp25519_1w_adx((u64
*)session_key
, Ur1
, A
);
2142 fred_eltfp25519_1w((u64
*)session_key
);
/* Wipe the key copy and every intermediate held in m. */
2144 memzero_explicit(&m
, sizeof(m
));
/*
 * curve25519_bmi2 - Curve25519 scalar multiplication with a caller-supplied
 * point, using the *_bmi2 field helpers.  Same structure as
 * curve25519_adx(), with the BMI2 variants of add/mul/sqr/inv.
 * @shared:      output, CURVE25519_KEY_SIZE bytes
 * @private_key: input scalar; copied into local scratch and clamped there
 * @session_key: input point; copied into local scratch, top bit masked
 *
 * All working state lives in the scratch aggregate m, which is wiped with
 * memzero_explicit() before returning.
 */
2147 static void curve25519_bmi2(u8 shared
[CURVE25519_KEY_SIZE
],
2148 const u8 private_key
[CURVE25519_KEY_SIZE
],
2149 const u8 session_key
[CURVE25519_KEY_SIZE
])
/* Members of the scratch aggregate m (its declaration line is not visible here). */
2152 u64 buffer
[4 * NUM_WORDS_ELTFP25519
];
2153 u64 coordinates
[4 * NUM_WORDS_ELTFP25519
];
2154 u64 workspace
[6 * NUM_WORDS_ELTFP25519
];
2155 u8 session
[CURVE25519_KEY_SIZE
];
2156 u8
private[CURVE25519_KEY_SIZE
];
/* X1 and key view the two byte buffers as arrays of u64 limbs. */
2161 u64
*const X1
= (u64
*)m
.session
;
2162 u64
*const key
= (u64
*)m
.private;
/* P = (Px, Pz) and Q = (Qx, Qz): four limbs each inside m.coordinates. */
2163 u64
*const Px
= m
.coordinates
+ 0;
2164 u64
*const Pz
= m
.coordinates
+ 4;
2165 u64
*const Qx
= m
.coordinates
+ 8;
2166 u64
*const Qz
= m
.coordinates
+ 12;
/* Two-element views: X2Z2 starts at Qx, X3Z3 starts at Px. */
2171 u64
*const X2Z2
= Qx
;
2172 u64
*const X3Z3
= Px
;
/* Workspace slots; note that D (offset 8) sits before C (offset 12). */
2174 u64
*const A
= m
.workspace
+ 0;
2175 u64
*const B
= m
.workspace
+ 4;
2176 u64
*const D
= m
.workspace
+ 8;
2177 u64
*const C
= m
.workspace
+ 12;
2178 u64
*const DA
= m
.workspace
+ 16;
2179 u64
*const CB
= m
.workspace
+ 20;
/* DACB covers the adjacent DA and CB slots. */
2182 u64
*const DACB
= DA
;
/* Work on private copies; the caller buffers are const and stay untouched. */
2184 memcpy(m
.private, private_key
, sizeof(m
.private));
2185 memcpy(m
.session
, session_key
, sizeof(m
.session
));
2187 curve25519_clamp_secret(m
.private);
2190 * When receiving such an array, implementations of curve25519
2191 * MUST mask the most-significant bit in the final byte. This
2192 * is done to preserve compatibility with point formats which
2193 * reserve the sign bit for use in other protocols and to
2194 * increase resistance to implementation fingerprinting
/* (1 << (255 % 8)) - 1 == 0x7f: clears bit 7 of the last byte. */
2196 m
.session
[CURVE25519_KEY_SIZE
- 1] &= (1 << (255 % 8)) - 1;
/*
 * Px starts as X1; Pz, Qx and Qz are zeroed here.  Any further
 * initialisation happens on lines that are not visible in this view.
 */
2198 copy_eltfp25519_1w(Px
, X1
);
2199 setzero_eltfp25519_1w(Pz
);
2200 setzero_eltfp25519_1w(Qx
);
2201 setzero_eltfp25519_1w(Qz
);
/* Ladder over the key words, key[3] first; bit j of key[i] drives a step. */
2209 for (i
= 3; i
>= 0; --i
) {
2211 u64 bit
= (key
[i
] >> j
) & 0x1;
/* swap is 1 exactly when this bit differs from the previous one. */
2212 u64 swap
= bit
^ prev
;
2215 add_eltfp25519_1w_bmi2(A
, X2
, Z2
); /* A = (X2+Z2) */
2216 sub_eltfp25519_1w(B
, X2
, Z2
); /* B = (X2-Z2) */
2217 add_eltfp25519_1w_bmi2(C
, X3
, Z3
); /* C = (X3+Z3) */
2218 sub_eltfp25519_1w(D
, X3
, Z3
); /* D = (X3-Z3) */
2219 mul_eltfp25519_2w_bmi2(DACB
, AB
, DC
); /* [DA|CB] = [A|B]*[D|C] */
/* Selection between (A, B) and (C, D) is driven by swap via cselect(). */
2221 cselect(swap
, A
, C
);
2222 cselect(swap
, B
, D
);
2224 sqr_eltfp25519_2w_bmi2(AB
); /* [AA|BB] = [A^2|B^2] */
2225 add_eltfp25519_1w_bmi2(X3
, DA
, CB
); /* X3 = (DA+CB) */
2226 sub_eltfp25519_1w(Z3
, DA
, CB
); /* Z3 = (DA-CB) */
2227 sqr_eltfp25519_2w_bmi2(X3Z3
); /* [X3|Z3] = [(DA+CB)|(DA-CB)]^2 */
2229 copy_eltfp25519_1w(X2
, B
); /* X2 = B^2 */
2230 sub_eltfp25519_1w(Z2
, A
, B
); /* Z2 = E = AA-BB */
2232 mul_a24_eltfp25519_1w(B
, Z2
); /* B = a24*E */
2233 add_eltfp25519_1w_bmi2(B
, B
, X2
); /* B = a24*E+B */
2234 mul_eltfp25519_2w_bmi2(X2Z2
, X2Z2
, AB
); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
2235 mul_eltfp25519_1w_bmi2(Z3
, Z3
, X1
); /* Z3 = Z3*X1 */
/* shared = Qx * inv(Qz), followed by the final reduction. */
2241 inv_eltfp25519_1w_bmi2(A
, Qz
);
2242 mul_eltfp25519_1w_bmi2((u64
*)shared
, Qx
, A
);
2243 fred_eltfp25519_1w((u64
*)shared
);
/* Wipe the key copies and every intermediate held in m. */
2245 memzero_explicit(&m
, sizeof(m
));
/*
 * curve25519_bmi2_base - scalar multiplication driven by the precomputed
 * table table_ladder_8k, using the *_bmi2 field helpers.  It is the BMI2
 * choice of curve25519_base_arch().
 * @session_key: output, CURVE25519_KEY_SIZE bytes
 * @private_key: input scalar; copied into local scratch and clamped there
 *
 * Two phases are visible: a table-driven loop over the key bits, bounded
 * per word by ite[], followed by q further iterations that only use
 * add/sub/square/multiply on (Ur1, Zr1).  Ur1/Zr1 is then converted to
 * affine form, fully reduced, and the scratch aggregate m is wiped.
 */
2248 static void curve25519_bmi2_base(u8 session_key
[CURVE25519_KEY_SIZE
],
2249 const u8 private_key
[CURVE25519_KEY_SIZE
])
/* Members of the scratch aggregate m (its declaration line is not visible here). */
2252 u64 buffer
[4 * NUM_WORDS_ELTFP25519
];
2253 u64 coordinates
[4 * NUM_WORDS_ELTFP25519
];
2254 u64 workspace
[4 * NUM_WORDS_ELTFP25519
];
2255 u8
private[CURVE25519_KEY_SIZE
];
/* Upper bound for the bit index j within each of the four key words. */
2258 const int ite
[4] = { 64, 64, 64, 63 };
2262 int i
= 0, j
= 0, k
= 0;
/* key views the clamped secret as u64 limbs. */
2263 u64
*const key
= (u64
*)m
.private;
/* Two points (Ur1, Zr1) and (Ur2, Zr2): four limbs each in m.coordinates. */
2264 u64
*const Ur1
= m
.coordinates
+ 0;
2265 u64
*const Zr1
= m
.coordinates
+ 4;
2266 u64
*const Ur2
= m
.coordinates
+ 8;
2267 u64
*const Zr2
= m
.coordinates
+ 12;
/* Two-element views starting at offsets 0 and 8 of m.coordinates. */
2269 u64
*const UZr1
= m
.coordinates
+ 0;
2270 u64
*const ZUr2
= m
.coordinates
+ 8;
/* Workspace slots A, B, C, D and the two-element views AB and CD. */
2272 u64
*const A
= m
.workspace
+ 0;
2273 u64
*const B
= m
.workspace
+ 4;
2274 u64
*const C
= m
.workspace
+ 8;
2275 u64
*const D
= m
.workspace
+ 12;
2277 u64
*const AB
= m
.workspace
+ 0;
2278 u64
*const CD
= m
.workspace
+ 8;
/* P is the precomputed table; entry k occupies P[4 * k] .. P[4 * k + 3]. */
2280 const u64
*const P
= table_ladder_8k
;
/* Clamp a private copy; the caller buffer is const and stays untouched. */
2282 memcpy(m
.private, private_key
, sizeof(m
.private));
2284 curve25519_clamp_secret(m
.private);
2286 setzero_eltfp25519_1w(Ur1
);
2287 setzero_eltfp25519_1w(Zr1
);
2288 setzero_eltfp25519_1w(Zr2
);
/* Ur2 is loaded with a fixed 256-bit constant, limb 3 down to limb 0. */
2294 Ur2
[3] = 0x1eaecdeee27cab34UL
;
2295 Ur2
[2] = 0xadc7a0b9235d48e2UL
;
2296 Ur2
[1] = 0xbbf095ae14b2edf8UL
;
2297 Ur2
[0] = 0x7e94e1fec82faabdUL
;
/* Table-driven phase: one step per key bit, lowest word first. */
2301 for (i
= 0; i
< NUM_WORDS_ELTFP25519
; ++i
) {
2302 while (j
< ite
[i
]) {
2303 u64 bit
= (key
[i
] >> j
) & 0x1;
/* k is the table index for global bit position 64 * i + j, offset by q. */
2304 k
= (64 * i
+ j
- q
);
2306 cswap(swap
, Ur1
, Ur2
);
2307 cswap(swap
, Zr1
, Zr2
);
2310 sub_eltfp25519_1w(B
, Ur1
, Zr1
); /* B = Ur1-Zr1 */
2311 add_eltfp25519_1w_bmi2(A
, Ur1
, Zr1
); /* A = Ur1+Zr1 */
2312 mul_eltfp25519_1w_bmi2(C
, &P
[4 * k
], B
);/* C = M*B */
2313 sub_eltfp25519_1w(B
, A
, C
); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
2314 add_eltfp25519_1w_bmi2(A
, A
, C
); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
2315 sqr_eltfp25519_2w_bmi2(AB
); /* A = A^2 | B = B^2 */
2316 mul_eltfp25519_2w_bmi2(UZr1
, ZUr2
, AB
); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
/* Second phase: q iterations that use no table entry and no key bit. */
2323 for (i
= 0; i
< q
; ++i
) {
2324 add_eltfp25519_1w_bmi2(A
, Ur1
, Zr1
); /* A = Ur1+Zr1 */
2325 sub_eltfp25519_1w(B
, Ur1
, Zr1
); /* B = Ur1-Zr1 */
2326 sqr_eltfp25519_2w_bmi2(AB
); /* A = A**2 B = B**2 */
2327 copy_eltfp25519_1w(C
, B
); /* C = B */
2328 sub_eltfp25519_1w(B
, A
, B
); /* B = A-B */
2329 mul_a24_eltfp25519_1w(D
, B
); /* D = my_a24*B */
2330 add_eltfp25519_1w_bmi2(D
, D
, C
); /* D = D+C */
2331 mul_eltfp25519_2w_bmi2(UZr1
, AB
, CD
); /* [Ur1|Zr1] = [A|B]*[C|D] */
2334 /* Convert to affine coordinates */
2335 inv_eltfp25519_1w_bmi2(A
, Zr1
);
2336 mul_eltfp25519_1w_bmi2((u64
*)session_key
, Ur1
, A
);
2337 fred_eltfp25519_1w((u64
*)session_key
);
/* Wipe the key copy and every intermediate held in m. */
2339 memzero_explicit(&m
, sizeof(m
));
/*
 * curve25519_arch - exported arch entry point for the Curve25519 function.
 * @mypublic:  output, CURVE25519_KEY_SIZE bytes
 * @secret:    input scalar
 * @basepoint: input point
 *
 * Dispatches on the static keys: the ADX key is tested first, then the
 * BMI2 key; curve25519_generic() is the remaining call.
 * NOTE(review): the line between the BMI2 call and the generic call is not
 * visible in this view - presumably an else; confirm.
 */
2342 void curve25519_arch(u8 mypublic
[CURVE25519_KEY_SIZE
],
2343 const u8 secret
[CURVE25519_KEY_SIZE
],
2344 const u8 basepoint
[CURVE25519_KEY_SIZE
])
2346 if (static_branch_likely(&curve25519_use_adx
))
2347 curve25519_adx(mypublic
, secret
, basepoint
);
2348 else if (static_branch_likely(&curve25519_use_bmi2
))
2349 curve25519_bmi2(mypublic
, secret
, basepoint
);
2351 curve25519_generic(mypublic
, secret
, basepoint
);
2353 EXPORT_SYMBOL(curve25519_arch
);
/*
 * curve25519_base_arch - exported arch entry point for multiplication by
 * the base point.
 * @pub:    output, CURVE25519_KEY_SIZE bytes
 * @secret: input scalar
 *
 * Uses the table-driven ADX or BMI2 routine when the matching static key
 * is enabled (ADX is tested first); the remaining call runs
 * curve25519_generic() with curve25519_base_point.
 * NOTE(review): the line before the generic call is not visible in this
 * view - presumably an else; confirm.
 */
2355 void curve25519_base_arch(u8 pub
[CURVE25519_KEY_SIZE
],
2356 const u8 secret
[CURVE25519_KEY_SIZE
])
2358 if (static_branch_likely(&curve25519_use_adx
))
2359 curve25519_adx_base(pub
, secret
);
2360 else if (static_branch_likely(&curve25519_use_bmi2
))
2361 curve25519_bmi2_base(pub
, secret
);
2363 curve25519_generic(pub
, secret
, curve25519_base_point
);
2365 EXPORT_SYMBOL(curve25519_base_arch
);
/*
 * curve25519_set_secret - KPP set_secret hook; stores the secret in the
 * transform context returned by kpp_tfm_ctx().
 *
 * Two branches are visible: one fills the context through
 * curve25519_generate_secret(); the other copies the caller buffer, but
 * only when len equals CURVE25519_KEY_SIZE and the buffer differs from
 * curve25519_null_point (compared with crypto_memneq()).
 * NOTE(review): the condition of the first branch, the rest of the
 * parameter list and the return statements are on lines not visible in
 * this view.
 */
2367 static int curve25519_set_secret(struct crypto_kpp
*tfm
, const void *buf
,
2370 u8
*secret
= kpp_tfm_ctx(tfm
);
2373 curve25519_generate_secret(secret
);
2374 else if (len
== CURVE25519_KEY_SIZE
&&
2375 crypto_memneq(buf
, curve25519_null_point
, CURVE25519_KEY_SIZE
))
2376 memcpy(secret
, buf
, CURVE25519_KEY_SIZE
);
/*
 * curve25519_generate_public_key - KPP generate_public_key hook.
 * @req: request; req->dst receives up to req->dst_len bytes of the result
 *
 * Computes the public key for the secret stored in the transform context
 * with curve25519_base_arch() into a local buffer, then copies at most
 * CURVE25519_KEY_SIZE bytes into the destination scatterlist and compares
 * the copied count with the requested count.
 * NOTE(review): the return statements are on lines not visible in this
 * view.
 */
2382 static int curve25519_generate_public_key(struct kpp_request
*req
)
2384 struct crypto_kpp
*tfm
= crypto_kpp_reqtfm(req
);
2385 const u8
*secret
= kpp_tfm_ctx(tfm
);
2386 u8 buf
[CURVE25519_KEY_SIZE
];
2392 curve25519_base_arch(buf
, secret
);
2394 /* might want less than we've got */
2395 nbytes
= min_t(size_t, CURVE25519_KEY_SIZE
, req
->dst_len
);
2396 copied
= sg_copy_from_buffer(req
->dst
, sg_nents_for_len(req
->dst
,
/* A short copy into the destination is detected here. */
2399 if (copied
!= nbytes
)
/*
 * curve25519_compute_shared_secret - KPP compute_shared_secret hook.
 * @req: request; req->src supplies the peer public key, req->dst receives
 *       up to req->dst_len bytes of the result
 *
 * Reads exactly CURVE25519_KEY_SIZE bytes of peer public key from the
 * source scatterlist, runs curve25519_arch() with the secret stored in the
 * transform context, then copies at most CURVE25519_KEY_SIZE bytes into
 * the destination scatterlist.
 * NOTE(review): the return statements are on lines not visible in this
 * view.
 */
2404 static int curve25519_compute_shared_secret(struct kpp_request
*req
)
2406 struct crypto_kpp
*tfm
= crypto_kpp_reqtfm(req
);
2407 const u8
*secret
= kpp_tfm_ctx(tfm
);
2408 u8 public_key
[CURVE25519_KEY_SIZE
];
2409 u8 buf
[CURVE25519_KEY_SIZE
];
2415 copied
= sg_copy_to_buffer(req
->src
,
2416 sg_nents_for_len(req
->src
,
2417 CURVE25519_KEY_SIZE
),
2418 public_key
, CURVE25519_KEY_SIZE
);
/* A source shorter than a full key is detected here. */
2419 if (copied
!= CURVE25519_KEY_SIZE
)
2422 curve25519_arch(buf
, secret
, public_key
);
2424 /* might want less than we've got */
2425 nbytes
= min_t(size_t, CURVE25519_KEY_SIZE
, req
->dst_len
);
2426 copied
= sg_copy_from_buffer(req
->dst
, sg_nents_for_len(req
->dst
,
/* A short copy into the destination is detected here. */
2429 if (copied
!= nbytes
)
/*
 * curve25519_max_size - KPP max_size hook.
 * Always returns CURVE25519_KEY_SIZE; tfm is not used.
 */
2434 static unsigned int curve25519_max_size(struct crypto_kpp
*tfm
)
2436 return CURVE25519_KEY_SIZE
;
/*
 * KPP algorithm descriptor for this driver: algorithm name curve25519,
 * driver name curve25519-x86, priority 200.  The per-transform context is
 * CURVE25519_KEY_SIZE bytes, which is where curve25519_set_secret() keeps
 * the secret.
 */
2439 static struct kpp_alg curve25519_alg
= {
2440 .base
.cra_name
= "curve25519",
2441 .base
.cra_driver_name
= "curve25519-x86",
2442 .base
.cra_priority
= 200,
2443 .base
.cra_module
= THIS_MODULE
,
2444 .base
.cra_ctxsize
= CURVE25519_KEY_SIZE
,
2446 .set_secret
= curve25519_set_secret
,
2447 .generate_public_key
= curve25519_generate_public_key
,
2448 .compute_shared_secret
= curve25519_compute_shared_secret
,
2449 .max_size
= curve25519_max_size
,
/*
 * curve25519_mod_init - module init: pick the code path and register the
 * KPP algorithm.
 *
 * At most one static key is enabled: the BMI2 key when the boot CPU has
 * BMI2, otherwise the ADX key when it has ADX.  The KPP algorithm is
 * registered only when CONFIG_CRYPTO_KPP is reachable; otherwise 0 is
 * returned.
 * NOTE(review): the lines between the ADX branch and the final return are
 * not visible in this view - confirm what happens when neither feature is
 * present.
 */
2452 static int __init
curve25519_mod_init(void)
2454 if (boot_cpu_has(X86_FEATURE_BMI2
))
2455 static_branch_enable(&curve25519_use_bmi2
);
2456 else if (boot_cpu_has(X86_FEATURE_ADX
))
2457 static_branch_enable(&curve25519_use_adx
);
2460 return IS_REACHABLE(CONFIG_CRYPTO_KPP
) ?
2461 crypto_register_kpp(&curve25519_alg
) : 0;
/*
 * curve25519_mod_exit - module exit: unregister the KPP algorithm.
 * The unregister call is made only when CONFIG_CRYPTO_KPP is reachable and
 * the boot CPU has BMI2 or ADX, the same two features that
 * curve25519_mod_init() tests.
 */
2464 static void __exit
curve25519_mod_exit(void)
2466 if (IS_REACHABLE(CONFIG_CRYPTO_KPP
) &&
2467 (boot_cpu_has(X86_FEATURE_BMI2
) || boot_cpu_has(X86_FEATURE_ADX
)))
2468 crypto_unregister_kpp(&curve25519_alg
);
/* Hook up the init and exit functions defined above. */
2471 module_init(curve25519_mod_init
);
2472 module_exit(curve25519_mod_exit
);
/* Crypto aliases match the cra_name and cra_driver_name of curve25519_alg. */
2474 MODULE_ALIAS_CRYPTO("curve25519");
2475 MODULE_ALIAS_CRYPTO("curve25519-x86");
2476 MODULE_LICENSE("GPL v2");