Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / third_party / boringssl / win-x86 / crypto / aes / vpaes-x86.asm
blob b08b05637db27915efb782c748da6578feed82bd
; Section preamble: choose the code-section declaration that matches the
; object format being assembled (obj, win32, or anything else).
1 %ifidn __OUTPUT_FORMAT__,obj
2 section code use32 class=code align=64
3 %elifidn __OUTPUT_FORMAT__,win32
; win32 output: when assembled by Yasm, refuse versions older than 1.1.0.
4 %ifdef __YASM_VERSION_ID__
5 %if __YASM_VERSION_ID__ < 01010000h
6 %error yasm version 1.1.0 or later needed.
7 %endif
8 ; Yasm automatically includes .00 and complains about redefining it.
9 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
10 %else
; Not Yasm: define the symbol by hand.
; NOTE(review): presumably the SafeSEH feature marker described at the link
; above -- confirm against the assembler documentation.
11 $@feat.00 equ 1
12 %endif
13 section .text code align=64
14 %else
; Any other output format: plain code section, no explicit alignment here.
15 section .text code
16 %endif
; L$_vpaes_consts: lookup tables, one 16-byte vector per dd row.
; Every routine in this file addresses the table through ebp, which the entry
; points set to L$_vpaes_consts+0x30. Row k (counting from 0) is therefore
; found at [ebp + 16*k - 48]; the group comments below use that ebp-relative
; form and describe only how the visible code reads each group.
17 align 64
18 L$_vpaes_consts:
; [ebp-48], [ebp-32]: nibble-indexed pshufb tables read at the entry of every
; round ([ebp-48] is the value __vpaes_preheat keeps in xmm7).
19 dd 218628480,235210255,168496130,67568393
20 dd 252381056,17041926,33884169,51187212
; [ebp-16]: 0x0F0F0F0F in every dword -- the low-nibble mask kept in xmm6.
21 dd 252645135,252645135,252645135,252645135
; [ebp+0], [ebp+16]: table pair applied to the input block by
; __vpaes_encrypt_core and to raw key material by __vpaes_schedule_core.
22 dd 1512730624,3266504856,1377990664,3401244816
23 dd 830229760,1275146365,2969422977,3447763452
; [ebp+32] .. [ebp+80]: two table pairs used inside the encrypt round loop
; ([ebp+32]/[ebp+48] are also read by L$_vpaes_schedule_low_round).
24 dd 3411033600,2979783055,338359620,2782886510
25 dd 4209124096,907596821,221174255,1006095553
26 dd 191964160,3799684038,3164090317,1589111125
27 dd 182528256,1777043520,2877432650,3265356744
; [ebp+96], [ebp+112]: table pair used by the final encrypt round.
28 dd 1874708224,3503451415,3305285752,363511674
29 dd 1606117888,3487855781,1093350906,2384367825
; [ebp+128] .. [ebp+176]: byte-shuffle masks; the encrypt loop selects one
; with ecx, __vpaes_schedule_mangle uses [ebp+128], and
; __vpaes_decrypt_core starts from [ebp+176].
30 dd 197121,67569157,134941193,202313229
31 dd 67569157,134941193,202313229,197121
32 dd 134941193,202313229,197121,67569157
33 dd 202313229,197121,67569157,134941193
; [ebp+192] .. [ebp+240]: second set of byte-shuffle masks, selected with the
; same ecx index in the encrypt loop.
34 dd 33619971,100992007,168364043,235736079
35 dd 235736079,33619971,100992007,168364043
36 dd 168364043,235736079,33619971,100992007
37 dd 100992007,168364043,235736079,33619971
; [ebp+256] .. [ebp+304]: byte-shuffle masks selected by ecx for the last
; cipher round and by the key-schedule output code.
38 dd 50462976,117835012,185207048,252579084
39 dd 252314880,51251460,117574920,184942860
40 dd 184682752,252054788,50987272,118359308
41 dd 118099200,185467140,251790600,50727180
; [ebp+320]: initial value of the vector __vpaes_schedule_core parks on the
; stack and __vpaes_schedule_round rotates once per call.
42 dd 2946363062,528716217,1300004225,1881839624
; [ebp+336]: 0x5B5B5B5B in every dword; XORed in by the key-schedule code.
43 dd 1532713819,1532713819,1532713819,1532713819
; [ebp+352], [ebp+368]: output table pair for the last round key when edi == 0.
44 dd 3602276352,4288629033,3737020424,4153884961
45 dd 1354558464,32357713,2958822624,3775749553
; [ebp+384], [ebp+400]: output table pair for the last round key when edi != 0.
46 dd 1201988352,132424512,1572796698,503232858
47 dd 2213177600,1597421020,4103937655,675398315
; [ebp+416] .. [ebp+528]: four table pairs used by the edi != 0 branch of
; __vpaes_schedule_mangle.
48 dd 2749646592,4273543773,1511898873,121693092
49 dd 3040248576,1103263732,2871565598,1608280554
50 dd 2236667136,2588920351,482954393,64377734
51 dd 3069987328,291237287,2117370568,3650299247
52 dd 533321216,3573750986,2572112006,1401264716
53 dd 1339849704,2721158661,548607111,3445553514
54 dd 2128193280,3054596040,2183486460,1257083700
55 dd 655635200,1165381986,3923443150,2344132524
; [ebp+544], [ebp+560]: table pair applied to the input block by
; __vpaes_decrypt_core.
56 dd 190078720,256924420,290342170,357187870
57 dd 1610966272,2263057382,4103205268,309794674
; [ebp+576] .. [ebp+688]: four table pairs used inside the decrypt round loop.
58 dd 2592527872,2233205587,1335446729,3402964816
59 dd 3973531904,3225098121,3002836325,1918774430
60 dd 3870401024,2102906079,2284471353,4117666579
61 dd 617007872,1021508343,366931923,691083277
62 dd 2528395776,3491914898,2968704004,1613121270
63 dd 3445188352,3247741094,844474987,4093578302
64 dd 651481088,1190302358,1689581232,574775300
65 dd 4289380608,206939853,2555985458,2489840491
; [ebp+704], [ebp+720]: table pair used by the final decrypt round.
66 dd 2130264064,327674451,3566485037,3349835193
67 dd 2470714624,316102159,3636825756,3393945945
; NUL-terminated ASCII string:
; "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"
68 db 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
69 db 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
70 db 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
71 db 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
72 db 118,101,114,115,105,116,121,41,0
73 align 64
; __vpaes_preheat
; Converts the position-independent offset in ebp into an absolute pointer and
; preloads the two constants that the cipher cores keep in registers.
;   In:   ebp = (L$_vpaes_consts+0x30) - (address of the instruction that
;               follows the caller's call, i.e. the caller's pic_point label)
;   Out:  ebp  = absolute address of L$_vpaes_consts+0x30
;         xmm7 = 16 bytes at [ebp-48]
;         xmm6 = 16 bytes at [ebp-16] (0x0F in every byte)
;   Clobbers: flags
74 align 16
75 __vpaes_preheat:
; [esp] is the return address pushed by the call, which equals the caller's
; pic_point label; adding it cancels the subtraction done by the caller's lea.
76 add ebp,DWORD [esp]
77 movdqa xmm7,[ebp-48]
78 movdqa xmm6,[ebp-16]
79 ret
; __vpaes_encrypt_core
; Encrypts the 16-byte block in xmm0 using table lookups done with pshufb.
; SSSE3 instructions are emitted as raw bytes (db 102,15,56,0,modrm = pshufb);
; each one is decoded in the trailing comment.
;   In:   xmm0 = input block
;         edx  = key schedule; the loop count is read from [edx+240]
;         ebp  = L$_vpaes_consts+0x30 (absolute, see __vpaes_preheat)
;         xmm6 = low-nibble mask, xmm7 = table from [ebp-48] (from preheat)
;   Out:  xmm0 = output block
;   Clobbers: eax, ebx, ecx, edx, xmm1-xmm5, flags
; Fix: the routine had no return instruction and fell through into
; __vpaes_decrypt_core; the missing ret is restored at the end.
80 align 16
81 __vpaes_encrypt_core:
82 mov ecx,16
83 mov eax,DWORD [240+edx] ; eax = loop counter stored by the key-setup code
84 movdqa xmm1,xmm6
85 movdqa xmm2,[ebp]
86 pandn xmm1,xmm0 ; xmm1 = high nibble of every input byte (not yet shifted)
87 pand xmm0,xmm6 ; xmm0 = low nibble of every input byte
88 movdqu xmm5,[edx] ; first round key
89 db 102,15,56,0,208 ; pshufb xmm2,xmm0
90 movdqa xmm0,[16+ebp]
91 pxor xmm2,xmm5
92 psrld xmm1,4 ; move the high nibbles down so they can index a table
93 add edx,16 ; next round key; ZF from this add feeds the first jnz below
94 db 102,15,56,0,193 ; pshufb xmm0,xmm1
95 lea ebx,[192+ebp] ; base for the ecx-indexed shuffle masks
96 pxor xmm0,xmm2
97 jmp NEAR L$000enc_entry
98 align 16
; Middle rounds. On entry xmm2/xmm3 hold the lookup indices produced by
; enc_entry and xmm5 holds the current round key.
99 L$001enc_loop:
100 movdqa xmm4,[32+ebp]
101 movdqa xmm0,[48+ebp]
102 db 102,15,56,0,226 ; pshufb xmm4,xmm2
103 db 102,15,56,0,195 ; pshufb xmm0,xmm3
104 pxor xmm4,xmm5 ; fold in the round key
105 movdqa xmm5,[64+ebp]
106 pxor xmm0,xmm4
107 movdqa xmm1,[ecx*1+ebx-64] ; shuffle mask from [ebp+128+ecx]
108 db 102,15,56,0,234 ; pshufb xmm5,xmm2
109 movdqa xmm2,[80+ebp]
110 movdqa xmm4,[ecx*1+ebx] ; shuffle mask from [ebp+192+ecx]
111 db 102,15,56,0,211 ; pshufb xmm2,xmm3
112 movdqa xmm3,xmm0
113 pxor xmm2,xmm5
114 db 102,15,56,0,193 ; pshufb xmm0,xmm1
115 add edx,16 ; advance to the next round key
116 pxor xmm0,xmm2
117 db 102,15,56,0,220 ; pshufb xmm3,xmm4
118 add ecx,16
119 pxor xmm3,xmm0
120 db 102,15,56,0,193 ; pshufb xmm0,xmm1
121 and ecx,48 ; ecx cycles through 0,16,32,48
122 sub eax,1 ; sets ZF for the jnz at the end of enc_entry
123 pxor xmm0,xmm3
; Top of every round: split xmm0 into nibbles and derive the two lookup
; indices (xmm2, xmm3). Only SSE moves/logic follow the flag-setting
; instruction, so the flags survive until the jnz.
124 L$000enc_entry:
125 movdqa xmm1,xmm6
126 movdqa xmm5,[ebp-32]
127 pandn xmm1,xmm0
128 psrld xmm1,4 ; xmm1 = high nibbles
129 pand xmm0,xmm6 ; xmm0 = low nibbles
130 db 102,15,56,0,232 ; pshufb xmm5,xmm0
131 movdqa xmm3,xmm7
132 pxor xmm0,xmm1
133 db 102,15,56,0,217 ; pshufb xmm3,xmm1
134 movdqa xmm4,xmm7
135 pxor xmm3,xmm5
136 db 102,15,56,0,224 ; pshufb xmm4,xmm0
137 movdqa xmm2,xmm7
138 pxor xmm4,xmm5
139 db 102,15,56,0,211 ; pshufb xmm2,xmm3
140 movdqa xmm3,xmm7
141 pxor xmm2,xmm0
142 db 102,15,56,0,220 ; pshufb xmm3,xmm4
143 movdqu xmm5,[edx] ; round key for the round about to run
144 pxor xmm3,xmm1
145 jnz NEAR L$001enc_loop ; loop until the sub eax,1 above reaches zero
; Final round: uses the table pair at [ebp+96]/[ebp+112] and one last byte
; shuffle selected by ecx.
146 movdqa xmm4,[96+ebp]
147 movdqa xmm0,[112+ebp]
148 db 102,15,56,0,226 ; pshufb xmm4,xmm2
149 pxor xmm4,xmm5
150 db 102,15,56,0,195 ; pshufb xmm0,xmm3
151 movdqa xmm1,[64+ecx*1+ebx] ; shuffle mask from [ebp+256+ecx]
152 pxor xmm0,xmm4
153 db 102,15,56,0,193 ; pshufb xmm0,xmm1
154 ret
; __vpaes_decrypt_core
; Decrypts the 16-byte block in xmm0 using table lookups done with pshufb.
; Raw-byte instructions: db 102,15,56,0,modrm = pshufb and
; db 102,15,58,15,modrm,imm = palignr; each is decoded in its comment.
;   In:   xmm0 = input block
;         edx  = key schedule; the loop count is read from [edx+240]
;         ebp  = L$_vpaes_consts+0x30 (absolute, see __vpaes_preheat)
;         xmm6 = low-nibble mask, xmm7 = table from [ebp-48] (from preheat)
;   Out:  xmm0 = output block
;   Clobbers: eax, ebx, ecx, edx, xmm1-xmm5, flags
; Fix: the routine had no return instruction and fell through into
; __vpaes_schedule_core; the missing ret is restored at the end.
155 align 16
156 __vpaes_decrypt_core:
157 lea ebx,[608+ebp] ; base for the decrypt tables ([ebx-64] .. [ebx+112])
158 mov eax,DWORD [240+edx] ; eax = loop counter stored by the key-setup code
159 movdqa xmm1,xmm6
160 movdqa xmm2,[ebx-64]
161 pandn xmm1,xmm0
162 mov ecx,eax
163 psrld xmm1,4 ; xmm1 = high nibbles of the input
164 movdqu xmm5,[edx] ; first round key
165 shl ecx,4
166 pand xmm0,xmm6 ; xmm0 = low nibbles of the input
167 db 102,15,56,0,208 ; pshufb xmm2,xmm0
168 movdqa xmm0,[ebx-48]
169 xor ecx,48
170 db 102,15,56,0,193 ; pshufb xmm0,xmm1
171 and ecx,48 ; ecx = ((count*16) xor 48) and 48
172 pxor xmm2,xmm5
173 movdqa xmm5,[176+ebp] ; shuffle mask that the loop rotates each round
174 pxor xmm0,xmm2
175 add edx,16 ; next round key; ZF from this add feeds the first jnz below
176 lea ecx,[ecx*1+ebx-352] ; ecx = address of the final shuffle mask, [ebp+256+index]
177 jmp NEAR L$002dec_entry
178 align 16
; Middle rounds. On entry xmm2/xmm3 hold the lookup indices produced by
; dec_entry and xmm0 holds the current round key. Four table pairs are applied
; in turn, with xmm0 re-shuffled by xmm5 between them.
179 L$003dec_loop:
180 movdqa xmm4,[ebx-32]
181 movdqa xmm1,[ebx-16]
182 db 102,15,56,0,226 ; pshufb xmm4,xmm2
183 db 102,15,56,0,203 ; pshufb xmm1,xmm3
184 pxor xmm0,xmm4
185 movdqa xmm4,[ebx]
186 pxor xmm0,xmm1
187 movdqa xmm1,[16+ebx]
188 db 102,15,56,0,226 ; pshufb xmm4,xmm2
189 db 102,15,56,0,197 ; pshufb xmm0,xmm5
190 db 102,15,56,0,203 ; pshufb xmm1,xmm3
191 pxor xmm0,xmm4
192 movdqa xmm4,[32+ebx]
193 pxor xmm0,xmm1
194 movdqa xmm1,[48+ebx]
195 db 102,15,56,0,226 ; pshufb xmm4,xmm2
196 db 102,15,56,0,197 ; pshufb xmm0,xmm5
197 db 102,15,56,0,203 ; pshufb xmm1,xmm3
198 pxor xmm0,xmm4
199 movdqa xmm4,[64+ebx]
200 pxor xmm0,xmm1
201 movdqa xmm1,[80+ebx]
202 db 102,15,56,0,226 ; pshufb xmm4,xmm2
203 db 102,15,56,0,197 ; pshufb xmm0,xmm5
204 db 102,15,56,0,203 ; pshufb xmm1,xmm3
205 pxor xmm0,xmm4
206 add edx,16 ; advance to the next round key
207 db 102,15,58,15,237,12 ; palignr xmm5,xmm5,12 -- rotate the mask by 12 bytes
208 pxor xmm0,xmm1
209 sub eax,1 ; sets ZF for the jnz at the end of dec_entry
; Top of every round: split xmm0 into nibbles and derive the two lookup
; indices (xmm2, xmm3). Only SSE moves/logic follow the flag-setting
; instruction, so the flags survive until the jnz.
210 L$002dec_entry:
211 movdqa xmm1,xmm6
212 movdqa xmm2,[ebp-32]
213 pandn xmm1,xmm0
214 pand xmm0,xmm6 ; xmm0 = low nibbles
215 psrld xmm1,4 ; xmm1 = high nibbles
216 db 102,15,56,0,208 ; pshufb xmm2,xmm0
217 movdqa xmm3,xmm7
218 pxor xmm0,xmm1
219 db 102,15,56,0,217 ; pshufb xmm3,xmm1
220 movdqa xmm4,xmm7
221 pxor xmm3,xmm2
222 db 102,15,56,0,224 ; pshufb xmm4,xmm0
223 pxor xmm4,xmm2
224 movdqa xmm2,xmm7
225 db 102,15,56,0,211 ; pshufb xmm2,xmm3
226 movdqa xmm3,xmm7
227 pxor xmm2,xmm0
228 db 102,15,56,0,220 ; pshufb xmm3,xmm4
229 movdqu xmm0,[edx] ; round key for the round about to run
230 pxor xmm3,xmm1
231 jnz NEAR L$003dec_loop ; loop until the sub eax,1 above reaches zero
; Final round: table pair at [ebx+96]/[ebx+112], then one last byte shuffle
; using the mask whose address was computed into ecx at the top.
232 movdqa xmm4,[96+ebx]
233 db 102,15,56,0,226 ; pshufb xmm4,xmm2
234 pxor xmm4,xmm0
235 movdqa xmm0,[112+ebx]
236 movdqa xmm2,[ecx]
237 db 102,15,56,0,195 ; pshufb xmm0,xmm3
238 pxor xmm0,xmm4
239 db 102,15,56,0,194 ; pshufb xmm0,xmm2
240 ret
; __vpaes_schedule_core
; Expands a user key into the round-key array, dispatching on key size.
;   In:   esi = user key bytes
;         eax = key size in bits (above 192 -> 256 path, equal -> 192 path,
;               otherwise 128 path)
;         edx = where the first round key is written; moved by 16 bytes per
;               key (upwards when edi == 0, downwards when edi != 0)
;         edi = 0 for an encryption schedule, non-zero for a decryption one
;         ecx = index (0/16/32/48) of the output shuffle mask at [ebp+256+ecx]
;         ebp = (L$_vpaes_consts+0x30) - caller's pic_point (fixed up here)
;         esp+4 must be 16-byte aligned: [esp+4] and [esp+20] are used as
;               aligned 16-byte scratch slots inside the caller's frame
;   Out:  round keys stored through edx; xmm0-xmm7 cleared
;   Clobbers: eax, ebx, ecx, edx, esi (via __vpaes_schedule_mangle), ebp, flags
; Fix: the routine had no return instruction and fell through into
; __vpaes_schedule_192_smear; the missing ret is restored at the end.
241 align 16
242 __vpaes_schedule_core:
243 add ebp,DWORD [esp] ; [esp] = return address = caller's pic_point -> ebp becomes absolute
244 movdqu xmm0,[esi] ; first 16 bytes of the user key
245 movdqa xmm2,[320+ebp]
246 movdqa xmm3,xmm0 ; keep an untransformed copy for the decrypt path
247 lea ebx,[ebp] ; transform tables at [ebp]/[ebp+16]
248 movdqa [4+esp],xmm2 ; per-round vector; __vpaes_schedule_round sees it at [8+esp]
249 call __vpaes_schedule_transform
250 movdqa xmm7,xmm0 ; xmm7 = running key state
251 test edi,edi
252 jnz NEAR L$004schedule_am_decrypting
253 movdqu [edx],xmm0 ; encrypting: store the transformed key as-is
254 jmp NEAR L$005schedule_go
255 L$004schedule_am_decrypting:
256 movdqa xmm1,[256+ecx*1+ebp]
257 db 102,15,56,0,217 ; pshufb xmm3,xmm1 -- shuffle the raw key bytes
258 movdqu [edx],xmm3
259 xor ecx,48
260 L$005schedule_go:
261 cmp eax,192
262 ja NEAR L$006schedule_256
263 je NEAR L$007schedule_192
; 128-bit path: 10 rounds; every round key but the last goes through
; __vpaes_schedule_mangle, the last one through schedule_mangle_last.
264 L$008schedule_128:
265 mov eax,10
266 L$009loop_schedule_128:
267 call __vpaes_schedule_round
268 dec eax
269 jz NEAR L$010schedule_mangle_last
270 call __vpaes_schedule_mangle
271 jmp NEAR L$009loop_schedule_128
272 align 16
; 192-bit path: load key bytes 8..23, keep their transformed upper half in
; xmm6, then run 4 iterations that each emit up to three round keys.
273 L$007schedule_192:
274 movdqu xmm0,[8+esi]
275 call __vpaes_schedule_transform
276 movdqa xmm6,xmm0
277 pxor xmm4,xmm4
278 movhlps xmm6,xmm4 ; zero the low 8 bytes of xmm6
279 mov eax,4
280 L$011loop_schedule_192:
281 call __vpaes_schedule_round
282 db 102,15,58,15,198,8 ; palignr xmm0,xmm6,8
283 call __vpaes_schedule_mangle
284 call __vpaes_schedule_192_smear
285 call __vpaes_schedule_mangle
286 call __vpaes_schedule_round
287 dec eax
288 jz NEAR L$010schedule_mangle_last
289 call __vpaes_schedule_mangle
290 call __vpaes_schedule_192_smear
291 jmp NEAR L$011loop_schedule_192
292 align 16
; 256-bit path: load key bytes 16..31, then 7 iterations alternating a full
; round with a "low round" that runs on the other key half.
293 L$006schedule_256:
294 movdqu xmm0,[16+esi]
295 call __vpaes_schedule_transform
296 mov eax,7
297 L$012loop_schedule_256:
298 call __vpaes_schedule_mangle
299 movdqa xmm6,xmm0 ; remember this half for the low round below
300 call __vpaes_schedule_round
301 dec eax
302 jz NEAR L$010schedule_mangle_last
303 call __vpaes_schedule_mangle
304 pshufd xmm0,xmm0,255 ; broadcast the top dword of xmm0
305 movdqa [20+esp],xmm7 ; park xmm7 in the second scratch slot
306 movdqa xmm7,xmm6
307 call L$_vpaes_schedule_low_round
308 movdqa xmm7,[20+esp]
309 jmp NEAR L$012loop_schedule_256
310 align 16
; Last round key: uses a different output table pair from the other keys.
; ebx = [ebp+384] tables when edi != 0, [ebp+352] tables when edi == 0.
311 L$010schedule_mangle_last:
312 lea ebx,[384+ebp]
313 test edi,edi
314 jnz NEAR L$013schedule_mangle_last_dec
315 movdqa xmm1,[256+ecx*1+ebp]
316 db 102,15,56,0,193 ; pshufb xmm0,xmm1
317 lea ebx,[352+ebp]
318 add edx,32 ; with the -16 below: net +16 when encrypting
319 L$013schedule_mangle_last_dec:
320 add edx,-16 ; net -16 when decrypting
321 pxor xmm0,[336+ebp]
322 call __vpaes_schedule_transform
323 movdqu [edx],xmm0
; Clear every XMM register before returning (presumably so that no
; key-derived data is left behind in registers).
324 pxor xmm0,xmm0
325 pxor xmm1,xmm1
326 pxor xmm2,xmm2
327 pxor xmm3,xmm3
328 pxor xmm4,xmm4
329 pxor xmm5,xmm5
330 pxor xmm6,xmm6
331 pxor xmm7,xmm7
332 ret
; __vpaes_schedule_192_smear
; Helper for the 192-bit key schedule: mixes dwords of xmm6 and xmm7 into xmm6.
;   In:   xmm6 = partial key state, xmm7 = running key state
;   Out:  xmm0 = mixed value (all 16 bytes)
;         xmm6 = same value with its low 8 bytes zeroed
;   Clobbers: xmm1
; Fix: the routine had no return instruction and fell through into
; __vpaes_schedule_round; the missing ret is restored at the end.
333 align 16
334 __vpaes_schedule_192_smear:
335 pshufd xmm1,xmm6,128 ; xmm1 dwords = (xmm6[0], xmm6[0], xmm6[0], xmm6[2])
336 pshufd xmm0,xmm7,254 ; xmm0 dwords = (xmm7[2], xmm7[3], xmm7[3], xmm7[3])
337 pxor xmm6,xmm1
338 pxor xmm1,xmm1
339 pxor xmm6,xmm0
340 movdqa xmm0,xmm6
341 movhlps xmm6,xmm1 ; xmm1 is zero here, so this clears the low 8 bytes of xmm6
342 ret
; __vpaes_schedule_round / L$_vpaes_schedule_low_round
; One step of the key schedule.
; __vpaes_schedule_round first takes one byte from the per-round vector that
; __vpaes_schedule_core stored on the stack, XORs it into xmm7, rotates that
; vector, and rotates the top dword of xmm0; it then falls into the low round.
; L$_vpaes_schedule_low_round (also called directly by the 256-bit path) mixes
; xmm7 with shifted copies of itself, runs xmm0 through the nibble-table
; lookups, and combines the two.
;   In:   xmm0 = input word(s), xmm7 = running key state
;         ebp  = L$_vpaes_consts+0x30 (absolute)
;         [esp+8] (full round only) = per-round vector; this is the slot the
;               caller wrote at its own [esp+4], seen past our return address
;   Out:  xmm0 = xmm7 = new key state
;   Clobbers: xmm1-xmm5
; Fix: the routine had no return instruction and fell through into
; __vpaes_schedule_transform; the missing ret is restored at the end.
343 align 16
344 __vpaes_schedule_round:
345 movdqa xmm2,[8+esp]
346 pxor xmm1,xmm1
347 db 102,15,58,15,202,15 ; palignr xmm1,xmm2,15 -- xmm1 = top byte of xmm2, zero elsewhere
348 db 102,15,58,15,210,15 ; palignr xmm2,xmm2,15 -- rotate xmm2 by one byte
349 pxor xmm7,xmm1
350 pshufd xmm0,xmm0,255 ; broadcast the top dword of xmm0
351 db 102,15,58,15,192,1 ; palignr xmm0,xmm0,1 -- rotate xmm0 by one byte
352 movdqa [8+esp],xmm2 ; write the rotated vector back for the next round
353 L$_vpaes_schedule_low_round:
; xmm7 ^= xmm7 << 32 bits, then ^= xmm7 << 64 bits, then ^= constant [ebp+336]
354 movdqa xmm1,xmm7
355 pslldq xmm7,4
356 pxor xmm7,xmm1
357 movdqa xmm1,xmm7
358 pslldq xmm7,8
359 pxor xmm7,xmm1
360 pxor xmm7,[336+ebp]
; Same nibble-split lookup sequence as the cipher cores, but the mask and
; table are reloaded into xmm4/xmm5 rather than taken from xmm6/xmm7.
361 movdqa xmm4,[ebp-16]
362 movdqa xmm5,[ebp-48]
363 movdqa xmm1,xmm4
364 pandn xmm1,xmm0
365 psrld xmm1,4 ; xmm1 = high nibbles
366 pand xmm0,xmm4 ; xmm0 = low nibbles
367 movdqa xmm2,[ebp-32]
368 db 102,15,56,0,208 ; pshufb xmm2,xmm0
369 pxor xmm0,xmm1
370 movdqa xmm3,xmm5
371 db 102,15,56,0,217 ; pshufb xmm3,xmm1
372 pxor xmm3,xmm2
373 movdqa xmm4,xmm5
374 db 102,15,56,0,224 ; pshufb xmm4,xmm0
375 pxor xmm4,xmm2
376 movdqa xmm2,xmm5
377 db 102,15,56,0,211 ; pshufb xmm2,xmm3
378 pxor xmm2,xmm0
379 movdqa xmm3,xmm5
380 db 102,15,56,0,220 ; pshufb xmm3,xmm4
381 pxor xmm3,xmm1
382 movdqa xmm4,[32+ebp]
383 db 102,15,56,0,226 ; pshufb xmm4,xmm2
384 movdqa xmm0,[48+ebp]
385 db 102,15,56,0,195 ; pshufb xmm0,xmm3
386 pxor xmm0,xmm4
387 pxor xmm0,xmm7 ; combine the lookup result with the mixed key state
388 movdqa xmm7,xmm0
389 ret
; __vpaes_schedule_transform
; Applies a two-table byte transform to xmm0: each byte is split into its low
; and high nibble, each nibble indexes one 16-byte table, and the two lookups
; are XORed together.
;   In:   xmm0 = input vector
;         ebx  = address of the table pair ([ebx] for low nibbles,
;                [ebx+16] for high nibbles)
;         ebp  = L$_vpaes_consts+0x30 (absolute; nibble mask read at [ebp-16])
;   Out:  xmm0 = transformed vector
;   Clobbers: xmm1, xmm2
; Fix: the routine had no return instruction and fell through into
; __vpaes_schedule_mangle; the missing ret is restored at the end.
390 align 16
391 __vpaes_schedule_transform:
392 movdqa xmm2,[ebp-16]
393 movdqa xmm1,xmm2
394 pandn xmm1,xmm0
395 psrld xmm1,4 ; xmm1 = high nibbles
396 pand xmm0,xmm2 ; xmm0 = low nibbles
397 movdqa xmm2,[ebx]
398 db 102,15,56,0,208 ; pshufb xmm2,xmm0
399 movdqa xmm0,[16+ebx]
400 db 102,15,56,0,193 ; pshufb xmm0,xmm1
401 pxor xmm0,xmm2
402 ret
; __vpaes_schedule_mangle
; Converts the key state in xmm0 into its stored form and writes it to the
; round-key array, stepping the write pointer.
;   In:   xmm0 = key state (left unmodified; work is done on a copy in xmm4)
;         edi  = 0 -> encrypt form, edx advanced by +16 before the store
;                non-zero -> decrypt form, edx moved by -16 before the store
;         edx  = current position in the round-key array
;         ecx  = index (0/16/32/48) of the output shuffle mask at [ebp+256+ecx]
;         ebp  = L$_vpaes_consts+0x30 (absolute)
;   Out:  16 bytes stored at the updated [edx]; ecx = (ecx - 16) and 48
;   Clobbers: xmm1-xmm5, flags; esi is overwritten on the decrypt path
; Fix: the routine had no return instruction and fell through into
; _vpaes_set_encrypt_key; the missing ret is restored at the end.
403 align 16
404 __vpaes_schedule_mangle:
405 movdqa xmm4,xmm0
406 movdqa xmm5,[128+ebp] ; byte-shuffle mask applied repeatedly below
407 test edi,edi
408 jnz NEAR L$014schedule_mangle_dec
; Encrypt form: XOR with the constant at [ebp+336], then XOR together the
; results of shuffling by xmm5 two and three times, then shuffle once more.
409 add edx,16
410 pxor xmm4,[336+ebp]
411 db 102,15,56,0,229 ; pshufb xmm4,xmm5
412 movdqa xmm3,xmm4
413 db 102,15,56,0,229 ; pshufb xmm4,xmm5
414 pxor xmm3,xmm4
415 db 102,15,56,0,229 ; pshufb xmm4,xmm5
416 pxor xmm3,xmm4
417 jmp NEAR L$015schedule_mangle_both
418 align 16
; Decrypt form: split into nibbles, then chain four table-pair lookups from
; [ebp+416] .. [ebp+528], shuffling the accumulator by xmm5 between pairs.
419 L$014schedule_mangle_dec:
420 movdqa xmm2,[ebp-16]
421 lea esi,[416+ebp] ; esi = base of the four table pairs (clobbers esi)
422 movdqa xmm1,xmm2
423 pandn xmm1,xmm4
424 psrld xmm1,4 ; xmm1 = high nibbles
425 pand xmm4,xmm2 ; xmm4 = low nibbles
426 movdqa xmm2,[esi]
427 db 102,15,56,0,212 ; pshufb xmm2,xmm4
428 movdqa xmm3,[16+esi]
429 db 102,15,56,0,217 ; pshufb xmm3,xmm1
430 pxor xmm3,xmm2
431 db 102,15,56,0,221 ; pshufb xmm3,xmm5
432 movdqa xmm2,[32+esi]
433 db 102,15,56,0,212 ; pshufb xmm2,xmm4
434 pxor xmm2,xmm3
435 movdqa xmm3,[48+esi]
436 db 102,15,56,0,217 ; pshufb xmm3,xmm1
437 pxor xmm3,xmm2
438 db 102,15,56,0,221 ; pshufb xmm3,xmm5
439 movdqa xmm2,[64+esi]
440 db 102,15,56,0,212 ; pshufb xmm2,xmm4
441 pxor xmm2,xmm3
442 movdqa xmm3,[80+esi]
443 db 102,15,56,0,217 ; pshufb xmm3,xmm1
444 pxor xmm3,xmm2
445 db 102,15,56,0,221 ; pshufb xmm3,xmm5
446 movdqa xmm2,[96+esi]
447 db 102,15,56,0,212 ; pshufb xmm2,xmm4
448 pxor xmm2,xmm3
449 movdqa xmm3,[112+esi]
450 db 102,15,56,0,217 ; pshufb xmm3,xmm1
451 pxor xmm3,xmm2
452 add edx,-16
; Common tail: apply the ecx-selected byte shuffle, step ecx, store the key.
453 L$015schedule_mangle_both:
454 movdqa xmm1,[256+ecx*1+ebp]
455 db 102,15,56,0,217 ; pshufb xmm3,xmm1
456 add ecx,-16
457 and ecx,48 ; keep ecx in {0,16,32,48}
458 movdqu [edx],xmm3
459 ret
; _vpaes_set_encrypt_key
; Public entry point: builds an encryption key schedule.
; Arguments are read from the stack (offsets are relative to esp after the
; four register pushes):
;   [esp+20] -> esi : user key bytes
;   [esp+24] -> eax : key size in bits
;   [esp+28] -> edx : key schedule to fill; the value (bits >> 5) + 5 is stored
;                     at [edx+240], where the cipher cores read their loop count
; Returns eax = 0. Preserves ebp, ebx, esi, edi.
; Fix: the epilogue had no return instruction, so execution ran on into
; _vpaes_set_decrypt_key; the missing ret is restored at the end.
460 global _vpaes_set_encrypt_key
461 align 16
462 _vpaes_set_encrypt_key:
463 L$_vpaes_set_encrypt_key_begin:
464 push ebp
465 push ebx
466 push esi
467 push edi
468 mov esi,DWORD [20+esp]
; Build a 16-byte-aligned scratch frame at least 56 bytes below the current
; esp; the old esp is kept at [esp+48] of the new frame.
469 lea ebx,[esp-56]
470 mov eax,DWORD [24+esp]
471 and ebx,-16
472 mov edx,DWORD [28+esp]
473 xchg ebx,esp
474 mov DWORD [48+esp],ebx
475 mov ebx,eax
476 shr ebx,5
477 add ebx,5
478 mov DWORD [240+edx],ebx ; loop count = bits/32 + 5
479 mov ecx,48 ; starting index of the output shuffle mask
480 mov edi,0 ; edi == 0 selects the encrypt direction in the schedule code
; ebp = table address minus the address of the label after the call; the
; callee adds its return address to turn this into an absolute pointer.
481 lea ebp,[(L$_vpaes_consts+0x30-L$016pic_point)]
482 call __vpaes_schedule_core
483 L$016pic_point:
484 mov esp,DWORD [48+esp] ; restore the original stack pointer
485 xor eax,eax ; return 0
486 pop edi
487 pop esi
488 pop ebx
489 pop ebp
490 ret
; _vpaes_set_decrypt_key
; Public entry point: builds a decryption key schedule. The schedule code is
; handed a pointer to the far end of the round-key array and writes downwards
; (edi != 0 makes __vpaes_schedule_mangle step edx by -16).
; Arguments are read from the stack (offsets are relative to esp after the
; four register pushes):
;   [esp+20] -> esi : user key bytes
;   [esp+24] -> eax : key size in bits
;   [esp+28] -> edx : key schedule to fill; the value (bits >> 5) + 5 is stored
;                     at [edx+240], where the cipher cores read their loop count
; Returns eax = 0. Preserves ebp, ebx, esi, edi.
; Fix: the epilogue had no return instruction, so execution ran on into
; _vpaes_encrypt; the missing ret is restored at the end.
491 global _vpaes_set_decrypt_key
492 align 16
493 _vpaes_set_decrypt_key:
494 L$_vpaes_set_decrypt_key_begin:
495 push ebp
496 push ebx
497 push esi
498 push edi
499 mov esi,DWORD [20+esp]
; Build a 16-byte-aligned scratch frame at least 56 bytes below the current
; esp; the old esp is kept at [esp+48] of the new frame.
500 lea ebx,[esp-56]
501 mov eax,DWORD [24+esp]
502 and ebx,-16
503 mov edx,DWORD [28+esp]
504 xchg ebx,esp
505 mov DWORD [48+esp],ebx
506 mov ebx,eax
507 shr ebx,5
508 add ebx,5
509 mov DWORD [240+edx],ebx ; loop count = bits/32 + 5
510 shl ebx,4
511 lea edx,[16+ebx*1+edx] ; edx = schedule + 16*count + 16: start at the far end
512 mov edi,1 ; edi != 0 selects the decrypt direction in the schedule code
513 mov ecx,eax
514 shr ecx,1
515 and ecx,32
516 xor ecx,32 ; ecx = 32 when bit 6 of the key size is clear, else 0
; ebp = table address minus the address of the label after the call; the
; callee adds its return address to turn this into an absolute pointer.
517 lea ebp,[(L$_vpaes_consts+0x30-L$017pic_point)]
518 call __vpaes_schedule_core
519 L$017pic_point:
520 mov esp,DWORD [48+esp] ; restore the original stack pointer
521 xor eax,eax ; return 0
522 pop edi
523 pop esi
524 pop ebx
525 pop ebp
526 ret
; _vpaes_encrypt
; Public entry point: encrypts one 16-byte block.
; Arguments are read from the stack (offsets are relative to esp after the
; four register pushes):
;   [esp+20] -> esi : input block (16 bytes, loaded unaligned)
;   [esp+24] -> edi : output block (16 bytes, stored unaligned)
;   [esp+28] -> edx : key schedule
; Preserves ebp, ebx, esi, edi. eax is left holding whatever
; __vpaes_encrypt_core left in it.
; Fix: the epilogue had no return instruction, so execution ran on into
; _vpaes_decrypt; the missing ret is restored at the end.
527 global _vpaes_encrypt
528 align 16
529 _vpaes_encrypt:
530 L$_vpaes_encrypt_begin:
531 push ebp
532 push ebx
533 push esi
534 push edi
; ebp = table address minus the address of the label after the call;
; __vpaes_preheat adds its return address to make it absolute and loads
; xmm6/xmm7.
535 lea ebp,[(L$_vpaes_consts+0x30-L$018pic_point)]
536 call __vpaes_preheat
537 L$018pic_point:
538 mov esi,DWORD [20+esp]
; Switch to a 16-byte-aligned scratch frame; the old esp is kept at [esp+48].
539 lea ebx,[esp-56]
540 mov edi,DWORD [24+esp]
541 and ebx,-16
542 mov edx,DWORD [28+esp]
543 xchg ebx,esp
544 mov DWORD [48+esp],ebx
545 movdqu xmm0,[esi]
546 call __vpaes_encrypt_core
547 movdqu [edi],xmm0
548 mov esp,DWORD [48+esp] ; restore the original stack pointer
549 pop edi
550 pop esi
551 pop ebx
552 pop ebp
553 ret
; _vpaes_decrypt
; Public entry point: decrypts one 16-byte block.
; Arguments are read from the stack (offsets are relative to esp after the
; four register pushes):
;   [esp+20] -> esi : input block (16 bytes, loaded unaligned)
;   [esp+24] -> edi : output block (16 bytes, stored unaligned)
;   [esp+28] -> edx : key schedule
; Preserves ebp, ebx, esi, edi. eax is left holding whatever
; __vpaes_decrypt_core left in it.
; Fix: the epilogue had no return instruction, so execution ran on into
; _vpaes_cbc_encrypt; the missing ret is restored at the end.
554 global _vpaes_decrypt
555 align 16
556 _vpaes_decrypt:
557 L$_vpaes_decrypt_begin:
558 push ebp
559 push ebx
560 push esi
561 push edi
; ebp = table address minus the address of the label after the call;
; __vpaes_preheat adds its return address to make it absolute and loads
; xmm6/xmm7.
562 lea ebp,[(L$_vpaes_consts+0x30-L$019pic_point)]
563 call __vpaes_preheat
564 L$019pic_point:
565 mov esi,DWORD [20+esp]
; Switch to a 16-byte-aligned scratch frame; the old esp is kept at [esp+48].
566 lea ebx,[esp-56]
567 mov edi,DWORD [24+esp]
568 and ebx,-16
569 mov edx,DWORD [28+esp]
570 xchg ebx,esp
571 mov DWORD [48+esp],ebx
572 movdqu xmm0,[esi]
573 call __vpaes_decrypt_core
574 movdqu [edi],xmm0
575 mov esp,DWORD [48+esp] ; restore the original stack pointer
576 pop edi
577 pop esi
578 pop ebx
579 pop ebp
580 ret
; _vpaes_cbc_encrypt
; Public entry point: CBC-mode encryption or decryption of whole 16-byte
; blocks. Arguments are read from the stack (offsets are relative to esp
; after the four register pushes):
;   [esp+20] -> esi : input buffer
;   [esp+24] -> edi : output buffer
;   [esp+28] -> eax : length in bytes; nothing is done if it is below 16
;   [esp+32] -> edx : key schedule
;   [esp+36] -> ebp : 16-byte chaining value (read at entry, updated at exit)
;   [esp+40] -> ecx : direction; 0 selects the decrypt loop, anything else
;                     the encrypt loop
; Scratch frame layout (16-byte aligned):
;   [esp+0]  output-minus-input pointer delta
;   [esp+4]  key schedule pointer
;   [esp+8]  chaining-value pointer
;   [esp+16] previous ciphertext block (decrypt loop)
;   [esp+32] current ciphertext block (decrypt loop)
;   [esp+48] caller's esp
; NOTE(review): no return instruction is visible after the final pop in this
; excerpt -- confirm that a ret follows in the full file.
581 global _vpaes_cbc_encrypt
582 align 16
583 _vpaes_cbc_encrypt:
584 L$_vpaes_cbc_encrypt_begin:
585 push ebp
586 push ebx
587 push esi
588 push edi
589 mov esi,DWORD [20+esp]
590 mov edi,DWORD [24+esp]
591 mov eax,DWORD [28+esp]
592 mov edx,DWORD [32+esp]
; eax = length - 16; a borrow means fewer than 16 bytes, so leave immediately.
593 sub eax,16
594 jc NEAR L$020cbc_abort
; Switch to a 16-byte-aligned scratch frame (old esp saved at [esp+48]).
595 lea ebx,[esp-56]
596 mov ebp,DWORD [36+esp]
597 and ebx,-16
598 mov ecx,DWORD [40+esp]
599 xchg ebx,esp
; xmm1 carries the chaining value from block to block.
600 movdqu xmm1,[ebp]
; edi becomes (output - input) so the output address is always esi + delta.
601 sub edi,esi
602 mov DWORD [48+esp],ebx
603 mov DWORD [esp],edi
604 mov DWORD [4+esp],edx
605 mov DWORD [8+esp],ebp
; edi is reused as the byte counter (length - 16); each loop runs until
; subtracting 16 from it borrows.
606 mov edi,eax
; ebp = table address minus the address of the label after the call;
; __vpaes_preheat makes it absolute and loads xmm6/xmm7.
607 lea ebp,[(L$_vpaes_consts+0x30-L$021pic_point)]
608 call __vpaes_preheat
609 L$021pic_point:
610 cmp ecx,0
611 je NEAR L$022cbc_dec_loop
612 jmp NEAR L$023cbc_enc_loop
613 align 16
; Encrypt: XOR each input block with the previous output block, encrypt it,
; and keep the result in xmm1 as the next chaining value.
614 L$023cbc_enc_loop:
615 movdqu xmm0,[esi]
616 pxor xmm0,xmm1
617 call __vpaes_encrypt_core
; The core clobbers ebx and edx; reload the pointer delta and key schedule.
618 mov ebx,DWORD [esp]
619 mov edx,DWORD [4+esp]
620 movdqa xmm1,xmm0
621 movdqu [esi*1+ebx],xmm0
622 lea esi,[16+esi]
623 sub edi,16
624 jnc NEAR L$023cbc_enc_loop
625 jmp NEAR L$024cbc_done
626 align 16
; Decrypt: save the previous and current input blocks on the stack (the core
; clobbers xmm1), decrypt, XOR with the previous block, and make the current
; input block the next chaining value.
627 L$022cbc_dec_loop:
628 movdqu xmm0,[esi]
629 movdqa [16+esp],xmm1
630 movdqa [32+esp],xmm0
631 call __vpaes_decrypt_core
; The core clobbers ebx and edx; reload the pointer delta and key schedule.
632 mov ebx,DWORD [esp]
633 mov edx,DWORD [4+esp]
634 pxor xmm0,[16+esp]
635 movdqa xmm1,[32+esp]
636 movdqu [esi*1+ebx],xmm0
637 lea esi,[16+esi]
638 sub edi,16
639 jnc NEAR L$022cbc_dec_loop
; Write the final chaining value back through the saved pointer. The pointer
; is fetched before esp is restored because it lives in the scratch frame.
640 L$024cbc_done:
641 mov ebx,DWORD [8+esp]
642 mov esp,DWORD [48+esp]
643 movdqu [ebx],xmm1
644 L$020cbc_abort:
645 pop edi
646 pop esi
647 pop ebx
648 pop ebp