Add ICU message format support
[chromium-blink-merge.git] / third_party / boringssl / win-x86 / crypto / bn / co-586.asm
blob5780dc841bf88876f8d8ce368a9c7de80b2b4500
1 %ifidn __OUTPUT_FORMAT__,obj
2 section code use32 class=code align=64 ; OMF (obj) output: 32-bit code segment, 64-byte aligned
3 %elifidn __OUTPUT_FORMAT__,win32
4 %ifdef __YASM_VERSION_ID__
5 %if __YASM_VERSION_ID__ < 01010000h
6 %error yasm version 1.1.0 or later needed.
7 %endif
8 ; Yasm automatically includes @feat.00 and complains about redefining it,
9 ; so the symbol is only defined below when assembling with something else (e.g. NASM).
10 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
11 %else
12 $@feat.00 equ 1 ; SafeSEH feature marker for the win32 object (see the link above)
13 %endif
14 section .text code align=64 ; win32 output: code section, 64-byte aligned
15 %else
16 section .text code ; any other output format: plain code section, default alignment
17 %endif
17 global _bn_mul_comba8 ; r[0..15] = a[0..7] * b[0..7], 32-bit words, column-by-column (comba) product
18 align 16
19 _bn_mul_comba8: ; on entry: [esp+4]=r (result), [esp+8]=a, [esp+12]=b; clobbers eax, ecx, edx, flags
20 L$_bn_mul_comba8_begin:
21 push esi
22 mov esi,DWORD [12+esp] ; esi = a (2nd argument; one register pushed so far)
23 push edi
24 mov edi,DWORD [20+esp] ; edi = b (3rd argument; two registers pushed so far)
25 push ebp
26 push ebx
27 xor ebx,ebx ; ebx, ecx, ebp form a rotating three-word column accumulator
28 mov eax,DWORD [esi]
29 xor ecx,ecx
30 mov edx,DWORD [edi]
31 ; ################## Calculate word 0
32 xor ebp,ebp
33 ; mul a[0]*b[0]
34 mul edx
35 add ebx,eax
36 mov eax,DWORD [20+esp] ; eax = r (1st argument; four registers pushed)
37 adc ecx,edx
38 mov edx,DWORD [edi] ; preload b[0] for the next column (mov leaves the carry flag intact)
39 adc ebp,0
40 mov DWORD [eax],ebx
41 mov eax,DWORD [4+esi]
42 ; saved r[0]
43 ; ################## Calculate word 1
44 xor ebx,ebx
45 ; mul a[1]*b[0]
46 mul edx
47 add ecx,eax
48 mov eax,DWORD [esi]
49 adc ebp,edx
50 mov edx,DWORD [4+edi]
51 adc ebx,0
52 ; mul a[0]*b[1]
53 mul edx
54 add ecx,eax
55 mov eax,DWORD [20+esp]
56 adc ebp,edx
57 mov edx,DWORD [edi]
58 adc ebx,0
59 mov DWORD [4+eax],ecx
60 mov eax,DWORD [8+esi]
61 ; saved r[1]
62 ; ################## Calculate word 2
63 xor ecx,ecx
64 ; mul a[2]*b[0]
65 mul edx
66 add ebp,eax
67 mov eax,DWORD [4+esi]
68 adc ebx,edx
69 mov edx,DWORD [4+edi]
70 adc ecx,0
71 ; mul a[1]*b[1]
72 mul edx
73 add ebp,eax
74 mov eax,DWORD [esi]
75 adc ebx,edx
76 mov edx,DWORD [8+edi]
77 adc ecx,0
78 ; mul a[0]*b[2]
79 mul edx
80 add ebp,eax
81 mov eax,DWORD [20+esp]
82 adc ebx,edx
83 mov edx,DWORD [edi]
84 adc ecx,0
85 mov DWORD [8+eax],ebp
86 mov eax,DWORD [12+esi]
87 ; saved r[2]
88 ; ################## Calculate word 3
89 xor ebp,ebp
90 ; mul a[3]*b[0]
91 mul edx
92 add ebx,eax
93 mov eax,DWORD [8+esi]
94 adc ecx,edx
95 mov edx,DWORD [4+edi]
96 adc ebp,0
97 ; mul a[2]*b[1]
98 mul edx
99 add ebx,eax
100 mov eax,DWORD [4+esi]
101 adc ecx,edx
102 mov edx,DWORD [8+edi]
103 adc ebp,0
104 ; mul a[1]*b[2]
105 mul edx
106 add ebx,eax
107 mov eax,DWORD [esi]
108 adc ecx,edx
109 mov edx,DWORD [12+edi]
110 adc ebp,0
111 ; mul a[0]*b[3]
112 mul edx
113 add ebx,eax
114 mov eax,DWORD [20+esp]
115 adc ecx,edx
116 mov edx,DWORD [edi]
117 adc ebp,0
118 mov DWORD [12+eax],ebx
119 mov eax,DWORD [16+esi]
120 ; saved r[3]
121 ; ################## Calculate word 4
122 xor ebx,ebx
123 ; mul a[4]*b[0]
124 mul edx
125 add ecx,eax
126 mov eax,DWORD [12+esi]
127 adc ebp,edx
128 mov edx,DWORD [4+edi]
129 adc ebx,0
130 ; mul a[3]*b[1]
131 mul edx
132 add ecx,eax
133 mov eax,DWORD [8+esi]
134 adc ebp,edx
135 mov edx,DWORD [8+edi]
136 adc ebx,0
137 ; mul a[2]*b[2]
138 mul edx
139 add ecx,eax
140 mov eax,DWORD [4+esi]
141 adc ebp,edx
142 mov edx,DWORD [12+edi]
143 adc ebx,0
144 ; mul a[1]*b[3]
145 mul edx
146 add ecx,eax
147 mov eax,DWORD [esi]
148 adc ebp,edx
149 mov edx,DWORD [16+edi]
150 adc ebx,0
151 ; mul a[0]*b[4]
152 mul edx
153 add ecx,eax
154 mov eax,DWORD [20+esp]
155 adc ebp,edx
156 mov edx,DWORD [edi]
157 adc ebx,0
158 mov DWORD [16+eax],ecx
159 mov eax,DWORD [20+esi]
160 ; saved r[4]
161 ; ################## Calculate word 5
162 xor ecx,ecx
163 ; mul a[5]*b[0]
164 mul edx
165 add ebp,eax
166 mov eax,DWORD [16+esi]
167 adc ebx,edx
168 mov edx,DWORD [4+edi]
169 adc ecx,0
170 ; mul a[4]*b[1]
171 mul edx
172 add ebp,eax
173 mov eax,DWORD [12+esi]
174 adc ebx,edx
175 mov edx,DWORD [8+edi]
176 adc ecx,0
177 ; mul a[3]*b[2]
178 mul edx
179 add ebp,eax
180 mov eax,DWORD [8+esi]
181 adc ebx,edx
182 mov edx,DWORD [12+edi]
183 adc ecx,0
184 ; mul a[2]*b[3]
185 mul edx
186 add ebp,eax
187 mov eax,DWORD [4+esi]
188 adc ebx,edx
189 mov edx,DWORD [16+edi]
190 adc ecx,0
191 ; mul a[1]*b[4]
192 mul edx
193 add ebp,eax
194 mov eax,DWORD [esi]
195 adc ebx,edx
196 mov edx,DWORD [20+edi]
197 adc ecx,0
198 ; mul a[0]*b[5]
199 mul edx
200 add ebp,eax
201 mov eax,DWORD [20+esp]
202 adc ebx,edx
203 mov edx,DWORD [edi]
204 adc ecx,0
205 mov DWORD [20+eax],ebp
206 mov eax,DWORD [24+esi]
207 ; saved r[5]
208 ; ################## Calculate word 6
209 xor ebp,ebp
210 ; mul a[6]*b[0]
211 mul edx
212 add ebx,eax
213 mov eax,DWORD [20+esi]
214 adc ecx,edx
215 mov edx,DWORD [4+edi]
216 adc ebp,0
217 ; mul a[5]*b[1]
218 mul edx
219 add ebx,eax
220 mov eax,DWORD [16+esi]
221 adc ecx,edx
222 mov edx,DWORD [8+edi]
223 adc ebp,0
224 ; mul a[4]*b[2]
225 mul edx
226 add ebx,eax
227 mov eax,DWORD [12+esi]
228 adc ecx,edx
229 mov edx,DWORD [12+edi]
230 adc ebp,0
231 ; mul a[3]*b[3]
232 mul edx
233 add ebx,eax
234 mov eax,DWORD [8+esi]
235 adc ecx,edx
236 mov edx,DWORD [16+edi]
237 adc ebp,0
238 ; mul a[2]*b[4]
239 mul edx
240 add ebx,eax
241 mov eax,DWORD [4+esi]
242 adc ecx,edx
243 mov edx,DWORD [20+edi]
244 adc ebp,0
245 ; mul a[1]*b[5]
246 mul edx
247 add ebx,eax
248 mov eax,DWORD [esi]
249 adc ecx,edx
250 mov edx,DWORD [24+edi]
251 adc ebp,0
252 ; mul a[0]*b[6]
253 mul edx
254 add ebx,eax
255 mov eax,DWORD [20+esp]
256 adc ecx,edx
257 mov edx,DWORD [edi]
258 adc ebp,0
259 mov DWORD [24+eax],ebx
260 mov eax,DWORD [28+esi]
261 ; saved r[6]
262 ; ################## Calculate word 7
263 xor ebx,ebx
264 ; mul a[7]*b[0]
265 mul edx
266 add ecx,eax
267 mov eax,DWORD [24+esi]
268 adc ebp,edx
269 mov edx,DWORD [4+edi]
270 adc ebx,0
271 ; mul a[6]*b[1]
272 mul edx
273 add ecx,eax
274 mov eax,DWORD [20+esi]
275 adc ebp,edx
276 mov edx,DWORD [8+edi]
277 adc ebx,0
278 ; mul a[5]*b[2]
279 mul edx
280 add ecx,eax
281 mov eax,DWORD [16+esi]
282 adc ebp,edx
283 mov edx,DWORD [12+edi]
284 adc ebx,0
285 ; mul a[4]*b[3]
286 mul edx
287 add ecx,eax
288 mov eax,DWORD [12+esi]
289 adc ebp,edx
290 mov edx,DWORD [16+edi]
291 adc ebx,0
292 ; mul a[3]*b[4]
293 mul edx
294 add ecx,eax
295 mov eax,DWORD [8+esi]
296 adc ebp,edx
297 mov edx,DWORD [20+edi]
298 adc ebx,0
299 ; mul a[2]*b[5]
300 mul edx
301 add ecx,eax
302 mov eax,DWORD [4+esi]
303 adc ebp,edx
304 mov edx,DWORD [24+edi]
305 adc ebx,0
306 ; mul a[1]*b[6]
307 mul edx
308 add ecx,eax
309 mov eax,DWORD [esi]
310 adc ebp,edx
311 mov edx,DWORD [28+edi]
312 adc ebx,0
313 ; mul a[0]*b[7]
314 mul edx
315 add ecx,eax
316 mov eax,DWORD [20+esp]
317 adc ebp,edx
318 mov edx,DWORD [4+edi] ; from here on a[0] is exhausted: columns start at a[7]*b[k-7]
319 adc ebx,0
320 mov DWORD [28+eax],ecx
321 mov eax,DWORD [28+esi]
322 ; saved r[7]
323 ; ################## Calculate word 8
324 xor ecx,ecx
325 ; mul a[7]*b[1]
326 mul edx
327 add ebp,eax
328 mov eax,DWORD [24+esi]
329 adc ebx,edx
330 mov edx,DWORD [8+edi]
331 adc ecx,0
332 ; mul a[6]*b[2]
333 mul edx
334 add ebp,eax
335 mov eax,DWORD [20+esi]
336 adc ebx,edx
337 mov edx,DWORD [12+edi]
338 adc ecx,0
339 ; mul a[5]*b[3]
340 mul edx
341 add ebp,eax
342 mov eax,DWORD [16+esi]
343 adc ebx,edx
344 mov edx,DWORD [16+edi]
345 adc ecx,0
346 ; mul a[4]*b[4]
347 mul edx
348 add ebp,eax
349 mov eax,DWORD [12+esi]
350 adc ebx,edx
351 mov edx,DWORD [20+edi]
352 adc ecx,0
353 ; mul a[3]*b[5]
354 mul edx
355 add ebp,eax
356 mov eax,DWORD [8+esi]
357 adc ebx,edx
358 mov edx,DWORD [24+edi]
359 adc ecx,0
360 ; mul a[2]*b[6]
361 mul edx
362 add ebp,eax
363 mov eax,DWORD [4+esi]
364 adc ebx,edx
365 mov edx,DWORD [28+edi]
366 adc ecx,0
367 ; mul a[1]*b[7]
368 mul edx
369 add ebp,eax
370 mov eax,DWORD [20+esp]
371 adc ebx,edx
372 mov edx,DWORD [8+edi]
373 adc ecx,0
374 mov DWORD [32+eax],ebp
375 mov eax,DWORD [28+esi]
376 ; saved r[8]
377 ; ################## Calculate word 9
378 xor ebp,ebp
379 ; mul a[7]*b[2]
380 mul edx
381 add ebx,eax
382 mov eax,DWORD [24+esi]
383 adc ecx,edx
384 mov edx,DWORD [12+edi]
385 adc ebp,0
386 ; mul a[6]*b[3]
387 mul edx
388 add ebx,eax
389 mov eax,DWORD [20+esi]
390 adc ecx,edx
391 mov edx,DWORD [16+edi]
392 adc ebp,0
393 ; mul a[5]*b[4]
394 mul edx
395 add ebx,eax
396 mov eax,DWORD [16+esi]
397 adc ecx,edx
398 mov edx,DWORD [20+edi]
399 adc ebp,0
400 ; mul a[4]*b[5]
401 mul edx
402 add ebx,eax
403 mov eax,DWORD [12+esi]
404 adc ecx,edx
405 mov edx,DWORD [24+edi]
406 adc ebp,0
407 ; mul a[3]*b[6]
408 mul edx
409 add ebx,eax
410 mov eax,DWORD [8+esi]
411 adc ecx,edx
412 mov edx,DWORD [28+edi]
413 adc ebp,0
414 ; mul a[2]*b[7]
415 mul edx
416 add ebx,eax
417 mov eax,DWORD [20+esp]
418 adc ecx,edx
419 mov edx,DWORD [12+edi]
420 adc ebp,0
421 mov DWORD [36+eax],ebx
422 mov eax,DWORD [28+esi]
423 ; saved r[9]
424 ; ################## Calculate word 10
425 xor ebx,ebx
426 ; mul a[7]*b[3]
427 mul edx
428 add ecx,eax
429 mov eax,DWORD [24+esi]
430 adc ebp,edx
431 mov edx,DWORD [16+edi]
432 adc ebx,0
433 ; mul a[6]*b[4]
434 mul edx
435 add ecx,eax
436 mov eax,DWORD [20+esi]
437 adc ebp,edx
438 mov edx,DWORD [20+edi]
439 adc ebx,0
440 ; mul a[5]*b[5]
441 mul edx
442 add ecx,eax
443 mov eax,DWORD [16+esi]
444 adc ebp,edx
445 mov edx,DWORD [24+edi]
446 adc ebx,0
447 ; mul a[4]*b[6]
448 mul edx
449 add ecx,eax
450 mov eax,DWORD [12+esi]
451 adc ebp,edx
452 mov edx,DWORD [28+edi]
453 adc ebx,0
454 ; mul a[3]*b[7]
455 mul edx
456 add ecx,eax
457 mov eax,DWORD [20+esp]
458 adc ebp,edx
459 mov edx,DWORD [16+edi]
460 adc ebx,0
461 mov DWORD [40+eax],ecx
462 mov eax,DWORD [28+esi]
463 ; saved r[10]
464 ; ################## Calculate word 11
465 xor ecx,ecx
466 ; mul a[7]*b[4]
467 mul edx
468 add ebp,eax
469 mov eax,DWORD [24+esi]
470 adc ebx,edx
471 mov edx,DWORD [20+edi]
472 adc ecx,0
473 ; mul a[6]*b[5]
474 mul edx
475 add ebp,eax
476 mov eax,DWORD [20+esi]
477 adc ebx,edx
478 mov edx,DWORD [24+edi]
479 adc ecx,0
480 ; mul a[5]*b[6]
481 mul edx
482 add ebp,eax
483 mov eax,DWORD [16+esi]
484 adc ebx,edx
485 mov edx,DWORD [28+edi]
486 adc ecx,0
487 ; mul a[4]*b[7]
488 mul edx
489 add ebp,eax
490 mov eax,DWORD [20+esp]
491 adc ebx,edx
492 mov edx,DWORD [20+edi]
493 adc ecx,0
494 mov DWORD [44+eax],ebp
495 mov eax,DWORD [28+esi]
496 ; saved r[11]
497 ; ################## Calculate word 12
498 xor ebp,ebp
499 ; mul a[7]*b[5]
500 mul edx
501 add ebx,eax
502 mov eax,DWORD [24+esi]
503 adc ecx,edx
504 mov edx,DWORD [24+edi]
505 adc ebp,0
506 ; mul a[6]*b[6]
507 mul edx
508 add ebx,eax
509 mov eax,DWORD [20+esi]
510 adc ecx,edx
511 mov edx,DWORD [28+edi]
512 adc ebp,0
513 ; mul a[5]*b[7]
514 mul edx
515 add ebx,eax
516 mov eax,DWORD [20+esp]
517 adc ecx,edx
518 mov edx,DWORD [24+edi]
519 adc ebp,0
520 mov DWORD [48+eax],ebx
521 mov eax,DWORD [28+esi]
522 ; saved r[12]
523 ; ################## Calculate word 13
524 xor ebx,ebx
525 ; mul a[7]*b[6]
526 mul edx
527 add ecx,eax
528 mov eax,DWORD [24+esi]
529 adc ebp,edx
530 mov edx,DWORD [28+edi]
531 adc ebx,0
532 ; mul a[6]*b[7]
533 mul edx
534 add ecx,eax
535 mov eax,DWORD [20+esp]
536 adc ebp,edx
537 mov edx,DWORD [28+edi]
538 adc ebx,0
539 mov DWORD [52+eax],ecx
540 mov eax,DWORD [28+esi]
541 ; saved r[13]
542 ; ################## Calculate word 14
543 xor ecx,ecx
544 ; mul a[7]*b[7]
545 mul edx
546 add ebp,eax
547 mov eax,DWORD [20+esp]
548 adc ebx,edx
549 adc ecx,0 ; ecx is not stored afterwards: r[15] below is the last output word
550 mov DWORD [56+eax],ebp
551 ; saved r[14]
552 ; save r[15]
553 mov DWORD [60+eax],ebx
554 pop ebx ; restore the four registers saved in the prologue, in reverse order
555 pop ebp
556 pop edi
557 pop esi
558 ret ; restored: without it execution falls through into _bn_mul_comba4 and r[0..7] is overwritten
559 global _bn_mul_comba4 ; r[0..7] = a[0..3] * b[0..3], 32-bit words, column-by-column (comba) product
560 align 16
561 _bn_mul_comba4: ; on entry: [esp+4]=r (result), [esp+8]=a, [esp+12]=b; clobbers eax, ecx, edx, flags
562 L$_bn_mul_comba4_begin:
563 push esi
564 mov esi,DWORD [12+esp] ; esi = a (2nd argument; one register pushed so far)
565 push edi
566 mov edi,DWORD [20+esp] ; edi = b (3rd argument; two registers pushed so far)
567 push ebp
568 push ebx
569 xor ebx,ebx ; ebx, ecx, ebp form a rotating three-word column accumulator
570 mov eax,DWORD [esi]
571 xor ecx,ecx
572 mov edx,DWORD [edi]
573 ; ################## Calculate word 0
574 xor ebp,ebp
575 ; mul a[0]*b[0]
576 mul edx
577 add ebx,eax
578 mov eax,DWORD [20+esp] ; eax = r (1st argument; four registers pushed)
579 adc ecx,edx
580 mov edx,DWORD [edi] ; preload b[0] for the next column (mov leaves the carry flag intact)
581 adc ebp,0
582 mov DWORD [eax],ebx
583 mov eax,DWORD [4+esi]
584 ; saved r[0]
585 ; ################## Calculate word 1
586 xor ebx,ebx
587 ; mul a[1]*b[0]
588 mul edx
589 add ecx,eax
590 mov eax,DWORD [esi]
591 adc ebp,edx
592 mov edx,DWORD [4+edi]
593 adc ebx,0
594 ; mul a[0]*b[1]
595 mul edx
596 add ecx,eax
597 mov eax,DWORD [20+esp]
598 adc ebp,edx
599 mov edx,DWORD [edi]
600 adc ebx,0
601 mov DWORD [4+eax],ecx
602 mov eax,DWORD [8+esi]
603 ; saved r[1]
604 ; ################## Calculate word 2
605 xor ecx,ecx
606 ; mul a[2]*b[0]
607 mul edx
608 add ebp,eax
609 mov eax,DWORD [4+esi]
610 adc ebx,edx
611 mov edx,DWORD [4+edi]
612 adc ecx,0
613 ; mul a[1]*b[1]
614 mul edx
615 add ebp,eax
616 mov eax,DWORD [esi]
617 adc ebx,edx
618 mov edx,DWORD [8+edi]
619 adc ecx,0
620 ; mul a[0]*b[2]
621 mul edx
622 add ebp,eax
623 mov eax,DWORD [20+esp]
624 adc ebx,edx
625 mov edx,DWORD [edi]
626 adc ecx,0
627 mov DWORD [8+eax],ebp
628 mov eax,DWORD [12+esi]
629 ; saved r[2]
630 ; ################## Calculate word 3
631 xor ebp,ebp
632 ; mul a[3]*b[0]
633 mul edx
634 add ebx,eax
635 mov eax,DWORD [8+esi]
636 adc ecx,edx
637 mov edx,DWORD [4+edi]
638 adc ebp,0
639 ; mul a[2]*b[1]
640 mul edx
641 add ebx,eax
642 mov eax,DWORD [4+esi]
643 adc ecx,edx
644 mov edx,DWORD [8+edi]
645 adc ebp,0
646 ; mul a[1]*b[2]
647 mul edx
648 add ebx,eax
649 mov eax,DWORD [esi]
650 adc ecx,edx
651 mov edx,DWORD [12+edi]
652 adc ebp,0
653 ; mul a[0]*b[3]
654 mul edx
655 add ebx,eax
656 mov eax,DWORD [20+esp]
657 adc ecx,edx
658 mov edx,DWORD [4+edi] ; from here on a[0] is exhausted: columns start at a[3]*b[k-3]
659 adc ebp,0
660 mov DWORD [12+eax],ebx
661 mov eax,DWORD [12+esi]
662 ; saved r[3]
663 ; ################## Calculate word 4
664 xor ebx,ebx
665 ; mul a[3]*b[1]
666 mul edx
667 add ecx,eax
668 mov eax,DWORD [8+esi]
669 adc ebp,edx
670 mov edx,DWORD [8+edi]
671 adc ebx,0
672 ; mul a[2]*b[2]
673 mul edx
674 add ecx,eax
675 mov eax,DWORD [4+esi]
676 adc ebp,edx
677 mov edx,DWORD [12+edi]
678 adc ebx,0
679 ; mul a[1]*b[3]
680 mul edx
681 add ecx,eax
682 mov eax,DWORD [20+esp]
683 adc ebp,edx
684 mov edx,DWORD [8+edi]
685 adc ebx,0
686 mov DWORD [16+eax],ecx
687 mov eax,DWORD [12+esi]
688 ; saved r[4]
689 ; ################## Calculate word 5
690 xor ecx,ecx
691 ; mul a[3]*b[2]
692 mul edx
693 add ebp,eax
694 mov eax,DWORD [8+esi]
695 adc ebx,edx
696 mov edx,DWORD [12+edi]
697 adc ecx,0
698 ; mul a[2]*b[3]
699 mul edx
700 add ebp,eax
701 mov eax,DWORD [20+esp]
702 adc ebx,edx
703 mov edx,DWORD [12+edi]
704 adc ecx,0
705 mov DWORD [20+eax],ebp
706 mov eax,DWORD [12+esi]
707 ; saved r[5]
708 ; ################## Calculate word 6
709 xor ebp,ebp
710 ; mul a[3]*b[3]
711 mul edx
712 add ebx,eax
713 mov eax,DWORD [20+esp]
714 adc ecx,edx
715 adc ebp,0 ; ebp is not stored afterwards: r[7] below is the last output word
716 mov DWORD [24+eax],ebx
717 ; saved r[6]
718 ; save r[7]
719 mov DWORD [28+eax],ecx
720 pop ebx ; restore the four registers saved in the prologue, in reverse order
721 pop ebp
722 pop edi
723 pop esi
724 ret ; restored: without it execution falls through into _bn_sqr_comba8
725 global _bn_sqr_comba8 ; r[0..15] = a[0..7] squared, 32-bit words, column-by-column (comba)
726 align 16
727 _bn_sqr_comba8: ; on entry: [esp+4]=r (result), [esp+8]=a; clobbers eax, ecx, edx, flags
728 L$_bn_sqr_comba8_begin:
729 push esi
730 push edi
731 push ebp
732 push ebx
733 mov edi,DWORD [20+esp] ; edi = r (1st argument; four registers pushed)
734 mov esi,DWORD [24+esp] ; esi = a (2nd argument)
735 xor ebx,ebx ; ebx, ecx, ebp form a rotating three-word column accumulator
736 xor ecx,ecx
737 mov eax,DWORD [esi]
738 ; ############### Calculate word 0
739 xor ebp,ebp
740 ; sqr a[0]*a[0]
741 mul eax
742 add ebx,eax
743 adc ecx,edx
744 mov edx,DWORD [esi]
745 adc ebp,0
746 mov DWORD [edi],ebx
747 mov eax,DWORD [4+esi]
748 ; saved r[0]
749 ; ############### Calculate word 1
750 xor ebx,ebx
751 ; sqr a[1]*a[0]
752 mul edx
753 add eax,eax ; double edx:eax: a cross term a[i]*a[j] (i != j) occurs twice in the square
754 adc edx,edx
755 adc ebx,0 ; bit shifted out of the doubled product goes to the top accumulator word
756 add ecx,eax
757 adc ebp,edx
758 mov eax,DWORD [8+esi]
759 adc ebx,0
760 mov DWORD [4+edi],ecx
761 mov edx,DWORD [esi]
762 ; saved r[1]
763 ; ############### Calculate word 2
764 xor ecx,ecx
765 ; sqr a[2]*a[0]
766 mul edx
767 add eax,eax
768 adc edx,edx
769 adc ecx,0
770 add ebp,eax
771 adc ebx,edx
772 mov eax,DWORD [4+esi]
773 adc ecx,0
774 ; sqr a[1]*a[1]
775 mul eax ; diagonal term a[i]*a[i] is added once, not doubled
776 add ebp,eax
777 adc ebx,edx
778 mov edx,DWORD [esi]
779 adc ecx,0
780 mov DWORD [8+edi],ebp
781 mov eax,DWORD [12+esi]
782 ; saved r[2]
783 ; ############### Calculate word 3
784 xor ebp,ebp
785 ; sqr a[3]*a[0]
786 mul edx
787 add eax,eax
788 adc edx,edx
789 adc ebp,0
790 add ebx,eax
791 adc ecx,edx
792 mov eax,DWORD [8+esi]
793 adc ebp,0
794 mov edx,DWORD [4+esi]
795 ; sqr a[2]*a[1]
796 mul edx
797 add eax,eax
798 adc edx,edx
799 adc ebp,0
800 add ebx,eax
801 adc ecx,edx
802 mov eax,DWORD [16+esi]
803 adc ebp,0
804 mov DWORD [12+edi],ebx
805 mov edx,DWORD [esi]
806 ; saved r[3]
807 ; ############### Calculate word 4
808 xor ebx,ebx
809 ; sqr a[4]*a[0]
810 mul edx
811 add eax,eax
812 adc edx,edx
813 adc ebx,0
814 add ecx,eax
815 adc ebp,edx
816 mov eax,DWORD [12+esi]
817 adc ebx,0
818 mov edx,DWORD [4+esi]
819 ; sqr a[3]*a[1]
820 mul edx
821 add eax,eax
822 adc edx,edx
823 adc ebx,0
824 add ecx,eax
825 adc ebp,edx
826 mov eax,DWORD [8+esi]
827 adc ebx,0
828 ; sqr a[2]*a[2]
829 mul eax
830 add ecx,eax
831 adc ebp,edx
832 mov edx,DWORD [esi]
833 adc ebx,0
834 mov DWORD [16+edi],ecx
835 mov eax,DWORD [20+esi]
836 ; saved r[4]
837 ; ############### Calculate word 5
838 xor ecx,ecx
839 ; sqr a[5]*a[0]
840 mul edx
841 add eax,eax
842 adc edx,edx
843 adc ecx,0
844 add ebp,eax
845 adc ebx,edx
846 mov eax,DWORD [16+esi]
847 adc ecx,0
848 mov edx,DWORD [4+esi]
849 ; sqr a[4]*a[1]
850 mul edx
851 add eax,eax
852 adc edx,edx
853 adc ecx,0
854 add ebp,eax
855 adc ebx,edx
856 mov eax,DWORD [12+esi]
857 adc ecx,0
858 mov edx,DWORD [8+esi]
859 ; sqr a[3]*a[2]
860 mul edx
861 add eax,eax
862 adc edx,edx
863 adc ecx,0
864 add ebp,eax
865 adc ebx,edx
866 mov eax,DWORD [24+esi]
867 adc ecx,0
868 mov DWORD [20+edi],ebp
869 mov edx,DWORD [esi]
870 ; saved r[5]
871 ; ############### Calculate word 6
872 xor ebp,ebp
873 ; sqr a[6]*a[0]
874 mul edx
875 add eax,eax
876 adc edx,edx
877 adc ebp,0
878 add ebx,eax
879 adc ecx,edx
880 mov eax,DWORD [20+esi]
881 adc ebp,0
882 mov edx,DWORD [4+esi]
883 ; sqr a[5]*a[1]
884 mul edx
885 add eax,eax
886 adc edx,edx
887 adc ebp,0
888 add ebx,eax
889 adc ecx,edx
890 mov eax,DWORD [16+esi]
891 adc ebp,0
892 mov edx,DWORD [8+esi]
893 ; sqr a[4]*a[2]
894 mul edx
895 add eax,eax
896 adc edx,edx
897 adc ebp,0
898 add ebx,eax
899 adc ecx,edx
900 mov eax,DWORD [12+esi]
901 adc ebp,0
902 ; sqr a[3]*a[3]
903 mul eax
904 add ebx,eax
905 adc ecx,edx
906 mov edx,DWORD [esi]
907 adc ebp,0
908 mov DWORD [24+edi],ebx
909 mov eax,DWORD [28+esi]
910 ; saved r[6]
911 ; ############### Calculate word 7
912 xor ebx,ebx
913 ; sqr a[7]*a[0]
914 mul edx
915 add eax,eax
916 adc edx,edx
917 adc ebx,0
918 add ecx,eax
919 adc ebp,edx
920 mov eax,DWORD [24+esi]
921 adc ebx,0
922 mov edx,DWORD [4+esi]
923 ; sqr a[6]*a[1]
924 mul edx
925 add eax,eax
926 adc edx,edx
927 adc ebx,0
928 add ecx,eax
929 adc ebp,edx
930 mov eax,DWORD [20+esi]
931 adc ebx,0
932 mov edx,DWORD [8+esi]
933 ; sqr a[5]*a[2]
934 mul edx
935 add eax,eax
936 adc edx,edx
937 adc ebx,0
938 add ecx,eax
939 adc ebp,edx
940 mov eax,DWORD [16+esi]
941 adc ebx,0
942 mov edx,DWORD [12+esi]
943 ; sqr a[4]*a[3]
944 mul edx
945 add eax,eax
946 adc edx,edx
947 adc ebx,0
948 add ecx,eax
949 adc ebp,edx
950 mov eax,DWORD [28+esi]
951 adc ebx,0
952 mov DWORD [28+edi],ecx
953 mov edx,DWORD [4+esi]
954 ; saved r[7]
955 ; ############### Calculate word 8
956 xor ecx,ecx
957 ; sqr a[7]*a[1]
958 mul edx
959 add eax,eax
960 adc edx,edx
961 adc ecx,0
962 add ebp,eax
963 adc ebx,edx
964 mov eax,DWORD [24+esi]
965 adc ecx,0
966 mov edx,DWORD [8+esi]
967 ; sqr a[6]*a[2]
968 mul edx
969 add eax,eax
970 adc edx,edx
971 adc ecx,0
972 add ebp,eax
973 adc ebx,edx
974 mov eax,DWORD [20+esi]
975 adc ecx,0
976 mov edx,DWORD [12+esi]
977 ; sqr a[5]*a[3]
978 mul edx
979 add eax,eax
980 adc edx,edx
981 adc ecx,0
982 add ebp,eax
983 adc ebx,edx
984 mov eax,DWORD [16+esi]
985 adc ecx,0
986 ; sqr a[4]*a[4]
987 mul eax
988 add ebp,eax
989 adc ebx,edx
990 mov edx,DWORD [8+esi]
991 adc ecx,0
992 mov DWORD [32+edi],ebp
993 mov eax,DWORD [28+esi]
994 ; saved r[8]
995 ; ############### Calculate word 9
996 xor ebp,ebp
997 ; sqr a[7]*a[2]
998 mul edx
999 add eax,eax
1000 adc edx,edx
1001 adc ebp,0
1002 add ebx,eax
1003 adc ecx,edx
1004 mov eax,DWORD [24+esi]
1005 adc ebp,0
1006 mov edx,DWORD [12+esi]
1007 ; sqr a[6]*a[3]
1008 mul edx
1009 add eax,eax
1010 adc edx,edx
1011 adc ebp,0
1012 add ebx,eax
1013 adc ecx,edx
1014 mov eax,DWORD [20+esi]
1015 adc ebp,0
1016 mov edx,DWORD [16+esi]
1017 ; sqr a[5]*a[4]
1018 mul edx
1019 add eax,eax
1020 adc edx,edx
1021 adc ebp,0
1022 add ebx,eax
1023 adc ecx,edx
1024 mov eax,DWORD [28+esi]
1025 adc ebp,0
1026 mov DWORD [36+edi],ebx
1027 mov edx,DWORD [12+esi]
1028 ; saved r[9]
1029 ; ############### Calculate word 10
1030 xor ebx,ebx
1031 ; sqr a[7]*a[3]
1032 mul edx
1033 add eax,eax
1034 adc edx,edx
1035 adc ebx,0
1036 add ecx,eax
1037 adc ebp,edx
1038 mov eax,DWORD [24+esi]
1039 adc ebx,0
1040 mov edx,DWORD [16+esi]
1041 ; sqr a[6]*a[4]
1042 mul edx
1043 add eax,eax
1044 adc edx,edx
1045 adc ebx,0
1046 add ecx,eax
1047 adc ebp,edx
1048 mov eax,DWORD [20+esi]
1049 adc ebx,0
1050 ; sqr a[5]*a[5]
1051 mul eax
1052 add ecx,eax
1053 adc ebp,edx
1054 mov edx,DWORD [16+esi]
1055 adc ebx,0
1056 mov DWORD [40+edi],ecx
1057 mov eax,DWORD [28+esi]
1058 ; saved r[10]
1059 ; ############### Calculate word 11
1060 xor ecx,ecx
1061 ; sqr a[7]*a[4]
1062 mul edx
1063 add eax,eax
1064 adc edx,edx
1065 adc ecx,0
1066 add ebp,eax
1067 adc ebx,edx
1068 mov eax,DWORD [24+esi]
1069 adc ecx,0
1070 mov edx,DWORD [20+esi]
1071 ; sqr a[6]*a[5]
1072 mul edx
1073 add eax,eax
1074 adc edx,edx
1075 adc ecx,0
1076 add ebp,eax
1077 adc ebx,edx
1078 mov eax,DWORD [28+esi]
1079 adc ecx,0
1080 mov DWORD [44+edi],ebp
1081 mov edx,DWORD [20+esi]
1082 ; saved r[11]
1083 ; ############### Calculate word 12
1084 xor ebp,ebp
1085 ; sqr a[7]*a[5]
1086 mul edx
1087 add eax,eax
1088 adc edx,edx
1089 adc ebp,0
1090 add ebx,eax
1091 adc ecx,edx
1092 mov eax,DWORD [24+esi]
1093 adc ebp,0
1094 ; sqr a[6]*a[6]
1095 mul eax
1096 add ebx,eax
1097 adc ecx,edx
1098 mov edx,DWORD [24+esi]
1099 adc ebp,0
1100 mov DWORD [48+edi],ebx
1101 mov eax,DWORD [28+esi]
1102 ; saved r[12]
1103 ; ############### Calculate word 13
1104 xor ebx,ebx
1105 ; sqr a[7]*a[6]
1106 mul edx
1107 add eax,eax
1108 adc edx,edx
1109 adc ebx,0
1110 add ecx,eax
1111 adc ebp,edx
1112 mov eax,DWORD [28+esi]
1113 adc ebx,0
1114 mov DWORD [52+edi],ecx
1115 ; saved r[13]
1116 ; ############### Calculate word 14
1117 xor ecx,ecx
1118 ; sqr a[7]*a[7]
1119 mul eax
1120 add ebp,eax
1121 adc ebx,edx
1122 adc ecx,0 ; ecx is not stored afterwards: r[15] below is the last output word
1123 mov DWORD [56+edi],ebp
1124 ; saved r[14]
1125 mov DWORD [60+edi],ebx ; save r[15]
1126 pop ebx ; restore the four registers saved in the prologue, in reverse order
1127 pop ebp
1128 pop edi
1129 pop esi
1130 ret ; restored: without it execution falls through into _bn_sqr_comba4 and r[0..7] is overwritten
1131 global _bn_sqr_comba4 ; r[0..7] = a[0..3] squared, 32-bit words, column-by-column (comba)
1132 align 16
1133 _bn_sqr_comba4: ; on entry: [esp+4]=r (result), [esp+8]=a; clobbers eax, ecx, edx, flags
1134 L$_bn_sqr_comba4_begin:
1135 push esi
1136 push edi
1137 push ebp
1138 push ebx
1139 mov edi,DWORD [20+esp] ; edi = r (1st argument; four registers pushed)
1140 mov esi,DWORD [24+esp] ; esi = a (2nd argument)
1141 xor ebx,ebx ; ebx, ecx, ebp form a rotating three-word column accumulator
1142 xor ecx,ecx
1143 mov eax,DWORD [esi]
1144 ; ############### Calculate word 0
1145 xor ebp,ebp
1146 ; sqr a[0]*a[0]
1147 mul eax
1148 add ebx,eax
1149 adc ecx,edx
1150 mov edx,DWORD [esi]
1151 adc ebp,0
1152 mov DWORD [edi],ebx
1153 mov eax,DWORD [4+esi]
1154 ; saved r[0]
1155 ; ############### Calculate word 1
1156 xor ebx,ebx
1157 ; sqr a[1]*a[0]
1158 mul edx
1159 add eax,eax ; double edx:eax: a cross term a[i]*a[j] (i != j) occurs twice in the square
1160 adc edx,edx
1161 adc ebx,0 ; bit shifted out of the doubled product goes to the top accumulator word
1162 add ecx,eax
1163 adc ebp,edx
1164 mov eax,DWORD [8+esi]
1165 adc ebx,0
1166 mov DWORD [4+edi],ecx
1167 mov edx,DWORD [esi]
1168 ; saved r[1]
1169 ; ############### Calculate word 2
1170 xor ecx,ecx
1171 ; sqr a[2]*a[0]
1172 mul edx
1173 add eax,eax
1174 adc edx,edx
1175 adc ecx,0
1176 add ebp,eax
1177 adc ebx,edx
1178 mov eax,DWORD [4+esi]
1179 adc ecx,0
1180 ; sqr a[1]*a[1]
1181 mul eax ; diagonal term a[i]*a[i] is added once, not doubled
1182 add ebp,eax
1183 adc ebx,edx
1184 mov edx,DWORD [esi]
1185 adc ecx,0
1186 mov DWORD [8+edi],ebp
1187 mov eax,DWORD [12+esi]
1188 ; saved r[2]
1189 ; ############### Calculate word 3
1190 xor ebp,ebp
1191 ; sqr a[3]*a[0]
1192 mul edx
1193 add eax,eax
1194 adc edx,edx
1195 adc ebp,0
1196 add ebx,eax
1197 adc ecx,edx
1198 mov eax,DWORD [8+esi]
1199 adc ebp,0
1200 mov edx,DWORD [4+esi]
1201 ; sqr a[2]*a[1]
1202 mul edx
1203 add eax,eax
1204 adc edx,edx
1205 adc ebp,0
1206 add ebx,eax
1207 adc ecx,edx
1208 mov eax,DWORD [12+esi]
1209 adc ebp,0
1210 mov DWORD [12+edi],ebx
1211 mov edx,DWORD [4+esi]
1212 ; saved r[3]
1213 ; ############### Calculate word 4
1214 xor ebx,ebx
1215 ; sqr a[3]*a[1]
1216 mul edx
1217 add eax,eax
1218 adc edx,edx
1219 adc ebx,0
1220 add ecx,eax
1221 adc ebp,edx
1222 mov eax,DWORD [8+esi]
1223 adc ebx,0
1224 ; sqr a[2]*a[2]
1225 mul eax
1226 add ecx,eax
1227 adc ebp,edx
1228 mov edx,DWORD [8+esi]
1229 adc ebx,0
1230 mov DWORD [16+edi],ecx
1231 mov eax,DWORD [12+esi]
1232 ; saved r[4]
1233 ; ############### Calculate word 5
1234 xor ecx,ecx
1235 ; sqr a[3]*a[2]
1236 mul edx
1237 add eax,eax
1238 adc edx,edx
1239 adc ecx,0
1240 add ebp,eax
1241 adc ebx,edx
1242 mov eax,DWORD [12+esi]
1243 adc ecx,0
1244 mov DWORD [20+edi],ebp
1245 ; saved r[5]
1246 ; ############### Calculate word 6
1247 xor ebp,ebp
1248 ; sqr a[3]*a[3]
1249 mul eax
1250 add ebx,eax
1251 adc ecx,edx
1252 adc ebp,0 ; ebp is not stored afterwards: r[7] below is the last output word
1253 mov DWORD [24+edi],ebx
1254 ; saved r[6]
1255 mov DWORD [28+edi],ecx ; save r[7]
1256 pop ebx ; restore the four registers saved in the prologue, in reverse order
1257 pop ebp
1258 pop edi
1259 pop esi
1260 ret ; restored: without it execution runs past the end of the routine