Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / test / X86 / bug-reorder-bb-jrcxz.s
blob13611119beaf0745ca68e67202401b265d650de5
1 # This test performs basic block (BB) reordering on code containing the
2 # unsupported instruction jrcxz. Reordering works correctly with the
3 # following options: None, Normal or Reverse. Other strategies
4 # fail with the assertion `isIntN(Size * 8 + 1, Value)`.
5 # The cause is the distance between the BBs when one of them contains
6 # a jrcxz instruction.
7 # Example: OpenSSL
8 # https://github.com/openssl/openssl/blob/master/crypto/bn/asm/x86_64-mont5.pl#L3319
10 # REQUIRES: system-linux
12 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
13 # RUN: %s -o %t.o
14 # RUN: link_fdata %s %t.o %t.fdata
15 # RUN: %clang %cflags %t.o -falign-labels -march=native -o %t.exe -Wl,-q
17 # RUN: llvm-bolt %t.exe -o %t.bolted --data %t.fdata \
18 # RUN: --reorder-blocks=ext-tsp --reorder-functions=hfsort \
19 # RUN: --split-functions --split-all-cold --split-eh --dyno-stats \
20 # RUN: --print-finalized 2>&1 | FileCheck %s
22 # CHECK-NOT: value of -2105 is too large for field of 1 byte.
# Section setup and test entry point. `main` is emitted into .text.startup,
# aligned with .p2align 5,,31 (32-byte boundary, at most 31 padding bytes),
# and consists of a single jump into bn_sqrx8x_internal.
24 .text
25 .section .text.startup,"ax",@progbits
26 .p2align 5,,31
27 .globl main
28 .type main, @function
29 main:
30 jmp bn_sqrx8x_internal
# bn_sqrx8x_internal: global, hidden-visibility function aligned to 32 bytes.
# Per the file header, the body is taken from OpenSSL's x86_64-mont5.pl.
# Inputs are not set up by this file; the code reads %rsi, %r9, %rsp-relative
# slots and %xmm0/%xmm2/%xmm3 as given by whoever jumps here.
# NOTE(review): the exact contract of those inputs is defined by the OpenSSL
# source, not by anything visible in this test.
32 .globl bn_sqrx8x_internal
33 .hidden bn_sqrx8x_internal
34 .type bn_sqrx8x_internal,@function
35 .align 32
36 bn_sqrx8x_internal:
37 __bn_sqrx8x_internal:
# Branch-profile records extracted by the link_fdata step in the test header.
# NOTE(review): each record presumably reads
#   <from: kind name offset> <to: kind name offset> <mispredictions> <count>
# with hexadecimal byte offsets into the named function - confirm against the
# BOLT fdata documentation. Because the offsets are tied to the encoded
# layout, instructions and padding bytes in this function must not change.
38 # FDATA: 1 bn_from_mont8x 160 1 bn_sqrx8x_internal 0 0 56
39 # FDATA: 1 bn_sqrx8x_internal 13 1 bn_sqrx8x_internal 40 0 60972
40 # FDATA: 1 bn_sqrx8x_internal 5f 1 bn_sqrx8x_internal 2c 0 60972
41 # FDATA: 1 bn_sqrx8x_internal 2f1 1 bn_sqrx8x_internal 500 0 60972
42 # FDATA: 1 bn_sqrx8x_internal 34a 1 bn_sqrx8x_internal 360 0 60972
43 # FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 360 0 447888
44 # FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 417 0 63984
45 # FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 480 0 60972
46 # FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 429 0 3012
47 # FDATA: 1 bn_sqrx8x_internal 467 1 bn_sqrx8x_internal 360 0 3012
48 # FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 80 0 58964
49 # FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 4c0 0 2008
50 # FDATA: 1 bn_sqrx8x_internal 4fb 1 bn_sqrx8x_internal 80 0 2008
51 # FDATA: 1 bn_sqrx8x_internal 5f0 1 bn_sqrx8x_internal 5f2 0 180908
52 # FDATA: 1 bn_sqrx8x_internal 61b 1 bn_sqrx8x_internal 540 0 180908
53 # FDATA: 1 bn_sqrx8x_internal 632 1 bn_sqrx8x_internal 637 0 59020
54 # FDATA: 1 bn_sqrx8x_internal 657 1 bn_sqrx8x_internal 660 0 59020
55 # FDATA: 1 bn_sqrx8x_internal 696 1 bn_sqrx8x_internal 6a0 0 120048
56 # FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 6a0 0 840336
57 # FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 760 0 120048
58 # FDATA: 1 bn_sqrx8x_internal 768 1 bn_sqrx8x_internal 76e 0 120048
59 # FDATA: 1 bn_sqrx8x_internal 7b2 1 bn_sqrx8x_internal 7c0 0 120048
60 # FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 7c0 0 896560
61 # FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 874 0 128080
62 # FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 8c0 0 120048
63 # FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 87b 0 8032
64 # FDATA: 1 bn_sqrx8x_internal 8bb 1 bn_sqrx8x_internal 7c0 0 8032
65 # FDATA: 1 bn_sqrx8x_internal 8e8 1 bn_sqrx8x_internal 8ed 0 120048
66 # FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 660 0 61028
67 # FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 95b 0 59020
68 # FDATA: 0 [unknown] 0 1 bn_sqrx8x_internal 5f0 0 59020
69 .cfi_startproc
# Prologue: rdi = rsp + 56 (destination of the stores in the zeroing loop),
# rbp = rsi + r9. The incoming r9 is saved at 8(%rsp) and rsi + r9 at
# 16(%rsp); the latter is the bound later compared against rsi and rbp.
70 leaq 48+8(%rsp),%rdi
71 leaq (%rsi,%r9,1),%rbp
72 movq %r9,0+8(%rsp)
73 movq %rbp,8+8(%rsp)
# The zeroing loop is entered at its midpoint, so the first pass performs
# only the second group of four stores.
74 jmp .Lsqr8x_zero_start
# Store loop followed by register initialisation for the outer loop.
# Each full pass writes %xmm0 to eight consecutive 16-byte slots at (%rdi),
# advances rdi by 128 and subtracts 64 from r9, repeating until r9 reaches 0.
# NOTE(review): %xmm0 is never written in this file; the label name suggests
# the caller provides it as all-zero - confirm against the OpenSSL source.
76 .align 32
# Hand-encoded bytes; they decode as a prefixed multi-byte NOP
# (66 66 66 2e 0f 1f 84 00 + 32-bit displacement), i.e. padding.
77 .byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
78 .Lsqrx8x_zero:
# 0x3e is a DS segment-override prefix that attaches to the next movdqa;
# presumably it only lengthens the instruction for alignment purposes.
79 .byte 0x3e
80 movdqa %xmm0,0(%rdi)
81 movdqa %xmm0,16(%rdi)
82 movdqa %xmm0,32(%rdi)
83 movdqa %xmm0,48(%rdi)
# Entry point used by the prologue's jmp.
84 .Lsqr8x_zero_start:
85 movdqa %xmm0,64(%rdi)
86 movdqa %xmm0,80(%rdi)
87 movdqa %xmm0,96(%rdi)
88 movdqa %xmm0,112(%rdi)
89 leaq 128(%rdi),%rdi
90 subq $64,%r9
91 jnz .Lsqrx8x_zero
# rdx = first 64-bit word at (%rsi); it is the implicit mulx multiplicand.
93 movq 0(%rsi),%rdx
# Zero r10..r15 and rbp. xor also clears CF and OF, so both the adcx (CF)
# and adox (OF) carry chains start the outer loop empty.
95 xorq %r10,%r10
96 xorq %r11,%r11
97 xorq %r12,%r12
98 xorq %r13,%r13
99 xorq %r14,%r14
100 xorq %r15,%r15
# rdi is reset to rsp + 56, the start of the region stored to above.
101 leaq 48+8(%rsp),%rdi
102 xorq %rbp,%rbp
103 jmp .Lsqrx8x_outer_loop
# Outer loop body. rdx is multiplied (mulx: flags untouched, low half to the
# second operand, high half to the third) by the words at 8..56(%rsi), then
# rdx is reloaded with the next word of the same 64-byte group and the
# process repeats with one fewer product each round. adcx accumulates through
# CF only and adox through OF only, giving two independent carry chains.
# Finished words are stored to 8..64(%rdi) as they become final.
# Several mulx instructions are emitted as raw .byte sequences; the decoded
# form is noted above each one (decoded by hand - verify with a disassembler).
105 .align 32
106 .Lsqrx8x_outer_loop:
107 mulxq 8(%rsi),%r8,%rax
108 adcxq %r9,%r8
109 adoxq %rax,%r10
110 mulxq 16(%rsi),%r9,%rax
111 adcxq %r10,%r9
112 adoxq %rax,%r11
# Decodes as: mulxq 24(%rsi),%r10,%rax (32-bit displacement form).
113 .byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
114 adcxq %r11,%r10
115 adoxq %rax,%r12
# Decodes as: mulxq 32(%rsi),%r11,%rax
116 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
117 adcxq %r12,%r11
118 adoxq %rax,%r13
119 mulxq 40(%rsi),%r12,%rax
120 adcxq %r13,%r12
121 adoxq %rax,%r14
122 mulxq 48(%rsi),%r13,%rax
123 adcxq %r14,%r13
124 adoxq %r15,%rax
125 mulxq 56(%rsi),%r14,%r15
# Next multiplicand: the word at 8(%rsi).
126 movq 8(%rsi),%rdx
127 adcxq %rax,%r14
128 adoxq %rbp,%r15
129 adcq 64(%rdi),%r15
130 movq %r8,8(%rdi)
131 movq %r9,16(%rdi)
# rcx = 0 or -1 according to CF: the carry is parked in rcx until the negq
# after the loop-exit test below.
132 sbbq %rcx,%rcx
# rbp = 0; also clears CF and OF for the next round of adcx/adox.
133 xorq %rbp,%rbp
135 mulxq 16(%rsi),%r8,%rbx
136 mulxq 24(%rsi),%r9,%rax
137 adcxq %r10,%r8
138 adoxq %rbx,%r9
139 mulxq 32(%rsi),%r10,%rbx
140 adcxq %r11,%r9
141 adoxq %rax,%r10
# Decodes as: mulxq 40(%rsi),%r11,%rax
142 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
143 adcxq %r12,%r10
144 adoxq %rbx,%r11
# Decodes as: mulxq 48(%rsi),%r12,%rbx
145 .byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
146 adcxq %r13,%r11
147 adoxq %r14,%r12
# Decodes as: mulxq 56(%rsi),%r13,%r14
148 .byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
# Next multiplicand: the word at 16(%rsi).
149 movq 16(%rsi),%rdx
150 adcxq %rax,%r12
151 adoxq %rbx,%r13
152 adcxq %r15,%r13
153 adoxq %rbp,%r14
154 adcxq %rbp,%r14
156 movq %r8,24(%rdi)
157 movq %r9,32(%rdi)
159 mulxq 24(%rsi),%r8,%rbx
160 mulxq 32(%rsi),%r9,%rax
161 adcxq %r10,%r8
162 adoxq %rbx,%r9
163 mulxq 40(%rsi),%r10,%rbx
164 adcxq %r11,%r9
165 adoxq %rax,%r10
# Decodes as: mulxq 48(%rsi),%r11,%rax
166 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
167 adcxq %r12,%r10
168 adoxq %r13,%r11
# Decodes as: mulxq 56(%rsi),%r12,%r13
169 .byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
# DS segment-override prefix attached to the following movq (padding).
170 .byte 0x3e
# Next multiplicand: the word at 24(%rsi).
171 movq 24(%rsi),%rdx
172 adcxq %rbx,%r11
173 adoxq %rax,%r12
174 adcxq %r14,%r12
175 movq %r8,40(%rdi)
176 movq %r9,48(%rdi)
177 mulxq 32(%rsi),%r8,%rax
178 adoxq %rbp,%r13
179 adcxq %rbp,%r13
181 mulxq 40(%rsi),%r9,%rbx
182 adcxq %r10,%r8
183 adoxq %rax,%r9
184 mulxq 48(%rsi),%r10,%rax
185 adcxq %r11,%r9
186 adoxq %r12,%r10
187 mulxq 56(%rsi),%r11,%r12
# Next multiplicand is 32(%rsi); the words at 40, 48 and 56(%rsi) are kept
# in r14, r15 and r8 so the remaining products use register operands.
188 movq 32(%rsi),%rdx
189 movq 40(%rsi),%r14
190 adcxq %rbx,%r10
191 adoxq %rax,%r11
192 movq 48(%rsi),%r15
193 adcxq %r13,%r11
194 adoxq %rbp,%r12
195 adcxq %rbp,%r12
197 movq %r8,56(%rdi)
198 movq %r9,64(%rdi)
200 mulxq %r14,%r9,%rax
201 movq 56(%rsi),%r8
202 adcxq %r10,%r9
203 mulxq %r15,%r10,%rbx
204 adoxq %rax,%r10
205 adcxq %r11,%r10
206 mulxq %r8,%r11,%rax
207 movq %r14,%rdx
208 adoxq %rbx,%r11
209 adcxq %r12,%r11
211 adcxq %rbp,%rax
213 mulxq %r15,%r14,%rbx
214 mulxq %r8,%r12,%r13
215 movq %r15,%rdx
# Advance rsi past the 64-byte group just processed (lea keeps the flags).
216 leaq 64(%rsi),%rsi
217 adcxq %r14,%r11
218 adoxq %rbx,%r12
219 adcxq %rax,%r12
220 adoxq %rbp,%r13
# Two address-size prefixes attached to the following mulx (padding).
222 .byte 0x67,0x67
223 mulxq %r8,%r8,%r14
224 adcxq %r8,%r13
225 adcxq %rbp,%r14
# Leave the outer loop once rsi reaches the bound saved at 16(%rsp)
# (rsi + r9 as computed in the prologue).
227 cmpq 8+8(%rsp),%rsi
228 je .Lsqrx8x_outer_break
# negq sets CF = (rcx != 0), restoring the carry parked by sbbq above; the
# following movq does not touch flags and turns rcx into the inner-loop
# counter (-8, counted up to 0).
230 negq %rcx
231 movq $-8,%rcx
232 movq %rbp,%r15
# Add the words at 64..120(%rdi) into r8..r15 with carry.
233 movq 64(%rdi),%r8
234 adcxq 72(%rdi),%r9
235 adcxq 80(%rdi),%r10
236 adcxq 88(%rdi),%r11
237 adcq 96(%rdi),%r12
238 adcq 104(%rdi),%r13
239 adcq 112(%rdi),%r14
240 adcq 120(%rdi),%r15
# rbp = rsi: base pointer for the inner loop's mulx operands.
241 leaq (%rsi),%rbp
242 leaq 128(%rdi),%rdi
# Park the carry as 0/-1 at 24(%rsp) and remember rdi at 32(%rsp).
243 sbbq %rax,%rax
245 movq -64(%rsi),%rdx
246 movq %rax,16+8(%rsp)
247 movq %rdi,24+8(%rsp)
# rax = 0; clears CF and OF before entering the inner loop.
250 xorl %eax,%eax
251 jmp .Lsqrx8x_loop
# Inner loop: eight iterations (rcx runs from -8 up to 0). Each iteration
# multiplies rdx by the eight words at 0..56(%rbp), folds the products into
# the rotating accumulator r8..r15 using the adcx (CF) and adox (OF) chains,
# stores the completed low word at (%rdi,%rcx,8) and loads the next
# multiplicand from 8(%rsi,%rcx,8).
253 .align 32
254 .Lsqrx8x_loop:
255 movq %r8,%rbx
256 mulxq 0(%rbp),%rax,%r8
257 adcxq %rax,%rbx
258 adoxq %r9,%r8
260 mulxq 8(%rbp),%rax,%r9
261 adcxq %rax,%r8
262 adoxq %r10,%r9
264 mulxq 16(%rbp),%rax,%r10
265 adcxq %rax,%r9
266 adoxq %r11,%r10
268 mulxq 24(%rbp),%rax,%r11
269 adcxq %rax,%r10
270 adoxq %r12,%r11
# Decodes as: mulxq 32(%rbp),%rax,%r12 (decoded by hand - verify).
272 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
273 adcxq %rax,%r11
274 adoxq %r13,%r12
276 mulxq 40(%rbp),%rax,%r13
277 adcxq %rax,%r12
278 adoxq %r14,%r13
280 mulxq 48(%rbp),%rax,%r14
281 movq %rbx,(%rdi,%rcx,8)
# mov (not xor) is used to zero ebx so CF and OF stay intact mid-chain.
282 movl $0,%ebx
283 adcxq %rax,%r13
284 adoxq %r15,%r14
# Decodes as: mulxq 56(%rbp),%rax,%r15 (decoded by hand - verify).
286 .byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
287 movq 8(%rsi,%rcx,8),%rdx
288 adcxq %rax,%r14
289 adoxq %rbx,%r15
290 adcxq %rbx,%r15
# Address-size prefix attached to the following incq (padding).
292 .byte 0x67
293 incq %rcx
294 jnz .Lsqrx8x_loop
# Eight words done: advance rbp by 64, reset the counter and stop when rbp
# reaches the bound saved at 16(%rsp).
296 leaq 64(%rbp),%rbp
297 movq $-8,%rcx
298 cmpq 8+8(%rsp),%rbp
299 je .Lsqrx8x_break
# rbx is 0 here, so subtracting the 0/-1 mask saved at 24(%rsp) sets CF
# exactly when a carry was parked.
301 subq 16+8(%rsp),%rbx
# Operand-size prefix attached to the following movq (padding).
302 .byte 0x66
303 movq -64(%rsi),%rdx
# Add the next eight words at 0..56(%rdi) into r8..r15 with carry.
304 adcxq 0(%rdi),%r8
305 adcxq 8(%rdi),%r9
306 adcq 16(%rdi),%r10
307 adcq 24(%rdi),%r11
308 adcq 32(%rdi),%r12
309 adcq 40(%rdi),%r13
310 adcq 48(%rdi),%r14
311 adcq 56(%rdi),%r15
312 leaq 64(%rdi),%rdi
# Address-size prefix attached to the following sbbq (padding).
313 .byte 0x67
# Park the new carry as 0/-1 at 24(%rsp); xorl then clears ebx, CF and OF.
314 sbbq %rax,%rax
315 xorl %ebx,%ebx
316 movq %rax,16+8(%rsp)
317 jmp .Lsqrx8x_loop
# Exit from the inner loop: restore the parked carry, ripple it through
# r8..r15, then either go straight back to the outer loop (when rdi equals
# the pointer saved at 32(%rsp)) or flush r9..r15 to 8..56(%rdi) and reload
# them from 8..56 of the saved pointer before continuing there.
319 .align 32
320 .Lsqrx8x_break:
# rbp = 0 (used as the zero addend below); xor clears CF and OF.
321 xorq %rbp,%rbp
# rbx is 0 on arrival, so this sets CF iff the mask at 24(%rsp) is non-zero.
322 subq 16+8(%rsp),%rbx
323 adcxq %rbp,%r8
# rcx = rdi value remembered at 32(%rsp) before the inner loop started.
324 movq 24+8(%rsp),%rcx
325 adcxq %rbp,%r9
# rdx = word at 0(%rsi), the first multiplicand of the next outer iteration.
326 movq 0(%rsi),%rdx
327 adcq $0,%r10
328 movq %r8,0(%rdi)
329 adcq $0,%r11
330 adcq $0,%r12
331 adcq $0,%r13
332 adcq $0,%r14
333 adcq $0,%r15
334 cmpq %rcx,%rdi
335 je .Lsqrx8x_outer_loop
# Interleaved store of the current accumulator and load of the words at the
# saved position.
337 movq %r9,8(%rdi)
338 movq 8(%rcx),%r9
339 movq %r10,16(%rdi)
340 movq 16(%rcx),%r10
341 movq %r11,24(%rdi)
342 movq 24(%rcx),%r11
343 movq %r12,32(%rdi)
344 movq 32(%rcx),%r12
345 movq %r13,40(%rdi)
346 movq 40(%rcx),%r13
347 movq %r14,48(%rdi)
348 movq 48(%rcx),%r14
349 movq %r15,56(%rdi)
350 movq 56(%rcx),%r15
351 movq %rcx,%rdi
352 jmp .Lsqrx8x_outer_loop
# Exit from the outer loop: flush r9..r14 to 72..112(%rdi), then prime the
# registers for the shift-and-add pass that follows (execution falls through
# into .Lsqrx4x_shift_n_add).
354 .align 32
355 .Lsqrx8x_outer_break:
356 movq %r9,72(%rdi)
# Decodes as: movq %xmm3,%rcx (66 48 0f 7e d9; decoded by hand - verify).
# NOTE(review): %xmm3 is not written in this file; rcx is subsequently used
# as an index that counts up to 0 in steps of 32, so it presumably holds a
# negative byte count supplied by the caller.
357 .byte 102,72,15,126,217
358 movq %r10,80(%rdi)
359 movq %r11,88(%rdi)
360 movq %r12,96(%rdi)
361 movq %r13,104(%rdi)
362 movq %r14,112(%rdi)
# rdi = rsp + 56 again; rdx = word at (%rsi,%rcx,1).
363 leaq 48+8(%rsp),%rdi
364 movq (%rsi,%rcx,1),%rdx
366 movq 8(%rdi),%r11
# r10 = 0; also clears CF and OF for the two carry chains below.
367 xorq %r10,%r10
# r9 = the value saved at 8(%rsp) by the prologue (the incoming r9).
368 movq 0+8(%rsp),%r9
# adox of a register with itself doubles it, with carry in/out through OF.
369 adoxq %r11,%r11
370 movq 16(%rdi),%r12
371 movq 24(%rdi),%r13
# Shift-and-add loop, four squarings per iteration. For each input word:
#   mulxq %rdx,%rax,%rbx   -> rbx:rax = rdx * rdx
#   adoxq r,r              -> doubles a previously stored word (OF chain)
#   adcxq ...              -> adds the doubled word to the square (CF chain)
# and the two result words are written back to (%rdi). rcx is a negative
# index advanced by 32 per iteration; the loop ends when it reaches 0.
# This block contains the jrcxz instruction the test is about: jrcxz only has
# an 8-bit displacement, so its target must stay within short-branch range
# after BOLT reorders or splits basic blocks.
373 .align 32
374 .Lsqrx4x_shift_n_add:
375 mulxq %rdx,%rax,%rbx
376 adoxq %r12,%r12
377 adcxq %r10,%rax
# Decodes as: movq 8(%rsi,%rcx,1),%rdx (32-bit displacement form).
378 .byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
# Decodes as: movq 32(%rdi),%r10 (32-bit displacement form).
379 .byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
380 adoxq %r13,%r13
381 adcxq %r11,%rbx
382 movq 40(%rdi),%r11
383 movq %rax,0(%rdi)
384 movq %rbx,8(%rdi)
386 mulxq %rdx,%rax,%rbx
387 adoxq %r10,%r10
388 adcxq %r12,%rax
389 movq 16(%rsi,%rcx,1),%rdx
390 movq 48(%rdi),%r12
391 adoxq %r11,%r11
392 adcxq %r13,%rbx
393 movq 56(%rdi),%r13
394 movq %rax,16(%rdi)
395 movq %rbx,24(%rdi)
397 mulxq %rdx,%rax,%rbx
398 adoxq %r12,%r12
399 adcxq %r10,%rax
400 movq 24(%rsi,%rcx,1),%rdx
# Advance the index with lea so CF and OF survive for the carry chains.
401 leaq 32(%rcx),%rcx
402 movq 64(%rdi),%r10
403 adoxq %r13,%r13
404 adcxq %r11,%rbx
405 movq 72(%rdi),%r11
406 movq %rax,32(%rdi)
407 movq %rbx,40(%rdi)
409 mulxq %rdx,%rax,%rbx
410 adoxq %r10,%r10
411 adcxq %r12,%rax
# Exit when rcx == 0. jrcxz tests rcx directly and neither reads nor writes
# the flags, which keeps both carry chains alive across the branch.
412 jrcxz .Lsqrx4x_shift_n_add_break
# Decodes as: movq 0(%rsi,%rcx,1),%rdx (32-bit displacement form).
413 .byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
414 adoxq %r11,%r11
415 adcxq %r13,%rbx
416 movq 80(%rdi),%r12
417 movq 88(%rdi),%r13
418 movq %rax,48(%rdi)
419 movq %rbx,56(%rdi)
420 leaq 64(%rdi),%rdi
422 jmp .Lsqrx4x_shift_n_add
# Target of the jrcxz above: completes the last pair of result words
# (the part of the loop body skipped by the early exit), advances rdi by 64
# and falls through into __bn_sqrx8x_reduction.
424 .align 32
425 .Lsqrx4x_shift_n_add_break:
426 adcxq %r13,%rbx
427 movq %rax,48(%rdi)
428 movq %rbx,56(%rdi)
429 leaq 64(%rdi),%rdi
# Decodes as: movq %xmm2,%rbp (66 48 0f 7e d5; decoded by hand - verify).
# NOTE(review): %xmm2 is not written in this file; rbp is used afterwards as
# the base of the mulx memory operands in the reduction, so it presumably
# carries a caller-supplied pointer.
430 .byte 102,72,15,126,213
# Set-up for the reduction phase (reached by fall-through):
#   rax = 0
#   rbx = value at 40(%rsp), rdx = value at 56(%rsp)
#   8(%rsp)  <- rbp + r9 - 64   (bound tested against rbp in the loops below)
#   16(%rsp) <- rdi             (bound tested against r8 at the very end)
#   rdi = rsp + 56
# NOTE(review): 40(%rsp) is only read in this file; it is presumably a
# constant placed there by the caller.
431 __bn_sqrx8x_reduction:
432 xorl %eax,%eax
433 movq 32+8(%rsp),%rbx
434 movq 48+8(%rsp),%rdx
435 leaq -64(%rbp,%r9,1),%rcx
437 movq %rcx,0+8(%rsp)
438 movq %rdi,8+8(%rsp)
440 leaq 48+8(%rsp),%rdi
441 jmp .Lsqrx8x_reduction_loop
# Head of one reduction pass: load the accumulator words r9..r15 from
# 8..56(%rdi), keep the incoming rdx in r8, form the first multiplier as the
# low 64 bits of rdx * rbx, save rax at 32(%rsp), advance rdi by 64 and
# start the eight-step .Lsqrx8x_reduce loop with rcx = -8.
443 .align 32
444 .Lsqrx8x_reduction_loop:
445 movq 8(%rdi),%r9
446 movq 16(%rdi),%r10
447 movq 24(%rdi),%r11
448 movq 32(%rdi),%r12
449 movq %rdx,%r8
450 imulq %rbx,%rdx
451 movq 40(%rdi),%r13
452 movq 48(%rdi),%r14
453 movq 56(%rdi),%r15
454 movq %rax,24+8(%rsp)
456 leaq 64(%rdi),%rdi
# rsi = 0 (zero addend for the loop); xor also clears CF and OF.
457 xorq %rsi,%rsi
458 movq $-8,%rcx
459 jmp .Lsqrx8x_reduce
# Reduction loop: eight iterations (rcx from -8 up to 0). Each iteration
# multiplies rdx by the words at 0..56(%rbp) and folds the products into
# r8..r15 via the adcx/adox chains. Mid-iteration it also derives the next
# multiplier as the low half of (new r8) * (value at 40(%rsp)) and records
# the current multiplier at 64+48+8(%rsp,%rcx,8) for the tail loop to reload.
461 .align 32
462 .Lsqrx8x_reduce:
463 movq %r8,%rbx
464 mulxq 0(%rbp),%rax,%r8
465 adcxq %rbx,%rax
466 adoxq %r9,%r8
468 mulxq 8(%rbp),%rbx,%r9
469 adcxq %rbx,%r8
470 adoxq %r10,%r9
472 mulxq 16(%rbp),%rbx,%r10
473 adcxq %rbx,%r9
474 adoxq %r11,%r10
476 mulxq 24(%rbp),%rbx,%r11
477 adcxq %rbx,%r10
478 adoxq %r12,%r11
# Decodes as: mulxq 32(%rbp),%rbx,%r12 (decoded by hand - verify).
480 .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
# Stash the current multiplier in rax and temporarily load r8 into rdx.
481 movq %rdx,%rax
482 movq %r8,%rdx
483 adcxq %rbx,%r11
484 adoxq %r13,%r12
# rbx = low half of r8 * (value at 40(%rsp)); the high half lands in rdx and
# is immediately overwritten when the current multiplier is restored.
486 mulxq 32+8(%rsp),%rbx,%rdx
487 movq %rax,%rdx
488 movq %rax,64+48+8(%rsp,%rcx,8)
490 mulxq 40(%rbp),%rax,%r13
491 adcxq %rax,%r12
492 adoxq %r14,%r13
494 mulxq 48(%rbp),%rax,%r14
495 adcxq %rax,%r13
496 adoxq %r15,%r14
498 mulxq 56(%rbp),%rax,%r15
# Switch rdx to the multiplier computed above for the next iteration.
499 movq %rbx,%rdx
500 adcxq %rax,%r14
501 adoxq %rsi,%r15
502 adcxq %rsi,%r15
# Three address-size prefixes attached to the following incq (padding).
504 .byte 0x67,0x67,0x67
505 incq %rcx
506 jnz .Lsqrx8x_reduce
# rax = 0 (rsi is zero). If rbp has reached the bound saved at 8(%rsp) there
# is nothing left for the tail loop.
508 movq %rsi,%rax
509 cmpq 0+8(%rsp),%rbp
510 jae .Lsqrx8x_no_tail
# Otherwise prepare the tail: reload rdx from 56(%rsp), add the next eight
# words at 0..56(%rdi) into r8..r15, advance rbp and rdi by 64 and park the
# resulting carry as a 0/-1 mask at 24(%rsp).
512 movq 48+8(%rsp),%rdx
513 addq 0(%rdi),%r8
514 leaq 64(%rbp),%rbp
515 movq $-8,%rcx
516 adcxq 8(%rdi),%r9
517 adcxq 16(%rdi),%r10
518 adcq 24(%rdi),%r11
519 adcq 32(%rdi),%r12
520 adcq 40(%rdi),%r13
521 adcq 48(%rdi),%r14
522 adcq 56(%rdi),%r15
523 leaq 64(%rdi),%rdi
524 sbbq %rax,%rax
# rsi = 0; clears CF and OF for the tail loop.
526 xorq %rsi,%rsi
527 movq %rax,16+8(%rsp)
528 jmp .Lsqrx8x_tail
# Tail loop: eight iterations (rcx from -8 up to 0). Each iteration
# multiplies rdx by the words at 0..56(%rbp), accumulates into r8..r15,
# stores the finished low word at (%rdi,%rcx,8) and reloads rdx from the
# multiplier table at 72+48+8(%rsp,%rcx,8) written by .Lsqrx8x_reduce.
530 .align 32
531 .Lsqrx8x_tail:
532 movq %r8,%rbx
533 mulxq 0(%rbp),%rax,%r8
534 adcxq %rax,%rbx
535 adoxq %r9,%r8
537 mulxq 8(%rbp),%rax,%r9
538 adcxq %rax,%r8
539 adoxq %r10,%r9
541 mulxq 16(%rbp),%rax,%r10
542 adcxq %rax,%r9
543 adoxq %r11,%r10
545 mulxq 24(%rbp),%rax,%r11
546 adcxq %rax,%r10
547 adoxq %r12,%r11
# Decodes as: mulxq 32(%rbp),%rax,%r12 (decoded by hand - verify).
549 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
550 adcxq %rax,%r11
551 adoxq %r13,%r12
553 mulxq 40(%rbp),%rax,%r13
554 adcxq %rax,%r12
555 adoxq %r14,%r13
557 mulxq 48(%rbp),%rax,%r14
558 adcxq %rax,%r13
559 adoxq %r15,%r14
561 mulxq 56(%rbp),%rax,%r15
562 movq 72+48+8(%rsp,%rcx,8),%rdx
563 adcxq %rax,%r14
564 adoxq %rsi,%r15
565 movq %rbx,(%rdi,%rcx,8)
566 movq %r8,%rbx
567 adcxq %rsi,%r15
569 incq %rcx
570 jnz .Lsqrx8x_tail
# Finished once rbp reaches the bound saved at 8(%rsp).
572 cmpq 0+8(%rsp),%rbp
573 jae .Lsqrx8x_tail_done
# rsi is 0, so subtracting the 0/-1 mask at 24(%rsp) restores the parked
# carry into CF for the adc chain below.
575 subq 16+8(%rsp),%rsi
576 movq 48+8(%rsp),%rdx
577 leaq 64(%rbp),%rbp
578 adcq 0(%rdi),%r8
579 adcq 8(%rdi),%r9
580 adcq 16(%rdi),%r10
581 adcq 24(%rdi),%r11
582 adcq 32(%rdi),%r12
583 adcq 40(%rdi),%r13
584 adcq 48(%rdi),%r14
585 adcq 56(%rdi),%r15
586 leaq 64(%rdi),%rdi
# Park the new carry, then reset the counter: rcx is 0 after the loop, so
# subtracting 8 yields -8.
587 sbbq %rax,%rax
588 subq $8,%rcx
# rsi = 0; clears CF and OF before re-entering the loop.
590 xorq %rsi,%rsi
591 movq %rax,16+8(%rsp)
592 jmp .Lsqrx8x_tail
# End of the tail loop: add the value saved at 32(%rsp) into r8 and ripple
# the carry through r9..r15, collecting the final carry-out in rax. The last
# instruction restores the carry parked at 24(%rsp) into CF, and execution
# falls through into .Lsqrx8x_no_tail with that CF live.
594 .align 32
595 .Lsqrx8x_tail_done:
596 xorq %rax,%rax
597 addq 24+8(%rsp),%r8
598 adcq $0,%r9
599 adcq $0,%r10
600 adcq $0,%r11
601 adcq $0,%r12
602 adcq $0,%r13
603 adcq $0,%r14
604 adcq $0,%r15
605 adcq $0,%rax
# rsi is 0 here, so CF becomes set iff the mask at 24(%rsp) is non-zero.
607 subq 16+8(%rsp),%rsi
# Final accumulation of one reduction pass and loop-back test. Adds the eight
# words at 0..56(%rdi) into r8..r15 with carry (carry-out folded into rax),
# writes r8..r15 back to the same slots, repositions rdi by rcx + 64 and
# repeats .Lsqrx8x_reduction_loop while rdi + 64 is below the bound saved at
# 16(%rsp). The mov/movq instructions interleaved with the adc chain do not
# modify flags.
608 .Lsqrx8x_no_tail:
609 adcq 0(%rdi),%r8
# Decodes as: movq %xmm3,%rcx (decoded by hand - verify).
610 .byte 102,72,15,126,217
611 adcq 8(%rdi),%r9
# rsi = word at 56(%rbp), read before rbp is overwritten below.
612 movq 56(%rbp),%rsi
# Decodes as: movq %xmm2,%rbp (decoded by hand - verify).
613 .byte 102,72,15,126,213
614 adcq 16(%rdi),%r10
615 adcq 24(%rdi),%r11
616 adcq 32(%rdi),%r12
617 adcq 40(%rdi),%r13
618 adcq 48(%rdi),%r14
619 adcq 56(%rdi),%r15
620 adcq $0,%rax
# Reload the operands expected at the top of .Lsqrx8x_reduction_loop:
# rbx from 40(%rsp) and rdx from 64(%rdi,%rcx,1).
622 movq 32+8(%rsp),%rbx
623 movq 64(%rdi,%rcx,1),%rdx
625 movq %r8,0(%rdi)
# r8 = rdi + 64, the position compared against the end bound below.
626 leaq 64(%rdi),%r8
627 movq %r9,8(%rdi)
628 movq %r10,16(%rdi)
629 movq %r11,24(%rdi)
630 movq %r12,32(%rdi)
631 movq %r13,40(%rdi)
632 movq %r14,48(%rdi)
633 movq %r15,56(%rdi)
635 leaq 64(%rdi,%rcx,1),%rdi
636 cmpq 8+8(%rsp),%r8
637 jb .Lsqrx8x_reduction_loop
# Hand-encoded return: f3 c3 is "rep ret" (a ret with a REP prefix).
638 .byte 0xf3,0xc3
639 .cfi_endproc
640 .size bn_sqrx8x_internal,.-bn_sqrx8x_internal