1 # This test performs basic block (BB) reordering on a function that uses the
2 # unsupported instruction jrcxz. Reordering works correctly with the
3 # following options: None, Normal or Reverse. Other strategies
4 # trigger the assertion `isIntN(Size * 8 + 1, Value)`.
5 # The cause is the distance between the BBs when one of them contains
8 # https://github.com/openssl/openssl/blob/master/crypto/bn/asm/x86_64-mont5.pl#L3319
10 # REQUIRES: system-linux
12 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
14 # RUN: link_fdata %s %t.o %t.fdata
15 # RUN: %clang %cflags %t.o -falign-labels -march=native -o %t.exe -Wl,-q
17 # RUN: llvm-bolt %t.exe -o %t.bolted --data %t.fdata \
18 # RUN: --reorder-blocks=ext-tsp --reorder-functions=hfsort \
19 # RUN: --split-functions --split-all-cold --split-eh --dyno-stats \
20 # RUN: --print-finalized 2>&1 | FileCheck %s
22 # CHECK-NOT: value of -2105 is too large for field of 1 byte.
# Switch to the .text.startup section ("a" = allocatable, "x" = executable,
# @progbits = section holds data in the file).
25 .section .text.startup,"ax",@progbits
# Unconditional jump into bn_sqrx8x_internal.
# NOTE(review): the label that owns this jump is not visible in this excerpt;
# the first profile record of bn_sqrx8x_internal names bn_from_mont8x as the
# source of a branch to its offset 0 - confirm that this is that branch.
30 jmp bn_sqrx8x_internal
# Symbol attributes of bn_sqrx8x_internal: global binding, hidden visibility
# and ELF function type.
32 .globl bn_sqrx8x_internal
33 .hidden bn_sqrx8x_internal
34 .type bn_sqrx8x_internal,@function
# Branch profile of the function. The link_fdata step of this test collects
# the records below into the profile that llvm-bolt receives through --data.
# Per BOLT's branch-record layout (see the BOLT documentation) a record is:
#   <from is-symbol> <from name> <from offset>
#   <to is-symbol> <to name> <to offset> <mispredictions> <branch count>
# Offsets are hexadecimal and relative to the named symbol. The records are
# input data of the test: they must stay byte-identical, and they refer to
# instruction offsets, so the encodings in the function body must not change.
# The last record has an unknown source ("[unknown]" with is-symbol flag 0).
38 # FDATA: 1 bn_from_mont8x 160 1 bn_sqrx8x_internal 0 0 56
39 # FDATA: 1 bn_sqrx8x_internal 13 1 bn_sqrx8x_internal 40 0 60972
40 # FDATA: 1 bn_sqrx8x_internal 5f 1 bn_sqrx8x_internal 2c 0 60972
41 # FDATA: 1 bn_sqrx8x_internal 2f1 1 bn_sqrx8x_internal 500 0 60972
42 # FDATA: 1 bn_sqrx8x_internal 34a 1 bn_sqrx8x_internal 360 0 60972
43 # FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 360 0 447888
44 # FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 417 0 63984
45 # FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 480 0 60972
46 # FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 429 0 3012
47 # FDATA: 1 bn_sqrx8x_internal 467 1 bn_sqrx8x_internal 360 0 3012
48 # FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 80 0 58964
49 # FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 4c0 0 2008
50 # FDATA: 1 bn_sqrx8x_internal 4fb 1 bn_sqrx8x_internal 80 0 2008
51 # FDATA: 1 bn_sqrx8x_internal 5f0 1 bn_sqrx8x_internal 5f2 0 180908
52 # FDATA: 1 bn_sqrx8x_internal 61b 1 bn_sqrx8x_internal 540 0 180908
53 # FDATA: 1 bn_sqrx8x_internal 632 1 bn_sqrx8x_internal 637 0 59020
54 # FDATA: 1 bn_sqrx8x_internal 657 1 bn_sqrx8x_internal 660 0 59020
55 # FDATA: 1 bn_sqrx8x_internal 696 1 bn_sqrx8x_internal 6a0 0 120048
56 # FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 6a0 0 840336
57 # FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 760 0 120048
58 # FDATA: 1 bn_sqrx8x_internal 768 1 bn_sqrx8x_internal 76e 0 120048
59 # FDATA: 1 bn_sqrx8x_internal 7b2 1 bn_sqrx8x_internal 7c0 0 120048
60 # FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 7c0 0 896560
61 # FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 874 0 128080
62 # FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 8c0 0 120048
63 # FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 87b 0 8032
64 # FDATA: 1 bn_sqrx8x_internal 8bb 1 bn_sqrx8x_internal 7c0 0 8032
65 # FDATA: 1 bn_sqrx8x_internal 8e8 1 bn_sqrx8x_internal 8ed 0 120048
66 # FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 660 0 61028
67 # FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 95b 0 59020
68 # FDATA: 0 [unknown] 0 1 bn_sqrx8x_internal 5f0 0 59020
# Visible part of the body of bn_sqrx8x_internal up to the outer-loop exit.
# NOTE(review): labels (including the function entry label and the targets
# .Lsqr8x_zero_start / .Lsqrx8x_outer_loop) and the flag-setting instructions
# that feed the conditional branches are not visible in this excerpt, so the
# comments describe only the instructions that are present.
#
# Operand order used throughout (AT&T syntax): "mulxq src, lo, hi" multiplies
# %rdx by src as unsigned 64-bit values, writes the low half of the product
# to lo and the high half to hi, and does not modify the flags.
# The raw .byte sequences are instructions spelled out as bytes; the decoded
# form is given next to each. They all use a 32-bit displacement
# (presumably to pin the instruction length - TODO confirm).

# %rbp = %rsi + %r9
71 leaq
(%rsi
,%r9,1),%rbp
74 jmp
.Lsqr8x_zero_start
# 12-byte NOP used as padding: nopw %cs:0(%rax,%rax,1) with three 0x66
# prefixes.
77 .byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
# Aligned 16-byte store of %xmm0 to 112(%rdi).
88 movdqa
%xmm0
,112(%rdi
)
103 jmp
.Lsqrx8x_outer_loop
# First group: %rdx times the qwords at 8..56(%rsi).
107 mulxq
8(%rsi
),%r8,%rax
110 mulxq
16(%rsi
),%r9,%rax
# mulxq 24(%rsi),%r10,%rax
113 .byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
# mulxq 32(%rsi),%r11,%rax
116 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
119 mulxq
40(%rsi
),%r12,%rax
122 mulxq
48(%rsi
),%r13,%rax
125 mulxq
56(%rsi
),%r14,%r15
# Second group: %rdx times the qwords at 16..56(%rsi).
135 mulxq
16(%rsi
),%r8,%rbx
136 mulxq
24(%rsi
),%r9,%rax
139 mulxq
32(%rsi
),%r10,%rbx
# mulxq 40(%rsi),%r11,%rax
142 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
# mulxq 48(%rsi),%r12,%rbx
145 .byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
# mulxq 56(%rsi),%r13,%r14
148 .byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
# Third group: %rdx times the qwords at 24..56(%rsi).
159 mulxq
24(%rsi
),%r8,%rbx
160 mulxq
32(%rsi
),%r9,%rax
163 mulxq
40(%rsi
),%r10,%rbx
# mulxq 48(%rsi),%r11,%rax
166 .byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
# mulxq 56(%rsi),%r12,%r13
169 .byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
# Fourth group: %rdx times the qwords at 32..56(%rsi).
177 mulxq
32(%rsi
),%r8,%rax
181 mulxq
40(%rsi
),%r9,%rbx
184 mulxq
48(%rsi
),%r10,%rax
187 mulxq
56(%rsi
),%r11,%r12
# Exit of the outer loop (the compare that sets ZF is not visible here).
228 je
.Lsqrx8x_outer_break
# %rdx times the qwords at 0..56(%rbp); the low halves all go to %rax and the
# high halves to %r8..%r15.
256 mulxq
0(%rbp
),%rax
,%r8
260 mulxq
8(%rbp
),%rax
,%r9
264 mulxq
16(%rbp
),%rax
,%r10
268 mulxq
24(%rbp
),%rax
,%r11
# mulxq 32(%rbp),%rax,%r12
272 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
276 mulxq
40(%rbp
),%rax
,%r13
280 mulxq
48(%rbp
),%rax
,%r14
# Store %rbx to the qword at (%rdi + %rcx*8).
281 movq
%rbx
,(%rdi
,%rcx
,8)
# mulxq 56(%rbp),%rax,%r15
286 .byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
# Reload %rdx (the implicit mulx multiplicand) from 8(%rsi + %rcx*8).
287 movq
8(%rsi
,%rcx
,8),%rdx
335 je
.Lsqrx8x_outer_loop
352 jmp
.Lsqrx8x_outer_loop
# Target of the outer-loop exit branch, followed by the shift-and-add loop
# that contains the jrcxz instruction this test is about.
# NOTE(review): most arithmetic of this region is not visible in this
# excerpt; only the visible instructions are described.
355 .Lsqrx8x_outer_break:
# movq %xmm3,%rcx
357 .byte 102,72,15,126,217
# %rdx = qword at (%rsi + %rcx)
364 movq
(%rsi
,%rcx
,1),%rdx
374 .Lsqrx4x_shift_n_add:
# movq 8(%rsi,%rcx,1),%rdx  (encoded with a 32-bit displacement)
378 .byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
# movq 32(%rdi),%r10  (encoded with a 32-bit displacement)
379 .byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
389 movq
16(%rsi
,%rcx
,1),%rdx
400 movq
24(%rsi
,%rcx
,1),%rdx
# Leave the loop when %rcx is zero. jrcxz tests %rcx directly (no flags) and
# exists only with an 8-bit displacement, so its target has to stay within
# short-jump range after block reordering; the CHECK-NOT line of this test
# guards against the resulting "too large for field of 1 byte" error.
412 jrcxz
.Lsqrx4x_shift_n_add_break
# movq 0(%rsi,%rcx,1),%rdx  (encoded with a 32-bit displacement)
413 .byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
422 jmp
.Lsqrx4x_shift_n_add
425 .Lsqrx4x_shift_n_add_break:
# movq %xmm2,%rbp
430 .byte 102,72,15,126,213
# Label __bn_sqrx8x_reduction. It lies before the .size directive of
# bn_sqrx8x_internal, so it is inside that function's extent, and the code
# visible before it falls through into it.
431 __bn_sqrx8x_reduction
:
# %rcx = %rbp + %r9 - 64
435 leaq
-64(%rbp
,%r9,1),%rcx
441 jmp
.Lsqrx8x_reduction_loop
# Reduction loop of bn_sqrx8x_internal and the end of the function.
# NOTE(review): the add/carry instructions, the compares feeding jae/jb and
# the label .Lsqrx8x_tail_done are not visible in this excerpt; only the
# visible instructions are described.
# Operand order (AT&T syntax): "mulxq src, lo, hi" computes the unsigned
# product %rdx * src into hi:lo without modifying the flags.
444 .Lsqrx8x_reduction_loop:
# %rdx times the qwords at 0..32(%rbp).
464 mulxq
0(%rbp
),%rax
,%r8
468 mulxq
8(%rbp
),%rbx
,%r9
472 mulxq
16(%rbp
),%rbx
,%r10
476 mulxq
24(%rbp
),%rbx
,%r11
# mulxq 32(%rbp),%rbx,%r12
480 .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
# %rdx times the qword at 40(%rsp); the high half replaces %rdx itself.
486 mulxq
32+8(%rsp
),%rbx
,%rdx
# Store %rax to 120(%rsp + %rcx*8).
488 movq
%rax
,64+48+8(%rsp
,%rcx
,8)
490 mulxq
40(%rbp
),%rax
,%r13
494 mulxq
48(%rbp
),%rax
,%r14
498 mulxq
56(%rbp
),%rax
,%r15
# Second visible run of products: %rdx times the qwords at 0..56(%rbp), low
# halves to %rax, high halves to %r8..%r15.
533 mulxq
0(%rbp
),%rax
,%r8
537 mulxq
8(%rbp
),%rax
,%r9
541 mulxq
16(%rbp
),%rax
,%r10
545 mulxq
24(%rbp
),%rax
,%r11
# mulxq 32(%rbp),%rax,%r12
549 .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
553 mulxq
40(%rbp
),%rax
,%r13
557 mulxq
48(%rbp
),%rax
,%r14
561 mulxq
56(%rbp
),%rax
,%r15
# Reload %rdx from 128(%rsp + %rcx*8).
562 movq
72+48+8(%rsp
,%rcx
,8),%rdx
# Store %rbx to the qword at (%rdi + %rcx*8).
565 movq
%rbx
,(%rdi
,%rcx
,8)
573 jae
.Lsqrx8x_tail_done
# movq %xmm3,%rcx
610 .byte 102,72,15,126,217
# movq %xmm2,%rbp
613 .byte 102,72,15,126,213
# %rdx = qword at 64(%rdi + %rcx)
623 movq
64(%rdi
,%rcx
,1),%rdx
# %rdi = %rdi + %rcx + 64
635 leaq
64(%rdi
,%rcx
,1),%rdi
# Back edge of the reduction loop (taken when CF is set).
637 jb
.Lsqrx8x_reduction_loop
# Record the size of bn_sqrx8x_internal as the distance from its label to
# this point.
640 .size bn_sqrx8x_internal,.-bn_sqrx8x_internal