1 ; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=CMOV
2 ; RUN: llc < %s -asm-verbose=false -mtriple=i686-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=NOCMOV
4 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6 ; Test 2xCMOV patterns exposed after legalization.
7 ; One way to do that is with (select (fcmp une/oeq)), which gets
8 ; legalized to setp/setne.
10 ; CHECK-LABEL: test_select_fcmp_oeq_i32:
12 ; CMOV-NEXT: movl %edi, %eax
13 ; CMOV-NEXT: ucomiss %xmm1, %xmm0
14 ; CMOV-NEXT: cmovnel %esi, %eax
15 ; CMOV-NEXT: cmovpl %esi, %eax
18 ; NOCMOV-NEXT: flds 8(%esp)
19 ; NOCMOV-NEXT: flds 4(%esp)
20 ; NOCMOV-NEXT: fucompp
21 ; NOCMOV-NEXT: fnstsw %ax
23 ; NOCMOV-NEXT: leal 16(%esp), %eax
24 ; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
25 ; NOCMOV-NEXT: jp [[TBB]]
26 ; NOCMOV-NEXT: leal 12(%esp), %eax
27 ; NOCMOV-NEXT: [[TBB]]:
28 ; NOCMOV-NEXT: movl (%eax), %eax
; Selects between the i32 arguments %c and %d on an ordered-equal (oeq)
; compare of the floats %a and %b.
; The x86_64 checks above expect ucomiss followed by a cmovne/cmovp pair; the
; i686 checks expect an x87 compare followed by a jne/jp branch pair that
; picks which stack slot address to load from.
30 define i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) #0 {
32 %cmp = fcmp oeq float %a, %b
33 %r = select i1 %cmp, i32 %c, i32 %d
37 ; CHECK-LABEL: test_select_fcmp_oeq_i64:
39 ; CMOV-NEXT: movq %rdi, %rax
40 ; CMOV-NEXT: ucomiss %xmm1, %xmm0
41 ; CMOV-NEXT: cmovneq %rsi, %rax
42 ; CMOV-NEXT: cmovpq %rsi, %rax
45 ; NOCMOV-NEXT: flds 8(%esp)
46 ; NOCMOV-NEXT: flds 4(%esp)
47 ; NOCMOV-NEXT: fucompp
48 ; NOCMOV-NEXT: fnstsw %ax
50 ; NOCMOV-NEXT: leal 20(%esp), %ecx
51 ; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
52 ; NOCMOV-NEXT: jp [[TBB]]
53 ; NOCMOV-NEXT: leal 12(%esp), %ecx
54 ; NOCMOV-NEXT: [[TBB]]:
55 ; NOCMOV-NEXT: movl (%ecx), %eax
56 ; NOCMOV-NEXT: movl 4(%ecx), %edx
; 64-bit variant: selects between the i64 arguments %c and %d on an
; ordered-equal (oeq) compare of the floats %a and %b.
; The x86_64 checks above expect a cmovneq/cmovpq pair; the i686 checks expect
; a jne/jp branch pair choosing an address in %ecx, then two 32-bit loads from
; it into %eax and %edx.
58 define i64 @test_select_fcmp_oeq_i64(float %a, float %b, i64 %c, i64 %d) #0 {
60 %cmp = fcmp oeq float %a, %b
61 %r = select i1 %cmp, i64 %c, i64 %d
65 ; CHECK-LABEL: test_select_fcmp_une_i64:
67 ; CMOV-NEXT: movq %rsi, %rax
68 ; CMOV-NEXT: ucomiss %xmm1, %xmm0
69 ; CMOV-NEXT: cmovneq %rdi, %rax
70 ; CMOV-NEXT: cmovpq %rdi, %rax
73 ; NOCMOV-NEXT: flds 8(%esp)
74 ; NOCMOV-NEXT: flds 4(%esp)
75 ; NOCMOV-NEXT: fucompp
76 ; NOCMOV-NEXT: fnstsw %ax
78 ; NOCMOV-NEXT: leal 12(%esp), %ecx
79 ; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
80 ; NOCMOV-NEXT: jp [[TBB]]
81 ; NOCMOV-NEXT: leal 20(%esp), %ecx
82 ; NOCMOV-NEXT: [[TBB]]:
83 ; NOCMOV-NEXT: movl (%ecx), %eax
84 ; NOCMOV-NEXT: movl 4(%ecx), %edx
; Selects between the i64 arguments %c and %d on an unordered-or-not-equal
; (une) compare of the floats %a and %b.
; The x86_64 checks above still expect a cmovneq/cmovpq pair, but with the
; roles of %rdi and %rsi swapped relative to the oeq test; the i686 checks
; swap the two leal stack offsets in the same way.
86 define i64 @test_select_fcmp_une_i64(float %a, float %b, i64 %c, i64 %d) #0 {
88 %cmp = fcmp une float %a, %b
89 %r = select i1 %cmp, i64 %c, i64 %d
93 ; CHECK-LABEL: test_select_fcmp_oeq_f64:
95 ; CMOV-NEXT: ucomiss %xmm1, %xmm0
96 ; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
97 ; CMOV-NEXT: jp [[TBB]]
98 ; CMOV-NEXT: movaps %xmm2, %xmm3
100 ; CMOV-NEXT: movaps %xmm3, %xmm0
103 ; NOCMOV-NEXT: flds 8(%esp)
104 ; NOCMOV-NEXT: flds 4(%esp)
105 ; NOCMOV-NEXT: fucompp
106 ; NOCMOV-NEXT: fnstsw %ax
108 ; NOCMOV-NEXT: leal 20(%esp), %eax
109 ; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
110 ; NOCMOV-NEXT: jp [[TBB]]
111 ; NOCMOV-NEXT: leal 12(%esp), %eax
112 ; NOCMOV-NEXT: [[TBB]]:
113 ; NOCMOV-NEXT: fldl (%eax)
; Selects between the double arguments %c and %d on an ordered-equal (oeq)
; compare of the floats %a and %b.
; Unlike the integer tests, the x86_64 checks above expect a jne/jp branch
; pair with movaps register moves rather than conditional moves; the i686
; checks expect the branch pair to choose an address that fldl then loads.
115 define double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) #0 {
117 %cmp = fcmp oeq float %a, %b
118 %r = select i1 %cmp, double %c, double %d
122 ; CHECK-LABEL: test_select_fcmp_oeq_v4i32:
124 ; CMOV-NEXT: ucomiss %xmm1, %xmm0
125 ; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
126 ; CMOV-NEXT: jp [[TBB]]
127 ; CMOV-NEXT: movaps %xmm2, %xmm3
128 ; CMOV-NEXT: [[TBB]]:
129 ; CMOV-NEXT: movaps %xmm3, %xmm0
132 ; NOCMOV-NEXT: pushl %edi
133 ; NOCMOV-NEXT: pushl %esi
134 ; NOCMOV-NEXT: flds 20(%esp)
135 ; NOCMOV-NEXT: flds 16(%esp)
136 ; NOCMOV-NEXT: fucompp
137 ; NOCMOV-NEXT: fnstsw %ax
139 ; NOCMOV-NEXT: leal 40(%esp), %eax
140 ; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
141 ; NOCMOV-NEXT: jp [[TBB]]
142 ; NOCMOV-NEXT: leal 24(%esp), %eax
143 ; NOCMOV-NEXT: [[TBB]]:
144 ; NOCMOV-NEXT: movl (%eax), %ecx
145 ; NOCMOV-NEXT: leal 44(%esp), %edx
146 ; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
147 ; NOCMOV-NEXT: jp [[TBB]]
148 ; NOCMOV-NEXT: leal 28(%esp), %edx
149 ; NOCMOV-NEXT: [[TBB]]:
150 ; NOCMOV-NEXT: movl 12(%esp), %eax
151 ; NOCMOV-NEXT: movl (%edx), %edx
152 ; NOCMOV-NEXT: leal 48(%esp), %esi
153 ; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
154 ; NOCMOV-NEXT: jp [[TBB]]
155 ; NOCMOV-NEXT: leal 32(%esp), %esi
156 ; NOCMOV-NEXT: [[TBB]]:
157 ; NOCMOV-NEXT: movl (%esi), %esi
158 ; NOCMOV-NEXT: leal 52(%esp), %edi
159 ; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
160 ; NOCMOV-NEXT: jp [[TBB]]
161 ; NOCMOV-NEXT: leal 36(%esp), %edi
162 ; NOCMOV-NEXT: [[TBB]]:
163 ; NOCMOV-NEXT: movl (%edi), %edi
164 ; NOCMOV-NEXT: movl %edi, 12(%eax)
165 ; NOCMOV-NEXT: movl %esi, 8(%eax)
166 ; NOCMOV-NEXT: movl %edx, 4(%eax)
167 ; NOCMOV-NEXT: movl %ecx, (%eax)
168 ; NOCMOV-NEXT: popl %esi
169 ; NOCMOV-NEXT: popl %edi
170 ; NOCMOV-NEXT: retl $4
; Selects between the <4 x i32> arguments %c and %d using a single scalar
; ordered-equal (oeq) compare of the floats %a and %b.
; The x86_64 checks above expect one jne/jp branch pair around a movaps; the
; i686 checks expect the jne/jp pair to be repeated four times, once per
; 32-bit element, before the four elements are stored through %eax.
171 define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) #0 {
173 %cmp = fcmp oeq float %a, %b
174 %r = select i1 %cmp, <4 x i32> %c, <4 x i32> %d
178 ; Also make sure we catch the original code-sequence of interest:
180 ; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
181 ; CMOV-NEXT: .long 1065353216
183 ; CHECK-LABEL: test_zext_fcmp_une:
184 ; CMOV-NEXT: ucomiss %xmm1, %xmm0
185 ; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
186 ; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
187 ; CMOV-NEXT: jp [[TBB]]
188 ; CMOV-NEXT: xorps %xmm0, %xmm0
189 ; CMOV-NEXT: [[TBB]]:
; Converts the i1 result of an unordered-or-not-equal (une) float compare to a
; float: zext to i32, then sitofp, so %conv2 is 1.0 when the compare is true
; and 0.0 otherwise.
; The x86_64 checks above expect the 1.0 constant (.long 1065353216) to be
; loaded first and replaced by an xorps zero on the fall-through path of a
; jne/jp branch pair.
194 define float @test_zext_fcmp_une(float %a, float %b) #0 {
196 %cmp = fcmp une float %a, %b
197 %conv1 = zext i1 %cmp to i32
198 %conv2 = sitofp i32 %conv1 to float
202 ; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
203 ; CMOV-NEXT: .long 1065353216
205 ; CHECK-LABEL: test_zext_fcmp_oeq:
206 ; CMOV-NEXT: ucomiss %xmm1, %xmm0
207 ; CMOV-NEXT: xorps %xmm0, %xmm0
208 ; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
209 ; CMOV-NEXT: jp [[TBB]]
210 ; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
211 ; CMOV-NEXT: [[TBB]]:
; Converts the i1 result of an ordered-equal (oeq) float compare to a float:
; zext to i32, then sitofp, so %conv2 is 1.0 when the compare is true and 0.0
; otherwise.
; The x86_64 checks above expect the mirror image of the une test: xorps
; produces zero first, and the 1.0 constant (.long 1065353216) is loaded on
; the fall-through path of the jne/jp branch pair.
216 define float @test_zext_fcmp_oeq(float %a, float %b) #0 {
218 %cmp = fcmp oeq float %a, %b
219 %conv1 = zext i1 %cmp to i32
220 %conv2 = sitofp i32 %conv1 to float
; Attribute group #0, referenced by the test functions declared with `#0`:
; marks them nounwind.
224 attributes #0 = { nounwind }
228 ; The following test failed because llvm had a bug where a structure like:
230 ; %12 = CMOV_GR8 %7, %11 ... (lt)
231 ; %13 = CMOV_GR8 %12, %11 ... (gt)
235 ; The first two cmovs got expanded to:
242 ; %12 = phi(%7, %bb.8, %11, %bb.0, %12, %bb.7)
244 ; Which was invalid as %12 is not the same value as %13
246 ; CHECK-LABEL: no_cascade_opt:
247 ; CMOV-DAG: cmpl %edx, %esi
248 ; CMOV-DAG: movb $20, %al
249 ; CMOV-DAG: movb $20, %dl
250 ; CMOV: jge [[BB2:.LBB[0-9_]+]]
251 ; CMOV: jle [[BB3:.LBB[0-9_]+]]
252 ; CMOV: [[BB0:.LBB[0-9_]+]]
253 ; CMOV: testl %edi, %edi
254 ; CMOV: jne [[BB4:.LBB[0-9_]+]]
255 ; CMOV: [[BB1:.LBB[0-9_]+]]
256 ; CMOV: movb %al, g8(%rip)
259 ; CMOV: movl %ecx, %edx
262 ; CMOV: movl %edx, %eax
263 ; CMOV: testl %edi, %edi
266 ; CMOV: movl %edx, %eax
267 ; CMOV: movb %al, g8(%rip)
; Builds a chain of three i8 selects and stores the result to @g8 with a
; volatile store:
;   %sel0 = 20 if %v1 <  %v2 (signed), else the low 8 bits of %v3
;   %sel1 = 20 if %v1 >  %v2 (signed), else %sel0
;   %sel2 = %sel1 if %v0 == 0,         else %sel0
; %sel0 is used by both %sel1 and %sel2, so the first two selects cannot be
; merged into a single result value.
; NOTE(review): @g8 and the end of this function are outside the lines
; visible here.
269 define void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) {
271 %c0 = icmp eq i32 %v0, 0
272 %c1 = icmp slt i32 %v1, %v2
273 %c2 = icmp sgt i32 %v1, %v2
274 %trunc = trunc i32 %v3 to i8
275 %sel0 = select i1 %c1, i8 20, i8 %trunc
276 %sel1 = select i1 %c2, i8 20, i8 %sel0
277 %sel2 = select i1 %c0, i8 %sel1, i8 %sel0
278 store volatile i8 %sel2, i8* @g8