1 ; RUN: llc < %s -mtriple=i386-linux-gnu -o - | FileCheck %s
3 ; This test checks that only a single js gets generated in the final code
4 ; for lowering the CMOV pseudos that get created for this IR.
8 define i32 @foo1(i32 %v1, i32 %v2, i32 %v3) nounwind { ; two i32 selects that share one signed "is negative" test
10 %cmp = icmp slt i32 %v1, 0 ; true when %v1 is negative
11 %v2.v3 = select i1 %cmp, i32 %v2, i32 %v3 ; %v2 when %cmp is true, otherwise %v3
12 %v1.v2 = select i1 %cmp, i32 %v1, i32 %v2 ; same condition again: %v1 when true, otherwise %v2
13 %sub = sub i32 %v1.v2, %v2.v3 ; consumes both select results
17 ; This test checks that only a single js gets generated in the final code
18 ; for lowering the CMOV pseudos that get created for this IR. This makes
19 ; sure the code for the lowering for opposite conditions gets tested.
24 define i32 @foo11(i32 %v1, i32 %v2, i32 %v3) nounwind { ; two i32 selects whose conditions are logical opposites
26 %cmp1 = icmp slt i32 %v1, 0 ; true when %v1 is negative
27 %v2.v3 = select i1 %cmp1, i32 %v2, i32 %v3 ; %v2 when %v1 is negative, otherwise %v3
28 %cmp2 = icmp sge i32 %v1, 0 ; inverse predicate of %cmp1 on the same operands
29 %v1.v2 = select i1 %cmp2, i32 %v1, i32 %v2 ; %v1 when %v1 is non-negative, otherwise %v2
30 %sub = sub i32 %v1.v2, %v2.v3 ; consumes both select results
34 ; This test checks that only a single js gets generated in the final code
35 ; for lowering the CMOV pseudos that get created for this IR.
39 define i32 @foo2(i8 %v1, i8 %v2, i8 %v3) nounwind { ; i8 variant: two selects on one condition, then widened to i32
41 %cmp = icmp slt i8 %v1, 0 ; true when the i8 value %v1 is negative
42 %v2.v3 = select i1 %cmp, i8 %v2, i8 %v3 ; %v2 when %cmp is true, otherwise %v3
43 %v1.v2 = select i1 %cmp, i8 %v1, i8 %v2 ; same condition: %v1 when true, otherwise %v2
44 %t1 = sext i8 %v2.v3 to i32 ; sign-extend the first select result
45 %t2 = sext i8 %v1.v2 to i32 ; sign-extend the second select result
46 %sub = sub i32 %t1, %t2 ; consumes both widened select results
50 ; This test checks that only a single js gets generated in the final code
51 ; for lowering the CMOV pseudos that get created for this IR.
55 define i32 @foo3(i16 %v1, i16 %v2, i16 %v3) nounwind { ; i16 variant: two selects on one condition, then widened to i32
57 %cmp = icmp slt i16 %v1, 0 ; true when the i16 value %v1 is negative
58 %v2.v3 = select i1 %cmp, i16 %v2, i16 %v3 ; %v2 when %cmp is true, otherwise %v3
59 %v1.v2 = select i1 %cmp, i16 %v1, i16 %v2 ; same condition: %v1 when true, otherwise %v2
60 %t1 = sext i16 %v2.v3 to i32 ; sign-extend the first select result
61 %t2 = sext i16 %v1.v2 to i32 ; sign-extend the second select result
62 %sub = sub i32 %t1, %t2 ; consumes both widened select results
66 ; This test checks that only a single js gets generated in the final code
67 ; for lowering the CMOV pseudos that get created for this IR.
71 define float @foo4(i32 %v1, float %v2, float %v3, float %v4) nounwind { ; float variant: two selects on one integer condition
73 %cmp = icmp slt i32 %v1, 0 ; true when %v1 is negative
74 %t1 = select i1 %cmp, float %v2, float %v3 ; %v2 when %cmp is true, otherwise %v3
75 %t2 = select i1 %cmp, float %v3, float %v4 ; same condition; %v3 is shared with the select above
76 %sub = fsub float %t1, %t2 ; consumes both select results
80 ; This test checks that only a single je gets generated in the final code
81 ; for lowering the CMOV pseudos that get created for this IR.
85 define double @foo5(i32 %v1, double %v2, double %v3, double %v4) nounwind { ; double variant: two selects on one equality test
87 %cmp = icmp eq i32 %v1, 0 ; true when %v1 is zero
88 %t1 = select i1 %cmp, double %v2, double %v3 ; %v2 when %cmp is true, otherwise %v3
89 %t2 = select i1 %cmp, double %v3, double %v4 ; same condition; %v3 is shared with the select above
90 %sub = fsub double %t1, %t2 ; consumes both select results
94 ; This test checks that only a single je gets generated in the final code
95 ; for lowering the CMOV pseudos that get created for this IR.
99 define <4 x float> @foo6(i32 %v1, <4 x float> %v2, <4 x float> %v3, <4 x float> %v4) nounwind { ; <4 x float> variant: two whole-vector selects on one scalar condition
101 %cmp = icmp eq i32 %v1, 0 ; true when %v1 is zero
102 %t1 = select i1 %cmp, <4 x float> %v2, <4 x float> %v3 ; scalar i1 condition picks an entire vector
103 %t2 = select i1 %cmp, <4 x float> %v3, <4 x float> %v4 ; same condition; %v3 is shared with the select above
104 %sub = fsub <4 x float> %t1, %t2 ; consumes both select results
108 ; This test checks that only a single je gets generated in the final code
109 ; for lowering the CMOV pseudos that get created for this IR.
113 define <2 x double> @foo7(i32 %v1, <2 x double> %v2, <2 x double> %v3, <2 x double> %v4) nounwind { ; <2 x double> variant: two whole-vector selects on one scalar condition
115 %cmp = icmp eq i32 %v1, 0 ; true when %v1 is zero
116 %t1 = select i1 %cmp, <2 x double> %v2, <2 x double> %v3 ; scalar i1 condition picks an entire vector
117 %t2 = select i1 %cmp, <2 x double> %v3, <2 x double> %v4 ; same condition; %v3 is shared with the select above
118 %sub = fsub <2 x double> %t1, %t2 ; consumes both select results
119 ret <2 x double> %sub ; the difference of the two selected vectors is the result
122 ; This test checks that only a single ja gets generated in the final code
123 ; for lowering the CMOV pseudos that get created for this IR. This combines
124 ; all the supported types together into one long string of selects based
125 ; on the same condition.
129 define void @foo8(i32 %v1, ; %v1 feeds the single compare that drives every select below
133 float %v32, float %v33,
134 double %v42, double %v43,
135 <4 x float> %v52, <4 x float> %v53,
136 <2 x double> %v62, <2 x double> %v63,
137 <8 x float> %v72, <8 x float> %v73,
138 <4 x double> %v82, <4 x double> %v83,
139 <16 x float> %v92, <16 x float> %v93,
140 <8 x double> %v102, <8 x double> %v103,
143 %add.ptr11 = getelementptr inbounds i8, ptr %dst, i32 2 ; byte offset 2 from %dst; destination of the i16 store
145 %add.ptr21 = getelementptr inbounds i8, ptr %dst, i32 4 ; destination of the i32 store
147 %add.ptr31 = getelementptr inbounds i8, ptr %dst, i32 8 ; destination of the float store
149 %add.ptr41 = getelementptr inbounds i8, ptr %dst, i32 16 ; destination of the double store
151 %add.ptr51 = getelementptr inbounds i8, ptr %dst, i32 32 ; destination of the <4 x float> store
153 %add.ptr61 = getelementptr inbounds i8, ptr %dst, i32 48 ; destination of the <2 x double> store
155 %add.ptr71 = getelementptr inbounds i8, ptr %dst, i32 64 ; destination of the <8 x float> store
157 %add.ptr81 = getelementptr inbounds i8, ptr %dst, i32 128 ; destination of the <4 x double> store
159 %add.ptr91 = getelementptr inbounds i8, ptr %dst, i32 64 ; NOTE(review): same offset as %add.ptr71, so the <16 x float> store overlaps it -- confirm this is intended
161 %add.ptr101 = getelementptr inbounds i8, ptr %dst, i32 128 ; NOTE(review): same offset as %add.ptr81, so the <8 x double> store overlaps it -- confirm this is intended
163 ; These operations are necessary, because select of two single use loads
164 ; ends up getting optimized into a select of two leas, followed by a
165 ; single load of the selected address.
166 %t13 = xor i16 %v13, 11 ; each %tN3 value becomes the false operand of the matching select below
167 %t23 = xor i32 %v23, 1234
168 %t33 = fadd float %v33, %v32
169 %t43 = fadd double %v43, %v42
170 %t53 = fadd <4 x float> %v53, %v52
171 %t63 = fadd <2 x double> %v63, %v62
172 %t73 = fsub <8 x float> %v73, %v72 ; the four widest vector types use fsub instead of fadd
173 %t83 = fsub <4 x double> %v83, %v82
174 %t93 = fsub <16 x float> %v93, %v92
175 %t103 = fsub <8 x double> %v103, %v102
177 %cmp = icmp ugt i32 %v1, 31 ; unsigned compare: true when %v1 is greater than 31
178 %t11 = select i1 %cmp, i16 %v12, i16 %t13 ; all ten selects reuse %cmp and appear back to back
179 %t21 = select i1 %cmp, i32 %v22, i32 %t23
180 %t31 = select i1 %cmp, float %v32, float %t33
181 %t41 = select i1 %cmp, double %v42, double %t43
182 %t51 = select i1 %cmp, <4 x float> %v52, <4 x float> %t53
183 %t61 = select i1 %cmp, <2 x double> %v62, <2 x double> %t63
184 %t71 = select i1 %cmp, <8 x float> %v72, <8 x float> %t73
185 %t81 = select i1 %cmp, <4 x double> %v82, <4 x double> %t83
186 %t91 = select i1 %cmp, <16 x float> %v92, <16 x float> %t93
187 %t101 = select i1 %cmp, <8 x double> %v102, <8 x double> %t103
189 store i16 %t11, ptr %add.ptr11, align 2 ; each selected value is stored through its matching %add.ptr
190 store i32 %t21, ptr %add.ptr21, align 4
191 store float %t31, ptr %add.ptr31, align 4
192 store double %t41, ptr %add.ptr41, align 8
193 store <4 x float> %t51, ptr %add.ptr51, align 16
194 store <2 x double> %t61, ptr %add.ptr61, align 16
195 store <8 x float> %t71, ptr %add.ptr71, align 32
196 store <4 x double> %t81, ptr %add.ptr81, align 32
197 store <16 x float> %t91, ptr %add.ptr91, align 32
198 store <8 x double> %t101, ptr %add.ptr101, align 32
203 ; This test checks that only a single ja gets generated in the final code
204 ; for lowering the CMOV pseudos that get created for this IR. All of the
205 ; selects in this test are based on the same condition.
206 ; Contrary to my expectations, this doesn't exercise the code for
207 ; CMOV_V8I1, CMOV_V16I1, CMOV_V32I1, or CMOV_V64I1. Instead the selects all
208 ; get lowered into vector length number of selects, which all eventually turn
209 ; into a huge number of CMOV_GR8, which are all contiguous, so the optimization
210 ; kicks in as long as CMOV_GR8 is supported. I couldn't find a way to get
211 ; CMOV_V*I1 pseudo-opcodes to get generated. If a way exists to get CMOV_V*I1
212 ; pseudo-opcodes to be generated, this test should be replaced with one that
213 ; tests those opcodes.
218 define void @foo9(i32 %v1, ; %v1 feeds the single compare that drives every select below
219 <8 x i1> %v12, <8 x i1> %v13,
220 <16 x i1> %v22, <16 x i1> %v23,
221 <32 x i1> %v32, <32 x i1> %v33,
222 <64 x i1> %v42, <64 x i1> %v43,
226 %add.ptr21 = getelementptr inbounds i8, ptr %dst, i32 4 ; byte offset 4 from %dst; destination of the <16 x i1> store
228 %add.ptr31 = getelementptr inbounds i8, ptr %dst, i32 8 ; destination of the <32 x i1> store
230 %add.ptr41 = getelementptr inbounds i8, ptr %dst, i32 16 ; destination of the <64 x i1> store
232 ; These operations are necessary, because select of two single use loads
233 ; ends up getting optimized into a select of two leas, followed by a
234 ; single load of the selected address.
235 %t13 = xor <8 x i1> %v13, %v12 ; each %tN3 value becomes the false operand of the matching select below
236 %t23 = xor <16 x i1> %v23, %v22
237 %t33 = xor <32 x i1> %v33, %v32
238 %t43 = xor <64 x i1> %v43, %v42
240 %cmp = icmp ugt i32 %v1, 31 ; unsigned compare: true when %v1 is greater than 31
241 %t11 = select i1 %cmp, <8 x i1> %v12, <8 x i1> %t13 ; all four selects reuse %cmp and appear back to back
242 %t21 = select i1 %cmp, <16 x i1> %v22, <16 x i1> %t23
243 %t31 = select i1 %cmp, <32 x i1> %v32, <32 x i1> %t33
244 %t41 = select i1 %cmp, <64 x i1> %v42, <64 x i1> %t43
246 store <8 x i1> %t11, ptr %dst, align 16 ; the first result goes to %dst itself, the rest to the %add.ptr offsets
247 store <16 x i1> %t21, ptr %add.ptr21, align 4
248 store <32 x i1> %t31, ptr %add.ptr31, align 8
249 store <64 x i1> %t41, ptr %add.ptr41, align 16