1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -instcombine -S | FileCheck %s
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Lane 0 of the rcp.ss result is extracted. The CHECK lines expect the constant
; inserts into lanes 1-3 of the operand to be dropped while the intrinsic call
; itself is kept.
5 define float @test_rcp_ss_0(float %a) {
6 ; CHECK-LABEL: @test_rcp_ss_0(
7 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
8 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
9 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
10 ; CHECK-NEXT: ret float [[TMP3]]
12 %1 = insertelement <4 x float> undef, float %a, i32 0
13 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
14 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
15 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
16 %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
17 %6 = extractelement <4 x float> %5, i32 0
; Lane 1 of the rcp.ss result is extracted. The CHECK lines expect the whole
; sequence to fold to 1.0, the constant inserted into lane 1 of the operand.
21 define float @test_rcp_ss_1(float %a) {
22 ; CHECK-LABEL: @test_rcp_ss_1(
23 ; CHECK-NEXT: ret float 1.000000e+00
25 %1 = insertelement <4 x float> undef, float %a, i32 0
26 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
27 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
28 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
29 %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
30 %6 = extractelement <4 x float> %5, i32 1
; Lane 0 of the sqrt.ss result is extracted. The CHECK lines expect the vector
; intrinsic to be replaced by a scalar @llvm.sqrt.f32 call on %a.
34 define float @test_sqrt_ss_0(float %a) {
35 ; CHECK-LABEL: @test_sqrt_ss_0(
36 ; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.sqrt.f32(float %a)
37 ; CHECK-NEXT: ret float [[TMP1]]
39 %1 = insertelement <4 x float> undef, float %a, i32 0
40 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
41 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
42 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
43 %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
44 %6 = extractelement <4 x float> %5, i32 0
; Lane 2 of the sqrt.ss result is extracted. The CHECK lines expect the whole
; sequence to fold to 2.0, the constant inserted into lane 2 of the operand.
48 define float @test_sqrt_ss_2(float %a) {
49 ; CHECK-LABEL: @test_sqrt_ss_2(
50 ; CHECK-NEXT: ret float 2.000000e+00
52 %1 = insertelement <4 x float> undef, float %a, i32 0
53 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
54 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
55 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
56 %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
57 %6 = extractelement <4 x float> %5, i32 2
; Lane 0 of the rsqrt.ss result is extracted. The CHECK lines expect the
; constant inserts into lanes 1-3 of the operand to be dropped while the
; intrinsic call itself is kept.
61 define float @test_rsqrt_ss_0(float %a) {
62 ; CHECK-LABEL: @test_rsqrt_ss_0(
63 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
64 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
65 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
66 ; CHECK-NEXT: ret float [[TMP3]]
68 %1 = insertelement <4 x float> undef, float %a, i32 0
69 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
70 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
71 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
72 %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
73 %6 = extractelement <4 x float> %5, i32 0
; Lane 3 of the rsqrt.ss result is extracted. The CHECK lines expect the whole
; sequence to fold to 3.0, the constant inserted into lane 3 of the operand.
77 define float @test_rsqrt_ss_3(float %a) {
78 ; CHECK-LABEL: @test_rsqrt_ss_3(
79 ; CHECK-NEXT: ret float 3.000000e+00
81 %1 = insertelement <4 x float> undef, float %a, i32 0
82 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
83 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
84 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
85 %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
86 %6 = extractelement <4 x float> %5, i32 3
; Lane 0 of the add.ss result is extracted. The CHECK lines expect the vector
; intrinsic and all inserts to collapse into a scalar fadd of %a and %b.
90 define float @test_add_ss_0(float %a, float %b) {
91 ; CHECK-LABEL: @test_add_ss_0(
92 ; CHECK-NEXT: [[TMP1:%.*]] = fadd float %a, %b
93 ; CHECK-NEXT: ret float [[TMP1]]
95 %1 = insertelement <4 x float> undef, float %a, i32 0
96 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
97 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
98 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
99 %5 = insertelement <4 x float> undef, float %b, i32 0
100 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
101 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
102 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
103 %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
104 %r = extractelement <4 x float> %9, i32 0
; Lane 1 of the add.ss result is extracted. The CHECK lines expect the whole
; sequence to fold to 1.0, the constant inserted into lane 1 of the first
; operand.
108 define float @test_add_ss_1(float %a, float %b) {
109 ; CHECK-LABEL: @test_add_ss_1(
110 ; CHECK-NEXT: ret float 1.000000e+00
112 %1 = insertelement <4 x float> undef, float %a, i32 0
113 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
114 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
115 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
116 %5 = insertelement <4 x float> undef, float %b, i32 0
117 %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
118 %7 = extractelement <4 x float> %6, i32 1
; Lane 0 of the sub.ss result is extracted. The CHECK lines expect the vector
; intrinsic and all inserts to collapse into a scalar fsub of %a and %b.
122 define float @test_sub_ss_0(float %a, float %b) {
123 ; CHECK-LABEL: @test_sub_ss_0(
124 ; CHECK-NEXT: [[TMP1:%.*]] = fsub float %a, %b
125 ; CHECK-NEXT: ret float [[TMP1]]
127 %1 = insertelement <4 x float> undef, float %a, i32 0
128 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
129 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
130 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
131 %5 = insertelement <4 x float> undef, float %b, i32 0
132 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
133 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
134 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
135 %9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
136 %r = extractelement <4 x float> %9, i32 0
; Lane 2 of the sub.ss result is extracted. The CHECK lines expect the whole
; sequence to fold to 2.0, the constant inserted into lane 2 of the first
; operand.
140 define float @test_sub_ss_2(float %a, float %b) {
141 ; CHECK-LABEL: @test_sub_ss_2(
142 ; CHECK-NEXT: ret float 2.000000e+00
144 %1 = insertelement <4 x float> undef, float %a, i32 0
145 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
146 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
147 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
148 %5 = insertelement <4 x float> undef, float %b, i32 0
149 %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
150 %7 = extractelement <4 x float> %6, i32 2
; Lane 0 of the mul.ss result is extracted. The CHECK lines expect the vector
; intrinsic and all inserts to collapse into a scalar fmul of %a and %b.
154 define float @test_mul_ss_0(float %a, float %b) {
155 ; CHECK-LABEL: @test_mul_ss_0(
156 ; CHECK-NEXT: [[TMP1:%.*]] = fmul float %a, %b
157 ; CHECK-NEXT: ret float [[TMP1]]
159 %1 = insertelement <4 x float> undef, float %a, i32 0
160 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
161 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
162 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
163 %5 = insertelement <4 x float> undef, float %b, i32 0
164 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
165 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
166 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
167 %9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
168 %r = extractelement <4 x float> %9, i32 0
; Lane 3 of the mul.ss result is extracted. The CHECK lines expect the whole
; sequence to fold to 3.0, the constant inserted into lane 3 of the first
; operand.
172 define float @test_mul_ss_3(float %a, float %b) {
173 ; CHECK-LABEL: @test_mul_ss_3(
174 ; CHECK-NEXT: ret float 3.000000e+00
176 %1 = insertelement <4 x float> undef, float %a, i32 0
177 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
178 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
179 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
180 %5 = insertelement <4 x float> undef, float %b, i32 0
181 %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
182 %7 = extractelement <4 x float> %6, i32 3
; Lane 0 of the div.ss result is extracted. The CHECK lines expect the vector
; intrinsic and all inserts to collapse into a scalar fdiv of %a by %b.
186 define float @test_div_ss_0(float %a, float %b) {
187 ; CHECK-LABEL: @test_div_ss_0(
188 ; CHECK-NEXT: [[TMP1:%.*]] = fdiv float %a, %b
189 ; CHECK-NEXT: ret float [[TMP1]]
191 %1 = insertelement <4 x float> undef, float %a, i32 0
192 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
193 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
194 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
195 %5 = insertelement <4 x float> undef, float %b, i32 0
196 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
197 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
198 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
199 %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
200 %r = extractelement <4 x float> %9, i32 0
; Lane 1 of the div.ss result is extracted. The CHECK lines expect the whole
; sequence to fold to 1.0, the constant inserted into lane 1 of the first
; operand.
204 define float @test_div_ss_1(float %a, float %b) {
205 ; CHECK-LABEL: @test_div_ss_1(
206 ; CHECK-NEXT: ret float 1.000000e+00
208 %1 = insertelement <4 x float> undef, float %a, i32 0
209 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
210 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
211 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
212 %5 = insertelement <4 x float> undef, float %b, i32 0
213 %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
214 %7 = extractelement <4 x float> %6, i32 1
; The second min.ss operand is %b with constants inserted into lanes 1-3. The
; CHECK lines expect those inserts to be dropped so the call takes %b directly.
218 define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
219 ; CHECK-LABEL: @test_min_ss(
220 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %b)
221 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
223 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
224 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
225 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
226 %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
; Lane 0 of the min.ss result is extracted. The CHECK lines expect the constant
; inserts into lanes 1-3 of both operands to be dropped while the intrinsic
; call itself is kept (it is not turned into a scalar operation).
230 define float @test_min_ss_0(float %a, float %b) {
231 ; CHECK-LABEL: @test_min_ss_0(
232 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
233 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
234 ; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
235 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
236 ; CHECK-NEXT: ret float [[TMP4]]
238 %1 = insertelement <4 x float> undef, float %a, i32 0
239 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
240 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
241 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
242 %5 = insertelement <4 x float> undef, float %b, i32 0
243 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
244 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
245 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
246 %9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
247 %10 = extractelement <4 x float> %9, i32 0
; Lane 2 of the min.ss result is extracted. The CHECK lines expect the whole
; sequence to fold to 2.0, the constant inserted into lane 2 of the first
; operand.
251 define float @test_min_ss_2(float %a, float %b) {
252 ; CHECK-LABEL: @test_min_ss_2(
253 ; CHECK-NEXT: ret float 2.000000e+00
255 %1 = insertelement <4 x float> undef, float %a, i32 0
256 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
257 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
258 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
259 %5 = insertelement <4 x float> undef, float %b, i32 0
260 %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
261 %7 = extractelement <4 x float> %6, i32 2
; The second max.ss operand is %b with constants inserted into lanes 1-3. The
; CHECK lines expect those inserts to be dropped so the call takes %b directly.
265 define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
266 ; CHECK-LABEL: @test_max_ss(
267 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %b)
268 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
270 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
271 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
272 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
273 %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
; Lane 0 of the max.ss result is extracted. The CHECK lines expect the constant
; inserts into lanes 1-3 of both operands to be dropped while the intrinsic
; call itself is kept (it is not turned into a scalar operation).
277 define float @test_max_ss_0(float %a, float %b) {
278 ; CHECK-LABEL: @test_max_ss_0(
279 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
280 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
281 ; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
282 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
283 ; CHECK-NEXT: ret float [[TMP4]]
285 %1 = insertelement <4 x float> undef, float %a, i32 0
286 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
287 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
288 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
289 %5 = insertelement <4 x float> undef, float %b, i32 0
290 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
291 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
292 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
293 %9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
294 %10 = extractelement <4 x float> %9, i32 0
; Lane 3 of the max.ss result is extracted. The CHECK lines expect the whole
; sequence to fold to 3.0, the constant inserted into lane 3 of the first
; operand.
298 define float @test_max_ss_3(float %a, float %b) {
299 ; CHECK-LABEL: @test_max_ss_3(
300 ; CHECK-NEXT: ret float 3.000000e+00
302 %1 = insertelement <4 x float> undef, float %a, i32 0
303 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
304 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
305 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
306 %5 = insertelement <4 x float> undef, float %b, i32 0
307 %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
308 %7 = extractelement <4 x float> %6, i32 3
; The second cmp.ss operand is %b with constants inserted into lanes 1-3. The
; CHECK lines expect those inserts to be dropped so the call takes %b directly,
; with the i8 0 immediate unchanged.
312 define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
313 ; CHECK-LABEL: @test_cmp_ss(
314 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %b, i8 0)
315 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
317 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
318 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
319 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
320 %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
; Lane 0 of the cmp.ss result is extracted. The CHECK lines expect the constant
; inserts into lanes 1-3 of both operands to be dropped while the intrinsic
; call (with its i8 0 immediate) is kept.
324 define float @test_cmp_ss_0(float %a, float %b) {
325 ; CHECK-LABEL: @test_cmp_ss_0(
326 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
327 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
328 ; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
329 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
330 ; CHECK-NEXT: ret float [[R]]
332 %1 = insertelement <4 x float> undef, float %a, i32 0
333 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
334 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
335 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
336 %5 = insertelement <4 x float> undef, float %b, i32 0
337 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
338 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
339 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
340 %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
341 %r = extractelement <4 x float> %9, i32 0
; Lane 1 of the cmp.ss result is extracted. The CHECK lines expect the whole
; sequence to fold to 1.0, the constant inserted into lane 1 of the first
; operand.
345 define float @test_cmp_ss_1(float %a, float %b) {
346 ; CHECK-LABEL: @test_cmp_ss_1(
347 ; CHECK-NEXT: ret float 1.000000e+00
349 %1 = insertelement <4 x float> undef, float %a, i32 0
350 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
351 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
352 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
353 %5 = insertelement <4 x float> undef, float %b, i32 0
354 %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
355 %7 = extractelement <4 x float> %6, i32 1
; comieq.ss returns an i32. The CHECK lines expect the constant inserts into
; lanes 1-3 of both operands to be dropped, leaving only the lane-0 inserts
; feeding the intrinsic call.
359 define i32 @test_comieq_ss_0(float %a, float %b) {
360 ; CHECK-LABEL: @test_comieq_ss_0(
361 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
362 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
363 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
364 ; CHECK-NEXT: ret i32 [[TMP3]]
366 %1 = insertelement <4 x float> undef, float %a, i32 0
367 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
368 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
369 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
370 %5 = insertelement <4 x float> undef, float %b, i32 0
371 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
372 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
373 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
374 %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
; comige.ss returns an i32. The CHECK lines expect the constant inserts into
; lanes 1-3 of both operands to be dropped, leaving only the lane-0 inserts
; feeding the intrinsic call.
378 define i32 @test_comige_ss_0(float %a, float %b) {
379 ; CHECK-LABEL: @test_comige_ss_0(
380 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
381 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
382 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
383 ; CHECK-NEXT: ret i32 [[TMP3]]
385 %1 = insertelement <4 x float> undef, float %a, i32 0
386 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
387 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
388 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
389 %5 = insertelement <4 x float> undef, float %b, i32 0
390 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
391 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
392 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
393 %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
; comigt.ss returns an i32. The CHECK lines expect the constant inserts into
; lanes 1-3 of both operands to be dropped, leaving only the lane-0 inserts
; feeding the intrinsic call.
397 define i32 @test_comigt_ss_0(float %a, float %b) {
398 ; CHECK-LABEL: @test_comigt_ss_0(
399 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
400 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
401 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
402 ; CHECK-NEXT: ret i32 [[TMP3]]
404 %1 = insertelement <4 x float> undef, float %a, i32 0
405 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
406 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
407 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
408 %5 = insertelement <4 x float> undef, float %b, i32 0
409 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
410 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
411 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
412 %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
; comile.ss returns an i32. The CHECK lines expect the constant inserts into
; lanes 1-3 of both operands to be dropped, leaving only the lane-0 inserts
; feeding the intrinsic call.
416 define i32 @test_comile_ss_0(float %a, float %b) {
417 ; CHECK-LABEL: @test_comile_ss_0(
418 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
419 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
420 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
421 ; CHECK-NEXT: ret i32 [[TMP3]]
423 %1 = insertelement <4 x float> undef, float %a, i32 0
424 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
425 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
426 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
427 %5 = insertelement <4 x float> undef, float %b, i32 0
428 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
429 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
430 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
431 %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
; comilt.ss returns an i32. The CHECK lines expect the constant inserts into
; lanes 1-3 of both operands to be dropped, leaving only the lane-0 inserts
; feeding the intrinsic call.
435 define i32 @test_comilt_ss_0(float %a, float %b) {
436 ; CHECK-LABEL: @test_comilt_ss_0(
437 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
438 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
439 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
440 ; CHECK-NEXT: ret i32 [[TMP3]]
442 %1 = insertelement <4 x float> undef, float %a, i32 0
443 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
444 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
445 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
446 %5 = insertelement <4 x float> undef, float %b, i32 0
447 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
448 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
449 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
450 %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
; comineq.ss returns an i32. The CHECK lines expect the constant inserts into
; lanes 1-3 of both operands to be dropped, leaving only the lane-0 inserts
; feeding the intrinsic call.
454 define i32 @test_comineq_ss_0(float %a, float %b) {
455 ; CHECK-LABEL: @test_comineq_ss_0(
456 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
457 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
458 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
459 ; CHECK-NEXT: ret i32 [[TMP3]]
461 %1 = insertelement <4 x float> undef, float %a, i32 0
462 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
463 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
464 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
465 %5 = insertelement <4 x float> undef, float %b, i32 0
466 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
467 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
468 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
469 %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
; ucomieq.ss returns an i32. The CHECK lines expect the constant inserts into
; lanes 1-3 of both operands to be dropped, leaving only the lane-0 inserts
; feeding the intrinsic call.
473 define i32 @test_ucomieq_ss_0(float %a, float %b) {
474 ; CHECK-LABEL: @test_ucomieq_ss_0(
475 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
476 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
477 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
478 ; CHECK-NEXT: ret i32 [[TMP3]]
480 %1 = insertelement <4 x float> undef, float %a, i32 0
481 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
482 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
483 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
484 %5 = insertelement <4 x float> undef, float %b, i32 0
485 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
486 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
487 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
488 %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
; ucomige.ss returns an i32. The CHECK lines expect the constant inserts into
; lanes 1-3 of both operands to be dropped, leaving only the lane-0 inserts
; feeding the intrinsic call.
492 define i32 @test_ucomige_ss_0(float %a, float %b) {
493 ; CHECK-LABEL: @test_ucomige_ss_0(
494 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
495 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
496 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
497 ; CHECK-NEXT: ret i32 [[TMP3]]
499 %1 = insertelement <4 x float> undef, float %a, i32 0
500 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
501 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
502 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
503 %5 = insertelement <4 x float> undef, float %b, i32 0
504 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
505 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
506 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
507 %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
; ucomigt.ss returns an i32. The CHECK lines expect the constant inserts into
; lanes 1-3 of both operands to be dropped, leaving only the lane-0 inserts
; feeding the intrinsic call.
511 define i32 @test_ucomigt_ss_0(float %a, float %b) {
512 ; CHECK-LABEL: @test_ucomigt_ss_0(
513 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
514 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
515 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
516 ; CHECK-NEXT: ret i32 [[TMP3]]
518 %1 = insertelement <4 x float> undef, float %a, i32 0
519 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
520 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
521 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
522 %5 = insertelement <4 x float> undef, float %b, i32 0
523 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
524 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
525 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
526 %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
; ucomile.ss returns an i32. The CHECK lines expect the constant inserts into
; lanes 1-3 of both operands to be dropped, leaving only the lane-0 inserts
; feeding the intrinsic call.
530 define i32 @test_ucomile_ss_0(float %a, float %b) {
531 ; CHECK-LABEL: @test_ucomile_ss_0(
532 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
533 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
534 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
535 ; CHECK-NEXT: ret i32 [[TMP3]]
537 %1 = insertelement <4 x float> undef, float %a, i32 0
538 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
539 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
540 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
541 %5 = insertelement <4 x float> undef, float %b, i32 0
542 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
543 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
544 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
545 %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
; ucomilt.ss returns an i32. The CHECK lines expect the constant inserts into
; lanes 1-3 of both operands to be dropped, leaving only the lane-0 inserts
; feeding the intrinsic call.
549 define i32 @test_ucomilt_ss_0(float %a, float %b) {
550 ; CHECK-LABEL: @test_ucomilt_ss_0(
551 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
552 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
553 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
554 ; CHECK-NEXT: ret i32 [[TMP3]]
556 %1 = insertelement <4 x float> undef, float %a, i32 0
557 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
558 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
559 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
560 %5 = insertelement <4 x float> undef, float %b, i32 0
561 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
562 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
563 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
564 %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
; ucomineq.ss returns an i32. The CHECK lines expect the constant inserts into
; lanes 1-3 of both operands to be dropped, leaving only the lane-0 inserts
; feeding the intrinsic call.
568 define i32 @test_ucomineq_ss_0(float %a, float %b) {
569 ; CHECK-LABEL: @test_ucomineq_ss_0(
570 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
571 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
572 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
573 ; CHECK-NEXT: ret i32 [[TMP3]]
575 %1 = insertelement <4 x float> undef, float %a, i32 0
576 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
577 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
578 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
579 %5 = insertelement <4 x float> undef, float %b, i32 0
580 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
581 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
582 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
583 %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
; Declarations of the x86 SSE intrinsics called by the test functions in this
; file, in this order: one-operand forms returning <4 x float>, two-operand
; forms returning <4 x float> (cmp.ss also takes an i8 argument), then the
; comi*/ucomi* forms returning i32.
587 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
588 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
589 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
591 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
592 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
593 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
594 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
595 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
596 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
597 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)
599 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
600 declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
601 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
602 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
603 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
604 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)
606 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
607 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
608 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
609 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
610 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
611 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)