1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; rcp.ss, lane 0 used: %a is placed in lane 0 and the constants 1.0/2.0/3.0 in
; lanes 1-3, then only lane 0 of the intrinsic result is extracted.
; The CHECK lines expect the intrinsic call to stay, but fed by a single
; insertelement of %a into a poison vector (the constant inserts are gone).
5 define float @test_rcp_ss_0(float %a) {
6 ; CHECK-LABEL: @test_rcp_ss_0(
7 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
8 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
9 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
10 ; CHECK-NEXT: ret float [[TMP3]]
12 %1 = insertelement <4 x float> undef, float %a, i32 0
13 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
14 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
15 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
16 %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
17 %6 = extractelement <4 x float> %5, i32 0
; rcp.ss, lane 1 used: same input vector as the lane-0 test, but lane 1 of the
; result is extracted. The CHECK lines expect everything to fold to the
; constant 1.0 that was inserted into lane 1, with no intrinsic call left.
21 define float @test_rcp_ss_1(float %a) {
22 ; CHECK-LABEL: @test_rcp_ss_1(
23 ; CHECK-NEXT: ret float 1.000000e+00
25 %1 = insertelement <4 x float> undef, float %a, i32 0
26 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
27 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
28 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
29 %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
30 %6 = extractelement <4 x float> %5, i32 1
; sqrt.ss, lane 0 used: %a in lane 0, constants 1.0/2.0/3.0 in lanes 1-3, and
; only lane 0 of the result is extracted. The CHECK lines expect the x86
; intrinsic to be replaced by a scalar call to @llvm.sqrt.f32 on %a.
34 define float @test_sqrt_ss_0(float %a) {
35 ; CHECK-LABEL: @test_sqrt_ss_0(
36 ; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.sqrt.f32(float [[A:%.*]])
37 ; CHECK-NEXT: ret float [[TMP1]]
39 %1 = insertelement <4 x float> undef, float %a, i32 0
40 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
41 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
42 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
43 %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
44 %6 = extractelement <4 x float> %5, i32 0
; sqrt.ss, lane 2 used: lane 2 of the result is extracted. The CHECK lines
; expect a fold to the constant 2.0 that was inserted into lane 2 of the
; operand, with no intrinsic call left.
48 define float @test_sqrt_ss_2(float %a) {
49 ; CHECK-LABEL: @test_sqrt_ss_2(
50 ; CHECK-NEXT: ret float 2.000000e+00
52 %1 = insertelement <4 x float> undef, float %a, i32 0
53 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
54 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
55 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
56 %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
57 %6 = extractelement <4 x float> %5, i32 2
; rsqrt.ss, lane 0 used: %a in lane 0, constants 1.0/2.0/3.0 in lanes 1-3, and
; only lane 0 of the result is extracted. The CHECK lines expect the intrinsic
; call to stay, fed by a single insertelement of %a into a poison vector.
61 define float @test_rsqrt_ss_0(float %a) {
62 ; CHECK-LABEL: @test_rsqrt_ss_0(
63 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
64 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
65 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
66 ; CHECK-NEXT: ret float [[TMP3]]
68 %1 = insertelement <4 x float> undef, float %a, i32 0
69 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
70 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
71 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
72 %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
73 %6 = extractelement <4 x float> %5, i32 0
; rsqrt.ss, lane 3 used: lane 3 of the result is extracted. The CHECK lines
; expect a fold to the constant 3.0 that was inserted into lane 3 of the
; operand, with no intrinsic call left.
77 define float @test_rsqrt_ss_3(float %a) {
78 ; CHECK-LABEL: @test_rsqrt_ss_3(
79 ; CHECK-NEXT: ret float 3.000000e+00
81 %1 = insertelement <4 x float> undef, float %a, i32 0
82 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
83 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
84 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
85 %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
86 %6 = extractelement <4 x float> %5, i32 3
; add.ss, lane 0 used: both operands are fully populated vectors (%a / %b in
; lane 0, constants in lanes 1-3) and only lane 0 of the result is extracted.
; The CHECK lines expect a plain scalar `fadd float %a, %b`.
90 define float @test_add_ss_0(float %a, float %b) {
91 ; CHECK-LABEL: @test_add_ss_0(
92 ; CHECK-NEXT: [[TMP1:%.*]] = fadd float [[A:%.*]], [[B:%.*]]
93 ; CHECK-NEXT: ret float [[TMP1]]
95 %1 = insertelement <4 x float> undef, float %a, i32 0
96 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
97 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
98 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
99 %5 = insertelement <4 x float> undef, float %b, i32 0
100 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
101 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
102 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
103 %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
104 %r = extractelement <4 x float> %9, i32 0
; add.ss, lane 1 used: the first operand carries constants 1.0/2.0/3.0 in
; lanes 1-3, the second operand only has %b in lane 0. Lane 1 of the result is
; extracted; the CHECK lines expect a fold to the first operand's lane-1
; constant, 1.0.
108 define float @test_add_ss_1(float %a, float %b) {
109 ; CHECK-LABEL: @test_add_ss_1(
110 ; CHECK-NEXT: ret float 1.000000e+00
112 %1 = insertelement <4 x float> undef, float %a, i32 0
113 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
114 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
115 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
116 %5 = insertelement <4 x float> undef, float %b, i32 0
117 %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
118 %7 = extractelement <4 x float> %6, i32 1
; add.ss with the same vector as both operands: %a is inserted into lane 0 of
; a zero vector, that vector is passed twice, and lane 0 is extracted.
; The CHECK lines expect a scalar `fadd float %a, %a`.
122 define float @test_add_ss_2(float %a) {
123 ; CHECK-LABEL: @test_add_ss_2(
124 ; CHECK-NEXT: [[TMP1:%.*]] = fadd float [[A:%.*]], [[A]]
125 ; CHECK-NEXT: ret float [[TMP1]]
127 %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
128 %2 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %1, <4 x float> %1)
129 %3 = extractelement <4 x float> %2, i32 0
; sub.ss, lane 0 used: both operands are fully populated vectors (%a / %b in
; lane 0, constants in lanes 1-3) and only lane 0 of the result is extracted.
; The CHECK lines expect a plain scalar `fsub float %a, %b`.
133 define float @test_sub_ss_0(float %a, float %b) {
134 ; CHECK-LABEL: @test_sub_ss_0(
135 ; CHECK-NEXT: [[TMP1:%.*]] = fsub float [[A:%.*]], [[B:%.*]]
136 ; CHECK-NEXT: ret float [[TMP1]]
138 %1 = insertelement <4 x float> undef, float %a, i32 0
139 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
140 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
141 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
142 %5 = insertelement <4 x float> undef, float %b, i32 0
143 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
144 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
145 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
146 %9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
147 %r = extractelement <4 x float> %9, i32 0
; sub.ss, lane 2 used: the first operand carries constants 1.0/2.0/3.0 in
; lanes 1-3, the second operand only has %b in lane 0. Lane 2 of the result is
; extracted; the CHECK lines expect a fold to the first operand's lane-2
; constant, 2.0.
151 define float @test_sub_ss_2(float %a, float %b) {
152 ; CHECK-LABEL: @test_sub_ss_2(
153 ; CHECK-NEXT: ret float 2.000000e+00
155 %1 = insertelement <4 x float> undef, float %a, i32 0
156 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
157 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
158 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
159 %5 = insertelement <4 x float> undef, float %b, i32 0
160 %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
161 %7 = extractelement <4 x float> %6, i32 2
; sub.ss with the same vector as both operands: %a is inserted into lane 0 of
; a zero vector, that vector is passed twice, and lane 0 is extracted.
; The CHECK lines expect a scalar `fsub float %a, %a` (not folded further).
165 define float @test_sub_ss_3(float %a) {
166 ; CHECK-LABEL: @test_sub_ss_3(
167 ; CHECK-NEXT: [[TMP1:%.*]] = fsub float [[A:%.*]], [[A]]
168 ; CHECK-NEXT: ret float [[TMP1]]
170 %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
171 %2 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %1, <4 x float> %1)
172 %3 = extractelement <4 x float> %2, i32 0
; mul.ss, lane 0 used: both operands are fully populated vectors (%a / %b in
; lane 0, constants in lanes 1-3) and only lane 0 of the result is extracted.
; The CHECK lines expect a plain scalar `fmul float %a, %b`.
176 define float @test_mul_ss_0(float %a, float %b) {
177 ; CHECK-LABEL: @test_mul_ss_0(
178 ; CHECK-NEXT: [[TMP1:%.*]] = fmul float [[A:%.*]], [[B:%.*]]
179 ; CHECK-NEXT: ret float [[TMP1]]
181 %1 = insertelement <4 x float> undef, float %a, i32 0
182 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
183 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
184 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
185 %5 = insertelement <4 x float> undef, float %b, i32 0
186 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
187 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
188 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
189 %9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
190 %r = extractelement <4 x float> %9, i32 0
; mul.ss, lane 3 used: the first operand carries constants 1.0/2.0/3.0 in
; lanes 1-3, the second operand only has %b in lane 0. Lane 3 of the result is
; extracted; the CHECK lines expect a fold to the first operand's lane-3
; constant, 3.0.
194 define float @test_mul_ss_3(float %a, float %b) {
195 ; CHECK-LABEL: @test_mul_ss_3(
196 ; CHECK-NEXT: ret float 3.000000e+00
198 %1 = insertelement <4 x float> undef, float %a, i32 0
199 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
200 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
201 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
202 %5 = insertelement <4 x float> undef, float %b, i32 0
203 %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
204 %7 = extractelement <4 x float> %6, i32 3
; mul.ss with the same vector as both operands: %a is inserted into lane 0 of
; a zero vector, that vector is passed twice, and lane 0 is extracted.
; The CHECK lines expect a scalar `fmul float %a, %a`.
208 define float @test_mul_ss_4(float %a) {
209 ; CHECK-LABEL: @test_mul_ss_4(
210 ; CHECK-NEXT: [[TMP1:%.*]] = fmul float [[A:%.*]], [[A]]
211 ; CHECK-NEXT: ret float [[TMP1]]
213 %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
214 %2 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %1, <4 x float> %1)
215 %3 = extractelement <4 x float> %2, i32 0
; div.ss, lane 0 used: both operands are fully populated vectors (%a / %b in
; lane 0, constants in lanes 1-3) and only lane 0 of the result is extracted.
; The CHECK lines expect a plain scalar `fdiv float %a, %b`.
219 define float @test_div_ss_0(float %a, float %b) {
220 ; CHECK-LABEL: @test_div_ss_0(
221 ; CHECK-NEXT: [[TMP1:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
222 ; CHECK-NEXT: ret float [[TMP1]]
224 %1 = insertelement <4 x float> undef, float %a, i32 0
225 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
226 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
227 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
228 %5 = insertelement <4 x float> undef, float %b, i32 0
229 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
230 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
231 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
232 %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
233 %r = extractelement <4 x float> %9, i32 0
; div.ss, lane 1 used: the first operand carries constants 1.0/2.0/3.0 in
; lanes 1-3, the second operand only has %b in lane 0. Lane 1 of the result is
; extracted; the CHECK lines expect a fold to the first operand's lane-1
; constant, 1.0.
237 define float @test_div_ss_1(float %a, float %b) {
238 ; CHECK-LABEL: @test_div_ss_1(
239 ; CHECK-NEXT: ret float 1.000000e+00
241 %1 = insertelement <4 x float> undef, float %a, i32 0
242 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
243 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
244 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
245 %5 = insertelement <4 x float> undef, float %b, i32 0
246 %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
247 %7 = extractelement <4 x float> %6, i32 1
; div.ss with the same vector as both operands: %a is inserted into lane 0 of
; a zero vector, that vector is passed twice, and lane 0 is extracted.
; The CHECK lines expect a scalar `fdiv float %a, %a` (not folded further).
251 define float @test_div_ss_2(float %a) {
252 ; CHECK-LABEL: @test_div_ss_2(
253 ; CHECK-NEXT: [[TMP1:%.*]] = fdiv float [[A:%.*]], [[A]]
254 ; CHECK-NEXT: ret float [[TMP1]]
256 %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
257 %2 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %1, <4 x float> %1)
258 %3 = extractelement <4 x float> %2, i32 0
; min.ss, whole vector result: constants are inserted into lanes 1-3 of the
; second operand %b before the call. The CHECK lines expect the call to take
; %a and %b directly, i.e. the three inserts on the second operand are dropped.
262 define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
263 ; CHECK-LABEL: @test_min_ss(
264 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
265 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
267 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
268 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
269 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
270 %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
; min.ss, lane 0 used: both operands are fully populated vectors and only
; lane 0 of the result is extracted. Unlike the add/sub/mul/div tests, the
; CHECK lines expect the intrinsic call to remain; only the operands are
; reduced to a single lane-0 insertelement into poison.
274 define float @test_min_ss_0(float %a, float %b) {
275 ; CHECK-LABEL: @test_min_ss_0(
276 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
277 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
278 ; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
279 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
280 ; CHECK-NEXT: ret float [[TMP4]]
282 %1 = insertelement <4 x float> undef, float %a, i32 0
283 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
284 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
285 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
286 %5 = insertelement <4 x float> undef, float %b, i32 0
287 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
288 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
289 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
290 %9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
291 %10 = extractelement <4 x float> %9, i32 0
; min.ss, lane 2 used: the first operand carries constants 1.0/2.0/3.0 in
; lanes 1-3, the second operand only has %b in lane 0. Lane 2 of the result is
; extracted; the CHECK lines expect a fold to the first operand's lane-2
; constant, 2.0.
295 define float @test_min_ss_2(float %a, float %b) {
296 ; CHECK-LABEL: @test_min_ss_2(
297 ; CHECK-NEXT: ret float 2.000000e+00
299 %1 = insertelement <4 x float> undef, float %a, i32 0
300 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
301 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
302 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
303 %5 = insertelement <4 x float> undef, float %b, i32 0
304 %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
305 %7 = extractelement <4 x float> %6, i32 2
; min.ss with the same vector as both operands: %a is inserted into lane 0 of
; a zero vector, that vector is passed twice, and lane 0 is extracted.
; The CHECK lines expect the intrinsic call to remain; the base vector's
; lane 0 is shown as poison while lanes 1-3 keep their 0.0 constants.
309 define float @test_min_ss_3(float %a) {
310 ; CHECK-LABEL: @test_min_ss_3(
311 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i32 0
312 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]])
313 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
314 ; CHECK-NEXT: ret float [[TMP3]]
316 %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
317 %2 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %1)
318 %3 = extractelement <4 x float> %2, i32 0
; max.ss, whole vector result: constants are inserted into lanes 1-3 of the
; second operand %b before the call. The CHECK lines expect the call to take
; %a and %b directly, i.e. the three inserts on the second operand are dropped.
322 define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
323 ; CHECK-LABEL: @test_max_ss(
324 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
325 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
327 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
328 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
329 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
330 %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
; max.ss, lane 0 used: both operands are fully populated vectors and only
; lane 0 of the result is extracted. The CHECK lines expect the intrinsic call
; to remain; only the operands are reduced to a single lane-0 insertelement
; into poison.
334 define float @test_max_ss_0(float %a, float %b) {
335 ; CHECK-LABEL: @test_max_ss_0(
336 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
337 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
338 ; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
339 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
340 ; CHECK-NEXT: ret float [[TMP4]]
342 %1 = insertelement <4 x float> undef, float %a, i32 0
343 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
344 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
345 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
346 %5 = insertelement <4 x float> undef, float %b, i32 0
347 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
348 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
349 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
350 %9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
351 %10 = extractelement <4 x float> %9, i32 0
; max.ss, lane 3 used: the first operand carries constants 1.0/2.0/3.0 in
; lanes 1-3, the second operand only has %b in lane 0. Lane 3 of the result is
; extracted; the CHECK lines expect a fold to the first operand's lane-3
; constant, 3.0.
355 define float @test_max_ss_3(float %a, float %b) {
356 ; CHECK-LABEL: @test_max_ss_3(
357 ; CHECK-NEXT: ret float 3.000000e+00
359 %1 = insertelement <4 x float> undef, float %a, i32 0
360 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
361 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
362 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
363 %5 = insertelement <4 x float> undef, float %b, i32 0
364 %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
365 %7 = extractelement <4 x float> %6, i32 3
; max.ss with the same vector as both operands: %a is inserted into lane 0 of
; a zero vector, that vector is passed twice, and lane 0 is extracted.
; The CHECK lines expect the intrinsic call to remain; the base vector's
; lane 0 is shown as poison while lanes 1-3 keep their 0.0 constants.
369 define float @test_max_ss_4(float %a) {
370 ; CHECK-LABEL: @test_max_ss_4(
371 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i32 0
372 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]])
373 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
374 ; CHECK-NEXT: ret float [[TMP3]]
376 %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
377 %2 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %1)
378 %3 = extractelement <4 x float> %2, i32 0
; cmp.ss (immediate i8 0), whole vector result: constants are inserted into
; lanes 1-3 of the second operand %b before the call. The CHECK lines expect
; the call to take %a and %b directly, with the same i8 0 immediate.
382 define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
383 ; CHECK-LABEL: @test_cmp_ss(
384 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i8 0)
385 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
387 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
388 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
389 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
390 %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
; cmp.ss (immediate i8 0), lane 0 used: both operands are fully populated
; vectors and only lane 0 of the result is extracted. The CHECK lines expect
; the intrinsic call to remain, with each operand reduced to a single lane-0
; insertelement into poison.
394 define float @test_cmp_ss_0(float %a, float %b) {
395 ; CHECK-LABEL: @test_cmp_ss_0(
396 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
397 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
398 ; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
399 ; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
400 ; CHECK-NEXT: ret float [[R]]
402 %1 = insertelement <4 x float> undef, float %a, i32 0
403 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
404 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
405 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
406 %5 = insertelement <4 x float> undef, float %b, i32 0
407 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
408 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
409 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
410 %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
411 %r = extractelement <4 x float> %9, i32 0
; cmp.ss (immediate i8 0), lane 1 used: the first operand carries constants
; 1.0/2.0/3.0 in lanes 1-3, the second operand only has %b in lane 0. Lane 1
; of the result is extracted; the CHECK lines expect a fold to the first
; operand's lane-1 constant, 1.0.
415 define float @test_cmp_ss_1(float %a, float %b) {
416 ; CHECK-LABEL: @test_cmp_ss_1(
417 ; CHECK-NEXT: ret float 1.000000e+00
419 %1 = insertelement <4 x float> undef, float %a, i32 0
420 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
421 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
422 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
423 %5 = insertelement <4 x float> undef, float %b, i32 0
424 %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
425 %7 = extractelement <4 x float> %6, i32 1
; cmp.ss (immediate i8 3) with the same vector as both operands: %a is
; inserted into lane 0 of a zero vector, that vector is passed twice, and
; lane 0 is extracted. The CHECK lines expect the intrinsic call to remain,
; with the base vector's lane 0 shown as poison and lanes 1-3 still 0.0.
429 define float @test_cmp_ss_2(float %a) {
430 ; CHECK-LABEL: @test_cmp_ss_2(
431 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i32 0
432 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]], i8 3)
433 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
434 ; CHECK-NEXT: ret float [[TMP3]]
436 %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
437 %2 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %1, i8 3)
438 %3 = extractelement <4 x float> %2, i32 0
; comieq.ss: both operands are fully populated vectors (%a / %b in lane 0,
; constants in lanes 1-3) and the intrinsic produces an i32. The CHECK lines
; expect the call to remain, with each operand reduced to a single lane-0
; insertelement into poison.
442 define i32 @test_comieq_ss_0(float %a, float %b) {
443 ; CHECK-LABEL: @test_comieq_ss_0(
444 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
445 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
446 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
447 ; CHECK-NEXT: ret i32 [[TMP3]]
449 %1 = insertelement <4 x float> undef, float %a, i32 0
450 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
451 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
452 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
453 %5 = insertelement <4 x float> undef, float %b, i32 0
454 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
455 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
456 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
457 %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
; comige.ss: both operands are fully populated vectors (%a / %b in lane 0,
; constants in lanes 1-3) and the intrinsic produces an i32. The CHECK lines
; expect the call to remain, with each operand reduced to a single lane-0
; insertelement into poison.
461 define i32 @test_comige_ss_0(float %a, float %b) {
462 ; CHECK-LABEL: @test_comige_ss_0(
463 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
464 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
465 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
466 ; CHECK-NEXT: ret i32 [[TMP3]]
468 %1 = insertelement <4 x float> undef, float %a, i32 0
469 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
470 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
471 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
472 %5 = insertelement <4 x float> undef, float %b, i32 0
473 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
474 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
475 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
476 %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
; comigt.ss: both operands are fully populated vectors (%a / %b in lane 0,
; constants in lanes 1-3) and the intrinsic produces an i32. The CHECK lines
; expect the call to remain, with each operand reduced to a single lane-0
; insertelement into poison.
480 define i32 @test_comigt_ss_0(float %a, float %b) {
481 ; CHECK-LABEL: @test_comigt_ss_0(
482 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
483 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
484 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
485 ; CHECK-NEXT: ret i32 [[TMP3]]
487 %1 = insertelement <4 x float> undef, float %a, i32 0
488 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
489 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
490 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
491 %5 = insertelement <4 x float> undef, float %b, i32 0
492 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
493 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
494 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
495 %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
; comile.ss: both operands are fully populated vectors (%a / %b in lane 0,
; constants in lanes 1-3) and the intrinsic produces an i32. The CHECK lines
; expect the call to remain, with each operand reduced to a single lane-0
; insertelement into poison.
499 define i32 @test_comile_ss_0(float %a, float %b) {
500 ; CHECK-LABEL: @test_comile_ss_0(
501 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
502 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
503 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
504 ; CHECK-NEXT: ret i32 [[TMP3]]
506 %1 = insertelement <4 x float> undef, float %a, i32 0
507 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
508 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
509 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
510 %5 = insertelement <4 x float> undef, float %b, i32 0
511 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
512 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
513 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
514 %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
; comilt.ss: both operands are fully populated vectors (%a / %b in lane 0,
; constants in lanes 1-3) and the intrinsic produces an i32. The CHECK lines
; expect the call to remain, with each operand reduced to a single lane-0
; insertelement into poison.
518 define i32 @test_comilt_ss_0(float %a, float %b) {
519 ; CHECK-LABEL: @test_comilt_ss_0(
520 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
521 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
522 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
523 ; CHECK-NEXT: ret i32 [[TMP3]]
525 %1 = insertelement <4 x float> undef, float %a, i32 0
526 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
527 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
528 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
529 %5 = insertelement <4 x float> undef, float %b, i32 0
530 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
531 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
532 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
533 %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
; comineq.ss: both operands are fully populated vectors (%a / %b in lane 0,
; constants in lanes 1-3) and the intrinsic produces an i32. The CHECK lines
; expect the call to remain, with each operand reduced to a single lane-0
; insertelement into poison.
537 define i32 @test_comineq_ss_0(float %a, float %b) {
538 ; CHECK-LABEL: @test_comineq_ss_0(
539 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
540 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
541 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
542 ; CHECK-NEXT: ret i32 [[TMP3]]
544 %1 = insertelement <4 x float> undef, float %a, i32 0
545 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
546 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
547 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
548 %5 = insertelement <4 x float> undef, float %b, i32 0
549 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
550 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
551 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
552 %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
; ucomieq.ss: both operands are fully populated vectors (%a / %b in lane 0,
; constants in lanes 1-3) and the intrinsic produces an i32. The CHECK lines
; expect the call to remain, with each operand reduced to a single lane-0
; insertelement into poison.
556 define i32 @test_ucomieq_ss_0(float %a, float %b) {
557 ; CHECK-LABEL: @test_ucomieq_ss_0(
558 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
559 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
560 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
561 ; CHECK-NEXT: ret i32 [[TMP3]]
563 %1 = insertelement <4 x float> undef, float %a, i32 0
564 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
565 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
566 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
567 %5 = insertelement <4 x float> undef, float %b, i32 0
568 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
569 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
570 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
571 %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
; ucomige.ss: both operands are fully populated vectors (%a / %b in lane 0,
; constants in lanes 1-3) and the intrinsic produces an i32. The CHECK lines
; expect the call to remain, with each operand reduced to a single lane-0
; insertelement into poison.
575 define i32 @test_ucomige_ss_0(float %a, float %b) {
576 ; CHECK-LABEL: @test_ucomige_ss_0(
577 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
578 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
579 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
580 ; CHECK-NEXT: ret i32 [[TMP3]]
582 %1 = insertelement <4 x float> undef, float %a, i32 0
583 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
584 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
585 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
586 %5 = insertelement <4 x float> undef, float %b, i32 0
587 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
588 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
589 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
590 %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
; ucomigt.ss: both operands are fully populated vectors (%a / %b in lane 0,
; constants in lanes 1-3) and the intrinsic produces an i32. The CHECK lines
; expect the call to remain, with each operand reduced to a single lane-0
; insertelement into poison.
594 define i32 @test_ucomigt_ss_0(float %a, float %b) {
595 ; CHECK-LABEL: @test_ucomigt_ss_0(
596 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
597 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
598 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
599 ; CHECK-NEXT: ret i32 [[TMP3]]
601 %1 = insertelement <4 x float> undef, float %a, i32 0
602 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
603 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
604 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
605 %5 = insertelement <4 x float> undef, float %b, i32 0
606 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
607 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
608 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
609 %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
; ucomile.ss: both operands are fully populated vectors (%a / %b in lane 0,
; constants in lanes 1-3) and the intrinsic produces an i32. The CHECK lines
; expect the call to remain, with each operand reduced to a single lane-0
; insertelement into poison.
613 define i32 @test_ucomile_ss_0(float %a, float %b) {
614 ; CHECK-LABEL: @test_ucomile_ss_0(
615 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
616 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
617 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
618 ; CHECK-NEXT: ret i32 [[TMP3]]
620 %1 = insertelement <4 x float> undef, float %a, i32 0
621 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
622 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
623 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
624 %5 = insertelement <4 x float> undef, float %b, i32 0
625 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
626 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
627 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
628 %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
; ucomilt.ss: both operands are fully populated vectors (%a / %b in lane 0,
; constants in lanes 1-3) and the intrinsic produces an i32. The CHECK lines
; expect the call to remain, with each operand reduced to a single lane-0
; insertelement into poison.
632 define i32 @test_ucomilt_ss_0(float %a, float %b) {
633 ; CHECK-LABEL: @test_ucomilt_ss_0(
634 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
635 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
636 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
637 ; CHECK-NEXT: ret i32 [[TMP3]]
639 %1 = insertelement <4 x float> undef, float %a, i32 0
640 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
641 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
642 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
643 %5 = insertelement <4 x float> undef, float %b, i32 0
644 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
645 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
646 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
647 %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
; ucomineq.ss: both operands are fully populated vectors (%a / %b in lane 0,
; constants in lanes 1-3) and the intrinsic produces an i32. The CHECK lines
; expect the call to remain, with each operand reduced to a single lane-0
; insertelement into poison.
651 define i32 @test_ucomineq_ss_0(float %a, float %b) {
652 ; CHECK-LABEL: @test_ucomineq_ss_0(
653 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i32 0
654 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0
655 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
656 ; CHECK-NEXT: ret i32 [[TMP3]]
658 %1 = insertelement <4 x float> undef, float %a, i32 0
659 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
660 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
661 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
662 %5 = insertelement <4 x float> undef, float %b, i32 0
663 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
664 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
665 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
666 %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
; Declarations of the x86 SSE intrinsics exercised by the tests in this file.
; Intrinsics taking one <4 x float> operand and returning <4 x float>.
670 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
671 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
672 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
; Intrinsics taking two <4 x float> operands and returning <4 x float>
; (cmp.ss additionally takes an i8 argument).
674 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
675 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
676 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
677 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
678 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
679 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
680 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)
; comi* intrinsics: two <4 x float> operands, i32 result.
682 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
683 declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
684 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
685 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
686 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
687 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)
; ucomi* intrinsics: two <4 x float> operands, i32 result.
689 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
690 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
691 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
692 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
693 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
694 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)