// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>

// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
__m128 test_mm_add_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ps
  // CHECK: fadd <4 x float>
  return _mm_add_ps(A, B);
}

__m128 test_mm_add_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fadd float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_add_ss(A, B);
}

__m128 test_mm_and_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_and_ps
  // CHECK: and <4 x i32>
  return _mm_and_ps(A, B);
}

__m128 test_mm_andnot_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_andnot_ps
  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
  // CHECK: and <4 x i32>
  return _mm_andnot_ps(A, B);
}
__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpeq_ps
  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpeq_ps(__a, __b);
}

__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpeq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
  return _mm_cmpeq_ss(__a, __b);
}

__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpge_ps
  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpge_ps(__a, __b);
}

__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpge_ss(__a, __b);
}

__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpgt_ps
  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpgt_ps(__a, __b);
}

__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpgt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpgt_ss(__a, __b);
}

__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmple_ps
  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmple_ps(__a, __b);
}

__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmple_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmple_ss(__a, __b);
}

__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmplt_ps
  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmplt_ps(__a, __b);
}

__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmplt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmplt_ss(__a, __b);
}
__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpneq_ps
  // CHECK: [[CMP:%.*]] = fcmp une <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpneq_ps(__a, __b);
}

__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpneq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
  return _mm_cmpneq_ss(__a, __b);
}

__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnge_ps
  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnge_ps(__a, __b);
}

__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpnge_ss(__a, __b);
}

__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpngt_ps
  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpngt_ps(__a, __b);
}

__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpngt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpngt_ss(__a, __b);
}

__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnle_ps
  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnle_ps(__a, __b);
}

__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnle_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnle_ss(__a, __b);
}

__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnlt_ps
  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnlt_ps(__a, __b);
}

__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnlt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpnlt_ss(__a, __b);
}

__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpord_ps
  // CHECK: [[CMP:%.*]] = fcmp ord <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpord_ps(__a, __b);
}

__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmpord_ss(__a, __b);
}

__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpunord_ps
  // CHECK: [[CMP:%.*]] = fcmp uno <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpunord_ps(__a, __b);
}

__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpunord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
  return _mm_cmpunord_ss(__a, __b);
}
int test_mm_comieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comieq_ss
  // CHECK: call i32 @llvm.x86.sse.comieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comieq_ss(A, B);
}

int test_mm_comige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comige_ss
  // CHECK: call i32 @llvm.x86.sse.comige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comige_ss(A, B);
}

int test_mm_comigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comigt_ss
  // CHECK: call i32 @llvm.x86.sse.comigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comigt_ss(A, B);
}

int test_mm_comile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comile_ss
  // CHECK: call i32 @llvm.x86.sse.comile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comile_ss(A, B);
}

int test_mm_comilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comilt_ss
  // CHECK: call i32 @llvm.x86.sse.comilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comilt_ss(A, B);
}

int test_mm_comineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comineq_ss
  // CHECK: call i32 @llvm.x86.sse.comineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comineq_ss(A, B);
}
int test_mm_cvt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvt_ss2si
  // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvt_ss2si(A);
}

__m128 test_mm_cvtsi32_ss(__m128 A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_ss
  // CHECK: sitofp i32 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi32_ss(A, B);
}

__m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_ss
  // CHECK: sitofp i64 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi64_ss(A, B);
}

float test_mm_cvtss_f32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_f32
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  return _mm_cvtss_f32(A);
}

int test_mm_cvtss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si32
  // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvtss_si32(A);
}

long long test_mm_cvtss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si64
  // CHECK: call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}})
  return _mm_cvtss_si64(A);
}

int test_mm_cvtt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvtt_ss2si
  // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
  return _mm_cvtt_ss2si(A);
}

int test_mm_cvttss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si32
  // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
  return _mm_cvttss_si32(A);
}

long long test_mm_cvttss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si64
  // CHECK: call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %{{.*}})
  return _mm_cvttss_si64(A);
}
__m128 test_mm_div_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ps
  // CHECK: fdiv <4 x float>
  return _mm_div_ps(A, B);
}

__m128 test_mm_div_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fdiv float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_div_ss(A, B);
}
unsigned int test_MM_GET_EXCEPTION_MASK(void) {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  // CHECK: and i32 %{{.*}}, 8064
  return _MM_GET_EXCEPTION_MASK();
}

unsigned int test_MM_GET_EXCEPTION_STATE(void) {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  // CHECK: and i32 %{{.*}}, 63
  return _MM_GET_EXCEPTION_STATE();
}

unsigned int test_MM_GET_FLUSH_ZERO_MODE(void) {
  // CHECK-LABEL: test_MM_GET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  // CHECK: and i32 %{{.*}}, 32768
  return _MM_GET_FLUSH_ZERO_MODE();
}

unsigned int test_MM_GET_ROUNDING_MODE(void) {
  // CHECK-LABEL: test_MM_GET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  // CHECK: and i32 %{{.*}}, 24576
  return _MM_GET_ROUNDING_MODE();
}

unsigned int test_mm_getcsr(void) {
  // CHECK-LABEL: test_mm_getcsr
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  return _mm_getcsr();
}
__m128 test_mm_load_ps(float* y) {
  // CHECK-LABEL: test_mm_load_ps
  // CHECK: load <4 x float>, ptr {{.*}}, align 16
  return _mm_load_ps(y);
}

__m128 test_mm_load_ps1(float* y) {
  // CHECK-LABEL: test_mm_load_ps1
  // CHECK: load float, ptr %{{.*}}, align 4
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load_ps1(y);
}

__m128 test_mm_load_ss(float* y) {
  // CHECK-LABEL: test_mm_load_ss
  // CHECK: load float, ptr {{.*}}, align 1{{$}}
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3
  return _mm_load_ss(y);
}

__m128 test_mm_load1_ps(float* y) {
  // CHECK-LABEL: test_mm_load1_ps
  // CHECK: load float, ptr %{{.*}}, align 4
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load1_ps(y);
}

__m128 test_mm_loadh_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadh_pi
  // CHECK: load <2 x float>, ptr {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_loadh_pi(x,y);
}

__m128 test_mm_loadl_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadl_pi
  // CHECK: load <2 x float>, ptr {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  return _mm_loadl_pi(x,y);
}

__m128 test_mm_loadr_ps(float* A) {
  // CHECK-LABEL: test_mm_loadr_ps
  // CHECK: load <4 x float>, ptr %{{.*}}, align 16
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  return _mm_loadr_ps(A);
}

__m128 test_mm_loadu_ps(float* A) {
  // CHECK-LABEL: test_mm_loadu_ps
  // CHECK: load <4 x float>, ptr %{{.*}}, align 1{{$}}
  return _mm_loadu_ps(A);
}
__m128 test_mm_max_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ps
  // CHECK: @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ps(A, B);
}

__m128 test_mm_max_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ss
  // CHECK: @llvm.x86.sse.max.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ss(A, B);
}

__m128 test_mm_min_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ps
  // CHECK: @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ps(A, B);
}

__m128 test_mm_min_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ss
  // CHECK: @llvm.x86.sse.min.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ss(A, B);
}

__m128 test_mm_move_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_move_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_move_ss(A, B);
}

__m128 test_mm_movehl_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movehl_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  return _mm_movehl_ps(A, B);
}

__m128 test_mm_movelh_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movelh_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_movelh_ps(A, B);
}

int test_mm_movemask_ps(__m128 A) {
  // CHECK-LABEL: test_mm_movemask_ps
  // CHECK: call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}})
  return _mm_movemask_ps(A);
}
__m128 test_mm_mul_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ps
  // CHECK: fmul <4 x float>
  return _mm_mul_ps(A, B);
}

__m128 test_mm_mul_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fmul float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_mul_ss(A, B);
}

__m128 test_mm_or_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_or_ps
  // CHECK: or <4 x i32>
  return _mm_or_ps(A, B);
}
void test_mm_prefetch(char const* p) {
  // CHECK-LABEL: test_mm_prefetch
  // CHECK: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 0, i32 1)
  _mm_prefetch(p, 0);
}

__m128 test_mm_rcp_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ps
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> {{.*}})
  return _mm_rcp_ps(x);
}

__m128 test_mm_rcp_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}})
  return _mm_rcp_ss(x);
}

__m128 test_mm_rsqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ps
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> {{.*}})
  return _mm_rsqrt_ps(x);
}

__m128 test_mm_rsqrt_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}})
  return _mm_rsqrt_ss(x);
}
void test_MM_SET_EXCEPTION_MASK(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr {{.*}})
  // CHECK: and i32 {{.*}}, -8065
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _MM_SET_EXCEPTION_MASK(A);
}

void test_MM_SET_EXCEPTION_STATE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr {{.*}})
  // CHECK: and i32 {{.*}}, -64
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _MM_SET_EXCEPTION_STATE(A);
}

void test_MM_SET_FLUSH_ZERO_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr {{.*}})
  // CHECK: and i32 {{.*}}, -32769
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _MM_SET_FLUSH_ZERO_MODE(A);
}

__m128 test_mm_set_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_set_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps(A, B, C, D);
}

__m128 test_mm_set_ps1(float A) {
  // CHECK-LABEL: test_mm_set_ps1
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps1(A);
}

void test_MM_SET_ROUNDING_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr {{.*}})
  // CHECK: and i32 {{.*}}, -24577
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _MM_SET_ROUNDING_MODE(A);
}
__m128 test_mm_set_ss(float A) {
  // CHECK-LABEL: test_mm_set_ss
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
  return _mm_set_ss(A);
}

__m128 test_mm_set1_ps(float A) {
  // CHECK-LABEL: test_mm_set1_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set1_ps(A);
}

void test_mm_setcsr(unsigned int A) {
  // CHECK-LABEL: test_mm_setcsr
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _mm_setcsr(A);
}
__m128 test_mm_setr_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_setr_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_setr_ps(A, B, C, D);
}

__m128 test_mm_setzero_ps(void) {
  // CHECK-LABEL: test_mm_setzero_ps
  // CHECK: store <4 x float> zeroinitializer
  return _mm_setzero_ps();
}

void test_mm_sfence(void) {
  // CHECK-LABEL: test_mm_sfence
  // CHECK: call void @llvm.x86.sse.sfence()
  _mm_sfence();
}
__m128 test_mm_shuffle_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_shuffle_ps
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  return _mm_shuffle_ps(A, B, 0);
}

__m128 test_mm_sqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_sqrt_ps
  // CHECK: call <4 x float> @llvm.sqrt.v4f32(<4 x float> {{.*}})
  return _mm_sqrt_ps(x);
}

__m128 test_mm_sqrt_ss(__m128 x) {
  // CHECK-LABEL: test_mm_sqrt_ss
  // CHECK: extractelement <4 x float> {{.*}}, i64 0
  // CHECK: call float @llvm.sqrt.f32(float {{.*}})
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i64 0
  return _mm_sqrt_ss(x);
}
void test_mm_store_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps
  // CHECK: store <4 x float> %{{.*}}, ptr {{.*}}, align 16
  _mm_store_ps(x, y);
}

void test_mm_store_ps1(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps1
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16
  _mm_store_ps1(x, y);
}

void test_mm_store_ss(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: store float %{{.*}}, ptr {{.*}}, align 1{{$}}
  _mm_store_ss(x, y);
}

void test_mm_store1_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store1_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16
  _mm_store1_ps(x, y);
}

void test_mm_storeh_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storeh_pi
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 2, i32 3>
  // CHECK: store <2 x float> %{{.*}}, ptr %{{.*}}, align 1{{$}}
  _mm_storeh_pi(x, y);
}

void test_mm_storel_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storel_pi
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: store <2 x float> %{{.*}}, ptr %{{.*}}, align 1{{$}}
  _mm_storel_pi(x, y);
}

void test_mm_storer_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_storer_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  // CHECK: store <4 x float> %{{.*}}, ptr {{.*}}, align 16
  _mm_storer_ps(x, y);
}

void test_mm_storeu_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_storeu_ps
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_ps(x, y);
}

void test_mm_stream_ps(float*A, __m128 B) {
  // CHECK-LABEL: test_mm_stream_ps
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
  _mm_stream_ps(A, B);
}

void test_mm_stream_ps_void(void *A, __m128 B) {
  // CHECK-LABEL: test_mm_stream_ps_void
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
  _mm_stream_ps(A, B);
}
__m128 test_mm_sub_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ps
  // CHECK: fsub <4 x float>
  return _mm_sub_ps(A, B);
}

__m128 test_mm_sub_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fsub float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_sub_ss(A, B);
}

void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
  // CHECK-LABEL: test_MM_TRANSPOSE4_PS
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  _MM_TRANSPOSE4_PS(*A, *B, *C, *D);
}
int test_mm_ucomieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomieq_ss
  // CHECK: call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomieq_ss(A, B);
}

int test_mm_ucomige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomige_ss
  // CHECK: call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomige_ss(A, B);
}

int test_mm_ucomigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomigt_ss
  // CHECK: call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomigt_ss(A, B);
}

int test_mm_ucomile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomile_ss
  // CHECK: call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomile_ss(A, B);
}

int test_mm_ucomilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomilt_ss
  // CHECK: call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomilt_ss(A, B);
}

int test_mm_ucomineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomineq_ss
  // CHECK: call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomineq_ss(A, B);
}
__m128 test_mm_undefined_ps(void) {
  // CHECK-LABEL: test_mm_undefined_ps
  // CHECK: ret <4 x float> zeroinitializer
  return _mm_undefined_ps();
}

__m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpackhi_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_ps(A, B);
}

__m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpacklo_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_ps(A, B);
}

__m128 test_mm_xor_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_xor_ps
  // CHECK: xor <4 x i32>
  return _mm_xor_ps(A, B);
}