// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s

#include <immintrin.h>
#include "builtin_test_helpers.h"

// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll

__m128 test_mm_add_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ps
  // CHECK: fadd <4 x float>
  return _mm_add_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_add_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +4.0f, +4.0f, +5.0f));

__m128 test_mm_add_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fadd float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_add_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_add_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +0.0f, +2.0f, +4.0f));

__m128 test_mm_and_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_and_ps
  // CHECK: and <4 x i32>
  return _mm_and_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_and_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), +0.0f, -0.0f, +0.0f, +7.0f));

__m128 test_mm_andnot_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_andnot_ps
  // CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <4 x i32>
  return _mm_andnot_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_andnot_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), +0.0f, +0.0f, -0.0f, +0.0f));

__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_eq_oq
  // CHECK: fcmp oeq <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_EQ_OQ);
}

__m128 test_mm_cmp_ps_lt_os(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_lt_os
  // CHECK: fcmp olt <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_LT_OS);
}

__m128 test_mm_cmp_ps_le_os(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_le_os
  // CHECK: fcmp ole <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_LE_OS);
}

__m128 test_mm_cmp_ps_unord_q(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_unord_q
  // CHECK: fcmp uno <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_UNORD_Q);
}

__m128 test_mm_cmp_ps_neq_uq(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_neq_uq
  // CHECK: fcmp une <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_NEQ_UQ);
}

__m128 test_mm_cmp_ps_nlt_us(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_nlt_us
  // CHECK: fcmp uge <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_NLT_US);
}

__m128 test_mm_cmp_ps_nle_us(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_nle_us
  // CHECK: fcmp ugt <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_NLE_US);
}

__m128 test_mm_cmp_ps_ord_q(__m128 a, __m128 b) {
  // CHECK-LABEL: test_mm_cmp_ps_ord_q
  // CHECK: fcmp ord <4 x float> %{{.*}}, %{{.*}}
  return _mm_cmp_ps(a, b, _CMP_ORD_Q);
}

__m128 test_mm_cmp_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_cmp_ss
  // CHECK: call {{.*}}<4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmp_ss(A, B, _CMP_ORD_Q);
}

__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpeq_ps
  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpeq_ps(__a, __b);
}

__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpeq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
  return _mm_cmpeq_ss(__a, __b);
}

__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpge_ps
  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpge_ps(__a, __b);
}

__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpge_ss(__a, __b);
}

__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpgt_ps
  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpgt_ps(__a, __b);
}

__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpgt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpgt_ss(__a, __b);
}

__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmple_ps
  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmple_ps(__a, __b);
}

__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmple_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmple_ss(__a, __b);
}

__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmplt_ps
  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmplt_ps(__a, __b);
}

__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmplt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmplt_ss(__a, __b);
}

__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpneq_ps
  // CHECK: [[CMP:%.*]] = fcmp une <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpneq_ps(__a, __b);
}

__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpneq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
  return _mm_cmpneq_ss(__a, __b);
}

__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnge_ps
  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnge_ps(__a, __b);
}

__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpnge_ss(__a, __b);
}

__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpngt_ps
  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpngt_ps(__a, __b);
}

__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpngt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpngt_ss(__a, __b);
}

__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnle_ps
  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnle_ps(__a, __b);
}

__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnle_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnle_ss(__a, __b);
}

__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnlt_ps
  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnlt_ps(__a, __b);
}

__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpnlt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpnlt_ss(__a, __b);
}

__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpord_ps
  // CHECK: [[CMP:%.*]] = fcmp ord <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpord_ps(__a, __b);
}

__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmpord_ss(__a, __b);
}

__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpunord_ps
  // CHECK: [[CMP:%.*]] = fcmp uno <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpunord_ps(__a, __b);
}

__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: test_mm_cmpunord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
  return _mm_cmpunord_ss(__a, __b);
}

int test_mm_comieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comieq_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.comieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comieq_ss(A, B);
}

int test_mm_comige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comige_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.comige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comige_ss(A, B);
}

int test_mm_comigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comigt_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.comigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comigt_ss(A, B);
}

int test_mm_comile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comile_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.comile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comile_ss(A, B);
}

int test_mm_comilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comilt_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.comilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comilt_ss(A, B);
}

int test_mm_comineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comineq_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.comineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comineq_ss(A, B);
}

int test_mm_cvt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvt_ss2si
  // CHECK: call {{.*}}i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvt_ss2si(A);
}

__m128 test_mm_cvtsi32_ss(__m128 A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_ss
  // CHECK: sitofp i32 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi32_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_cvtsi32_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 42), +42.0f, +0.0f, +2.0f, +4.0f));

__m128 test_mm_cvt_si2ss(__m128 A, int B) {
  // CHECK-LABEL: test_mm_cvt_si2ss
  // CHECK: sitofp i32 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvt_si2ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_cvt_si2ss((__m128){+4.0f, +2.0f, +0.0f, +4.0f}, -99), -99.0f, +2.0f, +0.0f, +4.0f));

__m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_ss
  // CHECK: sitofp i64 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi64_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_cvtsi64_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 555), +555.0f, +0.0f, +2.0f, +4.0f));

float test_mm_cvtss_f32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_f32
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  return _mm_cvtss_f32(A);
}
TEST_CONSTEXPR(_mm_cvtss_f32((__m128){+8.0f, +4.0f, +2.0f, +1.0f}) == +8.0f);

int test_mm_cvtss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si32
  // CHECK: call {{.*}}i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvtss_si32(A);
}

long long test_mm_cvtss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si64
  // CHECK: call {{.*}}i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}})
  return _mm_cvtss_si64(A);
}

int test_mm_cvtt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvtt_ss2si
  // CHECK: call {{.*}}i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
  return _mm_cvtt_ss2si(A);
}

int test_mm_cvttss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si32
  // CHECK: call {{.*}}i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}})
  return _mm_cvttss_si32(A);
}

long long test_mm_cvttss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si64
  // CHECK: call {{.*}}i64 @llvm.x86.sse.cvttss2si64(<4 x float> %{{.*}})
  return _mm_cvttss_si64(A);
}

__m128 test_mm_div_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ps
  // CHECK: fdiv <4 x float>
  return _mm_div_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_div_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +0.0f, +1.0f, +4.0f));

__m128 test_mm_div_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fdiv float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_div_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_div_ss((__m128){+1.0f, +5.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +5.0f, +2.0f, +4.0f));

unsigned int test_MM_GET_EXCEPTION_MASK(void) {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  // CHECK: and i32 %{{.*}}, 8064
  return _MM_GET_EXCEPTION_MASK();
}

unsigned int test_MM_GET_EXCEPTION_STATE(void) {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  // CHECK: and i32 %{{.*}}, 63
  return _MM_GET_EXCEPTION_STATE();
}

unsigned int test_MM_GET_FLUSH_ZERO_MODE(void) {
  // CHECK-LABEL: test_MM_GET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  // CHECK: and i32 %{{.*}}, 32768
  return _MM_GET_FLUSH_ZERO_MODE();
}

unsigned int test_MM_GET_ROUNDING_MODE(void) {
  // CHECK-LABEL: test_MM_GET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  // CHECK: and i32 %{{.*}}, 24576
  return _MM_GET_ROUNDING_MODE();
}

unsigned int test_mm_getcsr(void) {
  // CHECK-LABEL: test_mm_getcsr
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
  return _mm_getcsr();
}

__m128 test_mm_load_ps(float* y) {
  // CHECK-LABEL: test_mm_load_ps
  // CHECK: load <4 x float>, ptr {{.*}}, align 16
  return _mm_load_ps(y);
}

__m128 test_mm_load_ps1(float* y) {
  // CHECK-LABEL: test_mm_load_ps1
  // CHECK: load float, ptr %{{.*}}, align 4
  // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load_ps1(y);
}

__m128 test_mm_load_ss(float* y) {
  // CHECK-LABEL: test_mm_load_ss
  // CHECK: load float, ptr {{.*}}, align 1{{$}}
  // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3
  return _mm_load_ss(y);
}

__m128 test_mm_load1_ps(float* y) {
  // CHECK-LABEL: test_mm_load1_ps
  // CHECK: load float, ptr %{{.*}}, align 4
  // CHECK: insertelement <4 x float> poison, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load1_ps(y);
}

__m128 test_mm_loadh_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadh_pi
  // CHECK: load <2 x float>, ptr {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_loadh_pi(x,y);
}

__m128 test_mm_loadl_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadl_pi
  // CHECK: load <2 x float>, ptr {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  return _mm_loadl_pi(x,y);
}

__m128 test_mm_loadr_ps(float* A) {
  // CHECK-LABEL: test_mm_loadr_ps
  // CHECK: load <4 x float>, ptr %{{.*}}, align 16
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  return _mm_loadr_ps(A);
}

__m128 test_mm_loadu_ps(float* A) {
  // CHECK-LABEL: test_mm_loadu_ps
  // CHECK: load <4 x float>, ptr %{{.*}}, align 1{{$}}
  return _mm_loadu_ps(A);
}

__m128 test_mm_max_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ps
  // CHECK: @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ps(A, B);
}

__m128 test_mm_max_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ss
  // CHECK: @llvm.x86.sse.max.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ss(A, B);
}

__m128 test_mm_min_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ps
  // CHECK: @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ps(A, B);
}

__m128 test_mm_min_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ss
  // CHECK: @llvm.x86.sse.min.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ss(A, B);
}

__m128 test_mm_move_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_move_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_move_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_move_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));

__m128 test_mm_movehl_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movehl_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  return _mm_movehl_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_movehl_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +1.0f, +2.0f, +4.0f));

__m128 test_mm_movelh_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movelh_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_movelh_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_movelh_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +0.0f, +8.0f, +4.0f));

int test_mm_movemask_ps(__m128 A) {
  // CHECK-LABEL: test_mm_movemask_ps
  // CHECK: call {{.*}}i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}})
  return _mm_movemask_ps(A);
}

__m128 test_mm_mul_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ps
  // CHECK: fmul <4 x float>
  return _mm_mul_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_mul_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +4.0f, +4.0f));

__m128 test_mm_mul_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fmul float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_mul_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_mul_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));

__m128 test_mm_or_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_or_ps
  // CHECK: or <4 x i32>
  return _mm_or_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_or_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, -5.0f, -6.0f, +7.0f));

void test_mm_prefetch(char const* p) {
  // CHECK-LABEL: test_mm_prefetch
  // CHECK: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 0, i32 1)
  _mm_prefetch(p, 0);
}

__m128 test_mm_rcp_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ps
  // CHECK: call {{.*}}<4 x float> @llvm.x86.sse.rcp.ps(<4 x float> {{.*}})
  return _mm_rcp_ps(x);
}

__m128 test_mm_rcp_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ss
  // CHECK: call {{.*}}<4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}})
  return _mm_rcp_ss(x);
}

__m128 test_mm_rsqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ps
  // CHECK: call {{.*}}<4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> {{.*}})
  return _mm_rsqrt_ps(x);
}

__m128 test_mm_rsqrt_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ss
  // CHECK: call {{.*}}<4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}})
  return _mm_rsqrt_ss(x);
}

void test_MM_SET_EXCEPTION_MASK(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr {{.*}})
  // CHECK: and i32 {{.*}}, -8065
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _MM_SET_EXCEPTION_MASK(A);
}

void test_MM_SET_EXCEPTION_STATE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr {{.*}})
  // CHECK: and i32 {{.*}}, -64
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _MM_SET_EXCEPTION_STATE(A);
}

void test_MM_SET_FLUSH_ZERO_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr {{.*}})
  // CHECK: and i32 {{.*}}, -32769
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _MM_SET_FLUSH_ZERO_MODE(A);
}

__m128 test_mm_set_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_set_ps
  // CHECK: insertelement <4 x float> poison, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps(A, B, C, D);
}
TEST_CONSTEXPR(match_m128(_mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f), +3.0f, +2.0f, +1.0f, +0.0f));

__m128 test_mm_set_ps1(float A) {
  // CHECK-LABEL: test_mm_set_ps1
  // CHECK: insertelement <4 x float> poison, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps1(A);
}
TEST_CONSTEXPR(match_m128(_mm_set_ps1(-2.0f), -2.0f, -2.0f, -2.0f, -2.0f));

void test_MM_SET_ROUNDING_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(ptr {{.*}})
  // CHECK: and i32 {{.*}}, -24577
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _MM_SET_ROUNDING_MODE(A);
}

__m128 test_mm_set_ss(float A) {
  // CHECK-LABEL: test_mm_set_ss
  // CHECK: insertelement <4 x float> poison, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
  return _mm_set_ss(A);
}
TEST_CONSTEXPR(match_m128(_mm_set_ss(1.0f), +1.0f, +0.0f, +0.0f, +0.0f));

__m128 test_mm_set1_ps(float A) {
  // CHECK-LABEL: test_mm_set1_ps
  // CHECK: insertelement <4 x float> poison, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set1_ps(A);
}
TEST_CONSTEXPR(match_m128(_mm_set1_ps(2.0f), +2.0f, +2.0f, +2.0f, +2.0f));

void test_mm_setcsr(unsigned int A) {
  // CHECK-LABEL: test_mm_setcsr
  // CHECK: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
  _mm_setcsr(A);
}

__m128 test_mm_setr_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_setr_ps
  // CHECK: insertelement <4 x float> poison, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_setr_ps(A, B, C, D);
}
TEST_CONSTEXPR(match_m128(_mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f), +0.0f, +1.0f, +2.0f, +3.0f));

__m128 test_mm_setzero_ps(void) {
  // CHECK-LABEL: test_mm_setzero_ps
  // CHECK: store <4 x float> zeroinitializer
  return _mm_setzero_ps();
}
TEST_CONSTEXPR(match_m128(_mm_setzero_ps(), +0.0f, +0.0f, +0.0f, +0.0f));

void test_mm_sfence(void) {
  // CHECK-LABEL: test_mm_sfence
  // CHECK: call void @llvm.x86.sse.sfence()
  _mm_sfence();
}

__m128 test_mm_shuffle_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_shuffle_ps
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  return _mm_shuffle_ps(A, B, 0);
}

__m128 test_mm_sqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_sqrt_ps
  // CHECK: call {{.*}}<4 x float> @llvm.sqrt.v4f32(<4 x float> {{.*}})
  return _mm_sqrt_ps(x);
}

__m128 test_mm_sqrt_ss(__m128 x) {
  // CHECK-LABEL: test_mm_sqrt_ss
  // CHECK: extractelement <4 x float> {{.*}}, i64 0
  // CHECK: call float @llvm.sqrt.f32(float {{.*}})
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i64 0
  return _mm_sqrt_ss(x);
}

void test_mm_store_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps
  // CHECK: store <4 x float> %{{.*}}, ptr {{.*}}, align 16
  _mm_store_ps(x, y);
}

void test_mm_store_ps1(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps1
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16
  _mm_store_ps1(x, y);
}

void test_mm_store_ss(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: store float %{{.*}}, ptr {{.*}}, align 1{{$}}
  _mm_store_ss(x, y);
}

void test_mm_store1_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store1_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16
  _mm_store1_ps(x, y);
}

void test_mm_storeh_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storeh_pi
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 2, i32 3>
  // CHECK: store <2 x float> %{{.*}}, ptr %{{.*}}, align 1{{$}}
  _mm_storeh_pi(x, y);
}

void test_mm_storel_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storel_pi
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: store <2 x float> %{{.*}}, ptr %{{.*}}, align 1{{$}}
  _mm_storel_pi(x, y);
}

void test_mm_storer_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_storer_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  // CHECK: store <4 x float> %{{.*}}, ptr {{.*}}, align 16
  _mm_storer_ps(x, y);
}

void test_mm_storeu_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_storeu_ps
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_ps(x, y);
}

void test_mm_stream_ps(float* A, __m128 B) {
  // CHECK-LABEL: test_mm_stream_ps
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
  _mm_stream_ps(A, B);
}

void test_mm_stream_ps_void(void *A, __m128 B) {
  // CHECK-LABEL: test_mm_stream_ps_void
  // CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
  _mm_stream_ps(A, B);
}

__m128 test_mm_sub_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ps
  // CHECK: fsub <4 x float>
  return _mm_sub_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_sub_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, -4.0f, +0.0f, +3.0f));

__m128 test_mm_sub_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fsub float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_sub_ss(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_sub_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, +0.0f, +2.0f, +4.0f));

void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
  // CHECK-LABEL: test_MM_TRANSPOSE4_PS
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  _MM_TRANSPOSE4_PS(*A, *B, *C, *D);
}

int test_mm_ucomieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomieq_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomieq_ss(A, B);
}

int test_mm_ucomige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomige_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomige_ss(A, B);
}

int test_mm_ucomigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomigt_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomigt_ss(A, B);
}

int test_mm_ucomile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomile_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomile_ss(A, B);
}

int test_mm_ucomilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomilt_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomilt_ss(A, B);
}

int test_mm_ucomineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomineq_ss
  // CHECK: call {{.*}}i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomineq_ss(A, B);
}

__m128 test_mm_undefined_ps(void) {
  // CHECK-LABEL: test_mm_undefined_ps
  // CHECK: ret <4 x float> zeroinitializer
  return _mm_undefined_ps();
}

__m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpackhi_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_unpackhi_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +2.0f, +4.0f, +1.0f));

__m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpacklo_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_unpacklo_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +8.0f, +0.0f, +4.0f));

__m128 test_mm_xor_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_xor_ps
  // CHECK: xor <4 x i32>
  return _mm_xor_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_xor_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, +5.0f, -6.0f, +0.0f));