1 // REQUIRES: powerpc-registered-target
3 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
4 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
5 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
6 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE
8 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
9 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P10
11 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
12 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
13 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
14 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P10
16 // CHECK-BE-DAG: @_mm_movemask_pd.__perm_mask = internal constant <4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 -2139078656>, align 16
17 // CHECK-BE-DAG: @_mm_shuffle_epi32.__permute_selectors = internal constant [4 x i32] [i32 66051, i32 67438087, i32 134810123, i32 202182159], align 4
18 // CHECK-BE-DAG: @_mm_shufflehi_epi16.__permute_selectors = internal constant [4 x i16] [i16 2057, i16 2571, i16 3085, i16 3599], align 2
19 // CHECK-BE-DAG: @_mm_shufflelo_epi16.__permute_selectors = internal constant [4 x i16] [i16 1, i16 515, i16 1029, i16 1543], align 2
21 // CHECK-LE-DAG: @_mm_movemask_pd.__perm_mask = internal constant <4 x i32> <i32 -2139094976, i32 -2139062144, i32 -2139062144, i32 -2139062144>, align 16
22 // CHECK-LE-DAG: @_mm_shuffle_epi32.__permute_selectors = internal constant [4 x i32] [i32 50462976, i32 117835012, i32 185207048, i32 252579084], align 4
23 // CHECK-LE-DAG: @_mm_shufflehi_epi16.__permute_selectors = internal constant [4 x i16] [i16 2312, i16 2826, i16 3340, i16 3854], align 2
24 // CHECK-LE-DAG: @_mm_shufflelo_epi16.__permute_selectors = internal constant [4 x i16] [i16 256, i16 770, i16 1284, i16 1798], align 2
26 #include <emmintrin.h>
28 __m128i resi
, mi1
, mi2
;
31 __m128d resd
, md1
, md2
;
32 __m64 res64
, m641
, m642
;
40 void __attribute__((noinline
))
42 resi
= _mm_add_epi64(mi1
, mi2
);
43 resi
= _mm_add_epi32(mi1
, mi2
);
44 resi
= _mm_add_epi16(mi1
, mi2
);
45 resi
= _mm_add_epi8(mi1
, mi2
);
46 resd
= _mm_add_pd(md1
, md2
);
47 resd
= _mm_add_sd(md1
, md2
);
48 res64
= _mm_add_si64(m641
, m642
);
49 resi
= _mm_adds_epi16(mi1
, mi2
);
50 resi
= _mm_adds_epi8(mi1
, mi2
);
51 resi
= _mm_adds_epu16(mi1
, mi2
);
52 resi
= _mm_adds_epu8(mi1
, mi2
);
55 // CHECK-LABEL: @test_add
57 // CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi64
58 // CHECK: add <2 x i64>
60 // CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi32
61 // CHECK: add <4 x i32>
63 // CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi16
64 // CHECK: add <8 x i16>
66 // CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi8
67 // CHECK: add <16 x i8>
69 // CHECK-LABEL: define available_externally <2 x double> @_mm_add_pd
70 // CHECK: fadd <2 x double>
72 // CHECK-LABEL: define available_externally <2 x double> @_mm_add_sd
75 // CHECK-LABEL: define available_externally i64 @_mm_add_si64
78 // CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epi16
79 // CHECK: call <8 x i16> @vec_adds(short vector[8], short vector[8])
81 // CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epi8
82 // CHECK: call <16 x i8> @vec_adds(signed char vector[16], signed char vector[16])
84 // CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epu16
85 // CHECK: call <8 x i16> @vec_adds(unsigned short vector[8], unsigned short vector[8])
87 // CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epu8
88 // CHECK: call <16 x i8> @vec_adds(unsigned char vector[16], unsigned char vector[16])
90 void __attribute__((noinline
))
92 resi
= _mm_avg_epu16(mi1
, mi2
);
93 resi
= _mm_avg_epu8(mi1
, mi2
);
96 // CHECK-LABEL: @test_avg
98 // CHECK-LABEL: define available_externally <2 x i64> @_mm_avg_epu16
99 // CHECK: call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])
101 // CHECK-LABEL: define available_externally <2 x i64> @_mm_avg_epu8
102 // CHECK: call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])
104 void __attribute__((noinline
))
106 resi
= _mm_bslli_si128(mi1
, i
);
107 resi
= _mm_bsrli_si128(mi1
, i
);
110 // CHECK-LABEL: @test_bs
112 // CHECK-LABEL: define available_externally <2 x i64> @_mm_bslli_si128
113 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
114 // CHECK: br i1 %[[CMP]]
115 // CHECK: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer, i32 noundef zeroext %{{[0-9a-zA-Z_.]+}})
116 // CHECK: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
118 // CHECK-LABEL: define available_externally <2 x i64> @_mm_bsrli_si128
119 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
120 // CHECK: br i1 %[[CMP]]
121 // CHECK-LE: call i1 @llvm.is.constant
122 // CHECK-LE: %[[SUB:[0-9a-zA-Z_.]+]] = sub nsw i32 16, %{{[0-9a-zA-Z_.]+}}
123 // CHECK-LE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext %[[SUB]])
124 // CHECK-LE: %[[MUL:[0-9a-zA-Z_.]+]] = mul nsw i32 %{{[0-9a-zA-Z_.]+}}, 8
125 // CHECK-LE: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[MUL]] to i8
126 // CHECK-LE: call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext %[[TRUNC]])
127 // CHECK-LE: call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])
128 // CHECK-LE: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
129 // CHECK-BE: %[[MUL:[0-9a-zA-Z_.]+]] = mul nsw i32 %{{[0-9a-zA-Z_.]+}}, 8
130 // CHECK-BE: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[MUL]] to i8
131 // CHECK-BE: call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext %[[TRUNC]])
132 // CHECK-BE: call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])
133 // CHECK-BE: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
135 void __attribute__((noinline
))
137 res
= _mm_castpd_ps(md1
);
138 resi
= _mm_castpd_si128(md1
);
139 resd
= _mm_castps_pd(m1
);
140 resi
= _mm_castps_si128(m1
);
141 resd
= _mm_castsi128_pd(mi1
);
142 res
= _mm_castsi128_ps(mi1
);
145 // CHECK-LABEL: @test_cast
147 // CHECK-LABEL: define available_externally <4 x float> @_mm_castpd_ps
149 // CHECK-LABEL: define available_externally <2 x i64> @_mm_castpd_si128
151 // CHECK-LABEL: define available_externally <2 x double> @_mm_castps_pd
153 // CHECK-LABEL: define available_externally <2 x i64> @_mm_castps_si128
155 // CHECK-LABEL: define available_externally <2 x double> @_mm_castsi128_pd
157 // CHECK-LABEL: define available_externally <4 x float> @_mm_castsi128_ps
159 void __attribute__((noinline
))
161 resi
= _mm_cmpeq_epi32(mi1
, mi2
);
162 resi
= _mm_cmpeq_epi16(mi1
, mi2
);
163 resi
= _mm_cmpeq_epi8(mi1
, mi2
);
164 resi
= _mm_cmpgt_epi32(mi1
, mi2
);
165 resi
= _mm_cmpgt_epi16(mi1
, mi2
);
166 resi
= _mm_cmpgt_epi8(mi1
, mi2
);
167 resi
= _mm_cmplt_epi32(mi1
, mi2
);
168 resi
= _mm_cmplt_epi16(mi1
, mi2
);
169 resi
= _mm_cmplt_epi8(mi1
, mi2
);
170 resd
= _mm_cmpeq_pd(md1
, md2
);
171 resd
= _mm_cmpeq_sd(md1
, md2
);
172 resd
= _mm_cmpge_pd(md1
, md2
);
173 resd
= _mm_cmpge_sd(md1
, md2
);
174 resd
= _mm_cmpgt_pd(md1
, md2
);
175 resd
= _mm_cmpgt_sd(md1
, md2
);
176 resd
= _mm_cmple_pd(md1
, md2
);
177 resd
= _mm_cmple_sd(md1
, md2
);
178 resd
= _mm_cmplt_pd(md1
, md2
);
179 resd
= _mm_cmplt_sd(md1
, md2
);
180 resd
= _mm_cmpneq_pd(md1
, md2
);
181 resd
= _mm_cmpneq_sd(md1
, md2
);
182 resd
= _mm_cmpnge_pd(md1
, md2
);
183 resd
= _mm_cmpnge_sd(md1
, md2
);
184 resd
= _mm_cmpngt_pd(md1
, md2
);
185 resd
= _mm_cmpngt_sd(md1
, md2
);
186 resd
= _mm_cmpnle_pd(md1
, md2
);
187 resd
= _mm_cmpnle_sd(md1
, md2
);
188 resd
= _mm_cmpnlt_pd(md1
, md2
);
189 resd
= _mm_cmpnlt_sd(md1
, md2
);
190 resd
= _mm_cmpord_pd(md1
, md2
);
191 resd
= _mm_cmpord_sd(md1
, md2
);
192 resd
= _mm_cmpunord_pd(md1
, md2
);
193 resd
= _mm_cmpunord_sd(md1
, md2
);
196 // CHECK-LABEL: @test_cmp
198 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpeq_epi32
199 // CHECK: call <4 x i32> @vec_cmpeq(int vector[4], int vector[4])
201 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpeq_epi16
202 // CHECK: call <8 x i16> @vec_cmpeq(short vector[8], short vector[8])
204 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpeq_epi8
205 // CHECK: call <16 x i8> @vec_cmpeq(signed char vector[16], signed char vector[16])
207 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpgt_epi32
208 // CHECK: call <4 x i32> @vec_cmpgt(int vector[4], int vector[4])
210 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpgt_epi16
211 // CHECK: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])
213 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpgt_epi8
214 // CHECK: call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16])
216 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmplt_epi32
217 // CHECK: call <4 x i32> @vec_cmplt(int vector[4], int vector[4])
219 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmplt_epi16
220 // CHECK: call <8 x i16> @vec_cmplt(short vector[8], short vector[8])
222 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmplt_epi8
223 // CHECK: call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])
225 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpeq_pd
226 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
228 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpeq_sd
229 // CHECK: call <2 x double> @vec_splats(double)
230 // CHECK: call <2 x double> @vec_splats(double)
231 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
232 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
234 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpge_pd
235 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
237 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpge_sd
238 // CHECK: call <2 x double> @vec_splats(double)
239 // CHECK: call <2 x double> @vec_splats(double)
240 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
241 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
243 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpgt_pd
244 // CHECK: call <2 x i64> @vec_cmpgt(double vector[2], double vector[2])
246 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpgt_sd
247 // CHECK: call <2 x double> @vec_splats(double)
248 // CHECK: call <2 x double> @vec_splats(double)
249 // CHECK: call <2 x i64> @vec_cmpgt(double vector[2], double vector[2])
250 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
252 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmple_pd
253 // CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2])
255 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmple_sd
256 // CHECK: call <2 x double> @vec_splats(double)
257 // CHECK: call <2 x double> @vec_splats(double)
258 // CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2])
259 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
261 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmplt_pd
262 // CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
264 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmplt_sd
265 // CHECK: call <2 x double> @vec_splats(double)
266 // CHECK: call <2 x double> @vec_splats(double)
267 // CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
268 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
270 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpneq_pd
271 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
272 // CHECK: call <2 x double> @vec_nor(double vector[2], double vector[2])
274 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpneq_sd
275 // CHECK: call <2 x double> @vec_splats(double)
276 // CHECK: call <2 x double> @vec_splats(double)
277 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
278 // CHECK: call <2 x double> @vec_nor(double vector[2], double vector[2])
279 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
281 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnge_pd
282 // CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
284 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnge_sd
285 // CHECK: call <2 x double> @vec_splats(double)
286 // CHECK: call <2 x double> @vec_splats(double)
287 // CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
288 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
290 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpngt_pd
291 // CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2])
293 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpngt_sd
294 // CHECK: call <2 x double> @vec_splats(double)
295 // CHECK: call <2 x double> @vec_splats(double)
296 // CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2])
297 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
299 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnle_pd
300 // CHECK: call <2 x i64> @vec_cmpgt(double vector[2], double vector[2])
302 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnle_sd
303 // CHECK: call <2 x double> @vec_splats(double)
304 // CHECK: call <2 x double> @vec_splats(double)
305 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
306 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
308 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnlt_pd
309 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
311 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnlt_sd
312 // CHECK: call <2 x double> @vec_splats(double)
313 // CHECK: call <2 x double> @vec_splats(double)
314 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
315 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
317 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpord_pd
318 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
319 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
320 // CHECK: call <2 x i64> @vec_and(unsigned long long vector[2], unsigned long long vector[2])
322 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpord_sd
323 // CHECK: call <2 x double> @vec_splats(double)
324 // CHECK: call <2 x double> @vec_splats(double)
325 // CHECK: call <2 x double> @_mm_cmpord_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}})
326 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
328 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpunord_pd
329 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
330 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
331 // CHECK: call <2 x i64> @vec_nor(unsigned long long vector[2], unsigned long long vector[2])
332 // CHECK: call <2 x i64> @vec_orc(unsigned long long vector[2], unsigned long long vector[2])
334 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpunord_sd
335 // CHECK: call <2 x double> @vec_splats(double)
336 // CHECK: call <2 x double> @vec_splats(double)
337 // CHECK: call <2 x double> @_mm_cmpunord_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}})
338 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
340 void __attribute__((noinline
))
342 i
= _mm_comieq_sd(md1
, md2
);
343 i
= _mm_comige_sd(md1
, md2
);
344 i
= _mm_comigt_sd(md1
, md2
);
345 i
= _mm_comile_sd(md1
, md2
);
346 i
= _mm_comilt_sd(md1
, md2
);
347 i
= _mm_comineq_sd(md1
, md2
);
350 // CHECK-LABEL: @test_comi
352 // CHECK-LABEL: define available_externally signext i32 @_mm_comieq_sd
353 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oeq double
354 // CHECK: zext i1 %[[CMP]] to i32
356 // CHECK-LABEL: define available_externally signext i32 @_mm_comige_sd
357 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oge double
358 // CHECK: zext i1 %[[CMP]] to i32
360 // CHECK-LABEL: define available_externally signext i32 @_mm_comigt_sd
361 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ogt double
362 // CHECK: zext i1 %[[CMP]] to i32
364 // CHECK-LABEL: define available_externally signext i32 @_mm_comile_sd
365 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ole double
366 // CHECK: zext i1 %[[CMP]] to i32
368 // CHECK-LABEL: define available_externally signext i32 @_mm_comilt_sd
369 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp olt double
370 // CHECK: zext i1 %[[CMP]] to i32
372 // CHECK-LABEL: define available_externally signext i32 @_mm_comineq_sd
373 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp une double
374 // CHECK: zext i1 %[[CMP]] to i32
376 void __attribute__((noinline
))
384 // CHECK-LABEL: @test_control
386 // CHECK-LABEL: define available_externally void @_mm_clflush
387 // CHECK: call void asm sideeffect "dcbf 0,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})
389 // CHECK-LABEL: define available_externally void @_mm_lfence()
390 // CHECK: fence release
392 // CHECK-LABEL: define available_externally void @_mm_mfence()
393 // CHECK: fence seq_cst
395 // CHECK-LABEL: define available_externally void @_mm_pause()
396 // CHECK: call i64 asm sideeffect "\09mfppr\09$0; or 31,31,31; isync; lwsync; isync; mtppr\09$0;", "=r,~{memory}"()
398 void __attribute__((noinline
))
400 resd
= _mm_cvtepi32_pd(mi1
);
401 res
= _mm_cvtepi32_ps(mi1
);
402 resi
= _mm_cvtpd_epi32(md1
);
403 res64
= _mm_cvtpd_pi32(md1
);
404 res
= _mm_cvtpd_ps(md1
);
405 resd
= _mm_cvtpi32_pd(res64
);
406 resi
= _mm_cvtps_epi32(m1
);
407 resd
= _mm_cvtps_pd(m1
);
408 *dp
= _mm_cvtsd_f64(md1
);
409 i
= _mm_cvtsd_si32(md1
);
410 i64s
[0] = _mm_cvtsd_si64(md1
);
411 i64s
[0] = _mm_cvtsd_si64x(md1
);
412 res
= _mm_cvtsd_ss(m1
, md2
);
413 i
= _mm_cvtsi128_si32(mi1
);
414 i64s
[0] = _mm_cvtsi128_si64(mi1
);
415 i64s
[0] = _mm_cvtsi128_si64x(mi1
);
416 resd
= _mm_cvtsi32_sd(md1
, i
);
417 resi
= _mm_cvtsi32_si128(i
);
418 resd
= _mm_cvtsi64_sd(md1
, i64s
[1]);
419 resi
= _mm_cvtsi64_si128(i64s
[1]);
420 resd
= _mm_cvtsi64x_sd(md1
, i64s
[1]);
421 resi
= _mm_cvtsi64x_si128(i64s
[1]);
422 resd
= _mm_cvtss_sd(md1
, m1
);
423 resi
= _mm_cvttpd_epi32(md1
);
424 res64
= _mm_cvttpd_pi32(md1
);
425 resi
= _mm_cvttps_epi32(m1
);
426 i
= _mm_cvttsd_si32(md1
);
427 i64s
[0] = _mm_cvttsd_si64(md1
);
428 i64s
[0] = _mm_cvttsd_si64x(md1
);
431 // CHECK-LABEL: @test_converts
433 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtepi32_pd
434 // CHECK: call <2 x i64> @vec_unpackh(int vector[4])
435 // CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z_.]+}} to <2 x double>
436 // CHECK: fmul <2 x double> %[[CONV]], <double 1.000000e+00, double 1.000000e+00>
438 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtepi32_ps
439 // CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
441 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtpd_epi32
442 // CHECK: call <2 x double> @vec_rint(double vector[2])
443 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
444 // CHECK: call <4 x i32> asm "xvcvdpsxws ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %{{[0-9a-zA-Z_.]+}})
445 // CHECK-LE: call <4 x i32> @vec_mergeo(int vector[4], int vector[4])
446 // CHECK-BE: call <4 x i32> @vec_mergee(int vector[4], int vector[4])
447 // CHECK: call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef zeroinitializer)
449 // CHECK-LABEL: define available_externally i64 @_mm_cvtpd_pi32
450 // CHECK: call <2 x i64> @_mm_cvtpd_epi32(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
451 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
453 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpd_ps
454 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
455 // CHECK: call <4 x i32> asm "xvcvdpsp ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %{{[0-9a-zA-Z_.]+}})
456 // CHECK-LE: call <4 x i32> @vec_mergeo(int vector[4], int vector[4])
457 // CHECK-BE: call <4 x i32> @vec_mergee(int vector[4], int vector[4])
458 // CHECK: call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef zeroinitializer)
460 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtpi32_pd
461 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
462 // CHECK: call <2 x i64> @vec_unpackl(int vector[4])
463 // CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z._]+}} to <2 x double>
464 // CHECK: fmul <2 x double> %[[CONV]], <double 1.000000e+00, double 1.000000e+00>
466 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtps_epi32
467 // CHECK: call <4 x float> @vec_rint(float vector[4])
468 // CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
470 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtps_pd
471 // CHECK-BE: call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
472 // CHECK-BE: call <2 x double> asm " xvcvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> %{{[0-9a-zA-Z_.]+}})
473 // CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
474 // CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
475 // CHECK-LE: call <2 x double> asm " xvcvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> %{{[0-9a-zA-Z_.]+}})
477 // CHECK-LABEL: define available_externally double @_mm_cvtsd_f64
478 // CHECK: extractelement <2 x double> %{{[0-9a-zA-Z_.]+}}, i32 0
480 // CHECK-LABEL: define available_externally signext i32 @_mm_cvtsd_si32
481 // CHECK: call <2 x double> @vec_rint(double vector[2])
482 // CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i32
484 // CHECK-LABEL: define available_externally i64 @_mm_cvtsd_si64
485 // CHECK: call <2 x double> @vec_rint(double vector[2])
486 // CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i64
488 // CHECK-LABEL: define available_externally i64 @_mm_cvtsd_si64x
489 // CHECK: call i64 @_mm_cvtsd_si64(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
491 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtsd_ss
492 // CHECK-BE: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x double> %{{[0-9a-zA-Z_.]+}}, i32 0
493 // CHECK-BE: %[[TRUNC:[0-9a-zA-Z_.]+]] = fptrunc double %[[EXT]] to float
494 // CHECK-BE: insertelement <4 x float> %{{[0-9a-zA-Z_.]+}}, float %[[TRUNC]], i32 0
495 // CHECK-LE: call <2 x double> @vec_splat(double vector[2], unsigned int)(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
496 // CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
497 // CHECK-LE: call <4 x float> asm "xscvdpsp ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %{{[0-9a-zA-Z_.]+}})
498 // CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
500 // CHECK-LABEL: define available_externally signext i32 @_mm_cvtsi128_si32
501 // CHECK: extractelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0
503 // CHECK-LABEL: define available_externally i64 @_mm_cvtsi128_si64
504 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
506 // CHECK-LABEL: define available_externally i64 @_mm_cvtsi128_si64x
507 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
509 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtsi32_sd
510 // CHECK: sitofp i32 %{{[0-9a-zA-Z_.]+}} to double
512 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi32_si128
513 // CHECK: call <2 x i64> @_mm_set_epi32(i32 noundef signext 0, i32 noundef signext 0, i32 noundef signext 0, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
515 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtsi64_sd
516 // CHECK: sitofp i64 %{{[0-9a-zA-Z_.]+}} to double
518 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi64_si128
519 // CHECK: %[[INS:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> undef, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
520 // CHECK: insertelement <2 x i64> %[[INS]], i64 0, i32 1
522 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtsi64x_sd
523 // CHECK: call <2 x double> @_mm_cvtsi64_sd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %{{[0-9a-zA-Z_.]+}})
525 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi64x_si128
526 // CHECK: %[[INS:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> undef, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
527 // CHECK: insertelement <2 x i64> %[[INS]], i64 0, i32 1
529 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtss_sd
530 // CHECK-BE: fpext float %{{[0-9a-zA-Z_.]+}} to double
531 // CHECK-LE: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
532 // CHECK-LE: call <2 x double> asm "xscvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> %{{[0-9a-zA-Z_.]+}})
533 // CHECK-LE: call <2 x double> @vec_mergel(double vector[2], double vector[2])
535 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvttpd_epi32
536 // CHECK: call <4 x i32> asm "xvcvdpsxws ${0:x},${1:x}", "=^wa,^wa"
537 // CHECK-LE: call <4 x i32> @vec_mergeo(int vector[4], int vector[4])
538 // CHECK-BE: call <4 x i32> @vec_mergee(int vector[4], int vector[4])
539 // CHECK: call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef zeroinitializer)
541 // CHECK-LABEL: define available_externally i64 @_mm_cvttpd_pi32
542 // CHECK: call <2 x i64> @_mm_cvttpd_epi32(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
544 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvttps_epi32
545 // CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
547 // CHECK-LABEL: define available_externally signext i32 @_mm_cvttsd_si32
548 // CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i32
550 // CHECK-LABEL: define available_externally i64 @_mm_cvttsd_si64
551 // CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i64
553 // CHECK-LABEL: define available_externally i64 @_mm_cvttsd_si64x
554 // CHECK: call i64 @_mm_cvttsd_si64(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
556 void __attribute__((noinline
))
558 resd
= _mm_div_pd(md1
, md2
);
559 resd
= _mm_div_sd(md1
, md2
);
562 // CHECK-LABEL: @test_div
564 // CHECK-LABEL: define available_externally <2 x double> @_mm_div_pd
565 // CHECK: fdiv <2 x double>
567 // CHECK-LABEL: define available_externally <2 x double> @_mm_div_sd
568 // CHECK: fdiv double
570 void __attribute__((noinline
))
572 i
= _mm_extract_epi16(mi1
, i
);
575 // CHECK-LABEL: @test_extract
577 // CHECK-LABEL: define available_externally signext i32 @_mm_extract_epi16
578 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 7
579 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <8 x i16> %{{[0-9a-zA-Z_.]+}}, i32 %[[AND]]
580 // CHECK: zext i16 %[[EXT]] to i32
582 void __attribute__((noinline
))
584 resi
= _mm_insert_epi16 (mi1
, i
, is
[0]);
587 // CHECK-LABEL: @test_insert
589 // CHECK-LABEL: define available_externally <2 x i64> @_mm_insert_epi16
590 // CHECK: trunc i32 %{{[0-9a-zA-Z_.]+}} to i16
591 // CHECK: and i32 %{{[0-9a-zA-Z_.]+}}, 7
593 void __attribute__((noinline
))
595 resd
= _mm_load_pd(dp
);
596 resd
= _mm_load_pd1(dp
);
597 resd
= _mm_load_sd(dp
);
598 resi
= _mm_load_si128(mip
);
599 resd
= _mm_load1_pd(dp
);
600 resd
= _mm_loadh_pd(md1
, dp
);
601 resi
= _mm_loadl_epi64(mip
);
602 resd
= _mm_loadl_pd(md1
, dp
);
603 resd
= _mm_loadr_pd(dp
);
604 resd
= _mm_loadu_pd(dp
);
605 resi
= _mm_loadu_si128(mip
);
608 // CHECK-LABEL: @test_load
610 // CHECK-LABEL: define available_externally <2 x double> @_mm_load_pd
611 // CHECK: call <16 x i8> @vec_ld(long, unsigned char vector[16] const*)(i64 noundef 0, ptr noundef %{{[0-9a-zA-Z_.]+}})
613 // CHECK-LABEL: define available_externally <2 x double> @_mm_load_pd1
614 // CHECK: call <2 x double> @_mm_load1_pd(ptr noundef %{{[0-9a-zA-Z_.]+}})
616 // CHECK-LABEL: define available_externally <2 x double> @_mm_load_sd
617 // CHECK: call <2 x double> @_mm_set_sd(double noundef %{{[0-9a-zA-Z_.]+}})
619 // CHECK-LABEL: define available_externally <2 x i64> @_mm_load_si128
620 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
621 // CHECK: load <2 x i64>, ptr %[[ADDR]], align 16
623 // CHECK-LABEL: define available_externally <2 x double> @_mm_load1_pd
624 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
625 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load double, ptr %[[ADDR]]
626 // CHECK: call <2 x double> @vec_splats(double)(double noundef %[[VAL]])
628 // CHECK-LABEL: define available_externally <2 x double> @_mm_loadh_pd
629 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
630 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load double, ptr %{{[0-9a-zA-Z_.]+}}
631 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
632 // CHECK: insertelement <2 x double> %[[VEC]], double %[[VAL]], i32 1
634 // CHECK-LABEL: define available_externally <2 x i64> @_mm_loadl_epi64
635 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef %{{[0-9a-zA-Z_.]+}})
637 // CHECK-LABEL: define available_externally <2 x double> @_mm_loadl_pd
638 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
639 // CHECK: %[[ADDR2:[0-9a-zA-Z_.]+]] = load double, ptr %[[ADDR]]
640 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
641 // CHECK: insertelement <2 x double> %[[VEC]], double %[[ADDR2]], i32 0
643 // CHECK-LABEL: define available_externally <2 x double> @_mm_loadr_pd
644 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
645 // CHECK: call <2 x double> @_mm_load_pd(ptr noundef %[[ADDR]])
646 // CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 1, i32 2>
648 // CHECK-LABEL: define available_externally <2 x double> @_mm_loadu_pd
649 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
650 // CHECK: call <2 x double> @vec_vsx_ld(int, double const*)(i32 noundef signext 0, ptr noundef %[[ADDR]])
652 // CHECK-LABEL: define available_externally <2 x i64> @_mm_loadu_si128
653 // CHECK: load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
654 // CHECK: call <4 x i32> @vec_vsx_ld(int, int const*)(i32 noundef signext 0, ptr noundef %{{[0-9a-zA-Z_.]+}})
// Driver for the SSE2 bitwise wrappers. In order it calls the and, andnot,
// xor and or intrinsics, each first in its _pd form (md1, md2 -> resd) and
// then in its _si128 form (mi1, mi2 -> resi). The call order matters because
// the label directives after this body are matched in the same order.
// NOTE(review): the line naming this function and its braces are not present
// in this view; the label directive that follows suggests test_logical - confirm.
656 void __attribute__((noinline
))
658 resd
= _mm_and_pd(md1
, md2
);
659 resi
= _mm_and_si128(mi1
, mi2
);
660 resd
= _mm_andnot_pd(md1
, md2
);
661 resi
= _mm_andnot_si128(mi1
, mi2
);
662 resd
= _mm_xor_pd(md1
, md2
);
663 resi
= _mm_xor_si128(mi1
, mi2
);
664 resd
= _mm_or_pd(md1
, md2
);
665 resi
= _mm_or_si128(mi1
, mi2
);
668 // CHECK-LABEL: @test_logical
670 // CHECK-LABEL: define available_externally <2 x double> @_mm_and_pd
671 // CHECK: call <2 x double> @vec_and(double vector[2], double vector[2])
673 // CHECK-LABEL: define available_externally <2 x i64> @_mm_and_si128
674 // CHECK: call <2 x i64> @vec_and(long long vector[2], long long vector[2])
676 // CHECK-LABEL: define available_externally <2 x double> @_mm_andnot_pd
677 // CHECK: call <2 x double> @vec_andc(double vector[2], double vector[2])
679 // CHECK-LABEL: define available_externally <2 x i64> @_mm_andnot_si128
680 // CHECK: call <2 x i64> @vec_andc(long long vector[2], long long vector[2])
682 // CHECK-LABEL: define available_externally <2 x double> @_mm_xor_pd
683 // CHECK: call <2 x double> @vec_xor(double vector[2], double vector[2])
685 // CHECK-LABEL: define available_externally <2 x i64> @_mm_xor_si128
686 // CHECK: call <2 x i64> @vec_xor(long long vector[2], long long vector[2])
688 // CHECK-LABEL: define available_externally <2 x double> @_mm_or_pd
689 // CHECK: call <2 x double> @vec_or(double vector[2], double vector[2])
691 // CHECK-LABEL: define available_externally <2 x i64> @_mm_or_si128
692 // CHECK: call <2 x i64> @vec_or(long long vector[2], long long vector[2])
// Driver for the SSE2 maximum wrappers: the two integer forms (epi16, epu8)
// take mi1, mi2 and write resi; the two double forms (pd, sd) take md1, md2
// and write resd.
// NOTE(review): the line naming this function and its braces are not present
// in this view; the label directive that follows suggests test_max - confirm.
694 void __attribute__((noinline
))
696 resi
= _mm_max_epi16(mi1
, mi2
);
697 resi
= _mm_max_epu8(mi1
, mi2
);
698 resd
= _mm_max_pd(md1
, md2
);
699 resd
= _mm_max_sd(md1
, md2
);
702 // CHECK-LABEL: @test_max
704 // CHECK-LABEL: define available_externally <2 x i64> @_mm_max_epi16
705 // CHECK: call <8 x i16> @vec_max(short vector[8], short vector[8])
707 // CHECK-LABEL: define available_externally <2 x i64> @_mm_max_epu8
708 // CHECK: call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])
710 // CHECK-LABEL: define available_externally <2 x double> @_mm_max_pd
711 // CHECK: call <2 x double> @vec_max(double vector[2], double vector[2])
713 // CHECK-LABEL: define available_externally <2 x double> @_mm_max_sd
714 // CHECK: call <2 x double> @vec_splats(double)
715 // CHECK: call <2 x double> @vec_splats(double)
716 // CHECK: call <2 x double> @vec_max(double vector[2], double vector[2])
717 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
// Driver for the SSE2 minimum wrappers: the two integer forms (epi16, epu8)
// take mi1, mi2 and write resi; the two double forms (pd, sd) take md1, md2
// and write resd.
// NOTE(review): the line naming this function and its braces are not present
// in this view; the label directive that follows suggests test_min - confirm.
719 void __attribute__((noinline
))
721 resi
= _mm_min_epi16(mi1
, mi2
);
722 resi
= _mm_min_epu8(mi1
, mi2
);
723 resd
= _mm_min_pd(md1
, md2
);
724 resd
= _mm_min_sd(md1
, md2
);
727 // CHECK-LABEL: @test_min
729 // CHECK-LABEL: define available_externally <2 x i64> @_mm_min_epi16
730 // CHECK: call <8 x i16> @vec_min(short vector[8], short vector[8])
732 // CHECK-LABEL: define available_externally <2 x i64> @_mm_min_epu8
733 // CHECK: call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])
735 // CHECK-LABEL: define available_externally <2 x double> @_mm_min_pd
736 // CHECK: call <2 x double> @vec_min(double vector[2], double vector[2])
738 // CHECK-LABEL: define available_externally <2 x double> @_mm_min_sd
739 // CHECK: call <2 x double> @vec_splats(double)
740 // CHECK: call <2 x double> @vec_splats(double)
741 // CHECK: call <2 x double> @vec_min(double vector[2], double vector[2])
742 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
// Driver for the SSE2 move / movemask wrappers. Results go to resi, resd, the
// int i (both movemask calls) and res64; the final _mm_maskmoveu_si128 call
// takes mi1, mi2 and chs and its result is not assigned.
// NOTE(review): the line naming this function and its braces are not present
// in this view; the label directive that follows suggests test_move - confirm.
744 void __attribute__((noinline
))
746 resi
= _mm_move_epi64(mi1
);
747 resd
= _mm_move_sd(md1
, md2
);
748 i
= _mm_movemask_epi8(mi1
);
749 i
= _mm_movemask_pd(md1
);
750 res64
= _mm_movepi64_pi64(mi1
);
751 resi
= _mm_movpi64_epi64(m641
);
752 _mm_maskmoveu_si128(mi1
, mi2
, chs
);
755 // CHECK-LABEL: @test_move
757 // CHECK-LABEL: define available_externally <2 x i64> @_mm_move_epi64
758 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef %{{[0-9a-zA-Z_.]+}})
760 // CHECK-LABEL: define available_externally <2 x double> @_mm_move_sd
761 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x double> %{{[0-9a-zA-Z_.]+}}, i32 0
762 // CHECK: insertelement <2 x double> %{{[0-9a-zA-Z_.]+}}, double %[[EXT]], i32 0
764 // CHECK-P10-LABEL: define available_externally signext i32 @_mm_movemask_epi8
765 // CHECK-P10: call zeroext i32 @vec_extractm(unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}})
767 // CHECK-LABEL: define available_externally signext i32 @_mm_movemask_epi8
768 // CHECK: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 120, i8 112, i8 104, i8 96, i8 88, i8 80, i8 72, i8 64, i8 56, i8 48, i8 40, i8 32, i8 24, i8 16, i8 8, i8 0>)
769 // CHECK-LE: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
770 // CHECK-BE: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
771 // CHECK: trunc i64 %[[VAL]] to i32
773 // CHECK-P10-LABEL: define available_externally signext i32 @_mm_movemask_pd
774 // CHECK-P10: call zeroext i32 @vec_extractm(unsigned long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}})
776 // CHECK-LABEL: define available_externally signext i32 @_mm_movemask_pd
777 // CHECK-LE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 -2139094976, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>))
778 // CHECK-LE: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
779 // CHECK-BE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 -2139078656> to <16 x i8>))
780 // CHECK-BE: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
782 // CHECK-LABEL: define available_externally i64 @_mm_movepi64_pi64
783 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
785 // CHECK-LABEL: define available_externally <2 x i64> @_mm_movpi64_epi64
786 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef %{{[0-9a-zA-Z_.]+}})
788 // CHECK-LABEL: define available_externally void @_mm_maskmoveu_si128
789 // CHECK: call <2 x i64> @_mm_loadu_si128(ptr noundef %{{[0-9a-zA-Z_.]+}})
790 // CHECK: call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16])
791 // CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], unsigned char vector[16])
792 // CHECK: call void @_mm_storeu_si128(ptr noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef %{{[0-9a-zA-Z_.]+}})
// Driver for the SSE2 multiply wrappers: mul_epu32, mul_pd, mul_sd, mul_su32
// (the only one operating on m641, m642 and writing res64), then mulhi_epi16,
// mulhi_epu16 and mullo_epi16.
// NOTE(review): the line naming this function and its braces are not present
// in this view; the label directive that follows suggests test_mul - confirm.
794 void __attribute__((noinline
))
796 resi
= _mm_mul_epu32(mi1
, mi2
);
797 resd
= _mm_mul_pd(md1
, md2
);
798 resd
= _mm_mul_sd(md1
, md2
);
799 res64
= _mm_mul_su32(m641
, m642
);
800 resi
= _mm_mulhi_epi16(mi1
, mi2
);
801 resi
= _mm_mulhi_epu16(mi1
, mi2
);
802 resi
= _mm_mullo_epi16(mi1
, mi2
);
805 // CHECK-LABEL: @test_mul
807 // CHECK-LABEL: define available_externally <2 x i64> @_mm_mul_epu32
808 // CHECK-LE: call <2 x i64> asm "vmulouw $0,$1,$2", "=v,v,v"(<2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}})
809 // CHECK-BE: call <2 x i64> asm "vmuleuw $0,$1,$2", "=v,v,v"(<2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}})
811 // CHECK-LABEL: define available_externally <2 x double> @_mm_mul_pd
812 // CHECK: fmul <2 x double>
814 // CHECK-LABEL: define available_externally <2 x double> @_mm_mul_sd
815 // CHECK: fmul double
817 // CHECK-LABEL: define available_externally i64 @_mm_mul_su32
818 // CHECK: trunc i64 %{{[0-9a-zA-Z_.]+}} to i32
819 // CHECK: trunc i64 %{{[0-9a-zA-Z_.]+}} to i32
820 // CHECK: %[[EXT1:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
821 // CHECK: %[[EXT2:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
822 // CHECK: mul i64 %[[EXT1]], %[[EXT2]]
824 // CHECK-LABEL: define available_externally <2 x i64> @_mm_mulhi_epi16
825 // CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
826 // CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 8, i8 9, i8 24, i8 25, i8 12, i8 13, i8 28, i8 29>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
827 // CHECK: call <4 x i32> @vec_vmulesh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
828 // CHECK: call <4 x i32> @vec_vmulosh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
829 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])
831 // CHECK-LABEL: define available_externally <2 x i64> @_mm_mulhi_epu16
832 // CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
833 // CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 8, i8 9, i8 24, i8 25, i8 12, i8 13, i8 28, i8 29>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
834 // CHECK: call <4 x i32> @vec_vmuleuh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
835 // CHECK: call <4 x i32> @vec_vmulouh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
836 // CHECK: call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16])
838 // CHECK-LABEL: define available_externally <2 x i64> @_mm_mullo_epi16
839 // CHECK: mul <8 x i16>
// Driver for the SSE2 pack wrappers: packs_epi16, packs_epi32 and
// packus_epi16, all taking mi1, mi2 and writing resi.
// NOTE(review): the line naming this function and its braces are not present
// in this view; the label directive that follows suggests test_pack - confirm.
841 void __attribute__((noinline
))
843 resi
= _mm_packs_epi16(mi1
, mi2
);
844 resi
= _mm_packs_epi32(mi1
, mi2
);
845 resi
= _mm_packus_epi16(mi1
, mi2
);
848 // CHECK-LABEL: @test_pack
850 // CHECK-LABEL: define available_externally <2 x i64> @_mm_packs_epi16
851 // CHECK: call <16 x i8> @vec_packs(short vector[8], short vector[8])
853 // CHECK-LABEL: define available_externally <2 x i64> @_mm_packs_epi32
854 // CHECK: call <8 x i16> @vec_packs(int vector[4], int vector[4])
856 // CHECK-LABEL: define available_externally <2 x i64> @_mm_packus_epi16
857 // CHECK: call <16 x i8> @vec_packsu(short vector[8], short vector[8])
// Driver for the _mm_sad_epu8 wrapper: one call on mi1, mi2 writing resi.
// NOTE(review): the line naming this function and its braces are not present
// in this view; the label directive that follows suggests test_sad - confirm.
859 void __attribute__((noinline
))
861 resi
= _mm_sad_epu8(mi1
, mi2
);
864 // CHECK-LABEL: @test_sad
866 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sad_epu8
867 // CHECK: call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])
868 // CHECK: call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])
869 // CHECK: call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16])
870 // CHECK: call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
871 // CHECK-LE: call <4 x i32> asm "vsum2sws $0,$1,$2", "=v,v,v"(<4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> zeroinitializer)
872 // CHECK-BE: call <4 x i32> @vec_sum2s(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
873 // CHECK-BE: call <4 x i32> @vec_sld(int vector[4], int vector[4], unsigned int)
// Driver for the SSE2 set / set1 / setr / setzero wrappers. Element values are
// read from the arrays ss, is, i64s, chs and dp, from the scalars m641 and
// m642, and from the int i (for _mm_set1_epi32); integer results go to resi
// and double results to resd. Both the _mm_set_* and _mm_setr_* calls list
// their array elements from the highest index down to 0.
// NOTE(review): the line naming this function and its braces are not present
// in this view; the label directive that follows suggests test_set - confirm.
875 void __attribute__((noinline
))
877 resi
= _mm_set_epi16(ss
[7], ss
[6], ss
[5], ss
[4], ss
[3], ss
[2], ss
[1], ss
[0]);
878 resi
= _mm_set_epi32(is
[3], is
[2], is
[1], is
[0]);
879 resi
= _mm_set_epi64(m641
, m642
);
880 resi
= _mm_set_epi64x(i64s
[0], i64s
[1]);
881 resi
= _mm_set_epi8(chs
[15], chs
[14], chs
[13], chs
[12], chs
[11], chs
[10], chs
[9], chs
[8], chs
[7], chs
[6], chs
[5], chs
[4], chs
[3], chs
[2], chs
[1], chs
[0]);
882 resd
= _mm_set_pd(dp
[0], dp
[1]);
883 resd
= _mm_set_pd1(dp
[0]);
884 resd
= _mm_set_sd(dp
[0]);
885 resi
= _mm_set1_epi16(ss
[0]);
886 resi
= _mm_set1_epi32(i
);
887 resi
= _mm_set1_epi64(m641
);
888 resi
= _mm_set1_epi64x(i64s
[0]);
889 resi
= _mm_set1_epi8(chs
[0]);
890 resd
= _mm_set1_pd(dp
[0]);
891 resi
= _mm_setr_epi16(ss
[7], ss
[6], ss
[5], ss
[4], ss
[3], ss
[2], ss
[1], ss
[0]);
892 resi
= _mm_setr_epi32(is
[3], is
[2], is
[1], is
[0]);
893 resi
= _mm_setr_epi64(m641
, m642
);
894 resi
= _mm_setr_epi8(chs
[15], chs
[14], chs
[13], chs
[12], chs
[11], chs
[10], chs
[9], chs
[8], chs
[7], chs
[6], chs
[5], chs
[4], chs
[3], chs
[2], chs
[1], chs
[0]);
895 resd
= _mm_setr_pd(dp
[0], dp
[1]);
896 resd
= _mm_setzero_pd();
897 resi
= _mm_setzero_si128();
900 // CHECK-LABEL: @test_set
902 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi16
903 // CHECK-COUNT-8: store i16 {{[0-9a-zA-Z_%.]+}}, ptr {{[0-9a-zA-Z_%.]+}}, align 2
904 // CHECK: insertelement <8 x i16> undef, i16 {{[0-9a-zA-Z_%.]+}}, i32 0
905 // CHECK-COUNT-7: insertelement <8 x i16> {{[0-9a-zA-Z_%.]+}}, i16 {{[0-9a-zA-Z_%.]+}}, i32 {{[1-7]}}
907 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi32
908 // CHECK-COUNT-4: store i32 {{[0-9a-zA-Z_%.]+}}, ptr {{[0-9a-zA-Z_%.]+}}, align 4
909 // CHECK: insertelement <4 x i32> undef, i32 {{[0-9a-zA-Z_%.]+}}, i32 0
910 // CHECK-COUNT-3: insertelement <4 x i32> {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9a-zA-Z_%.]+}}, i32 {{[1-3]}}
912 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi64
913 // CHECK: call <2 x i64> @_mm_set_epi64x(i64 noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %{{[0-9a-zA-Z_.]+}})
915 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi64x
916 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> undef, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
917 // CHECK: insertelement <2 x i64> %[[VEC]], i64 %{{[0-9a-zA-Z_.]+}}, i32 1
919 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi8
920 // CHECK-COUNT-16: store i8 {{[0-9a-zA-Z_%.]+}}, ptr {{[0-9a-zA-Z_%.]+}}, align 1
921 // CHECK: insertelement <16 x i8> undef, i8 {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9]+}}
922 // CHECK-COUNT-15: {{[0-9a-zA-Z_%.]+}} = insertelement <16 x i8> {{[0-9a-zA-Z_%.]+}}, i8 {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9]+}}
924 // CHECK-LABEL: define available_externally <2 x double> @_mm_set_pd
925 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
926 // CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1
928 // CHECK-LABEL: define available_externally <2 x double> @_mm_set_pd1
929 // CHECK: call <2 x double> @_mm_set1_pd(double noundef %{{[0-9a-zA-Z_.]+}})
931 // CHECK-LABEL: define available_externally <2 x double> @_mm_set_sd
932 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
933 // CHECK: insertelement <2 x double> %[[VEC]], double 0.000000e+00, i32 1
935 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi16
936 // CHECK-COUNT-8: load i16, ptr %{{[0-9a-zA-Z_.]+}}, align 2
937 // CHECK: call <2 x i64> @_mm_set_epi16
939 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi32
940 // CHECK-COUNT-4: load i32, ptr %{{[0-9a-zA-Z_.]+}}, align 4
941 // CHECK: call <2 x i64> @_mm_set_epi32
943 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi64
944 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8
945 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8
946 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef %[[VAL1]], i64 noundef %[[VAL2]])
948 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi64x
949 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8
950 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8
951 // CHECK: call <2 x i64> @_mm_set_epi64x(i64 noundef %[[VAL1]], i64 noundef %[[VAL2]])
953 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi8
954 // CHECK-COUNT-16: load i8, ptr %{{[0-9a-zA-Z_.]+}}, align 1
955 // CHECK: call <2 x i64> @_mm_set_epi8
957 // CHECK-LABEL: define available_externally <2 x double> @_mm_set1_pd
958 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
959 // CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1
961 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi16
962 // CHECK-COUNT-8: load i16, ptr {{[0-9a-zA-Z_%.]+}}, align 2
963 // CHECK: call <2 x i64> @_mm_set_epi16
965 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi32
966 // CHECK-COUNT-4: load i32, ptr {{[0-9a-zA-Z_%.]+}}, align 4
967 // CHECK: call <2 x i64> @_mm_set_epi32
969 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi64
970 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8
971 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8
972 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef %[[VAL1]], i64 noundef %[[VAL2]])
974 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi8
975 // CHECK-COUNT-16: load i8, ptr {{[0-9a-zA-Z_%.]+}}, align 1
976 // CHECK: call <2 x i64> @_mm_set_epi8
978 // CHECK-LABEL: define available_externally <2 x double> @_mm_setr_pd
979 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
980 // CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1
982 // CHECK-LABEL: define available_externally <2 x double> @_mm_setzero_pd()
983 // CHECK: call <4 x i32> @vec_splats(int)(i32 noundef signext 0)
985 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setzero_si128()
986 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
// Driver for the SSE2 shuffle wrappers: shuffle_epi32, shuffle_pd,
// shufflehi_epi16 and shufflelo_epi16. Every call passes the int i as its
// last (selector) argument.
// NOTE(review): the line naming this function and its braces are not present
// in this view; the label directive that follows suggests test_shuffle - confirm.
988 void __attribute__((noinline
))
990 resi
= _mm_shuffle_epi32(mi1
, i
);
991 resd
= _mm_shuffle_pd(md1
, md2
, i
);
992 resi
= _mm_shufflehi_epi16(mi1
, i
);
993 resi
= _mm_shufflelo_epi16(mi1
, i
);
996 // CHECK-LABEL: @test_shuffle
998 // CHECK-LABEL: define available_externally <2 x i64> @_mm_shuffle_epi32
999 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
1000 // CHECK: sext i32 %[[AND]] to i64
1001 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2
1002 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
1003 // CHECK: sext i32 %[[AND2]] to i64
1004 // CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4
1005 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
1006 // CHECK: sext i32 %[[AND3]] to i64
1007 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
1008 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
1009 // CHECK: sext i32 %[[AND4]] to i64
1010 // CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
1011 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0
1012 // CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
1013 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1
1014 // CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
1015 // CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
1016 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2
1017 // CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
1018 // CHECK: add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
1019 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])
1021 // CHECK-LABEL: define available_externally <2 x double> @_mm_shuffle_pd
1022 // CHECK: and i32 %{{[0-9a-zA-Z_.]+}}, 3
1023 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, 0
1024 // CHECK: br i1 %[[CMP]]
1025 // CHECK: call <2 x double> @vec_mergeh(double vector[2], double vector[2])
1026 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, 1
1027 // CHECK: br i1 %[[CMP2]]
1028 // CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 1, i32 2>
1029 // CHECK: %[[CMP3:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, 2
1030 // CHECK: br i1 %[[CMP3]]
1031 // CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 0, i32 3>
1032 // CHECK: call <2 x double> @vec_mergel(double vector[2], double vector[2])
1034 // CHECK-LABEL: define available_externally <2 x i64> @_mm_shufflehi_epi16
1035 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
1036 // CHECK: sext i32 %[[AND]] to i64
1037 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2
1038 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
1039 // CHECK: sext i32 %[[AND2]] to i64
1040 // CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4
1041 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
1042 // CHECK: sext i32 %[[AND3]] to i64
1043 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
1044 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
1045 // CHECK: sext i32 %[[AND4]] to i64
1046 // CHECK-LE: store <2 x i64> <i64 1663540288323457296, i64 0>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1047 // CHECK-BE: store <2 x i64> <i64 1157726452361532951, i64 0>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1048 // CHECK-COUNT-4: getelementptr inbounds [4 x i16], ptr @_mm_shufflehi_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
1049 // CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
1051 // CHECK-LABEL: define available_externally <2 x i64> @_mm_shufflelo_epi16
1052 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 {{[0-9a-zA-Z_%.]+}}, 3
1053 // CHECK: sext i32 %[[AND]] to i64
1054 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 2
1055 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
1056 // CHECK: sext i32 %[[AND2]] to i64
1057 // CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 4
1058 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
1059 // CHECK: sext i32 %[[AND3]] to i64
1060 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 6
1061 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
1062 // CHECK: sext i32 %[[AND4]] to i64
1063 // CHECK-LE: store <2 x i64> <i64 0, i64 2242261671028070680>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1064 // CHECK-BE: store <2 x i64> <i64 0, i64 1736447835066146335>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1065 // CHECK-COUNT-4: getelementptr inbounds [4 x i16], ptr @_mm_shufflelo_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
1066 // CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
// Driver for the SSE2 left-shift wrappers. The _mm_sll_* calls take their
// second argument from the vector mi2; the _mm_slli_* calls (including
// _mm_slli_si128) take it from the int i. All results go to resi.
// NOTE(review): the line naming this function and its braces are not present
// in this view; the label directive that follows suggests test_sll - confirm.
1068 void __attribute__((noinline
))
1070 resi
= _mm_sll_epi16(mi1
, mi2
);
1071 resi
= _mm_sll_epi32(mi1
, mi2
);
1072 resi
= _mm_sll_epi64(mi1
, mi2
);
1073 resi
= _mm_slli_epi16(mi1
, i
);
1074 resi
= _mm_slli_epi32(mi1
, i
);
1075 resi
= _mm_slli_epi64(mi1
, i
);
1076 resi
= _mm_slli_si128(mi1
, i
);
1079 // CHECK-LABEL: @test_sll
1081 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi16
1082 // CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
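1083 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1084 // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3)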
1085 // CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
1086 // CHECK: call <8 x i16> @vec_sl(unsigned short vector[8], unsigned short vector[8])
1087 // CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8])
1089 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi32
1090 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1091 // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1)
1092 // CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 32, i32 32, i32 32, i32 32>)
1093 // CHECK: call <4 x i32> @vec_sl(unsigned int vector[4], unsigned int vector[4])
1094 // CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4])
1096 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi64
1097 // CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef zeroext 0)
1098 // CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, <2 x i64> noundef <i64 64, i64 64>)
1099 // CHECK: call <2 x i64> @vec_sl(unsigned long long vector[2], unsigned long long vector[2])
1100 // CHECK: call <2 x i64> @vec_sel(unsigned long long vector[2], unsigned long long vector[2], bool vector[2])
1102 // CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_epi16
1103 // CHECK: store <8 x i16> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1104 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp sge i32 %{{[0-9a-zA-Z_.]+}}, 0
1105 // CHECK: br i1 %[[CMP]]
1106 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1107 // CHECK: br i1 %[[CMP2]]
1108 // CHECK: call i1 @llvm.is.constant
1109 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1110 // CHECK: call <8 x i16> @vec_splat_s16(signed char)(i8 noundef signext %[[TRUNC]])
1111 // CHECK: %[[TRUNC2:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i16
1112 // CHECK: call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext %[[TRUNC2]])
1113 // CHECK: call <8 x i16> @vec_sl(short vector[8], unsigned short vector[8])
1115 // CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_epi32
1116 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1117 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp sge i32 %{{[0-9a-zA-Z_.]+}}, 0
1118 // CHECK: br i1 %[[CMP]]
1119 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32
1120 // CHECK: br i1 %[[CMP2]]
1121 // CHECK: call i1 @llvm.is.constant
1122 // CHECK: %[[CMP3:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1123 // CHECK: br i1 %[[CMP3]]
1124 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1125 // CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]])
1126 // CHECK: call <4 x i32> @vec_splats(unsigned int)
1127 // CHECK: call <4 x i32> @vec_sl(int vector[4], unsigned int vector[4])
1129 // CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_epi64
1130 // CHECK: store <2 x i64> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1131 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp sge i32 %{{[0-9a-zA-Z_.]+}}, 0
1132 // CHECK: br i1 %[[CMP]]
1133 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 64
1134 // CHECK: br i1 %[[CMP2]]
1135 // CHECK: call i1 @llvm.is.constant
1136 // CHECK: %[[CMP3:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1137 // CHECK: br i1 %[[CMP3]]
1138 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1139 // CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]])
1140 // CHECK: call <4 x i32> @vec_splats(unsigned int)
1141 // CHECK: call <2 x i64> @vec_sl(long long vector[2], unsigned long long vector[2])
1143 // CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_si128
1144 // CHECK: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1145 // CHECK-BE: %[[SUB:[0-9a-zA-Z_.]+]] = sub nsw i32 16, %{{[0-9a-zA-Z_.]+}}
1146 // CHECK-BE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext %[[SUB]])
1147 // CHECK-LE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer, i32 noundef zeroext %{{[0-9a-zA-Z_.]+}})
1148 // CHECK: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
// Driver for the SSE2 square-root wrappers: _mm_sqrt_pd on md1 and
// _mm_sqrt_sd on md1, md2, both writing resd.
// NOTE(review): the line naming this function and its braces are not present
// in this view; the label directive that follows suggests test_sqrt - confirm.
1150 void __attribute__((noinline
))
1152 resd
= _mm_sqrt_pd(md1
);
1153 resd
= _mm_sqrt_sd(md1
, md2
);
1156 // CHECK-LABEL: @test_sqrt
1158 // CHECK-LABEL: define available_externally <2 x double> @_mm_sqrt_pd
1159 // CHECK: call <2 x double> @vec_sqrt(double vector[2])(<2 x double> noundef {{[0-9a-zA-Z_%.]+}})
1161 // CHECK-LABEL: define available_externally <2 x double> @_mm_sqrt_sd
1162 // CHECK: %[[CALL:[0-9a-zA-Z_.]+]] = call <2 x double> @_mm_set1_pd(double noundef %{{[0-9a-zA-Z_.]+}})
1163 // CHECK: call <2 x double> @vec_sqrt(double vector[2])(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
1164 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
// Driver for the SSE2 arithmetic right-shift wrappers. The _mm_sra_* calls
// take their second argument from the vector mi2; the _mm_srai_* calls take
// it from the int i. All results go to resi.
// NOTE(review): the line naming this function and its braces are not present
// in this view; the label directive that follows suggests test_sra - confirm.
1166 void __attribute__((noinline
))
1168 resi
= _mm_sra_epi16(mi1
, mi2
);
1169 resi
= _mm_sra_epi32(mi1
, mi2
);
1170 resi
= _mm_srai_epi16(mi1
, i
);
1171 resi
= _mm_srai_epi32(mi1
, i
);
1174 // CHECK-LABEL: @test_sra
1176 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi16
1177 // CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1178 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1179 // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3)
1180 // CHECK: call <8 x i16> @vec_min(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
1181 // CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])
1183 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi32
1184 // CHECK: store <4 x i32> <i32 31, i32 31, i32 31, i32 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1185 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1186 // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1)
1187 // CHECK: call <4 x i32> @vec_min(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 31, i32 31, i32 31, i32 31>)
1188 // CHECK: call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4])
1190 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi16
1191 // CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1192 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1193 // CHECK: br i1 %[[CMP]]
1194 // CHECK: call i1 @llvm.is.constant
1195 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1196 // CHECK: call <8 x i16> @vec_splat_s16(signed char)(i8 noundef signext %[[TRUNC]])
1197 // CHECK: %[[TRUNC2:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i16
1198 // CHECK: call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext %{{[0-9a-zA-Z_.]+}})
1199 // CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])
1201 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi32
1202 // CHECK: store <4 x i32> <i32 31, i32 31, i32 31, i32 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1203 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32
1204 // CHECK: br i1 %[[CMP]]
1205 // CHECK: call i1 @llvm.is.constant
1206 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1207 // CHECK: br i1 %[[CMP2]]
1208 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1209 // CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]])
1210 // CHECK: call <4 x i32> @vec_splats(unsigned int)
1211 // CHECK: call <4 x i32> @vec_splats(unsigned int)
1212 // CHECK: call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4])
// Test driver fragment for the right-shift intrinsics: assigns to resi the
// result of _mm_srl_epi16/_mm_srl_epi32/_mm_srl_epi64 (shift count taken from
// the vector mi2) and of _mm_srli_epi16/_mm_srli_epi32/_mm_srli_epi64/
// _mm_srli_si128 (shift count taken from the scalar i), one call each.
// The routine is marked noinline.
// NOTE(review): the routine's name line and braces are not present in this
// text, and resi/mi1/mi2/i are declared outside this span; the CHECK-LABEL
// that follows suggests the routine is test_srl -- confirm.
1214 void __attribute__((noinline
))
1216 resi
= _mm_srl_epi16(mi1
, mi2
);
1217 resi
= _mm_srl_epi32(mi1
, mi2
);
1218 resi
= _mm_srl_epi64(mi1
, mi2
);
1219 resi
= _mm_srli_epi16(mi1
, i
);
1220 resi
= _mm_srli_epi32(mi1
, i
);
1221 resi
= _mm_srli_epi64(mi1
, i
);
1222 resi
= _mm_srli_si128(mi1
, i
);
1225 // CHECK-LABEL: @test_srl
1227 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi16
1228 // CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1229 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1230 // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3)
1231 // CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
1232 // CHECK: call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8])
1233 // CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8])
1235 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi32
1236 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1237 // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1)
1238 // CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 32, i32 32, i32 32, i32 32>)
1239 // CHECK: call <4 x i32> @vec_sr(unsigned int vector[4], unsigned int vector[4])
1240 // CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4])
1242 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi64
1243 // CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1244 // CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef <i64 64, i64 64>)
1245 // CHECK: call <2 x i64> @vec_sr(unsigned long long vector[2], unsigned long long vector[2])
1246 // CHECK: call <2 x i64> @vec_sel(unsigned long long vector[2], unsigned long long vector[2], bool vector[2])
1248 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_epi16
1249 // CHECK: store <8 x i16> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1250 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1251 // CHECK: br i1 %[[CMP]]
1252 // CHECK: call i1 @llvm.is.constant
1253 // CHECK: trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1254 // CHECK: call <8 x i16> @vec_splat_s16(signed char)
1255 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i16
1256 // CHECK: call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext %[[TRUNC]])
1257 // CHECK: call <8 x i16> @vec_sr(short vector[8], unsigned short vector[8])
1259 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_epi32
1260 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1261 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32
1262 // CHECK: br i1 %[[CMP]]
1263 // CHECK: call i1 @llvm.is.constant
1264 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1265 // CHECK: br i1 %[[CMP2]]
1266 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1267 // CHECK: call <4 x i32> @vec_splat_s32(signed char)
1268 // CHECK: call <4 x i32> @vec_splats(unsigned int)
1269 // CHECK: call <4 x i32> @vec_splats(unsigned int)
1270 // CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
1272 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_epi64
1273 // CHECK: store <2 x i64> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1274 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 64
1275 // CHECK: br i1 %[[CMP]]
1276 // CHECK: call i1 @llvm.is.constant
1277 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1278 // CHECK: br i1 %[[CMP2]]
1279 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1280 // CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]])
1281 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64
1282 // CHECK: call <2 x i64> @vec_splats(unsigned long long)(i64 noundef %[[EXT]])
1283 // CHECK: call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext %{{[0-9a-zA-Z_.]+}})
1284 // CHECK: call <2 x i64> @vec_sr(long long vector[2], unsigned long long vector[2])
1286 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_si128
1287 // CHECK: call <2 x i64> @_mm_bsrli_si128
// Test driver fragment for the store intrinsics: calls each of _mm_store_pd,
// _mm_store_pd1, _mm_store_sd, _mm_store1_pd, _mm_storeh_pd, _mm_storel_pd,
// _mm_storer_pd and _mm_storeu_pd with (dp, md1), and each of
// _mm_store_si128, _mm_storel_epi64 and _mm_storeu_si128 with (mip, mi1).
// No return values are used. The routine is marked noinline.
// NOTE(review): the routine's name line and braces are not present in this
// text, and dp/mip/md1/mi1 are declared outside this span; the CHECK-LABEL
// that follows suggests the routine is test_store -- confirm.
1289 void __attribute__((noinline
))
1291 _mm_store_pd(dp
, md1
);
1292 _mm_store_pd1(dp
, md1
);
1293 _mm_store_sd(dp
, md1
);
1294 _mm_store_si128(mip
, mi1
);
1295 _mm_store1_pd(dp
, md1
);
1296 _mm_storeh_pd(dp
, md1
);
1297 _mm_storel_epi64(mip
, mi1
);
1298 _mm_storel_pd(dp
, md1
);
1299 _mm_storer_pd(dp
, md1
);
1300 _mm_storeu_pd(dp
, md1
);
1301 _mm_storeu_si128(mip
, mi1
);
1304 // CHECK-LABEL: @test_store
1306 // CHECK-LABEL: define available_externally void @_mm_store_pd
1307 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1308 // CHECK: call void @vec_st(unsigned char vector[16], long, unsigned char vector[16]*)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef 0, ptr noundef %[[ADDR]])
1310 // CHECK-LABEL: define available_externally void @_mm_store_pd1
1311 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1312 // CHECK: %[[ADDR2:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1313 // CHECK: call void @_mm_store1_pd(ptr noundef %[[ADDR]], <2 x double> noundef %[[ADDR2]])
1315 // CHECK-LABEL: define available_externally void @_mm_store_sd
1316 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1317 // CHECK: store double %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]]
1319 // CHECK-LABEL: define available_externally void @_mm_store_si128
1320 // CHECK: %[[LOAD:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1321 // CHECK: call void @vec_st(unsigned char vector[16], long, unsigned char vector[16]*)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef 0, ptr noundef %[[LOAD]])
1323 // CHECK-LABEL: define available_externally void @_mm_store1_pd
1324 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1325 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1326 // CHECK: %[[CALL:[0-9a-zA-Z_.]+]] = call <2 x double> @vec_splat(double vector[2], unsigned int)(<2 x double> noundef %[[VAL]], i32 noundef zeroext 0)
1327 // CHECK: call void @_mm_store_pd(ptr noundef %[[ADDR]], <2 x double> noundef %[[CALL]])
1329 // CHECK-LABEL: define available_externally void @_mm_storeh_pd
1330 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1331 // CHECK: store double %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]]
1333 // CHECK-LABEL: define available_externally void @_mm_storel_epi64
1334 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1335 // CHECK: store i64 %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]], align 8
1337 // CHECK-LABEL: define available_externally void @_mm_storel_pd
1338 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1339 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1340 // CHECK: call void @_mm_store_sd(ptr noundef %[[ADDR]], <2 x double> noundef %[[VAL]])
1342 // CHECK-LABEL: define available_externally void @_mm_storer_pd
1343 // CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 1, i32 2>
1344 // CHECK: call void @_mm_store_pd(ptr noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}})
1346 // CHECK-LABEL: define available_externally void @_mm_storeu_pd
1347 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1348 // CHECK: store <2 x double> %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]], align 1
1350 // CHECK-LABEL: define available_externally void @_mm_storeu_si128
1351 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1352 // CHECK: store <2 x i64> %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]], align 1
// Test driver fragment for the stream intrinsics: calls _mm_stream_pd(dp, md1),
// _mm_stream_si128(mip, mi1), _mm_stream_si32(is, i) and
// _mm_stream_si64(i64s, i64s[1]) once each. Note the last call passes i64s as
// its first argument and element 1 of the same i64s as its second.
// The routine is marked noinline.
// NOTE(review): the routine's name line and braces are not present in this
// text, and dp/mip/is/i64s/md1/mi1/i are declared outside this span; the
// CHECK-LABEL that follows suggests the routine is test_stream -- confirm.
1354 void __attribute__((noinline
))
1356 _mm_stream_pd(dp
, md1
);
1357 _mm_stream_si128(mip
, mi1
);
1358 _mm_stream_si32(is
, i
);
1359 _mm_stream_si64(i64s
, i64s
[1]);
1362 // CHECK-LABEL: @test_stream
1364 // CHECK-LABEL: define available_externally void @_mm_stream_pd
1365 // CHECK: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})
1367 // CHECK-LABEL: define available_externally void @_mm_stream_si128
1368 // CHECK: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})
1370 // CHECK-LABEL: define available_externally void @_mm_stream_si32
1371 // CHECK: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})
1373 // CHECK-LABEL: define available_externally void @_mm_stream_si64
1374 // CHECK: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})
// Test driver fragment for the subtraction intrinsics, one call each:
//   resi  <- _mm_sub_epi64/_mm_sub_epi32/_mm_sub_epi16/_mm_sub_epi8 (mi1, mi2)
//   resd  <- _mm_sub_pd/_mm_sub_sd (md1, md2)
//   res64 <- _mm_sub_si64 (m641, m642)
//   resi  <- _mm_subs_epi16/_mm_subs_epi8/_mm_subs_epu16/_mm_subs_epu8 (mi1, mi2)
// The routine is marked noinline.
// NOTE(review): the routine's name line and braces are not present in this
// text, and all operands/results are declared outside this span; the
// CHECK-LABEL that follows suggests the routine is test_sub -- confirm.
1376 void __attribute__((noinline
))
1378 resi
= _mm_sub_epi64(mi1
, mi2
);
1379 resi
= _mm_sub_epi32(mi1
, mi2
);
1380 resi
= _mm_sub_epi16(mi1
, mi2
);
1381 resi
= _mm_sub_epi8(mi1
, mi2
);
1382 resd
= _mm_sub_pd(md1
, md2
);
1383 resd
= _mm_sub_sd(md1
, md2
);
1384 res64
= _mm_sub_si64(m641
, m642
);
1385 resi
= _mm_subs_epi16(mi1
, mi2
);
1386 resi
= _mm_subs_epi8(mi1
, mi2
);
1387 resi
= _mm_subs_epu16(mi1
, mi2
);
1388 resi
= _mm_subs_epu8(mi1
, mi2
);
1391 // CHECK-LABEL: @test_sub
1393 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi64
1394 // CHECK: sub <2 x i64>
1396 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi32
1397 // CHECK: sub <4 x i32>
1399 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi16
1400 // CHECK: sub <8 x i16>
1402 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi8
1403 // CHECK: sub <16 x i8>
1405 // CHECK-LABEL: define available_externally <2 x double> @_mm_sub_pd
1406 // CHECK: fsub <2 x double>
1408 // CHECK-LABEL: define available_externally <2 x double> @_mm_sub_sd
1409 // CHECK: fsub double
1411 // CHECK-LABEL: define available_externally i64 @_mm_sub_si64
1414 // CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epi16
1415 // CHECK: call <8 x i16> @vec_subs(short vector[8], short vector[8])
1417 // CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epi8
1418 // CHECK: call <16 x i8> @vec_subs(signed char vector[16], signed char vector[16])
1420 // CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epu16
1421 // CHECK: call <8 x i16> @vec_subs(unsigned short vector[8], unsigned short vector[8])
1423 // CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epu8
1424 // CHECK: call <16 x i8> @vec_subs(unsigned char vector[16], unsigned char vector[16])
// Test driver fragment for the _mm_ucomi*_sd comparison intrinsics: assigns to
// i the result of _mm_ucomieq_sd, _mm_ucomige_sd, _mm_ucomigt_sd,
// _mm_ucomile_sd, _mm_ucomilt_sd and _mm_ucomineq_sd, each called once with
// (md1, md2). The routine is marked noinline.
// NOTE(review): the routine's name line and braces are not present in this
// text, and i/md1/md2 are declared outside this span; the CHECK-LABEL that
// follows suggests the routine is test_ucomi -- confirm.
1426 void __attribute__((noinline
))
1428 i
= _mm_ucomieq_sd(md1
, md2
);
1429 i
= _mm_ucomige_sd(md1
, md2
);
1430 i
= _mm_ucomigt_sd(md1
, md2
);
1431 i
= _mm_ucomile_sd(md1
, md2
);
1432 i
= _mm_ucomilt_sd(md1
, md2
);
1433 i
= _mm_ucomineq_sd(md1
, md2
);
1436 // CHECK-LABEL: @test_ucomi
1438 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomieq_sd
1439 // CHECK: fcmp oeq double
1441 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomige_sd
1442 // CHECK: fcmp oge double
1444 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomigt_sd
1445 // CHECK: fcmp ogt double
1447 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomile_sd
1448 // CHECK: fcmp ole double
1450 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomilt_sd
1451 // CHECK: fcmp olt double
1453 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomineq_sd
1454 // CHECK: fcmp une double
// Test driver fragment for the argument-less "undefined" intrinsics: assigns
// _mm_undefined_pd() to resd and _mm_undefined_si128() to resi.
// The routine is marked noinline.
// NOTE(review): the routine's name line and braces are not present in this
// text, and resd/resi are declared outside this span; the CHECK-LABEL that
// follows suggests the routine is test_undefined -- confirm.
1456 void __attribute__((noinline
))
1458 resd
= _mm_undefined_pd();
1459 resi
= _mm_undefined_si128();
1462 // CHECK-LABEL: @test_undefined
1464 // CHECK-LABEL: define available_externally <2 x double> @_mm_undefined_pd()
1465 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %[[ADDR:[0-9a-zA-Z_.]+]], align 16
1466 // CHECK: store <2 x double> %[[VAL]], ptr %[[ADDR]], align 16
1467 // CHECK: load <2 x double>, ptr %[[ADDR]], align 16
1469 // CHECK-LABEL: define available_externally <2 x i64> @_mm_undefined_si128()
1470 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x i64>, ptr %[[ADDR:[0-9a-zA-Z_.]+]], align 16
1471 // CHECK: store <2 x i64> %[[VAL]], ptr %[[ADDR]], align 16
1472 // CHECK: load <2 x i64>, ptr %[[ADDR]], align 16
// Test driver fragment for the unpack intrinsics, one call each:
//   resi <- _mm_unpackhi_epi16/_epi32/_epi64/_epi8 (mi1, mi2)
//   resd <- _mm_unpackhi_pd (md1, md2)
//   resi <- _mm_unpacklo_epi16/_epi32/_epi64/_epi8 (mi1, mi2)
//   resd <- _mm_unpacklo_pd (md1, md2)
// The routine is marked noinline.
// NOTE(review): the routine's name line and braces are not present in this
// text, and all operands/results are declared outside this span; the
// CHECK-LABEL that follows suggests the routine is test_unpack -- confirm.
1474 void __attribute__((noinline
))
1476 resi
= _mm_unpackhi_epi16(mi1
, mi2
);
1477 resi
= _mm_unpackhi_epi32(mi1
, mi2
);
1478 resi
= _mm_unpackhi_epi64(mi1
, mi2
);
1479 resi
= _mm_unpackhi_epi8(mi1
, mi2
);
1480 resd
= _mm_unpackhi_pd(md1
, md2
);
1481 resi
= _mm_unpacklo_epi16(mi1
, mi2
);
1482 resi
= _mm_unpacklo_epi32(mi1
, mi2
);
1483 resi
= _mm_unpacklo_epi64(mi1
, mi2
);
1484 resi
= _mm_unpacklo_epi8(mi1
, mi2
);
1485 resd
= _mm_unpacklo_pd(md1
, md2
);
1488 // CHECK-LABEL: @test_unpack
1490 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi16
1491 // CHECK: call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])
1493 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi32
1494 // CHECK: call <4 x i32> @vec_mergel(unsigned int vector[4], unsigned int vector[4])
1496 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi64
1497 // CHECK: call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
1499 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi8
1500 // CHECK: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])
1502 // CHECK-LABEL: define available_externally <2 x double> @_mm_unpackhi_pd
1503 // CHECK: call <2 x double> @vec_mergel(double vector[2], double vector[2])
1505 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi16
1506 // CHECK: call <8 x i16> @vec_mergeh(short vector[8], short vector[8])
1508 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi32
1509 // CHECK: call <4 x i32> @vec_mergeh(int vector[4], int vector[4])
1511 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi64
1512 // CHECK: call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
1514 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi8
1515 // CHECK: call <16 x i8> @vec_mergeh(unsigned char vector[16], unsigned char vector[16])
1517 // CHECK-LABEL: define available_externally <2 x double> @_mm_unpacklo_pd
1518 // CHECK: call <2 x double> @vec_mergeh(double vector[2], double vector[2])