1 // REQUIRES: powerpc-registered-target
3 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
4 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
5 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
6 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE
8 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
9 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P10
11 // RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
12 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
13 // RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
14 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
16 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
17 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
18 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
19 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P10
21 // CHECK-BE-DAG: @_mm_movemask_pd.__perm_mask = internal constant <4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 -2139078656>, align 16
22 // CHECK-BE-DAG: @_mm_shuffle_epi32.__permute_selectors = internal constant [4 x i32] [i32 66051, i32 67438087, i32 134810123, i32 202182159], align 4
23 // CHECK-BE-DAG: @_mm_shufflehi_epi16.__permute_selectors = internal constant [4 x i16] [i16 2057, i16 2571, i16 3085, i16 3599], align 2
24 // CHECK-BE-DAG: @_mm_shufflelo_epi16.__permute_selectors = internal constant [4 x i16] [i16 1, i16 515, i16 1029, i16 1543], align 2
26 // CHECK-LE-DAG: @_mm_movemask_pd.__perm_mask = internal constant <4 x i32> <i32 -2139094976, i32 -2139062144, i32 -2139062144, i32 -2139062144>, align 16
27 // CHECK-LE-DAG: @_mm_shuffle_epi32.__permute_selectors = internal constant [4 x i32] [i32 50462976, i32 117835012, i32 185207048, i32 252579084], align 4
28 // CHECK-LE-DAG: @_mm_shufflehi_epi16.__permute_selectors = internal constant [4 x i16] [i16 2312, i16 2826, i16 3340, i16 3854], align 2
29 // CHECK-LE-DAG: @_mm_shufflelo_epi16.__permute_selectors = internal constant [4 x i16] [i16 256, i16 770, i16 1284, i16 1798], align 2
31 #include <emmintrin.h>
// File-scope operands and result slots shared by the intrinsic calls in this
// file. NOTE(review): each line in this copy carries a leading number that
// looks like an extraction artifact, and declarations for other names used
// further down (res, m1, i, dp, mip, i64s, is) are not visible here.

// 128-bit integer vectors: result plus two inputs.
33 __m128i resi
, mi1
, mi2
;
// 128-bit double-precision vectors: result plus two inputs.
36 __m128d resd
, md1
, md2
;
// 64-bit __m64 values: result plus two inputs.
37 __m64 res64
, m641
, m642
;
// Calls each _mm_add_* and _mm_adds_* intrinsic once on the global operands
// and stores the result in the matching global, so that the wrapper
// definitions matched by the FileCheck directives after this body appear in
// the emitted IR.
// NOTE(review): the function name, parameter list and braces are missing in
// this copy; the label directive that follows names @test_add -- confirm
// against the upstream file.
45 void __attribute__((noinline
))
// Integer lane additions at 64-, 32-, 16- and 8-bit element width.
47 resi
= _mm_add_epi64(mi1
, mi2
);
48 resi
= _mm_add_epi32(mi1
, mi2
);
49 resi
= _mm_add_epi16(mi1
, mi2
);
50 resi
= _mm_add_epi8(mi1
, mi2
);
// Double-precision additions (pd and sd forms).
51 resd
= _mm_add_pd(md1
, md2
);
52 resd
= _mm_add_sd(md1
, md2
);
// __m64 addition.
53 res64
= _mm_add_si64(m641
, m642
);
// _mm_adds_* variants; the directives below expect these to call vec_adds.
54 resi
= _mm_adds_epi16(mi1
, mi2
);
55 resi
= _mm_adds_epi8(mi1
, mi2
);
56 resi
= _mm_adds_epu16(mi1
, mi2
);
57 resi
= _mm_adds_epu8(mi1
, mi2
);
60 // CHECK-LABEL: @test_add
62 // CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi64
63 // CHECK: add <2 x i64>
65 // CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi32
66 // CHECK: add <4 x i32>
68 // CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi16
69 // CHECK: add <8 x i16>
71 // CHECK-LABEL: define available_externally <2 x i64> @_mm_add_epi8
72 // CHECK: add <16 x i8>
74 // CHECK-LABEL: define available_externally <2 x double> @_mm_add_pd
75 // CHECK: fadd <2 x double>
77 // CHECK-LABEL: define available_externally <2 x double> @_mm_add_sd
80 // CHECK-LABEL: define available_externally i64 @_mm_add_si64
83 // CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epi16
84 // CHECK: call <8 x i16> @vec_adds(short vector[8], short vector[8])
86 // CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epi8
87 // CHECK: call <16 x i8> @vec_adds(signed char vector[16], signed char vector[16])
89 // CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epu16
90 // CHECK: call <8 x i16> @vec_adds(unsigned short vector[8], unsigned short vector[8])
92 // CHECK-LABEL: define available_externally <2 x i64> @_mm_adds_epu8
93 // CHECK: call <16 x i8> @vec_adds(unsigned char vector[16], unsigned char vector[16])
// Calls the two unsigned averaging intrinsics once each; the directives that
// follow expect both wrappers to call vec_avg.
// NOTE(review): the function name and braces are missing in this copy; the
// following label directive names @test_avg -- confirm against upstream.
95 void __attribute__((noinline
))
97 resi
= _mm_avg_epu16(mi1
, mi2
);
98 resi
= _mm_avg_epu8(mi1
, mi2
);
101 // CHECK-LABEL: @test_avg
103 // CHECK-LABEL: define available_externally <2 x i64> @_mm_avg_epu16
104 // CHECK: call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])
106 // CHECK-LABEL: define available_externally <2 x i64> @_mm_avg_epu8
107 // CHECK: call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])
// Calls the byte-shift intrinsics _mm_bslli_si128 and _mm_bsrli_si128 with
// the shift count taken from `i`.
// NOTE(review): `i` is not declared in the visible part of the file, and the
// function name and braces are missing in this copy; the following label
// directive names @test_bs -- confirm against upstream.
109 void __attribute__((noinline
))
111 resi
= _mm_bslli_si128(mi1
, i
);
112 resi
= _mm_bsrli_si128(mi1
, i
);
115 // CHECK-LABEL: @test_bs
117 // CHECK-LABEL: define available_externally <2 x i64> @_mm_bslli_si128
118 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
119 // CHECK: br i1 %[[CMP]]
120 // CHECK: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer, i32 noundef zeroext %{{[0-9a-zA-Z_.]+}})
121 // CHECK: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
123 // CHECK-LABEL: define available_externally <2 x i64> @_mm_bsrli_si128
124 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
125 // CHECK: br i1 %[[CMP]]
126 // CHECK-LE: call i1 @llvm.is.constant
127 // CHECK-LE: %[[SUB:[0-9a-zA-Z_.]+]] = sub nsw i32 16, %{{[0-9a-zA-Z_.]+}}
128 // CHECK-LE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext %[[SUB]])
129 // CHECK-LE: %[[MUL:[0-9a-zA-Z_.]+]] = mul nsw i32 %{{[0-9a-zA-Z_.]+}}, 8
130 // CHECK-LE: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[MUL]] to i8
131 // CHECK-LE: call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext %[[TRUNC]])
132 // CHECK-LE: call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])
133 // CHECK-LE: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
134 // CHECK-BE: %[[MUL:[0-9a-zA-Z_.]+]] = mul nsw i32 %{{[0-9a-zA-Z_.]+}}, 8
135 // CHECK-BE: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[MUL]] to i8
136 // CHECK-BE: call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext %[[TRUNC]])
137 // CHECK-BE: call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])
138 // CHECK-BE: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
// Calls every _mm_cast* intrinsic between the pd, ps and si128 vector types
// once. The directives after this body only match the wrapper definitions,
// not their contents.
// NOTE(review): `res` and `m1` are not declared in the visible part of the
// file (presumably the single-precision result/operand -- verify), and the
// function name and braces are missing; the following label directive names
// @test_cast.
140 void __attribute__((noinline
))
142 res
= _mm_castpd_ps(md1
);
143 resi
= _mm_castpd_si128(md1
);
144 resd
= _mm_castps_pd(m1
);
145 resi
= _mm_castps_si128(m1
);
146 resd
= _mm_castsi128_pd(mi1
);
147 res
= _mm_castsi128_ps(mi1
);
150 // CHECK-LABEL: @test_cast
152 // CHECK-LABEL: define available_externally <4 x float> @_mm_castpd_ps
154 // CHECK-LABEL: define available_externally <2 x i64> @_mm_castpd_si128
156 // CHECK-LABEL: define available_externally <2 x double> @_mm_castps_pd
158 // CHECK-LABEL: define available_externally <2 x i64> @_mm_castps_si128
160 // CHECK-LABEL: define available_externally <2 x double> @_mm_castsi128_pd
162 // CHECK-LABEL: define available_externally <4 x float> @_mm_castsi128_ps
// Calls each integer and double-precision comparison intrinsic once on the
// global operands, so that every wrapper definition matched by the directives
// after this body appears in the emitted IR.
// NOTE(review): the function name and braces are missing in this copy; the
// following label directive names @test_cmp -- confirm against upstream.
164 void __attribute__((noinline
))
// Integer comparisons: eq / gt / lt at 32-, 16- and 8-bit element width.
166 resi
= _mm_cmpeq_epi32(mi1
, mi2
);
167 resi
= _mm_cmpeq_epi16(mi1
, mi2
);
168 resi
= _mm_cmpeq_epi8(mi1
, mi2
);
169 resi
= _mm_cmpgt_epi32(mi1
, mi2
);
170 resi
= _mm_cmpgt_epi16(mi1
, mi2
);
171 resi
= _mm_cmpgt_epi8(mi1
, mi2
);
172 resi
= _mm_cmplt_epi32(mi1
, mi2
);
173 resi
= _mm_cmplt_epi16(mi1
, mi2
);
174 resi
= _mm_cmplt_epi8(mi1
, mi2
);
// Double-precision comparisons: each predicate in its pd form, then sd form.
175 resd
= _mm_cmpeq_pd(md1
, md2
);
176 resd
= _mm_cmpeq_sd(md1
, md2
);
177 resd
= _mm_cmpge_pd(md1
, md2
);
178 resd
= _mm_cmpge_sd(md1
, md2
);
179 resd
= _mm_cmpgt_pd(md1
, md2
);
180 resd
= _mm_cmpgt_sd(md1
, md2
);
181 resd
= _mm_cmple_pd(md1
, md2
);
182 resd
= _mm_cmple_sd(md1
, md2
);
183 resd
= _mm_cmplt_pd(md1
, md2
);
184 resd
= _mm_cmplt_sd(md1
, md2
);
// Negated predicates (neq / nge / ngt / nle / nlt).
185 resd
= _mm_cmpneq_pd(md1
, md2
);
186 resd
= _mm_cmpneq_sd(md1
, md2
);
187 resd
= _mm_cmpnge_pd(md1
, md2
);
188 resd
= _mm_cmpnge_sd(md1
, md2
);
189 resd
= _mm_cmpngt_pd(md1
, md2
);
190 resd
= _mm_cmpngt_sd(md1
, md2
);
191 resd
= _mm_cmpnle_pd(md1
, md2
);
192 resd
= _mm_cmpnle_sd(md1
, md2
);
193 resd
= _mm_cmpnlt_pd(md1
, md2
);
194 resd
= _mm_cmpnlt_sd(md1
, md2
);
// Ordered / unordered predicates.
195 resd
= _mm_cmpord_pd(md1
, md2
);
196 resd
= _mm_cmpord_sd(md1
, md2
);
197 resd
= _mm_cmpunord_pd(md1
, md2
);
198 resd
= _mm_cmpunord_sd(md1
, md2
);
201 // CHECK-LABEL: @test_cmp
203 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpeq_epi32
204 // CHECK: call <4 x i32> @vec_cmpeq(int vector[4], int vector[4])
206 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpeq_epi16
207 // CHECK: call <8 x i16> @vec_cmpeq(short vector[8], short vector[8])
209 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpeq_epi8
210 // CHECK: call <16 x i8> @vec_cmpeq(signed char vector[16], signed char vector[16])
212 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpgt_epi32
213 // CHECK: call <4 x i32> @vec_cmpgt(int vector[4], int vector[4])
215 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpgt_epi16
216 // CHECK: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])
218 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmpgt_epi8
219 // CHECK: call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16])
221 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmplt_epi32
222 // CHECK: call <4 x i32> @vec_cmplt(int vector[4], int vector[4])
224 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmplt_epi16
225 // CHECK: call <8 x i16> @vec_cmplt(short vector[8], short vector[8])
227 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cmplt_epi8
228 // CHECK: call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])
230 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpeq_pd
231 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
233 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpeq_sd
234 // CHECK: call <2 x double> @vec_splats(double)
235 // CHECK: call <2 x double> @vec_splats(double)
236 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
237 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
239 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpge_pd
240 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
242 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpge_sd
243 // CHECK: call <2 x double> @vec_splats(double)
244 // CHECK: call <2 x double> @vec_splats(double)
245 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
246 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
248 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpgt_pd
249 // CHECK: call <2 x i64> @vec_cmpgt(double vector[2], double vector[2])
251 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpgt_sd
252 // CHECK: call <2 x double> @vec_splats(double)
253 // CHECK: call <2 x double> @vec_splats(double)
254 // CHECK: call <2 x i64> @vec_cmpgt(double vector[2], double vector[2])
255 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
257 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmple_pd
258 // CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2])
260 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmple_sd
261 // CHECK: call <2 x double> @vec_splats(double)
262 // CHECK: call <2 x double> @vec_splats(double)
263 // CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2])
264 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
266 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmplt_pd
267 // CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
269 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmplt_sd
270 // CHECK: call <2 x double> @vec_splats(double)
271 // CHECK: call <2 x double> @vec_splats(double)
272 // CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
273 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
275 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpneq_pd
276 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
277 // CHECK: call <2 x double> @vec_nor(double vector[2], double vector[2])
279 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpneq_sd
280 // CHECK: call <2 x double> @vec_splats(double)
281 // CHECK: call <2 x double> @vec_splats(double)
282 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
283 // CHECK: call <2 x double> @vec_nor(double vector[2], double vector[2])
284 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
286 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnge_pd
287 // CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
289 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnge_sd
290 // CHECK: call <2 x double> @vec_splats(double)
291 // CHECK: call <2 x double> @vec_splats(double)
292 // CHECK: call <2 x i64> @vec_cmplt(double vector[2], double vector[2])
293 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
295 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpngt_pd
296 // CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2])
298 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpngt_sd
299 // CHECK: call <2 x double> @vec_splats(double)
300 // CHECK: call <2 x double> @vec_splats(double)
301 // CHECK: call <2 x i64> @vec_cmple(double vector[2], double vector[2])
302 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
304 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnle_pd
305 // CHECK: call <2 x i64> @vec_cmpgt(double vector[2], double vector[2])
307 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnle_sd
308 // CHECK: call <2 x double> @vec_splats(double)
309 // CHECK: call <2 x double> @vec_splats(double)
310 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
311 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
313 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnlt_pd
314 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
316 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpnlt_sd
317 // CHECK: call <2 x double> @vec_splats(double)
318 // CHECK: call <2 x double> @vec_splats(double)
319 // CHECK: call <2 x i64> @vec_cmpge(double vector[2], double vector[2])
320 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
322 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpord_pd
323 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
324 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
325 // CHECK: call <2 x i64> @vec_and(unsigned long long vector[2], unsigned long long vector[2])
327 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpord_sd
328 // CHECK: call <2 x double> @vec_splats(double)
329 // CHECK: call <2 x double> @vec_splats(double)
330 // CHECK: call <2 x double> @_mm_cmpord_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}})
331 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
333 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpunord_pd
334 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
335 // CHECK: call <2 x i64> @vec_cmpeq(double vector[2], double vector[2])
336 // CHECK: call <2 x i64> @vec_nor(unsigned long long vector[2], unsigned long long vector[2])
337 // CHECK: call <2 x i64> @vec_orc(unsigned long long vector[2], unsigned long long vector[2])
339 // CHECK-LABEL: define available_externally <2 x double> @_mm_cmpunord_sd
340 // CHECK: call <2 x double> @vec_splats(double)
341 // CHECK: call <2 x double> @vec_splats(double)
342 // CHECK: call <2 x double> @_mm_cmpunord_pd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}})
343 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
// Calls each _mm_comi*_sd intrinsic once and stores the integer result in
// `i`; the directives after this body expect an fcmp on double followed by a
// zext to i32 in every wrapper.
// NOTE(review): `i` is not declared in the visible part of the file, and the
// function name and braces are missing in this copy; the following label
// directive names @test_comi -- confirm against upstream.
345 void __attribute__((noinline
))
347 i
= _mm_comieq_sd(md1
, md2
);
348 i
= _mm_comige_sd(md1
, md2
);
349 i
= _mm_comigt_sd(md1
, md2
);
350 i
= _mm_comile_sd(md1
, md2
);
351 i
= _mm_comilt_sd(md1
, md2
);
352 i
= _mm_comineq_sd(md1
, md2
);
355 // CHECK-LABEL: @test_comi
357 // CHECK-LABEL: define available_externally signext i32 @_mm_comieq_sd
358 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oeq double
359 // CHECK: zext i1 %[[CMP]] to i32
361 // CHECK-LABEL: define available_externally signext i32 @_mm_comige_sd
362 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oge double
363 // CHECK: zext i1 %[[CMP]] to i32
365 // CHECK-LABEL: define available_externally signext i32 @_mm_comigt_sd
366 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ogt double
367 // CHECK: zext i1 %[[CMP]] to i32
369 // CHECK-LABEL: define available_externally signext i32 @_mm_comile_sd
370 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ole double
371 // CHECK: zext i1 %[[CMP]] to i32
373 // CHECK-LABEL: define available_externally signext i32 @_mm_comilt_sd
374 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp olt double
375 // CHECK: zext i1 %[[CMP]] to i32
377 // CHECK-LABEL: define available_externally signext i32 @_mm_comineq_sd
378 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp une double
379 // CHECK: zext i1 %[[CMP]] to i32
// NOTE(review): only the attribute prefix of this function survives in this
// copy; its name and whole body are missing. The directives that follow name
// @test_control and match the _mm_clflush, _mm_lfence, _mm_mfence and
// _mm_pause wrappers, so the body presumably calls those four -- restore it
// from the upstream file.
381 void __attribute__((noinline
))
389 // CHECK-LABEL: @test_control
391 // CHECK-LABEL: define available_externally void @_mm_clflush
392 // CHECK: call void asm sideeffect "dcbf 0,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})
394 // CHECK-LABEL: define available_externally void @_mm_lfence()
395 // CHECK: fence release
397 // CHECK-LABEL: define available_externally void @_mm_mfence()
398 // CHECK: fence seq_cst
400 // CHECK-LABEL: define available_externally void @_mm_pause()
401 // CHECK: call i64 asm sideeffect "\09mfppr\09$0; or 31,31,31; isync; lwsync; isync; mtppr\09$0;", "=r,~{memory}"()
// Calls each _mm_cvt* / _mm_cvtt* conversion intrinsic once, so that every
// wrapper definition matched by the directives after this body appears in the
// emitted IR.
// NOTE(review): `res`, `m1`, `dp`, `i` and `i64s` are not declared in the
// visible part of the file, and the function name and braces are missing in
// this copy; the following label directive names @test_converts -- confirm
// against upstream.
403 void __attribute__((noinline
))
// Packed conversions between epi32 / pi32 / pd / ps.
405 resd
= _mm_cvtepi32_pd(mi1
);
406 res
= _mm_cvtepi32_ps(mi1
);
407 resi
= _mm_cvtpd_epi32(md1
);
408 res64
= _mm_cvtpd_pi32(md1
);
409 res
= _mm_cvtpd_ps(md1
);
// Reads res64, which was written two statements earlier by _mm_cvtpd_pi32.
410 resd
= _mm_cvtpi32_pd(res64
);
411 resi
= _mm_cvtps_epi32(m1
);
412 resd
= _mm_cvtps_pd(m1
);
// Conversions from a double vector to scalar results (written through dp,
// i and i64s[0]).
413 *dp
= _mm_cvtsd_f64(md1
);
414 i
= _mm_cvtsd_si32(md1
);
415 i64s
[0] = _mm_cvtsd_si64(md1
);
416 i64s
[0] = _mm_cvtsd_si64x(md1
);
417 res
= _mm_cvtsd_ss(m1
, md2
);
// Conversions from an integer vector to scalar integers.
418 i
= _mm_cvtsi128_si32(mi1
);
419 i64s
[0] = _mm_cvtsi128_si64(mi1
);
420 i64s
[0] = _mm_cvtsi128_si64x(mi1
);
// Scalar integer inputs (i and i64s[1]) converted into vectors.
421 resd
= _mm_cvtsi32_sd(md1
, i
);
422 resi
= _mm_cvtsi32_si128(i
);
423 resd
= _mm_cvtsi64_sd(md1
, i64s
[1]);
424 resi
= _mm_cvtsi64_si128(i64s
[1]);
425 resd
= _mm_cvtsi64x_sd(md1
, i64s
[1]);
426 resi
= _mm_cvtsi64x_si128(i64s
[1]);
427 resd
= _mm_cvtss_sd(md1
, m1
);
// _mm_cvtt* variants.
428 resi
= _mm_cvttpd_epi32(md1
);
429 res64
= _mm_cvttpd_pi32(md1
);
430 resi
= _mm_cvttps_epi32(m1
);
431 i
= _mm_cvttsd_si32(md1
);
432 i64s
[0] = _mm_cvttsd_si64(md1
);
433 i64s
[0] = _mm_cvttsd_si64x(md1
);
436 // CHECK-LABEL: @test_converts
438 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtepi32_pd
439 // CHECK: call <2 x i64> @vec_unpackh(int vector[4])
440 // CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z_.]+}} to <2 x double>
441 // CHECK: fmul <2 x double> %[[CONV]], <double 1.000000e+00, double 1.000000e+00>
443 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtepi32_ps
444 // CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
446 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtpd_epi32
447 // CHECK: call <2 x double> @vec_rint(double vector[2])
448 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
449 // CHECK: call <4 x i32> asm "xvcvdpsxws ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %{{[0-9a-zA-Z_.]+}})
450 // CHECK-LE: call <4 x i32> @vec_mergeo(int vector[4], int vector[4])
451 // CHECK-BE: call <4 x i32> @vec_mergee(int vector[4], int vector[4])
452 // CHECK: call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef zeroinitializer)
454 // CHECK-LABEL: define available_externally i64 @_mm_cvtpd_pi32
455 // CHECK: call <2 x i64> @_mm_cvtpd_epi32(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
456 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
458 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpd_ps
459 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
460 // CHECK: call <4 x i32> asm "xvcvdpsp ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %{{[0-9a-zA-Z_.]+}})
461 // CHECK-LE: call <4 x i32> @vec_mergeo(int vector[4], int vector[4])
462 // CHECK-BE: call <4 x i32> @vec_mergee(int vector[4], int vector[4])
463 // CHECK: call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef zeroinitializer)
465 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtpi32_pd
466 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
467 // CHECK: call <2 x i64> @vec_unpackl(int vector[4])
468 // CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z._]+}} to <2 x double>
469 // CHECK: fmul <2 x double> %[[CONV]], <double 1.000000e+00, double 1.000000e+00>
471 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtps_epi32
472 // CHECK: call <4 x float> @vec_rint(float vector[4])
473 // CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
475 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtps_pd
476 // CHECK-BE: call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
477 // CHECK-BE: call <2 x double> asm " xvcvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> %{{[0-9a-zA-Z_.]+}})
478 // CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
479 // CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
480 // CHECK-LE: call <2 x double> asm " xvcvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> %{{[0-9a-zA-Z_.]+}})
482 // CHECK-LABEL: define available_externally double @_mm_cvtsd_f64
483 // CHECK: extractelement <2 x double> %{{[0-9a-zA-Z_.]+}}, i32 0
485 // CHECK-LABEL: define available_externally signext i32 @_mm_cvtsd_si32
486 // CHECK: call <2 x double> @vec_rint(double vector[2])
487 // CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i32
489 // CHECK-LABEL: define available_externally i64 @_mm_cvtsd_si64
490 // CHECK: call <2 x double> @vec_rint(double vector[2])
491 // CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i64
493 // CHECK-LABEL: define available_externally i64 @_mm_cvtsd_si64x
494 // CHECK: call i64 @_mm_cvtsd_si64(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
496 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtsd_ss
497 // CHECK-BE: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x double> %{{[0-9a-zA-Z_.]+}}, i32 0
498 // CHECK-BE: %[[TRUNC:[0-9a-zA-Z_.]+]] = fptrunc double %[[EXT]] to float
499 // CHECK-BE: insertelement <4 x float> %{{[0-9a-zA-Z_.]+}}, float %[[TRUNC]], i32 0
500 // CHECK-LE: call <2 x double> @vec_splat(double vector[2], unsigned int)(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
501 // CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
502 // CHECK-LE: call <4 x float> asm "xscvdpsp ${0:x},${1:x}", "=^wa,^wa"(<2 x double> %{{[0-9a-zA-Z_.]+}})
503 // CHECK-LE: shufflevector <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
505 // CHECK-LABEL: define available_externally signext i32 @_mm_cvtsi128_si32
506 // CHECK: extractelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0
508 // CHECK-LABEL: define available_externally i64 @_mm_cvtsi128_si64
509 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
511 // CHECK-LABEL: define available_externally i64 @_mm_cvtsi128_si64x
512 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
514 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtsi32_sd
515 // CHECK: sitofp i32 %{{[0-9a-zA-Z_.]+}} to double
517 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi32_si128
518 // CHECK: call <2 x i64> @_mm_set_epi32(i32 noundef signext 0, i32 noundef signext 0, i32 noundef signext 0, i32 noundef signext %{{[0-9a-zA-Z_.]+}})
520 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtsi64_sd
521 // CHECK: sitofp i64 %{{[0-9a-zA-Z_.]+}} to double
523 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi64_si128
524 // CHECK: %[[INS:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> undef, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
525 // CHECK: insertelement <2 x i64> %[[INS]], i64 0, i32 1
527 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtsi64x_sd
528 // CHECK: call <2 x double> @_mm_cvtsi64_sd(<2 x double> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %{{[0-9a-zA-Z_.]+}})
530 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtsi64x_si128
531 // CHECK: %[[INS:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> undef, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
532 // CHECK: insertelement <2 x i64> %[[INS]], i64 0, i32 1
534 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtss_sd
535 // CHECK-BE: fpext float %{{[0-9a-zA-Z_.]+}} to double
536 // CHECK-LE: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
537 // CHECK-LE: call <2 x double> asm "xscvspdp ${0:x},${1:x}", "=^wa,^wa"(<4 x float> %{{[0-9a-zA-Z_.]+}})
538 // CHECK-LE: call <2 x double> @vec_mergel(double vector[2], double vector[2])
540 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvttpd_epi32
541 // CHECK: call <4 x i32> asm "xvcvdpsxws ${0:x},${1:x}", "=^wa,^wa"
542 // CHECK-LE: call <4 x i32> @vec_mergeo(int vector[4], int vector[4])
543 // CHECK-BE: call <4 x i32> @vec_mergee(int vector[4], int vector[4])
544 // CHECK: call <4 x i32> @vec_vpkudum(long long vector[2], long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef zeroinitializer)
546 // CHECK-LABEL: define available_externally i64 @_mm_cvttpd_pi32
547 // CHECK: call <2 x i64> @_mm_cvttpd_epi32(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
549 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvttps_epi32
550 // CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
552 // CHECK-LABEL: define available_externally signext i32 @_mm_cvttsd_si32
553 // CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i32
555 // CHECK-LABEL: define available_externally i64 @_mm_cvttsd_si64
556 // CHECK: fptosi double %{{[0-9a-zA-Z_.]+}} to i64
558 // CHECK-LABEL: define available_externally i64 @_mm_cvttsd_si64x
559 // CHECK: call i64 @_mm_cvttsd_si64(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
// Calls _mm_div_pd and _mm_div_sd once each; the directives after this body
// expect an fdiv on <2 x double> and on double respectively.
// NOTE(review): the function name and braces are missing in this copy; the
// following label directive names @test_div -- confirm against upstream.
561 void __attribute__((noinline
))
563 resd
= _mm_div_pd(md1
, md2
);
564 resd
= _mm_div_sd(md1
, md2
);
567 // CHECK-LABEL: @test_div
569 // CHECK-LABEL: define available_externally <2 x double> @_mm_div_pd
570 // CHECK: fdiv <2 x double>
572 // CHECK-LABEL: define available_externally <2 x double> @_mm_div_sd
573 // CHECK: fdiv double
// Calls _mm_extract_epi16 with `i` as the element selector and writes the
// result back to `i`; the directives after this body expect the selector to
// be masked with 7 before the extractelement.
// NOTE(review): `i` is not declared in the visible part of the file, and the
// function name and braces are missing in this copy; the following label
// directive names @test_extract -- confirm against upstream.
575 void __attribute__((noinline
))
577 i
= _mm_extract_epi16(mi1
, i
);
580 // CHECK-LABEL: @test_extract
582 // CHECK-LABEL: define available_externally signext i32 @_mm_extract_epi16
583 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 7
584 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <8 x i16> %{{[0-9a-zA-Z_.]+}}, i32 %[[AND]]
585 // CHECK: zext i16 %[[EXT]] to i32
// Calls _mm_insert_epi16 with `i` as the second argument and is[0] as the
// third.
// NOTE(review): `i` and `is` are not declared in the visible part of the
// file, and the function name and braces are missing in this copy; the
// following label directive names @test_insert -- confirm against upstream.
587 void __attribute__((noinline
))
589 resi
= _mm_insert_epi16 (mi1
, i
, is
[0]);
592 // CHECK-LABEL: @test_insert
594 // CHECK-LABEL: define available_externally <2 x i64> @_mm_insert_epi16
595 // CHECK: trunc i32 %{{[0-9a-zA-Z_.]+}} to i16
596 // CHECK: and i32 %{{[0-9a-zA-Z_.]+}}, 7
// Calls each _mm_load* intrinsic once, reading through the pointers `dp`
// (double-precision loads) and `mip` (integer loads).
// NOTE(review): `dp` and `mip` are not declared in the visible part of the
// file, and the function name and braces are missing in this copy; the
// following label directive names @test_load -- confirm against upstream.
598 void __attribute__((noinline
))
600 resd
= _mm_load_pd(dp
);
601 resd
= _mm_load_pd1(dp
);
602 resd
= _mm_load_sd(dp
);
603 resi
= _mm_load_si128(mip
);
604 resd
= _mm_load1_pd(dp
);
// loadh / loadl take an existing vector (md1) plus the pointer.
605 resd
= _mm_loadh_pd(md1
, dp
);
606 resi
= _mm_loadl_epi64(mip
);
607 resd
= _mm_loadl_pd(md1
, dp
);
608 resd
= _mm_loadr_pd(dp
);
// _mm_loadu_* forms; the directives below expect these to call vec_vsx_ld.
609 resd
= _mm_loadu_pd(dp
);
610 resi
= _mm_loadu_si128(mip
);
613 // CHECK-LABEL: @test_load
615 // CHECK-LABEL: define available_externally <2 x double> @_mm_load_pd
616 // CHECK: call <16 x i8> @vec_ld(long, unsigned char vector[16] const*)(i64 noundef 0, ptr noundef %{{[0-9a-zA-Z_.]+}})
618 // CHECK-LABEL: define available_externally <2 x double> @_mm_load_pd1
619 // CHECK: call <2 x double> @_mm_load1_pd(ptr noundef %{{[0-9a-zA-Z_.]+}})
621 // CHECK-LABEL: define available_externally <2 x double> @_mm_load_sd
622 // CHECK: call <2 x double> @_mm_set_sd(double noundef %{{[0-9a-zA-Z_.]+}})
624 // CHECK-LABEL: define available_externally <2 x i64> @_mm_load_si128
625 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
626 // CHECK: load <2 x i64>, ptr %[[ADDR]], align 16
628 // CHECK-LABEL: define available_externally <2 x double> @_mm_load1_pd
629 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
630 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load double, ptr %[[ADDR]]
631 // CHECK: call <2 x double> @vec_splats(double)(double noundef %[[VAL]])
633 // CHECK-LABEL: define available_externally <2 x double> @_mm_loadh_pd
634 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
635 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load double, ptr %{{[0-9a-zA-Z_.]+}}
636 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
637 // CHECK: insertelement <2 x double> %[[VEC]], double %[[VAL]], i32 1
639 // CHECK-LABEL: define available_externally <2 x i64> @_mm_loadl_epi64
640 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef %{{[0-9a-zA-Z_.]+}})
642 // CHECK-LABEL: define available_externally <2 x double> @_mm_loadl_pd
643 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
644 // CHECK: %[[ADDR2:[0-9a-zA-Z_.]+]] = load double, ptr %[[ADDR]]
645 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
646 // CHECK: insertelement <2 x double> %[[VEC]], double %[[ADDR2]], i32 0
648 // CHECK-LABEL: define available_externally <2 x double> @_mm_loadr_pd
649 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
650 // CHECK: call <2 x double> @_mm_load_pd(ptr noundef %[[ADDR]])
651 // CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 1, i32 2>
653 // CHECK-LABEL: define available_externally <2 x double> @_mm_loadu_pd
654 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
655 // CHECK: call <2 x double> @vec_vsx_ld(int, double const*)(i32 noundef signext 0, ptr noundef %[[ADDR]])
657 // CHECK-LABEL: define available_externally <2 x i64> @_mm_loadu_si128
658 // CHECK: load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
659 // CHECK: call <4 x i32> @vec_vsx_ld(int, int const*)(i32 noundef signext 0, ptr noundef %{{[0-9a-zA-Z_.]+}})
661 void __attribute__((noinline
))
663 resd
= _mm_and_pd(md1
, md2
);
664 resi
= _mm_and_si128(mi1
, mi2
);
665 resd
= _mm_andnot_pd(md1
, md2
);
666 resi
= _mm_andnot_si128(mi1
, mi2
);
667 resd
= _mm_xor_pd(md1
, md2
);
668 resi
= _mm_xor_si128(mi1
, mi2
);
669 resd
= _mm_or_pd(md1
, md2
);
670 resi
= _mm_or_si128(mi1
, mi2
);
673 // CHECK-LABEL: @test_logical
675 // CHECK-LABEL: define available_externally <2 x double> @_mm_and_pd
676 // CHECK: call <2 x double> @vec_and(double vector[2], double vector[2])
678 // CHECK-LABEL: define available_externally <2 x i64> @_mm_and_si128
679 // CHECK: call <2 x i64> @vec_and(long long vector[2], long long vector[2])
681 // CHECK-LABEL: define available_externally <2 x double> @_mm_andnot_pd
682 // CHECK: call <2 x double> @vec_andc(double vector[2], double vector[2])
684 // CHECK-LABEL: define available_externally <2 x i64> @_mm_andnot_si128
685 // CHECK: call <2 x i64> @vec_andc(long long vector[2], long long vector[2])
687 // CHECK-LABEL: define available_externally <2 x double> @_mm_xor_pd
688 // CHECK: call <2 x double> @vec_xor(double vector[2], double vector[2])
690 // CHECK-LABEL: define available_externally <2 x i64> @_mm_xor_si128
691 // CHECK: call <2 x i64> @vec_xor(long long vector[2], long long vector[2])
693 // CHECK-LABEL: define available_externally <2 x double> @_mm_or_pd
694 // CHECK: call <2 x double> @vec_or(double vector[2], double vector[2])
696 // CHECK-LABEL: define available_externally <2 x i64> @_mm_or_si128
697 // CHECK: call <2 x i64> @vec_or(long long vector[2], long long vector[2])
699 void __attribute__((noinline
))
701 resi
= _mm_max_epi16(mi1
, mi2
);
702 resi
= _mm_max_epu8(mi1
, mi2
);
703 resd
= _mm_max_pd(md1
, md2
);
704 resd
= _mm_max_sd(md1
, md2
);
707 // CHECK-LABEL: @test_max
709 // CHECK-LABEL: define available_externally <2 x i64> @_mm_max_epi16
710 // CHECK: call <8 x i16> @vec_max(short vector[8], short vector[8])
712 // CHECK-LABEL: define available_externally <2 x i64> @_mm_max_epu8
713 // CHECK: call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])
715 // CHECK-LABEL: define available_externally <2 x double> @_mm_max_pd
716 // CHECK: call <2 x double> @vec_max(double vector[2], double vector[2])
718 // CHECK-LABEL: define available_externally <2 x double> @_mm_max_sd
719 // CHECK: call <2 x double> @vec_splats(double)
720 // CHECK: call <2 x double> @vec_splats(double)
721 // CHECK: call <2 x double> @vec_max(double vector[2], double vector[2])
722 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
724 void __attribute__((noinline
))
726 resi
= _mm_min_epi16(mi1
, mi2
);
727 resi
= _mm_min_epu8(mi1
, mi2
);
728 resd
= _mm_min_pd(md1
, md2
);
729 resd
= _mm_min_sd(md1
, md2
);
732 // CHECK-LABEL: @test_min
734 // CHECK-LABEL: define available_externally <2 x i64> @_mm_min_epi16
735 // CHECK: call <8 x i16> @vec_min(short vector[8], short vector[8])
737 // CHECK-LABEL: define available_externally <2 x i64> @_mm_min_epu8
738 // CHECK: call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])
740 // CHECK-LABEL: define available_externally <2 x double> @_mm_min_pd
741 // CHECK: call <2 x double> @vec_min(double vector[2], double vector[2])
743 // CHECK-LABEL: define available_externally <2 x double> @_mm_min_sd
744 // CHECK: call <2 x double> @vec_splats(double)
745 // CHECK: call <2 x double> @vec_splats(double)
746 // CHECK: call <2 x double> @vec_min(double vector[2], double vector[2])
747 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
749 void __attribute__((noinline
))
751 resi
= _mm_move_epi64(mi1
);
752 resd
= _mm_move_sd(md1
, md2
);
753 i
= _mm_movemask_epi8(mi1
);
754 i
= _mm_movemask_pd(md1
);
755 res64
= _mm_movepi64_pi64(mi1
);
756 resi
= _mm_movpi64_epi64(m641
);
757 _mm_maskmoveu_si128(mi1
, mi2
, chs
);
760 // CHECK-LABEL: @test_move
762 // CHECK-LABEL: define available_externally <2 x i64> @_mm_move_epi64
763 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef %{{[0-9a-zA-Z_.]+}})
765 // CHECK-LABEL: define available_externally <2 x double> @_mm_move_sd
766 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x double> %{{[0-9a-zA-Z_.]+}}, i32 0
767 // CHECK: insertelement <2 x double> %{{[0-9a-zA-Z_.]+}}, double %[[EXT]], i32 0
769 // CHECK-P10-LABEL: define available_externally signext i32 @_mm_movemask_epi8
770 // CHECK-P10: call zeroext i32 @vec_extractm(unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}})
772 // CHECK-LABEL: define available_externally signext i32 @_mm_movemask_epi8
773 // CHECK: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 120, i8 112, i8 104, i8 96, i8 88, i8 80, i8 72, i8 64, i8 56, i8 48, i8 40, i8 32, i8 24, i8 16, i8 8, i8 0>)
774 // CHECK-LE: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
775 // CHECK-BE: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
776 // CHECK: trunc i64 %[[VAL]] to i32
778 // CHECK-P10-LABEL: define available_externally signext i32 @_mm_movemask_pd
779 // CHECK-P10: call zeroext i32 @vec_extractm(unsigned long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}})
781 // CHECK-LABEL: define available_externally signext i32 @_mm_movemask_pd
782 // CHECK-LE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 -2139094976, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>))
783 // CHECK-LE: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
784 // CHECK-BE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 -2139078656> to <16 x i8>))
785 // CHECK-BE: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
787 // CHECK-LABEL: define available_externally i64 @_mm_movepi64_pi64
788 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
790 // CHECK-LABEL: define available_externally <2 x i64> @_mm_movpi64_epi64
791 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef 0, i64 noundef %{{[0-9a-zA-Z_.]+}})
793 // CHECK-LABEL: define available_externally void @_mm_maskmoveu_si128
794 // CHECK: call <2 x i64> @_mm_loadu_si128(ptr noundef %{{[0-9a-zA-Z_.]+}})
795 // CHECK: call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16])
796 // CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], unsigned char vector[16])
797 // CHECK: call void @_mm_storeu_si128(ptr noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef %{{[0-9a-zA-Z_.]+}})
799 void __attribute__((noinline
))
801 resi
= _mm_mul_epu32(mi1
, mi2
);
802 resd
= _mm_mul_pd(md1
, md2
);
803 resd
= _mm_mul_sd(md1
, md2
);
804 res64
= _mm_mul_su32(m641
, m642
);
805 resi
= _mm_mulhi_epi16(mi1
, mi2
);
806 resi
= _mm_mulhi_epu16(mi1
, mi2
);
807 resi
= _mm_mullo_epi16(mi1
, mi2
);
810 // CHECK-LABEL: @test_mul
812 // CHECK-LABEL: define available_externally <2 x i64> @_mm_mul_epu32
813 // CHECK-LE: call <2 x i64> asm "vmulouw $0,$1,$2", "=v,v,v"(<2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}})
814 // CHECK-BE: call <2 x i64> asm "vmuleuw $0,$1,$2", "=v,v,v"(<2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}})
816 // CHECK-LABEL: define available_externally <2 x double> @_mm_mul_pd
817 // CHECK: fmul <2 x double>
819 // CHECK-LABEL: define available_externally <2 x double> @_mm_mul_sd
820 // CHECK: fmul double
822 // CHECK-LABEL: define available_externally i64 @_mm_mul_su32
823 // CHECK: trunc i64 %{{[0-9a-zA-Z_.]+}} to i32
824 // CHECK: trunc i64 %{{[0-9a-zA-Z_.]+}} to i32
825 // CHECK: %[[EXT1:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
826 // CHECK: %[[EXT2:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
827 // CHECK: mul i64 %[[EXT1]], %[[EXT2]]
829 // CHECK-LABEL: define available_externally <2 x i64> @_mm_mulhi_epi16
830 // CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
831 // CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 8, i8 9, i8 24, i8 25, i8 12, i8 13, i8 28, i8 29>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
832 // CHECK: call <4 x i32> @vec_vmulesh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
833 // CHECK: call <4 x i32> @vec_vmulosh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
834 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])
836 // CHECK-LABEL: define available_externally <2 x i64> @_mm_mulhi_epu16
837 // CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
838 // CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 8, i8 9, i8 24, i8 25, i8 12, i8 13, i8 28, i8 29>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
839 // CHECK: call <4 x i32> @vec_vmuleuh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
840 // CHECK: call <4 x i32> @vec_vmulouh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
841 // CHECK: call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16])
843 // CHECK-LABEL: define available_externally <2 x i64> @_mm_mullo_epi16
844 // CHECK: mul <8 x i16>
846 void __attribute__((noinline
))
848 resi
= _mm_packs_epi16(mi1
, mi2
);
849 resi
= _mm_packs_epi32(mi1
, mi2
);
850 resi
= _mm_packus_epi16(mi1
, mi2
);
853 // CHECK-LABEL: @test_pack
855 // CHECK-LABEL: define available_externally <2 x i64> @_mm_packs_epi16
856 // CHECK: call <16 x i8> @vec_packs(short vector[8], short vector[8])
858 // CHECK-LABEL: define available_externally <2 x i64> @_mm_packs_epi32
859 // CHECK: call <8 x i16> @vec_packs(int vector[4], int vector[4])
861 // CHECK-LABEL: define available_externally <2 x i64> @_mm_packus_epi16
862 // CHECK: call <16 x i8> @vec_packsu(short vector[8], short vector[8])
864 void __attribute__((noinline
))
866 resi
= _mm_sad_epu8(mi1
, mi2
);
869 // CHECK-LABEL: @test_sad
871 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sad_epu8
872 // CHECK: call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])
873 // CHECK: call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])
874 // CHECK: call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16])
875 // CHECK: call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
876 // CHECK-LE: call <4 x i32> asm "vsum2sws $0,$1,$2", "=v,v,v"(<4 x i32> %{{[0-9a-zA-Z_.]+}}, <4 x i32> zeroinitializer)
877 // CHECK-BE: call <4 x i32> @vec_sum2s(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
878 // CHECK-BE: call <4 x i32> @vec_sld(int vector[4], int vector[4], unsigned int)
880 void __attribute__((noinline
))
882 resi
= _mm_set_epi16(ss
[7], ss
[6], ss
[5], ss
[4], ss
[3], ss
[2], ss
[1], ss
[0]);
883 resi
= _mm_set_epi32(is
[3], is
[2], is
[1], is
[0]);
884 resi
= _mm_set_epi64(m641
, m642
);
885 resi
= _mm_set_epi64x(i64s
[0], i64s
[1]);
886 resi
= _mm_set_epi8(chs
[15], chs
[14], chs
[13], chs
[12], chs
[11], chs
[10], chs
[9], chs
[8], chs
[7], chs
[6], chs
[5], chs
[4], chs
[3], chs
[2], chs
[1], chs
[0]);
887 resd
= _mm_set_pd(dp
[0], dp
[1]);
888 resd
= _mm_set_pd1(dp
[0]);
889 resd
= _mm_set_sd(dp
[0]);
890 resi
= _mm_set1_epi16(ss
[0]);
891 resi
= _mm_set1_epi32(i
);
892 resi
= _mm_set1_epi64(m641
);
893 resi
= _mm_set1_epi64x(i64s
[0]);
894 resi
= _mm_set1_epi8(chs
[0]);
895 resd
= _mm_set1_pd(dp
[0]);
896 resi
= _mm_setr_epi16(ss
[7], ss
[6], ss
[5], ss
[4], ss
[3], ss
[2], ss
[1], ss
[0]);
897 resi
= _mm_setr_epi32(is
[3], is
[2], is
[1], is
[0]);
898 resi
= _mm_setr_epi64(m641
, m642
);
899 resi
= _mm_setr_epi8(chs
[15], chs
[14], chs
[13], chs
[12], chs
[11], chs
[10], chs
[9], chs
[8], chs
[7], chs
[6], chs
[5], chs
[4], chs
[3], chs
[2], chs
[1], chs
[0]);
900 resd
= _mm_setr_pd(dp
[0], dp
[1]);
901 resd
= _mm_setzero_pd();
902 resi
= _mm_setzero_si128();
905 // CHECK-LABEL: @test_set
907 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi16
908 // CHECK-COUNT-8: store i16 {{[0-9a-zA-Z_%.]+}}, ptr {{[0-9a-zA-Z_%.]+}}, align 2
909 // CHECK: insertelement <8 x i16> undef, i16 {{[0-9a-zA-Z_%.]+}}, i32 0
910 // CHECK-COUNT-7: insertelement <8 x i16> {{[0-9a-zA-Z_%.]+}}, i16 {{[0-9a-zA-Z_%.]+}}, i32 {{[1-7]}}
912 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi32
913 // CHECK-COUNT-4: store i32 {{[0-9a-zA-Z_%.]+}}, ptr {{[0-9a-zA-Z_%.]+}}, align 4
914 // CHECK: insertelement <4 x i32> undef, i32 {{[0-9a-zA-Z_%.]+}}, i32 0
915 // CHECK-COUNT-3: insertelement <4 x i32> {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9a-zA-Z_%.]+}}, i32 {{[1-3]}}
917 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi64
918 // CHECK: call <2 x i64> @_mm_set_epi64x(i64 noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef %{{[0-9a-zA-Z_.]+}})
920 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi64x
921 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x i64> undef, i64 %{{[0-9a-zA-Z_.]+}}, i32 0
922 // CHECK: insertelement <2 x i64> %[[VEC]], i64 %{{[0-9a-zA-Z_.]+}}, i32 1
924 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set_epi8
925 // CHECK-COUNT-16: store i8 {{[0-9a-zA-Z_%.]+}}, ptr {{[0-9a-zA-Z_%.]+}}, align 1
926 // CHECK: insertelement <16 x i8> undef, i8 {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9]+}}
927 // CHECK-COUNT-15: {{[0-9a-zA-Z_%.]+}} = insertelement <16 x i8> {{[0-9a-zA-Z_%.]+}}, i8 {{[0-9a-zA-Z_%.]+}}, i32 {{[0-9]+}}
929 // CHECK-LABEL: define available_externally <2 x double> @_mm_set_pd
930 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
931 // CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1
933 // CHECK-LABEL: define available_externally <2 x double> @_mm_set_pd1
934 // CHECK: call <2 x double> @_mm_set1_pd(double noundef %{{[0-9a-zA-Z_.]+}})
936 // CHECK-LABEL: define available_externally <2 x double> @_mm_set_sd
937 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
938 // CHECK: insertelement <2 x double> %[[VEC]], double 0.000000e+00, i32 1
940 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi16
941 // CHECK-COUNT-8: load i16, ptr %{{[0-9a-zA-Z_.]+}}, align 2
942 // CHECK: call <2 x i64> @_mm_set_epi16
944 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi32
945 // CHECK-COUNT-4: load i32, ptr %{{[0-9a-zA-Z_.]+}}, align 4
946 // CHECK: call <2 x i64> @_mm_set_epi32
948 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi64
949 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8
950 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8
951 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef %[[VAL1]], i64 noundef %[[VAL2]])
953 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi64x
954 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8
955 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8
956 // CHECK: call <2 x i64> @_mm_set_epi64x(i64 noundef %[[VAL1]], i64 noundef %[[VAL2]])
958 // CHECK-LABEL: define available_externally <2 x i64> @_mm_set1_epi8
959 // CHECK-COUNT-16: load i8, ptr %{{[0-9a-zA-Z_.]+}}, align 1
960 // CHECK: call <2 x i64> @_mm_set_epi8
962 // CHECK-LABEL: define available_externally <2 x double> @_mm_set1_pd
963 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
964 // CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1
966 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi16
967 // CHECK-COUNT-8: load i16, ptr {{[0-9a-zA-Z_%.]+}}, align 2
968 // CHECK: call <2 x i64> @_mm_set_epi16
970 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi32
971 // CHECK-COUNT-4: load i32, ptr {{[0-9a-zA-Z_%.]+}}, align 4
972 // CHECK: call <2 x i64> @_mm_set_epi32
974 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi64
975 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8
976 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = load i64, ptr %{{[0-9a-zA-Z_.]+}}, align 8
977 // CHECK: call <2 x i64> @_mm_set_epi64(i64 noundef %[[VAL1]], i64 noundef %[[VAL2]])
979 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setr_epi8
980 // CHECK-COUNT-16: load i8, ptr {{[0-9a-zA-Z_%.]+}}, align 1
981 // CHECK: call <2 x i64> @_mm_set_epi8
983 // CHECK-LABEL: define available_externally <2 x double> @_mm_setr_pd
984 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <2 x double> undef, double %{{[0-9a-zA-Z_.]+}}, i32 0
985 // CHECK: insertelement <2 x double> %[[VEC]], double %{{[0-9a-zA-Z_.]+}}, i32 1
987 // CHECK-LABEL: define available_externally <2 x double> @_mm_setzero_pd()
988 // CHECK: call <4 x i32> @vec_splats(int)(i32 noundef signext 0)
990 // CHECK-LABEL: define available_externally <2 x i64> @_mm_setzero_si128()
991 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
993 void __attribute__((noinline
))
995 resi
= _mm_shuffle_epi32(mi1
, i
);
996 resd
= _mm_shuffle_pd(md1
, md2
, i
);
997 resi
= _mm_shufflehi_epi16(mi1
, i
);
998 resi
= _mm_shufflelo_epi16(mi1
, i
);
1001 // CHECK-LABEL: @test_shuffle
1003 // CHECK-LABEL: define available_externally <2 x i64> @_mm_shuffle_epi32
1004 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
1005 // CHECK: sext i32 %[[AND]] to i64
1006 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2
1007 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
1008 // CHECK: sext i32 %[[AND2]] to i64
1009 // CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4
1010 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
1011 // CHECK: sext i32 %[[AND3]] to i64
1012 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
1013 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
1014 // CHECK: sext i32 %[[AND4]] to i64
1015 // CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
1016 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0
1017 // CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
1018 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1
1019 // CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
1020 // CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
1021 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2
1022 // CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_epi32.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
1023 // CHECK: add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
1024 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])
1026 // CHECK-LABEL: define available_externally <2 x double> @_mm_shuffle_pd
1027 // CHECK: and i32 %{{[0-9a-zA-Z_.]+}}, 3
1028 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, 0
1029 // CHECK: br i1 %[[CMP]]
1030 // CHECK: call <2 x double> @vec_mergeh(double vector[2], double vector[2])
1031 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, 1
1032 // CHECK: br i1 %[[CMP2]]
1033 // CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 1, i32 2>
1034 // CHECK: %[[CMP3:[0-9a-zA-Z_.]+]] = icmp eq i32 %{{[0-9a-zA-Z_.]+}}, 2
1035 // CHECK: br i1 %[[CMP3]]
1036 // CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 0, i32 3>
1037 // CHECK: call <2 x double> @vec_mergel(double vector[2], double vector[2])
1039 // CHECK-LABEL: define available_externally <2 x i64> @_mm_shufflehi_epi16
1040 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
1041 // CHECK: sext i32 %[[AND]] to i64
1042 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2
1043 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
1044 // CHECK: sext i32 %[[AND2]] to i64
1045 // CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4
1046 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
1047 // CHECK: sext i32 %[[AND3]] to i64
1048 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
1049 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
1050 // CHECK: sext i32 %[[AND4]] to i64
1051 // CHECK-LE: store <2 x i64> <i64 1663540288323457296, i64 0>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1052 // CHECK-BE: store <2 x i64> <i64 1157726452361532951, i64 0>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1053 // CHECK-COUNT-4: getelementptr inbounds [4 x i16], ptr @_mm_shufflehi_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
1054 // CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
1056 // CHECK-LABEL: define available_externally <2 x i64> @_mm_shufflelo_epi16
1057 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 {{[0-9a-zA-Z_%.]+}}, 3
1058 // CHECK: sext i32 %[[AND]] to i64
1059 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 2
1060 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
1061 // CHECK: sext i32 %[[AND2]] to i64
1062 // CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 4
1063 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
1064 // CHECK: sext i32 %[[AND3]] to i64
1065 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 {{[0-9a-zA-Z_%.]+}}, 6
1066 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
1067 // CHECK: sext i32 %[[AND4]] to i64
1068 // CHECK-LE: store <2 x i64> <i64 0, i64 2242261671028070680>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1069 // CHECK-BE: store <2 x i64> <i64 0, i64 1736447835066146335>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1070 // CHECK-COUNT-4: getelementptr inbounds [4 x i16], ptr @_mm_shufflelo_epi16.__permute_selectors, i64 0, i64 {{[0-9a-zA-Z_%.]+}}
1071 // CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
1073 void __attribute__((noinline
))
1075 resi
= _mm_sll_epi16(mi1
, mi2
);
1076 resi
= _mm_sll_epi32(mi1
, mi2
);
1077 resi
= _mm_sll_epi64(mi1
, mi2
);
1078 resi
= _mm_slli_epi16(mi1
, i
);
1079 resi
= _mm_slli_epi32(mi1
, i
);
1080 resi
= _mm_slli_epi64(mi1
, i
);
1081 resi
= _mm_slli_si128(mi1
, i
);
1084 // CHECK-LABEL: @test_sll
1086 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi16
1087 // CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
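1088 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1089 // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3)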
1090 // CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
1091 // CHECK: call <8 x i16> @vec_sl(unsigned short vector[8], unsigned short vector[8])
1092 // CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8])
1094 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi32
1095 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1096 // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1)
1097 // CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 32, i32 32, i32 32, i32 32>)
1098 // CHECK: call <4 x i32> @vec_sl(unsigned int vector[4], unsigned int vector[4])
1099 // CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4])
1101 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi64
1102 // CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef zeroext 0)
1103 // CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, <2 x i64> noundef <i64 64, i64 64>)
1104 // CHECK: call <2 x i64> @vec_sl(unsigned long long vector[2], unsigned long long vector[2])
1105 // CHECK: call <2 x i64> @vec_sel(unsigned long long vector[2], unsigned long long vector[2], bool vector[2])
1107 // CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_epi16
1108 // CHECK: store <8 x i16> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1109 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp sge i32 %{{[0-9a-zA-Z_.]+}}, 0
1110 // CHECK: br i1 %[[CMP]]
1111 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1112 // CHECK: br i1 %[[CMP2]]
1113 // CHECK: call i1 @llvm.is.constant
1114 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1115 // CHECK: call <8 x i16> @vec_splat_s16(signed char)(i8 noundef signext %[[TRUNC]])
1116 // CHECK: %[[TRUNC2:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i16
1117 // CHECK: call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext %[[TRUNC2]])
1118 // CHECK: call <8 x i16> @vec_sl(short vector[8], unsigned short vector[8])
1120 // CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_epi32
1121 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1122 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp sge i32 %{{[0-9a-zA-Z_.]+}}, 0
1123 // CHECK: br i1 %[[CMP]]
1124 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32
1125 // CHECK: br i1 %[[CMP2]]
1126 // CHECK: call i1 @llvm.is.constant
1127 // CHECK: %[[CMP3:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1128 // CHECK: br i1 %[[CMP3]]
1129 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1130 // CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]])
1131 // CHECK: call <4 x i32> @vec_splats(unsigned int)
1132 // CHECK: call <4 x i32> @vec_sl(int vector[4], unsigned int vector[4])
1134 // CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_epi64
1135 // CHECK: store <2 x i64> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1136 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp sge i32 %{{[0-9a-zA-Z_.]+}}, 0
1137 // CHECK: br i1 %[[CMP]]
1138 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 64
1139 // CHECK: br i1 %[[CMP2]]
1140 // CHECK: call i1 @llvm.is.constant
1141 // CHECK: %[[CMP3:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1142 // CHECK: br i1 %[[CMP3]]
1143 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1144 // CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]])
1145 // CHECK: call <4 x i32> @vec_splats(unsigned int)
1146 // CHECK: call <2 x i64> @vec_sl(long long vector[2], unsigned long long vector[2])
1148 // CHECK-LABEL: define available_externally <2 x i64> @_mm_slli_si128
1149 // CHECK: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1150 // CHECK-BE: %[[SUB:[0-9a-zA-Z_.]+]] = sub nsw i32 16, %{{[0-9a-zA-Z_.]+}}
1151 // CHECK-BE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext %[[SUB]])
1152 // CHECK-LE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer, i32 noundef zeroext %{{[0-9a-zA-Z_.]+}})
1153 // CHECK: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1155 void __attribute__((noinline
))
1157 resd
= _mm_sqrt_pd(md1
);
1158 resd
= _mm_sqrt_sd(md1
, md2
);
1161 // CHECK-LABEL: @test_sqrt
1163 // CHECK-LABEL: define available_externally <2 x double> @_mm_sqrt_pd
1164 // CHECK: call <2 x double> @vec_sqrt(double vector[2])(<2 x double> noundef {{[0-9a-zA-Z_%.]+}})
1166 // CHECK-LABEL: define available_externally <2 x double> @_mm_sqrt_sd
1167 // CHECK: %[[CALL:[0-9a-zA-Z_.]+]] = call <2 x double> @_mm_set1_pd(double noundef %{{[0-9a-zA-Z_.]+}})
1168 // CHECK: call <2 x double> @vec_sqrt(double vector[2])(<2 x double> noundef %{{[0-9a-zA-Z_.]+}})
1169 // CHECK: call <2 x double> @_mm_setr_pd(double noundef %{{[0-9a-zA-Z_.]+}}, double noundef %{{[0-9a-zA-Z_.]+}})
1171 void __attribute__((noinline
))
1173 resi
= _mm_sra_epi16(mi1
, mi2
);
1174 resi
= _mm_sra_epi32(mi1
, mi2
);
1175 resi
= _mm_srai_epi16(mi1
, i
);
1176 resi
= _mm_srai_epi32(mi1
, i
);
1179 // CHECK-LABEL: @test_sra
1181 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi16
1182 // CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1183 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1184 // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3)
1185 // CHECK: call <8 x i16> @vec_min(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
1186 // CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])
1188 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi32
1189 // CHECK: store <4 x i32> <i32 31, i32 31, i32 31, i32 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1190 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1191 // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1)
1192 // CHECK: call <4 x i32> @vec_min(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 31, i32 31, i32 31, i32 31>)
1193 // CHECK: call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4])
1195 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi16
1196 // CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1197 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1198 // CHECK: br i1 %[[CMP]]
1199 // CHECK: call i1 @llvm.is.constant
1200 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1201 // CHECK: call <8 x i16> @vec_splat_s16(signed char)(i8 noundef signext %[[TRUNC]])
1202 // CHECK: %[[TRUNC2:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i16
1203 // CHECK: call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext %{{[0-9a-zA-Z_.]+}})
1204 // CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])
1206 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi32
1207 // CHECK: store <4 x i32> <i32 31, i32 31, i32 31, i32 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1208 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32
1209 // CHECK: br i1 %[[CMP]]
1210 // CHECK: call i1 @llvm.is.constant
1211 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1212 // CHECK: br i1 %[[CMP2]]
1213 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1214 // CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]])
1215 // CHECK: call <4 x i32> @vec_splats(unsigned int)
1216 // CHECK: call <4 x i32> @vec_splats(unsigned int)
1217 // CHECK: call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4])
// Driver for the _mm_srl_* / _mm_srli_* intrinsics: calls _mm_srl_epi16,
// _mm_srl_epi32 and _mm_srl_epi64 with (mi1, mi2), then _mm_srli_epi16,
// _mm_srli_epi32, _mm_srli_epi64 and _mm_srli_si128 with (mi1, i), assigning
// every result to resi.
// NOTE(review): the definition's name and braces are not visible in this
// excerpt; the label directive that follows suggests test_srl -- confirm.
1219 void __attribute__((noinline
))
1221 resi
= _mm_srl_epi16(mi1
, mi2
);
1222 resi
= _mm_srl_epi32(mi1
, mi2
);
1223 resi
= _mm_srl_epi64(mi1
, mi2
);
1224 resi
= _mm_srli_epi16(mi1
, i
);
1225 resi
= _mm_srli_epi32(mi1
, i
);
1226 resi
= _mm_srli_epi64(mi1
, i
);
1227 resi
= _mm_srli_si128(mi1
, i
);
1230 // CHECK-LABEL: @test_srl
1232 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi16
1233 // CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1234 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1235 // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3)
1236 // CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
1237 // CHECK: call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8])
1238 // CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8])
1240 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi32
1241 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1242 // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1)
1243 // CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 32, i32 32, i32 32, i32 32>)
1244 // CHECK: call <4 x i32> @vec_sr(unsigned int vector[4], unsigned int vector[4])
1245 // CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4])
1247 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi64
1248 // CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1249 // CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef <i64 64, i64 64>)
1250 // CHECK: call <2 x i64> @vec_sr(unsigned long long vector[2], unsigned long long vector[2])
1251 // CHECK: call <2 x i64> @vec_sel(unsigned long long vector[2], unsigned long long vector[2], bool vector[2])
1253 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_epi16
1254 // CHECK: store <8 x i16> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1255 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1256 // CHECK: br i1 %[[CMP]]
1257 // CHECK: call i1 @llvm.is.constant
1258 // CHECK: trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1259 // CHECK: call <8 x i16> @vec_splat_s16(signed char)
1260 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i16
1261 // CHECK: call <8 x i16> @vec_splats(unsigned short)(i16 noundef zeroext %[[TRUNC]])
1262 // CHECK: call <8 x i16> @vec_sr(short vector[8], unsigned short vector[8])
1264 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_epi32
1265 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1266 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32
1267 // CHECK: br i1 %[[CMP]]
1268 // CHECK: call i1 @llvm.is.constant
1269 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1270 // CHECK: br i1 %[[CMP2]]
1271 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1272 // CHECK: call <4 x i32> @vec_splat_s32(signed char)
1273 // CHECK: call <4 x i32> @vec_splats(unsigned int)
1274 // CHECK: call <4 x i32> @vec_splats(unsigned int)
1275 // CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
1277 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_epi64
1278 // CHECK: store <2 x i64> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1279 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 64
1280 // CHECK: br i1 %[[CMP]]
1281 // CHECK: call i1 @llvm.is.constant
1282 // CHECK: %[[CMP2:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
1283 // CHECK: br i1 %[[CMP2]]
1284 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %{{[0-9a-zA-Z_.]+}} to i8
1285 // CHECK: call <4 x i32> @vec_splat_s32(signed char)(i8 noundef signext %[[TRUNC]])
1286 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64
1287 // CHECK: call <2 x i64> @vec_splats(unsigned long long)(i64 noundef %[[EXT]])
1288 // CHECK: call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext %{{[0-9a-zA-Z_.]+}})
1289 // CHECK: call <2 x i64> @vec_sr(long long vector[2], unsigned long long vector[2])
1291 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srli_si128
1292 // CHECK: call <2 x i64> @_mm_bsrli_si128
// Driver for the _mm_store* intrinsics. The double-precision variants
// (_mm_store_pd, _mm_store_pd1, _mm_store_sd, _mm_store1_pd, _mm_storeh_pd,
// _mm_storel_pd, _mm_storer_pd, _mm_storeu_pd) are called with (dp, md1);
// the integer variants (_mm_store_si128, _mm_storel_epi64, _mm_storeu_si128)
// are called with (mip, mi1). No return values are used.
// NOTE(review): the definition's name and braces are not visible in this
// excerpt; the label directive that follows suggests test_store -- confirm.
1294 void __attribute__((noinline
))
1296 _mm_store_pd(dp
, md1
);
1297 _mm_store_pd1(dp
, md1
);
1298 _mm_store_sd(dp
, md1
);
1299 _mm_store_si128(mip
, mi1
);
1300 _mm_store1_pd(dp
, md1
);
1301 _mm_storeh_pd(dp
, md1
);
1302 _mm_storel_epi64(mip
, mi1
);
1303 _mm_storel_pd(dp
, md1
);
1304 _mm_storer_pd(dp
, md1
);
1305 _mm_storeu_pd(dp
, md1
);
1306 _mm_storeu_si128(mip
, mi1
);
1309 // CHECK-LABEL: @test_store
1311 // CHECK-LABEL: define available_externally void @_mm_store_pd
1312 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1313 // CHECK: call void @vec_st(unsigned char vector[16], long, unsigned char vector[16]*)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef 0, ptr noundef %[[ADDR]])
1315 // CHECK-LABEL: define available_externally void @_mm_store_pd1
1316 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1317 // CHECK: %[[ADDR2:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1318 // CHECK: call void @_mm_store1_pd(ptr noundef %[[ADDR]], <2 x double> noundef %[[ADDR2]])
1320 // CHECK-LABEL: define available_externally void @_mm_store_sd
1321 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1322 // CHECK: store double %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]]
1324 // CHECK-LABEL: define available_externally void @_mm_store_si128
1325 // CHECK: %[[LOAD:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1326 // CHECK: call void @vec_st(unsigned char vector[16], long, unsigned char vector[16]*)(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef 0, ptr noundef %[[LOAD]])
1328 // CHECK-LABEL: define available_externally void @_mm_store1_pd
1329 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1330 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1331 // CHECK: %[[CALL:[0-9a-zA-Z_.]+]] = call <2 x double> @vec_splat(double vector[2], unsigned int)(<2 x double> noundef %[[VAL]], i32 noundef zeroext 0)
1332 // CHECK: call void @_mm_store_pd(ptr noundef %[[ADDR]], <2 x double> noundef %[[CALL]])
1334 // CHECK-LABEL: define available_externally void @_mm_storeh_pd
1335 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1336 // CHECK: store double %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]]
1338 // CHECK-LABEL: define available_externally void @_mm_storel_epi64
1339 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1340 // CHECK: store i64 %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]], align 8
1342 // CHECK-LABEL: define available_externally void @_mm_storel_pd
1343 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1344 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
1345 // CHECK: call void @_mm_store_sd(ptr noundef %[[ADDR]], <2 x double> noundef %[[VAL]])
1347 // CHECK-LABEL: define available_externally void @_mm_storer_pd
1348 // CHECK: shufflevector <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i64> %{{[0-9a-zA-Z_.]+}}, <2 x i32> <i32 1, i32 2>
1349 // CHECK: call void @_mm_store_pd(ptr noundef %{{[0-9a-zA-Z_.]+}}, <2 x double> noundef %{{[0-9a-zA-Z_.]+}})
1351 // CHECK-LABEL: define available_externally void @_mm_storeu_pd
1352 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1353 // CHECK: store <2 x double> %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]], align 1
1355 // CHECK-LABEL: define available_externally void @_mm_storeu_si128
1356 // CHECK: %[[ADDR:[0-9a-zA-Z_.]+]] = load ptr, ptr %{{[0-9a-zA-Z_.]+}}, align 8
1357 // CHECK: store <2 x i64> %{{[0-9a-zA-Z_.]+}}, ptr %[[ADDR]], align 1
// Driver for the _mm_stream_* intrinsics: calls _mm_stream_pd(dp, md1),
// _mm_stream_si128(mip, mi1), _mm_stream_si32(is, i) and
// _mm_stream_si64(i64s, i64s[1]). No return values are used.
// NOTE(review): the definition's name and braces are not visible in this
// excerpt; the label directive that follows suggests test_stream -- confirm.
1359 void __attribute__((noinline
))
1361 _mm_stream_pd(dp
, md1
);
1362 _mm_stream_si128(mip
, mi1
);
1363 _mm_stream_si32(is
, i
);
// Destination is i64s itself; the value passed is its element at index 1.
1364 _mm_stream_si64(i64s
, i64s
[1]);
1367 // CHECK-LABEL: @test_stream
1369 // CHECK-LABEL: define available_externally void @_mm_stream_pd
1370 // CHECK: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})
1372 // CHECK-LABEL: define available_externally void @_mm_stream_si128
1373 // CHECK: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})
1375 // CHECK-LABEL: define available_externally void @_mm_stream_si32
1376 // CHECK: call void asm sideeffect "dcbtstt 0,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})
1378 // CHECK-LABEL: define available_externally void @_mm_stream_si64
1379 // CHECK: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})
// Driver for the _mm_sub* / _mm_subs* intrinsics. Integer vector variants
// (_mm_sub_epi64/32/16/8, _mm_subs_epi16/8, _mm_subs_epu16/8) take (mi1, mi2)
// and assign to resi; _mm_sub_pd and _mm_sub_sd take (md1, md2) and assign to
// resd; _mm_sub_si64 takes (m641, m642) and assigns to res64.
// NOTE(review): the definition's name and braces are not visible in this
// excerpt; the label directive that follows suggests test_sub -- confirm.
1381 void __attribute__((noinline
))
1383 resi
= _mm_sub_epi64(mi1
, mi2
);
1384 resi
= _mm_sub_epi32(mi1
, mi2
);
1385 resi
= _mm_sub_epi16(mi1
, mi2
);
1386 resi
= _mm_sub_epi8(mi1
, mi2
);
1387 resd
= _mm_sub_pd(md1
, md2
);
1388 resd
= _mm_sub_sd(md1
, md2
);
1389 res64
= _mm_sub_si64(m641
, m642
);
1390 resi
= _mm_subs_epi16(mi1
, mi2
);
1391 resi
= _mm_subs_epi8(mi1
, mi2
);
1392 resi
= _mm_subs_epu16(mi1
, mi2
);
1393 resi
= _mm_subs_epu8(mi1
, mi2
);
1396 // CHECK-LABEL: @test_sub
1398 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi64
1399 // CHECK: sub <2 x i64>
1401 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi32
1402 // CHECK: sub <4 x i32>
1404 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi16
1405 // CHECK: sub <8 x i16>
1407 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sub_epi8
1408 // CHECK: sub <16 x i8>
1410 // CHECK-LABEL: define available_externally <2 x double> @_mm_sub_pd
1411 // CHECK: fsub <2 x double>
1413 // CHECK-LABEL: define available_externally <2 x double> @_mm_sub_sd
1414 // CHECK: fsub double
1416 // CHECK-LABEL: define available_externally i64 @_mm_sub_si64
1419 // CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epi16
1420 // CHECK: call <8 x i16> @vec_subs(short vector[8], short vector[8])
1422 // CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epi8
1423 // CHECK: call <16 x i8> @vec_subs(signed char vector[16], signed char vector[16])
1425 // CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epu16
1426 // CHECK: call <8 x i16> @vec_subs(unsigned short vector[8], unsigned short vector[8])
1428 // CHECK-LABEL: define available_externally <2 x i64> @_mm_subs_epu8
1429 // CHECK: call <16 x i8> @vec_subs(unsigned char vector[16], unsigned char vector[16])
// Driver for the _mm_ucomi*_sd intrinsics: calls the eq, ge, gt, le, lt and
// neq variants in turn with (md1, md2), assigning each result to i.
// NOTE(review): the definition's name and braces are not visible in this
// excerpt; the label directive that follows suggests test_ucomi -- confirm.
1431 void __attribute__((noinline
))
1433 i
= _mm_ucomieq_sd(md1
, md2
);
1434 i
= _mm_ucomige_sd(md1
, md2
);
1435 i
= _mm_ucomigt_sd(md1
, md2
);
1436 i
= _mm_ucomile_sd(md1
, md2
);
1437 i
= _mm_ucomilt_sd(md1
, md2
);
1438 i
= _mm_ucomineq_sd(md1
, md2
);
1441 // CHECK-LABEL: @test_ucomi
1443 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomieq_sd
1444 // CHECK: fcmp oeq double
1446 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomige_sd
1447 // CHECK: fcmp oge double
1449 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomigt_sd
1450 // CHECK: fcmp ogt double
1452 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomile_sd
1453 // CHECK: fcmp ole double
1455 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomilt_sd
1456 // CHECK: fcmp olt double
1458 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomineq_sd
1459 // CHECK: fcmp une double
// Driver for the _mm_undefined_* intrinsics: assigns _mm_undefined_pd() to
// resd and _mm_undefined_si128() to resi. Neither call takes arguments.
// NOTE(review): the definition's name and braces are not visible in this
// excerpt; the label directive that follows suggests test_undefined -- confirm.
1461 void __attribute__((noinline
))
1463 resd
= _mm_undefined_pd();
1464 resi
= _mm_undefined_si128();
1467 // CHECK-LABEL: @test_undefined
1469 // CHECK-LABEL: define available_externally <2 x double> @_mm_undefined_pd()
1470 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x double>, ptr %[[ADDR:[0-9a-zA-Z_.]+]], align 16
1471 // CHECK: store <2 x double> %[[VAL]], ptr %[[ADDR]], align 16
1472 // CHECK: load <2 x double>, ptr %[[ADDR]], align 16
1474 // CHECK-LABEL: define available_externally <2 x i64> @_mm_undefined_si128()
1475 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = load <2 x i64>, ptr %[[ADDR:[0-9a-zA-Z_.]+]], align 16
1476 // CHECK: store <2 x i64> %[[VAL]], ptr %[[ADDR]], align 16
1477 // CHECK: load <2 x i64>, ptr %[[ADDR]], align 16
// Driver for the _mm_unpackhi_* / _mm_unpacklo_* intrinsics. The epi16,
// epi32, epi64 and epi8 variants take (mi1, mi2) and assign to resi; the pd
// variants take (md1, md2) and assign to resd.
// NOTE(review): the definition's name and braces are not visible in this
// excerpt; the label directive that follows suggests test_unpack -- confirm.
1479 void __attribute__((noinline
))
1481 resi
= _mm_unpackhi_epi16(mi1
, mi2
);
1482 resi
= _mm_unpackhi_epi32(mi1
, mi2
);
1483 resi
= _mm_unpackhi_epi64(mi1
, mi2
);
1484 resi
= _mm_unpackhi_epi8(mi1
, mi2
);
1485 resd
= _mm_unpackhi_pd(md1
, md2
);
1486 resi
= _mm_unpacklo_epi16(mi1
, mi2
);
1487 resi
= _mm_unpacklo_epi32(mi1
, mi2
);
1488 resi
= _mm_unpacklo_epi64(mi1
, mi2
);
1489 resi
= _mm_unpacklo_epi8(mi1
, mi2
);
1490 resd
= _mm_unpacklo_pd(md1
, md2
);
1493 // CHECK-LABEL: @test_unpack
1495 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi16
1496 // CHECK: call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])
1498 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi32
1499 // CHECK: call <4 x i32> @vec_mergel(unsigned int vector[4], unsigned int vector[4])
1501 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi64
1502 // CHECK: call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
1504 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpackhi_epi8
1505 // CHECK: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])
1507 // CHECK-LABEL: define available_externally <2 x double> @_mm_unpackhi_pd
1508 // CHECK: call <2 x double> @vec_mergel(double vector[2], double vector[2])
1510 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi16
1511 // CHECK: call <8 x i16> @vec_mergeh(short vector[8], short vector[8])
1513 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi32
1514 // CHECK: call <4 x i32> @vec_mergeh(int vector[4], int vector[4])
1516 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi64
1517 // CHECK: call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
1519 // CHECK-LABEL: define available_externally <2 x i64> @_mm_unpacklo_epi8
1520 // CHECK: call <16 x i8> @vec_mergeh(unsigned char vector[16], unsigned char vector[16])
1522 // CHECK-LABEL: define available_externally <2 x double> @_mm_unpacklo_pd
1523 // CHECK: call <2 x double> @vec_mergeh(double vector[2], double vector[2])