1 // REQUIRES: powerpc-registered-target
3 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
4 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
5 // RUN: %clang -x c++ -fsyntax-only -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
6 // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns
7 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
8 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE
9 // RUN: %clang -x c++ -fsyntax-only -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
10 // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns
12 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
13 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-P10-LE
15 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -nostdlibinc -DNO_WARN_X86_INTRINSICS %s \
16 // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
17 // RUN: %clang -x c++ -fsyntax-only -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -nostdlibinc -DNO_WARN_X86_INTRINSICS %s \
18 // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns
19 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -nostdlibinc -DNO_WARN_X86_INTRINSICS %s \
20 // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE
21 // RUN: %clang -x c++ -fsyntax-only -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -nostdlibinc -DNO_WARN_X86_INTRINSICS %s \
22 // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns
24 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
25 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
26 // RUN: %clang -x c++ -fsyntax-only -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
27 // RUN: -fno-discard-value-names -mllvm -disable-llvm-optzns
28 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr10 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
29 // RUN: -ffp-contract=off -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-P10-BE
31 #include <xmmintrin.h>
39 // CHECK-LE-DAG: @_mm_shuffle_pi16.__permute_selectors = internal constant [4 x i16] [i16 2312, i16 2826, i16 3340, i16 3854], align 2
40 // CHECK-BE-DAG: @_mm_shuffle_pi16.__permute_selectors = internal constant [4 x i16] [i16 1543, i16 1029, i16 515, i16 1], align 2
42 // CHECK-LE-DAG: @_mm_shuffle_ps.__permute_selectors = internal constant [4 x i32] [i32 50462976, i32 117835012, i32 185207048, i32 252579084], align 4
43 // CHECK-BE-DAG: @_mm_shuffle_ps.__permute_selectors = internal constant [4 x i32] [i32 66051, i32 67438087, i32 134810123, i32 202182159], align 4
// noinline test body: calls _mm_add_ps and then _mm_add_ss on m1 and m2,
// storing each result in res (m1, m2 and res are declared outside this view).
// NOTE(review): the declarator and braces are not visible here; the label
// matched immediately after this block suggests the name test_add -- confirm.
45 void __attribute__((noinline
))
47 res
= _mm_add_ps(m1
, m2
);
48 res
= _mm_add_ss(m1
, m2
);
51 // CHECK-LABEL: @test_add
53 // CHECK-LABEL: define available_externally <4 x float> @_mm_add_ps
54 // CHECK: fadd <4 x float>
56 // CHECK-LABEL: define available_externally <4 x float> @_mm_add_ss
57 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
58 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
59 // CHECK: fadd <4 x float>
60 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
// noinline test body: calls _mm_avg_pu16 and then _mm_avg_pu8 on ms[0] and
// ms[1], storing each result in res64 (ms and res64 are declared outside
// this view).
// NOTE(review): the declarator and braces are not visible here; the label
// matched immediately after this block suggests the name test_avg -- confirm.
62 void __attribute__((noinline
))
64 res64
= _mm_avg_pu16(ms
[0], ms
[1]);
65 res64
= _mm_avg_pu8(ms
[0], ms
[1]);
68 // CHECK-LABEL: @test_avg
70 // CHECK-LABEL: define available_externally i64 @_mm_avg_pu16
71 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
72 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
73 // CHECK: call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])
74 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
76 // CHECK-LABEL: define available_externally i64 @_mm_avg_pu8
77 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
78 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
79 // CHECK: call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])
80 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
// noinline test body: calls the alternate-name entry points _m_pavgw and
// _m_pavgb on ms[0] and ms[1], storing each result in res64. The expectations
// that follow this block require them to call _mm_avg_pu16 and _mm_avg_pu8
// respectively.
// NOTE(review): the declarator and braces are not visible here; the label
// matched immediately after this block suggests the name test_alt_name_avg
// -- confirm.
82 void __attribute__((noinline
))
84 res64
= _m_pavgw(ms
[0], ms
[1]);
85 res64
= _m_pavgb(ms
[0], ms
[1]);
88 // CHECK-LABEL: @test_alt_name_avg
90 // CHECK-LABEL: define available_externally i64 @_m_pavgw
91 // CHECK: call i64 @_mm_avg_pu16
93 // CHECK-LABEL: define available_externally i64 @_m_pavgb
94 // CHECK: call i64 @_mm_avg_pu8
// noinline test body: calls every _mm_cmp* intrinsic listed below on m1 and
// m2, each one first in its _ps form and then in its _ss form, overwriting
// res every time (m1, m2 and res are declared outside this view). Only the
// IR emitted for the callees is inspected afterwards, so the stored value
// itself is never read in the visible code.
// NOTE(review): the declarator and braces are not visible here; the label
// matched immediately after this block suggests the name test_cmp -- confirm.
96 void __attribute__((noinline
))
// eq / ge / gt / le / lt
98 res
= _mm_cmpeq_ps(m1
, m2
);
99 res
= _mm_cmpeq_ss(m1
, m2
);
100 res
= _mm_cmpge_ps(m1
, m2
);
101 res
= _mm_cmpge_ss(m1
, m2
);
102 res
= _mm_cmpgt_ps(m1
, m2
);
103 res
= _mm_cmpgt_ss(m1
, m2
);
104 res
= _mm_cmple_ps(m1
, m2
);
105 res
= _mm_cmple_ss(m1
, m2
);
106 res
= _mm_cmplt_ps(m1
, m2
);
107 res
= _mm_cmplt_ss(m1
, m2
);
// neq / nge / ngt / nle / nlt
108 res
= _mm_cmpneq_ps(m1
, m2
);
109 res
= _mm_cmpneq_ss(m1
, m2
);
110 res
= _mm_cmpnge_ps(m1
, m2
);
111 res
= _mm_cmpnge_ss(m1
, m2
);
112 res
= _mm_cmpngt_ps(m1
, m2
);
113 res
= _mm_cmpngt_ss(m1
, m2
);
114 res
= _mm_cmpnle_ps(m1
, m2
);
115 res
= _mm_cmpnle_ss(m1
, m2
);
116 res
= _mm_cmpnlt_ps(m1
, m2
);
117 res
= _mm_cmpnlt_ss(m1
, m2
);
// ord / unord
118 res
= _mm_cmpord_ps(m1
, m2
);
119 res
= _mm_cmpord_ss(m1
, m2
);
120 res
= _mm_cmpunord_ps(m1
, m2
);
121 res
= _mm_cmpunord_ss(m1
, m2
);
124 // CHECK-LABEL: @test_cmp
126 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpeq_ps
127 // CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
129 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpeq_ss
130 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
131 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
132 // CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
133 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
135 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpge_ps
136 // CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
138 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpge_ss
139 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
140 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
141 // CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
142 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
144 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpgt_ps
145 // CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
147 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpgt_ss
148 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
149 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
150 // CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
151 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
153 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmple_ps
154 // CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])
156 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmple_ss
157 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
158 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
159 // CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])
160 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
162 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmplt_ps
163 // CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
165 // CHECK: @_mm_cmplt_ss
166 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
167 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
168 // CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
169 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
171 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpneq_ps
172 // CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
173 // CHECK: call <4 x float> @vec_nor(float vector[4], float vector[4])
175 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpneq_ss
176 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
177 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
178 // CHECK: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
179 // CHECK: call <4 x float> @vec_nor(float vector[4], float vector[4])
180 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
182 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnge_ps
183 // CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
185 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnge_ss
186 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
187 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
188 // CHECK: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
189 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
191 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpngt_ps
192 // CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])
194 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpngt_ss
195 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
196 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
197 // CHECK: call <4 x i32> @vec_cmple(float vector[4], float vector[4])
198 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
200 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnle_ps
201 // CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
203 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnle_ss
204 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
205 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
206 // CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
207 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
209 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnlt_ps
210 // CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
212 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpnlt_ss
213 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
214 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
215 // CHECK: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
216 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
218 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ps
219 // CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
220 // CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
221 // CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
222 // CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
223 // CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])
225 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ss
226 // CHECK: call <4 x float> @vec_abs(float vector[4])
227 // CHECK: call <4 x float> @vec_abs(float vector[4])
228 // CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
229 // CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
230 // CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
231 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
233 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ps
234 // CHECK: call <4 x float> @vec_abs(float vector[4])
235 // CHECK: call <4 x float> @vec_abs(float vector[4])
236 // CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
237 // CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
238 // CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])
240 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ss
241 // CHECK: call <4 x float> @vec_abs(float vector[4])
242 // CHECK: call <4 x float> @vec_abs(float vector[4])
243 // CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
244 // CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
245 // CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])
246 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
// noinline test body: calls the six _mm_comi*_ss intrinsics (eq, ge, gt, le,
// lt, neq) on m1 and m2, storing each result in i (m1, m2 and i are declared
// outside this view).
// NOTE(review): the declarator and braces are not visible here; the label
// matched immediately after this block suggests the name test_comi -- confirm.
248 void __attribute__((noinline
))
250 i
= _mm_comieq_ss(m1
, m2
);
251 i
= _mm_comige_ss(m1
, m2
);
252 i
= _mm_comigt_ss(m1
, m2
);
253 i
= _mm_comile_ss(m1
, m2
);
254 i
= _mm_comilt_ss(m1
, m2
);
255 i
= _mm_comineq_ss(m1
, m2
);
258 // CHECK-LABEL: @test_comi
260 // CHECK-LABEL: define available_externally signext i32 @_mm_comieq_ss
261 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
262 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
263 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oeq float %[[VAL1]], %[[VAL2]]
264 // CHECK: zext i1 %[[CMP]] to i32
266 // CHECK-LABEL: define available_externally signext i32 @_mm_comige_ss
267 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
268 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
269 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp oge float %[[VAL1]], %[[VAL2]]
270 // CHECK: zext i1 %[[CMP]] to i32
272 // CHECK-LABEL: define available_externally signext i32 @_mm_comigt_ss
273 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
274 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
275 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ogt float %[[VAL1]], %[[VAL2]]
276 // CHECK: zext i1 %[[CMP]] to i32
278 // CHECK-LABEL: define available_externally signext i32 @_mm_comile_ss
279 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
280 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
281 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp ole float %[[VAL1]], %[[VAL2]]
282 // CHECK: zext i1 %[[CMP]] to i32
284 // CHECK-LABEL: define available_externally signext i32 @_mm_comilt_ss
285 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
286 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
287 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp olt float %[[VAL1]], %[[VAL2]]
288 // CHECK: zext i1 %[[CMP]] to i32
290 // CHECK-LABEL: define available_externally signext i32 @_mm_comineq_ss
291 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
292 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
293 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = fcmp une float %[[VAL1]], %[[VAL2]]
294 // CHECK: zext i1 %[[CMP]] to i32
// noinline test body: calls each _mm_cvt* / _mm_cvtt* intrinsic listed below
// once. Inputs are taken from m1, ms[0], ms[1], i and i64; results are stored
// in res, res64, i, i64 or fs[0] depending on the call (all of these are
// declared outside this view, so their types are not shown here).
// NOTE(review): the declarator and braces are not visible here; the label
// matched immediately after this block suggests the name test_convert --
// confirm.
296 void __attribute__((noinline
))
298 res
= _mm_cvt_pi2ps(m1
, ms
[1]);
299 res64
= _mm_cvt_ps2pi(m1
);
300 res
= _mm_cvt_si2ss(m1
, i
);
301 i
= _mm_cvt_ss2si(m1
);
302 res
= _mm_cvtpi16_ps(ms
[0]);
303 res
= _mm_cvtpi32_ps(m1
, ms
[1]);
304 res
= _mm_cvtpi32x2_ps(ms
[0], ms
[1]);
305 res
= _mm_cvtpi8_ps(ms
[0]);
306 res64
= _mm_cvtps_pi16(m1
);
307 res64
= _mm_cvtps_pi32(m1
);
308 res64
= _mm_cvtps_pi8(m1
);
309 res
= _mm_cvtpu16_ps(ms
[0]);
310 res
= _mm_cvtpu8_ps(ms
[0]);
311 res
= _mm_cvtsi32_ss(m1
, i
);
312 res
= _mm_cvtsi64_ss(m1
, i64
);
// The only call in this block whose result goes to fs[0].
313 fs
[0] = _mm_cvtss_f32(m1
);
314 i
= _mm_cvtss_si32(m1
);
315 i64
= _mm_cvtss_si64(m1
);
// _mm_cvtt* calls start here.
316 res64
= _mm_cvtt_ps2pi(m1
);
317 i
= _mm_cvtt_ss2si(m1
);
318 res64
= _mm_cvttps_pi32(m1
);
319 i
= _mm_cvttss_si32(m1
);
320 i64
= _mm_cvttss_si64(m1
);
323 // CHECK-LABEL: @test_convert
325 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvt_pi2ps
326 // CHECK: call <4 x float> @_mm_cvtpi32_ps
328 // CHECK-LABEL: define available_externally i64 @_mm_cvt_ps2pi
329 // CHECK: call i64 @_mm_cvtps_pi32
331 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvt_si2ss
332 // CHECK: call <4 x float> @_mm_cvtsi32_ss
334 // CHECK-LABEL: define available_externally signext i32 @_mm_cvt_ss2si
335 // CHECK: call signext i32 @_mm_cvtss_si32
337 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi16_ps
338 // CHECK: call <4 x i32> @vec_vupklsh(short vector[8])
339 // CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
341 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi32_ps
342 // CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
344 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi32x2_ps
345 // CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
347 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpi8_ps
348 // CHECK: call <8 x i16> @vec_vupkhsb(signed char vector[16])
349 // CHECK: call <4 x i32> @vec_vupkhsh(short vector[8])
350 // CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
352 // CHECK-LABEL: define available_externally i64 @_mm_cvtps_pi16
353 // CHECK: call <4 x float> @vec_rint(float vector[4])
354 // CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
355 // CHECK: call <8 x i16> @vec_pack(int vector[4], int vector[4])
356 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
358 // CHECK-LABEL: define available_externally i64 @_mm_cvtps_pi32
359 // CHECK: call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
360 // CHECK: call <4 x float> @vec_rint(float vector[4])
361 // CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
362 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
364 // CHECK-LABEL: define available_externally i64 @_mm_cvtps_pi8
365 // CHECK: call <4 x float> @vec_rint(float vector[4])
366 // CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
367 // CHECK: call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
368 // CHECK: call <16 x i8> @vec_pack(short vector[8], short vector[8])
369 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
371 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpu16_ps
372 // CHECK-LE: call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef zeroinitializer)
373 // CHECK-BE: call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef zeroinitializer, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
374 // CHECK: call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
376 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtpu8_ps
377 // CHECK-BE: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}})
378 // CHECK-BE: call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef zeroinitializer, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
379 // CHECK-LE: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer)
380 // CHECK-LE: call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef zeroinitializer)
381 // CHECK: call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
383 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtsi32_ss
384 // CHECK: sitofp i32 %{{[0-9a-zA-Z_.]+}} to float
385 // CHECK: insertelement <4 x float> %{{[0-9a-zA-Z_.]+}}, float %{{[0-9a-zA-Z_.]+}}, i32 0
387 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtsi64_ss
388 // CHECK: sitofp i64 %{{[0-9a-zA-Z_.]+}} to float
389 // CHECK: insertelement <4 x float> %{{[0-9a-zA-Z_.]+}}, float %{{[0-9a-zA-Z_.]+}}, i32 0
391 // CHECK-LABEL: define available_externally float @_mm_cvtss_f32
392 // CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
394 // CHECK-LABEL: define available_externally signext i32 @_mm_cvtss_si32
395 // CHECK-LE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i32, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctiw $2,$2;\0Amfvsrd $1,${2:x};\0A", "=^wa,=r,=f,0"
396 // CHECK-BE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i32, double } asm "xscvspdp ${2:x},${0:x};\0Afctiw $2,$2;\0Amfvsrd $1,${2:x};\0A", "=^wa,=r,=f,0"
397 // CHECK-P10-LE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i32, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctiw $2,$2;\0Amfvsrd $1,${2:x};\0A", "=^wa,=r,=f,0"
398 // CHECK-P10-BE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i32, double } asm "xscvspdp ${2:x},${0:x};\0Afctiw $2,$2;\0Amfvsrd $1,${2:x};\0A", "=^wa,=r,=f,0"
399 // CHECK: extractvalue { <4 x float>, i32, double } %[[VEC]], 0
400 // CHECK: extractvalue { <4 x float>, i32, double } %[[VEC]], 1
401 // CHECK: extractvalue { <4 x float>, i32, double } %[[VEC]], 2
403 // CHECK-LABEL: define available_externally i64 @_mm_cvtss_si64
404 // CHECK-LE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctid $2,$2;\0Amfvsrd $1,${2:x};\0A", "=^wa,=r,=f,0"
405 // CHECK-BE: %[[VEC:[0-9a-zA-Z_.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctid $2,$2;\0Amfvsrd $1,${2:x};\0A", "=^wa,=r,=f,0"
406 // CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 0
407 // CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 1
408 // CHECK: extractvalue { <4 x float>, i64, double } %[[VEC]], 2
410 // CHECK-LABEL: define available_externally i64 @_mm_cvtt_ps2pi
411 // CHECK: call i64 @_mm_cvttps_pi32(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
413 // CHECK-LABEL: define available_externally signext i32 @_mm_cvtt_ss2si
414 // CHECK: call signext i32 @_mm_cvttss_si32(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
416 // CHECK-LABEL: define available_externally i64 @_mm_cvttps_pi32
417 // CHECK: call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
418 // CHECK: call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0)
419 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
421 // CHECK-LABEL: define available_externally signext i32 @_mm_cvttss_si32
422 // CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
423 // CHECK: fptosi float %{{[0-9a-zA-Z_.]+}} to i32
425 // CHECK-LABEL: define available_externally i64 @_mm_cvttss_si64
426 // CHECK: extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
427 // CHECK: fptosi float %{{[0-9a-zA-Z_.]+}} to i64
// noinline test body: calls _mm_div_ps and then _mm_div_ss on m1 and m2,
// storing each result in res (m1, m2 and res are declared outside this view).
// NOTE(review): the declarator and braces are not visible here; the label
// matched immediately after this block suggests the name test_div -- confirm.
429 void __attribute__((noinline
))
431 res
= _mm_div_ps(m1
, m2
);
432 res
= _mm_div_ss(m1
, m2
);
435 // CHECK-LABEL: @test_div
437 // CHECK-LABEL: define available_externally <4 x float> @_mm_div_ps
438 // CHECK: fdiv <4 x float>
440 // CHECK-LABEL: define available_externally <4 x float> @_mm_div_ss
441 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
442 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
443 // CHECK: fdiv <4 x float>
444 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
// noinline test body: calls _mm_extract_pi16 and its alternate name
// _m_pextrw with ms[0] and i2, storing each result in i (ms, i2 and i are
// declared outside this view). The expectations that follow this block
// require _m_pextrw to call _mm_extract_pi16.
// NOTE(review): the declarator and braces are not visible here; the label
// matched immediately after this block suggests the name test_extract --
// confirm.
446 void __attribute__((noinline
))
448 i
= _mm_extract_pi16(ms
[0], i2
);
449 i
= _m_pextrw(ms
[0], i2
);
452 // CHECK-LABEL: @test_extract
454 // CHECK-LABEL: define available_externally signext i32 @_mm_extract_pi16
455 // CHECK: and i32 %{{[0-9a-zA-Z_.]+}}, 3
456 // CHECK-BE: sub i32 3, %{{[0-9a-zA-Z_.]+}}
457 // CHECK: %[[MUL:[0-9a-zA-Z_.]+]] = mul i32 %{{[0-9a-zA-Z_.]+}}, 16
458 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = zext i32 %[[MUL]] to i64
459 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = lshr i64 %{{[0-9a-zA-Z_.]+}}, %[[EXT]]
460 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i64 %[[SHR]], 65535
461 // CHECK: trunc i64 %[[AND]] to i32
463 // CHECK-LABEL: define available_externally signext i32 @_m_pextrw
464 // CHECK: call signext i32 @_mm_extract_pi16
// noinline test body: calls _mm_insert_pi16 and its alternate name
// _m_pinsrw with ms[0], i and i2, storing each result in res64 (all of these
// are declared outside this view). The expectations that follow this block
// require _m_pinsrw to call _mm_insert_pi16.
// NOTE(review): the declarator and braces are not visible here; the label
// matched immediately after this block suggests the name test_insert --
// confirm.
466 void __attribute__((noinline
))
468 res64
= _mm_insert_pi16(ms
[0], i
, i2
);
469 res64
= _m_pinsrw(ms
[0], i
, i2
);
472 // CHECK-LABEL: @test_insert
474 // CHECK-LABEL: define available_externally i64 @_mm_insert_pi16
475 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
476 // CHECK: mul nsw i32 %[[AND]], 16
477 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = sext i32 %{{[0-9a-zA-Z_.]+}} to i64
478 // CHECK: %[[EXT2:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
479 // CHECK: shl i64 %[[EXT]], %[[EXT2]]
480 // CHECK: %[[EXT3:[0-9a-zA-Z_.]+]] = zext i32 %{{[0-9a-zA-Z_.]+}} to i64
481 // CHECK: shl i64 65535, %[[EXT3]]
482 // CHECK: %[[XOR:[0-9a-zA-Z_.]+]] = xor i64 %{{[0-9a-zA-Z_.]+}}, -1
483 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i64 %{{[0-9a-zA-Z_.]+}}, %[[XOR]]
484 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i64 %{{[0-9a-zA-Z_.]+}}, %{{[0-9a-zA-Z_.]+}}
485 // CHECK: or i64 %[[AND2]], %[[AND3]]
487 // CHECK-LABEL: define available_externally i64 @_m_pinsrw
488 // CHECK: call i64 @_mm_insert_pi16
// noinline test body: calls each _mm_load* intrinsic listed below once and
// stores the result in res. Most calls take fs; _mm_loadh_pi and
// _mm_loadl_pi instead take m1 and the address of ms[0] (fs, ms, m1 and res
// are declared outside this view).
// NOTE(review): the declarator and braces are not visible here; the label
// matched immediately after this block suggests the name test_load -- confirm.
490 void __attribute__((noinline
))
492 res
= _mm_load_ps(fs
);
493 res
= _mm_load_ps1(fs
);
494 res
= _mm_load_ss(fs
);
495 res
= _mm_load1_ps(fs
);
496 res
= _mm_loadh_pi(m1
, &ms
[0]);
497 res
= _mm_loadl_pi(m1
, &ms
[0]);
498 res
= _mm_loadr_ps(fs
);
499 res
= _mm_loadu_ps(fs
);
502 // CHECK-LABEL: @test_load
504 // CHECK-LABEL: define available_externally <4 x float> @_mm_load_ps
505 // CHECK: call <4 x float> @vec_ld(long, float vector[4] const*)
507 // CHECK-LABEL: define available_externally <4 x float> @_mm_load_ps1
508 // CHECK: call <4 x float> @_mm_load1_ps
510 // CHECK-LABEL: define available_externally <4 x float> @_mm_load_ss
511 // CHECK: call <4 x float> @_mm_set_ss
513 // CHECK-LABEL: define available_externally <4 x float> @_mm_load1_ps
514 // CHECK: call <4 x float> @_mm_set1_ps
516 // CHECK-LABEL: define available_externally <4 x float> @_mm_loadh_pi
517 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
518 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
519 // CHECK: insertelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i64 %[[VAL]], i32 1
521 // CHECK-LABEL: define available_externally <4 x float> @_mm_loadl_pi
522 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
523 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
524 // CHECK: insertelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i64 %[[EXT]], i32 0
526 // CHECK-LABEL: define available_externally <4 x float> @_mm_loadr_ps
527 // CHECK: call <4 x float> @vec_ld(long, float vector[4] const*)
528 // CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
530 // CHECK-LABEL: define available_externally <4 x float> @_mm_loadu_ps
531 // CHECK: call <4 x float> @vec_vsx_ld(int, float const*)
// Calls the four bitwise wrappers (_mm_or_ps, _mm_and_ps, _mm_andnot_ps,
// _mm_xor_ps) on m1 and m2, assigning each result to res.
533 void __attribute__((noinline
))
535 res
= _mm_or_ps(m1
, m2
);
536 res
= _mm_and_ps(m1
, m2
);
537 res
= _mm_andnot_ps(m1
, m2
);
538 res
= _mm_xor_ps(m1
, m2
);
541 // CHECK-LABEL: @test_logic
543 // CHECK-LABEL: define available_externally <4 x float> @_mm_or_ps
544 // CHECK: call <4 x float> @vec_or(float vector[4], float vector[4])
546 // CHECK-LABEL: define available_externally <4 x float> @_mm_and_ps
547 // CHECK: call <4 x float> @vec_and(float vector[4], float vector[4])
549 // CHECK-LABEL: define available_externally <4 x float> @_mm_andnot_ps
550 // CHECK: call <4 x float> @vec_andc(float vector[4], float vector[4])
552 // CHECK-LABEL: define available_externally <4 x float> @_mm_xor_ps
553 // CHECK: call <4 x float> @vec_xor(float vector[4], float vector[4])
// Calls the maximum wrappers: _mm_max_ps and _mm_max_ss on m1/m2 (result in
// res), then _mm_max_pi16 and _mm_max_pu8 on ms[0]/ms[1] (result in res64).
555 void __attribute__((noinline
))
557 res
= _mm_max_ps(m1
, m2
);
558 res
= _mm_max_ss(m1
, m2
);
559 res64
= _mm_max_pi16(ms
[0], ms
[1]);
560 res64
= _mm_max_pu8(ms
[0], ms
[1]);
563 // CHECK-LABEL: @test_max
565 // CHECK-LABEL: define available_externally <4 x float> @_mm_max_ps
566 // CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
567 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])
569 // CHECK-LABEL: define available_externally <4 x float> @_mm_max_ss
570 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
571 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
572 // CHECK: call <4 x float> @vec_max(float vector[4], float vector[4])
573 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
575 // CHECK-LABEL: define available_externally i64 @_mm_max_pi16
576 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
577 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
578 // CHECK: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])
579 // CHECK: call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])
580 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
581 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0
583 // CHECK-LABEL: define available_externally i64 @_mm_max_pu8
584 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
585 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
586 // CHECK: call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16])
587 // CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])
588 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
589 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0
// Calls the alternate-name maximum wrappers _m_pmaxsw and _m_pmaxub on
// ms[0]/ms[1], assigning each result to res64. The directives that follow
// expect them to forward to _mm_max_pi16 and _mm_max_pu8 respectively.
591 void __attribute__((noinline
))
592 test_alt_name_max() {
593 res64
= _m_pmaxsw(ms
[0], ms
[1]);
594 res64
= _m_pmaxub(ms
[0], ms
[1]);
597 // CHECK-LABEL: @test_alt_name_max
599 // CHECK-LABEL: define available_externally i64 @_m_pmaxsw
600 // CHECK: call i64 @_mm_max_pi16
602 // CHECK-LABEL: define available_externally i64 @_m_pmaxub
603 // CHECK: call i64 @_mm_max_pu8
// Calls the minimum wrappers: _mm_min_ps and _mm_min_ss on m1/m2 (result in
// res), then _mm_min_pi16 and _mm_min_pu8 on ms[0]/ms[1] (result in res64).
605 void __attribute__((noinline
))
607 res
= _mm_min_ps(m1
, m2
);
608 res
= _mm_min_ss(m1
, m2
);
609 res64
= _mm_min_pi16(ms
[0], ms
[1]);
610 res64
= _mm_min_pu8(ms
[0], ms
[1]);
613 // CHECK-LABEL: @test_min
615 // CHECK-LABEL: define available_externally <4 x float> @_mm_min_ps
616 // CHECK: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
617 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])
619 // CHECK-LABEL: define available_externally <4 x float> @_mm_min_ss
620 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
621 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, {{i32|i32 noundef zeroext}} 0)
622 // CHECK: call <4 x float> @vec_min(float vector[4], float vector[4])
623 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
625 // CHECK-LABEL: define available_externally i64 @_mm_min_pi16
626 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
627 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
628 // CHECK: call <8 x i16> @vec_cmplt(short vector[8], short vector[8])
629 // CHECK: call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])
630 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
631 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0
633 // CHECK-LABEL: define available_externally i64 @_mm_min_pu8
634 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
635 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
636 // CHECK: call <16 x i8> @vec_cmplt(unsigned char vector[16], unsigned char vector[16])
637 // CHECK: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])
638 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
639 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0
// Calls the alternate-name minimum wrappers _m_pminsw and _m_pminub on
// ms[0]/ms[1], assigning each result to res64. The directives that follow
// expect them to forward to _mm_min_pi16 and _mm_min_pu8 respectively.
641 void __attribute__((noinline
))
642 test_alt_name_min() {
643 res64
= _m_pminsw(ms
[0], ms
[1]);
644 res64
= _m_pminub(ms
[0], ms
[1]);
647 // CHECK-LABEL: @test_alt_name_min
649 // CHECK-LABEL: define available_externally i64 @_m_pminsw
650 // CHECK: call i64 @_mm_min_pi16
652 // CHECK-LABEL: define available_externally i64 @_m_pminub
653 // CHECK: call i64 @_mm_min_pu8
// Calls the move/mask wrappers: _mm_maskmove_si64 writes through
// (char *)&res64 using ms[0] and ms[1]; _mm_move_ss, _mm_movehl_ps and
// _mm_movelh_ps combine m1 and m2 into res; _mm_movemask_pi8 and
// _mm_movemask_ps store their integer result in i.
655 void __attribute__((noinline
))
657 _mm_maskmove_si64(ms
[0], ms
[1], (char *)&res64
);
658 res
= _mm_move_ss(m1
, m2
);
659 res
= _mm_movehl_ps(m1
, m2
);
660 res
= _mm_movelh_ps(m1
, m2
);
661 i
= _mm_movemask_pi8(ms
[0]);
662 i
= _mm_movemask_ps(m1
);
665 // CHECK-LABEL: @test_move
667 // CHECK-LABEL: define available_externally void @_mm_maskmove_si64
668 // CHECK: store i64 -9187201950435737472, ptr %{{[0-9a-zA-Z_.]+}}, align 8
669 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i64
670 // CHECK: call i64 @_mm_cmpeq_pi8(i64 noundef %[[AND]], i64 noundef %{{[0-9a-zA-Z_.]+}})
671 // CHECK: %[[XOR:[0-9a-zA-Z_.]+]] = xor i64 %{{[0-9a-zA-Z_.]+}}, -1
672 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i64 %{{[0-9a-zA-Z_.]+}}, %[[XOR]]
673 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i64
674 // CHECK: or i64 %[[AND2]], %[[AND3]]
676 // CHECK-LABEL: define available_externally <4 x float> @_mm_move_ss
677 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
679 // CHECK-LABEL: define available_externally <4 x float> @_mm_movehl_ps
680 // CHECK: call <2 x i64> @vec_mergel(unsigned long long vector[2], unsigned long long vector[2])
682 // CHECK-LABEL: define available_externally <4 x float> @_mm_movelh_ps
683 // CHECK: call <2 x i64> @vec_mergeh(unsigned long long vector[2], unsigned long long vector[2])
685 // CHECK-LABEL: define available_externally signext i32 @_mm_movemask_pi8
686 // CHECK-LE: store i64 2269495618449464, ptr %{{[0-9a-zA-Z_.]+}}, align 8
687 // CHECK-BE: store i64 4048780183313844224, ptr %{{[0-9a-zA-Z_.]+}}, align 8
688 // CHECK: %[[CALL:[0-9a-zA-Z_.]+]] = call i64 @llvm.ppc.bpermd
689 // CHECK: trunc i64 %[[CALL]] to i32
691 // CHECK-LABEL: define available_externally signext i32 @_mm_movemask_ps
692 // CHECK-LE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 2113632, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>))
693 // CHECK-LE: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
694 // CHECK-LE: trunc i64 %[[EXT]] to i32
695 // CHECK-BE: call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef bitcast (<4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 2113632> to <16 x i8>))
696 // CHECK-BE: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
697 // CHECK-BE: trunc i64 %[[EXT]] to i32
698 // CHECK-P10-LE: call zeroext i32 @vec_extractm(unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
699 // CHECK-P10-BE: call zeroext i32 @vec_extractm(unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
// Calls the alternate-name wrappers _m_pmovmskb (result in i) and
// _m_maskmovq (writes through (char *)&res64). The directives that follow
// expect them to forward to _mm_movemask_pi8 and _mm_maskmove_si64.
701 void __attribute__((noinline
))
702 test_alt_name_move() {
703 i
= _m_pmovmskb(ms
[0]);
704 _m_maskmovq(ms
[0], ms
[1], (char *)&res64
);
707 // CHECK-LABEL: @test_alt_name_move
709 // CHECK-LABEL: define available_externally signext i32 @_m_pmovmskb
710 // CHECK: call signext i32 @_mm_movemask_pi8
712 // CHECK-LABEL: define available_externally void @_m_maskmovq
713 // CHECK: call void @_mm_maskmove_si64
// Calls the multiply wrappers: _mm_mul_ps and _mm_mul_ss on m1/m2 (result in
// res), then _mm_mulhi_pu16 and _m_pmulhuw on ms[0]/ms[1] (result in res64).
715 void __attribute__((noinline
))
717 res
= _mm_mul_ps(m1
, m2
);
718 res
= _mm_mul_ss(m1
, m2
);
719 res64
= _mm_mulhi_pu16(ms
[0], ms
[1]);
720 res64
= _m_pmulhuw(ms
[0], ms
[1]);
723 // CHECK-LABEL: @test_mul
725 // CHECK-LABEL: define available_externally <4 x float> @_mm_mul_ps
726 // CHECK: fmul <4 x float>
728 // CHECK-LABEL: define available_externally <4 x float> @_mm_mul_ss
729 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
730 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
731 // CHECK: fmul <4 x float>
732 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
734 // CHECK-LABEL: define available_externally i64 @_mm_mulhi_pu16
735 // CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
736 // CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
737 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
738 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
739 // CHECK: call <4 x i32> @vec_vmuleuh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
740 // CHECK: call <4 x i32> @vec_vmulouh(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}})
741 // CHECK: call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16])
742 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
744 // CHECK-LABEL: define available_externally i64 @_m_pmulhuw
745 // CHECK: call i64 @_mm_mulhi_pu16
// Calls _mm_prefetch on ms with the _MM_HINT_NTA hint; the call produces no
// value to store.
747 void __attribute__((noinline
))
749 _mm_prefetch(ms
, _MM_HINT_NTA
);
752 // CHECK-LABEL: @test_prefetch
754 // CHECK-LABEL: define available_externally void @_mm_prefetch
755 // CHECK: call void @llvm.prefetch.p0(ptr %{{[0-9a-zA-Z_.]+}}, i32 0, i32 3, i32 1)
// Calls _mm_rcp_ps and _mm_rcp_ss on m1, assigning each result to res.
757 void __attribute__((noinline
))
759 res
= _mm_rcp_ps(m1
);
760 res
= _mm_rcp_ss(m1
);
763 // CHECK-LABEL: @test_rcp
765 // CHECK-LABEL: define available_externally <4 x float> @_mm_rcp_ps
766 // CHECK: call <4 x float> @vec_re(float vector[4])
768 // CHECK-LABEL: define available_externally <4 x float> @_mm_rcp_ss
769 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)
770 // CHECK: call <4 x float> @_mm_rcp_ps(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
771 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
// Calls _mm_rsqrt_ps and _mm_rsqrt_ss on m1, assigning each result to res.
773 void __attribute__((noinline
))
775 res
= _mm_rsqrt_ps(m1
);
776 res
= _mm_rsqrt_ss(m1
);
779 // CHECK-LABEL: @test_rsqrt
781 // CHECK-LABEL: define available_externally <4 x float> @_mm_rsqrt_ps
782 // CHECK: call <4 x float> @vec_rsqrte(float vector[4])
784 // CHECK-LABEL: define available_externally <4 x float> @_mm_rsqrt_ss
785 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
786 // CHECK: call <4 x float> @vec_rsqrte(float vector[4])
787 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
// Calls _mm_sad_pu8 and _m_psadbw on ms[0]/ms[1], assigning each result to
// res64.
789 void __attribute__((noinline
))
791 res64
= _mm_sad_pu8(ms
[0], ms
[1]);
792 res64
= _m_psadbw(ms
[0], ms
[1]);
795 // CHECK-LABEL: @test_sad
797 // CHECK-LABEL: define available_externally i64 @_mm_sad_pu8
798 // CHECK: call void @llvm.memset.p0.i64(ptr align 8 %{{[0-9a-zA-Z_.]+}}, i8 0, i64 8, i1 false)
799 // CHECK: insertelement <2 x i64> <i64 0, i64 undef>, i64 %{{[0-9a-zA-Z_.]+}}, i32 1
800 // CHECK: insertelement <2 x i64> <i64 0, i64 undef>, i64 %{{[0-9a-zA-Z_.]+}}, i32 1
801 // CHECK: call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])
802 // CHECK: call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])
803 // CHECK: call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16])
804 // CHECK: call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
805 // CHECK: call <4 x i32> @vec_sums(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
806 // CHECK: %[[EXT:[0-9a-zA-Z_.]+]] = extractelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 3
807 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[EXT]] to i16
808 // CHECK: %[[GEP:[0-9a-zA-Z_.]+]] = getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
809 // CHECK: store i16 %[[TRUNC]], ptr %[[GEP]], align 8
811 // CHECK-LABEL: define available_externally i64 @_m_psadbw
812 // CHECK: call i64 @_mm_sad_pu8
// Calls the vector-construction wrappers and assigns each result to res:
// _mm_set_ps and _mm_setr_ps take fs[0]..fs[3]; _mm_set_ps1, _mm_set_ss and
// _mm_set1_ps take fs[0] only.
814 void __attribute__((noinline
))
816 res
= _mm_set_ps(fs
[0], fs
[1], fs
[2], fs
[3]);
817 res
= _mm_set_ps1(fs
[0]);
818 res
= _mm_set_ss(fs
[0]);
819 res
= _mm_set1_ps(fs
[0]);
820 res
= _mm_setr_ps(fs
[0], fs
[1], fs
[2], fs
[3]);
823 // CHECK-LABEL: @test_set
825 // CHECK-LABEL: define available_externally <4 x float> @_mm_set_ps
826 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> undef, float %{{[0-9a-zA-Z_.]+}}, i32 0
827 // CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1
828 // CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2
829 // CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3
830 // CHECK: store <4 x float> %[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16
832 // CHECK-LABEL: define available_externally <4 x float> @_mm_set_ps1
833 // CHECK: call <4 x float> @_mm_set1_ps
835 // CHECK-LABEL: define available_externally <4 x float> @_mm_set_ss
836 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> undef, float %{{[0-9a-zA-Z_.]+}}, i32 0
837 // CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float 0.000000e+00, i32 1
838 // CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float 0.000000e+00, i32 2
839 // CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float 0.000000e+00, i32 3
840 // CHECK: store <4 x float> %[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16
842 // CHECK-LABEL: define available_externally <4 x float> @_mm_set1_ps
843 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> undef, float %{{[0-9a-zA-Z_.]+}}, i32 0
844 // CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1
845 // CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2
846 // CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3
847 // CHECK: store <4 x float> %[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16
849 // CHECK-LABEL: define available_externally <4 x float> @_mm_setr_ps
850 // CHECK: %[[VEC:[0-9a-zA-Z_.]+]] = insertelement <4 x float> undef, float %{{[0-9a-zA-Z_.]+}}, i32 0
851 // CHECK: %[[VEC2:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC]], float %{{[0-9a-zA-Z_.]+}}, i32 1
852 // CHECK: %[[VEC3:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC2]], float %{{[0-9a-zA-Z_.]+}}, i32 2
853 // CHECK: %[[VEC4:[0-9a-zA-Z_.]+]] = insertelement <4 x float> %[[VEC3]], float %{{[0-9a-zA-Z_.]+}}, i32 3
854 // CHECK: store <4 x float> %[[VEC4]], ptr %{{[0-9a-zA-Z_.]+}}, align 16
// Calls _mm_setzero_ps (no arguments) and assigns the result to res.
856 void __attribute__((noinline
))
858 res
= _mm_setzero_ps();
861 // CHECK-LABEL: @test_setzero
863 // CHECK-LABEL: define available_externally <4 x float> @_mm_setzero_ps
864 // CHECK: store <4 x float> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
866 void __attribute__((noinline
))
871 // CHECK-LABEL: @test_sfence
873 // CHECK-LABEL: define available_externally void @_mm_sfence
874 // CHECK: fence release
// Calls the shuffle wrappers with i as the selector argument:
// _mm_shuffle_pi16 and _m_pshufw on ms[0] (result in res64), and
// _mm_shuffle_ps on m1/m2 (result in res).
876 void __attribute__((noinline
))
878 res64
= _mm_shuffle_pi16(ms
[0], i
);
879 res
= _mm_shuffle_ps(m1
, m2
, i
);
880 res64
= _m_pshufw(ms
[0], i
);
883 // CHECK-LABEL: @test_shuffle
885 // CHECK-LABEL: define available_externally i64 @_mm_shuffle_pi16
886 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
887 // CHECK: sext i32 %[[AND]] to i64
888 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2
889 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
890 // CHECK: sext i32 %[[AND2]] to i64
891 // CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4
892 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
893 // CHECK: sext i32 %[[AND3]] to i64
894 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
895 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
896 // CHECK: sext i32 %[[AND4]] to i64
897 // CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
898 // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
899 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
900 // CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
901 // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
902 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
903 // CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
904 // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 2
905 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 1
906 // CHECK: getelementptr inbounds [4 x i16], ptr @_mm_shuffle_pi16.__permute_selectors, i64 0, i64 %{{[0-9a-zA-Z_.]+}}
907 // CHECK-LE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 3
908 // CHECK-BE: getelementptr inbounds [4 x i16], ptr %{{[0-9a-zA-Z_.]+}}, i64 0, i64 0
909 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
910 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
911 // CHECK: call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])
912 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
914 // CHECK-LABEL: define available_externally <4 x float> @_mm_shuffle_ps
915 // CHECK: %[[AND:[0-9a-zA-Z_.]+]] = and i32 %{{[0-9a-zA-Z_.]+}}, 3
916 // CHECK: sext i32 %[[AND]] to i64
917 // CHECK: %[[SHR:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 2
918 // CHECK: %[[AND2:[0-9a-zA-Z_.]+]] = and i32 %[[SHR]], 3
919 // CHECK: sext i32 %[[AND2]] to i64
920 // CHECK: %[[SHR2:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 4
921 // CHECK: %[[AND3:[0-9a-zA-Z_.]+]] = and i32 %[[SHR2]], 3
922 // CHECK: sext i32 %[[AND3]] to i64
923 // CHECK: %[[SHR3:[0-9a-zA-Z_.]+]] = ashr i32 %{{[0-9a-zA-Z_.]+}}, 6
924 // CHECK: %[[AND4:[0-9a-zA-Z_.]+]] = and i32 %[[SHR3]], 3
925 // CHECK: sext i32 %[[AND4]] to i64
926 // CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
927 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 0
928 // CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
929 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %{{[0-9a-zA-Z_.]+}}, i32 1
930 // CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
931 // CHECK: %[[ADD:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
932 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD]], i32 2
933 // CHECK: getelementptr inbounds [4 x i32], ptr @_mm_shuffle_ps.__permute_selectors, i64 0, i64
934 // CHECK: %[[ADD2:[0-9a-zA-Z_.]+]] = add i32 %{{[0-9a-zA-Z_.]+}}, 269488144
935 // CHECK: insertelement <4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 %[[ADD2]], i32 3
936 // CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])
938 // CHECK-LABEL: define available_externally i64 @_m_pshufw
939 // CHECK: call i64 @_mm_shuffle_pi16
// Calls _mm_sqrt_ps and _mm_sqrt_ss on m1, assigning each result to res.
941 void __attribute__((noinline
))
943 res
= _mm_sqrt_ps(m1
);
944 res
= _mm_sqrt_ss(m1
);
947 // CHECK-LABEL: @test_sqrt
949 // CHECK-LABEL: define available_externally <4 x float> @_mm_sqrt_ps
950 // CHECK: call <4 x float> @vec_sqrt(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
952 // CHECK-LABEL: define available_externally <4 x float> @_mm_sqrt_ss
953 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
954 // CHECK: call <4 x float> @vec_sqrt(float vector[4])
955 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
// Calls each store wrapper once with m1 as the source value:
// _mm_store_ps, _mm_store_ps1, _mm_store_ss, _mm_store1_ps and _mm_storer_ps
// write through fs; _mm_storeh_pi and _mm_storel_pi write through ms.
957 void __attribute__((noinline
))
959 _mm_store_ps(fs
, m1
);
960 _mm_store_ps1(fs
, m1
);
961 _mm_store_ss(fs
, m1
);
962 _mm_store1_ps(fs
, m1
);
963 _mm_storeh_pi(ms
, m1
);
964 _mm_storel_pi(ms
, m1
);
965 _mm_storer_ps(fs
, m1
);
968 // CHECK-LABEL: @test_store
970 // CHECK-LABEL: define available_externally void @_mm_store_ps
971 // CHECK: call void @vec_st(float vector[4], long, float vector[4]*)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i64 noundef 0, ptr noundef %{{[0-9a-zA-Z_.]+}})
973 // CHECK-LABEL: define available_externally void @_mm_store_ps1
974 // CHECK: call void @_mm_store1_ps
976 // CHECK-LABEL: define available_externally void @_mm_store_ss
977 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
978 // CHECK: store float %[[VAL]], ptr %{{[0-9a-zA-Z_.]+}}, align 4
980 // CHECK-LABEL: define available_externally void @_mm_store1_ps
981 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
982 // CHECK: call void @_mm_store_ps
984 // CHECK-LABEL: define available_externally void @_mm_storeh_pi
985 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 1
986 // CHECK: store i64 %[[VAL]], ptr %{{[0-9a-zA-Z_.]+}}, align 8
988 // CHECK-LABEL: define available_externally void @_mm_storel_pi
989 // CHECK: %[[VAL:[0-9a-zA-Z_.]+]] = extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
990 // CHECK: store i64 %[[VAL]], ptr %{{[0-9a-zA-Z_.]+}}, align 8
992 // CHECK-LABEL: define available_externally void @_mm_storer_ps
993 // CHECK: call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
994 // CHECK: call void @_mm_store_ps
// Calls the streaming-store wrappers: _mm_stream_pi stores ms[0] through
// &res64, and _mm_stream_ps stores m1 through &fs[0].
996 void __attribute__((noinline
))
998 _mm_stream_pi(&res64
, ms
[0]);
999 _mm_stream_ps(&fs
[0], m1
);
1002 // CHECK-LABEL: @test_stream
1004 // CHECK-LABEL: define available_externally void @_mm_stream_pi
1005 // CHECK: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})
1007 // CHECK-LABEL: define available_externally void @_mm_stream_ps
1008 // CHECK: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(ptr %{{[0-9a-zA-Z_.]+}})
1009 // CHECK: call void @_mm_store_ps
// Calls _mm_sub_ps and _mm_sub_ss on m1/m2, assigning each result to res.
1011 void __attribute__((noinline
))
1013 res
= _mm_sub_ps(m1
, m2
);
1014 res
= _mm_sub_ss(m1
, m2
);
1017 // CHECK-LABEL: @test_sub
1019 // CHECK-LABEL: define available_externally <4 x float> @_mm_sub_ps
1020 // CHECK: fsub <4 x float>
1022 // CHECK-LABEL: define available_externally <4 x float> @_mm_sub_ss
1023 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1024 // CHECK: call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
1025 // CHECK: fsub <4 x float>
1026 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)
// Invokes _MM_TRANSPOSE4_PS on m1, m2, m3 and m4. No result is assigned
// here; the directives that follow look for merge calls directly under the
// test's own label rather than under a separate wrapper definition.
1028 void __attribute__((noinline
))
1031 _MM_TRANSPOSE4_PS(m1
, m2
, m3
, m4
);
1034 // CHECK-LABEL: @test_transpose
1036 // CHECK: call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
1037 // CHECK: call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
1038 // CHECK: call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
1039 // CHECK: call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
1040 // CHECK: call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
1041 // CHECK: call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
1042 // CHECK: call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
1043 // CHECK: call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
// Calls the six scalar comparison wrappers (_mm_ucomieq_ss, _mm_ucomige_ss,
// _mm_ucomigt_ss, _mm_ucomile_ss, _mm_ucomilt_ss, _mm_ucomineq_ss) on m1/m2,
// assigning each integer result to i.
1045 void __attribute__((noinline
))
1047 i
= _mm_ucomieq_ss(m1
, m2
);
1048 i
= _mm_ucomige_ss(m1
, m2
);
1049 i
= _mm_ucomigt_ss(m1
, m2
);
1050 i
= _mm_ucomile_ss(m1
, m2
);
1051 i
= _mm_ucomilt_ss(m1
, m2
);
1052 i
= _mm_ucomineq_ss(m1
, m2
);
1055 // CHECK-LABEL: @test_ucomi
1057 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomieq_ss
1058 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
1059 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
1060 // CHECK: fcmp oeq float %[[VAL1]], %[[VAL2]]
1062 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomige_ss
1063 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
1064 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
1065 // CHECK: fcmp oge float %[[VAL1]], %[[VAL2]]
1067 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomigt_ss
1068 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
1069 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
1070 // CHECK: fcmp ogt float %[[VAL1]], %[[VAL2]]
1072 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomile_ss
1073 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
1074 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
1075 // CHECK: fcmp ole float %[[VAL1]], %[[VAL2]]
1077 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomilt_ss
1078 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
1079 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
1080 // CHECK: fcmp olt float %[[VAL1]], %[[VAL2]]
1082 // CHECK-LABEL: define available_externally signext i32 @_mm_ucomineq_ss
1083 // CHECK: %[[VAL1:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
1084 // CHECK: %[[VAL2:[0-9a-zA-Z_.]+]] = extractelement <4 x float> %{{[0-9a-zA-Z_.]+}}, i32 0
1085 // CHECK: fcmp une float %[[VAL1]], %[[VAL2]]
// Calls _mm_undefined_ps (no arguments) and assigns the result to res.
1087 void __attribute__((noinline
))
1089 res
= _mm_undefined_ps();
1092 // CHECK-LABEL: @test_undefined
1094 // CHECK-LABEL: define available_externally <4 x float> @_mm_undefined_ps
1095 // CHECK: alloca <4 x float>, align 16
1096 // CHECK: load <4 x float>, ptr %[[ADDR:[0-9a-zA-Z_.]+]], align 16
1097 // CHECK: load <4 x float>, ptr %[[ADDR]], align 16
// Calls _mm_unpackhi_ps and _mm_unpacklo_ps on m1/m2, assigning each result
// to res.
1099 void __attribute__((noinline
))
1101 res
= _mm_unpackhi_ps(m1
, m2
);
1102 res
= _mm_unpacklo_ps(m1
, m2
);
1105 // CHECK-LABEL: @test_unpack
1107 // CHECK-LABEL: define available_externally <4 x float> @_mm_unpackhi_ps
1108 // CHECK: call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
1110 // CHECK-LABEL: define available_externally <4 x float> @_mm_unpacklo_ps
1111 // CHECK: call <4 x float> @vec_vmrghw(float vector[4], float vector[4])