Revert "[llvm] Improve llvm.objectsize computation by computing GEP, alloca and mallo...
[llvm-project.git] / clang / test / CodeGen / LoongArch / lsx / builtin.c
blob05a3d13a7fb9aeb9eb089f9ad49167a31e633754
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s
// 16-byte vectors of char elements; the _b variants lower alignment to 1 byte.
typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));
typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1)));
typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16)));
typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1)));

// 16-byte vectors of short elements; the _h variants lower alignment to 2 bytes.
typedef short v8i16 __attribute__((vector_size(16), aligned(16)));
typedef short v8i16_h __attribute__((vector_size(16), aligned(2)));
typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16)));
typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2)));

// 16-byte vectors of int elements; the _w variants lower alignment to 4 bytes.
typedef int v4i32 __attribute__((vector_size(16), aligned(16)));
typedef int v4i32_w __attribute__((vector_size(16), aligned(4)));
typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16)));
typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4)));

// 16-byte vectors of long long elements; the _d variants lower alignment to 8 bytes.
typedef long long v2i64 __attribute__((vector_size(16), aligned(16)));
typedef long long v2i64_d __attribute__((vector_size(16), aligned(8)));
typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16)));
typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8)));

// 16-byte floating-point vectors (float and double elements).
typedef float v4f32 __attribute__((vector_size(16), aligned(16)));
typedef float v4f32_w __attribute__((vector_size(16), aligned(4)));
typedef double v2f64 __attribute__((vector_size(16), aligned(16)));
typedef double v2f64_d __attribute__((vector_size(16), aligned(8)));

// 16-byte vector types marked __may_alias__; no explicit alignment attribute.
typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__));
typedef float __m128 __attribute__((__vector_size__(16), __may_alias__));
typedef double __m128d __attribute__((__vector_size__(16), __may_alias__));
30 // CHECK-LABEL: @vsll_b(
31 // CHECK-NEXT: entry:
32 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
33 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
34 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
35 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
36 // CHECK-NEXT: ret i128 [[TMP3]]
38 v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); }
39 // CHECK-LABEL: @vsll_h(
40 // CHECK-NEXT: entry:
41 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
42 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
43 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
44 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
45 // CHECK-NEXT: ret i128 [[TMP3]]
47 v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); }
48 // CHECK-LABEL: @vsll_w(
49 // CHECK-NEXT: entry:
50 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
51 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
52 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
53 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
54 // CHECK-NEXT: ret i128 [[TMP3]]
56 v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsll_w(_1, _2); }
57 // CHECK-LABEL: @vsll_d(
58 // CHECK-NEXT: entry:
59 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
60 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
61 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
62 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
63 // CHECK-NEXT: ret i128 [[TMP3]]
65 v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); }
66 // CHECK-LABEL: @vslli_b(
67 // CHECK-NEXT: entry:
68 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
69 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1)
70 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
71 // CHECK-NEXT: ret i128 [[TMP2]]
73 v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); }
74 // CHECK-LABEL: @vslli_h(
75 // CHECK-NEXT: entry:
76 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
77 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1)
78 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
79 // CHECK-NEXT: ret i128 [[TMP2]]
81 v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); }
82 // CHECK-LABEL: @vslli_w(
83 // CHECK-NEXT: entry:
84 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
85 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1)
86 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
87 // CHECK-NEXT: ret i128 [[TMP2]]
89 v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); }
90 // CHECK-LABEL: @vslli_d(
91 // CHECK-NEXT: entry:
92 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
93 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1)
94 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
95 // CHECK-NEXT: ret i128 [[TMP2]]
97 v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); }
98 // CHECK-LABEL: @vsra_b(
99 // CHECK-NEXT: entry:
100 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
101 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
102 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
103 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
104 // CHECK-NEXT: ret i128 [[TMP3]]
106 v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); }
107 // CHECK-LABEL: @vsra_h(
108 // CHECK-NEXT: entry:
109 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
110 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
111 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
112 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
113 // CHECK-NEXT: ret i128 [[TMP3]]
115 v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); }
116 // CHECK-LABEL: @vsra_w(
117 // CHECK-NEXT: entry:
118 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
119 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
120 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
121 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
122 // CHECK-NEXT: ret i128 [[TMP3]]
124 v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); }
125 // CHECK-LABEL: @vsra_d(
126 // CHECK-NEXT: entry:
127 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
128 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
129 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
130 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
131 // CHECK-NEXT: ret i128 [[TMP3]]
133 v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); }
134 // CHECK-LABEL: @vsrai_b(
135 // CHECK-NEXT: entry:
136 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
137 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1)
138 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
139 // CHECK-NEXT: ret i128 [[TMP2]]
141 v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); }
142 // CHECK-LABEL: @vsrai_h(
143 // CHECK-NEXT: entry:
144 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
145 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1)
146 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
147 // CHECK-NEXT: ret i128 [[TMP2]]
149 v8i16 vsrai_h(v8i16 _1) { return __builtin_lsx_vsrai_h(_1, 1); }
150 // CHECK-LABEL: @vsrai_w(
151 // CHECK-NEXT: entry:
152 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
153 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1)
154 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
155 // CHECK-NEXT: ret i128 [[TMP2]]
157 v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); }
158 // CHECK-LABEL: @vsrai_d(
159 // CHECK-NEXT: entry:
160 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
161 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1)
162 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
163 // CHECK-NEXT: ret i128 [[TMP2]]
165 v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); }
166 // CHECK-LABEL: @vsrar_b(
167 // CHECK-NEXT: entry:
168 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
169 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
170 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
171 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
172 // CHECK-NEXT: ret i128 [[TMP3]]
174 v16i8 vsrar_b(v16i8 _1, v16i8 _2) {
175 return __builtin_lsx_vsrar_b(_1, _2);
177 // CHECK-LABEL: @vsrar_h(
178 // CHECK-NEXT: entry:
179 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
180 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
181 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
182 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
183 // CHECK-NEXT: ret i128 [[TMP3]]
185 v8i16 vsrar_h(v8i16 _1, v8i16 _2) {
186 return __builtin_lsx_vsrar_h(_1, _2);
188 // CHECK-LABEL: @vsrar_w(
189 // CHECK-NEXT: entry:
190 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
191 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
192 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
193 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
194 // CHECK-NEXT: ret i128 [[TMP3]]
196 v4i32 vsrar_w(v4i32 _1, v4i32 _2) {
197 return __builtin_lsx_vsrar_w(_1, _2);
199 // CHECK-LABEL: @vsrar_d(
200 // CHECK-NEXT: entry:
201 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
202 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
203 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
204 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
205 // CHECK-NEXT: ret i128 [[TMP3]]
207 v2i64 vsrar_d(v2i64 _1, v2i64 _2) {
208 return __builtin_lsx_vsrar_d(_1, _2);
210 // CHECK-LABEL: @vsrari_b(
211 // CHECK-NEXT: entry:
212 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
213 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1)
214 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
215 // CHECK-NEXT: ret i128 [[TMP2]]
217 v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); }
218 // CHECK-LABEL: @vsrari_h(
219 // CHECK-NEXT: entry:
220 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
221 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1)
222 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
223 // CHECK-NEXT: ret i128 [[TMP2]]
225 v8i16 vsrari_h(v8i16 _1) { return __builtin_lsx_vsrari_h(_1, 1); }
226 // CHECK-LABEL: @vsrari_w(
227 // CHECK-NEXT: entry:
228 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
229 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1)
230 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
231 // CHECK-NEXT: ret i128 [[TMP2]]
233 v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); }
234 // CHECK-LABEL: @vsrari_d(
235 // CHECK-NEXT: entry:
236 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
237 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1)
238 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
239 // CHECK-NEXT: ret i128 [[TMP2]]
241 v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); }
242 // CHECK-LABEL: @vsrl_b(
243 // CHECK-NEXT: entry:
244 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
245 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
246 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
247 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
248 // CHECK-NEXT: ret i128 [[TMP3]]
250 v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); }
251 // CHECK-LABEL: @vsrl_h(
252 // CHECK-NEXT: entry:
253 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
254 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
255 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
256 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
257 // CHECK-NEXT: ret i128 [[TMP3]]
259 v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); }
260 // CHECK-LABEL: @vsrl_w(
261 // CHECK-NEXT: entry:
262 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
263 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
264 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
265 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
266 // CHECK-NEXT: ret i128 [[TMP3]]
268 v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); }
269 // CHECK-LABEL: @vsrl_d(
270 // CHECK-NEXT: entry:
271 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
272 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
273 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
274 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
275 // CHECK-NEXT: ret i128 [[TMP3]]
277 v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsrl_d(_1, _2); }
278 // CHECK-LABEL: @vsrli_b(
279 // CHECK-NEXT: entry:
280 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
281 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1)
282 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
283 // CHECK-NEXT: ret i128 [[TMP2]]
285 v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); }
286 // CHECK-LABEL: @vsrli_h(
287 // CHECK-NEXT: entry:
288 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
289 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1)
290 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
291 // CHECK-NEXT: ret i128 [[TMP2]]
293 v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); }
294 // CHECK-LABEL: @vsrli_w(
295 // CHECK-NEXT: entry:
296 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
297 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1)
298 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
299 // CHECK-NEXT: ret i128 [[TMP2]]
301 v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); }
302 // CHECK-LABEL: @vsrli_d(
303 // CHECK-NEXT: entry:
304 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
305 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1)
306 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
307 // CHECK-NEXT: ret i128 [[TMP2]]
309 v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); }
310 // CHECK-LABEL: @vsrlr_b(
311 // CHECK-NEXT: entry:
312 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
313 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
314 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
315 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
316 // CHECK-NEXT: ret i128 [[TMP3]]
318 v16i8 vsrlr_b(v16i8 _1, v16i8 _2) {
319 return __builtin_lsx_vsrlr_b(_1, _2);
321 // CHECK-LABEL: @vsrlr_h(
322 // CHECK-NEXT: entry:
323 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
324 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
325 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
326 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
327 // CHECK-NEXT: ret i128 [[TMP3]]
329 v8i16 vsrlr_h(v8i16 _1, v8i16 _2) {
330 return __builtin_lsx_vsrlr_h(_1, _2);
332 // CHECK-LABEL: @vsrlr_w(
333 // CHECK-NEXT: entry:
334 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
335 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
336 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
337 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
338 // CHECK-NEXT: ret i128 [[TMP3]]
340 v4i32 vsrlr_w(v4i32 _1, v4i32 _2) {
341 return __builtin_lsx_vsrlr_w(_1, _2);
343 // CHECK-LABEL: @vsrlr_d(
344 // CHECK-NEXT: entry:
345 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
346 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
347 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
348 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
349 // CHECK-NEXT: ret i128 [[TMP3]]
351 v2i64 vsrlr_d(v2i64 _1, v2i64 _2) {
352 return __builtin_lsx_vsrlr_d(_1, _2);
354 // CHECK-LABEL: @vsrlri_b(
355 // CHECK-NEXT: entry:
356 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
357 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1)
358 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
359 // CHECK-NEXT: ret i128 [[TMP2]]
361 v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); }
362 // CHECK-LABEL: @vsrlri_h(
363 // CHECK-NEXT: entry:
364 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
365 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1)
366 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
367 // CHECK-NEXT: ret i128 [[TMP2]]
369 v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); }
370 // CHECK-LABEL: @vsrlri_w(
371 // CHECK-NEXT: entry:
372 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
373 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1)
374 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
375 // CHECK-NEXT: ret i128 [[TMP2]]
377 v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); }
378 // CHECK-LABEL: @vsrlri_d(
379 // CHECK-NEXT: entry:
380 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
381 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1)
382 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
383 // CHECK-NEXT: ret i128 [[TMP2]]
385 v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); }
386 // CHECK-LABEL: @vbitclr_b(
387 // CHECK-NEXT: entry:
388 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
389 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
390 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
391 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
392 // CHECK-NEXT: ret i128 [[TMP3]]
394 v16u8 vbitclr_b(v16u8 _1, v16u8 _2) {
395 return __builtin_lsx_vbitclr_b(_1, _2);
397 // CHECK-LABEL: @vbitclr_h(
398 // CHECK-NEXT: entry:
399 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
400 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
401 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
402 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
403 // CHECK-NEXT: ret i128 [[TMP3]]
405 v8u16 vbitclr_h(v8u16 _1, v8u16 _2) {
406 return __builtin_lsx_vbitclr_h(_1, _2);
408 // CHECK-LABEL: @vbitclr_w(
409 // CHECK-NEXT: entry:
410 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
411 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
412 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
413 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
414 // CHECK-NEXT: ret i128 [[TMP3]]
416 v4u32 vbitclr_w(v4u32 _1, v4u32 _2) {
417 return __builtin_lsx_vbitclr_w(_1, _2);
419 // CHECK-LABEL: @vbitclr_d(
420 // CHECK-NEXT: entry:
421 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
422 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
423 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
424 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
425 // CHECK-NEXT: ret i128 [[TMP3]]
427 v2u64 vbitclr_d(v2u64 _1, v2u64 _2) {
428 return __builtin_lsx_vbitclr_d(_1, _2);
430 // CHECK-LABEL: @vbitclri_b(
431 // CHECK-NEXT: entry:
432 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
433 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1)
434 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
435 // CHECK-NEXT: ret i128 [[TMP2]]
437 v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); }
438 // CHECK-LABEL: @vbitclri_h(
439 // CHECK-NEXT: entry:
440 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
441 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1)
442 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
443 // CHECK-NEXT: ret i128 [[TMP2]]
445 v8u16 vbitclri_h(v8u16 _1) { return __builtin_lsx_vbitclri_h(_1, 1); }
446 // CHECK-LABEL: @vbitclri_w(
447 // CHECK-NEXT: entry:
448 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
449 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1)
450 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
451 // CHECK-NEXT: ret i128 [[TMP2]]
453 v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); }
454 // CHECK-LABEL: @vbitclri_d(
455 // CHECK-NEXT: entry:
456 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
457 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1)
458 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
459 // CHECK-NEXT: ret i128 [[TMP2]]
461 v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); }
462 // CHECK-LABEL: @vbitset_b(
463 // CHECK-NEXT: entry:
464 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
465 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
466 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
467 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
468 // CHECK-NEXT: ret i128 [[TMP3]]
470 v16u8 vbitset_b(v16u8 _1, v16u8 _2) {
471 return __builtin_lsx_vbitset_b(_1, _2);
473 // CHECK-LABEL: @vbitset_h(
474 // CHECK-NEXT: entry:
475 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
476 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
477 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
478 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
479 // CHECK-NEXT: ret i128 [[TMP3]]
481 v8u16 vbitset_h(v8u16 _1, v8u16 _2) {
482 return __builtin_lsx_vbitset_h(_1, _2);
484 // CHECK-LABEL: @vbitset_w(
485 // CHECK-NEXT: entry:
486 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
487 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
488 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
489 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
490 // CHECK-NEXT: ret i128 [[TMP3]]
492 v4u32 vbitset_w(v4u32 _1, v4u32 _2) {
493 return __builtin_lsx_vbitset_w(_1, _2);
495 // CHECK-LABEL: @vbitset_d(
496 // CHECK-NEXT: entry:
497 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
498 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
499 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
500 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
501 // CHECK-NEXT: ret i128 [[TMP3]]
503 v2u64 vbitset_d(v2u64 _1, v2u64 _2) {
504 return __builtin_lsx_vbitset_d(_1, _2);
506 // CHECK-LABEL: @vbitseti_b(
507 // CHECK-NEXT: entry:
508 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
509 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1)
510 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
511 // CHECK-NEXT: ret i128 [[TMP2]]
513 v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); }
514 // CHECK-LABEL: @vbitseti_h(
515 // CHECK-NEXT: entry:
516 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
517 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1)
518 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
519 // CHECK-NEXT: ret i128 [[TMP2]]
521 v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); }
522 // CHECK-LABEL: @vbitseti_w(
523 // CHECK-NEXT: entry:
524 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
525 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1)
526 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
527 // CHECK-NEXT: ret i128 [[TMP2]]
529 v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); }
530 // CHECK-LABEL: @vbitseti_d(
531 // CHECK-NEXT: entry:
532 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
533 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1)
534 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
535 // CHECK-NEXT: ret i128 [[TMP2]]
537 v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); }
538 // CHECK-LABEL: @vbitrev_b(
539 // CHECK-NEXT: entry:
540 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
541 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
542 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
543 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
544 // CHECK-NEXT: ret i128 [[TMP3]]
546 v16u8 vbitrev_b(v16u8 _1, v16u8 _2) {
547 return __builtin_lsx_vbitrev_b(_1, _2);
549 // CHECK-LABEL: @vbitrev_h(
550 // CHECK-NEXT: entry:
551 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
552 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
553 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
554 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
555 // CHECK-NEXT: ret i128 [[TMP3]]
557 v8u16 vbitrev_h(v8u16 _1, v8u16 _2) {
558 return __builtin_lsx_vbitrev_h(_1, _2);
560 // CHECK-LABEL: @vbitrev_w(
561 // CHECK-NEXT: entry:
562 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
563 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
564 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
565 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
566 // CHECK-NEXT: ret i128 [[TMP3]]
568 v4u32 vbitrev_w(v4u32 _1, v4u32 _2) {
569 return __builtin_lsx_vbitrev_w(_1, _2);
571 // CHECK-LABEL: @vbitrev_d(
572 // CHECK-NEXT: entry:
573 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
574 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
575 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
576 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
577 // CHECK-NEXT: ret i128 [[TMP3]]
579 v2u64 vbitrev_d(v2u64 _1, v2u64 _2) {
580 return __builtin_lsx_vbitrev_d(_1, _2);
582 // CHECK-LABEL: @vbitrevi_b(
583 // CHECK-NEXT: entry:
584 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
585 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1)
586 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
587 // CHECK-NEXT: ret i128 [[TMP2]]
589 v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); }
590 // CHECK-LABEL: @vbitrevi_h(
591 // CHECK-NEXT: entry:
592 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
593 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1)
594 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
595 // CHECK-NEXT: ret i128 [[TMP2]]
597 v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); }
598 // CHECK-LABEL: @vbitrevi_w(
599 // CHECK-NEXT: entry:
600 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
601 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1)
602 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
603 // CHECK-NEXT: ret i128 [[TMP2]]
605 v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); }
606 // CHECK-LABEL: @vbitrevi_d(
607 // CHECK-NEXT: entry:
608 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
609 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1)
610 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
611 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vbitrevi_d on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
613 v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); }
614 // CHECK-LABEL: @vadd_b(
615 // CHECK-NEXT: entry:
616 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
617 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
618 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
619 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
620 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vadd_b; expected IR is pinned by the FileCheck lines above.
622 v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); }
623 // CHECK-LABEL: @vadd_h(
624 // CHECK-NEXT: entry:
625 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
626 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
627 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
628 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
629 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vadd_h; expected IR is pinned by the FileCheck lines above.
631 v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); }
632 // CHECK-LABEL: @vadd_w(
633 // CHECK-NEXT: entry:
634 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
635 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
636 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
637 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
638 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vadd_w; expected IR is pinned by the FileCheck lines above.
640 v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); }
641 // CHECK-LABEL: @vadd_d(
642 // CHECK-NEXT: entry:
643 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
644 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
645 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
646 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
647 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vadd_d; expected IR is pinned by the FileCheck lines above.
649 v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); }
650 // CHECK-LABEL: @vaddi_bu(
651 // CHECK-NEXT: entry:
652 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
653 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1)
654 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
655 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vaddi_bu on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
657 v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); }
658 // CHECK-LABEL: @vaddi_hu(
659 // CHECK-NEXT: entry:
660 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
661 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1)
662 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
663 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vaddi_hu on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
665 v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); }
666 // CHECK-LABEL: @vaddi_wu(
667 // CHECK-NEXT: entry:
668 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
669 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1)
670 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
671 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vaddi_wu on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
673 v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); }
674 // CHECK-LABEL: @vaddi_du(
675 // CHECK-NEXT: entry:
676 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
677 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1)
678 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
679 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vaddi_du on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
681 v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); }
682 // CHECK-LABEL: @vsub_b(
683 // CHECK-NEXT: entry:
684 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
685 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
686 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
687 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
688 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vsub_b; expected IR is pinned by the FileCheck lines above.
690 v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); }
691 // CHECK-LABEL: @vsub_h(
692 // CHECK-NEXT: entry:
693 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
694 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
695 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
696 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
697 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vsub_h; expected IR is pinned by the FileCheck lines above.
699 v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); }
700 // CHECK-LABEL: @vsub_w(
701 // CHECK-NEXT: entry:
702 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
703 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
704 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
705 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
706 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vsub_w; expected IR is pinned by the FileCheck lines above.
708 v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); }
709 // CHECK-LABEL: @vsub_d(
710 // CHECK-NEXT: entry:
711 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
712 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
713 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
714 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
715 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vsub_d; expected IR is pinned by the FileCheck lines above.
717 v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); }
718 // CHECK-LABEL: @vsubi_bu(
719 // CHECK-NEXT: entry:
720 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
721 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1)
722 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
723 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vsubi_bu on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
725 v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); }
726 // CHECK-LABEL: @vsubi_hu(
727 // CHECK-NEXT: entry:
728 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
729 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1)
730 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
731 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vsubi_hu on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
733 v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); }
734 // CHECK-LABEL: @vsubi_wu(
735 // CHECK-NEXT: entry:
736 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
737 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1)
738 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
739 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vsubi_wu on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
741 v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); }
742 // CHECK-LABEL: @vsubi_du(
743 // CHECK-NEXT: entry:
744 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
745 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1)
746 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
747 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vsubi_du on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
749 v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); }
750 // CHECK-LABEL: @vmax_b(
751 // CHECK-NEXT: entry:
752 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
753 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
754 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
755 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
756 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmax_b; expected IR is pinned by the FileCheck lines above.
758 v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); }
759 // CHECK-LABEL: @vmax_h(
760 // CHECK-NEXT: entry:
761 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
762 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
763 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
764 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
765 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmax_h; expected IR is pinned by the FileCheck lines above.
767 v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); }
768 // CHECK-LABEL: @vmax_w(
769 // CHECK-NEXT: entry:
770 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
771 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
772 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
773 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
774 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmax_w; expected IR is pinned by the FileCheck lines above.
776 v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); }
777 // CHECK-LABEL: @vmax_d(
778 // CHECK-NEXT: entry:
779 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
780 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
781 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
782 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
783 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmax_d; expected IR is pinned by the FileCheck lines above.
785 v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, _2); }
786 // CHECK-LABEL: @vmaxi_b(
787 // CHECK-NEXT: entry:
788 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
789 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1)
790 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
791 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmaxi_b on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
793 v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); }
794 // CHECK-LABEL: @vmaxi_h(
795 // CHECK-NEXT: entry:
796 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
797 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1)
798 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
799 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmaxi_h on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
801 v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); }
802 // CHECK-LABEL: @vmaxi_w(
803 // CHECK-NEXT: entry:
804 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
805 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1)
806 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
807 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmaxi_w on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
809 v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); }
810 // CHECK-LABEL: @vmaxi_d(
811 // CHECK-NEXT: entry:
812 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
813 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1)
814 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
815 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmaxi_d on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
817 v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); }
818 // CHECK-LABEL: @vmax_bu(
819 // CHECK-NEXT: entry:
820 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
821 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
822 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
823 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
824 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmax_bu; expected IR is pinned by the FileCheck lines above.
826 v16u8 vmax_bu(v16u8 _1, v16u8 _2) {
827 return __builtin_lsx_vmax_bu(_1, _2);
}
829 // CHECK-LABEL: @vmax_hu(
830 // CHECK-NEXT: entry:
831 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
832 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
833 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
834 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
835 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmax_hu; expected IR is pinned by the FileCheck lines above.
837 v8u16 vmax_hu(v8u16 _1, v8u16 _2) {
838 return __builtin_lsx_vmax_hu(_1, _2);
}
840 // CHECK-LABEL: @vmax_wu(
841 // CHECK-NEXT: entry:
842 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
843 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
844 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
845 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
846 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmax_wu; expected IR is pinned by the FileCheck lines above.
848 v4u32 vmax_wu(v4u32 _1, v4u32 _2) {
849 return __builtin_lsx_vmax_wu(_1, _2);
}
851 // CHECK-LABEL: @vmax_du(
852 // CHECK-NEXT: entry:
853 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
854 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
855 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
856 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
857 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmax_du; expected IR is pinned by the FileCheck lines above.
859 v2u64 vmax_du(v2u64 _1, v2u64 _2) {
860 return __builtin_lsx_vmax_du(_1, _2);
}
862 // CHECK-LABEL: @vmaxi_bu(
863 // CHECK-NEXT: entry:
864 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
865 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1)
866 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
867 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmaxi_bu on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
869 v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); }
870 // CHECK-LABEL: @vmaxi_hu(
871 // CHECK-NEXT: entry:
872 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
873 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1)
874 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
875 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmaxi_hu on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
877 v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); }
878 // CHECK-LABEL: @vmaxi_wu(
879 // CHECK-NEXT: entry:
880 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
881 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1)
882 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
883 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmaxi_wu on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
885 v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); }
886 // CHECK-LABEL: @vmaxi_du(
887 // CHECK-NEXT: entry:
888 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
889 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1)
890 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
891 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmaxi_du on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
893 v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); }
894 // CHECK-LABEL: @vmin_b(
895 // CHECK-NEXT: entry:
896 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
897 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
898 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
899 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
900 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmin_b; expected IR is pinned by the FileCheck lines above.
902 v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); }
903 // CHECK-LABEL: @vmin_h(
904 // CHECK-NEXT: entry:
905 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
906 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
907 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
908 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
909 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmin_h; expected IR is pinned by the FileCheck lines above.
911 v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); }
912 // CHECK-LABEL: @vmin_w(
913 // CHECK-NEXT: entry:
914 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
915 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
916 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
917 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
918 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmin_w; expected IR is pinned by the FileCheck lines above.
920 v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); }
921 // CHECK-LABEL: @vmin_d(
922 // CHECK-NEXT: entry:
923 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
924 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
925 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
926 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
927 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmin_d; expected IR is pinned by the FileCheck lines above.
929 v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); }
930 // CHECK-LABEL: @vmini_b(
931 // CHECK-NEXT: entry:
932 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
933 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1)
934 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
935 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmini_b on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
937 v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); }
938 // CHECK-LABEL: @vmini_h(
939 // CHECK-NEXT: entry:
940 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
941 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1)
942 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
943 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmini_h on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
945 v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); }
946 // CHECK-LABEL: @vmini_w(
947 // CHECK-NEXT: entry:
948 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
949 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1)
950 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
951 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmini_w on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
953 v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); }
954 // CHECK-LABEL: @vmini_d(
955 // CHECK-NEXT: entry:
956 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
957 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1)
958 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
959 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmini_d on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
961 v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); }
962 // CHECK-LABEL: @vmin_bu(
963 // CHECK-NEXT: entry:
964 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
965 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
966 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
967 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
968 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmin_bu; expected IR is pinned by the FileCheck lines above.
970 v16u8 vmin_bu(v16u8 _1, v16u8 _2) {
971 return __builtin_lsx_vmin_bu(_1, _2);
}
973 // CHECK-LABEL: @vmin_hu(
974 // CHECK-NEXT: entry:
975 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
976 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
977 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
978 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
979 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmin_hu; expected IR is pinned by the FileCheck lines above.
981 v8u16 vmin_hu(v8u16 _1, v8u16 _2) {
982 return __builtin_lsx_vmin_hu(_1, _2);
}
984 // CHECK-LABEL: @vmin_wu(
985 // CHECK-NEXT: entry:
986 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
987 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
988 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
989 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
990 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmin_wu; expected IR is pinned by the FileCheck lines above.
992 v4u32 vmin_wu(v4u32 _1, v4u32 _2) {
993 return __builtin_lsx_vmin_wu(_1, _2);
}
995 // CHECK-LABEL: @vmin_du(
996 // CHECK-NEXT: entry:
997 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
998 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
999 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1000 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1001 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vmin_du; expected IR is pinned by the FileCheck lines above.
1003 v2u64 vmin_du(v2u64 _1, v2u64 _2) {
1004 return __builtin_lsx_vmin_du(_1, _2);
}
1006 // CHECK-LABEL: @vmini_bu(
1007 // CHECK-NEXT: entry:
1008 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1009 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1)
1010 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1011 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmini_bu on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
1013 v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); }
1014 // CHECK-LABEL: @vmini_hu(
1015 // CHECK-NEXT: entry:
1016 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1017 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1)
1018 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1019 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmini_hu on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
1021 v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); }
1022 // CHECK-LABEL: @vmini_wu(
1023 // CHECK-NEXT: entry:
1024 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1025 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1)
1026 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1027 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmini_wu on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
1029 v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); }
1030 // CHECK-LABEL: @vmini_du(
1031 // CHECK-NEXT: entry:
1032 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1033 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1)
1034 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1035 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vmini_du on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
1037 v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); }
1038 // CHECK-LABEL: @vseq_b(
1039 // CHECK-NEXT: entry:
1040 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1041 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1042 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1043 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1044 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vseq_b; expected IR is pinned by the FileCheck lines above.
1046 v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); }
1047 // CHECK-LABEL: @vseq_h(
1048 // CHECK-NEXT: entry:
1049 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1050 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1051 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1052 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1053 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vseq_h; expected IR is pinned by the FileCheck lines above.
1055 v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); }
1056 // CHECK-LABEL: @vseq_w(
1057 // CHECK-NEXT: entry:
1058 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1059 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1060 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1061 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1062 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vseq_w; expected IR is pinned by the FileCheck lines above.
1064 v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); }
1065 // CHECK-LABEL: @vseq_d(
1066 // CHECK-NEXT: entry:
1067 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1068 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1069 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1070 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1071 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vseq_d; expected IR is pinned by the FileCheck lines above.
1073 v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); }
1074 // CHECK-LABEL: @vseqi_b(
1075 // CHECK-NEXT: entry:
1076 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1077 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1)
1078 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1079 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vseqi_b on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
1081 v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); }
1082 // CHECK-LABEL: @vseqi_h(
1083 // CHECK-NEXT: entry:
1084 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1085 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1)
1086 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1087 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vseqi_h on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
1089 v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); }
1090 // CHECK-LABEL: @vseqi_w(
1091 // CHECK-NEXT: entry:
1092 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1093 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1)
1094 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1095 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vseqi_w on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
1097 v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); }
1098 // CHECK-LABEL: @vseqi_d(
1099 // CHECK-NEXT: entry:
1100 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1101 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1)
1102 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1103 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vseqi_d on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
1105 v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); }
1106 // CHECK-LABEL: @vslti_b(
1107 // CHECK-NEXT: entry:
1108 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1109 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1)
1110 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1111 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vslti_b on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
1113 v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); }
1114 // CHECK-LABEL: @vslt_b(
1115 // CHECK-NEXT: entry:
1116 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1117 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1118 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1119 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1120 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vslt_b; expected IR is pinned by the FileCheck lines above.
1122 v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); }
1123 // CHECK-LABEL: @vslt_h(
1124 // CHECK-NEXT: entry:
1125 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1126 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1127 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1128 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1129 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vslt_h; expected IR is pinned by the FileCheck lines above.
1131 v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); }
1132 // CHECK-LABEL: @vslt_w(
1133 // CHECK-NEXT: entry:
1134 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1135 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1136 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1137 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1138 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vslt_w; expected IR is pinned by the FileCheck lines above.
1140 v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); }
1141 // CHECK-LABEL: @vslt_d(
1142 // CHECK-NEXT: entry:
1143 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1144 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1145 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1146 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1147 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vslt_d; expected IR is pinned by the FileCheck lines above.
1149 v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); }
1150 // CHECK-LABEL: @vslti_h(
1151 // CHECK-NEXT: entry:
1152 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1153 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1)
1154 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1155 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vslti_h on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
1157 v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); }
1158 // CHECK-LABEL: @vslti_w(
1159 // CHECK-NEXT: entry:
1160 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1161 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1)
1162 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1163 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vslti_w on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
1165 v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); }
1166 // CHECK-LABEL: @vslti_d(
1167 // CHECK-NEXT: entry:
1168 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1169 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1)
1170 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1171 // CHECK-NEXT: ret i128 [[TMP2]]
// Test case: calls __builtin_lsx_vslti_d on _1 with the literal immediate 1; expected IR is pinned by the FileCheck lines above.
1173 v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); }
1174 // CHECK-LABEL: @vslt_bu(
1175 // CHECK-NEXT: entry:
1176 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1177 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1178 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1179 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1180 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vslt_bu; expected IR is pinned by the FileCheck lines above.
// The operands are the unsigned v16u8 type while the declared result type is the signed v16i8.
1182 v16i8 vslt_bu(v16u8 _1, v16u8 _2) {
1183 return __builtin_lsx_vslt_bu(_1, _2);
}
1185 // CHECK-LABEL: @vslt_hu(
1186 // CHECK-NEXT: entry:
1187 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1188 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1189 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1190 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1191 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vslt_hu; expected IR is pinned by the FileCheck lines above.
// The operands are the unsigned v8u16 type while the declared result type is the signed v8i16.
1193 v8i16 vslt_hu(v8u16 _1, v8u16 _2) {
1194 return __builtin_lsx_vslt_hu(_1, _2);
}
1196 // CHECK-LABEL: @vslt_wu(
1197 // CHECK-NEXT: entry:
1198 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1199 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1200 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1201 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1202 // CHECK-NEXT: ret i128 [[TMP3]]
// Test case: forwards _1 and _2 to __builtin_lsx_vslt_wu; expected IR is pinned by the FileCheck lines above.
// The operands are the unsigned v4u32 type while the declared result type is the signed v4i32.
1204 v4i32 vslt_wu(v4u32 _1, v4u32 _2) {
1205 return __builtin_lsx_vslt_wu(_1, _2);
}
1207 // CHECK-LABEL: @vslt_du(
1208 // CHECK-NEXT: entry:
1209 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1210 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1211 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1212 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1213 // CHECK-NEXT: ret i128 [[TMP3]]
1215 v2i64 vslt_du(v2u64 _1, v2u64 _2) {
1216 return __builtin_lsx_vslt_du(_1, _2);
1218 // CHECK-LABEL: @vslti_bu(
1219 // CHECK-NEXT: entry:
1220 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1221 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1)
1222 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1223 // CHECK-NEXT: ret i128 [[TMP2]]
1225 v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); }
1226 // CHECK-LABEL: @vslti_hu(
1227 // CHECK-NEXT: entry:
1228 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1229 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1)
1230 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1231 // CHECK-NEXT: ret i128 [[TMP2]]
1233 v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); }
1234 // CHECK-LABEL: @vslti_wu(
1235 // CHECK-NEXT: entry:
1236 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1237 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1)
1238 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1239 // CHECK-NEXT: ret i128 [[TMP2]]
1241 v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); }
1242 // CHECK-LABEL: @vslti_du(
1243 // CHECK-NEXT: entry:
1244 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1245 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1)
1246 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1247 // CHECK-NEXT: ret i128 [[TMP2]]
1249 v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); }
1250 // CHECK-LABEL: @vsle_b(
1251 // CHECK-NEXT: entry:
1252 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1253 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1254 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1255 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1256 // CHECK-NEXT: ret i128 [[TMP3]]
1258 v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); }
1259 // CHECK-LABEL: @vsle_h(
1260 // CHECK-NEXT: entry:
1261 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1262 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1263 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1264 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1265 // CHECK-NEXT: ret i128 [[TMP3]]
1267 v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); }
1268 // CHECK-LABEL: @vsle_w(
1269 // CHECK-NEXT: entry:
1270 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1271 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1272 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1273 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1274 // CHECK-NEXT: ret i128 [[TMP3]]
1276 v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); }
1277 // CHECK-LABEL: @vsle_d(
1278 // CHECK-NEXT: entry:
1279 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1280 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1281 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1282 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1283 // CHECK-NEXT: ret i128 [[TMP3]]
1285 v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); }
1286 // CHECK-LABEL: @vslei_b(
1287 // CHECK-NEXT: entry:
1288 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1289 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1)
1290 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1291 // CHECK-NEXT: ret i128 [[TMP2]]
1293 v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); }
1294 // CHECK-LABEL: @vslei_h(
1295 // CHECK-NEXT: entry:
1296 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1297 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1)
1298 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1299 // CHECK-NEXT: ret i128 [[TMP2]]
1301 v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); }
1302 // CHECK-LABEL: @vslei_w(
1303 // CHECK-NEXT: entry:
1304 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1305 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1)
1306 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1307 // CHECK-NEXT: ret i128 [[TMP2]]
1309 v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); }
1310 // CHECK-LABEL: @vslei_d(
1311 // CHECK-NEXT: entry:
1312 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1313 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1)
1314 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1315 // CHECK-NEXT: ret i128 [[TMP2]]
1317 v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); }
1318 // CHECK-LABEL: @vsle_bu(
1319 // CHECK-NEXT: entry:
1320 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1321 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1322 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1323 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1324 // CHECK-NEXT: ret i128 [[TMP3]]
1326 v16i8 vsle_bu(v16u8 _1, v16u8 _2) {
1327 return __builtin_lsx_vsle_bu(_1, _2);
1329 // CHECK-LABEL: @vsle_hu(
1330 // CHECK-NEXT: entry:
1331 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1332 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1333 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1334 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1335 // CHECK-NEXT: ret i128 [[TMP3]]
1337 v8i16 vsle_hu(v8u16 _1, v8u16 _2) {
1338 return __builtin_lsx_vsle_hu(_1, _2);
1340 // CHECK-LABEL: @vsle_wu(
1341 // CHECK-NEXT: entry:
1342 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1343 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1344 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1345 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1346 // CHECK-NEXT: ret i128 [[TMP3]]
1348 v4i32 vsle_wu(v4u32 _1, v4u32 _2) {
1349 return __builtin_lsx_vsle_wu(_1, _2);
1351 // CHECK-LABEL: @vsle_du(
1352 // CHECK-NEXT: entry:
1353 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1354 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1355 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1356 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1357 // CHECK-NEXT: ret i128 [[TMP3]]
1359 v2i64 vsle_du(v2u64 _1, v2u64 _2) {
1360 return __builtin_lsx_vsle_du(_1, _2);
1362 // CHECK-LABEL: @vslei_bu(
1363 // CHECK-NEXT: entry:
1364 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1365 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1)
1366 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1367 // CHECK-NEXT: ret i128 [[TMP2]]
1369 v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); }
1370 // CHECK-LABEL: @vslei_hu(
1371 // CHECK-NEXT: entry:
1372 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1373 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1)
1374 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1375 // CHECK-NEXT: ret i128 [[TMP2]]
1377 v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); }
1378 // CHECK-LABEL: @vslei_wu(
1379 // CHECK-NEXT: entry:
1380 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1381 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1)
1382 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1383 // CHECK-NEXT: ret i128 [[TMP2]]
1385 v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); }
1386 // CHECK-LABEL: @vslei_du(
1387 // CHECK-NEXT: entry:
1388 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1389 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1)
1390 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1391 // CHECK-NEXT: ret i128 [[TMP2]]
1393 v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); }
1394 // CHECK-LABEL: @vsat_b(
1395 // CHECK-NEXT: entry:
1396 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1397 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1)
1398 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1399 // CHECK-NEXT: ret i128 [[TMP2]]
1401 v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); }
1402 // CHECK-LABEL: @vsat_h(
1403 // CHECK-NEXT: entry:
1404 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1405 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1)
1406 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1407 // CHECK-NEXT: ret i128 [[TMP2]]
1409 v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); }
1410 // CHECK-LABEL: @vsat_w(
1411 // CHECK-NEXT: entry:
1412 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1413 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1)
1414 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1415 // CHECK-NEXT: ret i128 [[TMP2]]
1417 v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); }
1418 // CHECK-LABEL: @vsat_d(
1419 // CHECK-NEXT: entry:
1420 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1421 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1)
1422 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1423 // CHECK-NEXT: ret i128 [[TMP2]]
1425 v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); }
1426 // CHECK-LABEL: @vsat_bu(
1427 // CHECK-NEXT: entry:
1428 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1429 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1)
1430 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1431 // CHECK-NEXT: ret i128 [[TMP2]]
1433 v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); }
1434 // CHECK-LABEL: @vsat_hu(
1435 // CHECK-NEXT: entry:
1436 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1437 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1)
1438 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1439 // CHECK-NEXT: ret i128 [[TMP2]]
1441 v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 1); }
1442 // CHECK-LABEL: @vsat_wu(
1443 // CHECK-NEXT: entry:
1444 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1445 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1)
1446 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1447 // CHECK-NEXT: ret i128 [[TMP2]]
1449 v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); }
1450 // CHECK-LABEL: @vsat_du(
1451 // CHECK-NEXT: entry:
1452 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1453 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1)
1454 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1455 // CHECK-NEXT: ret i128 [[TMP2]]
1457 v2u64 vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); }
1458 // CHECK-LABEL: @vadda_b(
1459 // CHECK-NEXT: entry:
1460 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1461 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1462 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1463 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1464 // CHECK-NEXT: ret i128 [[TMP3]]
1466 v16i8 vadda_b(v16i8 _1, v16i8 _2) {
1467 return __builtin_lsx_vadda_b(_1, _2);
1469 // CHECK-LABEL: @vadda_h(
1470 // CHECK-NEXT: entry:
1471 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1472 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1473 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1474 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1475 // CHECK-NEXT: ret i128 [[TMP3]]
1477 v8i16 vadda_h(v8i16 _1, v8i16 _2) {
1478 return __builtin_lsx_vadda_h(_1, _2);
1480 // CHECK-LABEL: @vadda_w(
1481 // CHECK-NEXT: entry:
1482 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1483 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1484 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1485 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1486 // CHECK-NEXT: ret i128 [[TMP3]]
1488 v4i32 vadda_w(v4i32 _1, v4i32 _2) {
1489 return __builtin_lsx_vadda_w(_1, _2);
1491 // CHECK-LABEL: @vadda_d(
1492 // CHECK-NEXT: entry:
1493 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1494 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1495 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1496 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1497 // CHECK-NEXT: ret i128 [[TMP3]]
1499 v2i64 vadda_d(v2i64 _1, v2i64 _2) {
1500 return __builtin_lsx_vadda_d(_1, _2);
1502 // CHECK-LABEL: @vsadd_b(
1503 // CHECK-NEXT: entry:
1504 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1505 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1506 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1507 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1508 // CHECK-NEXT: ret i128 [[TMP3]]
1510 v16i8 vsadd_b(v16i8 _1, v16i8 _2) {
1511 return __builtin_lsx_vsadd_b(_1, _2);
1513 // CHECK-LABEL: @vsadd_h(
1514 // CHECK-NEXT: entry:
1515 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1516 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1517 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1518 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1519 // CHECK-NEXT: ret i128 [[TMP3]]
1521 v8i16 vsadd_h(v8i16 _1, v8i16 _2) {
1522 return __builtin_lsx_vsadd_h(_1, _2);
1524 // CHECK-LABEL: @vsadd_w(
1525 // CHECK-NEXT: entry:
1526 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1527 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1528 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1529 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1530 // CHECK-NEXT: ret i128 [[TMP3]]
1532 v4i32 vsadd_w(v4i32 _1, v4i32 _2) {
1533 return __builtin_lsx_vsadd_w(_1, _2);
1535 // CHECK-LABEL: @vsadd_d(
1536 // CHECK-NEXT: entry:
1537 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1538 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1539 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1540 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1541 // CHECK-NEXT: ret i128 [[TMP3]]
1543 v2i64 vsadd_d(v2i64 _1, v2i64 _2) {
1544 return __builtin_lsx_vsadd_d(_1, _2);
1546 // CHECK-LABEL: @vsadd_bu(
1547 // CHECK-NEXT: entry:
1548 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1549 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1550 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1551 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1552 // CHECK-NEXT: ret i128 [[TMP3]]
1554 v16u8 vsadd_bu(v16u8 _1, v16u8 _2) {
1555 return __builtin_lsx_vsadd_bu(_1, _2);
1557 // CHECK-LABEL: @vsadd_hu(
1558 // CHECK-NEXT: entry:
1559 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1560 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1561 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1562 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1563 // CHECK-NEXT: ret i128 [[TMP3]]
1565 v8u16 vsadd_hu(v8u16 _1, v8u16 _2) {
1566 return __builtin_lsx_vsadd_hu(_1, _2);
1568 // CHECK-LABEL: @vsadd_wu(
1569 // CHECK-NEXT: entry:
1570 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1571 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1572 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1573 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1574 // CHECK-NEXT: ret i128 [[TMP3]]
1576 v4u32 vsadd_wu(v4u32 _1, v4u32 _2) {
1577 return __builtin_lsx_vsadd_wu(_1, _2);
1579 // CHECK-LABEL: @vsadd_du(
1580 // CHECK-NEXT: entry:
1581 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1582 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1583 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1584 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1585 // CHECK-NEXT: ret i128 [[TMP3]]
1587 v2u64 vsadd_du(v2u64 _1, v2u64 _2) {
1588 return __builtin_lsx_vsadd_du(_1, _2);
1590 // CHECK-LABEL: @vavg_b(
1591 // CHECK-NEXT: entry:
1592 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1593 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1594 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1595 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1596 // CHECK-NEXT: ret i128 [[TMP3]]
1598 v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); }
1599 // CHECK-LABEL: @vavg_h(
1600 // CHECK-NEXT: entry:
1601 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1602 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1603 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1604 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1605 // CHECK-NEXT: ret i128 [[TMP3]]
1607 v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); }
1608 // CHECK-LABEL: @vavg_w(
1609 // CHECK-NEXT: entry:
1610 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1611 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1612 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1613 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1614 // CHECK-NEXT: ret i128 [[TMP3]]
1616 v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); }
1617 // CHECK-LABEL: @vavg_d(
1618 // CHECK-NEXT: entry:
1619 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1620 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1621 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1622 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1623 // CHECK-NEXT: ret i128 [[TMP3]]
1625 v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); }
1626 // CHECK-LABEL: @vavg_bu(
1627 // CHECK-NEXT: entry:
1628 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1629 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1630 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1631 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1632 // CHECK-NEXT: ret i128 [[TMP3]]
1634 v16u8 vavg_bu(v16u8 _1, v16u8 _2) {
1635 return __builtin_lsx_vavg_bu(_1, _2);
1637 // CHECK-LABEL: @vavg_hu(
1638 // CHECK-NEXT: entry:
1639 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1640 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1641 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1642 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1643 // CHECK-NEXT: ret i128 [[TMP3]]
1645 v8u16 vavg_hu(v8u16 _1, v8u16 _2) {
1646 return __builtin_lsx_vavg_hu(_1, _2);
1648 // CHECK-LABEL: @vavg_wu(
1649 // CHECK-NEXT: entry:
1650 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1651 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1652 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1653 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1654 // CHECK-NEXT: ret i128 [[TMP3]]
1656 v4u32 vavg_wu(v4u32 _1, v4u32 _2) {
1657 return __builtin_lsx_vavg_wu(_1, _2);
1659 // CHECK-LABEL: @vavg_du(
1660 // CHECK-NEXT: entry:
1661 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1662 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1663 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1664 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1665 // CHECK-NEXT: ret i128 [[TMP3]]
1667 v2u64 vavg_du(v2u64 _1, v2u64 _2) {
1668 return __builtin_lsx_vavg_du(_1, _2);
1670 // CHECK-LABEL: @vavgr_b(
1671 // CHECK-NEXT: entry:
1672 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1673 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1674 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1675 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1676 // CHECK-NEXT: ret i128 [[TMP3]]
1678 v16i8 vavgr_b(v16i8 _1, v16i8 _2) {
1679 return __builtin_lsx_vavgr_b(_1, _2);
1681 // CHECK-LABEL: @vavgr_h(
1682 // CHECK-NEXT: entry:
1683 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1684 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1685 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1686 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1687 // CHECK-NEXT: ret i128 [[TMP3]]
1689 v8i16 vavgr_h(v8i16 _1, v8i16 _2) {
1690 return __builtin_lsx_vavgr_h(_1, _2);
1692 // CHECK-LABEL: @vavgr_w(
1693 // CHECK-NEXT: entry:
1694 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1695 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1696 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1697 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1698 // CHECK-NEXT: ret i128 [[TMP3]]
1700 v4i32 vavgr_w(v4i32 _1, v4i32 _2) {
1701 return __builtin_lsx_vavgr_w(_1, _2);
1703 // CHECK-LABEL: @vavgr_d(
1704 // CHECK-NEXT: entry:
1705 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1706 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1707 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1708 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1709 // CHECK-NEXT: ret i128 [[TMP3]]
1711 v2i64 vavgr_d(v2i64 _1, v2i64 _2) {
1712 return __builtin_lsx_vavgr_d(_1, _2);
1714 // CHECK-LABEL: @vavgr_bu(
1715 // CHECK-NEXT: entry:
1716 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1717 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1718 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1719 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1720 // CHECK-NEXT: ret i128 [[TMP3]]
1722 v16u8 vavgr_bu(v16u8 _1, v16u8 _2) {
1723 return __builtin_lsx_vavgr_bu(_1, _2);
1725 // CHECK-LABEL: @vavgr_hu(
1726 // CHECK-NEXT: entry:
1727 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1728 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1729 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1730 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1731 // CHECK-NEXT: ret i128 [[TMP3]]
1733 v8u16 vavgr_hu(v8u16 _1, v8u16 _2) {
1734 return __builtin_lsx_vavgr_hu(_1, _2);
1736 // CHECK-LABEL: @vavgr_wu(
1737 // CHECK-NEXT: entry:
1738 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1739 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1740 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1741 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1742 // CHECK-NEXT: ret i128 [[TMP3]]
1744 v4u32 vavgr_wu(v4u32 _1, v4u32 _2) {
1745 return __builtin_lsx_vavgr_wu(_1, _2);
1747 // CHECK-LABEL: @vavgr_du(
1748 // CHECK-NEXT: entry:
1749 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1750 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1751 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1752 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1753 // CHECK-NEXT: ret i128 [[TMP3]]
1755 v2u64 vavgr_du(v2u64 _1, v2u64 _2) {
1756 return __builtin_lsx_vavgr_du(_1, _2);
1758 // CHECK-LABEL: @vssub_b(
1759 // CHECK-NEXT: entry:
1760 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1761 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1762 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1763 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1764 // CHECK-NEXT: ret i128 [[TMP3]]
1766 v16i8 vssub_b(v16i8 _1, v16i8 _2) {
1767 return __builtin_lsx_vssub_b(_1, _2);
1769 // CHECK-LABEL: @vssub_h(
1770 // CHECK-NEXT: entry:
1771 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1772 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1773 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1774 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1775 // CHECK-NEXT: ret i128 [[TMP3]]
1777 v8i16 vssub_h(v8i16 _1, v8i16 _2) {
1778 return __builtin_lsx_vssub_h(_1, _2);
1780 // CHECK-LABEL: @vssub_w(
1781 // CHECK-NEXT: entry:
1782 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1783 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1784 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1785 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1786 // CHECK-NEXT: ret i128 [[TMP3]]
1788 v4i32 vssub_w(v4i32 _1, v4i32 _2) {
1789 return __builtin_lsx_vssub_w(_1, _2);
1791 // CHECK-LABEL: @vssub_d(
1792 // CHECK-NEXT: entry:
1793 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1794 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1795 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1796 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1797 // CHECK-NEXT: ret i128 [[TMP3]]
1799 v2i64 vssub_d(v2i64 _1, v2i64 _2) {
1800 return __builtin_lsx_vssub_d(_1, _2);
1802 // CHECK-LABEL: @vssub_bu(
1803 // CHECK-NEXT: entry:
1804 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1805 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1806 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1807 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1808 // CHECK-NEXT: ret i128 [[TMP3]]
1810 v16u8 vssub_bu(v16u8 _1, v16u8 _2) {
1811 return __builtin_lsx_vssub_bu(_1, _2);
1813 // CHECK-LABEL: @vssub_hu(
1814 // CHECK-NEXT: entry:
1815 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1816 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1817 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1818 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1819 // CHECK-NEXT: ret i128 [[TMP3]]
1821 v8u16 vssub_hu(v8u16 _1, v8u16 _2) {
1822 return __builtin_lsx_vssub_hu(_1, _2);
1824 // CHECK-LABEL: @vssub_wu(
1825 // CHECK-NEXT: entry:
1826 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1827 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1828 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1829 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1830 // CHECK-NEXT: ret i128 [[TMP3]]
1832 v4u32 vssub_wu(v4u32 _1, v4u32 _2) {
1833 return __builtin_lsx_vssub_wu(_1, _2);
1835 // CHECK-LABEL: @vssub_du(
1836 // CHECK-NEXT: entry:
1837 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1838 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1839 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1840 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1841 // CHECK-NEXT: ret i128 [[TMP3]]
1843 v2u64 vssub_du(v2u64 _1, v2u64 _2) {
1844 return __builtin_lsx_vssub_du(_1, _2);
1846 // CHECK-LABEL: @vabsd_b(
1847 // CHECK-NEXT: entry:
1848 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1849 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1850 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1851 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1852 // CHECK-NEXT: ret i128 [[TMP3]]
1854 v16i8 vabsd_b(v16i8 _1, v16i8 _2) {
1855 return __builtin_lsx_vabsd_b(_1, _2);
1857 // CHECK-LABEL: @vabsd_h(
1858 // CHECK-NEXT: entry:
1859 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1860 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1861 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1862 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1863 // CHECK-NEXT: ret i128 [[TMP3]]
1865 v8i16 vabsd_h(v8i16 _1, v8i16 _2) {
1866 return __builtin_lsx_vabsd_h(_1, _2);
1868 // CHECK-LABEL: @vabsd_w(
1869 // CHECK-NEXT: entry:
1870 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1871 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1872 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1873 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1874 // CHECK-NEXT: ret i128 [[TMP3]]
1876 v4i32 vabsd_w(v4i32 _1, v4i32 _2) {
1877 return __builtin_lsx_vabsd_w(_1, _2);
1879 // CHECK-LABEL: @vabsd_d(
1880 // CHECK-NEXT: entry:
1881 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1882 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1883 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1884 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1885 // CHECK-NEXT: ret i128 [[TMP3]]
1887 v2i64 vabsd_d(v2i64 _1, v2i64 _2) {
1888 return __builtin_lsx_vabsd_d(_1, _2);
1890 // CHECK-LABEL: @vabsd_bu(
1891 // CHECK-NEXT: entry:
1892 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1893 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1894 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1895 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1896 // CHECK-NEXT: ret i128 [[TMP3]]
1898 v16u8 vabsd_bu(v16u8 _1, v16u8 _2) {
1899 return __builtin_lsx_vabsd_bu(_1, _2);
1901 // CHECK-LABEL: @vabsd_hu(
1902 // CHECK-NEXT: entry:
1903 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1904 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1905 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1906 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1907 // CHECK-NEXT: ret i128 [[TMP3]]
1909 v8u16 vabsd_hu(v8u16 _1, v8u16 _2) {
1910 return __builtin_lsx_vabsd_hu(_1, _2);
1912 // CHECK-LABEL: @vabsd_wu(
1913 // CHECK-NEXT: entry:
1914 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1915 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1916 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1917 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1918 // CHECK-NEXT: ret i128 [[TMP3]]
1920 v4u32 vabsd_wu(v4u32 _1, v4u32 _2) {
1921 return __builtin_lsx_vabsd_wu(_1, _2);
1923 // CHECK-LABEL: @vabsd_du(
1924 // CHECK-NEXT: entry:
1925 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1926 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1927 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1928 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1929 // CHECK-NEXT: ret i128 [[TMP3]]
1931 v2u64 vabsd_du(v2u64 _1, v2u64 _2) {
1932 return __builtin_lsx_vabsd_du(_1, _2);
1934 // CHECK-LABEL: @vmul_b(
1935 // CHECK-NEXT: entry:
1936 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1937 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1938 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1939 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1940 // CHECK-NEXT: ret i128 [[TMP3]]
1942 v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); }
1943 // CHECK-LABEL: @vmul_h(
1944 // CHECK-NEXT: entry:
1945 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1946 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1947 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1948 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1949 // CHECK-NEXT: ret i128 [[TMP3]]
1951 v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); }
1952 // CHECK-LABEL: @vmul_w(
1953 // CHECK-NEXT: entry:
1954 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1955 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1956 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1957 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1958 // CHECK-NEXT: ret i128 [[TMP3]]
1960 v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); }
1961 // CHECK-LABEL: @vmul_d(
1962 // CHECK-NEXT: entry:
1963 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1964 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1965 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1966 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1967 // CHECK-NEXT: ret i128 [[TMP3]]
1969 v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); }
1970 // CHECK-LABEL: @vmadd_b(
1971 // CHECK-NEXT: entry:
1972 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1973 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1974 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
1975 // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1976 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
1977 // CHECK-NEXT: ret i128 [[TMP4]]
1979 v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) {
1980 return __builtin_lsx_vmadd_b(_1, _2, _3);
1982 // CHECK-LABEL: @vmadd_h(
1983 // CHECK-NEXT: entry:
1984 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1985 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1986 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
1987 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
1988 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
1989 // CHECK-NEXT: ret i128 [[TMP4]]
1991 v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) {
1992 return __builtin_lsx_vmadd_h(_1, _2, _3);
1994 // CHECK-LABEL: @vmadd_w(
1995 // CHECK-NEXT: entry:
1996 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1997 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1998 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
1999 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
2000 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
2001 // CHECK-NEXT: ret i128 [[TMP4]]
2003 v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) {
2004 return __builtin_lsx_vmadd_w(_1, _2, _3);
2006 // CHECK-LABEL: @vmadd_d(
2007 // CHECK-NEXT: entry:
2008 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2009 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2010 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
2011 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
2012 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
2013 // CHECK-NEXT: ret i128 [[TMP4]]
2015 v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) {
2016 return __builtin_lsx_vmadd_d(_1, _2, _3);
2018 // CHECK-LABEL: @vmsub_b(
2019 // CHECK-NEXT: entry:
2020 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2021 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2022 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
2023 // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
2024 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
2025 // CHECK-NEXT: ret i128 [[TMP4]]
2027 v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) {
2028 return __builtin_lsx_vmsub_b(_1, _2, _3);
2030 // CHECK-LABEL: @vmsub_h(
2031 // CHECK-NEXT: entry:
2032 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2033 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2034 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
2035 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
2036 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
2037 // CHECK-NEXT: ret i128 [[TMP4]]
2039 v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) {
2040 return __builtin_lsx_vmsub_h(_1, _2, _3);
2042 // CHECK-LABEL: @vmsub_w(
2043 // CHECK-NEXT: entry:
2044 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2045 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2046 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
2047 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
2048 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
2049 // CHECK-NEXT: ret i128 [[TMP4]]
2051 v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) {
2052 return __builtin_lsx_vmsub_w(_1, _2, _3);
2054 // CHECK-LABEL: @vmsub_d(
2055 // CHECK-NEXT: entry:
2056 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2057 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2058 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
2059 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
2060 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
2061 // CHECK-NEXT: ret i128 [[TMP4]]
2063 v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) {
2064 return __builtin_lsx_vmsub_d(_1, _2, _3);
2066 // CHECK-LABEL: @vdiv_b(
2067 // CHECK-NEXT: entry:
2068 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2069 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2070 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2071 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2072 // CHECK-NEXT: ret i128 [[TMP3]]
2074 v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); }
2075 // CHECK-LABEL: @vdiv_h(
2076 // CHECK-NEXT: entry:
2077 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2078 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2079 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2080 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2081 // CHECK-NEXT: ret i128 [[TMP3]]
2083 v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); }
2084 // CHECK-LABEL: @vdiv_w(
2085 // CHECK-NEXT: entry:
2086 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2087 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2088 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2089 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2090 // CHECK-NEXT: ret i128 [[TMP3]]
2092 v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); }
2093 // CHECK-LABEL: @vdiv_d(
2094 // CHECK-NEXT: entry:
2095 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2096 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2097 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2098 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2099 // CHECK-NEXT: ret i128 [[TMP3]]
2101 v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); }
2102 // CHECK-LABEL: @vdiv_bu(
2103 // CHECK-NEXT: entry:
2104 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2105 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2106 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2107 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2108 // CHECK-NEXT: ret i128 [[TMP3]]
2110 v16u8 vdiv_bu(v16u8 _1, v16u8 _2) {
2111 return __builtin_lsx_vdiv_bu(_1, _2);
2113 // CHECK-LABEL: @vdiv_hu(
2114 // CHECK-NEXT: entry:
2115 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2116 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2117 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2118 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2119 // CHECK-NEXT: ret i128 [[TMP3]]
2121 v8u16 vdiv_hu(v8u16 _1, v8u16 _2) {
2122 return __builtin_lsx_vdiv_hu(_1, _2);
2124 // CHECK-LABEL: @vdiv_wu(
2125 // CHECK-NEXT: entry:
2126 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2127 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2128 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2129 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2130 // CHECK-NEXT: ret i128 [[TMP3]]
2132 v4u32 vdiv_wu(v4u32 _1, v4u32 _2) {
2133 return __builtin_lsx_vdiv_wu(_1, _2);
2135 // CHECK-LABEL: @vdiv_du(
2136 // CHECK-NEXT: entry:
2137 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2138 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2139 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2140 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2141 // CHECK-NEXT: ret i128 [[TMP3]]
2143 v2u64 vdiv_du(v2u64 _1, v2u64 _2) {
2144 return __builtin_lsx_vdiv_du(_1, _2);
2146 // CHECK-LABEL: @vhaddw_h_b(
2147 // CHECK-NEXT: entry:
2148 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2149 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2150 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2151 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2152 // CHECK-NEXT: ret i128 [[TMP3]]
2154 v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) {
2155 return __builtin_lsx_vhaddw_h_b(_1, _2);
2157 // CHECK-LABEL: @vhaddw_w_h(
2158 // CHECK-NEXT: entry:
2159 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2160 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2161 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2162 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2163 // CHECK-NEXT: ret i128 [[TMP3]]
2165 v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) {
2166 return __builtin_lsx_vhaddw_w_h(_1, _2);
2168 // CHECK-LABEL: @vhaddw_d_w(
2169 // CHECK-NEXT: entry:
2170 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2171 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2172 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2173 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2174 // CHECK-NEXT: ret i128 [[TMP3]]
2176 v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) {
2177 return __builtin_lsx_vhaddw_d_w(_1, _2);
2179 // CHECK-LABEL: @vhaddw_hu_bu(
2180 // CHECK-NEXT: entry:
2181 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2182 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2183 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2184 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2185 // CHECK-NEXT: ret i128 [[TMP3]]
2187 v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) {
2188 return __builtin_lsx_vhaddw_hu_bu(_1, _2);
2190 // CHECK-LABEL: @vhaddw_wu_hu(
2191 // CHECK-NEXT: entry:
2192 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2193 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2194 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2195 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2196 // CHECK-NEXT: ret i128 [[TMP3]]
2198 v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) {
2199 return __builtin_lsx_vhaddw_wu_hu(_1, _2);
2201 // CHECK-LABEL: @vhaddw_du_wu(
2202 // CHECK-NEXT: entry:
2203 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2204 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2205 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2206 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2207 // CHECK-NEXT: ret i128 [[TMP3]]
2209 v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) {
2210 return __builtin_lsx_vhaddw_du_wu(_1, _2);
2212 // CHECK-LABEL: @vhsubw_h_b(
2213 // CHECK-NEXT: entry:
2214 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2215 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2216 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2217 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2218 // CHECK-NEXT: ret i128 [[TMP3]]
2220 v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) {
2221 return __builtin_lsx_vhsubw_h_b(_1, _2);
2223 // CHECK-LABEL: @vhsubw_w_h(
2224 // CHECK-NEXT: entry:
2225 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2226 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2227 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2228 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2229 // CHECK-NEXT: ret i128 [[TMP3]]
2231 v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) {
2232 return __builtin_lsx_vhsubw_w_h(_1, _2);
2234 // CHECK-LABEL: @vhsubw_d_w(
2235 // CHECK-NEXT: entry:
2236 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2237 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2238 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2239 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2240 // CHECK-NEXT: ret i128 [[TMP3]]
2242 v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) {
2243 return __builtin_lsx_vhsubw_d_w(_1, _2);
2245 // CHECK-LABEL: @vhsubw_hu_bu(
2246 // CHECK-NEXT: entry:
2247 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2248 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2249 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2250 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2251 // CHECK-NEXT: ret i128 [[TMP3]]
2253 v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) {
2254 return __builtin_lsx_vhsubw_hu_bu(_1, _2);
2256 // CHECK-LABEL: @vhsubw_wu_hu(
2257 // CHECK-NEXT: entry:
2258 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2259 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2260 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2261 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2262 // CHECK-NEXT: ret i128 [[TMP3]]
2264 v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) {
2265 return __builtin_lsx_vhsubw_wu_hu(_1, _2);
2267 // CHECK-LABEL: @vhsubw_du_wu(
2268 // CHECK-NEXT: entry:
2269 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2270 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2271 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2272 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2273 // CHECK-NEXT: ret i128 [[TMP3]]
2275 v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) {
2276 return __builtin_lsx_vhsubw_du_wu(_1, _2);
2278 // CHECK-LABEL: @vmod_b(
2279 // CHECK-NEXT: entry:
2280 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2281 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2282 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2283 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2284 // CHECK-NEXT: ret i128 [[TMP3]]
2286 v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); }
2287 // CHECK-LABEL: @vmod_h(
2288 // CHECK-NEXT: entry:
2289 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2290 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2291 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2292 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2293 // CHECK-NEXT: ret i128 [[TMP3]]
2295 v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); }
2296 // CHECK-LABEL: @vmod_w(
2297 // CHECK-NEXT: entry:
2298 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2299 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2300 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2301 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2302 // CHECK-NEXT: ret i128 [[TMP3]]
2304 v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); }
2305 // CHECK-LABEL: @vmod_d(
2306 // CHECK-NEXT: entry:
2307 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2308 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2309 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2310 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2311 // CHECK-NEXT: ret i128 [[TMP3]]
2313 v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); }
2314 // CHECK-LABEL: @vmod_bu(
2315 // CHECK-NEXT: entry:
2316 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2317 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2318 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2319 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2320 // CHECK-NEXT: ret i128 [[TMP3]]
2322 v16u8 vmod_bu(v16u8 _1, v16u8 _2) {
2323 return __builtin_lsx_vmod_bu(_1, _2);
2325 // CHECK-LABEL: @vmod_hu(
2326 // CHECK-NEXT: entry:
2327 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2328 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2329 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2330 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2331 // CHECK-NEXT: ret i128 [[TMP3]]
2333 v8u16 vmod_hu(v8u16 _1, v8u16 _2) {
2334 return __builtin_lsx_vmod_hu(_1, _2);
2336 // CHECK-LABEL: @vmod_wu(
2337 // CHECK-NEXT: entry:
2338 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2339 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2340 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2341 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2342 // CHECK-NEXT: ret i128 [[TMP3]]
2344 v4u32 vmod_wu(v4u32 _1, v4u32 _2) {
2345 return __builtin_lsx_vmod_wu(_1, _2);
2347 // CHECK-LABEL: @vmod_du(
2348 // CHECK-NEXT: entry:
2349 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2350 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2351 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2352 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2353 // CHECK-NEXT: ret i128 [[TMP3]]
2355 v2u64 vmod_du(v2u64 _1, v2u64 _2) {
2356 return __builtin_lsx_vmod_du(_1, _2);
2358 // CHECK-LABEL: @vreplve_b(
2359 // CHECK-NEXT: entry:
2360 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2361 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]])
2362 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2363 // CHECK-NEXT: ret i128 [[TMP2]]
2365 v16i8 vreplve_b(v16i8 _1, int _2) {
2366 return __builtin_lsx_vreplve_b(_1, _2);
2368 // CHECK-LABEL: @vreplve_h(
2369 // CHECK-NEXT: entry:
2370 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2371 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]])
2372 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2373 // CHECK-NEXT: ret i128 [[TMP2]]
2375 v8i16 vreplve_h(v8i16 _1, int _2) {
2376 return __builtin_lsx_vreplve_h(_1, _2);
2378 // CHECK-LABEL: @vreplve_w(
2379 // CHECK-NEXT: entry:
2380 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2381 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]])
2382 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2383 // CHECK-NEXT: ret i128 [[TMP2]]
2385 v4i32 vreplve_w(v4i32 _1, int _2) {
2386 return __builtin_lsx_vreplve_w(_1, _2);
2388 // CHECK-LABEL: @vreplve_d(
2389 // CHECK-NEXT: entry:
2390 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2391 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]])
2392 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2393 // CHECK-NEXT: ret i128 [[TMP2]]
2395 v2i64 vreplve_d(v2i64 _1, int _2) {
2396 return __builtin_lsx_vreplve_d(_1, _2);
2398 // CHECK-LABEL: @vreplvei_b(
2399 // CHECK-NEXT: entry:
2400 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2401 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1)
2402 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2403 // CHECK-NEXT: ret i128 [[TMP2]]
2405 v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); }
2406 // CHECK-LABEL: @vreplvei_h(
2407 // CHECK-NEXT: entry:
2408 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2409 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1)
2410 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2411 // CHECK-NEXT: ret i128 [[TMP2]]
2413 v8i16 vreplvei_h(v8i16 _1) { return __builtin_lsx_vreplvei_h(_1, 1); }
2414 // CHECK-LABEL: @vreplvei_w(
2415 // CHECK-NEXT: entry:
2416 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2417 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1)
2418 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2419 // CHECK-NEXT: ret i128 [[TMP2]]
2421 v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); }
2422 // CHECK-LABEL: @vreplvei_d(
2423 // CHECK-NEXT: entry:
2424 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2425 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1)
2426 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2427 // CHECK-NEXT: ret i128 [[TMP2]]
2429 v2i64 vreplvei_d(v2i64 _1) { return __builtin_lsx_vreplvei_d(_1, 1); }
2430 // CHECK-LABEL: @vpickev_b(
2431 // CHECK-NEXT: entry:
2432 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2433 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2434 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2435 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2436 // CHECK-NEXT: ret i128 [[TMP3]]
2438 v16i8 vpickev_b(v16i8 _1, v16i8 _2) {
2439 return __builtin_lsx_vpickev_b(_1, _2);
2441 // CHECK-LABEL: @vpickev_h(
2442 // CHECK-NEXT: entry:
2443 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2444 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2445 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2446 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2447 // CHECK-NEXT: ret i128 [[TMP3]]
2449 v8i16 vpickev_h(v8i16 _1, v8i16 _2) {
2450 return __builtin_lsx_vpickev_h(_1, _2);
2452 // CHECK-LABEL: @vpickev_w(
2453 // CHECK-NEXT: entry:
2454 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2455 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2456 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2457 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2458 // CHECK-NEXT: ret i128 [[TMP3]]
2460 v4i32 vpickev_w(v4i32 _1, v4i32 _2) {
2461 return __builtin_lsx_vpickev_w(_1, _2);
2463 // CHECK-LABEL: @vpickev_d(
2464 // CHECK-NEXT: entry:
2465 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2466 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2467 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2468 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2469 // CHECK-NEXT: ret i128 [[TMP3]]
2471 v2i64 vpickev_d(v2i64 _1, v2i64 _2) {
2472 return __builtin_lsx_vpickev_d(_1, _2);
2474 // CHECK-LABEL: @vpickod_b(
2475 // CHECK-NEXT: entry:
2476 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2477 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2478 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2479 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2480 // CHECK-NEXT: ret i128 [[TMP3]]
2482 v16i8 vpickod_b(v16i8 _1, v16i8 _2) {
2483 return __builtin_lsx_vpickod_b(_1, _2);
2485 // CHECK-LABEL: @vpickod_h(
2486 // CHECK-NEXT: entry:
2487 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2488 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2489 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2490 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2491 // CHECK-NEXT: ret i128 [[TMP3]]
2493 v8i16 vpickod_h(v8i16 _1, v8i16 _2) {
2494 return __builtin_lsx_vpickod_h(_1, _2);
2496 // CHECK-LABEL: @vpickod_w(
2497 // CHECK-NEXT: entry:
2498 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2499 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2500 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2501 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2502 // CHECK-NEXT: ret i128 [[TMP3]]
2504 v4i32 vpickod_w(v4i32 _1, v4i32 _2) {
2505 return __builtin_lsx_vpickod_w(_1, _2);
2507 // CHECK-LABEL: @vpickod_d(
2508 // CHECK-NEXT: entry:
2509 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2510 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2511 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2512 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2513 // CHECK-NEXT: ret i128 [[TMP3]]
2515 v2i64 vpickod_d(v2i64 _1, v2i64 _2) {
2516 return __builtin_lsx_vpickod_d(_1, _2);
2518 // CHECK-LABEL: @vilvh_b(
2519 // CHECK-NEXT: entry:
2520 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2521 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2522 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2523 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2524 // CHECK-NEXT: ret i128 [[TMP3]]
2526 v16i8 vilvh_b(v16i8 _1, v16i8 _2) {
2527 return __builtin_lsx_vilvh_b(_1, _2);
2529 // CHECK-LABEL: @vilvh_h(
2530 // CHECK-NEXT: entry:
2531 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2532 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2533 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2534 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2535 // CHECK-NEXT: ret i128 [[TMP3]]
2537 v8i16 vilvh_h(v8i16 _1, v8i16 _2) {
2538 return __builtin_lsx_vilvh_h(_1, _2);
2540 // CHECK-LABEL: @vilvh_w(
2541 // CHECK-NEXT: entry:
2542 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2543 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2544 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2545 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2546 // CHECK-NEXT: ret i128 [[TMP3]]
2548 v4i32 vilvh_w(v4i32 _1, v4i32 _2) {
2549 return __builtin_lsx_vilvh_w(_1, _2);
2551 // CHECK-LABEL: @vilvh_d(
2552 // CHECK-NEXT: entry:
2553 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2554 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2555 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2556 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2557 // CHECK-NEXT: ret i128 [[TMP3]]
2559 v2i64 vilvh_d(v2i64 _1, v2i64 _2) {
2560 return __builtin_lsx_vilvh_d(_1, _2);
2562 // CHECK-LABEL: @vilvl_b(
2563 // CHECK-NEXT: entry:
2564 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2565 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2566 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2567 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2568 // CHECK-NEXT: ret i128 [[TMP3]]
2570 v16i8 vilvl_b(v16i8 _1, v16i8 _2) {
2571 return __builtin_lsx_vilvl_b(_1, _2);
2573 // CHECK-LABEL: @vilvl_h(
2574 // CHECK-NEXT: entry:
2575 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2576 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2577 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2578 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2579 // CHECK-NEXT: ret i128 [[TMP3]]
2581 v8i16 vilvl_h(v8i16 _1, v8i16 _2) {
2582 return __builtin_lsx_vilvl_h(_1, _2);
2584 // CHECK-LABEL: @vilvl_w(
2585 // CHECK-NEXT: entry:
2586 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2587 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2588 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2589 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2590 // CHECK-NEXT: ret i128 [[TMP3]]
2592 v4i32 vilvl_w(v4i32 _1, v4i32 _2) {
2593 return __builtin_lsx_vilvl_w(_1, _2);
2595 // CHECK-LABEL: @vilvl_d(
2596 // CHECK-NEXT: entry:
2597 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2598 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2599 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2600 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2601 // CHECK-NEXT: ret i128 [[TMP3]]
2603 v2i64 vilvl_d(v2i64 _1, v2i64 _2) {
2604 return __builtin_lsx_vilvl_d(_1, _2);
2606 // CHECK-LABEL: @vpackev_b(
2607 // CHECK-NEXT: entry:
2608 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2609 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2610 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2611 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2612 // CHECK-NEXT: ret i128 [[TMP3]]
2614 v16i8 vpackev_b(v16i8 _1, v16i8 _2) {
2615 return __builtin_lsx_vpackev_b(_1, _2);
2617 // CHECK-LABEL: @vpackev_h(
2618 // CHECK-NEXT: entry:
2619 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2620 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2621 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2622 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2623 // CHECK-NEXT: ret i128 [[TMP3]]
2625 v8i16 vpackev_h(v8i16 _1, v8i16 _2) {
2626 return __builtin_lsx_vpackev_h(_1, _2);
2628 // CHECK-LABEL: @vpackev_w(
2629 // CHECK-NEXT: entry:
2630 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2631 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2632 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2633 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2634 // CHECK-NEXT: ret i128 [[TMP3]]
2636 v4i32 vpackev_w(v4i32 _1, v4i32 _2) {
2637 return __builtin_lsx_vpackev_w(_1, _2);
2639 // CHECK-LABEL: @vpackev_d(
2640 // CHECK-NEXT: entry:
2641 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2642 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2643 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2644 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2645 // CHECK-NEXT: ret i128 [[TMP3]]
2647 v2i64 vpackev_d(v2i64 _1, v2i64 _2) {
2648 return __builtin_lsx_vpackev_d(_1, _2);
2650 // CHECK-LABEL: @vpackod_b(
2651 // CHECK-NEXT: entry:
2652 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2653 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2654 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2655 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2656 // CHECK-NEXT: ret i128 [[TMP3]]
2658 v16i8 vpackod_b(v16i8 _1, v16i8 _2) {
2659 return __builtin_lsx_vpackod_b(_1, _2);
2661 // CHECK-LABEL: @vpackod_h(
2662 // CHECK-NEXT: entry:
2663 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2664 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2665 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2666 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2667 // CHECK-NEXT: ret i128 [[TMP3]]
2669 v8i16 vpackod_h(v8i16 _1, v8i16 _2) {
2670 return __builtin_lsx_vpackod_h(_1, _2);
2672 // CHECK-LABEL: @vpackod_w(
2673 // CHECK-NEXT: entry:
2674 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2675 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2676 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2677 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2678 // CHECK-NEXT: ret i128 [[TMP3]]
2680 v4i32 vpackod_w(v4i32 _1, v4i32 _2) {
2681 return __builtin_lsx_vpackod_w(_1, _2);
2683 // CHECK-LABEL: @vpackod_d(
2684 // CHECK-NEXT: entry:
2685 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2686 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2687 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2688 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2689 // CHECK-NEXT: ret i128 [[TMP3]]
2691 v2i64 vpackod_d(v2i64 _1, v2i64 _2) {
2692 return __builtin_lsx_vpackod_d(_1, _2);
2694 // CHECK-LABEL: @vshuf_h(
2695 // CHECK-NEXT: entry:
2696 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2697 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2698 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
2699 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
2700 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
2701 // CHECK-NEXT: ret i128 [[TMP4]]
2703 v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) {
2704 return __builtin_lsx_vshuf_h(_1, _2, _3);
2706 // CHECK-LABEL: @vshuf_w(
2707 // CHECK-NEXT: entry:
2708 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2709 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2710 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
2711 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
2712 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
2713 // CHECK-NEXT: ret i128 [[TMP4]]
2715 v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) {
2716 return __builtin_lsx_vshuf_w(_1, _2, _3);
2718 // CHECK-LABEL: @vshuf_d(
2719 // CHECK-NEXT: entry:
2720 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2721 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2722 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
2723 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
2724 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
2725 // CHECK-NEXT: ret i128 [[TMP4]]
2727 v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) {
2728 return __builtin_lsx_vshuf_d(_1, _2, _3);
2730 // CHECK-LABEL: @vand_v(
2731 // CHECK-NEXT: entry:
2732 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2733 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2734 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2735 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2736 // CHECK-NEXT: ret i128 [[TMP3]]
2738 v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); }
2739 // CHECK-LABEL: @vandi_b(
2740 // CHECK-NEXT: entry:
2741 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2742 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1)
2743 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2744 // CHECK-NEXT: ret i128 [[TMP2]]
2746 v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); }
2747 // CHECK-LABEL: @vor_v(
2748 // CHECK-NEXT: entry:
2749 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2750 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2751 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2752 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2753 // CHECK-NEXT: ret i128 [[TMP3]]
2755 v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); }
2756 // CHECK-LABEL: @vori_b(
2757 // CHECK-NEXT: entry:
2758 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2759 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1)
2760 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2761 // CHECK-NEXT: ret i128 [[TMP2]]
2763 v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); }
2764 // CHECK-LABEL: @vnor_v(
2765 // CHECK-NEXT: entry:
2766 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2767 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2768 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2769 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2770 // CHECK-NEXT: ret i128 [[TMP3]]
2772 v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); }
2773 // CHECK-LABEL: @vnori_b(
2774 // CHECK-NEXT: entry:
2775 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2776 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1)
2777 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2778 // CHECK-NEXT: ret i128 [[TMP2]]
2780 v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); }
2781 // CHECK-LABEL: @vxor_v(
2782 // CHECK-NEXT: entry:
2783 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2784 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2785 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2786 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2787 // CHECK-NEXT: ret i128 [[TMP3]]
2789 v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); }
2790 // CHECK-LABEL: @vxori_b(
2791 // CHECK-NEXT: entry:
2792 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2793 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1)
2794 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2795 // CHECK-NEXT: ret i128 [[TMP2]]
2797 v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); }
2798 // CHECK-LABEL: @vbitsel_v(
2799 // CHECK-NEXT: entry:
2800 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2801 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2802 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
2803 // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
2804 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
2805 // CHECK-NEXT: ret i128 [[TMP4]]
2807 v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) {
2808 return __builtin_lsx_vbitsel_v(_1, _2, _3);
2810 // CHECK-LABEL: @vbitseli_b(
2811 // CHECK-NEXT: entry:
2812 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2813 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2814 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
2815 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2816 // CHECK-NEXT: ret i128 [[TMP3]]
2818 v16u8 vbitseli_b(v16u8 _1, v16u8 _2) {
2819 return __builtin_lsx_vbitseli_b(_1, _2, 1);
2821 // CHECK-LABEL: @vshuf4i_b(
2822 // CHECK-NEXT: entry:
2823 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2824 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1)
2825 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2826 // CHECK-NEXT: ret i128 [[TMP2]]
2828 v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); }
2829 // CHECK-LABEL: @vshuf4i_h(
2830 // CHECK-NEXT: entry:
2831 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2832 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1)
2833 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2834 // CHECK-NEXT: ret i128 [[TMP2]]
2836 v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); }
2837 // CHECK-LABEL: @vshuf4i_w(
2838 // CHECK-NEXT: entry:
2839 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2840 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1)
2841 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2842 // CHECK-NEXT: ret i128 [[TMP2]]
2844 v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); }
2845 // CHECK-LABEL: @vreplgr2vr_b(
2846 // CHECK-NEXT: entry:
2847 // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]])
2848 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
2849 // CHECK-NEXT: ret i128 [[TMP1]]
2851 v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); }
2852 // CHECK-LABEL: @vreplgr2vr_h(
2853 // CHECK-NEXT: entry:
2854 // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]])
2855 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
2856 // CHECK-NEXT: ret i128 [[TMP1]]
2858 v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); }
2859 // CHECK-LABEL: @vreplgr2vr_w(
2860 // CHECK-NEXT: entry:
2861 // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]])
2862 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
2863 // CHECK-NEXT: ret i128 [[TMP1]]
2865 v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); }
2866 // CHECK-LABEL: @vreplgr2vr_d(
2867 // CHECK-NEXT: entry:
2868 // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]])
2869 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
2870 // CHECK-NEXT: ret i128 [[TMP1]]
2872 v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); }
2873 // CHECK-LABEL: @vpcnt_b(
2874 // CHECK-NEXT: entry:
2875 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2876 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]])
2877 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2878 // CHECK-NEXT: ret i128 [[TMP2]]
2880 v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); }
2881 // CHECK-LABEL: @vpcnt_h(
2882 // CHECK-NEXT: entry:
2883 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2884 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]])
2885 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2886 // CHECK-NEXT: ret i128 [[TMP2]]
2888 v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); }
2889 // CHECK-LABEL: @vpcnt_w(
2890 // CHECK-NEXT: entry:
2891 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2892 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]])
2893 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2894 // CHECK-NEXT: ret i128 [[TMP2]]
2896 v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); }
2897 // CHECK-LABEL: @vpcnt_d(
2898 // CHECK-NEXT: entry:
2899 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2900 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]])
2901 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2902 // CHECK-NEXT: ret i128 [[TMP2]]
2904 v2i64 vpcnt_d(v2i64 _1) { return __builtin_lsx_vpcnt_d(_1); }
2905 // CHECK-LABEL: @vclo_b(
2906 // CHECK-NEXT: entry:
2907 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2908 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]])
2909 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2910 // CHECK-NEXT: ret i128 [[TMP2]]
2912 v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); }
2913 // CHECK-LABEL: @vclo_h(
2914 // CHECK-NEXT: entry:
2915 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2916 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]])
2917 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2918 // CHECK-NEXT: ret i128 [[TMP2]]
2920 v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); }
2921 // CHECK-LABEL: @vclo_w(
2922 // CHECK-NEXT: entry:
2923 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2924 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]])
2925 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2926 // CHECK-NEXT: ret i128 [[TMP2]]
2928 v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); }
2929 // CHECK-LABEL: @vclo_d(
2930 // CHECK-NEXT: entry:
2931 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2932 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]])
2933 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2934 // CHECK-NEXT: ret i128 [[TMP2]]
2936 v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); }
2937 // CHECK-LABEL: @vclz_b(
2938 // CHECK-NEXT: entry:
2939 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2940 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]])
2941 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2942 // CHECK-NEXT: ret i128 [[TMP2]]
2944 v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); }
2945 // CHECK-LABEL: @vclz_h(
2946 // CHECK-NEXT: entry:
2947 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2948 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]])
2949 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2950 // CHECK-NEXT: ret i128 [[TMP2]]
2952 v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); }
2953 // CHECK-LABEL: @vclz_w(
2954 // CHECK-NEXT: entry:
2955 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2956 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]])
2957 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2958 // CHECK-NEXT: ret i128 [[TMP2]]
2960 v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); }
2961 // CHECK-LABEL: @vclz_d(
2962 // CHECK-NEXT: entry:
2963 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2964 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]])
2965 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2966 // CHECK-NEXT: ret i128 [[TMP2]]
2968 v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); }
2969 // CHECK-LABEL: @vpickve2gr_b(
2970 // CHECK-NEXT: entry:
2971 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2972 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1)
2973 // CHECK-NEXT: ret i32 [[TMP1]]
2975 int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); }
2976 // CHECK-LABEL: @vpickve2gr_h(
2977 // CHECK-NEXT: entry:
2978 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2979 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1)
2980 // CHECK-NEXT: ret i32 [[TMP1]]
2982 int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); }
2983 // CHECK-LABEL: @vpickve2gr_w(
2984 // CHECK-NEXT: entry:
2985 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2986 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1)
2987 // CHECK-NEXT: ret i32 [[TMP1]]
2989 int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); }
2990 // CHECK-LABEL: @vpickve2gr_d(
2991 // CHECK-NEXT: entry:
2992 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2993 // CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1)
2994 // CHECK-NEXT: ret i64 [[TMP1]]
2996 long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); }
2997 // CHECK-LABEL: @vpickve2gr_bu(
2998 // CHECK-NEXT: entry:
2999 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3000 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1)
3001 // CHECK-NEXT: ret i32 [[TMP1]]
3003 unsigned int vpickve2gr_bu(v16i8 _1) {
3004 return __builtin_lsx_vpickve2gr_bu(_1, 1);
3006 // CHECK-LABEL: @vpickve2gr_hu(
3007 // CHECK-NEXT: entry:
3008 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3009 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1)
3010 // CHECK-NEXT: ret i32 [[TMP1]]
3012 unsigned int vpickve2gr_hu(v8i16 _1) {
3013 return __builtin_lsx_vpickve2gr_hu(_1, 1);
3015 // CHECK-LABEL: @vpickve2gr_wu(
3016 // CHECK-NEXT: entry:
3017 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3018 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1)
3019 // CHECK-NEXT: ret i32 [[TMP1]]
3021 unsigned int vpickve2gr_wu(v4i32 _1) {
3022 return __builtin_lsx_vpickve2gr_wu(_1, 1);
3024 // CHECK-LABEL: @vpickve2gr_du(
3025 // CHECK-NEXT: entry:
3026 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3027 // CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1)
3028 // CHECK-NEXT: ret i64 [[TMP1]]
3030 unsigned long int vpickve2gr_du(v2i64 _1) {
3031 return __builtin_lsx_vpickve2gr_du(_1, 1);
3033 // CHECK-LABEL: @vinsgr2vr_b(
3034 // CHECK-NEXT: entry:
3035 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3036 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1)
3037 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
3038 // CHECK-NEXT: ret i128 [[TMP2]]
3040 v16i8 vinsgr2vr_b(v16i8 _1) {
3041 return __builtin_lsx_vinsgr2vr_b(_1, 1, 1);
3043 // CHECK-LABEL: @vinsgr2vr_h(
3044 // CHECK-NEXT: entry:
3045 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3046 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1)
3047 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3048 // CHECK-NEXT: ret i128 [[TMP2]]
3050 v8i16 vinsgr2vr_h(v8i16 _1) {
3051 return __builtin_lsx_vinsgr2vr_h(_1, 1, 1);
3053 // CHECK-LABEL: @vinsgr2vr_w(
3054 // CHECK-NEXT: entry:
3055 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3056 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1)
3057 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3058 // CHECK-NEXT: ret i128 [[TMP2]]
3060 v4i32 vinsgr2vr_w(v4i32 _1) {
3061 return __builtin_lsx_vinsgr2vr_w(_1, 1, 1);
3063 // CHECK-LABEL: @vinsgr2vr_d(
3064 // CHECK-NEXT: entry:
3065 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3066 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1)
3067 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3068 // CHECK-NEXT: ret i128 [[TMP2]]
3070 v2i64 vinsgr2vr_d(v2i64 _1) {
3071 return __builtin_lsx_vinsgr2vr_d(_1, 1, 1);
3073 // CHECK-LABEL: @vfadd_s(
3074 // CHECK-NEXT: entry:
3075 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3076 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3077 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3078 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3079 // CHECK-NEXT: ret i128 [[TMP3]]
3081 v4f32 vfadd_s(v4f32 _1, v4f32 _2) {
3082 return __builtin_lsx_vfadd_s(_1, _2);
3084 // CHECK-LABEL: @vfadd_d(
3085 // CHECK-NEXT: entry:
3086 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3087 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3088 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3089 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3090 // CHECK-NEXT: ret i128 [[TMP3]]
3092 v2f64 vfadd_d(v2f64 _1, v2f64 _2) {
3093 return __builtin_lsx_vfadd_d(_1, _2);
3095 // CHECK-LABEL: @vfsub_s(
3096 // CHECK-NEXT: entry:
3097 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3098 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3099 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3100 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3101 // CHECK-NEXT: ret i128 [[TMP3]]
3103 v4f32 vfsub_s(v4f32 _1, v4f32 _2) {
3104 return __builtin_lsx_vfsub_s(_1, _2);
3106 // CHECK-LABEL: @vfsub_d(
3107 // CHECK-NEXT: entry:
3108 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3109 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3110 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3111 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3112 // CHECK-NEXT: ret i128 [[TMP3]]
3114 v2f64 vfsub_d(v2f64 _1, v2f64 _2) {
3115 return __builtin_lsx_vfsub_d(_1, _2);
3117 // CHECK-LABEL: @vfmul_s(
3118 // CHECK-NEXT: entry:
3119 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3120 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3121 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3122 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3123 // CHECK-NEXT: ret i128 [[TMP3]]
3125 v4f32 vfmul_s(v4f32 _1, v4f32 _2) {
3126 return __builtin_lsx_vfmul_s(_1, _2);
3128 // CHECK-LABEL: @vfmul_d(
3129 // CHECK-NEXT: entry:
3130 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3131 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3132 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3133 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3134 // CHECK-NEXT: ret i128 [[TMP3]]
3136 v2f64 vfmul_d(v2f64 _1, v2f64 _2) {
3137 return __builtin_lsx_vfmul_d(_1, _2);
3139 // CHECK-LABEL: @vfdiv_s(
3140 // CHECK-NEXT: entry:
3141 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3142 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3143 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3144 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3145 // CHECK-NEXT: ret i128 [[TMP3]]
3147 v4f32 vfdiv_s(v4f32 _1, v4f32 _2) {
3148 return __builtin_lsx_vfdiv_s(_1, _2);
3150 // CHECK-LABEL: @vfdiv_d(
3151 // CHECK-NEXT: entry:
3152 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3153 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3154 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3155 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3156 // CHECK-NEXT: ret i128 [[TMP3]]
3158 v2f64 vfdiv_d(v2f64 _1, v2f64 _2) {
3159 return __builtin_lsx_vfdiv_d(_1, _2);
3161 // CHECK-LABEL: @vfcvt_h_s(
3162 // CHECK-NEXT: entry:
3163 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3164 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3165 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3166 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3167 // CHECK-NEXT: ret i128 [[TMP3]]
3169 v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) {
3170 return __builtin_lsx_vfcvt_h_s(_1, _2);
3172 // CHECK-LABEL: @vfcvt_s_d(
3173 // CHECK-NEXT: entry:
3174 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3175 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3176 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3177 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3178 // CHECK-NEXT: ret i128 [[TMP3]]
3180 v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) {
3181 return __builtin_lsx_vfcvt_s_d(_1, _2);
3183 // CHECK-LABEL: @vfmin_s(
3184 // CHECK-NEXT: entry:
3185 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3186 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3187 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3188 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3189 // CHECK-NEXT: ret i128 [[TMP3]]
3191 v4f32 vfmin_s(v4f32 _1, v4f32 _2) {
3192 return __builtin_lsx_vfmin_s(_1, _2);
3194 // CHECK-LABEL: @vfmin_d(
3195 // CHECK-NEXT: entry:
3196 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3197 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3198 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3199 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3200 // CHECK-NEXT: ret i128 [[TMP3]]
3202 v2f64 vfmin_d(v2f64 _1, v2f64 _2) {
3203 return __builtin_lsx_vfmin_d(_1, _2);
3205 // CHECK-LABEL: @vfmina_s(
3206 // CHECK-NEXT: entry:
3207 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3208 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3209 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3210 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3211 // CHECK-NEXT: ret i128 [[TMP3]]
3213 v4f32 vfmina_s(v4f32 _1, v4f32 _2) {
3214 return __builtin_lsx_vfmina_s(_1, _2);
3216 // CHECK-LABEL: @vfmina_d(
3217 // CHECK-NEXT: entry:
3218 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3219 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3220 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3221 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3222 // CHECK-NEXT: ret i128 [[TMP3]]
3224 v2f64 vfmina_d(v2f64 _1, v2f64 _2) {
3225 return __builtin_lsx_vfmina_d(_1, _2);
3227 // CHECK-LABEL: @vfmax_s(
3228 // CHECK-NEXT: entry:
3229 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3230 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3231 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3232 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3233 // CHECK-NEXT: ret i128 [[TMP3]]
3235 v4f32 vfmax_s(v4f32 _1, v4f32 _2) {
3236 return __builtin_lsx_vfmax_s(_1, _2);
3238 // CHECK-LABEL: @vfmax_d(
3239 // CHECK-NEXT: entry:
3240 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3241 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3242 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3243 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3244 // CHECK-NEXT: ret i128 [[TMP3]]
3246 v2f64 vfmax_d(v2f64 _1, v2f64 _2) {
3247 return __builtin_lsx_vfmax_d(_1, _2);
3249 // CHECK-LABEL: @vfmaxa_s(
3250 // CHECK-NEXT: entry:
3251 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3252 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3253 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
3254 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3255 // CHECK-NEXT: ret i128 [[TMP3]]
3257 v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) {
3258 return __builtin_lsx_vfmaxa_s(_1, _2);
3260 // CHECK-LABEL: @vfmaxa_d(
3261 // CHECK-NEXT: entry:
3262 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3263 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3264 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3265 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
3266 // CHECK-NEXT: ret i128 [[TMP3]]
3268 v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) {
3269 return __builtin_lsx_vfmaxa_d(_1, _2);
3271 // CHECK-LABEL: @vfclass_s(
3272 // CHECK-NEXT: entry:
3273 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3274 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]])
3275 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3276 // CHECK-NEXT: ret i128 [[TMP2]]
3278 v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); }
3279 // CHECK-LABEL: @vfclass_d(
3280 // CHECK-NEXT: entry:
3281 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3282 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]])
3283 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3284 // CHECK-NEXT: ret i128 [[TMP2]]
3286 v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); }
3287 // CHECK-LABEL: @vfsqrt_s(
3288 // CHECK-NEXT: entry:
3289 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3290 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]])
3291 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3292 // CHECK-NEXT: ret i128 [[TMP2]]
3294 v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); }
3295 // CHECK-LABEL: @vfsqrt_d(
3296 // CHECK-NEXT: entry:
3297 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3298 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]])
3299 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3300 // CHECK-NEXT: ret i128 [[TMP2]]
3302 v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); }
3303 // CHECK-LABEL: @vfrecip_s(
3304 // CHECK-NEXT: entry:
3305 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3306 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]])
3307 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3308 // CHECK-NEXT: ret i128 [[TMP2]]
3310 v4f32 vfrecip_s(v4f32 _1) { return __builtin_lsx_vfrecip_s(_1); }
3311 // CHECK-LABEL: @vfrecip_d(
3312 // CHECK-NEXT: entry:
3313 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3314 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]])
3315 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3316 // CHECK-NEXT: ret i128 [[TMP2]]
3318 v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); }
3319 // CHECK-LABEL: @vfrint_s(
3320 // CHECK-NEXT: entry:
3321 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3322 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]])
3323 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3324 // CHECK-NEXT: ret i128 [[TMP2]]
3326 v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); }
3327 // CHECK-LABEL: @vfrint_d(
3328 // CHECK-NEXT: entry:
3329 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3330 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]])
3331 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3332 // CHECK-NEXT: ret i128 [[TMP2]]
3334 v2f64 vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); }
3335 // CHECK-LABEL: @vfrsqrt_s(
3336 // CHECK-NEXT: entry:
3337 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3338 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]])
3339 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3340 // CHECK-NEXT: ret i128 [[TMP2]]
3342 v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); }
3343 // CHECK-LABEL: @vfrsqrt_d(
3344 // CHECK-NEXT: entry:
3345 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3346 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]])
3347 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3348 // CHECK-NEXT: ret i128 [[TMP2]]
3350 v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); }
3351 // CHECK-LABEL: @vflogb_s(
3352 // CHECK-NEXT: entry:
3353 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3354 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]])
3355 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3356 // CHECK-NEXT: ret i128 [[TMP2]]
3358 v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); }
3359 // CHECK-LABEL: @vflogb_d(
3360 // CHECK-NEXT: entry:
3361 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3362 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]])
3363 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3364 // CHECK-NEXT: ret i128 [[TMP2]]
3366 v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); }
3367 // CHECK-LABEL: @vfcvth_s_h(
3368 // CHECK-NEXT: entry:
3369 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3370 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]])
3371 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3372 // CHECK-NEXT: ret i128 [[TMP2]]
3374 v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); }
3375 // CHECK-LABEL: @vfcvth_d_s(
3376 // CHECK-NEXT: entry:
3377 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3378 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]])
3379 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3380 // CHECK-NEXT: ret i128 [[TMP2]]
3382 v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); }
3383 // CHECK-LABEL: @vfcvtl_s_h(
3384 // CHECK-NEXT: entry:
3385 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3386 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]])
3387 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3388 // CHECK-NEXT: ret i128 [[TMP2]]
3390 v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); }
3391 // CHECK-LABEL: @vfcvtl_d_s(
3392 // CHECK-NEXT: entry:
3393 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3394 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]])
3395 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3396 // CHECK-NEXT: ret i128 [[TMP2]]
3398 v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); }
3399 // CHECK-LABEL: @vftint_w_s(
3400 // CHECK-NEXT: entry:
3401 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3402 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]])
3403 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3404 // CHECK-NEXT: ret i128 [[TMP2]]
3406 v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); }
3407 // CHECK-LABEL: @vftint_l_d(
3408 // CHECK-NEXT: entry:
3409 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3410 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]])
3411 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3412 // CHECK-NEXT: ret i128 [[TMP2]]
3414 v2i64 vftint_l_d(v2f64 _1) { return __builtin_lsx_vftint_l_d(_1); }
3415 // CHECK-LABEL: @vftint_wu_s(
3416 // CHECK-NEXT: entry:
3417 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3418 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]])
3419 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3420 // CHECK-NEXT: ret i128 [[TMP2]]
3422 v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); }
3423 // CHECK-LABEL: @vftint_lu_d(
3424 // CHECK-NEXT: entry:
3425 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3426 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]])
3427 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3428 // CHECK-NEXT: ret i128 [[TMP2]]
3430 v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); }
3431 // CHECK-LABEL: @vftintrz_w_s(
3432 // CHECK-NEXT: entry:
3433 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3434 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]])
3435 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3436 // CHECK-NEXT: ret i128 [[TMP2]]
3438 v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); }
3439 // CHECK-LABEL: @vftintrz_l_d(
3440 // CHECK-NEXT: entry:
3441 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3442 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]])
3443 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3444 // CHECK-NEXT: ret i128 [[TMP2]]
3446 v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); }
3447 // CHECK-LABEL: @vftintrz_wu_s(
3448 // CHECK-NEXT: entry:
3449 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3450 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]])
3451 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3452 // CHECK-NEXT: ret i128 [[TMP2]]
3454 v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); }
3455 // CHECK-LABEL: @vftintrz_lu_d(
3456 // CHECK-NEXT: entry:
3457 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3458 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]])
3459 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3460 // CHECK-NEXT: ret i128 [[TMP2]]
3462 v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); }
3463 // CHECK-LABEL: @vffint_s_w(
3464 // CHECK-NEXT: entry:
3465 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3466 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]])
3467 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3468 // CHECK-NEXT: ret i128 [[TMP2]]
3470 v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); }
3471 // CHECK-LABEL: @vffint_d_l(
3472 // CHECK-NEXT: entry:
3473 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3474 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]])
3475 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3476 // CHECK-NEXT: ret i128 [[TMP2]]
3478 v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); }
3479 // CHECK-LABEL: @vffint_s_wu(
3480 // CHECK-NEXT: entry:
3481 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3482 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]])
3483 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3484 // CHECK-NEXT: ret i128 [[TMP2]]
3486 v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); }
3487 // CHECK-LABEL: @vffint_d_lu(
3488 // CHECK-NEXT: entry:
3489 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3490 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]])
3491 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3492 // CHECK-NEXT: ret i128 [[TMP2]]
3494 v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); }
3495 // CHECK-LABEL: @vandn_v(
3496 // CHECK-NEXT: entry:
3497 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3498 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3499 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3500 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3501 // CHECK-NEXT: ret i128 [[TMP3]]
3503 v16u8 vandn_v(v16u8 _1, v16u8 _2) {
3504 return __builtin_lsx_vandn_v(_1, _2);
3506 // CHECK-LABEL: @vneg_b(
3507 // CHECK-NEXT: entry:
3508 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3509 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]])
3510 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
3511 // CHECK-NEXT: ret i128 [[TMP2]]
3513 v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); }
3514 // CHECK-LABEL: @vneg_h(
3515 // CHECK-NEXT: entry:
3516 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3517 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]])
3518 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3519 // CHECK-NEXT: ret i128 [[TMP2]]
3521 v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); }
3522 // CHECK-LABEL: @vneg_w(
3523 // CHECK-NEXT: entry:
3524 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3525 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]])
3526 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3527 // CHECK-NEXT: ret i128 [[TMP2]]
3529 v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); }
3530 // CHECK-LABEL: @vneg_d(
3531 // CHECK-NEXT: entry:
3532 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3533 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]])
3534 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3535 // CHECK-NEXT: ret i128 [[TMP2]]
3537 v2i64 vneg_d(v2i64 _1) { return __builtin_lsx_vneg_d(_1); }
3538 // CHECK-LABEL: @vmuh_b(
3539 // CHECK-NEXT: entry:
3540 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3541 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3542 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3543 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3544 // CHECK-NEXT: ret i128 [[TMP3]]
3546 v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); }
3547 // CHECK-LABEL: @vmuh_h(
3548 // CHECK-NEXT: entry:
3549 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3550 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3551 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3552 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3553 // CHECK-NEXT: ret i128 [[TMP3]]
3555 v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); }
3556 // CHECK-LABEL: @vmuh_w(
3557 // CHECK-NEXT: entry:
3558 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3559 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3560 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3561 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3562 // CHECK-NEXT: ret i128 [[TMP3]]
3564 v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); }
3565 // CHECK-LABEL: @vmuh_d(
3566 // CHECK-NEXT: entry:
3567 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3568 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3569 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3570 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3571 // CHECK-NEXT: ret i128 [[TMP3]]
3573 v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); }
3574 // CHECK-LABEL: @vmuh_bu(
3575 // CHECK-NEXT: entry:
3576 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3577 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3578 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3579 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3580 // CHECK-NEXT: ret i128 [[TMP3]]
3582 v16u8 vmuh_bu(v16u8 _1, v16u8 _2) {
3583 return __builtin_lsx_vmuh_bu(_1, _2);
3585 // CHECK-LABEL: @vmuh_hu(
3586 // CHECK-NEXT: entry:
3587 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3588 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3589 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3590 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3591 // CHECK-NEXT: ret i128 [[TMP3]]
3593 v8u16 vmuh_hu(v8u16 _1, v8u16 _2) {
3594 return __builtin_lsx_vmuh_hu(_1, _2);
3596 // CHECK-LABEL: @vmuh_wu(
3597 // CHECK-NEXT: entry:
3598 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3599 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3600 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3601 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3602 // CHECK-NEXT: ret i128 [[TMP3]]
3604 v4u32 vmuh_wu(v4u32 _1, v4u32 _2) {
3605 return __builtin_lsx_vmuh_wu(_1, _2);
3607 // CHECK-LABEL: @vmuh_du(
3608 // CHECK-NEXT: entry:
3609 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3610 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3611 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3612 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3613 // CHECK-NEXT: ret i128 [[TMP3]]
3615 v2u64 vmuh_du(v2u64 _1, v2u64 _2) {
3616 return __builtin_lsx_vmuh_du(_1, _2);
3618 // CHECK-LABEL: @vsllwil_h_b(
3619 // CHECK-NEXT: entry:
3620 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3621 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1)
3622 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3623 // CHECK-NEXT: ret i128 [[TMP2]]
3625 v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); }
3626 // CHECK-LABEL: @vsllwil_w_h(
3627 // CHECK-NEXT: entry:
3628 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3629 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1)
3630 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3631 // CHECK-NEXT: ret i128 [[TMP2]]
3633 v4i32 vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); }
3634 // CHECK-LABEL: @vsllwil_d_w(
3635 // CHECK-NEXT: entry:
3636 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3637 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1)
3638 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3639 // CHECK-NEXT: ret i128 [[TMP2]]
3641 v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); }
3642 // CHECK-LABEL: @vsllwil_hu_bu(
3643 // CHECK-NEXT: entry:
3644 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3645 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1)
3646 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3647 // CHECK-NEXT: ret i128 [[TMP2]]
3649 v8u16 vsllwil_hu_bu(v16u8 _1) {
3650 return __builtin_lsx_vsllwil_hu_bu(_1, 1);
3652 // CHECK-LABEL: @vsllwil_wu_hu(
3653 // CHECK-NEXT: entry:
3654 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3655 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1)
3656 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3657 // CHECK-NEXT: ret i128 [[TMP2]]
3659 v4u32 vsllwil_wu_hu(v8u16 _1) {
3660 return __builtin_lsx_vsllwil_wu_hu(_1, 1);
3662 // CHECK-LABEL: @vsllwil_du_wu(
3663 // CHECK-NEXT: entry:
3664 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3665 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1)
3666 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3667 // CHECK-NEXT: ret i128 [[TMP2]]
3669 v2u64 vsllwil_du_wu(v4u32 _1) {
3670 return __builtin_lsx_vsllwil_du_wu(_1, 1);
3672 // CHECK-LABEL: @vsran_b_h(
3673 // CHECK-NEXT: entry:
3674 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3675 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3676 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3677 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3678 // CHECK-NEXT: ret i128 [[TMP3]]
3680 v16i8 vsran_b_h(v8i16 _1, v8i16 _2) {
3681 return __builtin_lsx_vsran_b_h(_1, _2);
3683 // CHECK-LABEL: @vsran_h_w(
3684 // CHECK-NEXT: entry:
3685 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3686 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3687 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3688 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3689 // CHECK-NEXT: ret i128 [[TMP3]]
3691 v8i16 vsran_h_w(v4i32 _1, v4i32 _2) {
3692 return __builtin_lsx_vsran_h_w(_1, _2);
3694 // CHECK-LABEL: @vsran_w_d(
3695 // CHECK-NEXT: entry:
3696 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3697 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3698 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3699 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3700 // CHECK-NEXT: ret i128 [[TMP3]]
3702 v4i32 vsran_w_d(v2i64 _1, v2i64 _2) {
3703 return __builtin_lsx_vsran_w_d(_1, _2);
3705 // CHECK-LABEL: @vssran_b_h(
3706 // CHECK-NEXT: entry:
3707 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3708 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3709 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3710 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3711 // CHECK-NEXT: ret i128 [[TMP3]]
3713 v16i8 vssran_b_h(v8i16 _1, v8i16 _2) {
3714 return __builtin_lsx_vssran_b_h(_1, _2);
3716 // CHECK-LABEL: @vssran_h_w(
3717 // CHECK-NEXT: entry:
3718 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3719 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3720 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3721 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3722 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vssran_h_w and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssran.h.w.
3724 v8i16 vssran_h_w(v4i32 _1, v4i32 _2) {
3725 return __builtin_lsx_vssran_h_w(_1, _2);
3727 // CHECK-LABEL: @vssran_w_d(
3728 // CHECK-NEXT: entry:
3729 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3730 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3731 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3732 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3733 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vssran_w_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssran.w.d.
3735 v4i32 vssran_w_d(v2i64 _1, v2i64 _2) {
3736 return __builtin_lsx_vssran_w_d(_1, _2);
3738 // CHECK-LABEL: @vssran_bu_h(
3739 // CHECK-NEXT: entry:
3740 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3741 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3742 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3743 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3744 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper (unsigned vector types): forwards _1 and _2 unchanged to __builtin_lsx_vssran_bu_h and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssran.bu.h.
3746 v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) {
3747 return __builtin_lsx_vssran_bu_h(_1, _2);
3749 // CHECK-LABEL: @vssran_hu_w(
3750 // CHECK-NEXT: entry:
3751 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3752 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3753 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3754 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3755 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper (unsigned vector types): forwards _1 and _2 unchanged to __builtin_lsx_vssran_hu_w and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssran.hu.w.
3757 v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) {
3758 return __builtin_lsx_vssran_hu_w(_1, _2);
3760 // CHECK-LABEL: @vssran_wu_d(
3761 // CHECK-NEXT: entry:
3762 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3763 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3764 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3765 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3766 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper (unsigned vector types): forwards _1 and _2 unchanged to __builtin_lsx_vssran_wu_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssran.wu.d.
3768 v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) {
3769 return __builtin_lsx_vssran_wu_d(_1, _2);
3771 // CHECK-LABEL: @vsrarn_b_h(
3772 // CHECK-NEXT: entry:
3773 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3774 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3775 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3776 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3777 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vsrarn_b_h and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vsrarn.b.h.
3779 v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) {
3780 return __builtin_lsx_vsrarn_b_h(_1, _2);
3782 // CHECK-LABEL: @vsrarn_h_w(
3783 // CHECK-NEXT: entry:
3784 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3785 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3786 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3787 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3788 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vsrarn_h_w and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vsrarn.h.w.
3790 v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) {
3791 return __builtin_lsx_vsrarn_h_w(_1, _2);
3793 // CHECK-LABEL: @vsrarn_w_d(
3794 // CHECK-NEXT: entry:
3795 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3796 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3797 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3798 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3799 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vsrarn_w_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vsrarn.w.d.
3801 v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) {
3802 return __builtin_lsx_vsrarn_w_d(_1, _2);
3804 // CHECK-LABEL: @vssrarn_b_h(
3805 // CHECK-NEXT: entry:
3806 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3807 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3808 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3809 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3810 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vssrarn_b_h and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssrarn.b.h.
3812 v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) {
3813 return __builtin_lsx_vssrarn_b_h(_1, _2);
3815 // CHECK-LABEL: @vssrarn_h_w(
3816 // CHECK-NEXT: entry:
3817 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3818 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3819 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3820 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3821 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vssrarn_h_w and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssrarn.h.w.
3823 v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) {
3824 return __builtin_lsx_vssrarn_h_w(_1, _2);
3826 // CHECK-LABEL: @vssrarn_w_d(
3827 // CHECK-NEXT: entry:
3828 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3829 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3830 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3831 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3832 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vssrarn_w_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssrarn.w.d.
3834 v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) {
3835 return __builtin_lsx_vssrarn_w_d(_1, _2);
3837 // CHECK-LABEL: @vssrarn_bu_h(
3838 // CHECK-NEXT: entry:
3839 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3840 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3841 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3842 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3843 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper (unsigned vector types): forwards _1 and _2 unchanged to __builtin_lsx_vssrarn_bu_h and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssrarn.bu.h.
3845 v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) {
3846 return __builtin_lsx_vssrarn_bu_h(_1, _2);
3848 // CHECK-LABEL: @vssrarn_hu_w(
3849 // CHECK-NEXT: entry:
3850 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3851 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3852 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3853 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3854 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper (unsigned vector types): forwards _1 and _2 unchanged to __builtin_lsx_vssrarn_hu_w and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssrarn.hu.w.
3856 v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) {
3857 return __builtin_lsx_vssrarn_hu_w(_1, _2);
3859 // CHECK-LABEL: @vssrarn_wu_d(
3860 // CHECK-NEXT: entry:
3861 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3862 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3863 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3864 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3865 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper (unsigned vector types): forwards _1 and _2 unchanged to __builtin_lsx_vssrarn_wu_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssrarn.wu.d.
3867 v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) {
3868 return __builtin_lsx_vssrarn_wu_d(_1, _2);
3870 // CHECK-LABEL: @vsrln_b_h(
3871 // CHECK-NEXT: entry:
3872 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3873 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3874 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3875 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3876 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vsrln_b_h and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vsrln.b.h.
3878 v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) {
3879 return __builtin_lsx_vsrln_b_h(_1, _2);
3881 // CHECK-LABEL: @vsrln_h_w(
3882 // CHECK-NEXT: entry:
3883 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3884 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3885 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3886 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3887 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vsrln_h_w and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vsrln.h.w.
3889 v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) {
3890 return __builtin_lsx_vsrln_h_w(_1, _2);
3892 // CHECK-LABEL: @vsrln_w_d(
3893 // CHECK-NEXT: entry:
3894 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3895 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3896 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3897 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3898 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vsrln_w_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vsrln.w.d.
3900 v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) {
3901 return __builtin_lsx_vsrln_w_d(_1, _2);
3903 // CHECK-LABEL: @vssrln_bu_h(
3904 // CHECK-NEXT: entry:
3905 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3906 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3907 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3908 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3909 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper (unsigned vector types): forwards _1 and _2 unchanged to __builtin_lsx_vssrln_bu_h and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssrln.bu.h.
3911 v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) {
3912 return __builtin_lsx_vssrln_bu_h(_1, _2);
3914 // CHECK-LABEL: @vssrln_hu_w(
3915 // CHECK-NEXT: entry:
3916 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3917 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3918 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3919 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3920 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper (unsigned vector types): forwards _1 and _2 unchanged to __builtin_lsx_vssrln_hu_w and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssrln.hu.w.
3922 v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) {
3923 return __builtin_lsx_vssrln_hu_w(_1, _2);
3925 // CHECK-LABEL: @vssrln_wu_d(
3926 // CHECK-NEXT: entry:
3927 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3928 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3929 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3930 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3931 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper (unsigned vector types): forwards _1 and _2 unchanged to __builtin_lsx_vssrln_wu_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssrln.wu.d.
3933 v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) {
3934 return __builtin_lsx_vssrln_wu_d(_1, _2);
3936 // CHECK-LABEL: @vsrlrn_b_h(
3937 // CHECK-NEXT: entry:
3938 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3939 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3940 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3941 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3942 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vsrlrn_b_h and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vsrlrn.b.h.
3944 v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) {
3945 return __builtin_lsx_vsrlrn_b_h(_1, _2);
3947 // CHECK-LABEL: @vsrlrn_h_w(
3948 // CHECK-NEXT: entry:
3949 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3950 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3951 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3952 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3953 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vsrlrn_h_w and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vsrlrn.h.w.
3955 v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) {
3956 return __builtin_lsx_vsrlrn_h_w(_1, _2);
3958 // CHECK-LABEL: @vsrlrn_w_d(
3959 // CHECK-NEXT: entry:
3960 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3961 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3962 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3963 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3964 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vsrlrn_w_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vsrlrn.w.d.
3966 v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) {
3967 return __builtin_lsx_vsrlrn_w_d(_1, _2);
3969 // CHECK-LABEL: @vssrlrn_bu_h(
3970 // CHECK-NEXT: entry:
3971 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3972 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3973 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3974 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3975 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper (unsigned vector types): forwards _1 and _2 unchanged to __builtin_lsx_vssrlrn_bu_h and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssrlrn.bu.h.
3977 v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) {
3978 return __builtin_lsx_vssrlrn_bu_h(_1, _2);
3980 // CHECK-LABEL: @vssrlrn_hu_w(
3981 // CHECK-NEXT: entry:
3982 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3983 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3984 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3985 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3986 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper (unsigned vector types): forwards _1 and _2 unchanged to __builtin_lsx_vssrlrn_hu_w and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssrlrn.hu.w.
3988 v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) {
3989 return __builtin_lsx_vssrlrn_hu_w(_1, _2);
3991 // CHECK-LABEL: @vssrlrn_wu_d(
3992 // CHECK-NEXT: entry:
3993 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3994 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3995 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3996 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3997 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper (unsigned vector types): forwards _1 and _2 unchanged to __builtin_lsx_vssrlrn_wu_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vssrlrn.wu.d.
3999 v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) {
4000 return __builtin_lsx_vssrlrn_wu_d(_1, _2);
4002 // CHECK-LABEL: @vfrstpi_b(
4003 // CHECK-NEXT: entry:
4004 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4005 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4006 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
4007 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
4008 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: passes _1, _2 and the literal 1 to __builtin_lsx_vfrstpi_b and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vfrstpi.b whose last argument is i32 1.
4010 v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) {
4011 return __builtin_lsx_vfrstpi_b(_1, _2, 1);
4013 // CHECK-LABEL: @vfrstpi_h(
4014 // CHECK-NEXT: entry:
4015 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4016 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4017 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
4018 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4019 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: passes _1, _2 and the literal 1 to __builtin_lsx_vfrstpi_h and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vfrstpi.h whose last argument is i32 1.
4021 v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) {
4022 return __builtin_lsx_vfrstpi_h(_1, _2, 1);
4024 // CHECK-LABEL: @vfrstp_b(
4025 // CHECK-NEXT: entry:
4026 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4027 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4028 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
4029 // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
4030 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
4031 // CHECK-NEXT: ret i128 [[TMP4]]
// Test wrapper: forwards _1, _2 and _3 unchanged to __builtin_lsx_vfrstp_b and returns the result; the generated assertions above expect a three-operand call to @llvm.loongarch.lsx.vfrstp.b.
4033 v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) {
4034 return __builtin_lsx_vfrstp_b(_1, _2, _3);
4036 // CHECK-LABEL: @vfrstp_h(
4037 // CHECK-NEXT: entry:
4038 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4039 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4040 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4041 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4042 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
4043 // CHECK-NEXT: ret i128 [[TMP4]]
// Test wrapper: forwards _1, _2 and _3 unchanged to __builtin_lsx_vfrstp_h and returns the result; the generated assertions above expect a three-operand call to @llvm.loongarch.lsx.vfrstp.h.
4045 v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) {
4046 return __builtin_lsx_vfrstp_h(_1, _2, _3);
4048 // CHECK-LABEL: @vshuf4i_d(
4049 // CHECK-NEXT: entry:
4050 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4051 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4052 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
4053 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4054 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: passes _1, _2 and the literal 1 to __builtin_lsx_vshuf4i_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vshuf4i.d whose last argument is i32 1.
4056 v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) {
4057 return __builtin_lsx_vshuf4i_d(_1, _2, 1);
4059 // CHECK-LABEL: @vbsrl_v(
4060 // CHECK-NEXT: entry:
4061 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4062 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1)
4063 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
4064 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: passes _1 and the literal 1 to __builtin_lsx_vbsrl_v and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vbsrl.v whose last argument is i32 1.
4066 v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); }
4067 // CHECK-LABEL: @vbsll_v(
4068 // CHECK-NEXT: entry:
4069 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4070 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1)
4071 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
4072 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: passes _1 and the literal 1 to __builtin_lsx_vbsll_v and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vbsll.v whose last argument is i32 1.
4074 v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); }
4075 // CHECK-LABEL: @vextrins_b(
4076 // CHECK-NEXT: entry:
4077 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4078 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4079 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
4080 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
4081 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: passes _1, _2 and the literal 1 to __builtin_lsx_vextrins_b and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vextrins.b whose last argument is i32 1.
4083 v16i8 vextrins_b(v16i8 _1, v16i8 _2) {
4084 return __builtin_lsx_vextrins_b(_1, _2, 1);
4086 // CHECK-LABEL: @vextrins_h(
4087 // CHECK-NEXT: entry:
4088 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4089 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4090 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
4091 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4092 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: passes _1, _2 and the literal 1 to __builtin_lsx_vextrins_h and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vextrins.h whose last argument is i32 1.
4094 v8i16 vextrins_h(v8i16 _1, v8i16 _2) {
4095 return __builtin_lsx_vextrins_h(_1, _2, 1);
4097 // CHECK-LABEL: @vextrins_w(
4098 // CHECK-NEXT: entry:
4099 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4100 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4101 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
4102 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4103 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: passes _1, _2 and the literal 1 to __builtin_lsx_vextrins_w and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vextrins.w whose last argument is i32 1.
4105 v4i32 vextrins_w(v4i32 _1, v4i32 _2) {
4106 return __builtin_lsx_vextrins_w(_1, _2, 1);
4108 // CHECK-LABEL: @vextrins_d(
4109 // CHECK-NEXT: entry:
4110 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4111 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4112 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
4113 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4114 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: passes _1, _2 and the literal 1 to __builtin_lsx_vextrins_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vextrins.d whose last argument is i32 1.
4116 v2i64 vextrins_d(v2i64 _1, v2i64 _2) {
4117 return __builtin_lsx_vextrins_d(_1, _2, 1);
4119 // CHECK-LABEL: @vmskltz_b(
4120 // CHECK-NEXT: entry:
4121 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4122 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]])
4123 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
4124 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 unchanged to __builtin_lsx_vmskltz_b and returns the result; the generated assertions above expect a single-operand call to @llvm.loongarch.lsx.vmskltz.b.
4126 v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); }
4127 // CHECK-LABEL: @vmskltz_h(
4128 // CHECK-NEXT: entry:
4129 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4130 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]])
4131 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
4132 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 unchanged to __builtin_lsx_vmskltz_h and returns the result; the generated assertions above expect a single-operand call to @llvm.loongarch.lsx.vmskltz.h.
4134 v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); }
4135 // CHECK-LABEL: @vmskltz_w(
4136 // CHECK-NEXT: entry:
4137 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4138 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]])
4139 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4140 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 unchanged to __builtin_lsx_vmskltz_w and returns the result; the generated assertions above expect a single-operand call to @llvm.loongarch.lsx.vmskltz.w.
4142 v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); }
4143 // CHECK-LABEL: @vmskltz_d(
4144 // CHECK-NEXT: entry:
4145 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4146 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]])
4147 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4148 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 unchanged to __builtin_lsx_vmskltz_d and returns the result; the generated assertions above expect a single-operand call to @llvm.loongarch.lsx.vmskltz.d.
4150 v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); }
4151 // CHECK-LABEL: @vsigncov_b(
4152 // CHECK-NEXT: entry:
4153 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4154 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4155 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4156 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
4157 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vsigncov_b and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vsigncov.b.
4159 v16i8 vsigncov_b(v16i8 _1, v16i8 _2) {
4160 return __builtin_lsx_vsigncov_b(_1, _2);
4162 // CHECK-LABEL: @vsigncov_h(
4163 // CHECK-NEXT: entry:
4164 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4165 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4166 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4167 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4168 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vsigncov_h and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vsigncov.h.
4170 v8i16 vsigncov_h(v8i16 _1, v8i16 _2) {
4171 return __builtin_lsx_vsigncov_h(_1, _2);
4173 // CHECK-LABEL: @vsigncov_w(
4174 // CHECK-NEXT: entry:
4175 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4176 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4177 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4178 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4179 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vsigncov_w and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vsigncov.w.
4181 v4i32 vsigncov_w(v4i32 _1, v4i32 _2) {
4182 return __builtin_lsx_vsigncov_w(_1, _2);
4184 // CHECK-LABEL: @vsigncov_d(
4185 // CHECK-NEXT: entry:
4186 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4187 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4188 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4189 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4190 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 unchanged to __builtin_lsx_vsigncov_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vsigncov.d.
4192 v2i64 vsigncov_d(v2i64 _1, v2i64 _2) {
4193 return __builtin_lsx_vsigncov_d(_1, _2);
4195 // CHECK-LABEL: @vfmadd_s(
4196 // CHECK-NEXT: entry:
4197 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4198 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
4199 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
4200 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
4201 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
4202 // CHECK-NEXT: ret i128 [[TMP4]]
// Test wrapper: forwards the three v4f32 operands unchanged to __builtin_lsx_vfmadd_s and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vfmadd.s on <4 x float> values.
4204 v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
4205 return __builtin_lsx_vfmadd_s(_1, _2, _3);
4207 // CHECK-LABEL: @vfmadd_d(
4208 // CHECK-NEXT: entry:
4209 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4210 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4211 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
4212 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
4213 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
4214 // CHECK-NEXT: ret i128 [[TMP4]]
// Test wrapper: forwards the three v2f64 operands unchanged to __builtin_lsx_vfmadd_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vfmadd.d on <2 x double> values.
4216 v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
4217 return __builtin_lsx_vfmadd_d(_1, _2, _3);
4219 // CHECK-LABEL: @vfmsub_s(
4220 // CHECK-NEXT: entry:
4221 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4222 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
4223 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
4224 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
4225 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
4226 // CHECK-NEXT: ret i128 [[TMP4]]
// Test wrapper: forwards the three v4f32 operands unchanged to __builtin_lsx_vfmsub_s and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vfmsub.s on <4 x float> values.
4228 v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
4229 return __builtin_lsx_vfmsub_s(_1, _2, _3);
4231 // CHECK-LABEL: @vfmsub_d(
4232 // CHECK-NEXT: entry:
4233 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4234 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4235 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
4236 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
4237 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
4238 // CHECK-NEXT: ret i128 [[TMP4]]
// Test wrapper: forwards the three v2f64 operands unchanged to __builtin_lsx_vfmsub_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vfmsub.d on <2 x double> values.
4240 v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
4241 return __builtin_lsx_vfmsub_d(_1, _2, _3);
4243 // CHECK-LABEL: @vfnmadd_s(
4244 // CHECK-NEXT: entry:
4245 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4246 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
4247 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
4248 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
4249 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
4250 // CHECK-NEXT: ret i128 [[TMP4]]
// Test wrapper: forwards the three v4f32 operands unchanged to __builtin_lsx_vfnmadd_s and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vfnmadd.s on <4 x float> values.
4252 v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
4253 return __builtin_lsx_vfnmadd_s(_1, _2, _3);
4255 // CHECK-LABEL: @vfnmadd_d(
4256 // CHECK-NEXT: entry:
4257 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4258 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4259 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
4260 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
4261 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
4262 // CHECK-NEXT: ret i128 [[TMP4]]
// Test wrapper: forwards the three v2f64 operands unchanged to __builtin_lsx_vfnmadd_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vfnmadd.d on <2 x double> values.
4264 v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
4265 return __builtin_lsx_vfnmadd_d(_1, _2, _3);
4267 // CHECK-LABEL: @vfnmsub_s(
4268 // CHECK-NEXT: entry:
4269 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4270 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
4271 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
4272 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
4273 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
4274 // CHECK-NEXT: ret i128 [[TMP4]]
// Test wrapper: forwards the three v4f32 operands unchanged to __builtin_lsx_vfnmsub_s and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vfnmsub.s on <4 x float> values.
4276 v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
4277 return __builtin_lsx_vfnmsub_s(_1, _2, _3);
4279 // CHECK-LABEL: @vfnmsub_d(
4280 // CHECK-NEXT: entry:
4281 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4282 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4283 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
4284 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
4285 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
4286 // CHECK-NEXT: ret i128 [[TMP4]]
// Test wrapper: forwards the three v2f64 operands unchanged to __builtin_lsx_vfnmsub_d and returns the result; the generated assertions above expect a call to @llvm.loongarch.lsx.vfnmsub.d on <2 x double> values.
4288 v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
4289 return __builtin_lsx_vfnmsub_d(_1, _2, _3);
4291 // CHECK-LABEL: @vftintrne_w_s(
4292 // CHECK-NEXT: entry:
4293 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4294 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]])
4295 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4296 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards the v4f32 operand to __builtin_lsx_vftintrne_w_s and returns its v4i32 result; the generated assertions above expect a call to @llvm.loongarch.lsx.vftintrne.w.s taking <4 x float> and yielding <4 x i32>.
4298 v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); }
4299 // CHECK-LABEL: @vftintrne_l_d(
4300 // CHECK-NEXT: entry:
4301 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4302 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]])
4303 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4304 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards the v2f64 operand to __builtin_lsx_vftintrne_l_d and returns its v2i64 result; the generated assertions above expect a call to @llvm.loongarch.lsx.vftintrne.l.d taking <2 x double> and yielding <2 x i64>.
4306 v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); }
4307 // CHECK-LABEL: @vftintrp_w_s(
4308 // CHECK-NEXT: entry:
4309 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4310 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]])
4311 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4312 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards the v4f32 operand to __builtin_lsx_vftintrp_w_s and returns its v4i32 result; the generated assertions above expect a call to @llvm.loongarch.lsx.vftintrp.w.s taking <4 x float> and yielding <4 x i32>.
4314 v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); }
4315 // CHECK-LABEL: @vftintrp_l_d(
4316 // CHECK-NEXT: entry:
4317 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4318 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]])
4319 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4320 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards the v2f64 operand to __builtin_lsx_vftintrp_l_d and returns its v2i64 result; the generated assertions above expect a call to @llvm.loongarch.lsx.vftintrp.l.d taking <2 x double> and yielding <2 x i64>.
4322 v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); }
4323 // CHECK-LABEL: @vftintrm_w_s(
4324 // CHECK-NEXT: entry:
4325 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4326 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]])
4327 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4328 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards the v4f32 operand to __builtin_lsx_vftintrm_w_s and returns its v4i32 result; the generated assertions above expect a call to @llvm.loongarch.lsx.vftintrm.w.s taking <4 x float> and yielding <4 x i32>.
4330 v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); }
4331 // CHECK-LABEL: @vftintrm_l_d(
4332 // CHECK-NEXT: entry:
4333 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4334 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]])
4335 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4336 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards the v2f64 operand to __builtin_lsx_vftintrm_l_d and returns its v2i64 result; the generated assertions above expect a call to @llvm.loongarch.lsx.vftintrm.l.d taking <2 x double> and yielding <2 x i64>.
4338 v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); }
4339 // CHECK-LABEL: @vftint_w_d(
4340 // CHECK-NEXT: entry:
4341 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4342 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4343 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
4344 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4345 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vftint_w_d and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftint.w.d (<2 x double>, <2 x double> -> <4 x i32>).
4347 v4i32 vftint_w_d(v2f64 _1, v2f64 _2) {
4348 return __builtin_lsx_vftint_w_d(_1, _2);
4350 // CHECK-LABEL: @vffint_s_l(
4351 // CHECK-NEXT: entry:
4352 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4353 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4354 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4355 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
4356 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vffint_s_l and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vffint.s.l (<2 x i64>, <2 x i64> -> <4 x float>).
4358 v4f32 vffint_s_l(v2i64 _1, v2i64 _2) {
4359 return __builtin_lsx_vffint_s_l(_1, _2);
4361 // CHECK-LABEL: @vftintrz_w_d(
4362 // CHECK-NEXT: entry:
4363 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4364 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4365 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
4366 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4367 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vftintrz_w_d and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftintrz.w.d (<2 x double>, <2 x double> -> <4 x i32>).
4369 v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) {
4370 return __builtin_lsx_vftintrz_w_d(_1, _2);
4372 // CHECK-LABEL: @vftintrp_w_d(
4373 // CHECK-NEXT: entry:
4374 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4375 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4376 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
4377 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4378 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vftintrp_w_d and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftintrp.w.d (<2 x double>, <2 x double> -> <4 x i32>).
4380 v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) {
4381 return __builtin_lsx_vftintrp_w_d(_1, _2);
4383 // CHECK-LABEL: @vftintrm_w_d(
4384 // CHECK-NEXT: entry:
4385 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4386 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4387 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
4388 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4389 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vftintrm_w_d and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftintrm.w.d (<2 x double>, <2 x double> -> <4 x i32>).
4391 v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) {
4392 return __builtin_lsx_vftintrm_w_d(_1, _2);
4394 // CHECK-LABEL: @vftintrne_w_d(
4395 // CHECK-NEXT: entry:
4396 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4397 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
4398 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
4399 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4400 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vftintrne_w_d and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftintrne.w.d (<2 x double>, <2 x double> -> <4 x i32>).
4402 v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) {
4403 return __builtin_lsx_vftintrne_w_d(_1, _2);
4405 // CHECK-LABEL: @vftintl_l_s(
4406 // CHECK-NEXT: entry:
4407 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4408 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]])
4409 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4410 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vftintl_l_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftintl.l.s (<4 x float> -> <2 x i64>).
4412 v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); }
4413 // CHECK-LABEL: @vftinth_l_s(
4414 // CHECK-NEXT: entry:
4415 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4416 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]])
4417 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4418 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vftinth_l_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftinth.l.s (<4 x float> -> <2 x i64>).
4420 v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); }
4421 // CHECK-LABEL: @vffinth_d_w(
4422 // CHECK-NEXT: entry:
4423 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4424 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]])
4425 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4426 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vffinth_d_w and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vffinth.d.w (<4 x i32> -> <2 x double>).
4428 v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1); }
4429 // CHECK-LABEL: @vffintl_d_w(
4430 // CHECK-NEXT: entry:
4431 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4432 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]])
4433 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4434 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vffintl_d_w and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vffintl.d.w (<4 x i32> -> <2 x double>).
4436 v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); }
4437 // CHECK-LABEL: @vftintrzl_l_s(
4438 // CHECK-NEXT: entry:
4439 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4440 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]])
4441 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4442 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vftintrzl_l_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftintrzl.l.s (<4 x float> -> <2 x i64>).
4444 v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); }
4445 // CHECK-LABEL: @vftintrzh_l_s(
4446 // CHECK-NEXT: entry:
4447 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4448 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]])
4449 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4450 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vftintrzh_l_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftintrzh.l.s (<4 x float> -> <2 x i64>).
4452 v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); }
4453 // CHECK-LABEL: @vftintrpl_l_s(
4454 // CHECK-NEXT: entry:
4455 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4456 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]])
4457 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4458 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vftintrpl_l_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftintrpl.l.s (<4 x float> -> <2 x i64>).
4460 v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); }
4461 // CHECK-LABEL: @vftintrph_l_s(
4462 // CHECK-NEXT: entry:
4463 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4464 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]])
4465 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4466 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vftintrph_l_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftintrph.l.s (<4 x float> -> <2 x i64>).
4468 v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); }
4469 // CHECK-LABEL: @vftintrml_l_s(
4470 // CHECK-NEXT: entry:
4471 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4472 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]])
4473 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4474 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vftintrml_l_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftintrml.l.s (<4 x float> -> <2 x i64>).
4476 v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); }
4477 // CHECK-LABEL: @vftintrmh_l_s(
4478 // CHECK-NEXT: entry:
4479 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4480 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]])
4481 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4482 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vftintrmh_l_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftintrmh.l.s (<4 x float> -> <2 x i64>).
4484 v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); }
4485 // CHECK-LABEL: @vftintrnel_l_s(
4486 // CHECK-NEXT: entry:
4487 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4488 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]])
4489 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4490 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vftintrnel_l_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftintrnel.l.s (<4 x float> -> <2 x i64>).
4492 v2i64 vftintrnel_l_s(v4f32 _1) {
4493 return __builtin_lsx_vftintrnel_l_s(_1);
4495 // CHECK-LABEL: @vftintrneh_l_s(
4496 // CHECK-NEXT: entry:
4497 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4498 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]])
4499 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4500 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vftintrneh_l_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vftintrneh.l.s (<4 x float> -> <2 x i64>).
4502 v2i64 vftintrneh_l_s(v4f32 _1) {
4503 return __builtin_lsx_vftintrneh_l_s(_1);
4505 // CHECK-LABEL: @vfrintrne_s(
4506 // CHECK-NEXT: entry:
4507 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4508 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]])
4509 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4510 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vfrintrne_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vfrintrne.s (<4 x float> -> <4 x float>).
// NOTE(review): declared to return v4i32 although the expected IR shows a <4 x float> result;
// the return appears to rely on an implicit vector conversion -- confirm this is intended.
4512 v4i32 vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); }
4513 // CHECK-LABEL: @vfrintrne_d(
4514 // CHECK-NEXT: entry:
4515 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4516 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]])
4517 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4518 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vfrintrne_d and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vfrintrne.d (<2 x double> -> <2 x double>).
// NOTE(review): declared to return v2i64 although the expected IR shows a <2 x double> result;
// the return appears to rely on an implicit vector conversion -- confirm this is intended.
4520 v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); }
4521 // CHECK-LABEL: @vfrintrz_s(
4522 // CHECK-NEXT: entry:
4523 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4524 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]])
4525 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4526 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vfrintrz_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vfrintrz.s (<4 x float> -> <4 x float>).
// NOTE(review): declared to return v4i32 although the expected IR shows a <4 x float> result;
// the return appears to rely on an implicit vector conversion -- confirm this is intended.
4528 v4i32 vfrintrz_s(v4f32 _1) { return __builtin_lsx_vfrintrz_s(_1); }
4529 // CHECK-LABEL: @vfrintrz_d(
4530 // CHECK-NEXT: entry:
4531 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4532 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]])
4533 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4534 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vfrintrz_d and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vfrintrz.d (<2 x double> -> <2 x double>).
// NOTE(review): declared to return v2i64 although the expected IR shows a <2 x double> result;
// the return appears to rely on an implicit vector conversion -- confirm this is intended.
4536 v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); }
4537 // CHECK-LABEL: @vfrintrp_s(
4538 // CHECK-NEXT: entry:
4539 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4540 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]])
4541 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4542 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vfrintrp_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vfrintrp.s (<4 x float> -> <4 x float>).
// NOTE(review): declared to return v4i32 although the expected IR shows a <4 x float> result;
// the return appears to rely on an implicit vector conversion -- confirm this is intended.
4544 v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); }
4545 // CHECK-LABEL: @vfrintrp_d(
4546 // CHECK-NEXT: entry:
4547 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4548 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]])
4549 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4550 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vfrintrp_d and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vfrintrp.d (<2 x double> -> <2 x double>).
// NOTE(review): declared to return v2i64 although the expected IR shows a <2 x double> result;
// the return appears to rely on an implicit vector conversion -- confirm this is intended.
4552 v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); }
4553 // CHECK-LABEL: @vfrintrm_s(
4554 // CHECK-NEXT: entry:
4555 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4556 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]])
4557 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4558 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vfrintrm_s and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vfrintrm.s (<4 x float> -> <4 x float>).
// NOTE(review): declared to return v4i32 although the expected IR shows a <4 x float> result;
// the return appears to rely on an implicit vector conversion -- confirm this is intended.
4560 v4i32 vfrintrm_s(v4f32 _1) { return __builtin_lsx_vfrintrm_s(_1); }
4561 // CHECK-LABEL: @vfrintrm_d(
4562 // CHECK-NEXT: entry:
4563 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4564 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]])
4565 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4566 // CHECK-NEXT: ret i128 [[TMP2]]
// Test wrapper: forwards _1 to __builtin_lsx_vfrintrm_d and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vfrintrm.d (<2 x double> -> <2 x double>).
// NOTE(review): declared to return v2i64 although the expected IR shows a <2 x double> result;
// the return appears to rely on an implicit vector conversion -- confirm this is intended.
4568 v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); }
4569 // CHECK-LABEL: @vstelm_b(
4570 // CHECK-NEXT: entry:
4571 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4572 // CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1)
4573 // CHECK-NEXT: ret void
// Test wrapper: calls __builtin_lsx_vstelm_b(_1, _2, 1, 1) and returns nothing.
// The assertions above expect @llvm.loongarch.lsx.vstelm.b with operands (<16 x i8>, ptr, i32 1, i32 1).
// The call is a plain statement because ISO C forbids `return expr;` in a void function.
4575 void vstelm_b(v16i8 _1, void *_2) {
4576 __builtin_lsx_vstelm_b(_1, _2, 1, 1);
4578 // CHECK-LABEL: @vstelm_h(
4579 // CHECK-NEXT: entry:
4580 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4581 // CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1)
4582 // CHECK-NEXT: ret void
// Test wrapper: calls __builtin_lsx_vstelm_h(_1, _2, 2, 1) and returns nothing.
// The assertions above expect @llvm.loongarch.lsx.vstelm.h with operands (<8 x i16>, ptr, i32 2, i32 1).
// The call is a plain statement because ISO C forbids `return expr;` in a void function.
4584 void vstelm_h(v8i16 _1, void *_2) {
4585 __builtin_lsx_vstelm_h(_1, _2, 2, 1);
4587 // CHECK-LABEL: @vstelm_w(
4588 // CHECK-NEXT: entry:
4589 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4590 // CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1)
4591 // CHECK-NEXT: ret void
// Test wrapper: calls __builtin_lsx_vstelm_w(_1, _2, 4, 1) and returns nothing.
// The assertions above expect @llvm.loongarch.lsx.vstelm.w with operands (<4 x i32>, ptr, i32 4, i32 1).
// The call is a plain statement because ISO C forbids `return expr;` in a void function.
4593 void vstelm_w(v4i32 _1, void *_2) {
4594 __builtin_lsx_vstelm_w(_1, _2, 4, 1);
4596 // CHECK-LABEL: @vstelm_d(
4597 // CHECK-NEXT: entry:
4598 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4599 // CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1)
4600 // CHECK-NEXT: ret void
// Test wrapper: calls __builtin_lsx_vstelm_d(_1, _2, 8, 1) and returns nothing.
// The assertions above expect @llvm.loongarch.lsx.vstelm.d with operands (<2 x i64>, ptr, i32 8, i32 1).
// The call is a plain statement because ISO C forbids `return expr;` in a void function.
4602 void vstelm_d(v2i64 _1, void *_2) {
4603 __builtin_lsx_vstelm_d(_1, _2, 8, 1);
4605 // CHECK-LABEL: @vaddwev_d_w(
4606 // CHECK-NEXT: entry:
4607 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4608 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4609 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4610 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4611 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwev_d_w and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwev.d.w (<4 x i32>, <4 x i32> -> <2 x i64>).
4613 v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) {
4614 return __builtin_lsx_vaddwev_d_w(_1, _2);
4616 // CHECK-LABEL: @vaddwev_w_h(
4617 // CHECK-NEXT: entry:
4618 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4619 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4620 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4621 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4622 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwev_w_h and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwev.w.h (<8 x i16>, <8 x i16> -> <4 x i32>).
4624 v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) {
4625 return __builtin_lsx_vaddwev_w_h(_1, _2);
4627 // CHECK-LABEL: @vaddwev_h_b(
4628 // CHECK-NEXT: entry:
4629 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4630 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4631 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4632 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4633 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwev_h_b and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwev.h.b (<16 x i8>, <16 x i8> -> <8 x i16>).
4635 v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) {
4636 return __builtin_lsx_vaddwev_h_b(_1, _2);
4638 // CHECK-LABEL: @vaddwod_d_w(
4639 // CHECK-NEXT: entry:
4640 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4641 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4642 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4643 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4644 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwod_d_w and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwod.d.w (<4 x i32>, <4 x i32> -> <2 x i64>).
4646 v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) {
4647 return __builtin_lsx_vaddwod_d_w(_1, _2);
4649 // CHECK-LABEL: @vaddwod_w_h(
4650 // CHECK-NEXT: entry:
4651 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4652 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4653 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4654 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4655 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwod_w_h and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwod.w.h (<8 x i16>, <8 x i16> -> <4 x i32>).
4657 v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) {
4658 return __builtin_lsx_vaddwod_w_h(_1, _2);
4660 // CHECK-LABEL: @vaddwod_h_b(
4661 // CHECK-NEXT: entry:
4662 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4663 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4664 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4665 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4666 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwod_h_b and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwod.h.b (<16 x i8>, <16 x i8> -> <8 x i16>).
4668 v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) {
4669 return __builtin_lsx_vaddwod_h_b(_1, _2);
4671 // CHECK-LABEL: @vaddwev_d_wu(
4672 // CHECK-NEXT: entry:
4673 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4674 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4675 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4676 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4677 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwev_d_wu and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwev.d.wu (<4 x i32>, <4 x i32> -> <2 x i64>).
4679 v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) {
4680 return __builtin_lsx_vaddwev_d_wu(_1, _2);
4682 // CHECK-LABEL: @vaddwev_w_hu(
4683 // CHECK-NEXT: entry:
4684 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4685 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4686 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4687 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4688 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwev_w_hu and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwev.w.hu (<8 x i16>, <8 x i16> -> <4 x i32>).
4690 v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) {
4691 return __builtin_lsx_vaddwev_w_hu(_1, _2);
4693 // CHECK-LABEL: @vaddwev_h_bu(
4694 // CHECK-NEXT: entry:
4695 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4696 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4697 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4698 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4699 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwev_h_bu and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwev.h.bu (<16 x i8>, <16 x i8> -> <8 x i16>).
4701 v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) {
4702 return __builtin_lsx_vaddwev_h_bu(_1, _2);
4704 // CHECK-LABEL: @vaddwod_d_wu(
4705 // CHECK-NEXT: entry:
4706 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4707 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4708 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4709 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4710 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwod_d_wu and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwod.d.wu (<4 x i32>, <4 x i32> -> <2 x i64>).
4712 v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) {
4713 return __builtin_lsx_vaddwod_d_wu(_1, _2);
4715 // CHECK-LABEL: @vaddwod_w_hu(
4716 // CHECK-NEXT: entry:
4717 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4718 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4719 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4720 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4721 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwod_w_hu and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwod.w.hu (<8 x i16>, <8 x i16> -> <4 x i32>).
4723 v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) {
4724 return __builtin_lsx_vaddwod_w_hu(_1, _2);
4726 // CHECK-LABEL: @vaddwod_h_bu(
4727 // CHECK-NEXT: entry:
4728 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4729 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4730 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4731 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4732 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwod_h_bu and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwod.h.bu (<16 x i8>, <16 x i8> -> <8 x i16>).
4734 v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) {
4735 return __builtin_lsx_vaddwod_h_bu(_1, _2);
4737 // CHECK-LABEL: @vaddwev_d_wu_w(
4738 // CHECK-NEXT: entry:
4739 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4740 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4741 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4742 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4743 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 (v4u32) and _2 (v4i32) to __builtin_lsx_vaddwev_d_wu_w and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwev.d.wu.w (<4 x i32>, <4 x i32> -> <2 x i64>).
4745 v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) {
4746 return __builtin_lsx_vaddwev_d_wu_w(_1, _2);
4748 // CHECK-LABEL: @vaddwev_w_hu_h(
4749 // CHECK-NEXT: entry:
4750 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4751 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4752 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4753 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4754 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 (v8u16) and _2 (v8i16) to __builtin_lsx_vaddwev_w_hu_h and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwev.w.hu.h (<8 x i16>, <8 x i16> -> <4 x i32>).
4756 v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) {
4757 return __builtin_lsx_vaddwev_w_hu_h(_1, _2);
4759 // CHECK-LABEL: @vaddwev_h_bu_b(
4760 // CHECK-NEXT: entry:
4761 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4762 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4763 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4764 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4765 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 (v16u8) and _2 (v16i8) to __builtin_lsx_vaddwev_h_bu_b and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwev.h.bu.b (<16 x i8>, <16 x i8> -> <8 x i16>).
4767 v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) {
4768 return __builtin_lsx_vaddwev_h_bu_b(_1, _2);
4770 // CHECK-LABEL: @vaddwod_d_wu_w(
4771 // CHECK-NEXT: entry:
4772 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4773 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4774 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4775 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4776 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 (v4u32) and _2 (v4i32) to __builtin_lsx_vaddwod_d_wu_w and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwod.d.wu.w (<4 x i32>, <4 x i32> -> <2 x i64>).
4778 v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) {
4779 return __builtin_lsx_vaddwod_d_wu_w(_1, _2);
4781 // CHECK-LABEL: @vaddwod_w_hu_h(
4782 // CHECK-NEXT: entry:
4783 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4784 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4785 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4786 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4787 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 (v8u16) and _2 (v8i16) to __builtin_lsx_vaddwod_w_hu_h and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwod.w.hu.h (<8 x i16>, <8 x i16> -> <4 x i32>).
4789 v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) {
4790 return __builtin_lsx_vaddwod_w_hu_h(_1, _2);
4792 // CHECK-LABEL: @vaddwod_h_bu_b(
4793 // CHECK-NEXT: entry:
4794 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4795 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4796 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4797 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4798 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 (v16u8) and _2 (v16i8) to __builtin_lsx_vaddwod_h_bu_b and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwod.h.bu.b (<16 x i8>, <16 x i8> -> <8 x i16>).
4800 v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) {
4801 return __builtin_lsx_vaddwod_h_bu_b(_1, _2);
4803 // CHECK-LABEL: @vsubwev_d_w(
4804 // CHECK-NEXT: entry:
4805 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4806 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4807 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4808 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4809 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vsubwev_d_w and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vsubwev.d.w (<4 x i32>, <4 x i32> -> <2 x i64>).
4811 v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) {
4812 return __builtin_lsx_vsubwev_d_w(_1, _2);
4814 // CHECK-LABEL: @vsubwev_w_h(
4815 // CHECK-NEXT: entry:
4816 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4817 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4818 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4819 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4820 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vsubwev_w_h and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vsubwev.w.h (<8 x i16>, <8 x i16> -> <4 x i32>).
4822 v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) {
4823 return __builtin_lsx_vsubwev_w_h(_1, _2);
4825 // CHECK-LABEL: @vsubwev_h_b(
4826 // CHECK-NEXT: entry:
4827 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4828 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4829 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4830 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4831 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vsubwev_h_b and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vsubwev.h.b (<16 x i8>, <16 x i8> -> <8 x i16>).
4833 v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) {
4834 return __builtin_lsx_vsubwev_h_b(_1, _2);
4836 // CHECK-LABEL: @vsubwod_d_w(
4837 // CHECK-NEXT: entry:
4838 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4839 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4840 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4841 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4842 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vsubwod_d_w and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vsubwod.d.w (<4 x i32>, <4 x i32> -> <2 x i64>).
4844 v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) {
4845 return __builtin_lsx_vsubwod_d_w(_1, _2);
4847 // CHECK-LABEL: @vsubwod_w_h(
4848 // CHECK-NEXT: entry:
4849 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4850 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4851 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4852 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4853 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vsubwod_w_h and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vsubwod.w.h (<8 x i16>, <8 x i16> -> <4 x i32>).
4855 v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) {
4856 return __builtin_lsx_vsubwod_w_h(_1, _2);
4858 // CHECK-LABEL: @vsubwod_h_b(
4859 // CHECK-NEXT: entry:
4860 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4861 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4862 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4863 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4864 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vsubwod_h_b and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vsubwod.h.b (<16 x i8>, <16 x i8> -> <8 x i16>).
4866 v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) {
4867 return __builtin_lsx_vsubwod_h_b(_1, _2);
4869 // CHECK-LABEL: @vsubwev_d_wu(
4870 // CHECK-NEXT: entry:
4871 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4872 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4873 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4874 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4875 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vsubwev_d_wu and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vsubwev.d.wu (<4 x i32>, <4 x i32> -> <2 x i64>).
4877 v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) {
4878 return __builtin_lsx_vsubwev_d_wu(_1, _2);
4880 // CHECK-LABEL: @vsubwev_w_hu(
4881 // CHECK-NEXT: entry:
4882 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4883 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4884 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4885 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4886 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vsubwev_w_hu and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vsubwev.w.hu (<8 x i16>, <8 x i16> -> <4 x i32>).
4888 v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) {
4889 return __builtin_lsx_vsubwev_w_hu(_1, _2);
4891 // CHECK-LABEL: @vsubwev_h_bu(
4892 // CHECK-NEXT: entry:
4893 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4894 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4895 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4896 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4897 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vsubwev_h_bu and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vsubwev.h.bu (<16 x i8>, <16 x i8> -> <8 x i16>).
4899 v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) {
4900 return __builtin_lsx_vsubwev_h_bu(_1, _2);
4902 // CHECK-LABEL: @vsubwod_d_wu(
4903 // CHECK-NEXT: entry:
4904 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4905 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4906 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4907 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4908 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vsubwod_d_wu and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vsubwod.d.wu (<4 x i32>, <4 x i32> -> <2 x i64>).
4910 v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) {
4911 return __builtin_lsx_vsubwod_d_wu(_1, _2);
4913 // CHECK-LABEL: @vsubwod_w_hu(
4914 // CHECK-NEXT: entry:
4915 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4916 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4917 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4918 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4919 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vsubwod_w_hu and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vsubwod.w.hu (<8 x i16>, <8 x i16> -> <4 x i32>).
4921 v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) {
4922 return __builtin_lsx_vsubwod_w_hu(_1, _2);
4924 // CHECK-LABEL: @vsubwod_h_bu(
4925 // CHECK-NEXT: entry:
4926 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4927 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4928 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4929 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4930 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vsubwod_h_bu and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vsubwod.h.bu (<16 x i8>, <16 x i8> -> <8 x i16>).
4932 v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) {
4933 return __builtin_lsx_vsubwod_h_bu(_1, _2);
4935 // CHECK-LABEL: @vaddwev_q_d(
4936 // CHECK-NEXT: entry:
4937 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4938 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4939 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4940 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4941 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwev_q_d and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwev.q.d (<2 x i64>, <2 x i64> -> <2 x i64>).
4943 v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) {
4944 return __builtin_lsx_vaddwev_q_d(_1, _2);
4946 // CHECK-LABEL: @vaddwod_q_d(
4947 // CHECK-NEXT: entry:
4948 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4949 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4950 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4951 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4952 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwod_q_d and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwod.q.d (<2 x i64>, <2 x i64> -> <2 x i64>).
4954 v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) {
4955 return __builtin_lsx_vaddwod_q_d(_1, _2);
4957 // CHECK-LABEL: @vaddwev_q_du(
4958 // CHECK-NEXT: entry:
4959 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4960 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4961 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4962 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4963 // CHECK-NEXT: ret i128 [[TMP3]]
// Test wrapper: forwards _1 and _2 to __builtin_lsx_vaddwev_q_du and returns its result.
// The assertions above expect @llvm.loongarch.lsx.vaddwev.q.du (<2 x i64>, <2 x i64> -> <2 x i64>).
4965 v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) {
4966 return __builtin_lsx_vaddwev_q_du(_1, _2);
4968 // CHECK-LABEL: @vaddwod_q_du(
4969 // CHECK-NEXT: entry:
4970 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4971 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4972 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4973 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4974 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vaddwod_q_du: takes two unsigned
// v2u64 operands and is declared to return signed v2i64. Expected lowering
// (see the FileCheck assertions above): a tail call to
// @llvm.loongarch.lsx.vaddwod.q.du on <2 x i64> values.
4976 v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) {
4977 return __builtin_lsx_vaddwod_q_du(_1, _2);
4979 // CHECK-LABEL: @vsubwev_q_d(
4980 // CHECK-NEXT: entry:
4981 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4982 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4983 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4984 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4985 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper: forwards _1 and _2 unchanged to
// __builtin_lsx_vsubwev_q_d and returns its result. The FileCheck assertions
// above expect one tail call to @llvm.loongarch.lsx.vsubwev.q.d on
// <2 x i64> operands, producing <2 x i64>.
// NOTE(review): operand order presumably matters for a subtract - this test
// only pins that _1 is passed first and _2 second.
4987 v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) {
4988 return __builtin_lsx_vsubwev_q_d(_1, _2);
4990 // CHECK-LABEL: @vsubwod_q_d(
4991 // CHECK-NEXT: entry:
4992 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4993 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4994 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4995 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4996 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper: forwards _1 and _2 unchanged to
// __builtin_lsx_vsubwod_q_d and returns its result. The FileCheck assertions
// above expect one tail call to @llvm.loongarch.lsx.vsubwod.q.d on
// <2 x i64> operands, producing <2 x i64>.
4998 v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) {
4999 return __builtin_lsx_vsubwod_q_d(_1, _2);
5001 // CHECK-LABEL: @vsubwev_q_du(
5002 // CHECK-NEXT: entry:
5003 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5004 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5005 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5006 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5007 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vsubwev_q_du: both operands are
// unsigned (v2u64) while the declared return type is signed (v2i64). The
// FileCheck assertions above expect a tail call to
// @llvm.loongarch.lsx.vsubwev.q.du on <2 x i64> values.
5009 v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) {
5010 return __builtin_lsx_vsubwev_q_du(_1, _2);
5012 // CHECK-LABEL: @vsubwod_q_du(
5013 // CHECK-NEXT: entry:
5014 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5015 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5016 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5017 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5018 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vsubwod_q_du: takes two unsigned
// v2u64 operands and is declared to return signed v2i64. Expected lowering
// (see the FileCheck assertions above): a tail call to
// @llvm.loongarch.lsx.vsubwod.q.du on <2 x i64> values.
5020 v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) {
5021 return __builtin_lsx_vsubwod_q_du(_1, _2);
5023 // CHECK-LABEL: @vaddwev_q_du_d(
5024 // CHECK-NEXT: entry:
5025 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5026 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5027 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5028 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5029 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vaddwev_q_du_d with mixed operand
// signedness: _1 is unsigned (v2u64), _2 is signed (v2i64). In the expected
// IR both become plain <2 x i64> arguments of a tail call to
// @llvm.loongarch.lsx.vaddwev.q.du.d.
5031 v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) {
5032 return __builtin_lsx_vaddwev_q_du_d(_1, _2);
5034 // CHECK-LABEL: @vaddwod_q_du_d(
5035 // CHECK-NEXT: entry:
5036 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5037 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5038 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5039 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5040 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vaddwod_q_du_d with mixed operand
// signedness: _1 is unsigned (v2u64), _2 is signed (v2i64). In the expected
// IR both become plain <2 x i64> arguments of a tail call to
// @llvm.loongarch.lsx.vaddwod.q.du.d.
5042 v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) {
5043 return __builtin_lsx_vaddwod_q_du_d(_1, _2);
5045 // CHECK-LABEL: @vmulwev_d_w(
5046 // CHECK-NEXT: entry:
5047 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5048 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5049 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5050 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5051 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper: forwards _1 and _2 unchanged to
// __builtin_lsx_vmulwev_d_w and returns its result. The FileCheck assertions
// above expect a tail call to @llvm.loongarch.lsx.vmulwev.d.w that takes two
// <4 x i32> operands and yields <2 x i64>.
// NOTE(review): the narrower-input/wider-output types are consistent with a
// widening multiply, but the arithmetic itself is not shown here - confirm.
5053 v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) {
5054 return __builtin_lsx_vmulwev_d_w(_1, _2);
5056 // CHECK-LABEL: @vmulwev_w_h(
5057 // CHECK-NEXT: entry:
5058 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5059 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5060 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5061 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5062 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper: forwards _1 and _2 unchanged to
// __builtin_lsx_vmulwev_w_h and returns its result. Expected lowering: a tail
// call to @llvm.loongarch.lsx.vmulwev.w.h taking two <8 x i16> operands and
// yielding <4 x i32>.
5064 v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) {
5065 return __builtin_lsx_vmulwev_w_h(_1, _2);
5067 // CHECK-LABEL: @vmulwev_h_b(
5068 // CHECK-NEXT: entry:
5069 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5070 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5071 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5072 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5073 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper: forwards _1 and _2 unchanged to
// __builtin_lsx_vmulwev_h_b and returns its result. Expected lowering: a tail
// call to @llvm.loongarch.lsx.vmulwev.h.b taking two <16 x i8> operands and
// yielding <8 x i16>.
5075 v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) {
5076 return __builtin_lsx_vmulwev_h_b(_1, _2);
5078 // CHECK-LABEL: @vmulwod_d_w(
5079 // CHECK-NEXT: entry:
5080 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5081 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5082 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5083 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5084 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper: forwards _1 and _2 unchanged to
// __builtin_lsx_vmulwod_d_w and returns its result. Expected lowering: a tail
// call to @llvm.loongarch.lsx.vmulwod.d.w taking two <4 x i32> operands and
// yielding <2 x i64>.
5086 v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) {
5087 return __builtin_lsx_vmulwod_d_w(_1, _2);
5089 // CHECK-LABEL: @vmulwod_w_h(
5090 // CHECK-NEXT: entry:
5091 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5092 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5093 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5094 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5095 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper: forwards _1 and _2 unchanged to
// __builtin_lsx_vmulwod_w_h and returns its result. Expected lowering: a tail
// call to @llvm.loongarch.lsx.vmulwod.w.h taking two <8 x i16> operands and
// yielding <4 x i32>.
5097 v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) {
5098 return __builtin_lsx_vmulwod_w_h(_1, _2);
5100 // CHECK-LABEL: @vmulwod_h_b(
5101 // CHECK-NEXT: entry:
5102 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5103 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5104 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5105 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5106 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper: forwards _1 and _2 unchanged to
// __builtin_lsx_vmulwod_h_b and returns its result. Expected lowering: a tail
// call to @llvm.loongarch.lsx.vmulwod.h.b taking two <16 x i8> operands and
// yielding <8 x i16>.
5108 v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) {
5109 return __builtin_lsx_vmulwod_h_b(_1, _2);
5111 // CHECK-LABEL: @vmulwev_d_wu(
5112 // CHECK-NEXT: entry:
5113 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5114 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5115 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5116 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5117 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwev_d_wu: operands are unsigned
// (v4u32) and the declared return type is signed (v2i64). Expected lowering:
// a tail call to @llvm.loongarch.lsx.vmulwev.d.wu taking two <4 x i32>
// operands and yielding <2 x i64>.
5119 v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) {
5120 return __builtin_lsx_vmulwev_d_wu(_1, _2);
5122 // CHECK-LABEL: @vmulwev_w_hu(
5123 // CHECK-NEXT: entry:
5124 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5125 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5126 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5127 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5128 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwev_w_hu: operands are unsigned
// (v8u16) and the declared return type is signed (v4i32). Expected lowering:
// a tail call to @llvm.loongarch.lsx.vmulwev.w.hu taking two <8 x i16>
// operands and yielding <4 x i32>.
5130 v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) {
5131 return __builtin_lsx_vmulwev_w_hu(_1, _2);
5133 // CHECK-LABEL: @vmulwev_h_bu(
5134 // CHECK-NEXT: entry:
5135 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5136 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5137 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5138 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5139 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwev_h_bu: operands are unsigned
// (v16u8) and the declared return type is signed (v8i16). Expected lowering:
// a tail call to @llvm.loongarch.lsx.vmulwev.h.bu taking two <16 x i8>
// operands and yielding <8 x i16>.
5141 v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) {
5142 return __builtin_lsx_vmulwev_h_bu(_1, _2);
5144 // CHECK-LABEL: @vmulwod_d_wu(
5145 // CHECK-NEXT: entry:
5146 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5147 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5148 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5149 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5150 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwod_d_wu: operands are unsigned
// (v4u32) and the declared return type is signed (v2i64). Expected lowering:
// a tail call to @llvm.loongarch.lsx.vmulwod.d.wu taking two <4 x i32>
// operands and yielding <2 x i64>.
5152 v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) {
5153 return __builtin_lsx_vmulwod_d_wu(_1, _2);
5155 // CHECK-LABEL: @vmulwod_w_hu(
5156 // CHECK-NEXT: entry:
5157 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5158 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5159 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5160 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5161 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwod_w_hu: operands are unsigned
// (v8u16) and the declared return type is signed (v4i32). Expected lowering:
// a tail call to @llvm.loongarch.lsx.vmulwod.w.hu taking two <8 x i16>
// operands and yielding <4 x i32>.
5163 v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) {
5164 return __builtin_lsx_vmulwod_w_hu(_1, _2);
5166 // CHECK-LABEL: @vmulwod_h_bu(
5167 // CHECK-NEXT: entry:
5168 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5169 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5170 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5171 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5172 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwod_h_bu: operands are unsigned
// (v16u8) and the declared return type is signed (v8i16). Expected lowering:
// a tail call to @llvm.loongarch.lsx.vmulwod.h.bu taking two <16 x i8>
// operands and yielding <8 x i16>.
5174 v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) {
5175 return __builtin_lsx_vmulwod_h_bu(_1, _2);
5177 // CHECK-LABEL: @vmulwev_d_wu_w(
5178 // CHECK-NEXT: entry:
5179 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5180 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5181 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5182 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5183 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwev_d_wu_w with mixed operand
// signedness: _1 is unsigned (v4u32), _2 is signed (v4i32). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmulwev.d.wu.w taking two
// <4 x i32> operands and yielding <2 x i64>.
5185 v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) {
5186 return __builtin_lsx_vmulwev_d_wu_w(_1, _2);
5188 // CHECK-LABEL: @vmulwev_w_hu_h(
5189 // CHECK-NEXT: entry:
5190 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5191 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5192 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5193 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5194 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwev_w_hu_h with mixed operand
// signedness: _1 is unsigned (v8u16), _2 is signed (v8i16). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmulwev.w.hu.h taking two
// <8 x i16> operands and yielding <4 x i32>.
5196 v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) {
5197 return __builtin_lsx_vmulwev_w_hu_h(_1, _2);
5199 // CHECK-LABEL: @vmulwev_h_bu_b(
5200 // CHECK-NEXT: entry:
5201 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5202 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5203 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5204 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5205 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwev_h_bu_b with mixed operand
// signedness: _1 is unsigned (v16u8), _2 is signed (v16i8). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmulwev.h.bu.b taking two
// <16 x i8> operands and yielding <8 x i16>.
5207 v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) {
5208 return __builtin_lsx_vmulwev_h_bu_b(_1, _2);
5210 // CHECK-LABEL: @vmulwod_d_wu_w(
5211 // CHECK-NEXT: entry:
5212 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5213 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5214 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5215 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5216 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwod_d_wu_w with mixed operand
// signedness: _1 is unsigned (v4u32), _2 is signed (v4i32). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmulwod.d.wu.w taking two
// <4 x i32> operands and yielding <2 x i64>.
5218 v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) {
5219 return __builtin_lsx_vmulwod_d_wu_w(_1, _2);
5221 // CHECK-LABEL: @vmulwod_w_hu_h(
5222 // CHECK-NEXT: entry:
5223 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5224 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5225 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5226 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5227 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwod_w_hu_h with mixed operand
// signedness: _1 is unsigned (v8u16), _2 is signed (v8i16). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmulwod.w.hu.h taking two
// <8 x i16> operands and yielding <4 x i32>.
5229 v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) {
5230 return __builtin_lsx_vmulwod_w_hu_h(_1, _2);
5232 // CHECK-LABEL: @vmulwod_h_bu_b(
5233 // CHECK-NEXT: entry:
5234 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5235 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5236 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5237 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5238 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwod_h_bu_b with mixed operand
// signedness: _1 is unsigned (v16u8), _2 is signed (v16i8). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmulwod.h.bu.b taking two
// <16 x i8> operands and yielding <8 x i16>.
5240 v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) {
5241 return __builtin_lsx_vmulwod_h_bu_b(_1, _2);
5243 // CHECK-LABEL: @vmulwev_q_d(
5244 // CHECK-NEXT: entry:
5245 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5246 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5247 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5248 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5249 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper: forwards _1 and _2 unchanged to
// __builtin_lsx_vmulwev_q_d and returns its result. Expected lowering: a tail
// call to @llvm.loongarch.lsx.vmulwev.q.d whose operands and result are all
// <2 x i64>.
5251 v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) {
5252 return __builtin_lsx_vmulwev_q_d(_1, _2);
5254 // CHECK-LABEL: @vmulwod_q_d(
5255 // CHECK-NEXT: entry:
5256 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5257 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5258 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5259 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5260 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper: forwards _1 and _2 unchanged to
// __builtin_lsx_vmulwod_q_d and returns its result. Expected lowering: a tail
// call to @llvm.loongarch.lsx.vmulwod.q.d whose operands and result are all
// <2 x i64>.
5262 v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) {
5263 return __builtin_lsx_vmulwod_q_d(_1, _2);
5265 // CHECK-LABEL: @vmulwev_q_du(
5266 // CHECK-NEXT: entry:
5267 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5268 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5269 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5270 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5271 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwev_q_du: operands are unsigned
// (v2u64) and the declared return type is signed (v2i64). Expected lowering:
// a tail call to @llvm.loongarch.lsx.vmulwev.q.du on <2 x i64> values.
5273 v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) {
5274 return __builtin_lsx_vmulwev_q_du(_1, _2);
5276 // CHECK-LABEL: @vmulwod_q_du(
5277 // CHECK-NEXT: entry:
5278 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5279 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5280 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5281 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5282 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwod_q_du: operands are unsigned
// (v2u64) and the declared return type is signed (v2i64). Expected lowering:
// a tail call to @llvm.loongarch.lsx.vmulwod.q.du on <2 x i64> values.
5284 v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) {
5285 return __builtin_lsx_vmulwod_q_du(_1, _2);
5287 // CHECK-LABEL: @vmulwev_q_du_d(
5288 // CHECK-NEXT: entry:
5289 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5290 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5291 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5292 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5293 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwev_q_du_d with mixed operand
// signedness: _1 is unsigned (v2u64), _2 is signed (v2i64). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmulwev.q.du.d on <2 x i64>
// values.
5295 v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) {
5296 return __builtin_lsx_vmulwev_q_du_d(_1, _2);
5298 // CHECK-LABEL: @vmulwod_q_du_d(
5299 // CHECK-NEXT: entry:
5300 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5301 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5302 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5303 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5304 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vmulwod_q_du_d with mixed operand
// signedness: _1 is unsigned (v2u64), _2 is signed (v2i64). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmulwod.q.du.d on <2 x i64>
// values.
5306 v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) {
5307 return __builtin_lsx_vmulwod_q_du_d(_1, _2);
5309 // CHECK-LABEL: @vhaddw_q_d(
5310 // CHECK-NEXT: entry:
5311 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5312 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5313 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5314 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5315 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper: forwards _1 and _2 unchanged to
// __builtin_lsx_vhaddw_q_d and returns its result. Expected lowering: a tail
// call to @llvm.loongarch.lsx.vhaddw.q.d whose operands and result are all
// <2 x i64>.
// NOTE(review): presumably a horizontal widening add; the element pairing is
// not visible in this test - confirm against the LSX manual.
5317 v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) {
5318 return __builtin_lsx_vhaddw_q_d(_1, _2);
5320 // CHECK-LABEL: @vhaddw_qu_du(
5321 // CHECK-NEXT: entry:
5322 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5323 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5324 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5325 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5326 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vhaddw_qu_du: operands and return
// type are all unsigned (v2u64). Expected lowering: a tail call to
// @llvm.loongarch.lsx.vhaddw.qu.du on <2 x i64> values.
5328 v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) {
5329 return __builtin_lsx_vhaddw_qu_du(_1, _2);
5331 // CHECK-LABEL: @vhsubw_q_d(
5332 // CHECK-NEXT: entry:
5333 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5334 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5335 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5336 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5337 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper: forwards _1 and _2 unchanged to
// __builtin_lsx_vhsubw_q_d and returns its result. Expected lowering: a tail
// call to @llvm.loongarch.lsx.vhsubw.q.d whose operands and result are all
// <2 x i64>.
5339 v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) {
5340 return __builtin_lsx_vhsubw_q_d(_1, _2);
5342 // CHECK-LABEL: @vhsubw_qu_du(
5343 // CHECK-NEXT: entry:
5344 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5345 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5346 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5347 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5348 // CHECK-NEXT: ret i128 [[TMP3]]
// Codegen test wrapper for __builtin_lsx_vhsubw_qu_du: operands and return
// type are all unsigned (v2u64). Expected lowering: a tail call to
// @llvm.loongarch.lsx.vhsubw.qu.du on <2 x i64> values.
5350 v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) {
5351 return __builtin_lsx_vhsubw_qu_du(_1, _2);
5353 // CHECK-LABEL: @vmaddwev_d_w(
5354 // CHECK-NEXT: entry:
5355 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5356 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5357 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
5358 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
5359 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5360 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper: forwards _1, _2 and _3 unchanged, in that order, to
// __builtin_lsx_vmaddwev_d_w and returns its result. The FileCheck assertions
// above expect a tail call to @llvm.loongarch.lsx.vmaddwev.d.w whose first
// operand and result are <2 x i64> and whose other two operands are
// <4 x i32>.
// NOTE(review): _1 having the result type suggests it is the accumulator of
// a multiply-add; that role is not shown in this file - confirm.
5362 v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
5363 return __builtin_lsx_vmaddwev_d_w(_1, _2, _3);
5365 // CHECK-LABEL: @vmaddwev_w_h(
5366 // CHECK-NEXT: entry:
5367 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5368 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5369 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
5370 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
5371 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5372 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper: forwards _1, _2 and _3 unchanged, in that order, to
// __builtin_lsx_vmaddwev_w_h. Expected lowering: a tail call to
// @llvm.loongarch.lsx.vmaddwev.w.h with a <4 x i32> first operand and
// result, and two <8 x i16> operands.
5374 v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
5375 return __builtin_lsx_vmaddwev_w_h(_1, _2, _3);
5377 // CHECK-LABEL: @vmaddwev_h_b(
5378 // CHECK-NEXT: entry:
5379 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5380 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5381 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5382 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5383 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5384 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper: forwards _1, _2 and _3 unchanged, in that order, to
// __builtin_lsx_vmaddwev_h_b. Expected lowering: a tail call to
// @llvm.loongarch.lsx.vmaddwev.h.b with a <8 x i16> first operand and
// result, and two <16 x i8> operands.
5386 v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
5387 return __builtin_lsx_vmaddwev_h_b(_1, _2, _3);
5389 // CHECK-LABEL: @vmaddwev_d_wu(
5390 // CHECK-NEXT: entry:
5391 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5392 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5393 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
5394 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
5395 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5396 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper for __builtin_lsx_vmaddwev_d_wu: all three operands
// and the return type are unsigned (v2u64, v4u32, v4u32 -> v2u64). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmaddwev.d.wu with a
// <2 x i64> first operand and result, and two <4 x i32> operands.
5398 v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
5399 return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3);
5401 // CHECK-LABEL: @vmaddwev_w_hu(
5402 // CHECK-NEXT: entry:
5403 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5404 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5405 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
5406 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
5407 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5408 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper for __builtin_lsx_vmaddwev_w_hu: all three operands
// and the return type are unsigned (v4u32, v8u16, v8u16 -> v4u32). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmaddwev.w.hu with a
// <4 x i32> first operand and result, and two <8 x i16> operands.
5410 v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
5411 return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3);
5413 // CHECK-LABEL: @vmaddwev_h_bu(
5414 // CHECK-NEXT: entry:
5415 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5416 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5417 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5418 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5419 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5420 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper for __builtin_lsx_vmaddwev_h_bu: all three operands
// and the return type are unsigned (v8u16, v16u8, v16u8 -> v8u16). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmaddwev.h.bu with a
// <8 x i16> first operand and result, and two <16 x i8> operands.
5422 v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
5423 return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3);
5425 // CHECK-LABEL: @vmaddwod_d_w(
5426 // CHECK-NEXT: entry:
5427 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5428 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5429 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
5430 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
5431 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5432 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper: forwards _1, _2 and _3 unchanged, in that order, to
// __builtin_lsx_vmaddwod_d_w. Expected lowering: a tail call to
// @llvm.loongarch.lsx.vmaddwod.d.w with a <2 x i64> first operand and
// result, and two <4 x i32> operands.
5434 v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
5435 return __builtin_lsx_vmaddwod_d_w(_1, _2, _3);
5437 // CHECK-LABEL: @vmaddwod_w_h(
5438 // CHECK-NEXT: entry:
5439 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5440 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5441 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
5442 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
5443 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5444 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper: forwards _1, _2 and _3 unchanged, in that order, to
// __builtin_lsx_vmaddwod_w_h. Expected lowering: a tail call to
// @llvm.loongarch.lsx.vmaddwod.w.h with a <4 x i32> first operand and
// result, and two <8 x i16> operands.
5446 v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
5447 return __builtin_lsx_vmaddwod_w_h(_1, _2, _3);
5449 // CHECK-LABEL: @vmaddwod_h_b(
5450 // CHECK-NEXT: entry:
5451 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5452 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5453 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5454 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5455 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5456 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper: forwards _1, _2 and _3 unchanged, in that order, to
// __builtin_lsx_vmaddwod_h_b. Expected lowering: a tail call to
// @llvm.loongarch.lsx.vmaddwod.h.b with a <8 x i16> first operand and
// result, and two <16 x i8> operands.
5458 v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
5459 return __builtin_lsx_vmaddwod_h_b(_1, _2, _3);
5461 // CHECK-LABEL: @vmaddwod_d_wu(
5462 // CHECK-NEXT: entry:
5463 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5464 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5465 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
5466 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
5467 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5468 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper for __builtin_lsx_vmaddwod_d_wu: all three operands
// and the return type are unsigned (v2u64, v4u32, v4u32 -> v2u64). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmaddwod.d.wu with a
// <2 x i64> first operand and result, and two <4 x i32> operands.
5470 v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
5471 return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3);
5473 // CHECK-LABEL: @vmaddwod_w_hu(
5474 // CHECK-NEXT: entry:
5475 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5476 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5477 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
5478 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
5479 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5480 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper for __builtin_lsx_vmaddwod_w_hu: all three operands
// and the return type are unsigned (v4u32, v8u16, v8u16 -> v4u32). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmaddwod.w.hu with a
// <4 x i32> first operand and result, and two <8 x i16> operands.
5482 v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
5483 return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3);
5485 // CHECK-LABEL: @vmaddwod_h_bu(
5486 // CHECK-NEXT: entry:
5487 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5488 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5489 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5490 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5491 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5492 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper for __builtin_lsx_vmaddwod_h_bu: all three operands
// and the return type are unsigned (v8u16, v16u8, v16u8 -> v8u16). Expected
// lowering: a tail call to @llvm.loongarch.lsx.vmaddwod.h.bu with a
// <8 x i16> first operand and result, and two <16 x i8> operands.
5494 v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
5495 return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3);
5497 // CHECK-LABEL: @vmaddwev_d_wu_w(
5498 // CHECK-NEXT: entry:
5499 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5500 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5501 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
5502 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
5503 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5504 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper for __builtin_lsx_vmaddwev_d_wu_w with mixed operand
// signedness: _1 is signed v2i64, _2 is unsigned v4u32, _3 is signed v4i32.
// Expected lowering: a tail call to @llvm.loongarch.lsx.vmaddwev.d.wu.w with
// a <2 x i64> first operand and result, and two <4 x i32> operands.
5506 v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
5507 return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3);
5509 // CHECK-LABEL: @vmaddwev_w_hu_h(
5510 // CHECK-NEXT: entry:
5511 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5512 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5513 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
5514 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
5515 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5516 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper for __builtin_lsx_vmaddwev_w_hu_h with mixed operand
// signedness: _1 is signed v4i32, _2 is unsigned v8u16, _3 is signed v8i16.
// Expected lowering: a tail call to @llvm.loongarch.lsx.vmaddwev.w.hu.h with
// a <4 x i32> first operand and result, and two <8 x i16> operands.
5518 v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
5519 return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3);
5521 // CHECK-LABEL: @vmaddwev_h_bu_b(
5522 // CHECK-NEXT: entry:
5523 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5524 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5525 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5526 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5527 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5528 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper for __builtin_lsx_vmaddwev_h_bu_b with mixed operand
// signedness: _1 is signed v8i16, _2 is unsigned v16u8, _3 is signed v16i8.
// Expected lowering: a tail call to @llvm.loongarch.lsx.vmaddwev.h.bu.b with
// a <8 x i16> first operand and result, and two <16 x i8> operands.
5530 v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
5531 return __builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3);
5533 // CHECK-LABEL: @vmaddwod_d_wu_w(
5534 // CHECK-NEXT: entry:
5535 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5536 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5537 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
5538 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
5539 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5540 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper for __builtin_lsx_vmaddwod_d_wu_w with mixed operand
// signedness: _1 is signed v2i64, _2 is unsigned v4u32, _3 is signed v4i32.
// Expected lowering: a tail call to @llvm.loongarch.lsx.vmaddwod.d.wu.w with
// a <2 x i64> first operand and result, and two <4 x i32> operands.
5542 v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
5543 return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3);
5545 // CHECK-LABEL: @vmaddwod_w_hu_h(
5546 // CHECK-NEXT: entry:
5547 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5548 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5549 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
5550 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
5551 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5552 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper for __builtin_lsx_vmaddwod_w_hu_h with mixed operand
// signedness: _1 is signed v4i32, _2 is unsigned v8u16, _3 is signed v8i16.
// Expected lowering: a tail call to @llvm.loongarch.lsx.vmaddwod.w.hu.h with
// a <4 x i32> first operand and result, and two <8 x i16> operands.
5554 v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
5555 return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3);
5557 // CHECK-LABEL: @vmaddwod_h_bu_b(
5558 // CHECK-NEXT: entry:
5559 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5560 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5561 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5562 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5563 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5564 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper for __builtin_lsx_vmaddwod_h_bu_b with mixed operand
// signedness: _1 is signed v8i16, _2 is unsigned v16u8, _3 is signed v16i8.
// Expected lowering: a tail call to @llvm.loongarch.lsx.vmaddwod.h.bu.b with
// a <8 x i16> first operand and result, and two <16 x i8> operands.
5566 v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
5567 return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3);
5569 // CHECK-LABEL: @vmaddwev_q_d(
5570 // CHECK-NEXT: entry:
5571 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5572 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5573 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5574 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5575 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5576 // CHECK-NEXT: ret i128 [[TMP4]]
// Codegen test wrapper: forwards _1, _2 and _3 unchanged, in that order, to
// __builtin_lsx_vmaddwev_q_d. Expected lowering: a tail call to
// @llvm.loongarch.lsx.vmaddwev.q.d whose three operands and result are all
// <2 x i64>.
5578 v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
5579 return __builtin_lsx_vmaddwev_q_d(_1, _2, _3);
5581 // CHECK-LABEL: @vmaddwod_q_d(
5582 // CHECK-NEXT: entry:
5583 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5584 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5585 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5586 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5587 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5588 // CHECK-NEXT: ret i128 [[TMP4]]
5590 v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
5591 return __builtin_lsx_vmaddwod_q_d(_1, _2, _3);
5593 // CHECK-LABEL: @vmaddwev_q_du(
5594 // CHECK-NEXT: entry:
5595 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5596 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5597 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5598 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5599 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5600 // CHECK-NEXT: ret i128 [[TMP4]]
5602 v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
5603 return __builtin_lsx_vmaddwev_q_du(_1, _2, _3);
5605 // CHECK-LABEL: @vmaddwod_q_du(
5606 // CHECK-NEXT: entry:
5607 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5608 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5609 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5610 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5611 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5612 // CHECK-NEXT: ret i128 [[TMP4]]
5614 v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
5615 return __builtin_lsx_vmaddwod_q_du(_1, _2, _3);
5617 // CHECK-LABEL: @vmaddwev_q_du_d(
5618 // CHECK-NEXT: entry:
5619 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5620 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5621 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5622 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5623 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5624 // CHECK-NEXT: ret i128 [[TMP4]]
5626 v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
5627 return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3);
5629 // CHECK-LABEL: @vmaddwod_q_du_d(
5630 // CHECK-NEXT: entry:
5631 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5632 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5633 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5634 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5635 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5636 // CHECK-NEXT: ret i128 [[TMP4]]
5638 v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
5639 return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3);
5641 // CHECK-LABEL: @vrotr_b(
5642 // CHECK-NEXT: entry:
5643 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5644 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5645 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5646 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5647 // CHECK-NEXT: ret i128 [[TMP3]]
5649 v16i8 vrotr_b(v16i8 _1, v16i8 _2) {
5650 return __builtin_lsx_vrotr_b(_1, _2);
5652 // CHECK-LABEL: @vrotr_h(
5653 // CHECK-NEXT: entry:
5654 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5655 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5656 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5657 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5658 // CHECK-NEXT: ret i128 [[TMP3]]
5660 v8i16 vrotr_h(v8i16 _1, v8i16 _2) {
5661 return __builtin_lsx_vrotr_h(_1, _2);
5663 // CHECK-LABEL: @vrotr_w(
5664 // CHECK-NEXT: entry:
5665 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5666 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5667 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5668 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5669 // CHECK-NEXT: ret i128 [[TMP3]]
5671 v4i32 vrotr_w(v4i32 _1, v4i32 _2) {
5672 return __builtin_lsx_vrotr_w(_1, _2);
5674 // CHECK-LABEL: @vrotr_d(
5675 // CHECK-NEXT: entry:
5676 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5677 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5678 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5679 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5680 // CHECK-NEXT: ret i128 [[TMP3]]
5682 v2i64 vrotr_d(v2i64 _1, v2i64 _2) {
5683 return __builtin_lsx_vrotr_d(_1, _2);
5685 // CHECK-LABEL: @vadd_q(
5686 // CHECK-NEXT: entry:
5687 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5688 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5689 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5690 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5691 // CHECK-NEXT: ret i128 [[TMP3]]
5693 v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); }
5694 // CHECK-LABEL: @vsub_q(
5695 // CHECK-NEXT: entry:
5696 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5697 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5698 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5699 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5700 // CHECK-NEXT: ret i128 [[TMP3]]
5702 v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); }
5703 // CHECK-LABEL: @vldrepl_b(
5704 // CHECK-NEXT: entry:
5705 // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1)
5706 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
5707 // CHECK-NEXT: ret i128 [[TMP1]]
5709 v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); }
5710 // CHECK-LABEL: @vldrepl_h(
5711 // CHECK-NEXT: entry:
5712 // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2)
5713 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
5714 // CHECK-NEXT: ret i128 [[TMP1]]
5716 v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); }
5717 // CHECK-LABEL: @vldrepl_w(
5718 // CHECK-NEXT: entry:
5719 // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4)
5720 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
5721 // CHECK-NEXT: ret i128 [[TMP1]]
5723 v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); }
5724 // CHECK-LABEL: @vldrepl_d(
5725 // CHECK-NEXT: entry:
5726 // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8)
5727 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
5728 // CHECK-NEXT: ret i128 [[TMP1]]
5730 v2i64 vldrepl_d(void *_1) { return __builtin_lsx_vldrepl_d(_1, 8); }
5731 // CHECK-LABEL: @vmskgez_b(
5732 // CHECK-NEXT: entry:
5733 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5734 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]])
5735 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
5736 // CHECK-NEXT: ret i128 [[TMP2]]
5738 v16i8 vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); }
5739 // CHECK-LABEL: @vmsknz_b(
5740 // CHECK-NEXT: entry:
5741 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5742 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]])
5743 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
5744 // CHECK-NEXT: ret i128 [[TMP2]]
5746 v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); }
5747 // CHECK-LABEL: @vexth_h_b(
5748 // CHECK-NEXT: entry:
5749 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5750 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]])
5751 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
5752 // CHECK-NEXT: ret i128 [[TMP2]]
5754 v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); }
5755 // CHECK-LABEL: @vexth_w_h(
5756 // CHECK-NEXT: entry:
5757 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5758 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]])
5759 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
5760 // CHECK-NEXT: ret i128 [[TMP2]]
5762 v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); }
5763 // CHECK-LABEL: @vexth_d_w(
5764 // CHECK-NEXT: entry:
5765 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5766 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]])
5767 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5768 // CHECK-NEXT: ret i128 [[TMP2]]
5770 v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); }
5771 // CHECK-LABEL: @vexth_q_d(
5772 // CHECK-NEXT: entry:
5773 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5774 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]])
5775 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5776 // CHECK-NEXT: ret i128 [[TMP2]]
5778 v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); }
5779 // CHECK-LABEL: @vexth_hu_bu(
5780 // CHECK-NEXT: entry:
5781 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5782 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]])
5783 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
5784 // CHECK-NEXT: ret i128 [[TMP2]]
5786 v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); }
5787 // CHECK-LABEL: @vexth_wu_hu(
5788 // CHECK-NEXT: entry:
5789 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5790 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]])
5791 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
5792 // CHECK-NEXT: ret i128 [[TMP2]]
5794 v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); }
5795 // CHECK-LABEL: @vexth_du_wu(
5796 // CHECK-NEXT: entry:
5797 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5798 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]])
5799 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5800 // CHECK-NEXT: ret i128 [[TMP2]]
5802 v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); }
5803 // CHECK-LABEL: @vexth_qu_du(
5804 // CHECK-NEXT: entry:
5805 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5806 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]])
5807 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5808 // CHECK-NEXT: ret i128 [[TMP2]]
5810 v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); }
5811 // CHECK-LABEL: @vrotri_b(
5812 // CHECK-NEXT: entry:
5813 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5814 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1)
5815 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
5816 // CHECK-NEXT: ret i128 [[TMP2]]
5818 v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); }
5819 // CHECK-LABEL: @vrotri_h(
5820 // CHECK-NEXT: entry:
5821 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5822 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1)
5823 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
5824 // CHECK-NEXT: ret i128 [[TMP2]]
5826 v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); }
5827 // CHECK-LABEL: @vrotri_w(
5828 // CHECK-NEXT: entry:
5829 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5830 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1)
5831 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
5832 // CHECK-NEXT: ret i128 [[TMP2]]
5834 v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); }
5835 // CHECK-LABEL: @vrotri_d(
5836 // CHECK-NEXT: entry:
5837 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5838 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1)
5839 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5840 // CHECK-NEXT: ret i128 [[TMP2]]
5842 v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); }
5843 // CHECK-LABEL: @vextl_q_d(
5844 // CHECK-NEXT: entry:
5845 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5846 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]])
5847 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5848 // CHECK-NEXT: ret i128 [[TMP2]]
5850 v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); }
5851 // CHECK-LABEL: @vsrlni_b_h(
5852 // CHECK-NEXT: entry:
5853 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5854 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5855 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5856 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5857 // CHECK-NEXT: ret i128 [[TMP3]]
5859 v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) {
5860 return __builtin_lsx_vsrlni_b_h(_1, _2, 1);
5862 // CHECK-LABEL: @vsrlni_h_w(
5863 // CHECK-NEXT: entry:
5864 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5865 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5866 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5867 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5868 // CHECK-NEXT: ret i128 [[TMP3]]
5870 v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) {
5871 return __builtin_lsx_vsrlni_h_w(_1, _2, 1);
5873 // CHECK-LABEL: @vsrlni_w_d(
5874 // CHECK-NEXT: entry:
5875 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5876 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5877 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5878 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5879 // CHECK-NEXT: ret i128 [[TMP3]]
5881 v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) {
5882 return __builtin_lsx_vsrlni_w_d(_1, _2, 1);
5884 // CHECK-LABEL: @vsrlni_d_q(
5885 // CHECK-NEXT: entry:
5886 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5887 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5888 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5889 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5890 // CHECK-NEXT: ret i128 [[TMP3]]
5892 v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) {
5893 return __builtin_lsx_vsrlni_d_q(_1, _2, 1);
5895 // CHECK-LABEL: @vsrlrni_b_h(
5896 // CHECK-NEXT: entry:
5897 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5898 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5899 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5900 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5901 // CHECK-NEXT: ret i128 [[TMP3]]
5903 v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) {
5904 return __builtin_lsx_vsrlrni_b_h(_1, _2, 1);
5906 // CHECK-LABEL: @vsrlrni_h_w(
5907 // CHECK-NEXT: entry:
5908 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5909 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5910 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5911 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5912 // CHECK-NEXT: ret i128 [[TMP3]]
5914 v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) {
5915 return __builtin_lsx_vsrlrni_h_w(_1, _2, 1);
5917 // CHECK-LABEL: @vsrlrni_w_d(
5918 // CHECK-NEXT: entry:
5919 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5920 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5921 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5922 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5923 // CHECK-NEXT: ret i128 [[TMP3]]
5925 v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) {
5926 return __builtin_lsx_vsrlrni_w_d(_1, _2, 1);
5928 // CHECK-LABEL: @vsrlrni_d_q(
5929 // CHECK-NEXT: entry:
5930 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5931 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5932 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5933 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5934 // CHECK-NEXT: ret i128 [[TMP3]]
5936 v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) {
5937 return __builtin_lsx_vsrlrni_d_q(_1, _2, 1);
5939 // CHECK-LABEL: @vssrlni_b_h(
5940 // CHECK-NEXT: entry:
5941 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5942 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5943 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5944 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5945 // CHECK-NEXT: ret i128 [[TMP3]]
5947 v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) {
5948 return __builtin_lsx_vssrlni_b_h(_1, _2, 1);
5950 // CHECK-LABEL: @vssrlni_h_w(
5951 // CHECK-NEXT: entry:
5952 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5953 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5954 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5955 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5956 // CHECK-NEXT: ret i128 [[TMP3]]
5958 v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) {
5959 return __builtin_lsx_vssrlni_h_w(_1, _2, 1);
5961 // CHECK-LABEL: @vssrlni_w_d(
5962 // CHECK-NEXT: entry:
5963 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5964 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5965 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5966 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5967 // CHECK-NEXT: ret i128 [[TMP3]]
5969 v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) {
5970 return __builtin_lsx_vssrlni_w_d(_1, _2, 1);
5972 // CHECK-LABEL: @vssrlni_d_q(
5973 // CHECK-NEXT: entry:
5974 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5975 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5976 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5977 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5978 // CHECK-NEXT: ret i128 [[TMP3]]
5980 v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) {
5981 return __builtin_lsx_vssrlni_d_q(_1, _2, 1);
5983 // CHECK-LABEL: @vssrlni_bu_h(
5984 // CHECK-NEXT: entry:
5985 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5986 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5987 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5988 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5989 // CHECK-NEXT: ret i128 [[TMP3]]
5991 v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) {
5992 return __builtin_lsx_vssrlni_bu_h(_1, _2, 1);
5994 // CHECK-LABEL: @vssrlni_hu_w(
5995 // CHECK-NEXT: entry:
5996 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5997 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5998 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5999 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6000 // CHECK-NEXT: ret i128 [[TMP3]]
6002 v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) {
6003 return __builtin_lsx_vssrlni_hu_w(_1, _2, 1);
6005 // CHECK-LABEL: @vssrlni_wu_d(
6006 // CHECK-NEXT: entry:
6007 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6008 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6009 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6010 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6011 // CHECK-NEXT: ret i128 [[TMP3]]
6013 v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) {
6014 return __builtin_lsx_vssrlni_wu_d(_1, _2, 1);
6016 // CHECK-LABEL: @vssrlni_du_q(
6017 // CHECK-NEXT: entry:
6018 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6019 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6020 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6021 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6022 // CHECK-NEXT: ret i128 [[TMP3]]
6024 v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) {
6025 return __builtin_lsx_vssrlni_du_q(_1, _2, 1);
6027 // CHECK-LABEL: @vssrlrni_b_h(
6028 // CHECK-NEXT: entry:
6029 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6030 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6031 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6032 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6033 // CHECK-NEXT: ret i128 [[TMP3]]
6035 v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) {
6036 return __builtin_lsx_vssrlrni_b_h(_1, _2, 1);
6038 // CHECK-LABEL: @vssrlrni_h_w(
6039 // CHECK-NEXT: entry:
6040 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6041 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6042 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6043 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6044 // CHECK-NEXT: ret i128 [[TMP3]]
6046 v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) {
6047 return __builtin_lsx_vssrlrni_h_w(_1, _2, 1);
6049 // CHECK-LABEL: @vssrlrni_w_d(
6050 // CHECK-NEXT: entry:
6051 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6052 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6053 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6054 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6055 // CHECK-NEXT: ret i128 [[TMP3]]
6057 v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) {
6058 return __builtin_lsx_vssrlrni_w_d(_1, _2, 1);
6060 // CHECK-LABEL: @vssrlrni_d_q(
6061 // CHECK-NEXT: entry:
6062 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6063 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6064 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6065 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6066 // CHECK-NEXT: ret i128 [[TMP3]]
6068 v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) {
6069 return __builtin_lsx_vssrlrni_d_q(_1, _2, 1);
6071 // CHECK-LABEL: @vssrlrni_bu_h(
6072 // CHECK-NEXT: entry:
6073 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6074 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6075 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6076 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6077 // CHECK-NEXT: ret i128 [[TMP3]]
6079 v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) {
6080 return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1);
6082 // CHECK-LABEL: @vssrlrni_hu_w(
6083 // CHECK-NEXT: entry:
6084 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6085 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6086 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6087 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6088 // CHECK-NEXT: ret i128 [[TMP3]]
6090 v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) {
6091 return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1);
6093 // CHECK-LABEL: @vssrlrni_wu_d(
6094 // CHECK-NEXT: entry:
6095 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6096 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6097 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6098 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6099 // CHECK-NEXT: ret i128 [[TMP3]]
6101 v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) {
6102 return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1);
6104 // CHECK-LABEL: @vssrlrni_du_q(
6105 // CHECK-NEXT: entry:
6106 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6107 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6108 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6109 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6110 // CHECK-NEXT: ret i128 [[TMP3]]
6112 v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) {
6113 return __builtin_lsx_vssrlrni_du_q(_1, _2, 1);
6115 // CHECK-LABEL: @vsrani_b_h(
6116 // CHECK-NEXT: entry:
6117 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6118 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6119 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6120 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6121 // CHECK-NEXT: ret i128 [[TMP3]]
6123 v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) {
6124 return __builtin_lsx_vsrani_b_h(_1, _2, 1);
6126 // CHECK-LABEL: @vsrani_h_w(
6127 // CHECK-NEXT: entry:
6128 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6129 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6130 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6131 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6132 // CHECK-NEXT: ret i128 [[TMP3]]
6134 v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) {
6135 return __builtin_lsx_vsrani_h_w(_1, _2, 1);
6137 // CHECK-LABEL: @vsrani_w_d(
6138 // CHECK-NEXT: entry:
6139 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6140 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6141 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6142 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6143 // CHECK-NEXT: ret i128 [[TMP3]]
6145 v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) {
6146 return __builtin_lsx_vsrani_w_d(_1, _2, 1);
6148 // CHECK-LABEL: @vsrani_d_q(
6149 // CHECK-NEXT: entry:
6150 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6151 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6152 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6153 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6154 // CHECK-NEXT: ret i128 [[TMP3]]
6156 v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) {
6157 return __builtin_lsx_vsrani_d_q(_1, _2, 1);
6159 // CHECK-LABEL: @vsrarni_b_h(
6160 // CHECK-NEXT: entry:
6161 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6162 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6163 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6164 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6165 // CHECK-NEXT: ret i128 [[TMP3]]
6167 v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) {
6168 return __builtin_lsx_vsrarni_b_h(_1, _2, 1);
6170 // CHECK-LABEL: @vsrarni_h_w(
6171 // CHECK-NEXT: entry:
6172 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6173 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6174 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6175 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6176 // CHECK-NEXT: ret i128 [[TMP3]]
6178 v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) {
6179 return __builtin_lsx_vsrarni_h_w(_1, _2, 1);
6181 // CHECK-LABEL: @vsrarni_w_d(
6182 // CHECK-NEXT: entry:
6183 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6184 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6185 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6186 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6187 // CHECK-NEXT: ret i128 [[TMP3]]
6189 v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) {
6190 return __builtin_lsx_vsrarni_w_d(_1, _2, 1);
6192 // CHECK-LABEL: @vsrarni_d_q(
6193 // CHECK-NEXT: entry:
6194 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6195 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6196 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6197 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6198 // CHECK-NEXT: ret i128 [[TMP3]]
6200 v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) {
6201 return __builtin_lsx_vsrarni_d_q(_1, _2, 1);
6203 // CHECK-LABEL: @vssrani_b_h(
6204 // CHECK-NEXT: entry:
6205 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6206 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6207 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6208 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6209 // CHECK-NEXT: ret i128 [[TMP3]]
6211 v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) {
6212 return __builtin_lsx_vssrani_b_h(_1, _2, 1);
6214 // CHECK-LABEL: @vssrani_h_w(
6215 // CHECK-NEXT: entry:
6216 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6217 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6218 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6219 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6220 // CHECK-NEXT: ret i128 [[TMP3]]
6222 v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) {
6223 return __builtin_lsx_vssrani_h_w(_1, _2, 1);
6225 // CHECK-LABEL: @vssrani_w_d(
6226 // CHECK-NEXT: entry:
6227 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6228 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6229 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6230 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6231 // CHECK-NEXT: ret i128 [[TMP3]]
6233 v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) {
6234 return __builtin_lsx_vssrani_w_d(_1, _2, 1);
6236 // CHECK-LABEL: @vssrani_d_q(
6237 // CHECK-NEXT: entry:
6238 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6239 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6240 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6241 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6242 // CHECK-NEXT: ret i128 [[TMP3]]
6244 v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) {
6245 return __builtin_lsx_vssrani_d_q(_1, _2, 1);
6247 // CHECK-LABEL: @vssrani_bu_h(
6248 // CHECK-NEXT: entry:
6249 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6250 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6251 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6252 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6253 // CHECK-NEXT: ret i128 [[TMP3]]
6255 v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) {
6256 return __builtin_lsx_vssrani_bu_h(_1, _2, 1);
6258 // CHECK-LABEL: @vssrani_hu_w(
6259 // CHECK-NEXT: entry:
6260 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6261 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6262 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6263 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6264 // CHECK-NEXT: ret i128 [[TMP3]]
6266 v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) {
6267 return __builtin_lsx_vssrani_hu_w(_1, _2, 1);
6269 // CHECK-LABEL: @vssrani_wu_d(
6270 // CHECK-NEXT: entry:
6271 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6272 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6273 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6274 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6275 // CHECK-NEXT: ret i128 [[TMP3]]
6277 v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) {
6278 return __builtin_lsx_vssrani_wu_d(_1, _2, 1);
6280 // CHECK-LABEL: @vssrani_du_q(
6281 // CHECK-NEXT: entry:
6282 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6283 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6284 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6285 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6286 // CHECK-NEXT: ret i128 [[TMP3]]
6288 v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) {
6289 return __builtin_lsx_vssrani_du_q(_1, _2, 1);
6291 // CHECK-LABEL: @vssrarni_b_h(
6292 // CHECK-NEXT: entry:
6293 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6294 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6295 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6296 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6297 // CHECK-NEXT: ret i128 [[TMP3]]
6299 v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) {
6300 return __builtin_lsx_vssrarni_b_h(_1, _2, 1);
6302 // CHECK-LABEL: @vssrarni_h_w(
6303 // CHECK-NEXT: entry:
6304 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6305 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6306 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6307 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6308 // CHECK-NEXT: ret i128 [[TMP3]]
6310 v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) {
6311 return __builtin_lsx_vssrarni_h_w(_1, _2, 1);
6313 // CHECK-LABEL: @vssrarni_w_d(
6314 // CHECK-NEXT: entry:
6315 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6316 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6317 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6318 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6319 // CHECK-NEXT: ret i128 [[TMP3]]
6321 v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) {
6322 return __builtin_lsx_vssrarni_w_d(_1, _2, 1);
6324 // CHECK-LABEL: @vssrarni_d_q(
6325 // CHECK-NEXT: entry:
6326 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6327 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6328 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6329 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6330 // CHECK-NEXT: ret i128 [[TMP3]]
6332 v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) {
6333 return __builtin_lsx_vssrarni_d_q(_1, _2, 1);
6335 // CHECK-LABEL: @vssrarni_bu_h(
6336 // CHECK-NEXT: entry:
6337 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6338 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6339 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
6340 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6341 // CHECK-NEXT: ret i128 [[TMP3]]
6343 v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) {
6344 return __builtin_lsx_vssrarni_bu_h(_1, _2, 1);
6346 // CHECK-LABEL: @vssrarni_hu_w(
6347 // CHECK-NEXT: entry:
6348 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6349 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6350 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
6351 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6352 // CHECK-NEXT: ret i128 [[TMP3]]
6354 v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) {
6355 return __builtin_lsx_vssrarni_hu_w(_1, _2, 1);
6357 // CHECK-LABEL: @vssrarni_wu_d(
6358 // CHECK-NEXT: entry:
6359 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6360 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6361 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6362 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6363 // CHECK-NEXT: ret i128 [[TMP3]]
6365 v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) {
6366 return __builtin_lsx_vssrarni_wu_d(_1, _2, 1);
6368 // CHECK-LABEL: @vssrarni_du_q(
6369 // CHECK-NEXT: entry:
6370 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6371 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6372 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
6373 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6374 // CHECK-NEXT: ret i128 [[TMP3]]
6376 v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) {
6377 return __builtin_lsx_vssrarni_du_q(_1, _2, 1);
6379 // CHECK-LABEL: @vpermi_w(
6380 // CHECK-NEXT: entry:
6381 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6382 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6383 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
6384 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6385 // CHECK-NEXT: ret i128 [[TMP3]]
6387 v4i32 vpermi_w(v4i32 _1, v4i32 _2) {
6388 return __builtin_lsx_vpermi_w(_1, _2, 1);
6390 // CHECK-LABEL: @vld(
6391 // CHECK-NEXT: entry:
6392 // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1)
6393 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
6394 // CHECK-NEXT: ret i128 [[TMP1]]
6396 v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); }
6397 // CHECK-LABEL: @vst(
6398 // CHECK-NEXT: entry:
6399 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6400 // CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1)
6401 // CHECK-NEXT: ret void
6403 void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); }
6404 // CHECK-LABEL: @vssrlrn_b_h(
6405 // CHECK-NEXT: entry:
6406 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6407 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6408 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
6409 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6410 // CHECK-NEXT: ret i128 [[TMP3]]
6412 v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) {
6413 return __builtin_lsx_vssrlrn_b_h(_1, _2);
6415 // CHECK-LABEL: @vssrlrn_h_w(
6416 // CHECK-NEXT: entry:
6417 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6418 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6419 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
6420 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6421 // CHECK-NEXT: ret i128 [[TMP3]]
6423 v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) {
6424 return __builtin_lsx_vssrlrn_h_w(_1, _2);
6426 // CHECK-LABEL: @vssrlrn_w_d(
6427 // CHECK-NEXT: entry:
6428 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6429 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6430 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
6431 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6432 // CHECK-NEXT: ret i128 [[TMP3]]
6434 v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) {
6435 return __builtin_lsx_vssrlrn_w_d(_1, _2);
6437 // CHECK-LABEL: @vssrln_b_h(
6438 // CHECK-NEXT: entry:
6439 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6440 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
6441 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
6442 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6443 // CHECK-NEXT: ret i128 [[TMP3]]
6445 v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) {
6446 return __builtin_lsx_vssrln_b_h(_1, _2);
6448 // CHECK-LABEL: @vssrln_h_w(
6449 // CHECK-NEXT: entry:
6450 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6451 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
6452 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
6453 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
6454 // CHECK-NEXT: ret i128 [[TMP3]]
6456 v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) {
6457 return __builtin_lsx_vssrln_h_w(_1, _2);
6459 // CHECK-LABEL: @vssrln_w_d(
6460 // CHECK-NEXT: entry:
6461 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6462 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
6463 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
6464 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6465 // CHECK-NEXT: ret i128 [[TMP3]]
6467 v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) {
6468 return __builtin_lsx_vssrln_w_d(_1, _2);
6470 // CHECK-LABEL: @vorn_v(
6471 // CHECK-NEXT: entry:
6472 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6473 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6474 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
6475 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6476 // CHECK-NEXT: ret i128 [[TMP3]]
6478 v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); }
6479 // CHECK-LABEL: @vldi(
6480 // CHECK-NEXT: entry:
6481 // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1)
6482 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
6483 // CHECK-NEXT: ret i128 [[TMP1]]
6485 v2i64 vldi() { return __builtin_lsx_vldi(1); }
6486 // CHECK-LABEL: @vshuf_b(
6487 // CHECK-NEXT: entry:
6488 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6489 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
6490 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
6491 // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
6492 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
6493 // CHECK-NEXT: ret i128 [[TMP4]]
6495 v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) {
6496 return __builtin_lsx_vshuf_b(_1, _2, _3);
6498 // CHECK-LABEL: @vldx(
6499 // CHECK-NEXT: entry:
6500 // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1)
6501 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
6502 // CHECK-NEXT: ret i128 [[TMP1]]
6504 v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); }
6505 // CHECK-LABEL: @vstx(
6506 // CHECK-NEXT: entry:
6507 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6508 // CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1)
6509 // CHECK-NEXT: ret void
6511 void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); }
6512 // CHECK-LABEL: @vextl_qu_du(
6513 // CHECK-NEXT: entry:
6514 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6515 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]])
6516 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
6517 // CHECK-NEXT: ret i128 [[TMP2]]
6519 v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); }
6520 // CHECK-LABEL: @bnz_b(
6521 // CHECK-NEXT: entry:
6522 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6523 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]])
6524 // CHECK-NEXT: ret i32 [[TMP1]]
6526 int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); }
6527 // CHECK-LABEL: @bnz_d(
6528 // CHECK-NEXT: entry:
6529 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6530 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]])
6531 // CHECK-NEXT: ret i32 [[TMP1]]
6533 int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); }
6534 // CHECK-LABEL: @bnz_h(
6535 // CHECK-NEXT: entry:
6536 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6537 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]])
6538 // CHECK-NEXT: ret i32 [[TMP1]]
6540 int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); }
6541 // CHECK-LABEL: @bnz_v(
6542 // CHECK-NEXT: entry:
6543 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6544 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]])
6545 // CHECK-NEXT: ret i32 [[TMP1]]
6547 int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); }
6548 // CHECK-LABEL: @bnz_w(
6549 // CHECK-NEXT: entry:
6550 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6551 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]])
6552 // CHECK-NEXT: ret i32 [[TMP1]]
6554 int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); }
6555 // CHECK-LABEL: @bz_b(
6556 // CHECK-NEXT: entry:
6557 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6558 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]])
6559 // CHECK-NEXT: ret i32 [[TMP1]]
6561 int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); }
6562 // CHECK-LABEL: @bz_d(
6563 // CHECK-NEXT: entry:
6564 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
6565 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]])
6566 // CHECK-NEXT: ret i32 [[TMP1]]
6568 int bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); }
6569 // CHECK-LABEL: @bz_h(
6570 // CHECK-NEXT: entry:
6571 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
6572 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]])
6573 // CHECK-NEXT: ret i32 [[TMP1]]
6575 int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); }
6576 // CHECK-LABEL: @bz_v(
6577 // CHECK-NEXT: entry:
6578 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
6579 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]])
6580 // CHECK-NEXT: ret i32 [[TMP1]]
6582 int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); }
6583 // CHECK-LABEL: @bz_w(
6584 // CHECK-NEXT: entry:
6585 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
6586 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]])
6587 // CHECK-NEXT: ret i32 [[TMP1]]
6589 int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); }
6590 // CHECK-LABEL: @vfcmp_caf_d(
6591 // CHECK-NEXT: entry:
6592 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6593 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6594 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6595 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6596 // CHECK-NEXT: ret i128 [[TMP3]]
6598 v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) {
6599 return __builtin_lsx_vfcmp_caf_d(_1, _2);
6601 // CHECK-LABEL: @vfcmp_caf_s(
6602 // CHECK-NEXT: entry:
6603 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6604 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6605 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6606 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6607 // CHECK-NEXT: ret i128 [[TMP3]]
6609 v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) {
6610 return __builtin_lsx_vfcmp_caf_s(_1, _2);
6612 // CHECK-LABEL: @vfcmp_ceq_d(
6613 // CHECK-NEXT: entry:
6614 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6615 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6616 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6617 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6618 // CHECK-NEXT: ret i128 [[TMP3]]
6620 v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) {
6621 return __builtin_lsx_vfcmp_ceq_d(_1, _2);
6623 // CHECK-LABEL: @vfcmp_ceq_s(
6624 // CHECK-NEXT: entry:
6625 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6626 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6627 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6628 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6629 // CHECK-NEXT: ret i128 [[TMP3]]
6631 v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) {
6632 return __builtin_lsx_vfcmp_ceq_s(_1, _2);
6634 // CHECK-LABEL: @vfcmp_cle_d(
6635 // CHECK-NEXT: entry:
6636 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6637 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6638 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6639 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6640 // CHECK-NEXT: ret i128 [[TMP3]]
6642 v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) {
6643 return __builtin_lsx_vfcmp_cle_d(_1, _2);
6645 // CHECK-LABEL: @vfcmp_cle_s(
6646 // CHECK-NEXT: entry:
6647 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6648 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6649 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6650 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6651 // CHECK-NEXT: ret i128 [[TMP3]]
6653 v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) {
6654 return __builtin_lsx_vfcmp_cle_s(_1, _2);
6656 // CHECK-LABEL: @vfcmp_clt_d(
6657 // CHECK-NEXT: entry:
6658 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6659 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6660 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6661 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6662 // CHECK-NEXT: ret i128 [[TMP3]]
6664 v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) {
6665 return __builtin_lsx_vfcmp_clt_d(_1, _2);
6667 // CHECK-LABEL: @vfcmp_clt_s(
6668 // CHECK-NEXT: entry:
6669 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6670 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6671 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6672 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6673 // CHECK-NEXT: ret i128 [[TMP3]]
6675 v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) {
6676 return __builtin_lsx_vfcmp_clt_s(_1, _2);
6678 // CHECK-LABEL: @vfcmp_cne_d(
6679 // CHECK-NEXT: entry:
6680 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6681 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6682 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6683 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6684 // CHECK-NEXT: ret i128 [[TMP3]]
6686 v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) {
6687 return __builtin_lsx_vfcmp_cne_d(_1, _2);
6689 // CHECK-LABEL: @vfcmp_cne_s(
6690 // CHECK-NEXT: entry:
6691 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6692 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6693 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6694 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6695 // CHECK-NEXT: ret i128 [[TMP3]]
6697 v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) {
6698 return __builtin_lsx_vfcmp_cne_s(_1, _2);
6700 // CHECK-LABEL: @vfcmp_cor_d(
6701 // CHECK-NEXT: entry:
6702 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6703 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6704 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6705 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6706 // CHECK-NEXT: ret i128 [[TMP3]]
6708 v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) {
6709 return __builtin_lsx_vfcmp_cor_d(_1, _2);
6711 // CHECK-LABEL: @vfcmp_cor_s(
6712 // CHECK-NEXT: entry:
6713 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6714 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6715 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6716 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6717 // CHECK-NEXT: ret i128 [[TMP3]]
6719 v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) {
6720 return __builtin_lsx_vfcmp_cor_s(_1, _2);
6722 // CHECK-LABEL: @vfcmp_cueq_d(
6723 // CHECK-NEXT: entry:
6724 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6725 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6726 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6727 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6728 // CHECK-NEXT: ret i128 [[TMP3]]
6730 v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) {
6731 return __builtin_lsx_vfcmp_cueq_d(_1, _2);
6733 // CHECK-LABEL: @vfcmp_cueq_s(
6734 // CHECK-NEXT: entry:
6735 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6736 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6737 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6738 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6739 // CHECK-NEXT: ret i128 [[TMP3]]
6741 v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) {
6742 return __builtin_lsx_vfcmp_cueq_s(_1, _2);
6744 // CHECK-LABEL: @vfcmp_cule_d(
6745 // CHECK-NEXT: entry:
6746 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6747 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6748 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6749 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6750 // CHECK-NEXT: ret i128 [[TMP3]]
6752 v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) {
6753 return __builtin_lsx_vfcmp_cule_d(_1, _2);
6755 // CHECK-LABEL: @vfcmp_cule_s(
6756 // CHECK-NEXT: entry:
6757 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6758 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6759 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6760 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6761 // CHECK-NEXT: ret i128 [[TMP3]]
6763 v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) {
6764 return __builtin_lsx_vfcmp_cule_s(_1, _2);
6766 // CHECK-LABEL: @vfcmp_cult_d(
6767 // CHECK-NEXT: entry:
6768 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6769 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6770 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6771 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6772 // CHECK-NEXT: ret i128 [[TMP3]]
6774 v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) {
6775 return __builtin_lsx_vfcmp_cult_d(_1, _2);
6777 // CHECK-LABEL: @vfcmp_cult_s(
6778 // CHECK-NEXT: entry:
6779 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6780 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6781 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6782 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6783 // CHECK-NEXT: ret i128 [[TMP3]]
6785 v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) {
6786 return __builtin_lsx_vfcmp_cult_s(_1, _2);
6788 // CHECK-LABEL: @vfcmp_cun_d(
6789 // CHECK-NEXT: entry:
6790 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6791 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6792 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6793 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6794 // CHECK-NEXT: ret i128 [[TMP3]]
6796 v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) {
6797 return __builtin_lsx_vfcmp_cun_d(_1, _2);
6799 // CHECK-LABEL: @vfcmp_cune_d(
6800 // CHECK-NEXT: entry:
6801 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6802 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6803 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6804 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6805 // CHECK-NEXT: ret i128 [[TMP3]]
6807 v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) {
6808 return __builtin_lsx_vfcmp_cune_d(_1, _2);
6810 // CHECK-LABEL: @vfcmp_cune_s(
6811 // CHECK-NEXT: entry:
6812 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6813 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6814 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6815 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6816 // CHECK-NEXT: ret i128 [[TMP3]]
6818 v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) {
6819 return __builtin_lsx_vfcmp_cune_s(_1, _2);
6821 // CHECK-LABEL: @vfcmp_cun_s(
6822 // CHECK-NEXT: entry:
6823 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6824 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6825 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6826 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6827 // CHECK-NEXT: ret i128 [[TMP3]]
6829 v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) {
6830 return __builtin_lsx_vfcmp_cun_s(_1, _2);
6832 // CHECK-LABEL: @vfcmp_saf_d(
6833 // CHECK-NEXT: entry:
6834 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6835 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6836 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6837 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6838 // CHECK-NEXT: ret i128 [[TMP3]]
6840 v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) {
6841 return __builtin_lsx_vfcmp_saf_d(_1, _2);
6843 // CHECK-LABEL: @vfcmp_saf_s(
6844 // CHECK-NEXT: entry:
6845 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6846 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6847 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6848 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6849 // CHECK-NEXT: ret i128 [[TMP3]]
6851 v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) {
6852 return __builtin_lsx_vfcmp_saf_s(_1, _2);
6854 // CHECK-LABEL: @vfcmp_seq_d(
6855 // CHECK-NEXT: entry:
6856 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6857 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6858 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6859 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6860 // CHECK-NEXT: ret i128 [[TMP3]]
6862 v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) {
6863 return __builtin_lsx_vfcmp_seq_d(_1, _2);
6865 // CHECK-LABEL: @vfcmp_seq_s(
6866 // CHECK-NEXT: entry:
6867 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6868 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6869 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6870 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6871 // CHECK-NEXT: ret i128 [[TMP3]]
6873 v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) {
6874 return __builtin_lsx_vfcmp_seq_s(_1, _2);
6876 // CHECK-LABEL: @vfcmp_sle_d(
6877 // CHECK-NEXT: entry:
6878 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6879 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6880 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6881 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6882 // CHECK-NEXT: ret i128 [[TMP3]]
6884 v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) {
6885 return __builtin_lsx_vfcmp_sle_d(_1, _2);
6887 // CHECK-LABEL: @vfcmp_sle_s(
6888 // CHECK-NEXT: entry:
6889 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6890 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6891 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6892 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6893 // CHECK-NEXT: ret i128 [[TMP3]]
6895 v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) {
6896 return __builtin_lsx_vfcmp_sle_s(_1, _2);
6898 // CHECK-LABEL: @vfcmp_slt_d(
6899 // CHECK-NEXT: entry:
6900 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6901 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6902 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6903 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6904 // CHECK-NEXT: ret i128 [[TMP3]]
6906 v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) {
6907 return __builtin_lsx_vfcmp_slt_d(_1, _2);
6909 // CHECK-LABEL: @vfcmp_slt_s(
6910 // CHECK-NEXT: entry:
6911 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6912 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6913 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6914 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6915 // CHECK-NEXT: ret i128 [[TMP3]]
6917 v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) {
6918 return __builtin_lsx_vfcmp_slt_s(_1, _2);
6920 // CHECK-LABEL: @vfcmp_sne_d(
6921 // CHECK-NEXT: entry:
6922 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6923 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6924 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6925 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6926 // CHECK-NEXT: ret i128 [[TMP3]]
6928 v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) {
6929 return __builtin_lsx_vfcmp_sne_d(_1, _2);
6931 // CHECK-LABEL: @vfcmp_sne_s(
6932 // CHECK-NEXT: entry:
6933 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6934 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6935 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6936 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6937 // CHECK-NEXT: ret i128 [[TMP3]]
6939 v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) {
6940 return __builtin_lsx_vfcmp_sne_s(_1, _2);
6942 // CHECK-LABEL: @vfcmp_sor_d(
6943 // CHECK-NEXT: entry:
6944 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6945 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6946 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6947 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6948 // CHECK-NEXT: ret i128 [[TMP3]]
6950 v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) {
6951 return __builtin_lsx_vfcmp_sor_d(_1, _2);
6953 // CHECK-LABEL: @vfcmp_sor_s(
6954 // CHECK-NEXT: entry:
6955 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6956 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6957 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6958 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6959 // CHECK-NEXT: ret i128 [[TMP3]]
6961 v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) {
6962 return __builtin_lsx_vfcmp_sor_s(_1, _2);
6964 // CHECK-LABEL: @vfcmp_sueq_d(
6965 // CHECK-NEXT: entry:
6966 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6967 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6968 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6969 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6970 // CHECK-NEXT: ret i128 [[TMP3]]
6972 v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) {
6973 return __builtin_lsx_vfcmp_sueq_d(_1, _2);
6975 // CHECK-LABEL: @vfcmp_sueq_s(
6976 // CHECK-NEXT: entry:
6977 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6978 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6979 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6980 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6981 // CHECK-NEXT: ret i128 [[TMP3]]
6983 v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) {
6984 return __builtin_lsx_vfcmp_sueq_s(_1, _2);
6986 // CHECK-LABEL: @vfcmp_sule_d(
6987 // CHECK-NEXT: entry:
6988 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6989 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6990 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6991 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6992 // CHECK-NEXT: ret i128 [[TMP3]]
6994 v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) {
6995 return __builtin_lsx_vfcmp_sule_d(_1, _2);
6997 // CHECK-LABEL: @vfcmp_sule_s(
6998 // CHECK-NEXT: entry:
6999 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
7000 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
7001 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
7002 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
7003 // CHECK-NEXT: ret i128 [[TMP3]]
7005 v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) {
7006 return __builtin_lsx_vfcmp_sule_s(_1, _2);
7008 // CHECK-LABEL: @vfcmp_sult_d(
7009 // CHECK-NEXT: entry:
7010 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
7011 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
7012 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
7013 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
7014 // CHECK-NEXT: ret i128 [[TMP3]]
7016 v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) {
7017 return __builtin_lsx_vfcmp_sult_d(_1, _2);
7019 // CHECK-LABEL: @vfcmp_sult_s(
7020 // CHECK-NEXT: entry:
7021 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
7022 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
7023 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
7024 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
7025 // CHECK-NEXT: ret i128 [[TMP3]]
7027 v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) {
7028 return __builtin_lsx_vfcmp_sult_s(_1, _2);
7030 // CHECK-LABEL: @vfcmp_sun_d(
7031 // CHECK-NEXT: entry:
7032 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
7033 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
7034 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
7035 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
7036 // CHECK-NEXT: ret i128 [[TMP3]]
7038 v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) {
7039 return __builtin_lsx_vfcmp_sun_d(_1, _2);
7041 // CHECK-LABEL: @vfcmp_sune_d(
7042 // CHECK-NEXT: entry:
7043 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
7044 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
7045 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
7046 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
7047 // CHECK-NEXT: ret i128 [[TMP3]]
7049 v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) {
7050 return __builtin_lsx_vfcmp_sune_d(_1, _2);
7052 // CHECK-LABEL: @vfcmp_sune_s(
7053 // CHECK-NEXT: entry:
7054 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
7055 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
7056 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
7057 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
7058 // CHECK-NEXT: ret i128 [[TMP3]]
7060 v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) {
7061 return __builtin_lsx_vfcmp_sune_s(_1, _2);
7063 // CHECK-LABEL: @vfcmp_sun_s(
7064 // CHECK-NEXT: entry:
7065 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
7066 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
7067 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
7068 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
7069 // CHECK-NEXT: ret i128 [[TMP3]]
7071 v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) {
7072 return __builtin_lsx_vfcmp_sun_s(_1, _2);
7074 // CHECK-LABEL: @vrepli_b(
7075 // CHECK-NEXT: entry:
7076 // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1)
7077 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
7078 // CHECK-NEXT: ret i128 [[TMP1]]
7080 v16i8 vrepli_b() { return __builtin_lsx_vrepli_b(1); }
7081 // CHECK-LABEL: @vrepli_d(
7082 // CHECK-NEXT: entry:
7083 // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1)
7084 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
7085 // CHECK-NEXT: ret i128 [[TMP1]]
7087 v2i64 vrepli_d() { return __builtin_lsx_vrepli_d(1); }
7088 // CHECK-LABEL: @vrepli_h(
7089 // CHECK-NEXT: entry:
7090 // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1)
7091 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
7092 // CHECK-NEXT: ret i128 [[TMP1]]
7094 v8i16 vrepli_h() { return __builtin_lsx_vrepli_h(1); }
7095 // CHECK-LABEL: @vrepli_w(
7096 // CHECK-NEXT: entry:
7097 // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1)
7098 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
7099 // CHECK-NEXT: ret i128 [[TMP1]]
7101 v4i32 vrepli_w() { return __builtin_lsx_vrepli_w(1); }