Revert "[llvm] Improve llvm.objectsize computation by computing GEP, alloca and mallo...
[llvm-project.git] / clang / test / CodeGen / LoongArch / lsx / builtin-alias.c
blob7a84e0ae24f950b8d898cac908d06e15474f8db8
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s
4 #include <lsxintrin.h>
6 // CHECK-LABEL: @vsll_b(
7 // CHECK-NEXT: entry:
8 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
9 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
10 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
11 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
12 // CHECK-NEXT: ret i128 [[TMP3]]
14 v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __lsx_vsll_b(_1, _2); }
15 // CHECK-LABEL: @vsll_h(
16 // CHECK-NEXT: entry:
17 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
18 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
19 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
20 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
21 // CHECK-NEXT: ret i128 [[TMP3]]
23 v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); }
24 // CHECK-LABEL: @vsll_w(
25 // CHECK-NEXT: entry:
26 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
27 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
28 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
29 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
30 // CHECK-NEXT: ret i128 [[TMP3]]
32 v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); }
33 // CHECK-LABEL: @vsll_d(
34 // CHECK-NEXT: entry:
35 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
36 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
37 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
38 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
39 // CHECK-NEXT: ret i128 [[TMP3]]
41 v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); }
42 // CHECK-LABEL: @vslli_b(
43 // CHECK-NEXT: entry:
44 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
45 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1)
46 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
47 // CHECK-NEXT: ret i128 [[TMP2]]
49 v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); }
50 // CHECK-LABEL: @vslli_h(
51 // CHECK-NEXT: entry:
52 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
53 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1)
54 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
55 // CHECK-NEXT: ret i128 [[TMP2]]
57 v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); }
58 // CHECK-LABEL: @vslli_w(
59 // CHECK-NEXT: entry:
60 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
61 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1)
62 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
63 // CHECK-NEXT: ret i128 [[TMP2]]
65 v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); }
66 // CHECK-LABEL: @vslli_d(
67 // CHECK-NEXT: entry:
68 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
69 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1)
70 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
71 // CHECK-NEXT: ret i128 [[TMP2]]
73 v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); }
74 // CHECK-LABEL: @vsra_b(
75 // CHECK-NEXT: entry:
76 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
77 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
78 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
79 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
80 // CHECK-NEXT: ret i128 [[TMP3]]
82 v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); }
83 // CHECK-LABEL: @vsra_h(
84 // CHECK-NEXT: entry:
85 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
86 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
87 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
88 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
89 // CHECK-NEXT: ret i128 [[TMP3]]
91 v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); }
92 // CHECK-LABEL: @vsra_w(
93 // CHECK-NEXT: entry:
94 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
95 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
96 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
97 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
98 // CHECK-NEXT: ret i128 [[TMP3]]
100 v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); }
101 // CHECK-LABEL: @vsra_d(
102 // CHECK-NEXT: entry:
103 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
104 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
105 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
106 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
107 // CHECK-NEXT: ret i128 [[TMP3]]
109 v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); }
110 // CHECK-LABEL: @vsrai_b(
111 // CHECK-NEXT: entry:
112 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
113 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1)
114 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
115 // CHECK-NEXT: ret i128 [[TMP2]]
117 v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); }
118 // CHECK-LABEL: @vsrai_h(
119 // CHECK-NEXT: entry:
120 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
121 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1)
122 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
123 // CHECK-NEXT: ret i128 [[TMP2]]
125 v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); }
126 // CHECK-LABEL: @vsrai_w(
127 // CHECK-NEXT: entry:
128 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
129 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1)
130 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
131 // CHECK-NEXT: ret i128 [[TMP2]]
133 v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); }
134 // CHECK-LABEL: @vsrai_d(
135 // CHECK-NEXT: entry:
136 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
137 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1)
138 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
139 // CHECK-NEXT: ret i128 [[TMP2]]
141 v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); }
142 // CHECK-LABEL: @vsrar_b(
143 // CHECK-NEXT: entry:
144 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
145 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
146 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
147 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
148 // CHECK-NEXT: ret i128 [[TMP3]]
150 v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); }
151 // CHECK-LABEL: @vsrar_h(
152 // CHECK-NEXT: entry:
153 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
154 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
155 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
156 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
157 // CHECK-NEXT: ret i128 [[TMP3]]
159 v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); }
160 // CHECK-LABEL: @vsrar_w(
161 // CHECK-NEXT: entry:
162 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
163 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
164 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
165 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
166 // CHECK-NEXT: ret i128 [[TMP3]]
168 v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); }
169 // CHECK-LABEL: @vsrar_d(
170 // CHECK-NEXT: entry:
171 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
172 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
173 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
174 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
175 // CHECK-NEXT: ret i128 [[TMP3]]
177 v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); }
178 // CHECK-LABEL: @vsrari_b(
179 // CHECK-NEXT: entry:
180 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
181 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1)
182 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
183 // CHECK-NEXT: ret i128 [[TMP2]]
185 v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); }
186 // CHECK-LABEL: @vsrari_h(
187 // CHECK-NEXT: entry:
188 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
189 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1)
190 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
191 // CHECK-NEXT: ret i128 [[TMP2]]
193 v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); }
194 // CHECK-LABEL: @vsrari_w(
195 // CHECK-NEXT: entry:
196 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
197 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1)
198 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
199 // CHECK-NEXT: ret i128 [[TMP2]]
201 v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); }
202 // CHECK-LABEL: @vsrari_d(
203 // CHECK-NEXT: entry:
204 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
205 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1)
206 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
207 // CHECK-NEXT: ret i128 [[TMP2]]
209 v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); }
210 // CHECK-LABEL: @vsrl_b(
211 // CHECK-NEXT: entry:
212 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
213 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
214 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
215 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
216 // CHECK-NEXT: ret i128 [[TMP3]]
218 v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); }
219 // CHECK-LABEL: @vsrl_h(
220 // CHECK-NEXT: entry:
221 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
222 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
223 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
224 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
225 // CHECK-NEXT: ret i128 [[TMP3]]
227 v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); }
228 // CHECK-LABEL: @vsrl_w(
229 // CHECK-NEXT: entry:
230 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
231 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
232 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
233 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
234 // CHECK-NEXT: ret i128 [[TMP3]]
236 v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); }
237 // CHECK-LABEL: @vsrl_d(
238 // CHECK-NEXT: entry:
239 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
240 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
241 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
242 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
243 // CHECK-NEXT: ret i128 [[TMP3]]
245 v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); }
246 // CHECK-LABEL: @vsrli_b(
247 // CHECK-NEXT: entry:
248 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
249 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1)
250 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
251 // CHECK-NEXT: ret i128 [[TMP2]]
253 v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); }
254 // CHECK-LABEL: @vsrli_h(
255 // CHECK-NEXT: entry:
256 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
257 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1)
258 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
259 // CHECK-NEXT: ret i128 [[TMP2]]
261 v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); }
262 // CHECK-LABEL: @vsrli_w(
263 // CHECK-NEXT: entry:
264 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
265 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1)
266 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
267 // CHECK-NEXT: ret i128 [[TMP2]]
269 v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); }
270 // CHECK-LABEL: @vsrli_d(
271 // CHECK-NEXT: entry:
272 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
273 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1)
274 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
275 // CHECK-NEXT: ret i128 [[TMP2]]
277 v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); }
278 // CHECK-LABEL: @vsrlr_b(
279 // CHECK-NEXT: entry:
280 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
281 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
282 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
283 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
284 // CHECK-NEXT: ret i128 [[TMP3]]
286 v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); }
287 // CHECK-LABEL: @vsrlr_h(
288 // CHECK-NEXT: entry:
289 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
290 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
291 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
292 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
293 // CHECK-NEXT: ret i128 [[TMP3]]
295 v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); }
296 // CHECK-LABEL: @vsrlr_w(
297 // CHECK-NEXT: entry:
298 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
299 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
300 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
301 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
302 // CHECK-NEXT: ret i128 [[TMP3]]
304 v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); }
305 // CHECK-LABEL: @vsrlr_d(
306 // CHECK-NEXT: entry:
307 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
308 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
309 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
310 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
311 // CHECK-NEXT: ret i128 [[TMP3]]
313 v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); }
314 // CHECK-LABEL: @vsrlri_b(
315 // CHECK-NEXT: entry:
316 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
317 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1)
318 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
319 // CHECK-NEXT: ret i128 [[TMP2]]
321 v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); }
322 // CHECK-LABEL: @vsrlri_h(
323 // CHECK-NEXT: entry:
324 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
325 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1)
326 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
327 // CHECK-NEXT: ret i128 [[TMP2]]
329 v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); }
330 // CHECK-LABEL: @vsrlri_w(
331 // CHECK-NEXT: entry:
332 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
333 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1)
334 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
335 // CHECK-NEXT: ret i128 [[TMP2]]
337 v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); }
338 // CHECK-LABEL: @vsrlri_d(
339 // CHECK-NEXT: entry:
340 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
341 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1)
342 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
343 // CHECK-NEXT: ret i128 [[TMP2]]
345 v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); }
346 // CHECK-LABEL: @vbitclr_b(
347 // CHECK-NEXT: entry:
348 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
349 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
350 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
351 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
352 // CHECK-NEXT: ret i128 [[TMP3]]
354 v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); }
355 // CHECK-LABEL: @vbitclr_h(
356 // CHECK-NEXT: entry:
357 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
358 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
359 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
360 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
361 // CHECK-NEXT: ret i128 [[TMP3]]
363 v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); }
364 // CHECK-LABEL: @vbitclr_w(
365 // CHECK-NEXT: entry:
366 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
367 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
368 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
369 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
370 // CHECK-NEXT: ret i128 [[TMP3]]
372 v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); }
373 // CHECK-LABEL: @vbitclr_d(
374 // CHECK-NEXT: entry:
375 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
376 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
377 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
378 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
379 // CHECK-NEXT: ret i128 [[TMP3]]
381 v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); }
382 // CHECK-LABEL: @vbitclri_b(
383 // CHECK-NEXT: entry:
384 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
385 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1)
386 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
387 // CHECK-NEXT: ret i128 [[TMP2]]
389 v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); }
390 // CHECK-LABEL: @vbitclri_h(
391 // CHECK-NEXT: entry:
392 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
393 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1)
394 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
395 // CHECK-NEXT: ret i128 [[TMP2]]
397 v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); }
398 // CHECK-LABEL: @vbitclri_w(
399 // CHECK-NEXT: entry:
400 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
401 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1)
402 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
403 // CHECK-NEXT: ret i128 [[TMP2]]
405 v4u32 vbitclri_w(v4u32 _1) { return __lsx_vbitclri_w(_1, 1); }
406 // CHECK-LABEL: @vbitclri_d(
407 // CHECK-NEXT: entry:
408 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
409 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1)
410 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
411 // CHECK-NEXT: ret i128 [[TMP2]]
413 v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); }
414 // CHECK-LABEL: @vbitset_b(
415 // CHECK-NEXT: entry:
416 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
417 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
418 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
419 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
420 // CHECK-NEXT: ret i128 [[TMP3]]
422 v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); }
423 // CHECK-LABEL: @vbitset_h(
424 // CHECK-NEXT: entry:
425 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
426 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
427 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
428 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
429 // CHECK-NEXT: ret i128 [[TMP3]]
431 v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); }
432 // CHECK-LABEL: @vbitset_w(
433 // CHECK-NEXT: entry:
434 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
435 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
436 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
437 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
438 // CHECK-NEXT: ret i128 [[TMP3]]
440 v4u32 vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); }
441 // CHECK-LABEL: @vbitset_d(
442 // CHECK-NEXT: entry:
443 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
444 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
445 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
446 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
447 // CHECK-NEXT: ret i128 [[TMP3]]
449 v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); }
450 // CHECK-LABEL: @vbitseti_b(
451 // CHECK-NEXT: entry:
452 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
453 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1)
454 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
455 // CHECK-NEXT: ret i128 [[TMP2]]
457 v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); }
458 // CHECK-LABEL: @vbitseti_h(
459 // CHECK-NEXT: entry:
460 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
461 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1)
462 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
463 // CHECK-NEXT: ret i128 [[TMP2]]
465 v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); }
466 // CHECK-LABEL: @vbitseti_w(
467 // CHECK-NEXT: entry:
468 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
469 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1)
470 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
471 // CHECK-NEXT: ret i128 [[TMP2]]
473 v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); }
474 // CHECK-LABEL: @vbitseti_d(
475 // CHECK-NEXT: entry:
476 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
477 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1)
478 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
479 // CHECK-NEXT: ret i128 [[TMP2]]
481 v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); }
482 // CHECK-LABEL: @vbitrev_b(
483 // CHECK-NEXT: entry:
484 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
485 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
486 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
487 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
488 // CHECK-NEXT: ret i128 [[TMP3]]
490 v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); }
491 // CHECK-LABEL: @vbitrev_h(
492 // CHECK-NEXT: entry:
493 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
494 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
495 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
496 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
497 // CHECK-NEXT: ret i128 [[TMP3]]
499 v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); }
500 // CHECK-LABEL: @vbitrev_w(
501 // CHECK-NEXT: entry:
502 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
503 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
504 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
505 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
506 // CHECK-NEXT: ret i128 [[TMP3]]
508 v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); }
509 // CHECK-LABEL: @vbitrev_d(
510 // CHECK-NEXT: entry:
511 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
512 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
513 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
514 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
515 // CHECK-NEXT: ret i128 [[TMP3]]
517 v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); }
518 // CHECK-LABEL: @vbitrevi_b(
519 // CHECK-NEXT: entry:
520 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
521 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1)
522 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
523 // CHECK-NEXT: ret i128 [[TMP2]]
525 v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); }
526 // CHECK-LABEL: @vbitrevi_h(
527 // CHECK-NEXT: entry:
528 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
529 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1)
530 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
531 // CHECK-NEXT: ret i128 [[TMP2]]
533 v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); }
534 // CHECK-LABEL: @vbitrevi_w(
535 // CHECK-NEXT: entry:
536 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
537 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1)
538 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
539 // CHECK-NEXT: ret i128 [[TMP2]]
541 v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); }
542 // CHECK-LABEL: @vbitrevi_d(
543 // CHECK-NEXT: entry:
544 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
545 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1)
546 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
547 // CHECK-NEXT: ret i128 [[TMP2]]
549 v2u64 vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); }
550 // CHECK-LABEL: @vadd_b(
551 // CHECK-NEXT: entry:
552 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
553 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
554 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
555 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
556 // CHECK-NEXT: ret i128 [[TMP3]]
558 v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); }
559 // CHECK-LABEL: @vadd_h(
560 // CHECK-NEXT: entry:
561 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
562 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
563 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
564 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
565 // CHECK-NEXT: ret i128 [[TMP3]]
567 v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); }
568 // CHECK-LABEL: @vadd_w(
569 // CHECK-NEXT: entry:
570 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
571 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
572 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
573 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
574 // CHECK-NEXT: ret i128 [[TMP3]]
576 v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); }
577 // CHECK-LABEL: @vadd_d(
578 // CHECK-NEXT: entry:
579 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
580 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
581 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
582 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
583 // CHECK-NEXT: ret i128 [[TMP3]]
585 v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); }
586 // CHECK-LABEL: @vaddi_bu(
587 // CHECK-NEXT: entry:
588 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
589 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1)
590 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
591 // CHECK-NEXT: ret i128 [[TMP2]]
593 v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); }
594 // CHECK-LABEL: @vaddi_hu(
595 // CHECK-NEXT: entry:
596 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
597 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1)
598 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
599 // CHECK-NEXT: ret i128 [[TMP2]]
601 v8i16 vaddi_hu(v8i16 _1) { return __lsx_vaddi_hu(_1, 1); }
602 // CHECK-LABEL: @vaddi_wu(
603 // CHECK-NEXT: entry:
604 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
605 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1)
606 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
607 // CHECK-NEXT: ret i128 [[TMP2]]
609 v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); }
610 // CHECK-LABEL: @vaddi_du(
611 // CHECK-NEXT: entry:
612 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
613 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1)
614 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
615 // CHECK-NEXT: ret i128 [[TMP2]]
617 v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); }
618 // CHECK-LABEL: @vsub_b(
619 // CHECK-NEXT: entry:
620 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
621 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
622 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
623 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
624 // CHECK-NEXT: ret i128 [[TMP3]]
626 v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); }
627 // CHECK-LABEL: @vsub_h(
628 // CHECK-NEXT: entry:
629 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
630 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
631 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
632 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
633 // CHECK-NEXT: ret i128 [[TMP3]]
635 v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); }
636 // CHECK-LABEL: @vsub_w(
637 // CHECK-NEXT: entry:
638 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
639 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
640 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
641 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
642 // CHECK-NEXT: ret i128 [[TMP3]]
644 v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); }
645 // CHECK-LABEL: @vsub_d(
646 // CHECK-NEXT: entry:
647 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
648 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
649 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
650 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
651 // CHECK-NEXT: ret i128 [[TMP3]]
653 v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __lsx_vsub_d(_1, _2); }
654 // CHECK-LABEL: @vsubi_bu(
655 // CHECK-NEXT: entry:
656 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
657 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1)
658 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
659 // CHECK-NEXT: ret i128 [[TMP2]]
661 v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); }
662 // CHECK-LABEL: @vsubi_hu(
663 // CHECK-NEXT: entry:
664 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
665 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1)
666 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
667 // CHECK-NEXT: ret i128 [[TMP2]]
669 v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); }
670 // CHECK-LABEL: @vsubi_wu(
671 // CHECK-NEXT: entry:
672 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
673 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1)
674 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
675 // CHECK-NEXT: ret i128 [[TMP2]]
677 v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); }
678 // CHECK-LABEL: @vsubi_du(
679 // CHECK-NEXT: entry:
680 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
681 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1)
682 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
683 // CHECK-NEXT: ret i128 [[TMP2]]
685 v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); }
686 // CHECK-LABEL: @vmax_b(
687 // CHECK-NEXT: entry:
688 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
689 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
690 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
691 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
692 // CHECK-NEXT: ret i128 [[TMP3]]
694 v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); }
695 // CHECK-LABEL: @vmax_h(
696 // CHECK-NEXT: entry:
697 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
698 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
699 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
700 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
701 // CHECK-NEXT: ret i128 [[TMP3]]
703 v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); }
704 // CHECK-LABEL: @vmax_w(
705 // CHECK-NEXT: entry:
706 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
707 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
708 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
709 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
710 // CHECK-NEXT: ret i128 [[TMP3]]
712 v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); }
713 // CHECK-LABEL: @vmax_d(
714 // CHECK-NEXT: entry:
715 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
716 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
717 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
718 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
719 // CHECK-NEXT: ret i128 [[TMP3]]
721 v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); }
722 // CHECK-LABEL: @vmaxi_b(
723 // CHECK-NEXT: entry:
724 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
725 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1)
726 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
727 // CHECK-NEXT: ret i128 [[TMP2]]
729 v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); }
730 // CHECK-LABEL: @vmaxi_h(
731 // CHECK-NEXT: entry:
732 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
733 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1)
734 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
735 // CHECK-NEXT: ret i128 [[TMP2]]
737 v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); }
738 // CHECK-LABEL: @vmaxi_w(
739 // CHECK-NEXT: entry:
740 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
741 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1)
742 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
743 // CHECK-NEXT: ret i128 [[TMP2]]
745 v4i32 vmaxi_w(v4i32 _1) { return __lsx_vmaxi_w(_1, 1); }
746 // CHECK-LABEL: @vmaxi_d(
747 // CHECK-NEXT: entry:
748 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
749 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1)
750 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
751 // CHECK-NEXT: ret i128 [[TMP2]]
753 v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); }
754 // CHECK-LABEL: @vmax_bu(
755 // CHECK-NEXT: entry:
756 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
757 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
758 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
759 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
760 // CHECK-NEXT: ret i128 [[TMP3]]
762 v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); }
763 // CHECK-LABEL: @vmax_hu(
764 // CHECK-NEXT: entry:
765 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
766 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
767 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
768 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
769 // CHECK-NEXT: ret i128 [[TMP3]]
771 v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); }
772 // CHECK-LABEL: @vmax_wu(
773 // CHECK-NEXT: entry:
774 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
775 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
776 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
777 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
778 // CHECK-NEXT: ret i128 [[TMP3]]
780 v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); }
781 // CHECK-LABEL: @vmax_du(
782 // CHECK-NEXT: entry:
783 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
784 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
785 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
786 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
787 // CHECK-NEXT: ret i128 [[TMP3]]
789 v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); }
790 // CHECK-LABEL: @vmaxi_bu(
791 // CHECK-NEXT: entry:
792 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
793 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1)
794 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
795 // CHECK-NEXT: ret i128 [[TMP2]]
797 v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); }
798 // CHECK-LABEL: @vmaxi_hu(
799 // CHECK-NEXT: entry:
800 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
801 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1)
802 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
803 // CHECK-NEXT: ret i128 [[TMP2]]
805 v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); }
806 // CHECK-LABEL: @vmaxi_wu(
807 // CHECK-NEXT: entry:
808 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
809 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1)
810 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
811 // CHECK-NEXT: ret i128 [[TMP2]]
813 v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); }
814 // CHECK-LABEL: @vmaxi_du(
815 // CHECK-NEXT: entry:
816 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
817 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1)
818 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
819 // CHECK-NEXT: ret i128 [[TMP2]]
821 v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); }
822 // CHECK-LABEL: @vmin_b(
823 // CHECK-NEXT: entry:
824 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
825 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
826 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
827 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
828 // CHECK-NEXT: ret i128 [[TMP3]]
830 v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __lsx_vmin_b(_1, _2); }
831 // CHECK-LABEL: @vmin_h(
832 // CHECK-NEXT: entry:
833 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
834 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
835 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
836 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
837 // CHECK-NEXT: ret i128 [[TMP3]]
839 v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); }
840 // CHECK-LABEL: @vmin_w(
841 // CHECK-NEXT: entry:
842 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
843 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
844 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
845 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
846 // CHECK-NEXT: ret i128 [[TMP3]]
848 v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); }
849 // CHECK-LABEL: @vmin_d(
850 // CHECK-NEXT: entry:
851 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
852 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
853 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
854 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
855 // CHECK-NEXT: ret i128 [[TMP3]]
857 v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); }
858 // CHECK-LABEL: @vmini_b(
859 // CHECK-NEXT: entry:
860 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
861 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1)
862 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
863 // CHECK-NEXT: ret i128 [[TMP2]]
865 v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); }
866 // CHECK-LABEL: @vmini_h(
867 // CHECK-NEXT: entry:
868 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
869 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1)
870 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
871 // CHECK-NEXT: ret i128 [[TMP2]]
873 v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); }
874 // CHECK-LABEL: @vmini_w(
875 // CHECK-NEXT: entry:
876 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
877 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1)
878 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
879 // CHECK-NEXT: ret i128 [[TMP2]]
881 v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); }
882 // CHECK-LABEL: @vmini_d(
883 // CHECK-NEXT: entry:
884 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
885 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1)
886 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
887 // CHECK-NEXT: ret i128 [[TMP2]]
889 v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); }
890 // CHECK-LABEL: @vmin_bu(
891 // CHECK-NEXT: entry:
892 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
893 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
894 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
895 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
896 // CHECK-NEXT: ret i128 [[TMP3]]
898 v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); }
899 // CHECK-LABEL: @vmin_hu(
900 // CHECK-NEXT: entry:
901 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
902 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
903 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
904 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
905 // CHECK-NEXT: ret i128 [[TMP3]]
907 v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); }
908 // CHECK-LABEL: @vmin_wu(
909 // CHECK-NEXT: entry:
910 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
911 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
912 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
913 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
914 // CHECK-NEXT: ret i128 [[TMP3]]
916 v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); }
917 // CHECK-LABEL: @vmin_du(
918 // CHECK-NEXT: entry:
919 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
920 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
921 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
922 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
923 // CHECK-NEXT: ret i128 [[TMP3]]
925 v2u64 vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); }
926 // CHECK-LABEL: @vmini_bu(
927 // CHECK-NEXT: entry:
928 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
929 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1)
930 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
931 // CHECK-NEXT: ret i128 [[TMP2]]
933 v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); }
934 // CHECK-LABEL: @vmini_hu(
935 // CHECK-NEXT: entry:
936 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
937 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1)
938 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
939 // CHECK-NEXT: ret i128 [[TMP2]]
941 v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); }
942 // CHECK-LABEL: @vmini_wu(
943 // CHECK-NEXT: entry:
944 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
945 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1)
946 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
947 // CHECK-NEXT: ret i128 [[TMP2]]
949 v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); }
950 // CHECK-LABEL: @vmini_du(
951 // CHECK-NEXT: entry:
952 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
953 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1)
954 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
955 // CHECK-NEXT: ret i128 [[TMP2]]
957 v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); }
958 // CHECK-LABEL: @vseq_b(
959 // CHECK-NEXT: entry:
960 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
961 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
962 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
963 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
964 // CHECK-NEXT: ret i128 [[TMP3]]
966 v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); }
967 // CHECK-LABEL: @vseq_h(
968 // CHECK-NEXT: entry:
969 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
970 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
971 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
972 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
973 // CHECK-NEXT: ret i128 [[TMP3]]
975 v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); }
976 // CHECK-LABEL: @vseq_w(
977 // CHECK-NEXT: entry:
978 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
979 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
980 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
981 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
982 // CHECK-NEXT: ret i128 [[TMP3]]
984 v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); }
985 // CHECK-LABEL: @vseq_d(
986 // CHECK-NEXT: entry:
987 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
988 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
989 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
990 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
991 // CHECK-NEXT: ret i128 [[TMP3]]
993 v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __lsx_vseq_d(_1, _2); }
994 // CHECK-LABEL: @vseqi_b(
995 // CHECK-NEXT: entry:
996 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
997 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1)
998 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
999 // CHECK-NEXT: ret i128 [[TMP2]]
1001 v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); }
1002 // CHECK-LABEL: @vseqi_h(
1003 // CHECK-NEXT: entry:
1004 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1005 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1)
1006 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1007 // CHECK-NEXT: ret i128 [[TMP2]]
1009 v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); }
1010 // CHECK-LABEL: @vseqi_w(
1011 // CHECK-NEXT: entry:
1012 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1013 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1)
1014 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1015 // CHECK-NEXT: ret i128 [[TMP2]]
1017 v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); }
1018 // CHECK-LABEL: @vseqi_d(
1019 // CHECK-NEXT: entry:
1020 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1021 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1)
1022 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1023 // CHECK-NEXT: ret i128 [[TMP2]]
1025 v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); }
1026 // CHECK-LABEL: @vslti_b(
1027 // CHECK-NEXT: entry:
1028 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1029 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1)
1030 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1031 // CHECK-NEXT: ret i128 [[TMP2]]
1033 v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); }
1034 // CHECK-LABEL: @vslt_b(
1035 // CHECK-NEXT: entry:
1036 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1037 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1038 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1039 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1040 // CHECK-NEXT: ret i128 [[TMP3]]
1042 v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); }
1043 // CHECK-LABEL: @vslt_h(
1044 // CHECK-NEXT: entry:
1045 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1046 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1047 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1048 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1049 // CHECK-NEXT: ret i128 [[TMP3]]
1051 v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); }
1052 // CHECK-LABEL: @vslt_w(
1053 // CHECK-NEXT: entry:
1054 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1055 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1056 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1057 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1058 // CHECK-NEXT: ret i128 [[TMP3]]
1060 v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); }
1061 // CHECK-LABEL: @vslt_d(
1062 // CHECK-NEXT: entry:
1063 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1064 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1065 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1066 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1067 // CHECK-NEXT: ret i128 [[TMP3]]
1069 v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); }
1070 // CHECK-LABEL: @vslti_h(
1071 // CHECK-NEXT: entry:
1072 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1073 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1)
1074 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1075 // CHECK-NEXT: ret i128 [[TMP2]]
1077 v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); }
1078 // CHECK-LABEL: @vslti_w(
1079 // CHECK-NEXT: entry:
1080 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1081 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1)
1082 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1083 // CHECK-NEXT: ret i128 [[TMP2]]
1085 v4i32 vslti_w(v4i32 _1) { return __lsx_vslti_w(_1, 1); }
1086 // CHECK-LABEL: @vslti_d(
1087 // CHECK-NEXT: entry:
1088 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1089 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1)
1090 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1091 // CHECK-NEXT: ret i128 [[TMP2]]
1093 v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); }
1094 // CHECK-LABEL: @vslt_bu(
1095 // CHECK-NEXT: entry:
1096 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1097 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1098 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1099 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1100 // CHECK-NEXT: ret i128 [[TMP3]]
1102 v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); }
1103 // CHECK-LABEL: @vslt_hu(
1104 // CHECK-NEXT: entry:
1105 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1106 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1107 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1108 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1109 // CHECK-NEXT: ret i128 [[TMP3]]
1111 v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); }
1112 // CHECK-LABEL: @vslt_wu(
1113 // CHECK-NEXT: entry:
1114 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1115 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1116 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1117 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1118 // CHECK-NEXT: ret i128 [[TMP3]]
1120 v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); }
1121 // CHECK-LABEL: @vslt_du(
1122 // CHECK-NEXT: entry:
1123 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1124 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1125 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1126 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1127 // CHECK-NEXT: ret i128 [[TMP3]]
1129 v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); }
1130 // CHECK-LABEL: @vslti_bu(
1131 // CHECK-NEXT: entry:
1132 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1133 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1)
1134 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1135 // CHECK-NEXT: ret i128 [[TMP2]]
1137 v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); }
1138 // CHECK-LABEL: @vslti_hu(
1139 // CHECK-NEXT: entry:
1140 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1141 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1)
1142 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1143 // CHECK-NEXT: ret i128 [[TMP2]]
1145 v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); }
1146 // CHECK-LABEL: @vslti_wu(
1147 // CHECK-NEXT: entry:
1148 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1149 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1)
1150 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1151 // CHECK-NEXT: ret i128 [[TMP2]]
1153 v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); }
1154 // CHECK-LABEL: @vslti_du(
1155 // CHECK-NEXT: entry:
1156 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1157 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1)
1158 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1159 // CHECK-NEXT: ret i128 [[TMP2]]
1161 v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); }
1162 // CHECK-LABEL: @vsle_b(
1163 // CHECK-NEXT: entry:
1164 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1165 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1166 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1167 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1168 // CHECK-NEXT: ret i128 [[TMP3]]
1170 v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); }
1171 // CHECK-LABEL: @vsle_h(
1172 // CHECK-NEXT: entry:
1173 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1174 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1175 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1176 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1177 // CHECK-NEXT: ret i128 [[TMP3]]
1179 v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); }
1180 // CHECK-LABEL: @vsle_w(
1181 // CHECK-NEXT: entry:
1182 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1183 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1184 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1185 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1186 // CHECK-NEXT: ret i128 [[TMP3]]
1188 v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); }
1189 // CHECK-LABEL: @vsle_d(
1190 // CHECK-NEXT: entry:
1191 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1192 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1193 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1194 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1195 // CHECK-NEXT: ret i128 [[TMP3]]
1197 v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); }
1198 // CHECK-LABEL: @vslei_b(
1199 // CHECK-NEXT: entry:
1200 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1201 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1)
1202 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1203 // CHECK-NEXT: ret i128 [[TMP2]]
1205 v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); }
1206 // CHECK-LABEL: @vslei_h(
1207 // CHECK-NEXT: entry:
1208 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1209 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1)
1210 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1211 // CHECK-NEXT: ret i128 [[TMP2]]
1213 v8i16 vslei_h(v8i16 _1) { return __lsx_vslei_h(_1, 1); }
1214 // CHECK-LABEL: @vslei_w(
1215 // CHECK-NEXT: entry:
1216 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1217 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1)
1218 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1219 // CHECK-NEXT: ret i128 [[TMP2]]
1221 v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); }
1222 // CHECK-LABEL: @vslei_d(
1223 // CHECK-NEXT: entry:
1224 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1225 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1)
1226 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1227 // CHECK-NEXT: ret i128 [[TMP2]]
1229 v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); }
1230 // CHECK-LABEL: @vsle_bu(
1231 // CHECK-NEXT: entry:
1232 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1233 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1234 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1235 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1236 // CHECK-NEXT: ret i128 [[TMP3]]
1238 v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); }
1239 // CHECK-LABEL: @vsle_hu(
1240 // CHECK-NEXT: entry:
1241 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1242 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1243 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1244 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1245 // CHECK-NEXT: ret i128 [[TMP3]]
1247 v8i16 vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); }
1248 // CHECK-LABEL: @vsle_wu(
1249 // CHECK-NEXT: entry:
1250 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1251 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1252 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1253 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1254 // CHECK-NEXT: ret i128 [[TMP3]]
1256 v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); }
1257 // CHECK-LABEL: @vsle_du(
1258 // CHECK-NEXT: entry:
1259 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1260 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1261 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1262 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1263 // CHECK-NEXT: ret i128 [[TMP3]]
1265 v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); }
1266 // CHECK-LABEL: @vslei_bu(
1267 // CHECK-NEXT: entry:
1268 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1269 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1)
1270 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1271 // CHECK-NEXT: ret i128 [[TMP2]]
1273 v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); }
1274 // CHECK-LABEL: @vslei_hu(
1275 // CHECK-NEXT: entry:
1276 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1277 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1)
1278 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1279 // CHECK-NEXT: ret i128 [[TMP2]]
1281 v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); }
1282 // CHECK-LABEL: @vslei_wu(
1283 // CHECK-NEXT: entry:
1284 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1285 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1)
1286 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1287 // CHECK-NEXT: ret i128 [[TMP2]]
1289 v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); }
1290 // CHECK-LABEL: @vslei_du(
1291 // CHECK-NEXT: entry:
1292 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1293 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1)
1294 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1295 // CHECK-NEXT: ret i128 [[TMP2]]
1297 v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); }
1298 // CHECK-LABEL: @vsat_b(
1299 // CHECK-NEXT: entry:
1300 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1301 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1)
1302 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1303 // CHECK-NEXT: ret i128 [[TMP2]]
1305 v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); }
1306 // CHECK-LABEL: @vsat_h(
1307 // CHECK-NEXT: entry:
1308 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1309 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1)
1310 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1311 // CHECK-NEXT: ret i128 [[TMP2]]
1313 v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); }
1314 // CHECK-LABEL: @vsat_w(
1315 // CHECK-NEXT: entry:
1316 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1317 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1)
1318 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1319 // CHECK-NEXT: ret i128 [[TMP2]]
1321 v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); }
1322 // CHECK-LABEL: @vsat_d(
1323 // CHECK-NEXT: entry:
1324 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1325 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1)
1326 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1327 // CHECK-NEXT: ret i128 [[TMP2]]
1329 v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); }
1330 // CHECK-LABEL: @vsat_bu(
1331 // CHECK-NEXT: entry:
1332 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1333 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1)
1334 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1335 // CHECK-NEXT: ret i128 [[TMP2]]
1337 v16u8 vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); }
1338 // CHECK-LABEL: @vsat_hu(
1339 // CHECK-NEXT: entry:
1340 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1341 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1)
1342 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1343 // CHECK-NEXT: ret i128 [[TMP2]]
1345 v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); }
1346 // CHECK-LABEL: @vsat_wu(
1347 // CHECK-NEXT: entry:
1348 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1349 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1)
1350 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1351 // CHECK-NEXT: ret i128 [[TMP2]]
1353 v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); }
1354 // CHECK-LABEL: @vsat_du(
1355 // CHECK-NEXT: entry:
1356 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1357 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1)
1358 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1359 // CHECK-NEXT: ret i128 [[TMP2]]
1361 v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); }
1362 // CHECK-LABEL: @vadda_b(
1363 // CHECK-NEXT: entry:
1364 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1365 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1366 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1367 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1368 // CHECK-NEXT: ret i128 [[TMP3]]
1370 v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); }
1371 // CHECK-LABEL: @vadda_h(
1372 // CHECK-NEXT: entry:
1373 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1374 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1375 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1376 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1377 // CHECK-NEXT: ret i128 [[TMP3]]
1379 v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); }
1380 // CHECK-LABEL: @vadda_w(
1381 // CHECK-NEXT: entry:
1382 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1383 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1384 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1385 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1386 // CHECK-NEXT: ret i128 [[TMP3]]
1388 v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); }
1389 // CHECK-LABEL: @vadda_d(
1390 // CHECK-NEXT: entry:
1391 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1392 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1393 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1394 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1395 // CHECK-NEXT: ret i128 [[TMP3]]
1397 v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); }
1398 // CHECK-LABEL: @vsadd_b(
1399 // CHECK-NEXT: entry:
1400 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1401 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1402 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1403 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1404 // CHECK-NEXT: ret i128 [[TMP3]]
1406 v16i8 vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); }
1407 // CHECK-LABEL: @vsadd_h(
1408 // CHECK-NEXT: entry:
1409 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1410 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1411 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1412 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1413 // CHECK-NEXT: ret i128 [[TMP3]]
1415 v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); }
1416 // CHECK-LABEL: @vsadd_w(
1417 // CHECK-NEXT: entry:
1418 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1419 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1420 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1421 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1422 // CHECK-NEXT: ret i128 [[TMP3]]
1424 v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); }
1425 // CHECK-LABEL: @vsadd_d(
1426 // CHECK-NEXT: entry:
1427 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1428 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1429 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1430 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1431 // CHECK-NEXT: ret i128 [[TMP3]]
1433 v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); }
1434 // CHECK-LABEL: @vsadd_bu(
1435 // CHECK-NEXT: entry:
1436 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1437 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1438 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1439 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1440 // CHECK-NEXT: ret i128 [[TMP3]]
1442 v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); }
1443 // CHECK-LABEL: @vsadd_hu(
1444 // CHECK-NEXT: entry:
1445 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1446 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1447 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1448 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1449 // CHECK-NEXT: ret i128 [[TMP3]]
1451 v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); }
1452 // CHECK-LABEL: @vsadd_wu(
1453 // CHECK-NEXT: entry:
1454 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1455 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1456 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1457 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1458 // CHECK-NEXT: ret i128 [[TMP3]]
1460 v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); }
1461 // CHECK-LABEL: @vsadd_du(
1462 // CHECK-NEXT: entry:
1463 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1464 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1465 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1466 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1467 // CHECK-NEXT: ret i128 [[TMP3]]
1469 v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); }
1470 // CHECK-LABEL: @vavg_b(
1471 // CHECK-NEXT: entry:
1472 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1473 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1474 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1475 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1476 // CHECK-NEXT: ret i128 [[TMP3]]
1478 v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); }
1479 // CHECK-LABEL: @vavg_h(
1480 // CHECK-NEXT: entry:
1481 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1482 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1483 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1484 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1485 // CHECK-NEXT: ret i128 [[TMP3]]
1487 v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); }
1488 // CHECK-LABEL: @vavg_w(
1489 // CHECK-NEXT: entry:
1490 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1491 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1492 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1493 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1494 // CHECK-NEXT: ret i128 [[TMP3]]
1496 v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); }
1497 // CHECK-LABEL: @vavg_d(
1498 // CHECK-NEXT: entry:
1499 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1500 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1501 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1502 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1503 // CHECK-NEXT: ret i128 [[TMP3]]
1505 v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); }
1506 // CHECK-LABEL: @vavg_bu(
1507 // CHECK-NEXT: entry:
1508 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1509 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1510 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1511 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1512 // CHECK-NEXT: ret i128 [[TMP3]]
1514 v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); }
1515 // CHECK-LABEL: @vavg_hu(
1516 // CHECK-NEXT: entry:
1517 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1518 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1519 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1520 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1521 // CHECK-NEXT: ret i128 [[TMP3]]
1523 v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); }
1524 // CHECK-LABEL: @vavg_wu(
1525 // CHECK-NEXT: entry:
1526 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1527 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1528 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1529 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1530 // CHECK-NEXT: ret i128 [[TMP3]]
1532 v4u32 vavg_wu(v4u32 _1, v4u32 _2) { return __lsx_vavg_wu(_1, _2); }
1533 // CHECK-LABEL: @vavg_du(
1534 // CHECK-NEXT: entry:
1535 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1536 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1537 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1538 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1539 // CHECK-NEXT: ret i128 [[TMP3]]
1541 v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); }
1542 // CHECK-LABEL: @vavgr_b(
1543 // CHECK-NEXT: entry:
1544 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1545 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1546 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1547 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1548 // CHECK-NEXT: ret i128 [[TMP3]]
1550 v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); }
1551 // CHECK-LABEL: @vavgr_h(
1552 // CHECK-NEXT: entry:
1553 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1554 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1555 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1556 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1557 // CHECK-NEXT: ret i128 [[TMP3]]
1559 v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); }
1560 // CHECK-LABEL: @vavgr_w(
1561 // CHECK-NEXT: entry:
1562 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1563 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1564 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1565 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1566 // CHECK-NEXT: ret i128 [[TMP3]]
1568 v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); }
1569 // CHECK-LABEL: @vavgr_d(
1570 // CHECK-NEXT: entry:
1571 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1572 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1573 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1574 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1575 // CHECK-NEXT: ret i128 [[TMP3]]
1577 v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); }
1578 // CHECK-LABEL: @vavgr_bu(
1579 // CHECK-NEXT: entry:
1580 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1581 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1582 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1583 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1584 // CHECK-NEXT: ret i128 [[TMP3]]
1586 v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); }
1587 // CHECK-LABEL: @vavgr_hu(
1588 // CHECK-NEXT: entry:
1589 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1590 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1591 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1592 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1593 // CHECK-NEXT: ret i128 [[TMP3]]
1595 v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); }
1596 // CHECK-LABEL: @vavgr_wu(
1597 // CHECK-NEXT: entry:
1598 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1599 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1600 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1601 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1602 // CHECK-NEXT: ret i128 [[TMP3]]
1604 v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); }
1605 // CHECK-LABEL: @vavgr_du(
1606 // CHECK-NEXT: entry:
1607 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1608 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1609 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1610 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1611 // CHECK-NEXT: ret i128 [[TMP3]]
1613 v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); }
1614 // CHECK-LABEL: @vssub_b(
1615 // CHECK-NEXT: entry:
1616 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1617 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1618 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1619 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1620 // CHECK-NEXT: ret i128 [[TMP3]]
1622 v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); }
1623 // CHECK-LABEL: @vssub_h(
1624 // CHECK-NEXT: entry:
1625 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1626 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1627 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1628 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1629 // CHECK-NEXT: ret i128 [[TMP3]]
1631 v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); }
1632 // CHECK-LABEL: @vssub_w(
1633 // CHECK-NEXT: entry:
1634 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1635 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1636 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1637 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1638 // CHECK-NEXT: ret i128 [[TMP3]]
1640 v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); }
1641 // CHECK-LABEL: @vssub_d(
1642 // CHECK-NEXT: entry:
1643 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1644 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1645 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1646 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1647 // CHECK-NEXT: ret i128 [[TMP3]]
1649 v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); }
1650 // CHECK-LABEL: @vssub_bu(
1651 // CHECK-NEXT: entry:
1652 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1653 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1654 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1655 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1656 // CHECK-NEXT: ret i128 [[TMP3]]
1658 v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); }
1659 // CHECK-LABEL: @vssub_hu(
1660 // CHECK-NEXT: entry:
1661 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1662 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1663 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1664 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1665 // CHECK-NEXT: ret i128 [[TMP3]]
1667 v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); }
1668 // CHECK-LABEL: @vssub_wu(
1669 // CHECK-NEXT: entry:
1670 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1671 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1672 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1673 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1674 // CHECK-NEXT: ret i128 [[TMP3]]
1676 v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); }
1677 // CHECK-LABEL: @vssub_du(
1678 // CHECK-NEXT: entry:
1679 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1680 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1681 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1682 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1683 // CHECK-NEXT: ret i128 [[TMP3]]
1685 v2u64 vssub_du(v2u64 _1, v2u64 _2) { return __lsx_vssub_du(_1, _2); }
1686 // CHECK-LABEL: @vabsd_b(
1687 // CHECK-NEXT: entry:
1688 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1689 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1690 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1691 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1692 // CHECK-NEXT: ret i128 [[TMP3]]
1694 v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); }
1695 // CHECK-LABEL: @vabsd_h(
1696 // CHECK-NEXT: entry:
1697 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1698 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1699 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1700 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1701 // CHECK-NEXT: ret i128 [[TMP3]]
1703 v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); }
1704 // CHECK-LABEL: @vabsd_w(
1705 // CHECK-NEXT: entry:
1706 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1707 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1708 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1709 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1710 // CHECK-NEXT: ret i128 [[TMP3]]
1712 v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); }
1713 // CHECK-LABEL: @vabsd_d(
1714 // CHECK-NEXT: entry:
1715 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1716 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1717 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1718 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1719 // CHECK-NEXT: ret i128 [[TMP3]]
1721 v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); }
1722 // CHECK-LABEL: @vabsd_bu(
1723 // CHECK-NEXT: entry:
1724 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1725 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1726 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1727 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1728 // CHECK-NEXT: ret i128 [[TMP3]]
1730 v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); }
1731 // CHECK-LABEL: @vabsd_hu(
1732 // CHECK-NEXT: entry:
1733 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1734 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1735 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1736 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1737 // CHECK-NEXT: ret i128 [[TMP3]]
1739 v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); }
1740 // CHECK-LABEL: @vabsd_wu(
1741 // CHECK-NEXT: entry:
1742 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1743 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1744 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1745 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1746 // CHECK-NEXT: ret i128 [[TMP3]]
1748 v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return __lsx_vabsd_wu(_1, _2); }
1749 // CHECK-LABEL: @vabsd_du(
1750 // CHECK-NEXT: entry:
1751 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1752 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1753 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1754 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1755 // CHECK-NEXT: ret i128 [[TMP3]]
1757 v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); }
1758 // CHECK-LABEL: @vmul_b(
1759 // CHECK-NEXT: entry:
1760 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1761 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1762 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1763 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1764 // CHECK-NEXT: ret i128 [[TMP3]]
1766 v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); }
1767 // CHECK-LABEL: @vmul_h(
1768 // CHECK-NEXT: entry:
1769 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1770 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1771 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1772 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1773 // CHECK-NEXT: ret i128 [[TMP3]]
1775 v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); }
1776 // CHECK-LABEL: @vmul_w(
1777 // CHECK-NEXT: entry:
1778 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1779 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1780 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1781 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1782 // CHECK-NEXT: ret i128 [[TMP3]]
1784 v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); }
1785 // CHECK-LABEL: @vmul_d(
1786 // CHECK-NEXT: entry:
1787 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1788 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1789 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1790 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1791 // CHECK-NEXT: ret i128 [[TMP3]]
1793 v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); }
1794 // CHECK-LABEL: @vmadd_b(
1795 // CHECK-NEXT: entry:
1796 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1797 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1798 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
1799 // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1800 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
1801 // CHECK-NEXT: ret i128 [[TMP4]]
1803 v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) {
1804 return __lsx_vmadd_b(_1, _2, _3);
1806 // CHECK-LABEL: @vmadd_h(
1807 // CHECK-NEXT: entry:
1808 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1809 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1810 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
1811 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
1812 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
1813 // CHECK-NEXT: ret i128 [[TMP4]]
1815 v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) {
1816 return __lsx_vmadd_h(_1, _2, _3);
1818 // CHECK-LABEL: @vmadd_w(
1819 // CHECK-NEXT: entry:
1820 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1821 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1822 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
1823 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
1824 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1825 // CHECK-NEXT: ret i128 [[TMP4]]
1827 v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) {
1828 return __lsx_vmadd_w(_1, _2, _3);
1830 // CHECK-LABEL: @vmadd_d(
1831 // CHECK-NEXT: entry:
1832 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1833 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1834 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
1835 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
1836 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1837 // CHECK-NEXT: ret i128 [[TMP4]]
1839 v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) {
1840 return __lsx_vmadd_d(_1, _2, _3);
1842 // CHECK-LABEL: @vmsub_b(
1843 // CHECK-NEXT: entry:
1844 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1845 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1846 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
1847 // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1848 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
1849 // CHECK-NEXT: ret i128 [[TMP4]]
1851 v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) {
1852 return __lsx_vmsub_b(_1, _2, _3);
1854 // CHECK-LABEL: @vmsub_h(
1855 // CHECK-NEXT: entry:
1856 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1857 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1858 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
1859 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
1860 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
1861 // CHECK-NEXT: ret i128 [[TMP4]]
1863 v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) {
1864 return __lsx_vmsub_h(_1, _2, _3);
1866 // CHECK-LABEL: @vmsub_w(
1867 // CHECK-NEXT: entry:
1868 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1869 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1870 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
1871 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
1872 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1873 // CHECK-NEXT: ret i128 [[TMP4]]
1875 v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) {
1876 return __lsx_vmsub_w(_1, _2, _3);
1878 // CHECK-LABEL: @vmsub_d(
1879 // CHECK-NEXT: entry:
1880 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1881 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1882 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
1883 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
1884 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1885 // CHECK-NEXT: ret i128 [[TMP4]]
1887 v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) {
1888 return __lsx_vmsub_d(_1, _2, _3);
1890 // CHECK-LABEL: @vdiv_b(
1891 // CHECK-NEXT: entry:
1892 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1893 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1894 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1895 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1896 // CHECK-NEXT: ret i128 [[TMP3]]
1898 v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); }
1899 // CHECK-LABEL: @vdiv_h(
1900 // CHECK-NEXT: entry:
1901 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1902 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1903 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1904 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1905 // CHECK-NEXT: ret i128 [[TMP3]]
1907 v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); }
1908 // CHECK-LABEL: @vdiv_w(
1909 // CHECK-NEXT: entry:
1910 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1911 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1912 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1913 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1914 // CHECK-NEXT: ret i128 [[TMP3]]
1916 v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); }
1917 // CHECK-LABEL: @vdiv_d(
1918 // CHECK-NEXT: entry:
1919 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1920 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1921 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1922 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1923 // CHECK-NEXT: ret i128 [[TMP3]]
1925 v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); }
1926 // CHECK-LABEL: @vdiv_bu(
1927 // CHECK-NEXT: entry:
1928 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1929 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1930 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1931 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1932 // CHECK-NEXT: ret i128 [[TMP3]]
1934 v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); }
1935 // CHECK-LABEL: @vdiv_hu(
1936 // CHECK-NEXT: entry:
1937 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1938 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1939 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1940 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1941 // CHECK-NEXT: ret i128 [[TMP3]]
1943 v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); }
1944 // CHECK-LABEL: @vdiv_wu(
1945 // CHECK-NEXT: entry:
1946 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1947 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1948 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1949 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1950 // CHECK-NEXT: ret i128 [[TMP3]]
1952 v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); }
1953 // CHECK-LABEL: @vdiv_du(
1954 // CHECK-NEXT: entry:
1955 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1956 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1957 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1958 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1959 // CHECK-NEXT: ret i128 [[TMP3]]
1961 v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); }
1962 // CHECK-LABEL: @vhaddw_h_b(
1963 // CHECK-NEXT: entry:
1964 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1965 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1966 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1967 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1968 // CHECK-NEXT: ret i128 [[TMP3]]
1970 v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); }
1971 // CHECK-LABEL: @vhaddw_w_h(
1972 // CHECK-NEXT: entry:
1973 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1974 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1975 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1976 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1977 // CHECK-NEXT: ret i128 [[TMP3]]
1979 v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhaddw_w_h(_1, _2); }
1980 // CHECK-LABEL: @vhaddw_d_w(
1981 // CHECK-NEXT: entry:
1982 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1983 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1984 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1985 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1986 // CHECK-NEXT: ret i128 [[TMP3]]
1988 v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); }
1989 // CHECK-LABEL: @vhaddw_hu_bu(
1990 // CHECK-NEXT: entry:
1991 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1992 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1993 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1994 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1995 // CHECK-NEXT: ret i128 [[TMP3]]
1997 v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); }
1998 // CHECK-LABEL: @vhaddw_wu_hu(
1999 // CHECK-NEXT: entry:
2000 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2001 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2002 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2003 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2004 // CHECK-NEXT: ret i128 [[TMP3]]
2006 v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); }
2007 // CHECK-LABEL: @vhaddw_du_wu(
2008 // CHECK-NEXT: entry:
2009 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2010 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2011 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2012 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2013 // CHECK-NEXT: ret i128 [[TMP3]]
2015 v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); }
2016 // CHECK-LABEL: @vhsubw_h_b(
2017 // CHECK-NEXT: entry:
2018 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2019 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2020 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2021 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2022 // CHECK-NEXT: ret i128 [[TMP3]]
2024 v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); }
2025 // CHECK-LABEL: @vhsubw_w_h(
2026 // CHECK-NEXT: entry:
2027 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2028 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2029 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2030 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2031 // CHECK-NEXT: ret i128 [[TMP3]]
2033 v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); }
2034 // CHECK-LABEL: @vhsubw_d_w(
2035 // CHECK-NEXT: entry:
2036 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2037 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2038 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2039 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2040 // CHECK-NEXT: ret i128 [[TMP3]]
2042 v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); }
2043 // CHECK-LABEL: @vhsubw_hu_bu(
2044 // CHECK-NEXT: entry:
2045 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2046 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2047 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2048 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2049 // CHECK-NEXT: ret i128 [[TMP3]]
2051 v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); }
2052 // CHECK-LABEL: @vhsubw_wu_hu(
2053 // CHECK-NEXT: entry:
2054 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2055 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2056 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2057 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2058 // CHECK-NEXT: ret i128 [[TMP3]]
2060 v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); }
2061 // CHECK-LABEL: @vhsubw_du_wu(
2062 // CHECK-NEXT: entry:
2063 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2064 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2065 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2066 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2067 // CHECK-NEXT: ret i128 [[TMP3]]
2069 v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); }
2070 // CHECK-LABEL: @vmod_b(
2071 // CHECK-NEXT: entry:
2072 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2073 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2074 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2075 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2076 // CHECK-NEXT: ret i128 [[TMP3]]
2078 v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); }
2079 // CHECK-LABEL: @vmod_h(
2080 // CHECK-NEXT: entry:
2081 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2082 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2083 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2084 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2085 // CHECK-NEXT: ret i128 [[TMP3]]
2087 v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); }
2088 // CHECK-LABEL: @vmod_w(
2089 // CHECK-NEXT: entry:
2090 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2091 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2092 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2093 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2094 // CHECK-NEXT: ret i128 [[TMP3]]
2096 v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); }
2097 // CHECK-LABEL: @vmod_d(
2098 // CHECK-NEXT: entry:
2099 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2100 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2101 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2102 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2103 // CHECK-NEXT: ret i128 [[TMP3]]
2105 v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); }
2106 // CHECK-LABEL: @vmod_bu(
2107 // CHECK-NEXT: entry:
2108 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2109 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2110 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2111 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2112 // CHECK-NEXT: ret i128 [[TMP3]]
2114 v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); }
2115 // CHECK-LABEL: @vmod_hu(
2116 // CHECK-NEXT: entry:
2117 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2118 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2119 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2120 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2121 // CHECK-NEXT: ret i128 [[TMP3]]
2123 v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); }
2124 // CHECK-LABEL: @vmod_wu(
2125 // CHECK-NEXT: entry:
2126 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2127 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2128 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2129 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2130 // CHECK-NEXT: ret i128 [[TMP3]]
2132 v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); }
2133 // CHECK-LABEL: @vmod_du(
2134 // CHECK-NEXT: entry:
2135 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2136 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2137 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2138 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2139 // CHECK-NEXT: ret i128 [[TMP3]]
2141 v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); }
2142 // CHECK-LABEL: @vreplve_b(
2143 // CHECK-NEXT: entry:
2144 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2145 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]])
2146 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2147 // CHECK-NEXT: ret i128 [[TMP2]]
2149 v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); }
2150 // CHECK-LABEL: @vreplve_h(
2151 // CHECK-NEXT: entry:
2152 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2153 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]])
2154 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2155 // CHECK-NEXT: ret i128 [[TMP2]]
2157 v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); }
2158 // CHECK-LABEL: @vreplve_w(
2159 // CHECK-NEXT: entry:
2160 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2161 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]])
2162 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2163 // CHECK-NEXT: ret i128 [[TMP2]]
2165 v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); }
2166 // CHECK-LABEL: @vreplve_d(
2167 // CHECK-NEXT: entry:
2168 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2169 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]])
2170 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2171 // CHECK-NEXT: ret i128 [[TMP2]]
2173 v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); }
2174 // CHECK-LABEL: @vreplvei_b(
2175 // CHECK-NEXT: entry:
2176 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2177 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1)
2178 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2179 // CHECK-NEXT: ret i128 [[TMP2]]
2181 v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); }
2182 // CHECK-LABEL: @vreplvei_h(
2183 // CHECK-NEXT: entry:
2184 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2185 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1)
2186 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2187 // CHECK-NEXT: ret i128 [[TMP2]]
2189 v8i16 vreplvei_h(v8i16 _1) { return __lsx_vreplvei_h(_1, 1); }
2190 // CHECK-LABEL: @vreplvei_w(
2191 // CHECK-NEXT: entry:
2192 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2193 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1)
2194 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2195 // CHECK-NEXT: ret i128 [[TMP2]]
2197 v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); }
2198 // CHECK-LABEL: @vreplvei_d(
2199 // CHECK-NEXT: entry:
2200 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2201 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1)
2202 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2203 // CHECK-NEXT: ret i128 [[TMP2]]
2205 v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); }
2206 // CHECK-LABEL: @vpickev_b(
2207 // CHECK-NEXT: entry:
2208 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2209 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2210 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2211 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2212 // CHECK-NEXT: ret i128 [[TMP3]]
2214 v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); }
2215 // CHECK-LABEL: @vpickev_h(
2216 // CHECK-NEXT: entry:
2217 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2218 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2219 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2220 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2221 // CHECK-NEXT: ret i128 [[TMP3]]
2223 v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); }
2224 // CHECK-LABEL: @vpickev_w(
2225 // CHECK-NEXT: entry:
2226 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2227 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2228 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2229 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2230 // CHECK-NEXT: ret i128 [[TMP3]]
2232 v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); }
2233 // CHECK-LABEL: @vpickev_d(
2234 // CHECK-NEXT: entry:
2235 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2236 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2237 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2238 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2239 // CHECK-NEXT: ret i128 [[TMP3]]
2241 v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); }
2242 // CHECK-LABEL: @vpickod_b(
2243 // CHECK-NEXT: entry:
2244 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2245 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2246 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2247 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2248 // CHECK-NEXT: ret i128 [[TMP3]]
2250 v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); }
2251 // CHECK-LABEL: @vpickod_h(
2252 // CHECK-NEXT: entry:
2253 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2254 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2255 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2256 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2257 // CHECK-NEXT: ret i128 [[TMP3]]
2259 v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); }
2260 // CHECK-LABEL: @vpickod_w(
2261 // CHECK-NEXT: entry:
2262 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2263 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2264 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2265 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2266 // CHECK-NEXT: ret i128 [[TMP3]]
2268 v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); }
2269 // CHECK-LABEL: @vpickod_d(
2270 // CHECK-NEXT: entry:
2271 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2272 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2273 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2274 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2275 // CHECK-NEXT: ret i128 [[TMP3]]
2277 v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); }
2278 // CHECK-LABEL: @vilvh_b(
2279 // CHECK-NEXT: entry:
2280 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2281 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2282 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2283 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2284 // CHECK-NEXT: ret i128 [[TMP3]]
2286 v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); }
2287 // CHECK-LABEL: @vilvh_h(
2288 // CHECK-NEXT: entry:
2289 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2290 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2291 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2292 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2293 // CHECK-NEXT: ret i128 [[TMP3]]
2295 v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); }
2296 // CHECK-LABEL: @vilvh_w(
2297 // CHECK-NEXT: entry:
2298 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2299 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2300 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2301 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2302 // CHECK-NEXT: ret i128 [[TMP3]]
2304 v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); }
2305 // CHECK-LABEL: @vilvh_d(
2306 // CHECK-NEXT: entry:
2307 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2308 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2309 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2310 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2311 // CHECK-NEXT: ret i128 [[TMP3]]
2313 v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); }
2314 // CHECK-LABEL: @vilvl_b(
2315 // CHECK-NEXT: entry:
2316 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2317 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2318 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2319 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2320 // CHECK-NEXT: ret i128 [[TMP3]]
2322 v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); }
2323 // CHECK-LABEL: @vilvl_h(
2324 // CHECK-NEXT: entry:
2325 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2326 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2327 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2328 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2329 // CHECK-NEXT: ret i128 [[TMP3]]
2331 v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); }
2332 // CHECK-LABEL: @vilvl_w(
2333 // CHECK-NEXT: entry:
2334 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2335 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2336 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2337 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2338 // CHECK-NEXT: ret i128 [[TMP3]]
2340 v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); }
2341 // CHECK-LABEL: @vilvl_d(
2342 // CHECK-NEXT: entry:
2343 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2344 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2345 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2346 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2347 // CHECK-NEXT: ret i128 [[TMP3]]
2349 v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); }
2350 // CHECK-LABEL: @vpackev_b(
2351 // CHECK-NEXT: entry:
2352 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2353 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2354 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2355 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2356 // CHECK-NEXT: ret i128 [[TMP3]]
2358 v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); }
2359 // CHECK-LABEL: @vpackev_h(
2360 // CHECK-NEXT: entry:
2361 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2362 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2363 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2364 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2365 // CHECK-NEXT: ret i128 [[TMP3]]
2367 v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); }
2368 // CHECK-LABEL: @vpackev_w(
2369 // CHECK-NEXT: entry:
2370 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2371 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2372 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2373 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2374 // CHECK-NEXT: ret i128 [[TMP3]]
2376 v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); }
2377 // CHECK-LABEL: @vpackev_d(
2378 // CHECK-NEXT: entry:
2379 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2380 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2381 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2382 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2383 // CHECK-NEXT: ret i128 [[TMP3]]
2385 v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); }
2386 // CHECK-LABEL: @vpackod_b(
2387 // CHECK-NEXT: entry:
2388 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2389 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2390 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2391 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2392 // CHECK-NEXT: ret i128 [[TMP3]]
2394 v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); }
2395 // CHECK-LABEL: @vpackod_h(
2396 // CHECK-NEXT: entry:
2397 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2398 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2399 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2400 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2401 // CHECK-NEXT: ret i128 [[TMP3]]
2403 v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); }
2404 // CHECK-LABEL: @vpackod_w(
2405 // CHECK-NEXT: entry:
2406 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2407 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2408 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2409 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2410 // CHECK-NEXT: ret i128 [[TMP3]]
2412 v4i32 vpackod_w(v4i32 _1, v4i32 _2) { return __lsx_vpackod_w(_1, _2); }
2413 // CHECK-LABEL: @vpackod_d(
2414 // CHECK-NEXT: entry:
2415 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2416 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2417 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2418 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2419 // CHECK-NEXT: ret i128 [[TMP3]]
2421 v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); }
2422 // CHECK-LABEL: @vshuf_h(
2423 // CHECK-NEXT: entry:
2424 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2425 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2426 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
2427 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
2428 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
2429 // CHECK-NEXT: ret i128 [[TMP4]]
2431 v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) {
2432 return __lsx_vshuf_h(_1, _2, _3);
2434 // CHECK-LABEL: @vshuf_w(
2435 // CHECK-NEXT: entry:
2436 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2437 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2438 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
2439 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
2440 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
2441 // CHECK-NEXT: ret i128 [[TMP4]]
2443 v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) {
2444 return __lsx_vshuf_w(_1, _2, _3);
2446 // CHECK-LABEL: @vshuf_d(
2447 // CHECK-NEXT: entry:
2448 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2449 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2450 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
2451 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
2452 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
2453 // CHECK-NEXT: ret i128 [[TMP4]]
2455 v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) {
2456 return __lsx_vshuf_d(_1, _2, _3);
2458 // CHECK-LABEL: @vand_v(
2459 // CHECK-NEXT: entry:
2460 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2461 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2462 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2463 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2464 // CHECK-NEXT: ret i128 [[TMP3]]
2466 v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); }
2467 // CHECK-LABEL: @vandi_b(
2468 // CHECK-NEXT: entry:
2469 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2470 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1)
2471 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2472 // CHECK-NEXT: ret i128 [[TMP2]]
2474 v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); }
2475 // CHECK-LABEL: @vor_v(
2476 // CHECK-NEXT: entry:
2477 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2478 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2479 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2480 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2481 // CHECK-NEXT: ret i128 [[TMP3]]
2483 v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); }
2484 // CHECK-LABEL: @vori_b(
2485 // CHECK-NEXT: entry:
2486 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2487 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1)
2488 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2489 // CHECK-NEXT: ret i128 [[TMP2]]
2491 v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); }
2492 // CHECK-LABEL: @vnor_v(
2493 // CHECK-NEXT: entry:
2494 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2495 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2496 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2497 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2498 // CHECK-NEXT: ret i128 [[TMP3]]
2500 v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); }
2501 // CHECK-LABEL: @vnori_b(
2502 // CHECK-NEXT: entry:
2503 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2504 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1)
2505 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2506 // CHECK-NEXT: ret i128 [[TMP2]]
2508 v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); }
2509 // CHECK-LABEL: @vxor_v(
2510 // CHECK-NEXT: entry:
2511 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2512 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2513 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2514 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2515 // CHECK-NEXT: ret i128 [[TMP3]]
2517 v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, _2); }
2518 // CHECK-LABEL: @vxori_b(
2519 // CHECK-NEXT: entry:
2520 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2521 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1)
2522 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2523 // CHECK-NEXT: ret i128 [[TMP2]]
2525 v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); }
2526 // CHECK-LABEL: @vbitsel_v(
2527 // CHECK-NEXT: entry:
2528 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2529 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2530 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
2531 // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
2532 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
2533 // CHECK-NEXT: ret i128 [[TMP4]]
2535 v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) {
2536 return __lsx_vbitsel_v(_1, _2, _3);
2538 // CHECK-LABEL: @vbitseli_b(
2539 // CHECK-NEXT: entry:
2540 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2541 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2542 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
2543 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2544 // CHECK-NEXT: ret i128 [[TMP3]]
2546 v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); }
2547 // CHECK-LABEL: @vshuf4i_b(
2548 // CHECK-NEXT: entry:
2549 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2550 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1)
2551 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2552 // CHECK-NEXT: ret i128 [[TMP2]]
2554 v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); }
2555 // CHECK-LABEL: @vshuf4i_h(
2556 // CHECK-NEXT: entry:
2557 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2558 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1)
2559 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2560 // CHECK-NEXT: ret i128 [[TMP2]]
2562 v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); }
2563 // CHECK-LABEL: @vshuf4i_w(
2564 // CHECK-NEXT: entry:
2565 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2566 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1)
2567 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2568 // CHECK-NEXT: ret i128 [[TMP2]]
2570 v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); }
2571 // CHECK-LABEL: @vreplgr2vr_b(
2572 // CHECK-NEXT: entry:
2573 // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]])
2574 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
2575 // CHECK-NEXT: ret i128 [[TMP1]]
2577 v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); }
2578 // CHECK-LABEL: @vreplgr2vr_h(
2579 // CHECK-NEXT: entry:
2580 // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]])
2581 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
2582 // CHECK-NEXT: ret i128 [[TMP1]]
2584 v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); }
2585 // CHECK-LABEL: @vreplgr2vr_w(
2586 // CHECK-NEXT: entry:
2587 // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]])
2588 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
2589 // CHECK-NEXT: ret i128 [[TMP1]]
2591 v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); }
2592 // CHECK-LABEL: @vreplgr2vr_d(
2593 // CHECK-NEXT: entry:
2594 // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]])
2595 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
2596 // CHECK-NEXT: ret i128 [[TMP1]]
2598 v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); }
2599 // CHECK-LABEL: @vpcnt_b(
2600 // CHECK-NEXT: entry:
2601 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2602 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]])
2603 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2604 // CHECK-NEXT: ret i128 [[TMP2]]
2606 v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); }
2607 // CHECK-LABEL: @vpcnt_h(
2608 // CHECK-NEXT: entry:
2609 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2610 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]])
2611 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2612 // CHECK-NEXT: ret i128 [[TMP2]]
2614 v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); }
2615 // CHECK-LABEL: @vpcnt_w(
2616 // CHECK-NEXT: entry:
2617 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2618 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]])
2619 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2620 // CHECK-NEXT: ret i128 [[TMP2]]
2622 v4i32 vpcnt_w(v4i32 _1) { return __lsx_vpcnt_w(_1); }
2623 // CHECK-LABEL: @vpcnt_d(
2624 // CHECK-NEXT: entry:
2625 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2626 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]])
2627 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2628 // CHECK-NEXT: ret i128 [[TMP2]]
2630 v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); }
2631 // CHECK-LABEL: @vclo_b(
2632 // CHECK-NEXT: entry:
2633 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2634 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]])
2635 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2636 // CHECK-NEXT: ret i128 [[TMP2]]
2638 v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); }
2639 // CHECK-LABEL: @vclo_h(
2640 // CHECK-NEXT: entry:
2641 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2642 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]])
2643 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2644 // CHECK-NEXT: ret i128 [[TMP2]]
2646 v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); }
2647 // CHECK-LABEL: @vclo_w(
2648 // CHECK-NEXT: entry:
2649 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2650 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]])
2651 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2652 // CHECK-NEXT: ret i128 [[TMP2]]
2654 v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); }
2655 // CHECK-LABEL: @vclo_d(
2656 // CHECK-NEXT: entry:
2657 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2658 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]])
2659 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2660 // CHECK-NEXT: ret i128 [[TMP2]]
2662 v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); }
2663 // CHECK-LABEL: @vclz_b(
2664 // CHECK-NEXT: entry:
2665 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2666 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]])
2667 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2668 // CHECK-NEXT: ret i128 [[TMP2]]
2670 v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); }
2671 // CHECK-LABEL: @vclz_h(
2672 // CHECK-NEXT: entry:
2673 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2674 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]])
2675 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2676 // CHECK-NEXT: ret i128 [[TMP2]]
2678 v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); }
2679 // CHECK-LABEL: @vclz_w(
2680 // CHECK-NEXT: entry:
2681 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2682 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]])
2683 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2684 // CHECK-NEXT: ret i128 [[TMP2]]
2686 v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); }
2687 // CHECK-LABEL: @vclz_d(
2688 // CHECK-NEXT: entry:
2689 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2690 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]])
2691 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2692 // CHECK-NEXT: ret i128 [[TMP2]]
2694 v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); }
2695 // CHECK-LABEL: @vpickve2gr_b(
2696 // CHECK-NEXT: entry:
2697 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2698 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1)
2699 // CHECK-NEXT: ret i32 [[TMP1]]
2701 int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); }
2702 // CHECK-LABEL: @vpickve2gr_h(
2703 // CHECK-NEXT: entry:
2704 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2705 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1)
2706 // CHECK-NEXT: ret i32 [[TMP1]]
2708 int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); }
2709 // CHECK-LABEL: @vpickve2gr_w(
2710 // CHECK-NEXT: entry:
2711 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2712 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1)
2713 // CHECK-NEXT: ret i32 [[TMP1]]
2715 int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); }
2716 // CHECK-LABEL: @vpickve2gr_d(
2717 // CHECK-NEXT: entry:
2718 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2719 // CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1)
2720 // CHECK-NEXT: ret i64 [[TMP1]]
2722 long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); }
2723 // CHECK-LABEL: @vpickve2gr_bu(
2724 // CHECK-NEXT: entry:
2725 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2726 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1)
2727 // CHECK-NEXT: ret i32 [[TMP1]]
2729 unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); }
2730 // CHECK-LABEL: @vpickve2gr_hu(
2731 // CHECK-NEXT: entry:
2732 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2733 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1)
2734 // CHECK-NEXT: ret i32 [[TMP1]]
2736 unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); }
2737 // CHECK-LABEL: @vpickve2gr_wu(
2738 // CHECK-NEXT: entry:
2739 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2740 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1)
2741 // CHECK-NEXT: ret i32 [[TMP1]]
2743 unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); }
2744 // CHECK-LABEL: @vpickve2gr_du(
2745 // CHECK-NEXT: entry:
2746 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2747 // CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1)
2748 // CHECK-NEXT: ret i64 [[TMP1]]
2750 unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); }
2751 // CHECK-LABEL: @vinsgr2vr_b(
2752 // CHECK-NEXT: entry:
2753 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2754 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1)
2755 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2756 // CHECK-NEXT: ret i128 [[TMP2]]
2758 v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); }
2759 // CHECK-LABEL: @vinsgr2vr_h(
2760 // CHECK-NEXT: entry:
2761 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2762 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1)
2763 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2764 // CHECK-NEXT: ret i128 [[TMP2]]
2766 v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); }
2767 // CHECK-LABEL: @vinsgr2vr_w(
2768 // CHECK-NEXT: entry:
2769 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2770 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1)
2771 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2772 // CHECK-NEXT: ret i128 [[TMP2]]
2774 v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); }
2775 // CHECK-LABEL: @vinsgr2vr_d(
2776 // CHECK-NEXT: entry:
2777 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2778 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1)
2779 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2780 // CHECK-NEXT: ret i128 [[TMP2]]
2782 v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); }
2783 // CHECK-LABEL: @vfadd_s(
2784 // CHECK-NEXT: entry:
2785 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2786 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2787 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2788 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2789 // CHECK-NEXT: ret i128 [[TMP3]]
2791 v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); }
2792 // CHECK-LABEL: @vfadd_d(
2793 // CHECK-NEXT: entry:
2794 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2795 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2796 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2797 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2798 // CHECK-NEXT: ret i128 [[TMP3]]
2800 v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); }
2801 // CHECK-LABEL: @vfsub_s(
2802 // CHECK-NEXT: entry:
2803 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2804 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2805 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2806 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2807 // CHECK-NEXT: ret i128 [[TMP3]]
2809 v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); }
2810 // CHECK-LABEL: @vfsub_d(
2811 // CHECK-NEXT: entry:
2812 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2813 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2814 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2815 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2816 // CHECK-NEXT: ret i128 [[TMP3]]
2818 v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); }
2819 // CHECK-LABEL: @vfmul_s(
2820 // CHECK-NEXT: entry:
2821 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2822 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2823 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2824 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2825 // CHECK-NEXT: ret i128 [[TMP3]]
2827 v4f32 vfmul_s(v4f32 _1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); }
2828 // CHECK-LABEL: @vfmul_d(
2829 // CHECK-NEXT: entry:
2830 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2831 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2832 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2833 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2834 // CHECK-NEXT: ret i128 [[TMP3]]
2836 v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); }
2837 // CHECK-LABEL: @vfdiv_s(
2838 // CHECK-NEXT: entry:
2839 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2840 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2841 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2842 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2843 // CHECK-NEXT: ret i128 [[TMP3]]
2845 v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); }
2846 // CHECK-LABEL: @vfdiv_d(
2847 // CHECK-NEXT: entry:
2848 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2849 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2850 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2851 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2852 // CHECK-NEXT: ret i128 [[TMP3]]
2854 v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, _2); }
2855 // CHECK-LABEL: @vfcvt_h_s(
2856 // CHECK-NEXT: entry:
2857 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2858 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2859 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2860 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2861 // CHECK-NEXT: ret i128 [[TMP3]]
2863 v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); }
2864 // CHECK-LABEL: @vfcvt_s_d(
2865 // CHECK-NEXT: entry:
2866 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2867 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2868 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2869 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2870 // CHECK-NEXT: ret i128 [[TMP3]]
2872 v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); }
2873 // CHECK-LABEL: @vfmin_s(
2874 // CHECK-NEXT: entry:
2875 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2876 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2877 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2878 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2879 // CHECK-NEXT: ret i128 [[TMP3]]
2881 v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); }
2882 // CHECK-LABEL: @vfmin_d(
2883 // CHECK-NEXT: entry:
2884 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2885 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2886 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2887 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2888 // CHECK-NEXT: ret i128 [[TMP3]]
2890 v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); }
2891 // CHECK-LABEL: @vfmina_s(
2892 // CHECK-NEXT: entry:
2893 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2894 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2895 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2896 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2897 // CHECK-NEXT: ret i128 [[TMP3]]
2899 v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); }
2900 // CHECK-LABEL: @vfmina_d(
2901 // CHECK-NEXT: entry:
2902 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2903 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2904 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2905 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2906 // CHECK-NEXT: ret i128 [[TMP3]]
2908 v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); }
2909 // CHECK-LABEL: @vfmax_s(
2910 // CHECK-NEXT: entry:
2911 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2912 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2913 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2914 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2915 // CHECK-NEXT: ret i128 [[TMP3]]
2917 v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); }
2918 // CHECK-LABEL: @vfmax_d(
2919 // CHECK-NEXT: entry:
2920 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2921 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2922 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2923 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2924 // CHECK-NEXT: ret i128 [[TMP3]]
2926 v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); }
2927 // CHECK-LABEL: @vfmaxa_s(
2928 // CHECK-NEXT: entry:
2929 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2930 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2931 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2932 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2933 // CHECK-NEXT: ret i128 [[TMP3]]
2935 v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); }
2936 // CHECK-LABEL: @vfmaxa_d(
2937 // CHECK-NEXT: entry:
2938 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2939 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2940 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2941 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2942 // CHECK-NEXT: ret i128 [[TMP3]]
2944 v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); }
2945 // CHECK-LABEL: @vfclass_s(
2946 // CHECK-NEXT: entry:
2947 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2948 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]])
2949 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2950 // CHECK-NEXT: ret i128 [[TMP2]]
2952 v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); }
2953 // CHECK-LABEL: @vfclass_d(
2954 // CHECK-NEXT: entry:
2955 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2956 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]])
2957 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2958 // CHECK-NEXT: ret i128 [[TMP2]]
2960 v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); }
2961 // CHECK-LABEL: @vfsqrt_s(
2962 // CHECK-NEXT: entry:
2963 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2964 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]])
2965 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
2966 // CHECK-NEXT: ret i128 [[TMP2]]
2968 v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); }
2969 // CHECK-LABEL: @vfsqrt_d(
2970 // CHECK-NEXT: entry:
2971 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2972 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]])
2973 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
2974 // CHECK-NEXT: ret i128 [[TMP2]]
2976 v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); }
2977 // CHECK-LABEL: @vfrecip_s(
2978 // CHECK-NEXT: entry:
2979 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2980 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]])
2981 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
2982 // CHECK-NEXT: ret i128 [[TMP2]]
2984 v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); }
2985 // CHECK-LABEL: @vfrecip_d(
2986 // CHECK-NEXT: entry:
2987 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2988 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]])
2989 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
2990 // CHECK-NEXT: ret i128 [[TMP2]]
2992 v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); }
2993 // CHECK-LABEL: @vfrint_s(
2994 // CHECK-NEXT: entry:
2995 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2996 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]])
2997 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
2998 // CHECK-NEXT: ret i128 [[TMP2]]
3000 v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); }
3001 // CHECK-LABEL: @vfrint_d(
3002 // CHECK-NEXT: entry:
3003 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3004 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]])
3005 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3006 // CHECK-NEXT: ret i128 [[TMP2]]
3008 v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); }
3009 // CHECK-LABEL: @vfrsqrt_s(
3010 // CHECK-NEXT: entry:
3011 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3012 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]])
3013 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3014 // CHECK-NEXT: ret i128 [[TMP2]]
3016 v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); }
3017 // CHECK-LABEL: @vfrsqrt_d(
3018 // CHECK-NEXT: entry:
3019 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3020 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]])
3021 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3022 // CHECK-NEXT: ret i128 [[TMP2]]
3024 v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); }
3025 // CHECK-LABEL: @vflogb_s(
3026 // CHECK-NEXT: entry:
3027 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3028 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]])
3029 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3030 // CHECK-NEXT: ret i128 [[TMP2]]
3032 v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); }
3033 // CHECK-LABEL: @vflogb_d(
3034 // CHECK-NEXT: entry:
3035 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3036 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]])
3037 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3038 // CHECK-NEXT: ret i128 [[TMP2]]
3040 v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); }
3041 // CHECK-LABEL: @vfcvth_s_h(
3042 // CHECK-NEXT: entry:
3043 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3044 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]])
3045 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3046 // CHECK-NEXT: ret i128 [[TMP2]]
3048 v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); }
3049 // CHECK-LABEL: @vfcvth_d_s(
3050 // CHECK-NEXT: entry:
3051 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3052 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]])
3053 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3054 // CHECK-NEXT: ret i128 [[TMP2]]
3056 v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); }
3057 // CHECK-LABEL: @vfcvtl_s_h(
3058 // CHECK-NEXT: entry:
3059 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3060 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]])
3061 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3062 // CHECK-NEXT: ret i128 [[TMP2]]
3064 v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); }
3065 // CHECK-LABEL: @vfcvtl_d_s(
3066 // CHECK-NEXT: entry:
3067 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3068 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]])
3069 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3070 // CHECK-NEXT: ret i128 [[TMP2]]
3072 v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); }
3073 // CHECK-LABEL: @vftint_w_s(
3074 // CHECK-NEXT: entry:
3075 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3076 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]])
3077 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3078 // CHECK-NEXT: ret i128 [[TMP2]]
3080 v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); }
3081 // CHECK-LABEL: @vftint_l_d(
3082 // CHECK-NEXT: entry:
3083 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3084 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]])
3085 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3086 // CHECK-NEXT: ret i128 [[TMP2]]
3088 v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); }
3089 // CHECK-LABEL: @vftint_wu_s(
3090 // CHECK-NEXT: entry:
3091 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3092 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]])
3093 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3094 // CHECK-NEXT: ret i128 [[TMP2]]
3096 v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); }
3097 // CHECK-LABEL: @vftint_lu_d(
3098 // CHECK-NEXT: entry:
3099 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3100 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]])
3101 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3102 // CHECK-NEXT: ret i128 [[TMP2]]
3104 v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); }
3105 // CHECK-LABEL: @vftintrz_w_s(
3106 // CHECK-NEXT: entry:
3107 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3108 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]])
3109 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3110 // CHECK-NEXT: ret i128 [[TMP2]]
3112 v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); }
3113 // CHECK-LABEL: @vftintrz_l_d(
3114 // CHECK-NEXT: entry:
3115 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3116 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]])
3117 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3118 // CHECK-NEXT: ret i128 [[TMP2]]
3120 v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); }
3121 // CHECK-LABEL: @vftintrz_wu_s(
3122 // CHECK-NEXT: entry:
3123 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3124 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]])
3125 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3126 // CHECK-NEXT: ret i128 [[TMP2]]
3128 v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); }
3129 // CHECK-LABEL: @vftintrz_lu_d(
3130 // CHECK-NEXT: entry:
3131 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3132 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]])
3133 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3134 // CHECK-NEXT: ret i128 [[TMP2]]
3136 v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); }
3137 // CHECK-LABEL: @vffint_s_w(
3138 // CHECK-NEXT: entry:
3139 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3140 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]])
3141 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3142 // CHECK-NEXT: ret i128 [[TMP2]]
3144 v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); }
3145 // CHECK-LABEL: @vffint_d_l(
3146 // CHECK-NEXT: entry:
3147 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3148 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]])
3149 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3150 // CHECK-NEXT: ret i128 [[TMP2]]
3152 v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); }
3153 // CHECK-LABEL: @vffint_s_wu(
3154 // CHECK-NEXT: entry:
3155 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3156 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]])
3157 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3158 // CHECK-NEXT: ret i128 [[TMP2]]
3160 v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); }
3161 // CHECK-LABEL: @vffint_d_lu(
3162 // CHECK-NEXT: entry:
3163 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3164 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]])
3165 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3166 // CHECK-NEXT: ret i128 [[TMP2]]
3168 v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); }
3169 // CHECK-LABEL: @vandn_v(
3170 // CHECK-NEXT: entry:
3171 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3172 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3173 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3174 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3175 // CHECK-NEXT: ret i128 [[TMP3]]
3177 v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); }
3178 // CHECK-LABEL: @vneg_b(
3179 // CHECK-NEXT: entry:
3180 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3181 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]])
3182 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
3183 // CHECK-NEXT: ret i128 [[TMP2]]
3185 v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); }
3186 // CHECK-LABEL: @vneg_h(
3187 // CHECK-NEXT: entry:
3188 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3189 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]])
3190 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3191 // CHECK-NEXT: ret i128 [[TMP2]]
3193 v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); }
3194 // CHECK-LABEL: @vneg_w(
3195 // CHECK-NEXT: entry:
3196 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3197 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]])
3198 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3199 // CHECK-NEXT: ret i128 [[TMP2]]
3201 v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); }
3202 // CHECK-LABEL: @vneg_d(
3203 // CHECK-NEXT: entry:
3204 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3205 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]])
3206 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3207 // CHECK-NEXT: ret i128 [[TMP2]]
3209 v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); }
3210 // CHECK-LABEL: @vmuh_b(
3211 // CHECK-NEXT: entry:
3212 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3213 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3214 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3215 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3216 // CHECK-NEXT: ret i128 [[TMP3]]
3218 v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __lsx_vmuh_b(_1, _2); }
3219 // CHECK-LABEL: @vmuh_h(
3220 // CHECK-NEXT: entry:
3221 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3222 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3223 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3224 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3225 // CHECK-NEXT: ret i128 [[TMP3]]
3227 v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); }
3228 // CHECK-LABEL: @vmuh_w(
3229 // CHECK-NEXT: entry:
3230 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3231 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3232 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3233 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3234 // CHECK-NEXT: ret i128 [[TMP3]]
3236 v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); }
3237 // CHECK-LABEL: @vmuh_d(
3238 // CHECK-NEXT: entry:
3239 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3240 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3241 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3242 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3243 // CHECK-NEXT: ret i128 [[TMP3]]
3245 v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); }
3246 // CHECK-LABEL: @vmuh_bu(
3247 // CHECK-NEXT: entry:
3248 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3249 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3250 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3251 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3252 // CHECK-NEXT: ret i128 [[TMP3]]
3254 v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); }
3255 // CHECK-LABEL: @vmuh_hu(
3256 // CHECK-NEXT: entry:
3257 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3258 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3259 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3260 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3261 // CHECK-NEXT: ret i128 [[TMP3]]
3263 v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); }
3264 // CHECK-LABEL: @vmuh_wu(
3265 // CHECK-NEXT: entry:
3266 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3267 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3268 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3269 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3270 // CHECK-NEXT: ret i128 [[TMP3]]
3272 v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); }
3273 // CHECK-LABEL: @vmuh_du(
3274 // CHECK-NEXT: entry:
3275 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3276 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3277 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3278 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3279 // CHECK-NEXT: ret i128 [[TMP3]]
3281 v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); }
3282 // CHECK-LABEL: @vsllwil_h_b(
3283 // CHECK-NEXT: entry:
3284 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3285 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1)
3286 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3287 // CHECK-NEXT: ret i128 [[TMP2]]
3289 v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); }
3290 // CHECK-LABEL: @vsllwil_w_h(
3291 // CHECK-NEXT: entry:
3292 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3293 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1)
3294 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3295 // CHECK-NEXT: ret i128 [[TMP2]]
3297 v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); }
3298 // CHECK-LABEL: @vsllwil_d_w(
3299 // CHECK-NEXT: entry:
3300 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3301 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1)
3302 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3303 // CHECK-NEXT: ret i128 [[TMP2]]
3305 v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); }
3306 // CHECK-LABEL: @vsllwil_hu_bu(
3307 // CHECK-NEXT: entry:
3308 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3309 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1)
3310 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3311 // CHECK-NEXT: ret i128 [[TMP2]]
3313 v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); }
3314 // CHECK-LABEL: @vsllwil_wu_hu(
3315 // CHECK-NEXT: entry:
3316 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3317 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1)
3318 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3319 // CHECK-NEXT: ret i128 [[TMP2]]
3321 v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); }
3322 // CHECK-LABEL: @vsllwil_du_wu(
3323 // CHECK-NEXT: entry:
3324 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3325 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1)
3326 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3327 // CHECK-NEXT: ret i128 [[TMP2]]
3329 v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); }
3330 // CHECK-LABEL: @vsran_b_h(
3331 // CHECK-NEXT: entry:
3332 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3333 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3334 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3335 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3336 // CHECK-NEXT: ret i128 [[TMP3]]
3338 v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); }
3339 // CHECK-LABEL: @vsran_h_w(
3340 // CHECK-NEXT: entry:
3341 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3342 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3343 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3344 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3345 // CHECK-NEXT: ret i128 [[TMP3]]
3347 v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsran_h_w(_1, _2); }
3348 // CHECK-LABEL: @vsran_w_d(
3349 // CHECK-NEXT: entry:
3350 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3351 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3352 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3353 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3354 // CHECK-NEXT: ret i128 [[TMP3]]
3356 v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); }
3357 // CHECK-LABEL: @vssran_b_h(
3358 // CHECK-NEXT: entry:
3359 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3360 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3361 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3362 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3363 // CHECK-NEXT: ret i128 [[TMP3]]
3365 v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); }
3366 // CHECK-LABEL: @vssran_h_w(
3367 // CHECK-NEXT: entry:
3368 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3369 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3370 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3371 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3372 // CHECK-NEXT: ret i128 [[TMP3]]
3374 v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); }
3375 // CHECK-LABEL: @vssran_w_d(
3376 // CHECK-NEXT: entry:
3377 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3378 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3379 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3380 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3381 // CHECK-NEXT: ret i128 [[TMP3]]
3383 v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); }
3384 // CHECK-LABEL: @vssran_bu_h(
3385 // CHECK-NEXT: entry:
3386 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3387 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3388 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3389 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3390 // CHECK-NEXT: ret i128 [[TMP3]]
3392 v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); }
3393 // CHECK-LABEL: @vssran_hu_w(
3394 // CHECK-NEXT: entry:
3395 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3396 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3397 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3398 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3399 // CHECK-NEXT: ret i128 [[TMP3]]
3401 v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); }
3402 // CHECK-LABEL: @vssran_wu_d(
3403 // CHECK-NEXT: entry:
3404 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3405 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3406 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3407 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3408 // CHECK-NEXT: ret i128 [[TMP3]]
3410 v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssran_wu_d(_1, _2); }
3411 // CHECK-LABEL: @vsrarn_b_h(
3412 // CHECK-NEXT: entry:
3413 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3414 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3415 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3416 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3417 // CHECK-NEXT: ret i128 [[TMP3]]
3419 v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); }
3420 // CHECK-LABEL: @vsrarn_h_w(
3421 // CHECK-NEXT: entry:
3422 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3423 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3424 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3425 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3426 // CHECK-NEXT: ret i128 [[TMP3]]
3428 v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); }
3429 // CHECK-LABEL: @vsrarn_w_d(
3430 // CHECK-NEXT: entry:
3431 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3432 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3433 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3434 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3435 // CHECK-NEXT: ret i128 [[TMP3]]
3437 v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrarn_w_d(_1, _2); }
3438 // CHECK-LABEL: @vssrarn_b_h(
3439 // CHECK-NEXT: entry:
3440 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3441 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3442 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3443 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3444 // CHECK-NEXT: ret i128 [[TMP3]]
3446 v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); }
3447 // CHECK-LABEL: @vssrarn_h_w(
3448 // CHECK-NEXT: entry:
3449 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3450 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3451 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3452 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3453 // CHECK-NEXT: ret i128 [[TMP3]]
3455 v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); }
3456 // CHECK-LABEL: @vssrarn_w_d(
3457 // CHECK-NEXT: entry:
3458 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3459 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3460 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3461 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3462 // CHECK-NEXT: ret i128 [[TMP3]]
3464 v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); }
3465 // CHECK-LABEL: @vssrarn_bu_h(
3466 // CHECK-NEXT: entry:
3467 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3468 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3469 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3470 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3471 // CHECK-NEXT: ret i128 [[TMP3]]
3473 v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrarn_bu_h(_1, _2); }
3474 // CHECK-LABEL: @vssrarn_hu_w(
3475 // CHECK-NEXT: entry:
3476 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3477 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3478 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3479 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3480 // CHECK-NEXT: ret i128 [[TMP3]]
3482 v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); }
3483 // CHECK-LABEL: @vssrarn_wu_d(
3484 // CHECK-NEXT: entry:
3485 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3486 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3487 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3488 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3489 // CHECK-NEXT: ret i128 [[TMP3]]
3491 v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); }
3492 // CHECK-LABEL: @vsrln_b_h(
3493 // CHECK-NEXT: entry:
3494 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3495 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3496 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3497 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3498 // CHECK-NEXT: ret i128 [[TMP3]]
3500 v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); }
3501 // CHECK-LABEL: @vsrln_h_w(
3502 // CHECK-NEXT: entry:
3503 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3504 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3505 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3506 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3507 // CHECK-NEXT: ret i128 [[TMP3]]
3509 v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrln_h_w(_1, _2); }
3510 // CHECK-LABEL: @vsrln_w_d(
3511 // CHECK-NEXT: entry:
3512 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3513 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3514 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3515 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3516 // CHECK-NEXT: ret i128 [[TMP3]]
3518 v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); }
3519 // CHECK-LABEL: @vssrln_bu_h(
3520 // CHECK-NEXT: entry:
3521 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3522 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3523 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3524 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3525 // CHECK-NEXT: ret i128 [[TMP3]]
3527 v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); }
3528 // CHECK-LABEL: @vssrln_hu_w(
3529 // CHECK-NEXT: entry:
3530 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3531 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3532 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3533 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3534 // CHECK-NEXT: ret i128 [[TMP3]]
3536 v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrln_hu_w(_1, _2); }
3537 // CHECK-LABEL: @vssrln_wu_d(
3538 // CHECK-NEXT: entry:
3539 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3540 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3541 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3542 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3543 // CHECK-NEXT: ret i128 [[TMP3]]
3545 v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); }
3546 // CHECK-LABEL: @vsrlrn_b_h(
3547 // CHECK-NEXT: entry:
3548 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3549 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3550 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3551 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3552 // CHECK-NEXT: ret i128 [[TMP3]]
3554 v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); }
3555 // CHECK-LABEL: @vsrlrn_h_w(
3556 // CHECK-NEXT: entry:
3557 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3558 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3559 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3560 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3561 // CHECK-NEXT: ret i128 [[TMP3]]
3563 v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); }
3564 // CHECK-LABEL: @vsrlrn_w_d(
3565 // CHECK-NEXT: entry:
3566 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3567 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3568 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3569 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3570 // CHECK-NEXT: ret i128 [[TMP3]]
3572 v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); }
3573 // CHECK-LABEL: @vssrlrn_bu_h(
3574 // CHECK-NEXT: entry:
3575 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3576 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3577 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3578 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3579 // CHECK-NEXT: ret i128 [[TMP3]]
3581 v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); }
3582 // CHECK-LABEL: @vssrlrn_hu_w(
3583 // CHECK-NEXT: entry:
3584 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3585 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3586 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3587 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3588 // CHECK-NEXT: ret i128 [[TMP3]]
3590 v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); }
3591 // CHECK-LABEL: @vssrlrn_wu_d(
3592 // CHECK-NEXT: entry:
3593 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3594 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3595 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3596 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3597 // CHECK-NEXT: ret i128 [[TMP3]]
3599 v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrlrn_wu_d(_1, _2); }
3600 // CHECK-LABEL: @vfrstpi_b(
3601 // CHECK-NEXT: entry:
3602 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3603 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3604 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
3605 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3606 // CHECK-NEXT: ret i128 [[TMP3]]
3608 v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); }
3609 // CHECK-LABEL: @vfrstpi_h(
3610 // CHECK-NEXT: entry:
3611 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3612 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3613 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
3614 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3615 // CHECK-NEXT: ret i128 [[TMP3]]
3617 v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); }
3618 // CHECK-LABEL: @vfrstp_b(
3619 // CHECK-NEXT: entry:
3620 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3621 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3622 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
3623 // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
3624 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
3625 // CHECK-NEXT: ret i128 [[TMP4]]
3627 v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) {
3628 return __lsx_vfrstp_b(_1, _2, _3);
3630 // CHECK-LABEL: @vfrstp_h(
3631 // CHECK-NEXT: entry:
3632 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3633 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3634 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
3635 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
3636 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
3637 // CHECK-NEXT: ret i128 [[TMP4]]
3639 v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) {
3640 return __lsx_vfrstp_h(_1, _2, _3);
3642 // CHECK-LABEL: @vshuf4i_d(
3643 // CHECK-NEXT: entry:
3644 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3645 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3646 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
3647 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3648 // CHECK-NEXT: ret i128 [[TMP3]]
3650 v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); }
3651 // CHECK-LABEL: @vbsrl_v(
3652 // CHECK-NEXT: entry:
3653 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3654 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1)
3655 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
3656 // CHECK-NEXT: ret i128 [[TMP2]]
3658 v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); }
3659 // CHECK-LABEL: @vbsll_v(
3660 // CHECK-NEXT: entry:
3661 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3662 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1)
3663 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
3664 // CHECK-NEXT: ret i128 [[TMP2]]
3666 v16i8 vbsll_v(v16i8 _1) { return __lsx_vbsll_v(_1, 1); }
3667 // CHECK-LABEL: @vextrins_b(
3668 // CHECK-NEXT: entry:
3669 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3670 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3671 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
3672 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3673 // CHECK-NEXT: ret i128 [[TMP3]]
3675 v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); }
3676 // CHECK-LABEL: @vextrins_h(
3677 // CHECK-NEXT: entry:
3678 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3679 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3680 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
3681 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3682 // CHECK-NEXT: ret i128 [[TMP3]]
3684 v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); }
3685 // CHECK-LABEL: @vextrins_w(
3686 // CHECK-NEXT: entry:
3687 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3688 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3689 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
3690 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3691 // CHECK-NEXT: ret i128 [[TMP3]]
3693 v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); }
3694 // CHECK-LABEL: @vextrins_d(
3695 // CHECK-NEXT: entry:
3696 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3697 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3698 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
3699 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3700 // CHECK-NEXT: ret i128 [[TMP3]]
3702 v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); }
3703 // CHECK-LABEL: @vmskltz_b(
3704 // CHECK-NEXT: entry:
3705 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3706 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]])
3707 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
3708 // CHECK-NEXT: ret i128 [[TMP2]]
3710 v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); }
3711 // CHECK-LABEL: @vmskltz_h(
3712 // CHECK-NEXT: entry:
3713 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3714 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]])
3715 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3716 // CHECK-NEXT: ret i128 [[TMP2]]
3718 v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); }
3719 // CHECK-LABEL: @vmskltz_w(
3720 // CHECK-NEXT: entry:
3721 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3722 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]])
3723 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3724 // CHECK-NEXT: ret i128 [[TMP2]]
3726 v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); }
3727 // CHECK-LABEL: @vmskltz_d(
3728 // CHECK-NEXT: entry:
3729 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3730 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]])
3731 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3732 // CHECK-NEXT: ret i128 [[TMP2]]
3734 v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); }
3735 // CHECK-LABEL: @vsigncov_b(
3736 // CHECK-NEXT: entry:
3737 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3738 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3739 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3740 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3741 // CHECK-NEXT: ret i128 [[TMP3]]
3743 v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); }
3744 // CHECK-LABEL: @vsigncov_h(
3745 // CHECK-NEXT: entry:
3746 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3747 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3748 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3749 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3750 // CHECK-NEXT: ret i128 [[TMP3]]
3752 v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); }
3753 // CHECK-LABEL: @vsigncov_w(
3754 // CHECK-NEXT: entry:
3755 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3756 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3757 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3758 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3759 // CHECK-NEXT: ret i128 [[TMP3]]
3761 v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); }
3762 // CHECK-LABEL: @vsigncov_d(
3763 // CHECK-NEXT: entry:
3764 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3765 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3766 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3767 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3768 // CHECK-NEXT: ret i128 [[TMP3]]
3770 v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); }
3771 // CHECK-LABEL: @vfmadd_s(
3772 // CHECK-NEXT: entry:
3773 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3774 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3775 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
3776 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
3777 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
3778 // CHECK-NEXT: ret i128 [[TMP4]]
3780 v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
3781 return __lsx_vfmadd_s(_1, _2, _3);
3783 // CHECK-LABEL: @vfmadd_d(
3784 // CHECK-NEXT: entry:
3785 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3786 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3787 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
3788 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
3789 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
3790 // CHECK-NEXT: ret i128 [[TMP4]]
3792 v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
3793 return __lsx_vfmadd_d(_1, _2, _3);
3795 // CHECK-LABEL: @vfmsub_s(
3796 // CHECK-NEXT: entry:
3797 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3798 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3799 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
3800 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
3801 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
3802 // CHECK-NEXT: ret i128 [[TMP4]]
3804 v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
3805 return __lsx_vfmsub_s(_1, _2, _3);
3807 // CHECK-LABEL: @vfmsub_d(
3808 // CHECK-NEXT: entry:
3809 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3810 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3811 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
3812 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
3813 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
3814 // CHECK-NEXT: ret i128 [[TMP4]]
3816 v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
3817 return __lsx_vfmsub_d(_1, _2, _3);
3819 // CHECK-LABEL: @vfnmadd_s(
3820 // CHECK-NEXT: entry:
3821 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3822 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3823 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
3824 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
3825 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
3826 // CHECK-NEXT: ret i128 [[TMP4]]
3828 v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
3829 return __lsx_vfnmadd_s(_1, _2, _3);
3831 // CHECK-LABEL: @vfnmadd_d(
3832 // CHECK-NEXT: entry:
3833 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3834 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3835 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
3836 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
3837 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
3838 // CHECK-NEXT: ret i128 [[TMP4]]
3840 v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
3841 return __lsx_vfnmadd_d(_1, _2, _3);
3843 // CHECK-LABEL: @vfnmsub_s(
3844 // CHECK-NEXT: entry:
3845 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3846 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3847 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
3848 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
3849 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
3850 // CHECK-NEXT: ret i128 [[TMP4]]
3852 v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
3853 return __lsx_vfnmsub_s(_1, _2, _3);
3855 // CHECK-LABEL: @vfnmsub_d(
3856 // CHECK-NEXT: entry:
3857 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3858 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3859 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
3860 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
3861 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
3862 // CHECK-NEXT: ret i128 [[TMP4]]
3864 v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
3865 return __lsx_vfnmsub_d(_1, _2, _3);
3867 // CHECK-LABEL: @vftintrne_w_s(
3868 // CHECK-NEXT: entry:
3869 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3870 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]])
3871 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3872 // CHECK-NEXT: ret i128 [[TMP2]]
3874 v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); }
3875 // CHECK-LABEL: @vftintrne_l_d(
3876 // CHECK-NEXT: entry:
3877 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3878 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]])
3879 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3880 // CHECK-NEXT: ret i128 [[TMP2]]
3882 v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); }
3883 // CHECK-LABEL: @vftintrp_w_s(
3884 // CHECK-NEXT: entry:
3885 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3886 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]])
3887 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3888 // CHECK-NEXT: ret i128 [[TMP2]]
3890 v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); }
3891 // CHECK-LABEL: @vftintrp_l_d(
3892 // CHECK-NEXT: entry:
3893 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3894 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]])
3895 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3896 // CHECK-NEXT: ret i128 [[TMP2]]
3898 v2i64 vftintrp_l_d(v2f64 _1) { return __lsx_vftintrp_l_d(_1); }
3899 // CHECK-LABEL: @vftintrm_w_s(
3900 // CHECK-NEXT: entry:
3901 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3902 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]])
3903 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3904 // CHECK-NEXT: ret i128 [[TMP2]]
3906 v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); }
3907 // CHECK-LABEL: @vftintrm_l_d(
3908 // CHECK-NEXT: entry:
3909 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3910 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]])
3911 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3912 // CHECK-NEXT: ret i128 [[TMP2]]
3914 v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); }
3915 // CHECK-LABEL: @vftint_w_d(
3916 // CHECK-NEXT: entry:
3917 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3918 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3919 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3920 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3921 // CHECK-NEXT: ret i128 [[TMP3]]
3923 v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); }
3924 // CHECK-LABEL: @vffint_s_l(
3925 // CHECK-NEXT: entry:
3926 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3927 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3928 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3929 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3930 // CHECK-NEXT: ret i128 [[TMP3]]
3932 v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); }
3933 // CHECK-LABEL: @vftintrz_w_d(
3934 // CHECK-NEXT: entry:
3935 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3936 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3937 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3938 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3939 // CHECK-NEXT: ret i128 [[TMP3]]
3941 v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); }
3942 // CHECK-LABEL: @vftintrp_w_d(
3943 // CHECK-NEXT: entry:
3944 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3945 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3946 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3947 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3948 // CHECK-NEXT: ret i128 [[TMP3]]
3950 v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); }
3951 // CHECK-LABEL: @vftintrm_w_d(
3952 // CHECK-NEXT: entry:
3953 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3954 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3955 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3956 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3957 // CHECK-NEXT: ret i128 [[TMP3]]
3959 v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); }
3960 // CHECK-LABEL: @vftintrne_w_d(
3961 // CHECK-NEXT: entry:
3962 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3963 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3964 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3965 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3966 // CHECK-NEXT: ret i128 [[TMP3]]
3968 v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); }
3969 // CHECK-LABEL: @vftintl_l_s(
3970 // CHECK-NEXT: entry:
3971 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3972 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]])
3973 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3974 // CHECK-NEXT: ret i128 [[TMP2]]
3976 v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); }
3977 // CHECK-LABEL: @vftinth_l_s(
3978 // CHECK-NEXT: entry:
3979 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3980 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]])
3981 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3982 // CHECK-NEXT: ret i128 [[TMP2]]
3984 v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); }
3985 // CHECK-LABEL: @vffinth_d_w(
3986 // CHECK-NEXT: entry:
3987 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3988 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]])
3989 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3990 // CHECK-NEXT: ret i128 [[TMP2]]
3992 v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); }
3993 // CHECK-LABEL: @vffintl_d_w(
3994 // CHECK-NEXT: entry:
3995 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3996 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]])
3997 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3998 // CHECK-NEXT: ret i128 [[TMP2]]
4000 v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); }
4001 // CHECK-LABEL: @vftintrzl_l_s(
4002 // CHECK-NEXT: entry:
4003 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4004 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]])
4005 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4006 // CHECK-NEXT: ret i128 [[TMP2]]
4008 v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); }
4009 // CHECK-LABEL: @vftintrzh_l_s(
4010 // CHECK-NEXT: entry:
4011 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4012 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]])
4013 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4014 // CHECK-NEXT: ret i128 [[TMP2]]
4016 v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); }
4017 // CHECK-LABEL: @vftintrpl_l_s(
4018 // CHECK-NEXT: entry:
4019 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4020 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]])
4021 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4022 // CHECK-NEXT: ret i128 [[TMP2]]
4024 v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); }
4025 // CHECK-LABEL: @vftintrph_l_s(
4026 // CHECK-NEXT: entry:
4027 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4028 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]])
4029 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4030 // CHECK-NEXT: ret i128 [[TMP2]]
4032 v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); }
4033 // CHECK-LABEL: @vftintrml_l_s(
4034 // CHECK-NEXT: entry:
4035 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4036 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]])
4037 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4038 // CHECK-NEXT: ret i128 [[TMP2]]
4040 v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); }
4041 // CHECK-LABEL: @vftintrmh_l_s(
4042 // CHECK-NEXT: entry:
4043 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4044 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]])
4045 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4046 // CHECK-NEXT: ret i128 [[TMP2]]
4048 v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); }
4049 // CHECK-LABEL: @vftintrnel_l_s(
4050 // CHECK-NEXT: entry:
4051 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4052 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]])
4053 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4054 // CHECK-NEXT: ret i128 [[TMP2]]
4056 v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); }
4057 // CHECK-LABEL: @vftintrneh_l_s(
4058 // CHECK-NEXT: entry:
4059 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4060 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]])
4061 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4062 // CHECK-NEXT: ret i128 [[TMP2]]
4064 v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); }
4065 // CHECK-LABEL: @vfrintrne_s(
4066 // CHECK-NEXT: entry:
4067 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4068 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]])
4069 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4070 // CHECK-NEXT: ret i128 [[TMP2]]
4072 v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); }
4073 // CHECK-LABEL: @vfrintrne_d(
4074 // CHECK-NEXT: entry:
4075 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4076 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]])
4077 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4078 // CHECK-NEXT: ret i128 [[TMP2]]
4080 v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); }
4081 // CHECK-LABEL: @vfrintrz_s(
4082 // CHECK-NEXT: entry:
4083 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4084 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]])
4085 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4086 // CHECK-NEXT: ret i128 [[TMP2]]
4088 v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); }
4089 // CHECK-LABEL: @vfrintrz_d(
4090 // CHECK-NEXT: entry:
4091 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4092 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]])
4093 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4094 // CHECK-NEXT: ret i128 [[TMP2]]
4096 v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); }
4097 // CHECK-LABEL: @vfrintrp_s(
4098 // CHECK-NEXT: entry:
4099 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4100 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]])
4101 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4102 // CHECK-NEXT: ret i128 [[TMP2]]
4104 v4i32 vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); }
4105 // CHECK-LABEL: @vfrintrp_d(
4106 // CHECK-NEXT: entry:
4107 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4108 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]])
4109 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4110 // CHECK-NEXT: ret i128 [[TMP2]]
4112 v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); }
4113 // CHECK-LABEL: @vfrintrm_s(
4114 // CHECK-NEXT: entry:
4115 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4116 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]])
4117 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4118 // CHECK-NEXT: ret i128 [[TMP2]]
4120 v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); }
4121 // CHECK-LABEL: @vfrintrm_d(
4122 // CHECK-NEXT: entry:
4123 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4124 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]])
4125 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4126 // CHECK-NEXT: ret i128 [[TMP2]]
4128 v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); }
4129 // CHECK-LABEL: @vstelm_b(
4130 // CHECK-NEXT: entry:
4131 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4132 // CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1)
4133 // CHECK-NEXT: ret void
4135 void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); }
4136 // CHECK-LABEL: @vstelm_h(
4137 // CHECK-NEXT: entry:
4138 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4139 // CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1)
4140 // CHECK-NEXT: ret void
4142 void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); }
4143 // CHECK-LABEL: @vstelm_w(
4144 // CHECK-NEXT: entry:
4145 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4146 // CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1)
4147 // CHECK-NEXT: ret void
4149 void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); }
4150 // CHECK-LABEL: @vstelm_d(
4151 // CHECK-NEXT: entry:
4152 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4153 // CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1)
4154 // CHECK-NEXT: ret void
4156 void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); }
4157 // CHECK-LABEL: @vaddwev_d_w(
4158 // CHECK-NEXT: entry:
4159 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4160 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4161 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4162 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4163 // CHECK-NEXT: ret i128 [[TMP3]]
4165 v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); }
4166 // CHECK-LABEL: @vaddwev_w_h(
4167 // CHECK-NEXT: entry:
4168 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4169 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4170 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4171 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4172 // CHECK-NEXT: ret i128 [[TMP3]]
4174 v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); }
4175 // CHECK-LABEL: @vaddwev_h_b(
4176 // CHECK-NEXT: entry:
4177 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4178 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4179 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4180 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4181 // CHECK-NEXT: ret i128 [[TMP3]]
4183 v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); }
4184 // CHECK-LABEL: @vaddwod_d_w(
4185 // CHECK-NEXT: entry:
4186 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4187 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4188 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4189 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4190 // CHECK-NEXT: ret i128 [[TMP3]]
4192 v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); }
4193 // CHECK-LABEL: @vaddwod_w_h(
4194 // CHECK-NEXT: entry:
4195 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4196 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4197 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4198 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4199 // CHECK-NEXT: ret i128 [[TMP3]]
4201 v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); }
4202 // CHECK-LABEL: @vaddwod_h_b(
4203 // CHECK-NEXT: entry:
4204 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4205 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4206 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4207 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4208 // CHECK-NEXT: ret i128 [[TMP3]]
4210 v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); }
4211 // CHECK-LABEL: @vaddwev_d_wu(
4212 // CHECK-NEXT: entry:
4213 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4214 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4215 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4216 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4217 // CHECK-NEXT: ret i128 [[TMP3]]
4219 v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); }
4220 // CHECK-LABEL: @vaddwev_w_hu(
4221 // CHECK-NEXT: entry:
4222 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4223 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4224 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4225 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4226 // CHECK-NEXT: ret i128 [[TMP3]]
4228 v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); }
4229 // CHECK-LABEL: @vaddwev_h_bu(
4230 // CHECK-NEXT: entry:
4231 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4232 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4233 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4234 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4235 // CHECK-NEXT: ret i128 [[TMP3]]
4237 v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); }
4238 // CHECK-LABEL: @vaddwod_d_wu(
4239 // CHECK-NEXT: entry:
4240 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4241 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4242 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4243 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4244 // CHECK-NEXT: ret i128 [[TMP3]]
4246 v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); }
4247 // CHECK-LABEL: @vaddwod_w_hu(
4248 // CHECK-NEXT: entry:
4249 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4250 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4251 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4252 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4253 // CHECK-NEXT: ret i128 [[TMP3]]
4255 v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); }
4256 // CHECK-LABEL: @vaddwod_h_bu(
4257 // CHECK-NEXT: entry:
4258 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4259 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4260 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4261 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4262 // CHECK-NEXT: ret i128 [[TMP3]]
4264 v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); }
4265 // CHECK-LABEL: @vaddwev_d_wu_w(
4266 // CHECK-NEXT: entry:
4267 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4268 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4269 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4270 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4271 // CHECK-NEXT: ret i128 [[TMP3]]
4273 v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) {
4274 return __lsx_vaddwev_d_wu_w(_1, _2);
4276 // CHECK-LABEL: @vaddwev_w_hu_h(
4277 // CHECK-NEXT: entry:
4278 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4279 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4280 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4281 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4282 // CHECK-NEXT: ret i128 [[TMP3]]
4284 v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) {
4285 return __lsx_vaddwev_w_hu_h(_1, _2);
4287 // CHECK-LABEL: @vaddwev_h_bu_b(
4288 // CHECK-NEXT: entry:
4289 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4290 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4291 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4292 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4293 // CHECK-NEXT: ret i128 [[TMP3]]
4295 v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) {
4296 return __lsx_vaddwev_h_bu_b(_1, _2);
4298 // CHECK-LABEL: @vaddwod_d_wu_w(
4299 // CHECK-NEXT: entry:
4300 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4301 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4302 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4303 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4304 // CHECK-NEXT: ret i128 [[TMP3]]
4306 v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) {
4307 return __lsx_vaddwod_d_wu_w(_1, _2);
4309 // CHECK-LABEL: @vaddwod_w_hu_h(
4310 // CHECK-NEXT: entry:
4311 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4312 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4313 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4314 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4315 // CHECK-NEXT: ret i128 [[TMP3]]
4317 v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) {
4318 return __lsx_vaddwod_w_hu_h(_1, _2);
4320 // CHECK-LABEL: @vaddwod_h_bu_b(
4321 // CHECK-NEXT: entry:
4322 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4323 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4324 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4325 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4326 // CHECK-NEXT: ret i128 [[TMP3]]
4328 v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) {
4329 return __lsx_vaddwod_h_bu_b(_1, _2);
4331 // CHECK-LABEL: @vsubwev_d_w(
4332 // CHECK-NEXT: entry:
4333 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4334 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4335 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4336 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4337 // CHECK-NEXT: ret i128 [[TMP3]]
4339 v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); }
4340 // CHECK-LABEL: @vsubwev_w_h(
4341 // CHECK-NEXT: entry:
4342 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4343 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4344 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4345 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4346 // CHECK-NEXT: ret i128 [[TMP3]]
4348 v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); }
4349 // CHECK-LABEL: @vsubwev_h_b(
4350 // CHECK-NEXT: entry:
4351 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4352 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4353 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4354 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4355 // CHECK-NEXT: ret i128 [[TMP3]]
4357 v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); }
4358 // CHECK-LABEL: @vsubwod_d_w(
4359 // CHECK-NEXT: entry:
4360 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4361 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4362 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4363 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4364 // CHECK-NEXT: ret i128 [[TMP3]]
4366 v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); }
4367 // CHECK-LABEL: @vsubwod_w_h(
4368 // CHECK-NEXT: entry:
4369 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4370 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4371 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4372 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4373 // CHECK-NEXT: ret i128 [[TMP3]]
4375 v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); }
4376 // CHECK-LABEL: @vsubwod_h_b(
4377 // CHECK-NEXT: entry:
4378 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4379 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4380 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4381 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4382 // CHECK-NEXT: ret i128 [[TMP3]]
4384 v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); }
4385 // CHECK-LABEL: @vsubwev_d_wu(
4386 // CHECK-NEXT: entry:
4387 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4388 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4389 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4390 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4391 // CHECK-NEXT: ret i128 [[TMP3]]
4393 v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); }
4394 // CHECK-LABEL: @vsubwev_w_hu(
4395 // CHECK-NEXT: entry:
4396 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4397 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4398 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4399 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4400 // CHECK-NEXT: ret i128 [[TMP3]]
4402 v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); }
4403 // CHECK-LABEL: @vsubwev_h_bu(
4404 // CHECK-NEXT: entry:
4405 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4406 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4407 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4408 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4409 // CHECK-NEXT: ret i128 [[TMP3]]
4411 v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); }
4412 // CHECK-LABEL: @vsubwod_d_wu(
4413 // CHECK-NEXT: entry:
4414 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4415 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4416 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4417 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4418 // CHECK-NEXT: ret i128 [[TMP3]]
4420 v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); }
4421 // CHECK-LABEL: @vsubwod_w_hu(
4422 // CHECK-NEXT: entry:
4423 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4424 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4425 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4426 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4427 // CHECK-NEXT: ret i128 [[TMP3]]
4429 v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwod_w_hu(_1, _2); }
4430 // CHECK-LABEL: @vsubwod_h_bu(
4431 // CHECK-NEXT: entry:
4432 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4433 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4434 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4435 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4436 // CHECK-NEXT: ret i128 [[TMP3]]
4438 v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); }
4439 // CHECK-LABEL: @vaddwev_q_d(
4440 // CHECK-NEXT: entry:
4441 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4442 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4443 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4444 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4445 // CHECK-NEXT: ret i128 [[TMP3]]
4447 v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); }
4448 // CHECK-LABEL: @vaddwod_q_d(
4449 // CHECK-NEXT: entry:
4450 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4451 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4452 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4453 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4454 // CHECK-NEXT: ret i128 [[TMP3]]
4456 v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); }
4457 // CHECK-LABEL: @vaddwev_q_du(
4458 // CHECK-NEXT: entry:
4459 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4460 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4461 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4462 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4463 // CHECK-NEXT: ret i128 [[TMP3]]
4465 v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); }
4466 // CHECK-LABEL: @vaddwod_q_du(
4467 // CHECK-NEXT: entry:
4468 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4469 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4470 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4471 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4472 // CHECK-NEXT: ret i128 [[TMP3]]
4474 v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); }
4475 // CHECK-LABEL: @vsubwev_q_d(
4476 // CHECK-NEXT: entry:
4477 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4478 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4479 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4480 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4481 // CHECK-NEXT: ret i128 [[TMP3]]
4483 v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwev_q_d(_1, _2); }
4484 // CHECK-LABEL: @vsubwod_q_d(
4485 // CHECK-NEXT: entry:
4486 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4487 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4488 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4489 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4490 // CHECK-NEXT: ret i128 [[TMP3]]
4492 v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); }
4493 // CHECK-LABEL: @vsubwev_q_du(
4494 // CHECK-NEXT: entry:
4495 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4496 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4497 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4498 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4499 // CHECK-NEXT: ret i128 [[TMP3]]
4501 v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); }
4502 // CHECK-LABEL: @vsubwod_q_du(
4503 // CHECK-NEXT: entry:
4504 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4505 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4506 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4507 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4508 // CHECK-NEXT: ret i128 [[TMP3]]
4510 v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); }
4511 // CHECK-LABEL: @vaddwev_q_du_d(
4512 // CHECK-NEXT: entry:
4513 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4514 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4515 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4516 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4517 // CHECK-NEXT: ret i128 [[TMP3]]
4519 v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) {
4520 return __lsx_vaddwev_q_du_d(_1, _2);
4522 // CHECK-LABEL: @vaddwod_q_du_d(
4523 // CHECK-NEXT: entry:
4524 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4525 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4526 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4527 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4528 // CHECK-NEXT: ret i128 [[TMP3]]
4530 v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) {
4531 return __lsx_vaddwod_q_du_d(_1, _2);
4533 // CHECK-LABEL: @vmulwev_d_w(
4534 // CHECK-NEXT: entry:
4535 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4536 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4537 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4538 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4539 // CHECK-NEXT: ret i128 [[TMP3]]
4541 v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwev_d_w(_1, _2); }
4542 // CHECK-LABEL: @vmulwev_w_h(
4543 // CHECK-NEXT: entry:
4544 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4545 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4546 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4547 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4548 // CHECK-NEXT: ret i128 [[TMP3]]
4550 v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); }
4551 // CHECK-LABEL: @vmulwev_h_b(
4552 // CHECK-NEXT: entry:
4553 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4554 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4555 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4556 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4557 // CHECK-NEXT: ret i128 [[TMP3]]
4559 v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); }
4560 // CHECK-LABEL: @vmulwod_d_w(
4561 // CHECK-NEXT: entry:
4562 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4563 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4564 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4565 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4566 // CHECK-NEXT: ret i128 [[TMP3]]
4568 v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); }
4569 // CHECK-LABEL: @vmulwod_w_h(
4570 // CHECK-NEXT: entry:
4571 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4572 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4573 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4574 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4575 // CHECK-NEXT: ret i128 [[TMP3]]
4577 v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); }
4578 // CHECK-LABEL: @vmulwod_h_b(
4579 // CHECK-NEXT: entry:
4580 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4581 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4582 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4583 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4584 // CHECK-NEXT: ret i128 [[TMP3]]
4586 v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); }
4587 // CHECK-LABEL: @vmulwev_d_wu(
4588 // CHECK-NEXT: entry:
4589 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4590 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4591 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4592 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4593 // CHECK-NEXT: ret i128 [[TMP3]]
4595 v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); }
4596 // CHECK-LABEL: @vmulwev_w_hu(
4597 // CHECK-NEXT: entry:
4598 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4599 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4600 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4601 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4602 // CHECK-NEXT: ret i128 [[TMP3]]
4604 v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); }
4605 // CHECK-LABEL: @vmulwev_h_bu(
4606 // CHECK-NEXT: entry:
4607 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4608 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4609 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4610 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4611 // CHECK-NEXT: ret i128 [[TMP3]]
4613 v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); }
4614 // CHECK-LABEL: @vmulwod_d_wu(
4615 // CHECK-NEXT: entry:
4616 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4617 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4618 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4619 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4620 // CHECK-NEXT: ret i128 [[TMP3]]
4622 v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); }
4623 // CHECK-LABEL: @vmulwod_w_hu(
4624 // CHECK-NEXT: entry:
4625 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4626 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4627 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4628 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4629 // CHECK-NEXT: ret i128 [[TMP3]]
4631 v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); }
4632 // CHECK-LABEL: @vmulwod_h_bu(
4633 // CHECK-NEXT: entry:
4634 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4635 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4636 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4637 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4638 // CHECK-NEXT: ret i128 [[TMP3]]
4640 v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); }
4641 // CHECK-LABEL: @vmulwev_d_wu_w(
4642 // CHECK-NEXT: entry:
4643 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4644 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4645 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4646 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4647 // CHECK-NEXT: ret i128 [[TMP3]]
4649 v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) {
4650 return __lsx_vmulwev_d_wu_w(_1, _2);
4652 // CHECK-LABEL: @vmulwev_w_hu_h(
4653 // CHECK-NEXT: entry:
4654 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4655 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4656 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4657 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4658 // CHECK-NEXT: ret i128 [[TMP3]]
4660 v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) {
4661 return __lsx_vmulwev_w_hu_h(_1, _2);
4663 // CHECK-LABEL: @vmulwev_h_bu_b(
4664 // CHECK-NEXT: entry:
4665 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4666 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4667 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4668 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4669 // CHECK-NEXT: ret i128 [[TMP3]]
4671 v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) {
4672 return __lsx_vmulwev_h_bu_b(_1, _2);
4674 // CHECK-LABEL: @vmulwod_d_wu_w(
4675 // CHECK-NEXT: entry:
4676 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4677 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4678 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4679 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4680 // CHECK-NEXT: ret i128 [[TMP3]]
4682 v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) {
4683 return __lsx_vmulwod_d_wu_w(_1, _2);
4685 // CHECK-LABEL: @vmulwod_w_hu_h(
4686 // CHECK-NEXT: entry:
4687 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4688 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4689 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4690 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4691 // CHECK-NEXT: ret i128 [[TMP3]]
4693 v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) {
4694 return __lsx_vmulwod_w_hu_h(_1, _2);
4696 // CHECK-LABEL: @vmulwod_h_bu_b(
4697 // CHECK-NEXT: entry:
4698 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4699 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4700 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4701 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4702 // CHECK-NEXT: ret i128 [[TMP3]]
4704 v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) {
4705 return __lsx_vmulwod_h_bu_b(_1, _2);
4707 // CHECK-LABEL: @vmulwev_q_d(
4708 // CHECK-NEXT: entry:
4709 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4710 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4711 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4712 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4713 // CHECK-NEXT: ret i128 [[TMP3]]
4715 v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); }
4716 // CHECK-LABEL: @vmulwod_q_d(
4717 // CHECK-NEXT: entry:
4718 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4719 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4720 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4721 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4722 // CHECK-NEXT: ret i128 [[TMP3]]
4724 v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, _2); }
4725 // CHECK-LABEL: @vmulwev_q_du(
4726 // CHECK-NEXT: entry:
4727 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4728 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4729 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4730 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4731 // CHECK-NEXT: ret i128 [[TMP3]]
4733 v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); }
4734 // CHECK-LABEL: @vmulwod_q_du(
4735 // CHECK-NEXT: entry:
4736 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4737 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4738 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4739 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4740 // CHECK-NEXT: ret i128 [[TMP3]]
4742 v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); }
4743 // CHECK-LABEL: @vmulwev_q_du_d(
4744 // CHECK-NEXT: entry:
4745 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4746 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4747 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4748 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4749 // CHECK-NEXT: ret i128 [[TMP3]]
4751 v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) {
4752 return __lsx_vmulwev_q_du_d(_1, _2);
4754 // CHECK-LABEL: @vmulwod_q_du_d(
4755 // CHECK-NEXT: entry:
4756 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4757 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4758 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4759 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4760 // CHECK-NEXT: ret i128 [[TMP3]]
4762 v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) {
4763 return __lsx_vmulwod_q_du_d(_1, _2);
4765 // CHECK-LABEL: @vhaddw_q_d(
4766 // CHECK-NEXT: entry:
4767 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4768 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4769 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4770 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4771 // CHECK-NEXT: ret i128 [[TMP3]]
4773 v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); }
4774 // CHECK-LABEL: @vhaddw_qu_du(
4775 // CHECK-NEXT: entry:
4776 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4777 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4778 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4779 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4780 // CHECK-NEXT: ret i128 [[TMP3]]
4782 v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); }
4783 // CHECK-LABEL: @vhsubw_q_d(
4784 // CHECK-NEXT: entry:
4785 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4786 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4787 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4788 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4789 // CHECK-NEXT: ret i128 [[TMP3]]
4791 v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); }
4792 // CHECK-LABEL: @vhsubw_qu_du(
4793 // CHECK-NEXT: entry:
4794 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4795 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4796 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4797 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4798 // CHECK-NEXT: ret i128 [[TMP3]]
4800 v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); }
4801 // CHECK-LABEL: @vmaddwev_d_w(
4802 // CHECK-NEXT: entry:
4803 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4804 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4805 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
4806 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
4807 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
4808 // CHECK-NEXT: ret i128 [[TMP4]]
4810 v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
4811 return __lsx_vmaddwev_d_w(_1, _2, _3);
4813 // CHECK-LABEL: @vmaddwev_w_h(
4814 // CHECK-NEXT: entry:
4815 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4816 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4817 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4818 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4819 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4820 // CHECK-NEXT: ret i128 [[TMP4]]
4822 v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
4823 return __lsx_vmaddwev_w_h(_1, _2, _3);
4825 // CHECK-LABEL: @vmaddwev_h_b(
4826 // CHECK-NEXT: entry:
4827 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4828 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4829 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
4830 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
4831 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
4832 // CHECK-NEXT: ret i128 [[TMP4]]
4834 v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
4835 return __lsx_vmaddwev_h_b(_1, _2, _3);
4837 // CHECK-LABEL: @vmaddwev_d_wu(
4838 // CHECK-NEXT: entry:
4839 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4840 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4841 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
4842 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
4843 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
4844 // CHECK-NEXT: ret i128 [[TMP4]]
4846 v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
4847 return __lsx_vmaddwev_d_wu(_1, _2, _3);
4849 // CHECK-LABEL: @vmaddwev_w_hu(
4850 // CHECK-NEXT: entry:
4851 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4852 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4853 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4854 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4855 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4856 // CHECK-NEXT: ret i128 [[TMP4]]
4858 v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
4859 return __lsx_vmaddwev_w_hu(_1, _2, _3);
4861 // CHECK-LABEL: @vmaddwev_h_bu(
4862 // CHECK-NEXT: entry:
4863 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4864 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4865 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
4866 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
4867 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
4868 // CHECK-NEXT: ret i128 [[TMP4]]
4870 v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
4871 return __lsx_vmaddwev_h_bu(_1, _2, _3);
4873 // CHECK-LABEL: @vmaddwod_d_w(
4874 // CHECK-NEXT: entry:
4875 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4876 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4877 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
4878 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
4879 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
4880 // CHECK-NEXT: ret i128 [[TMP4]]
4882 v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
4883 return __lsx_vmaddwod_d_w(_1, _2, _3);
4885 // CHECK-LABEL: @vmaddwod_w_h(
4886 // CHECK-NEXT: entry:
4887 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4888 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4889 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4890 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4891 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4892 // CHECK-NEXT: ret i128 [[TMP4]]
4894 v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
4895 return __lsx_vmaddwod_w_h(_1, _2, _3);
4897 // CHECK-LABEL: @vmaddwod_h_b(
4898 // CHECK-NEXT: entry:
4899 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4900 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4901 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
4902 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
4903 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
4904 // CHECK-NEXT: ret i128 [[TMP4]]
4906 v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
4907 return __lsx_vmaddwod_h_b(_1, _2, _3);
4909 // CHECK-LABEL: @vmaddwod_d_wu(
4910 // CHECK-NEXT: entry:
4911 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4912 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4913 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
4914 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
4915 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
4916 // CHECK-NEXT: ret i128 [[TMP4]]
4918 v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
4919 return __lsx_vmaddwod_d_wu(_1, _2, _3);
4921 // CHECK-LABEL: @vmaddwod_w_hu(
4922 // CHECK-NEXT: entry:
4923 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4924 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4925 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4926 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4927 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4928 // CHECK-NEXT: ret i128 [[TMP4]]
4930 v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
4931 return __lsx_vmaddwod_w_hu(_1, _2, _3);
4933 // CHECK-LABEL: @vmaddwod_h_bu(
4934 // CHECK-NEXT: entry:
4935 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4936 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4937 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
4938 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
4939 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
4940 // CHECK-NEXT: ret i128 [[TMP4]]
4942 v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
4943 return __lsx_vmaddwod_h_bu(_1, _2, _3);
4945 // CHECK-LABEL: @vmaddwev_d_wu_w(
4946 // CHECK-NEXT: entry:
4947 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4948 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4949 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
4950 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
4951 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
4952 // CHECK-NEXT: ret i128 [[TMP4]]
4954 v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
4955 return __lsx_vmaddwev_d_wu_w(_1, _2, _3);
4957 // CHECK-LABEL: @vmaddwev_w_hu_h(
4958 // CHECK-NEXT: entry:
4959 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4960 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4961 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4962 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4963 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4964 // CHECK-NEXT: ret i128 [[TMP4]]
4966 v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
4967 return __lsx_vmaddwev_w_hu_h(_1, _2, _3);
4969 // CHECK-LABEL: @vmaddwev_h_bu_b(
4970 // CHECK-NEXT: entry:
4971 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4972 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4973 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
4974 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
4975 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
4976 // CHECK-NEXT: ret i128 [[TMP4]]
4978 v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
4979 return __lsx_vmaddwev_h_bu_b(_1, _2, _3);
4981 // CHECK-LABEL: @vmaddwod_d_wu_w(
4982 // CHECK-NEXT: entry:
4983 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4984 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4985 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
4986 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
4987 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
4988 // CHECK-NEXT: ret i128 [[TMP4]]
4990 v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
4991 return __lsx_vmaddwod_d_wu_w(_1, _2, _3);
4993 // CHECK-LABEL: @vmaddwod_w_hu_h(
4994 // CHECK-NEXT: entry:
4995 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4996 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4997 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4998 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4999 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5000 // CHECK-NEXT: ret i128 [[TMP4]]
5002 v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
5003 return __lsx_vmaddwod_w_hu_h(_1, _2, _3);
5005 // CHECK-LABEL: @vmaddwod_h_bu_b(
5006 // CHECK-NEXT: entry:
5007 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5008 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5009 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5010 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5011 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5012 // CHECK-NEXT: ret i128 [[TMP4]]
5014 v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
5015 return __lsx_vmaddwod_h_bu_b(_1, _2, _3);
5017 // CHECK-LABEL: @vmaddwev_q_d(
5018 // CHECK-NEXT: entry:
5019 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5020 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5021 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5022 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5023 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5024 // CHECK-NEXT: ret i128 [[TMP4]]
5026 v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
5027 return __lsx_vmaddwev_q_d(_1, _2, _3);
5029 // CHECK-LABEL: @vmaddwod_q_d(
5030 // CHECK-NEXT: entry:
5031 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5032 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5033 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5034 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5035 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5036 // CHECK-NEXT: ret i128 [[TMP4]]
5038 v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
5039 return __lsx_vmaddwod_q_d(_1, _2, _3);
5041 // CHECK-LABEL: @vmaddwev_q_du(
5042 // CHECK-NEXT: entry:
5043 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5044 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5045 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5046 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5047 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5048 // CHECK-NEXT: ret i128 [[TMP4]]
5050 v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
5051 return __lsx_vmaddwev_q_du(_1, _2, _3);
5053 // CHECK-LABEL: @vmaddwod_q_du(
5054 // CHECK-NEXT: entry:
5055 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5056 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5057 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5058 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5059 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5060 // CHECK-NEXT: ret i128 [[TMP4]]
5062 v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
5063 return __lsx_vmaddwod_q_du(_1, _2, _3);
5065 // CHECK-LABEL: @vmaddwev_q_du_d(
5066 // CHECK-NEXT: entry:
5067 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5068 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5069 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5070 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5071 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5072 // CHECK-NEXT: ret i128 [[TMP4]]
5074 v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
5075 return __lsx_vmaddwev_q_du_d(_1, _2, _3);
5077 // CHECK-LABEL: @vmaddwod_q_du_d(
5078 // CHECK-NEXT: entry:
5079 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5080 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5081 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5082 // CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5083 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5084 // CHECK-NEXT: ret i128 [[TMP4]]
5086 v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
5087 return __lsx_vmaddwod_q_du_d(_1, _2, _3);
5089 // CHECK-LABEL: @vrotr_b(
5090 // CHECK-NEXT: entry:
5091 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5092 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5093 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5094 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5095 // CHECK-NEXT: ret i128 [[TMP3]]
5097 v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); }
5098 // CHECK-LABEL: @vrotr_h(
5099 // CHECK-NEXT: entry:
5100 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5101 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5102 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5103 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5104 // CHECK-NEXT: ret i128 [[TMP3]]
5106 v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); }
5107 // CHECK-LABEL: @vrotr_w(
5108 // CHECK-NEXT: entry:
5109 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5110 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5111 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5112 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5113 // CHECK-NEXT: ret i128 [[TMP3]]
5115 v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); }
5116 // CHECK-LABEL: @vrotr_d(
5117 // CHECK-NEXT: entry:
5118 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5119 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5120 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5121 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5122 // CHECK-NEXT: ret i128 [[TMP3]]
5124 v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); }
5125 // CHECK-LABEL: @vadd_q(
5126 // CHECK-NEXT: entry:
5127 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5128 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5129 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5130 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5131 // CHECK-NEXT: ret i128 [[TMP3]]
5133 v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); }
5134 // CHECK-LABEL: @vsub_q(
5135 // CHECK-NEXT: entry:
5136 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5137 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5138 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5139 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5140 // CHECK-NEXT: ret i128 [[TMP3]]
5142 v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __lsx_vsub_q(_1, _2); }
5143 // CHECK-LABEL: @vldrepl_b(
5144 // CHECK-NEXT: entry:
5145 // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1)
5146 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
5147 // CHECK-NEXT: ret i128 [[TMP1]]
5149 v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); }
5150 // CHECK-LABEL: @vldrepl_h(
5151 // CHECK-NEXT: entry:
5152 // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2)
5153 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
5154 // CHECK-NEXT: ret i128 [[TMP1]]
5156 v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); }
5157 // CHECK-LABEL: @vldrepl_w(
5158 // CHECK-NEXT: entry:
5159 // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4)
5160 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
5161 // CHECK-NEXT: ret i128 [[TMP1]]
5163 v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); }
5164 // CHECK-LABEL: @vldrepl_d(
5165 // CHECK-NEXT: entry:
5166 // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8)
5167 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
5168 // CHECK-NEXT: ret i128 [[TMP1]]
5170 v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); }
5171 // CHECK-LABEL: @vmskgez_b(
5172 // CHECK-NEXT: entry:
5173 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5174 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]])
5175 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
5176 // CHECK-NEXT: ret i128 [[TMP2]]
5178 v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); }
5179 // CHECK-LABEL: @vmsknz_b(
5180 // CHECK-NEXT: entry:
5181 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5182 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]])
5183 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
5184 // CHECK-NEXT: ret i128 [[TMP2]]
5186 v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); }
5187 // CHECK-LABEL: @vexth_h_b(
5188 // CHECK-NEXT: entry:
5189 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5190 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]])
5191 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
5192 // CHECK-NEXT: ret i128 [[TMP2]]
5194 v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); }
5195 // CHECK-LABEL: @vexth_w_h(
5196 // CHECK-NEXT: entry:
5197 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5198 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]])
5199 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
5200 // CHECK-NEXT: ret i128 [[TMP2]]
5202 v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); }
5203 // CHECK-LABEL: @vexth_d_w(
5204 // CHECK-NEXT: entry:
5205 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5206 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]])
5207 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5208 // CHECK-NEXT: ret i128 [[TMP2]]
5210 v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); }
5211 // CHECK-LABEL: @vexth_q_d(
5212 // CHECK-NEXT: entry:
5213 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5214 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]])
5215 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5216 // CHECK-NEXT: ret i128 [[TMP2]]
5218 v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); }
5219 // CHECK-LABEL: @vexth_hu_bu(
5220 // CHECK-NEXT: entry:
5221 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5222 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]])
5223 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
5224 // CHECK-NEXT: ret i128 [[TMP2]]
5226 v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); }
5227 // CHECK-LABEL: @vexth_wu_hu(
5228 // CHECK-NEXT: entry:
5229 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5230 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]])
5231 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
5232 // CHECK-NEXT: ret i128 [[TMP2]]
5234 v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); }
5235 // CHECK-LABEL: @vexth_du_wu(
5236 // CHECK-NEXT: entry:
5237 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5238 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]])
5239 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5240 // CHECK-NEXT: ret i128 [[TMP2]]
5242 v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); }
5243 // CHECK-LABEL: @vexth_qu_du(
5244 // CHECK-NEXT: entry:
5245 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5246 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]])
5247 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5248 // CHECK-NEXT: ret i128 [[TMP2]]
5250 v2u64 vexth_qu_du(v2u64 _1) { return __lsx_vexth_qu_du(_1); }
5251 // CHECK-LABEL: @vrotri_b(
5252 // CHECK-NEXT: entry:
5253 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5254 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1)
5255 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
5256 // CHECK-NEXT: ret i128 [[TMP2]]
5258 v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); }
5259 // CHECK-LABEL: @vrotri_h(
5260 // CHECK-NEXT: entry:
5261 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5262 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1)
5263 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
5264 // CHECK-NEXT: ret i128 [[TMP2]]
5266 v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); }
5267 // CHECK-LABEL: @vrotri_w(
5268 // CHECK-NEXT: entry:
5269 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5270 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1)
5271 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
5272 // CHECK-NEXT: ret i128 [[TMP2]]
5274 v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); }
5275 // CHECK-LABEL: @vrotri_d(
5276 // CHECK-NEXT: entry:
5277 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5278 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1)
5279 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5280 // CHECK-NEXT: ret i128 [[TMP2]]
5282 v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); }
5283 // CHECK-LABEL: @vextl_q_d(
5284 // CHECK-NEXT: entry:
5285 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5286 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]])
5287 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5288 // CHECK-NEXT: ret i128 [[TMP2]]
5290 v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); }
5291 // CHECK-LABEL: @vsrlni_b_h(
5292 // CHECK-NEXT: entry:
5293 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5294 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5295 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5296 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5297 // CHECK-NEXT: ret i128 [[TMP3]]
5299 v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); }
5300 // CHECK-LABEL: @vsrlni_h_w(
5301 // CHECK-NEXT: entry:
5302 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5303 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5304 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5305 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5306 // CHECK-NEXT: ret i128 [[TMP3]]
5308 v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); }
5309 // CHECK-LABEL: @vsrlni_w_d(
5310 // CHECK-NEXT: entry:
5311 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5312 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5313 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5314 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5315 // CHECK-NEXT: ret i128 [[TMP3]]
5317 v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); }
5318 // CHECK-LABEL: @vsrlni_d_q(
5319 // CHECK-NEXT: entry:
5320 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5321 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5322 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5323 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5324 // CHECK-NEXT: ret i128 [[TMP3]]
5326 v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlni_d_q(_1, _2, 1); }
5327 // CHECK-LABEL: @vsrlrni_b_h(
5328 // CHECK-NEXT: entry:
5329 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5330 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5331 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5332 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5333 // CHECK-NEXT: ret i128 [[TMP3]]
5335 v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); }
5336 // CHECK-LABEL: @vsrlrni_h_w(
5337 // CHECK-NEXT: entry:
5338 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5339 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5340 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5341 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5342 // CHECK-NEXT: ret i128 [[TMP3]]
5344 v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); }
5345 // CHECK-LABEL: @vsrlrni_w_d(
5346 // CHECK-NEXT: entry:
5347 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5348 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5349 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5350 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5351 // CHECK-NEXT: ret i128 [[TMP3]]
5353 v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); }
5354 // CHECK-LABEL: @vsrlrni_d_q(
5355 // CHECK-NEXT: entry:
5356 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5357 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5358 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5359 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5360 // CHECK-NEXT: ret i128 [[TMP3]]
5362 v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); }
5363 // CHECK-LABEL: @vssrlni_b_h(
5364 // CHECK-NEXT: entry:
5365 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5366 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5367 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5368 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5369 // CHECK-NEXT: ret i128 [[TMP3]]
5371 v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); }
5372 // CHECK-LABEL: @vssrlni_h_w(
5373 // CHECK-NEXT: entry:
5374 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5375 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5376 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5377 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5378 // CHECK-NEXT: ret i128 [[TMP3]]
5380 v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); }
5381 // CHECK-LABEL: @vssrlni_w_d(
5382 // CHECK-NEXT: entry:
5383 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5384 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5385 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5386 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5387 // CHECK-NEXT: ret i128 [[TMP3]]
5389 v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); }
5390 // CHECK-LABEL: @vssrlni_d_q(
5391 // CHECK-NEXT: entry:
5392 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5393 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5394 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5395 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5396 // CHECK-NEXT: ret i128 [[TMP3]]
5398 v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); }
5399 // CHECK-LABEL: @vssrlni_bu_h(
5400 // CHECK-NEXT: entry:
5401 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5402 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5403 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5404 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5405 // CHECK-NEXT: ret i128 [[TMP3]]
5407 v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); }
5408 // CHECK-LABEL: @vssrlni_hu_w(
5409 // CHECK-NEXT: entry:
5410 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5411 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5412 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5413 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5414 // CHECK-NEXT: ret i128 [[TMP3]]
5416 v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); }
5417 // CHECK-LABEL: @vssrlni_wu_d(
5418 // CHECK-NEXT: entry:
5419 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5420 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5421 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5422 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5423 // CHECK-NEXT: ret i128 [[TMP3]]
5425 v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); }
5426 // CHECK-LABEL: @vssrlni_du_q(
5427 // CHECK-NEXT: entry:
5428 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5429 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5430 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5431 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5432 // CHECK-NEXT: ret i128 [[TMP3]]
5434 v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); }
5435 // CHECK-LABEL: @vssrlrni_b_h(
5436 // CHECK-NEXT: entry:
5437 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5438 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5439 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5440 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5441 // CHECK-NEXT: ret i128 [[TMP3]]
5443 v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); }
5444 // CHECK-LABEL: @vssrlrni_h_w(
5445 // CHECK-NEXT: entry:
5446 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5447 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5448 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5449 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5450 // CHECK-NEXT: ret i128 [[TMP3]]
5452 v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); }
5453 // CHECK-LABEL: @vssrlrni_w_d(
5454 // CHECK-NEXT: entry:
5455 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5456 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5457 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5458 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5459 // CHECK-NEXT: ret i128 [[TMP3]]
5461 v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); }
5462 // CHECK-LABEL: @vssrlrni_d_q(
5463 // CHECK-NEXT: entry:
5464 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5465 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5466 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5467 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5468 // CHECK-NEXT: ret i128 [[TMP3]]
5470 v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); }
5471 // CHECK-LABEL: @vssrlrni_bu_h(
5472 // CHECK-NEXT: entry:
5473 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5474 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5475 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5476 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5477 // CHECK-NEXT: ret i128 [[TMP3]]
5479 v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) {
5480 return __lsx_vssrlrni_bu_h(_1, _2, 1);
5482 // CHECK-LABEL: @vssrlrni_hu_w(
5483 // CHECK-NEXT: entry:
5484 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5485 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5486 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5487 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5488 // CHECK-NEXT: ret i128 [[TMP3]]
5490 v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) {
5491 return __lsx_vssrlrni_hu_w(_1, _2, 1);
5493 // CHECK-LABEL: @vssrlrni_wu_d(
5494 // CHECK-NEXT: entry:
5495 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5496 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5497 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5498 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5499 // CHECK-NEXT: ret i128 [[TMP3]]
5501 v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) {
5502 return __lsx_vssrlrni_wu_d(_1, _2, 1);
5504 // CHECK-LABEL: @vssrlrni_du_q(
5505 // CHECK-NEXT: entry:
5506 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5507 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5508 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5509 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5510 // CHECK-NEXT: ret i128 [[TMP3]]
5512 v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) {
5513 return __lsx_vssrlrni_du_q(_1, _2, 1);
5515 // CHECK-LABEL: @vsrani_b_h(
5516 // CHECK-NEXT: entry:
5517 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5518 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5519 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5520 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5521 // CHECK-NEXT: ret i128 [[TMP3]]
5523 v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); }
5524 // CHECK-LABEL: @vsrani_h_w(
5525 // CHECK-NEXT: entry:
5526 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5527 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5528 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5529 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5530 // CHECK-NEXT: ret i128 [[TMP3]]
5532 v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 1); }
5533 // CHECK-LABEL: @vsrani_w_d(
5534 // CHECK-NEXT: entry:
5535 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5536 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5537 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5538 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5539 // CHECK-NEXT: ret i128 [[TMP3]]
5541 v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); }
5542 // CHECK-LABEL: @vsrani_d_q(
5543 // CHECK-NEXT: entry:
5544 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5545 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5546 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5547 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5548 // CHECK-NEXT: ret i128 [[TMP3]]
5550 v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); }
5551 // CHECK-LABEL: @vsrarni_b_h(
5552 // CHECK-NEXT: entry:
5553 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5554 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5555 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5556 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5557 // CHECK-NEXT: ret i128 [[TMP3]]
5559 v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); }
5560 // CHECK-LABEL: @vsrarni_h_w(
5561 // CHECK-NEXT: entry:
5562 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5563 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5564 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5565 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5566 // CHECK-NEXT: ret i128 [[TMP3]]
5568 v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); }
5569 // CHECK-LABEL: @vsrarni_w_d(
5570 // CHECK-NEXT: entry:
5571 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5572 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5573 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5574 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5575 // CHECK-NEXT: ret i128 [[TMP3]]
5577 v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrarni_w_d(_1, _2, 1); }
5578 // CHECK-LABEL: @vsrarni_d_q(
5579 // CHECK-NEXT: entry:
5580 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5581 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5582 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5583 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5584 // CHECK-NEXT: ret i128 [[TMP3]]
5586 v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); }
5587 // CHECK-LABEL: @vssrani_b_h(
5588 // CHECK-NEXT: entry:
5589 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5590 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5591 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5592 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5593 // CHECK-NEXT: ret i128 [[TMP3]]
5595 v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); }
5596 // CHECK-LABEL: @vssrani_h_w(
5597 // CHECK-NEXT: entry:
5598 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5599 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5600 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5601 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5602 // CHECK-NEXT: ret i128 [[TMP3]]
5604 v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); }
5605 // CHECK-LABEL: @vssrani_w_d(
5606 // CHECK-NEXT: entry:
5607 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5608 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5609 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5610 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5611 // CHECK-NEXT: ret i128 [[TMP3]]
5613 v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrani_w_d(_1, _2, 1); }
5614 // CHECK-LABEL: @vssrani_d_q(
5615 // CHECK-NEXT: entry:
5616 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5617 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5618 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5619 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5620 // CHECK-NEXT: ret i128 [[TMP3]]
5622 v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); }
5623 // CHECK-LABEL: @vssrani_bu_h(
5624 // CHECK-NEXT: entry:
5625 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5626 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5627 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5628 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5629 // CHECK-NEXT: ret i128 [[TMP3]]
5631 v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); }
5632 // CHECK-LABEL: @vssrani_hu_w(
5633 // CHECK-NEXT: entry:
5634 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5635 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5636 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5637 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5638 // CHECK-NEXT: ret i128 [[TMP3]]
5640 v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); }
5641 // CHECK-LABEL: @vssrani_wu_d(
5642 // CHECK-NEXT: entry:
5643 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5644 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5645 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5646 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5647 // CHECK-NEXT: ret i128 [[TMP3]]
5649 v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); }
5650 // CHECK-LABEL: @vssrani_du_q(
5651 // CHECK-NEXT: entry:
5652 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5653 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5654 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5655 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5656 // CHECK-NEXT: ret i128 [[TMP3]]
5658 v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); }
5659 // CHECK-LABEL: @vssrarni_b_h(
5660 // CHECK-NEXT: entry:
5661 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5662 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5663 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5664 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5665 // CHECK-NEXT: ret i128 [[TMP3]]
5667 v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); }
5668 // CHECK-LABEL: @vssrarni_h_w(
5669 // CHECK-NEXT: entry:
5670 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5671 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5672 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5673 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5674 // CHECK-NEXT: ret i128 [[TMP3]]
5676 v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); }
5677 // CHECK-LABEL: @vssrarni_w_d(
5678 // CHECK-NEXT: entry:
5679 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5680 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5681 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5682 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5683 // CHECK-NEXT: ret i128 [[TMP3]]
5685 v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); }
5686 // CHECK-LABEL: @vssrarni_d_q(
5687 // CHECK-NEXT: entry:
5688 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5689 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5690 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5691 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5692 // CHECK-NEXT: ret i128 [[TMP3]]
5694 v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); }
5695 // CHECK-LABEL: @vssrarni_bu_h(
5696 // CHECK-NEXT: entry:
5697 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5698 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5699 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5700 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5701 // CHECK-NEXT: ret i128 [[TMP3]]
5703 v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) {
5704 return __lsx_vssrarni_bu_h(_1, _2, 1);
5706 // CHECK-LABEL: @vssrarni_hu_w(
5707 // CHECK-NEXT: entry:
5708 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5709 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5710 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5711 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5712 // CHECK-NEXT: ret i128 [[TMP3]]
5714 v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) {
5715 return __lsx_vssrarni_hu_w(_1, _2, 1);
5717 // CHECK-LABEL: @vssrarni_wu_d(
5718 // CHECK-NEXT: entry:
5719 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5720 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5721 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5722 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5723 // CHECK-NEXT: ret i128 [[TMP3]]
5725 v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) {
5726 return __lsx_vssrarni_wu_d(_1, _2, 1);
5728 // CHECK-LABEL: @vssrarni_du_q(
5729 // CHECK-NEXT: entry:
5730 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5731 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5732 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5733 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5734 // CHECK-NEXT: ret i128 [[TMP3]]
5736 v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) {
5737 return __lsx_vssrarni_du_q(_1, _2, 1);
5739 // CHECK-LABEL: @vpermi_w(
5740 // CHECK-NEXT: entry:
5741 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5742 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5743 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5744 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5745 // CHECK-NEXT: ret i128 [[TMP3]]
5747 v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); }
5748 // CHECK-LABEL: @vld(
5749 // CHECK-NEXT: entry:
5750 // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1)
5751 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
5752 // CHECK-NEXT: ret i128 [[TMP1]]
5754 v16i8 vld(void *_1) { return __lsx_vld(_1, 1); }
5755 // CHECK-LABEL: @vst(
5756 // CHECK-NEXT: entry:
5757 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5758 // CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1)
5759 // CHECK-NEXT: ret void
5761 void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); }
5762 // CHECK-LABEL: @vssrlrn_b_h(
5763 // CHECK-NEXT: entry:
5764 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5765 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5766 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5767 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5768 // CHECK-NEXT: ret i128 [[TMP3]]
5770 v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); }
5771 // CHECK-LABEL: @vssrlrn_h_w(
5772 // CHECK-NEXT: entry:
5773 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5774 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5775 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5776 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5777 // CHECK-NEXT: ret i128 [[TMP3]]
5779 v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); }
5780 // CHECK-LABEL: @vssrlrn_w_d(
5781 // CHECK-NEXT: entry:
5782 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5783 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5784 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5785 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5786 // CHECK-NEXT: ret i128 [[TMP3]]
5788 v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); }
5789 // CHECK-LABEL: @vssrln_b_h(
5790 // CHECK-NEXT: entry:
5791 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5792 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5793 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5794 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5795 // CHECK-NEXT: ret i128 [[TMP3]]
5797 v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); }
5798 // CHECK-LABEL: @vssrln_h_w(
5799 // CHECK-NEXT: entry:
5800 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5801 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5802 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5803 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5804 // CHECK-NEXT: ret i128 [[TMP3]]
5806 v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); }
5807 // CHECK-LABEL: @vssrln_w_d(
5808 // CHECK-NEXT: entry:
5809 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5810 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5811 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5812 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5813 // CHECK-NEXT: ret i128 [[TMP3]]
5815 v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); }
5816 // CHECK-LABEL: @vorn_v(
5817 // CHECK-NEXT: entry:
5818 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5819 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5820 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5821 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5822 // CHECK-NEXT: ret i128 [[TMP3]]
5824 v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __lsx_vorn_v(_1, _2); }
5825 // CHECK-LABEL: @vldi(
5826 // CHECK-NEXT: entry:
5827 // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1)
5828 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
5829 // CHECK-NEXT: ret i128 [[TMP1]]
5831 v2i64 vldi() { return __lsx_vldi(1); }
5832 // CHECK-LABEL: @vshuf_b(
5833 // CHECK-NEXT: entry:
5834 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5835 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5836 // CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5837 // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5838 // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
5839 // CHECK-NEXT: ret i128 [[TMP4]]
5841 v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) {
5842 return __lsx_vshuf_b(_1, _2, _3);
5844 // CHECK-LABEL: @vldx(
5845 // CHECK-NEXT: entry:
5846 // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1)
5847 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
5848 // CHECK-NEXT: ret i128 [[TMP1]]
5850 v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); }
5851 // CHECK-LABEL: @vstx(
5852 // CHECK-NEXT: entry:
5853 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5854 // CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1)
5855 // CHECK-NEXT: ret void
5857 void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); }
5858 // CHECK-LABEL: @vextl_qu_du(
5859 // CHECK-NEXT: entry:
5860 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5861 // CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]])
5862 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5863 // CHECK-NEXT: ret i128 [[TMP2]]
5865 v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); }
5866 // CHECK-LABEL: @bnz_b(
5867 // CHECK-NEXT: entry:
5868 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5869 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]])
5870 // CHECK-NEXT: ret i32 [[TMP1]]
5872 int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); }
5873 // CHECK-LABEL: @bnz_d(
5874 // CHECK-NEXT: entry:
5875 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5876 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]])
5877 // CHECK-NEXT: ret i32 [[TMP1]]
5879 int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); }
5880 // CHECK-LABEL: @bnz_h(
5881 // CHECK-NEXT: entry:
5882 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5883 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]])
5884 // CHECK-NEXT: ret i32 [[TMP1]]
5886 int bnz_h(v8u16 _1) { return __lsx_bnz_h(_1); }
5887 // CHECK-LABEL: @bnz_v(
5888 // CHECK-NEXT: entry:
5889 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5890 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]])
5891 // CHECK-NEXT: ret i32 [[TMP1]]
5893 int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); }
5894 // CHECK-LABEL: @bnz_w(
5895 // CHECK-NEXT: entry:
5896 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5897 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]])
5898 // CHECK-NEXT: ret i32 [[TMP1]]
5900 int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); }
5901 // CHECK-LABEL: @bz_b(
5902 // CHECK-NEXT: entry:
5903 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5904 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]])
5905 // CHECK-NEXT: ret i32 [[TMP1]]
5907 int bz_b(v16u8 _1) { return __lsx_bz_b(_1); }
5908 // CHECK-LABEL: @bz_d(
5909 // CHECK-NEXT: entry:
5910 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5911 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]])
5912 // CHECK-NEXT: ret i32 [[TMP1]]
5914 int bz_d(v2u64 _1) { return __lsx_bz_d(_1); }
5915 // CHECK-LABEL: @bz_h(
5916 // CHECK-NEXT: entry:
5917 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5918 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]])
5919 // CHECK-NEXT: ret i32 [[TMP1]]
5921 int bz_h(v8u16 _1) { return __lsx_bz_h(_1); }
5922 // CHECK-LABEL: @bz_v(
5923 // CHECK-NEXT: entry:
5924 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5925 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]])
5926 // CHECK-NEXT: ret i32 [[TMP1]]
5928 int bz_v(v16u8 _1) { return __lsx_bz_v(_1); }
5929 // CHECK-LABEL: @bz_w(
5930 // CHECK-NEXT: entry:
5931 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5932 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]])
5933 // CHECK-NEXT: ret i32 [[TMP1]]
5935 int bz_w(v4u32 _1) { return __lsx_bz_w(_1); }
5936 // CHECK-LABEL: @vfcmp_caf_d(
5937 // CHECK-NEXT: entry:
5938 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
5939 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
5940 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
5941 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5942 // CHECK-NEXT: ret i128 [[TMP3]]
5944 v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); }
5945 // CHECK-LABEL: @vfcmp_caf_s(
5946 // CHECK-NEXT: entry:
5947 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
5948 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
5949 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
5950 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5951 // CHECK-NEXT: ret i128 [[TMP3]]
5953 v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); }
5954 // CHECK-LABEL: @vfcmp_ceq_d(
5955 // CHECK-NEXT: entry:
5956 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
5957 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
5958 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
5959 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5960 // CHECK-NEXT: ret i128 [[TMP3]]
5962 v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); }
5963 // CHECK-LABEL: @vfcmp_ceq_s(
5964 // CHECK-NEXT: entry:
5965 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
5966 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
5967 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
5968 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5969 // CHECK-NEXT: ret i128 [[TMP3]]
5971 v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); }
5972 // CHECK-LABEL: @vfcmp_cle_d(
5973 // CHECK-NEXT: entry:
5974 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
5975 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
5976 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
5977 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5978 // CHECK-NEXT: ret i128 [[TMP3]]
5980 v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); }
5981 // CHECK-LABEL: @vfcmp_cle_s(
5982 // CHECK-NEXT: entry:
5983 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
5984 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
5985 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
5986 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5987 // CHECK-NEXT: ret i128 [[TMP3]]
5989 v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); }
5990 // CHECK-LABEL: @vfcmp_clt_d(
5991 // CHECK-NEXT: entry:
5992 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
5993 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
5994 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
5995 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5996 // CHECK-NEXT: ret i128 [[TMP3]]
5998 v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); }
5999 // CHECK-LABEL: @vfcmp_clt_s(
6000 // CHECK-NEXT: entry:
6001 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6002 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6003 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6004 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6005 // CHECK-NEXT: ret i128 [[TMP3]]
6007 v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_clt_s(_1, _2); }
6008 // CHECK-LABEL: @vfcmp_cne_d(
6009 // CHECK-NEXT: entry:
6010 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6011 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6012 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6013 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6014 // CHECK-NEXT: ret i128 [[TMP3]]
6016 v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); }
6017 // CHECK-LABEL: @vfcmp_cne_s(
6018 // CHECK-NEXT: entry:
6019 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6020 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6021 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6022 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6023 // CHECK-NEXT: ret i128 [[TMP3]]
6025 v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); }
6026 // CHECK-LABEL: @vfcmp_cor_d(
6027 // CHECK-NEXT: entry:
6028 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6029 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6030 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6031 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6032 // CHECK-NEXT: ret i128 [[TMP3]]
6034 v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); }
6035 // CHECK-LABEL: @vfcmp_cor_s(
6036 // CHECK-NEXT: entry:
6037 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6038 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6039 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6040 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6041 // CHECK-NEXT: ret i128 [[TMP3]]
6043 v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); }
6044 // CHECK-LABEL: @vfcmp_cueq_d(
6045 // CHECK-NEXT: entry:
6046 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6047 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6048 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6049 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6050 // CHECK-NEXT: ret i128 [[TMP3]]
6052 v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); }
6053 // CHECK-LABEL: @vfcmp_cueq_s(
6054 // CHECK-NEXT: entry:
6055 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6056 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6057 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6058 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6059 // CHECK-NEXT: ret i128 [[TMP3]]
6061 v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); }
6062 // CHECK-LABEL: @vfcmp_cule_d(
6063 // CHECK-NEXT: entry:
6064 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6065 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6066 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6067 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6068 // CHECK-NEXT: ret i128 [[TMP3]]
6070 v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); }
6071 // CHECK-LABEL: @vfcmp_cule_s(
6072 // CHECK-NEXT: entry:
6073 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6074 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6075 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6076 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6077 // CHECK-NEXT: ret i128 [[TMP3]]
6079 v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); }
6080 // CHECK-LABEL: @vfcmp_cult_d(
6081 // CHECK-NEXT: entry:
6082 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6083 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6084 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6085 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6086 // CHECK-NEXT: ret i128 [[TMP3]]
6088 v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); }
6089 // CHECK-LABEL: @vfcmp_cult_s(
6090 // CHECK-NEXT: entry:
6091 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6092 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6093 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6094 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6095 // CHECK-NEXT: ret i128 [[TMP3]]
6097 v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); }
6098 // CHECK-LABEL: @vfcmp_cun_d(
6099 // CHECK-NEXT: entry:
6100 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6101 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6102 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6103 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6104 // CHECK-NEXT: ret i128 [[TMP3]]
6106 v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); }
6107 // CHECK-LABEL: @vfcmp_cune_d(
6108 // CHECK-NEXT: entry:
6109 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6110 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6111 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6112 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6113 // CHECK-NEXT: ret i128 [[TMP3]]
6115 v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); }
6116 // CHECK-LABEL: @vfcmp_cune_s(
6117 // CHECK-NEXT: entry:
6118 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6119 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6120 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6121 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6122 // CHECK-NEXT: ret i128 [[TMP3]]
6124 v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); }
6125 // CHECK-LABEL: @vfcmp_cun_s(
6126 // CHECK-NEXT: entry:
6127 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6128 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6129 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6130 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6131 // CHECK-NEXT: ret i128 [[TMP3]]
6133 v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); }
6134 // CHECK-LABEL: @vfcmp_saf_d(
6135 // CHECK-NEXT: entry:
6136 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6137 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6138 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6139 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6140 // CHECK-NEXT: ret i128 [[TMP3]]
6142 v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); }
6143 // CHECK-LABEL: @vfcmp_saf_s(
6144 // CHECK-NEXT: entry:
6145 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6146 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6147 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6148 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6149 // CHECK-NEXT: ret i128 [[TMP3]]
6151 v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); }
6152 // CHECK-LABEL: @vfcmp_seq_d(
6153 // CHECK-NEXT: entry:
6154 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6155 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6156 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6157 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6158 // CHECK-NEXT: ret i128 [[TMP3]]
6160 v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); }
6161 // CHECK-LABEL: @vfcmp_seq_s(
6162 // CHECK-NEXT: entry:
6163 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6164 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6165 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6166 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6167 // CHECK-NEXT: ret i128 [[TMP3]]
6169 v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); }
6170 // CHECK-LABEL: @vfcmp_sle_d(
6171 // CHECK-NEXT: entry:
6172 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6173 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6174 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6175 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6176 // CHECK-NEXT: ret i128 [[TMP3]]
6178 v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); }
6179 // CHECK-LABEL: @vfcmp_sle_s(
6180 // CHECK-NEXT: entry:
6181 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6182 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6183 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6184 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6185 // CHECK-NEXT: ret i128 [[TMP3]]
6187 v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); }
6188 // CHECK-LABEL: @vfcmp_slt_d(
6189 // CHECK-NEXT: entry:
6190 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6191 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6192 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6193 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6194 // CHECK-NEXT: ret i128 [[TMP3]]
6196 v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); }
6197 // CHECK-LABEL: @vfcmp_slt_s(
6198 // CHECK-NEXT: entry:
6199 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6200 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6201 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6202 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6203 // CHECK-NEXT: ret i128 [[TMP3]]
6205 v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); }
6206 // CHECK-LABEL: @vfcmp_sne_d(
6207 // CHECK-NEXT: entry:
6208 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6209 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6210 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6211 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6212 // CHECK-NEXT: ret i128 [[TMP3]]
6214 v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); }
6215 // CHECK-LABEL: @vfcmp_sne_s(
6216 // CHECK-NEXT: entry:
6217 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6218 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6219 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6220 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6221 // CHECK-NEXT: ret i128 [[TMP3]]
6223 v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); }
6224 // CHECK-LABEL: @vfcmp_sor_d(
6225 // CHECK-NEXT: entry:
6226 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6227 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6228 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6229 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6230 // CHECK-NEXT: ret i128 [[TMP3]]
6232 v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); }
6233 // CHECK-LABEL: @vfcmp_sor_s(
6234 // CHECK-NEXT: entry:
6235 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6236 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6237 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6238 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6239 // CHECK-NEXT: ret i128 [[TMP3]]
6241 v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); }
6242 // CHECK-LABEL: @vfcmp_sueq_d(
6243 // CHECK-NEXT: entry:
6244 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6245 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6246 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6247 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6248 // CHECK-NEXT: ret i128 [[TMP3]]
6250 v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); }
6251 // CHECK-LABEL: @vfcmp_sueq_s(
6252 // CHECK-NEXT: entry:
6253 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6254 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6255 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6256 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6257 // CHECK-NEXT: ret i128 [[TMP3]]
6259 v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); }
6260 // CHECK-LABEL: @vfcmp_sule_d(
6261 // CHECK-NEXT: entry:
6262 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6263 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6264 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6265 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6266 // CHECK-NEXT: ret i128 [[TMP3]]
6268 v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); }
6269 // CHECK-LABEL: @vfcmp_sule_s(
6270 // CHECK-NEXT: entry:
6271 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6272 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6273 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6274 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6275 // CHECK-NEXT: ret i128 [[TMP3]]
6277 v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); }
6278 // CHECK-LABEL: @vfcmp_sult_d(
6279 // CHECK-NEXT: entry:
6280 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6281 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6282 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6283 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6284 // CHECK-NEXT: ret i128 [[TMP3]]
6286 v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); }
6287 // CHECK-LABEL: @vfcmp_sult_s(
6288 // CHECK-NEXT: entry:
6289 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6290 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6291 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6292 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6293 // CHECK-NEXT: ret i128 [[TMP3]]
6295 v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); }
6296 // CHECK-LABEL: @vfcmp_sun_d(
6297 // CHECK-NEXT: entry:
6298 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6299 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6300 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6301 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6302 // CHECK-NEXT: ret i128 [[TMP3]]
6304 v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); }
6305 // CHECK-LABEL: @vfcmp_sune_d(
6306 // CHECK-NEXT: entry:
6307 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6308 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6309 // CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6310 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6311 // CHECK-NEXT: ret i128 [[TMP3]]
6313 v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); }
6314 // CHECK-LABEL: @vfcmp_sune_s(
6315 // CHECK-NEXT: entry:
6316 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6317 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6318 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6319 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6320 // CHECK-NEXT: ret i128 [[TMP3]]
6322 v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); }
6323 // CHECK-LABEL: @vfcmp_sun_s(
6324 // CHECK-NEXT: entry:
6325 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6326 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6327 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6328 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6329 // CHECK-NEXT: ret i128 [[TMP3]]
6331 v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); }
6332 // CHECK-LABEL: @vrepli_b(
6333 // CHECK-NEXT: entry:
6334 // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1)
6335 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
6336 // CHECK-NEXT: ret i128 [[TMP1]]
6338 v16i8 vrepli_b() { return __lsx_vrepli_b(1); }
6339 // CHECK-LABEL: @vrepli_d(
6340 // CHECK-NEXT: entry:
6341 // CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1)
6342 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
6343 // CHECK-NEXT: ret i128 [[TMP1]]
6345 v2i64 vrepli_d() { return __lsx_vrepli_d(1); }
6346 // CHECK-LABEL: @vrepli_h(
6347 // CHECK-NEXT: entry:
6348 // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1)
6349 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
6350 // CHECK-NEXT: ret i128 [[TMP1]]
6352 v8i16 vrepli_h() { return __lsx_vrepli_h(1); }
6353 // CHECK-LABEL: @vrepli_w(
6354 // CHECK-NEXT: entry:
6355 // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1)
6356 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
6357 // CHECK-NEXT: ret i128 [[TMP1]]
6359 v4i32 vrepli_w() { return __lsx_vrepli_w(1); }