// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s

#include <lasxintrin.h>
// CHECK-LABEL: @xvsll_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); }
// CHECK-LABEL: @xvsll_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); }
// CHECK-LABEL: @xvsll_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); }
// CHECK-LABEL: @xvsll_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); }
// CHECK-LABEL: @xvslli_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); }
// CHECK-LABEL: @xvslli_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); }
// CHECK-LABEL: @xvslli_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); }
// CHECK-LABEL: @xvslli_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); }
// CHECK-LABEL: @xvsra_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); }
// CHECK-LABEL: @xvsra_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); }
// CHECK-LABEL: @xvsra_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); }
// CHECK-LABEL: @xvsra_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); }
// CHECK-LABEL: @xvsrai_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); }
// CHECK-LABEL: @xvsrai_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); }
// CHECK-LABEL: @xvsrai_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); }
// CHECK-LABEL: @xvsrai_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); }
// CHECK-LABEL: @xvsrar_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); }
// CHECK-LABEL: @xvsrar_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); }
// CHECK-LABEL: @xvsrar_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); }
// CHECK-LABEL: @xvsrar_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); }
// CHECK-LABEL: @xvsrari_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); }
// CHECK-LABEL: @xvsrari_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); }
// CHECK-LABEL: @xvsrari_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); }
// CHECK-LABEL: @xvsrari_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); }
// CHECK-LABEL: @xvsrl_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); }
// CHECK-LABEL: @xvsrl_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); }
// CHECK-LABEL: @xvsrl_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); }
// CHECK-LABEL: @xvsrl_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); }
// CHECK-LABEL: @xvsrli_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); }
// CHECK-LABEL: @xvsrli_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); }
// CHECK-LABEL: @xvsrli_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); }
// CHECK-LABEL: @xvsrli_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); }
// CHECK-LABEL: @xvsrlr_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); }
// CHECK-LABEL: @xvsrlr_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); }
// CHECK-LABEL: @xvsrlr_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); }
// CHECK-LABEL: @xvsrlr_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); }
// CHECK-LABEL: @xvsrlri_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); }
// CHECK-LABEL: @xvsrlri_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); }
// CHECK-LABEL: @xvsrlri_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); }
// CHECK-LABEL: @xvsrlri_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); }
// CHECK-LABEL: @xvbitclr_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); }
// CHECK-LABEL: @xvbitclr_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); }
// CHECK-LABEL: @xvbitclr_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); }
// CHECK-LABEL: @xvbitclr_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); }
// CHECK-LABEL: @xvbitclri_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); }
// CHECK-LABEL: @xvbitclri_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); }
// CHECK-LABEL: @xvbitclri_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); }
// CHECK-LABEL: @xvbitclri_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); }
// CHECK-LABEL: @xvbitset_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); }
// CHECK-LABEL: @xvbitset_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); }
// CHECK-LABEL: @xvbitset_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); }
// CHECK-LABEL: @xvbitset_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); }
// CHECK-LABEL: @xvbitseti_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); }
// CHECK-LABEL: @xvbitseti_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); }
// CHECK-LABEL: @xvbitseti_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); }
// CHECK-LABEL: @xvbitseti_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); }
// CHECK-LABEL: @xvbitrev_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); }
// CHECK-LABEL: @xvbitrev_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); }
// CHECK-LABEL: @xvbitrev_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); }
// CHECK-LABEL: @xvbitrev_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); }
// CHECK-LABEL: @xvbitrevi_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); }
// CHECK-LABEL: @xvbitrevi_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); }
// CHECK-LABEL: @xvbitrevi_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); }
// CHECK-LABEL: @xvbitrevi_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); }
// CHECK-LABEL: @xvadd_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); }
// CHECK-LABEL: @xvadd_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); }
// CHECK-LABEL: @xvadd_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); }
// CHECK-LABEL: @xvadd_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); }
// CHECK-LABEL: @xvaddi_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); }
// CHECK-LABEL: @xvaddi_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); }
// CHECK-LABEL: @xvaddi_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); }
// CHECK-LABEL: @xvaddi_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); }
// CHECK-LABEL: @xvsub_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); }
// CHECK-LABEL: @xvsub_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); }
// CHECK-LABEL: @xvsub_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); }
// CHECK-LABEL: @xvsub_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); }
// CHECK-LABEL: @xvsubi_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); }
// CHECK-LABEL: @xvsubi_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); }
// CHECK-LABEL: @xvsubi_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); }
// CHECK-LABEL: @xvsubi_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); }
// CHECK-LABEL: @xvmax_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); }
// CHECK-LABEL: @xvmax_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); }
// CHECK-LABEL: @xvmax_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); }
// CHECK-LABEL: @xvmax_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); }
// CHECK-LABEL: @xvmaxi_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); }
// CHECK-LABEL: @xvmaxi_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); }
// CHECK-LABEL: @xvmaxi_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); }
// CHECK-LABEL: @xvmaxi_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); }
// CHECK-LABEL: @xvmax_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); }
// CHECK-LABEL: @xvmax_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); }
// CHECK-LABEL: @xvmax_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); }
// CHECK-LABEL: @xvmax_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); }
// CHECK-LABEL: @xvmaxi_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); }
// CHECK-LABEL: @xvmaxi_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); }
// CHECK-LABEL: @xvmaxi_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); }
// CHECK-LABEL: @xvmaxi_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); }
// CHECK-LABEL: @xvmin_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); }
// CHECK-LABEL: @xvmin_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); }
// CHECK-LABEL: @xvmin_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); }
// CHECK-LABEL: @xvmin_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); }
// CHECK-LABEL: @xvmini_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); }
// CHECK-LABEL: @xvmini_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); }
// CHECK-LABEL: @xvmini_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); }
// CHECK-LABEL: @xvmini_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); }
// CHECK-LABEL: @xvmin_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); }
// CHECK-LABEL: @xvmin_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); }
// CHECK-LABEL: @xvmin_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); }
// CHECK-LABEL: @xvmin_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); }
// CHECK-LABEL: @xvmini_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); }
// CHECK-LABEL: @xvmini_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); }
// CHECK-LABEL: @xvmini_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); }
// CHECK-LABEL: @xvmini_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); }
// CHECK-LABEL: @xvseq_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); }
// CHECK-LABEL: @xvseq_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); }
// CHECK-LABEL: @xvseq_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); }
// CHECK-LABEL: @xvseq_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); }
// CHECK-LABEL: @xvseqi_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); }
// CHECK-LABEL: @xvseqi_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); }
1010 // CHECK-LABEL: @xvseqi_w(
1011 // CHECK-NEXT: entry:
1012 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1013 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1)
1014 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1015 // CHECK-NEXT: ret void
1017 v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); }
1018 // CHECK-LABEL: @xvseqi_d(
1019 // CHECK-NEXT: entry:
1020 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1021 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1)
1022 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1023 // CHECK-NEXT: ret void
1025 v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); }
1026 // CHECK-LABEL: @xvslt_b(
1027 // CHECK-NEXT: entry:
1028 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1029 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1030 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
1031 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1032 // CHECK-NEXT: ret void
1034 v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); }
1035 // CHECK-LABEL: @xvslt_h(
1036 // CHECK-NEXT: entry:
1037 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1038 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1039 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
1040 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1041 // CHECK-NEXT: ret void
1043 v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); }
1044 // CHECK-LABEL: @xvslt_w(
1045 // CHECK-NEXT: entry:
1046 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1047 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1048 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
1049 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1050 // CHECK-NEXT: ret void
1052 v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); }
1053 // CHECK-LABEL: @xvslt_d(
1054 // CHECK-NEXT: entry:
1055 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1056 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1057 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
1058 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1059 // CHECK-NEXT: ret void
1061 v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); }
1062 // CHECK-LABEL: @xvslti_b(
1063 // CHECK-NEXT: entry:
1064 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1065 // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1)
1066 // CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1067 // CHECK-NEXT: ret void
1069 v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); }
1070 // CHECK-LABEL: @xvslti_h(
1071 // CHECK-NEXT: entry:
1072 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1073 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1)
1074 // CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1075 // CHECK-NEXT: ret void
1077 v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); }
1078 // CHECK-LABEL: @xvslti_w(
1079 // CHECK-NEXT: entry:
1080 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1081 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1)
1082 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1083 // CHECK-NEXT: ret void
1085 v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); }
1086 // CHECK-LABEL: @xvslti_d(
1087 // CHECK-NEXT: entry:
1088 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1089 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1)
1090 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1091 // CHECK-NEXT: ret void
1093 v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); }
1094 // CHECK-LABEL: @xvslt_bu(
1095 // CHECK-NEXT: entry:
1096 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1097 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1098 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
1099 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1100 // CHECK-NEXT: ret void
1102 v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); }
1103 // CHECK-LABEL: @xvslt_hu(
1104 // CHECK-NEXT: entry:
1105 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1106 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1107 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
1108 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1109 // CHECK-NEXT: ret void
1111 v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); }
1112 // CHECK-LABEL: @xvslt_wu(
1113 // CHECK-NEXT: entry:
1114 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1115 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1116 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
1117 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1118 // CHECK-NEXT: ret void
1120 v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); }
1121 // CHECK-LABEL: @xvslt_du(
1122 // CHECK-NEXT: entry:
1123 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1124 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1125 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
1126 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1127 // CHECK-NEXT: ret void
1129 v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); }
1130 // CHECK-LABEL: @xvslti_bu(
1131 // CHECK-NEXT: entry:
1132 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1133 // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1)
1134 // CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1135 // CHECK-NEXT: ret void
1137 v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); }
1138 // CHECK-LABEL: @xvslti_hu(
1139 // CHECK-NEXT: entry:
1140 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1141 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1)
1142 // CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1143 // CHECK-NEXT: ret void
1145 v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); }
1146 // CHECK-LABEL: @xvslti_wu(
1147 // CHECK-NEXT: entry:
1148 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1149 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1)
1150 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1151 // CHECK-NEXT: ret void
1153 v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); }
1154 // CHECK-LABEL: @xvslti_du(
1155 // CHECK-NEXT: entry:
1156 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1157 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1)
1158 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1159 // CHECK-NEXT: ret void
1161 v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); }
1162 // CHECK-LABEL: @xvsle_b(
1163 // CHECK-NEXT: entry:
1164 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1165 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1166 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
1167 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1168 // CHECK-NEXT: ret void
1170 v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); }
1171 // CHECK-LABEL: @xvsle_h(
1172 // CHECK-NEXT: entry:
1173 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1174 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1175 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
1176 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1177 // CHECK-NEXT: ret void
1179 v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); }
1180 // CHECK-LABEL: @xvsle_w(
1181 // CHECK-NEXT: entry:
1182 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1183 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1184 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
1185 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1186 // CHECK-NEXT: ret void
1188 v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); }
1189 // CHECK-LABEL: @xvsle_d(
1190 // CHECK-NEXT: entry:
1191 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1192 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1193 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
1194 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1195 // CHECK-NEXT: ret void
1197 v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); }
1198 // CHECK-LABEL: @xvslei_b(
1199 // CHECK-NEXT: entry:
1200 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1201 // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1)
1202 // CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1203 // CHECK-NEXT: ret void
1205 v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); }
1206 // CHECK-LABEL: @xvslei_h(
1207 // CHECK-NEXT: entry:
1208 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1209 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1)
1210 // CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1211 // CHECK-NEXT: ret void
1213 v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); }
1214 // CHECK-LABEL: @xvslei_w(
1215 // CHECK-NEXT: entry:
1216 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1217 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1)
1218 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1219 // CHECK-NEXT: ret void
1221 v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); }
1222 // CHECK-LABEL: @xvslei_d(
1223 // CHECK-NEXT: entry:
1224 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1225 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1)
1226 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1227 // CHECK-NEXT: ret void
1229 v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); }
1230 // CHECK-LABEL: @xvsle_bu(
1231 // CHECK-NEXT: entry:
1232 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1233 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1234 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
1235 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1236 // CHECK-NEXT: ret void
1238 v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); }
1239 // CHECK-LABEL: @xvsle_hu(
1240 // CHECK-NEXT: entry:
1241 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1242 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1243 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
1244 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1245 // CHECK-NEXT: ret void
1247 v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); }
1248 // CHECK-LABEL: @xvsle_wu(
1249 // CHECK-NEXT: entry:
1250 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1251 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1252 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
1253 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1254 // CHECK-NEXT: ret void
1256 v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); }
1257 // CHECK-LABEL: @xvsle_du(
1258 // CHECK-NEXT: entry:
1259 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1260 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1261 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
1262 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1263 // CHECK-NEXT: ret void
1265 v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); }
1266 // CHECK-LABEL: @xvslei_bu(
1267 // CHECK-NEXT: entry:
1268 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1269 // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1)
1270 // CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1271 // CHECK-NEXT: ret void
1273 v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); }
1274 // CHECK-LABEL: @xvslei_hu(
1275 // CHECK-NEXT: entry:
1276 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1277 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1)
1278 // CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1279 // CHECK-NEXT: ret void
1281 v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); }
1282 // CHECK-LABEL: @xvslei_wu(
1283 // CHECK-NEXT: entry:
1284 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1285 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1)
1286 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1287 // CHECK-NEXT: ret void
1289 v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); }
1290 // CHECK-LABEL: @xvslei_du(
1291 // CHECK-NEXT: entry:
1292 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1293 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1)
1294 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1295 // CHECK-NEXT: ret void
1297 v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); }
1298 // CHECK-LABEL: @xvsat_b(
1299 // CHECK-NEXT: entry:
1300 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1301 // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1)
1302 // CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1303 // CHECK-NEXT: ret void
1305 v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); }
1306 // CHECK-LABEL: @xvsat_h(
1307 // CHECK-NEXT: entry:
1308 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1309 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1)
1310 // CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1311 // CHECK-NEXT: ret void
1313 v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); }
1314 // CHECK-LABEL: @xvsat_w(
1315 // CHECK-NEXT: entry:
1316 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1317 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1)
1318 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1319 // CHECK-NEXT: ret void
1321 v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); }
1322 // CHECK-LABEL: @xvsat_d(
1323 // CHECK-NEXT: entry:
1324 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1325 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1)
1326 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1327 // CHECK-NEXT: ret void
1329 v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); }
1330 // CHECK-LABEL: @xvsat_bu(
1331 // CHECK-NEXT: entry:
1332 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1333 // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1)
1334 // CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1335 // CHECK-NEXT: ret void
1337 v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); }
1338 // CHECK-LABEL: @xvsat_hu(
1339 // CHECK-NEXT: entry:
1340 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1341 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1)
1342 // CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1343 // CHECK-NEXT: ret void
1345 v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); }
1346 // CHECK-LABEL: @xvsat_wu(
1347 // CHECK-NEXT: entry:
1348 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1349 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1)
1350 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1351 // CHECK-NEXT: ret void
1353 v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); }
1354 // CHECK-LABEL: @xvsat_du(
1355 // CHECK-NEXT: entry:
1356 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1357 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1)
1358 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1359 // CHECK-NEXT: ret void
1361 v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); }
1362 // CHECK-LABEL: @xvadda_b(
1363 // CHECK-NEXT: entry:
1364 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1365 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1366 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
1367 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1368 // CHECK-NEXT: ret void
1370 v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); }
1371 // CHECK-LABEL: @xvadda_h(
1372 // CHECK-NEXT: entry:
1373 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1374 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1375 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
1376 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1377 // CHECK-NEXT: ret void
1379 v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); }
1380 // CHECK-LABEL: @xvadda_w(
1381 // CHECK-NEXT: entry:
1382 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1383 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1384 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
1385 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1386 // CHECK-NEXT: ret void
1388 v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); }
1389 // CHECK-LABEL: @xvadda_d(
1390 // CHECK-NEXT: entry:
1391 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1392 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1393 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
1394 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1395 // CHECK-NEXT: ret void
1397 v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); }
1398 // CHECK-LABEL: @xvsadd_b(
1399 // CHECK-NEXT: entry:
1400 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1401 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1402 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
1403 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1404 // CHECK-NEXT: ret void
1406 v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); }
1407 // CHECK-LABEL: @xvsadd_h(
1408 // CHECK-NEXT: entry:
1409 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1410 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1411 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
1412 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1413 // CHECK-NEXT: ret void
1415 v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); }
1416 // CHECK-LABEL: @xvsadd_w(
1417 // CHECK-NEXT: entry:
1418 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1419 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1420 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
1421 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1422 // CHECK-NEXT: ret void
1424 v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); }
1425 // CHECK-LABEL: @xvsadd_d(
1426 // CHECK-NEXT: entry:
1427 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1428 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1429 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
1430 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1431 // CHECK-NEXT: ret void
1433 v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); }
1434 // CHECK-LABEL: @xvsadd_bu(
1435 // CHECK-NEXT: entry:
1436 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1437 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1438 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
1439 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1440 // CHECK-NEXT: ret void
1442 v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); }
1443 // CHECK-LABEL: @xvsadd_hu(
1444 // CHECK-NEXT: entry:
1445 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1446 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1447 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
1448 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1449 // CHECK-NEXT: ret void
1451 v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); }
1452 // CHECK-LABEL: @xvsadd_wu(
1453 // CHECK-NEXT: entry:
1454 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1455 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1456 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
1457 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1458 // CHECK-NEXT: ret void
1460 v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); }
1461 // CHECK-LABEL: @xvsadd_du(
1462 // CHECK-NEXT: entry:
1463 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1464 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1465 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
1466 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1467 // CHECK-NEXT: ret void
1469 v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); }
1470 // CHECK-LABEL: @xvavg_b(
1471 // CHECK-NEXT: entry:
1472 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1473 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1474 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
1475 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1476 // CHECK-NEXT: ret void
1478 v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); }
1479 // CHECK-LABEL: @xvavg_h(
1480 // CHECK-NEXT: entry:
1481 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1482 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1483 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
1484 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1485 // CHECK-NEXT: ret void
1487 v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); }
1488 // CHECK-LABEL: @xvavg_w(
1489 // CHECK-NEXT: entry:
1490 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1491 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1492 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
1493 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1494 // CHECK-NEXT: ret void
1496 v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); }
1497 // CHECK-LABEL: @xvavg_d(
1498 // CHECK-NEXT: entry:
1499 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1500 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1501 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
1502 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1503 // CHECK-NEXT: ret void
1505 v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); }
1506 // CHECK-LABEL: @xvavg_bu(
1507 // CHECK-NEXT: entry:
1508 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1509 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1510 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
1511 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1512 // CHECK-NEXT: ret void
1514 v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); }
1515 // CHECK-LABEL: @xvavg_hu(
1516 // CHECK-NEXT: entry:
1517 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1518 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1519 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
1520 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1521 // CHECK-NEXT: ret void
1523 v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); }
1524 // CHECK-LABEL: @xvavg_wu(
1525 // CHECK-NEXT: entry:
1526 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1527 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1528 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
1529 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1530 // CHECK-NEXT: ret void
1532 v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); }
1533 // CHECK-LABEL: @xvavg_du(
1534 // CHECK-NEXT: entry:
1535 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1536 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1537 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
1538 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1539 // CHECK-NEXT: ret void
1541 v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); }
1542 // CHECK-LABEL: @xvavgr_b(
1543 // CHECK-NEXT: entry:
1544 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1545 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1546 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
1547 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1548 // CHECK-NEXT: ret void
1550 v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); }
1551 // CHECK-LABEL: @xvavgr_h(
1552 // CHECK-NEXT: entry:
1553 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1554 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1555 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
1556 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1557 // CHECK-NEXT: ret void
1559 v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); }
1560 // CHECK-LABEL: @xvavgr_w(
1561 // CHECK-NEXT: entry:
1562 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1563 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1564 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
1565 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1566 // CHECK-NEXT: ret void
1568 v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); }
1569 // CHECK-LABEL: @xvavgr_d(
1570 // CHECK-NEXT: entry:
1571 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1572 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1573 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
1574 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1575 // CHECK-NEXT: ret void
1577 v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); }
1578 // CHECK-LABEL: @xvavgr_bu(
1579 // CHECK-NEXT: entry:
1580 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1581 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1582 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
1583 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1584 // CHECK-NEXT: ret void
1586 v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); }
1587 // CHECK-LABEL: @xvavgr_hu(
1588 // CHECK-NEXT: entry:
1589 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1590 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1591 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
1592 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1593 // CHECK-NEXT: ret void
1595 v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); }
1596 // CHECK-LABEL: @xvavgr_wu(
1597 // CHECK-NEXT: entry:
1598 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1599 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1600 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
1601 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1602 // CHECK-NEXT: ret void
1604 v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); }
1605 // CHECK-LABEL: @xvavgr_du(
1606 // CHECK-NEXT: entry:
1607 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1608 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1609 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
1610 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1611 // CHECK-NEXT: ret void
1613 v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); }
1614 // CHECK-LABEL: @xvssub_b(
1615 // CHECK-NEXT: entry:
1616 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1617 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1618 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
1619 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1620 // CHECK-NEXT: ret void
1622 v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); }
1623 // CHECK-LABEL: @xvssub_h(
1624 // CHECK-NEXT: entry:
1625 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1626 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1627 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
1628 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1629 // CHECK-NEXT: ret void
1631 v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); }
1632 // CHECK-LABEL: @xvssub_w(
1633 // CHECK-NEXT: entry:
1634 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1635 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1636 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
1637 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1638 // CHECK-NEXT: ret void
1640 v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); }
1641 // CHECK-LABEL: @xvssub_d(
1642 // CHECK-NEXT: entry:
1643 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1644 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1645 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
1646 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1647 // CHECK-NEXT: ret void
1649 v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); }
1650 // CHECK-LABEL: @xvssub_bu(
1651 // CHECK-NEXT: entry:
1652 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1653 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1654 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
1655 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1656 // CHECK-NEXT: ret void
1658 v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); }
1659 // CHECK-LABEL: @xvssub_hu(
1660 // CHECK-NEXT: entry:
1661 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1662 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1663 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
1664 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1665 // CHECK-NEXT: ret void
1667 v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); }
1668 // CHECK-LABEL: @xvssub_wu(
1669 // CHECK-NEXT: entry:
1670 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1671 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1672 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
1673 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1674 // CHECK-NEXT: ret void
1676 v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); }
1677 // CHECK-LABEL: @xvssub_du(
1678 // CHECK-NEXT: entry:
1679 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1680 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1681 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
1682 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1683 // CHECK-NEXT: ret void
1685 v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); }
1686 // CHECK-LABEL: @xvabsd_b(
1687 // CHECK-NEXT: entry:
1688 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1689 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1690 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
1691 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1692 // CHECK-NEXT: ret void
1694 v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); }
1695 // CHECK-LABEL: @xvabsd_h(
1696 // CHECK-NEXT: entry:
1697 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1698 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1699 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
1700 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1701 // CHECK-NEXT: ret void
1703 v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); }
1704 // CHECK-LABEL: @xvabsd_w(
1705 // CHECK-NEXT: entry:
1706 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1707 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1708 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
1709 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1710 // CHECK-NEXT: ret void
1712 v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); }
1713 // CHECK-LABEL: @xvabsd_d(
1714 // CHECK-NEXT: entry:
1715 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1716 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1717 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
1718 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1719 // CHECK-NEXT: ret void
1721 v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); }
1722 // CHECK-LABEL: @xvabsd_bu(
1723 // CHECK-NEXT: entry:
1724 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1725 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1726 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
1727 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1728 // CHECK-NEXT: ret void
1730 v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); }
1731 // CHECK-LABEL: @xvabsd_hu(
1732 // CHECK-NEXT: entry:
1733 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1734 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1735 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
1736 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1737 // CHECK-NEXT: ret void
1739 v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); }
1740 // CHECK-LABEL: @xvabsd_wu(
1741 // CHECK-NEXT: entry:
1742 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1743 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1744 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
1745 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1746 // CHECK-NEXT: ret void
1748 v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); }
1749 // CHECK-LABEL: @xvabsd_du(
1750 // CHECK-NEXT: entry:
1751 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1752 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1753 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
1754 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1755 // CHECK-NEXT: ret void
1757 v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); }
1758 // CHECK-LABEL: @xvmul_b(
1759 // CHECK-NEXT: entry:
1760 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1761 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1762 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
1763 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1764 // CHECK-NEXT: ret void
1766 v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); }
1767 // CHECK-LABEL: @xvmul_h(
1768 // CHECK-NEXT: entry:
1769 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1770 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1771 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
1772 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1773 // CHECK-NEXT: ret void
1775 v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); }
1776 // CHECK-LABEL: @xvmul_w(
1777 // CHECK-NEXT: entry:
1778 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1779 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1780 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
1781 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1782 // CHECK-NEXT: ret void
1784 v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); }
1785 // CHECK-LABEL: @xvmul_d(
1786 // CHECK-NEXT: entry:
1787 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1788 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1789 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
1790 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1791 // CHECK-NEXT: ret void
1793 v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); }
1794 // CHECK-LABEL: @xvmadd_b(
1795 // CHECK-NEXT: entry:
1796 // CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1797 // CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1798 // CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
1799 // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
1800 // CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1801 // CHECK-NEXT: ret void
1803 v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); }
1804 // CHECK-LABEL: @xvmadd_h(
1805 // CHECK-NEXT: entry:
1806 // CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1807 // CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1808 // CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
1809 // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
1810 // CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1811 // CHECK-NEXT: ret void
1813 v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); }
1814 // CHECK-LABEL: @xvmadd_w(
1815 // CHECK-NEXT: entry:
1816 // CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1817 // CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1818 // CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
1819 // CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
1820 // CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1821 // CHECK-NEXT: ret void
1823 v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); }
1824 // CHECK-LABEL: @xvmadd_d(
1825 // CHECK-NEXT: entry:
1826 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
1827 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
1828 // CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
1829 // CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
1830 // CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
1831 // CHECK-NEXT: ret void
1833 v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); }
// CHECK-LABEL: @xvmsub_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); }
// CHECK-LABEL: @xvmsub_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); }
// CHECK-LABEL: @xvmsub_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); }
// CHECK-LABEL: @xvmsub_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); }
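// xvdiv_{b,h,w,d} / xvdiv_{bu,hu,wu,du}: per-element signed / unsigned integer division.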
// CHECK-LABEL: @xvdiv_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); }
// CHECK-LABEL: @xvdiv_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); }
// CHECK-LABEL: @xvdiv_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); }
// CHECK-LABEL: @xvdiv_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); }
// CHECK-LABEL: @xvdiv_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); }
// CHECK-LABEL: @xvdiv_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); }
// CHECK-LABEL: @xvdiv_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); }
// CHECK-LABEL: @xvdiv_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); }
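// xvhaddw_*: horizontal widening adds; pairs of elements drawn from the two sources are summed into double-width result lanes (signed and unsigned forms).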
// CHECK-LABEL: @xvhaddw_h_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); }
// CHECK-LABEL: @xvhaddw_w_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); }
// CHECK-LABEL: @xvhaddw_d_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); }
// CHECK-LABEL: @xvhaddw_hu_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); }
// CHECK-LABEL: @xvhaddw_wu_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); }
// CHECK-LABEL: @xvhaddw_du_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); }
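// xvhsubw_*: horizontal widening subtracts. Note that the unsigned-source forms still return signed element types here (e.g. v16i16 from two v32u8 inputs), since a difference of unsigned values can be negative.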
// CHECK-LABEL: @xvhsubw_h_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); }
// CHECK-LABEL: @xvhsubw_w_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); }
// CHECK-LABEL: @xvhsubw_d_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); }
// CHECK-LABEL: @xvhsubw_hu_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); }
// CHECK-LABEL: @xvhsubw_wu_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); }
// CHECK-LABEL: @xvhsubw_du_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); }
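// xvmod_*: per-element remainder, signed (b/h/w/d) and unsigned (bu/hu/wu/du).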
// CHECK-LABEL: @xvmod_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); }
// CHECK-LABEL: @xvmod_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); }
// CHECK-LABEL: @xvmod_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); }
// CHECK-LABEL: @xvmod_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); }
// CHECK-LABEL: @xvmod_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); }
// CHECK-LABEL: @xvmod_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); }
// CHECK-LABEL: @xvmod_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); }
// CHECK-LABEL: @xvmod_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); }
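// xvrepl128vei_*: replicate the element selected by the immediate across each 128-bit half of the 256-bit register.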
// CHECK-LABEL: @xvrepl128vei_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); }
// CHECK-LABEL: @xvrepl128vei_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); }
// CHECK-LABEL: @xvrepl128vei_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); }
// CHECK-LABEL: @xvrepl128vei_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); }
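// xvpickev_*/xvpickod_*: gather the even-/odd-indexed elements of the two source vectors.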
// CHECK-LABEL: @xvpickev_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); }
// CHECK-LABEL: @xvpickev_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); }
// CHECK-LABEL: @xvpickev_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); }
// CHECK-LABEL: @xvpickev_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); }
// CHECK-LABEL: @xvpickod_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); }
// CHECK-LABEL: @xvpickod_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); }
// CHECK-LABEL: @xvpickod_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); }
// CHECK-LABEL: @xvpickod_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); }
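// xvilvh_*/xvilvl_*: interleave elements taken from the high/low halves of the two sources.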
// CHECK-LABEL: @xvilvh_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); }
// CHECK-LABEL: @xvilvh_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); }
// CHECK-LABEL: @xvilvh_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); }
// CHECK-LABEL: @xvilvh_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); }
// CHECK-LABEL: @xvilvl_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); }
// CHECK-LABEL: @xvilvl_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); }
// CHECK-LABEL: @xvilvl_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); }
// CHECK-LABEL: @xvilvl_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); }
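// xvpackev_*/xvpackod_*: pack the even-/odd-indexed elements of the two sources, alternating between them.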
// CHECK-LABEL: @xvpackev_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); }
// CHECK-LABEL: @xvpackev_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); }
// CHECK-LABEL: @xvpackev_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); }
// CHECK-LABEL: @xvpackev_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); }
// CHECK-LABEL: @xvpackod_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); }
// CHECK-LABEL: @xvpackod_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); }
// CHECK-LABEL: @xvpackod_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); }
// CHECK-LABEL: @xvpackod_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); }
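// xvshuf_*: element shuffles driven by an index vector that selects from the concatenation of the two data operands.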
// CHECK-LABEL: @xvshuf_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); }
// CHECK-LABEL: @xvshuf_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); }
// CHECK-LABEL: @xvshuf_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); }
// CHECK-LABEL: @xvshuf_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); }
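// Full-register bitwise ops: the .v forms (and/or/nor/xor) operate on all 256 bits; the *i_b immediate forms apply an 8-bit immediate to every byte.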
// CHECK-LABEL: @xvand_v(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); }
// CHECK-LABEL: @xvandi_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); }
// CHECK-LABEL: @xvor_v(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); }
// CHECK-LABEL: @xvori_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); }
// CHECK-LABEL: @xvnor_v(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); }
// CHECK-LABEL: @xvnori_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); }
// CHECK-LABEL: @xvxor_v(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); }
// CHECK-LABEL: @xvxori_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); }
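// xvbitsel_v picks each result bit from _2 where the corresponding bit of mask _3 is set and from _1 otherwise; xvbitseli_b is the byte-immediate variant.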
// CHECK-LABEL: @xvbitsel_v(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); }
// CHECK-LABEL: @xvbitseli_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); }
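// xvshuf4i_{b,h,w}: rearrange each group of four elements according to the 2-bit fields of the 8-bit immediate.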
// CHECK-LABEL: @xvshuf4i_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); }
// CHECK-LABEL: @xvshuf4i_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); }
// CHECK-LABEL: @xvshuf4i_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); }
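// xvreplgr2vr_*: broadcast a general-purpose register into every vector element; the _d form first sign-extends the 32-bit int argument to i64 (the CONV step below).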
// CHECK-LABEL: @xvreplgr2vr_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]])
// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); }
// CHECK-LABEL: @xvreplgr2vr_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]])
// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); }
// CHECK-LABEL: @xvreplgr2vr_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]])
// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); }
// CHECK-LABEL: @xvreplgr2vr_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]])
// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); }
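// Per-element bit counting: xvpcnt_* (population count), xvclo_* (count leading ones), xvclz_* (count leading zeros).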
// CHECK-LABEL: @xvpcnt_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_112]])
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); }
// CHECK-LABEL: @xvpcnt_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_112]])
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); }
// CHECK-LABEL: @xvpcnt_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_112]])
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); }
// CHECK-LABEL: @xvpcnt_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); }
// CHECK-LABEL: @xvclo_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_112]])
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); }
// CHECK-LABEL: @xvclo_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_112]])
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); }
// CHECK-LABEL: @xvclo_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_112]])
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); }
// CHECK-LABEL: @xvclo_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); }
// CHECK-LABEL: @xvclz_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_112]])
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); }
// CHECK-LABEL: @xvclz_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_112]])
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); }
// CHECK-LABEL: @xvclz_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_112]])
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); }
// CHECK-LABEL: @xvclz_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); }
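// Lane-wise IEEE 754 arithmetic on <8 x float> (_s) and <4 x double> (_d): add, sub, mul, div.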
// CHECK-LABEL: @xvfadd_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); }
// CHECK-LABEL: @xvfadd_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); }
// CHECK-LABEL: @xvfsub_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); }
// CHECK-LABEL: @xvfsub_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); }
// CHECK-LABEL: @xvfmul_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); }
// CHECK-LABEL: @xvfmul_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); }
// CHECK-LABEL: @xvfdiv_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); }
// CHECK-LABEL: @xvfdiv_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); }
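// Narrowing conversions: xvfcvt_h_s packs two float vectors into one vector of half-precision values (carried as <16 x i16>), and xvfcvt_s_d packs two double vectors into one float vector.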
// CHECK-LABEL: @xvfcvt_h_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); }
// CHECK-LABEL: @xvfcvt_s_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); }
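// xvfmin/xvfmax compare by value; xvfmina/xvfmaxa compare by magnitude (absolute value).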
// CHECK-LABEL: @xvfmin_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); }
// CHECK-LABEL: @xvfmin_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); }
// CHECK-LABEL: @xvfmina_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); }
// CHECK-LABEL: @xvfmina_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); }
// CHECK-LABEL: @xvfmax_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); }
// CHECK-LABEL: @xvfmax_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); }
// CHECK-LABEL: @xvfmaxa_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]])
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); }
// CHECK-LABEL: @xvfmaxa_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); }
2812 // CHECK-LABEL: @xvfclass_s(
2813 // CHECK-NEXT: entry:
2814 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2815 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]])
2816 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2817 // CHECK-NEXT: ret void
2819 v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); }
2820 // CHECK-LABEL: @xvfclass_d(
2821 // CHECK-NEXT: entry:
2822 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2823 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]])
2824 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2825 // CHECK-NEXT: ret void
2827 v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); }
2828 // CHECK-LABEL: @xvfsqrt_s(
2829 // CHECK-NEXT: entry:
2830 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2831 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]])
2832 // CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2833 // CHECK-NEXT: ret void
2835 v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); }
2836 // CHECK-LABEL: @xvfsqrt_d(
2837 // CHECK-NEXT: entry:
2838 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2839 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]])
2840 // CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2841 // CHECK-NEXT: ret void
2843 v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); }
2844 // CHECK-LABEL: @xvfrecip_s(
2845 // CHECK-NEXT: entry:
2846 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2847 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]])
2848 // CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2849 // CHECK-NEXT: ret void
2851 v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); }
2852 // CHECK-LABEL: @xvfrecip_d(
2853 // CHECK-NEXT: entry:
2854 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2855 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]])
2856 // CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2857 // CHECK-NEXT: ret void
2859 v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); }
2860 // CHECK-LABEL: @xvfrint_s(
2861 // CHECK-NEXT: entry:
2862 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2863 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]])
2864 // CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2865 // CHECK-NEXT: ret void
2867 v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); }
2868 // CHECK-LABEL: @xvfrint_d(
2869 // CHECK-NEXT: entry:
2870 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2871 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]])
2872 // CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2873 // CHECK-NEXT: ret void
2875 v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); }
2876 // CHECK-LABEL: @xvfrsqrt_s(
2877 // CHECK-NEXT: entry:
2878 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2879 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]])
2880 // CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2881 // CHECK-NEXT: ret void
2883 v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); }
2884 // CHECK-LABEL: @xvfrsqrt_d(
2885 // CHECK-NEXT: entry:
2886 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2887 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]])
2888 // CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2889 // CHECK-NEXT: ret void
2891 v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); }
2892 // CHECK-LABEL: @xvflogb_s(
2893 // CHECK-NEXT: entry:
2894 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2895 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]])
2896 // CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2897 // CHECK-NEXT: ret void
2899 v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); }
2900 // CHECK-LABEL: @xvflogb_d(
2901 // CHECK-NEXT: entry:
2902 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2903 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]])
2904 // CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2905 // CHECK-NEXT: ret void
2907 v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); }
2908 // CHECK-LABEL: @xvfcvth_s_h(
2909 // CHECK-NEXT: entry:
2910 // CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2911 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_112]])
2912 // CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2913 // CHECK-NEXT: ret void
2915 v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); }
2916 // CHECK-LABEL: @xvfcvth_d_s(
2917 // CHECK-NEXT: entry:
2918 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2919 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]])
2920 // CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2921 // CHECK-NEXT: ret void
2923 v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); }
2924 // CHECK-LABEL: @xvfcvtl_s_h(
2925 // CHECK-NEXT: entry:
2926 // CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2927 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_112]])
2928 // CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2929 // CHECK-NEXT: ret void
2931 v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); }
2932 // CHECK-LABEL: @xvfcvtl_d_s(
2933 // CHECK-NEXT: entry:
2934 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2935 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]])
2936 // CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2937 // CHECK-NEXT: ret void
2939 v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); }
2940 // CHECK-LABEL: @xvftint_w_s(
2941 // CHECK-NEXT: entry:
2942 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2943 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]])
2944 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2945 // CHECK-NEXT: ret void
2947 v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); }
2948 // CHECK-LABEL: @xvftint_l_d(
2949 // CHECK-NEXT: entry:
2950 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2951 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]])
2952 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2953 // CHECK-NEXT: ret void
2955 v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); }
2956 // CHECK-LABEL: @xvftint_wu_s(
2957 // CHECK-NEXT: entry:
2958 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2959 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]])
2960 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2961 // CHECK-NEXT: ret void
2963 v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); }
2964 // CHECK-LABEL: @xvftint_lu_d(
2965 // CHECK-NEXT: entry:
2966 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2967 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]])
2968 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2969 // CHECK-NEXT: ret void
2971 v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); }
2972 // CHECK-LABEL: @xvftintrz_w_s(
2973 // CHECK-NEXT: entry:
2974 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2975 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]])
2976 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2977 // CHECK-NEXT: ret void
2979 v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); }
2980 // CHECK-LABEL: @xvftintrz_l_d(
2981 // CHECK-NEXT: entry:
2982 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2983 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]])
2984 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2985 // CHECK-NEXT: ret void
2987 v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); }
2988 // CHECK-LABEL: @xvftintrz_wu_s(
2989 // CHECK-NEXT: entry:
2990 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2991 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]])
2992 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
2993 // CHECK-NEXT: ret void
2995 v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); }
2996 // CHECK-LABEL: @xvftintrz_lu_d(
2997 // CHECK-NEXT: entry:
2998 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
2999 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]])
3000 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3001 // CHECK-NEXT: ret void
3003 v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); }
3004 // CHECK-LABEL: @xvffint_s_w(
3005 // CHECK-NEXT: entry:
3006 // CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3007 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_112]])
3008 // CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3009 // CHECK-NEXT: ret void
3011 v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); }
3012 // CHECK-LABEL: @xvffint_d_l(
3013 // CHECK-NEXT: entry:
3014 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3015 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]])
3016 // CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3017 // CHECK-NEXT: ret void
3019 v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); }
3020 // CHECK-LABEL: @xvffint_s_wu(
3021 // CHECK-NEXT: entry:
3022 // CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3023 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_112]])
3024 // CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3025 // CHECK-NEXT: ret void
3027 v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); }
3028 // CHECK-LABEL: @xvffint_d_lu(
3029 // CHECK-NEXT: entry:
3030 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3031 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]])
3032 // CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3033 // CHECK-NEXT: ret void
3035 v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); }
3036 // CHECK-LABEL: @xvreplve_b(
3037 // CHECK-NEXT: entry:
3038 // CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3039 // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_112]], i32 [[_2:%.*]])
3040 // CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3041 // CHECK-NEXT: ret void
3043 v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); }
3044 // CHECK-LABEL: @xvreplve_h(
3045 // CHECK-NEXT: entry:
3046 // CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3047 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_112]], i32 [[_2:%.*]])
3048 // CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3049 // CHECK-NEXT: ret void
3051 v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); }
3052 // CHECK-LABEL: @xvreplve_w(
3053 // CHECK-NEXT: entry:
3054 // CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3055 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_112]], i32 [[_2:%.*]])
3056 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3057 // CHECK-NEXT: ret void
3059 v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); }
3060 // CHECK-LABEL: @xvreplve_d(
3061 // CHECK-NEXT: entry:
3062 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3063 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]])
3064 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3065 // CHECK-NEXT: ret void
3067 v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); }
3068 // CHECK-LABEL: @xvpermi_w(
3069 // CHECK-NEXT: entry:
3070 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3071 // CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3072 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
3073 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3074 // CHECK-NEXT: ret void
3076 v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); }
3077 // CHECK-LABEL: @xvandn_v(
3078 // CHECK-NEXT: entry:
3079 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3080 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3081 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
3082 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3083 // CHECK-NEXT: ret void
3085 v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); }
3086 // CHECK-LABEL: @xvneg_b(
3087 // CHECK-NEXT: entry:
3088 // CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3089 // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_112]])
3090 // CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3091 // CHECK-NEXT: ret void
3093 v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); }
3094 // CHECK-LABEL: @xvneg_h(
3095 // CHECK-NEXT: entry:
3096 // CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3097 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_112]])
3098 // CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3099 // CHECK-NEXT: ret void
3101 v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); }
3102 // CHECK-LABEL: @xvneg_w(
3103 // CHECK-NEXT: entry:
3104 // CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3105 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_112]])
3106 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3107 // CHECK-NEXT: ret void
3109 v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); }
3110 // CHECK-LABEL: @xvneg_d(
3111 // CHECK-NEXT: entry:
3112 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3113 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]])
3114 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3115 // CHECK-NEXT: ret void
3117 v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); }
3118 // CHECK-LABEL: @xvmuh_b(
3119 // CHECK-NEXT: entry:
3120 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3121 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3122 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
3123 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3124 // CHECK-NEXT: ret void
3126 v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); }
3127 // CHECK-LABEL: @xvmuh_h(
3128 // CHECK-NEXT: entry:
3129 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3130 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3131 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
3132 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3133 // CHECK-NEXT: ret void
3135 v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); }
3136 // CHECK-LABEL: @xvmuh_w(
3137 // CHECK-NEXT: entry:
3138 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3139 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3140 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
3141 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3142 // CHECK-NEXT: ret void
3144 v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); }
3145 // CHECK-LABEL: @xvmuh_d(
3146 // CHECK-NEXT: entry:
3147 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3148 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3149 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
3150 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3151 // CHECK-NEXT: ret void
3153 v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); }
3154 // CHECK-LABEL: @xvmuh_bu(
3155 // CHECK-NEXT: entry:
3156 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3157 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3158 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
3159 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3160 // CHECK-NEXT: ret void
3162 v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); }
3163 // CHECK-LABEL: @xvmuh_hu(
3164 // CHECK-NEXT: entry:
3165 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3166 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3167 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
3168 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3169 // CHECK-NEXT: ret void
3171 v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); }
3172 // CHECK-LABEL: @xvmuh_wu(
3173 // CHECK-NEXT: entry:
3174 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3175 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3176 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
3177 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3178 // CHECK-NEXT: ret void
3180 v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); }
3181 // CHECK-LABEL: @xvmuh_du(
3182 // CHECK-NEXT: entry:
3183 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3184 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3185 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
3186 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3187 // CHECK-NEXT: ret void
3189 v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); }
3190 // CHECK-LABEL: @xvsllwil_h_b(
3191 // CHECK-NEXT: entry:
3192 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3193 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1)
3194 // CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3195 // CHECK-NEXT: ret void
3197 v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); }
3198 // CHECK-LABEL: @xvsllwil_w_h(
3199 // CHECK-NEXT: entry:
3200 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3201 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1)
3202 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3203 // CHECK-NEXT: ret void
3205 v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); }
3206 // CHECK-LABEL: @xvsllwil_d_w(
3207 // CHECK-NEXT: entry:
3208 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3209 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1)
3210 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3211 // CHECK-NEXT: ret void
3213 v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); }
3214 // CHECK-LABEL: @xvsllwil_hu_bu(
3215 // CHECK-NEXT: entry:
3216 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3217 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1)
3218 // CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3219 // CHECK-NEXT: ret void
3221 v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); }
3222 // CHECK-LABEL: @xvsllwil_wu_hu(
3223 // CHECK-NEXT: entry:
3224 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3225 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1)
3226 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3227 // CHECK-NEXT: ret void
3229 v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); }
3230 // CHECK-LABEL: @xvsllwil_du_wu(
3231 // CHECK-NEXT: entry:
3232 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3233 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1)
3234 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3235 // CHECK-NEXT: ret void
3237 v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); }
3238 // CHECK-LABEL: @xvsran_b_h(
3239 // CHECK-NEXT: entry:
3240 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3241 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3242 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
3243 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3244 // CHECK-NEXT: ret void
3246 v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); }
3247 // CHECK-LABEL: @xvsran_h_w(
3248 // CHECK-NEXT: entry:
3249 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3250 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3251 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
3252 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3253 // CHECK-NEXT: ret void
3255 v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); }
3256 // CHECK-LABEL: @xvsran_w_d(
3257 // CHECK-NEXT: entry:
3258 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3259 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3260 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
3261 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3262 // CHECK-NEXT: ret void
3264 v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); }
3265 // CHECK-LABEL: @xvssran_b_h(
3266 // CHECK-NEXT: entry:
3267 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3268 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3269 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
3270 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3271 // CHECK-NEXT: ret void
3273 v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); }
3274 // CHECK-LABEL: @xvssran_h_w(
3275 // CHECK-NEXT: entry:
3276 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3277 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3278 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
3279 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3280 // CHECK-NEXT: ret void
3282 v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); }
3283 // CHECK-LABEL: @xvssran_w_d(
3284 // CHECK-NEXT: entry:
3285 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3286 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3287 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
3288 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3289 // CHECK-NEXT: ret void
3291 v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); }
3292 // CHECK-LABEL: @xvssran_bu_h(
3293 // CHECK-NEXT: entry:
3294 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3295 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3296 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
3297 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3298 // CHECK-NEXT: ret void
3300 v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); }
3301 // CHECK-LABEL: @xvssran_hu_w(
3302 // CHECK-NEXT: entry:
3303 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3304 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3305 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
3306 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3307 // CHECK-NEXT: ret void
3309 v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); }
3310 // CHECK-LABEL: @xvssran_wu_d(
3311 // CHECK-NEXT: entry:
3312 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3313 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3314 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
3315 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3316 // CHECK-NEXT: ret void
3318 v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); }
3319 // CHECK-LABEL: @xvsrarn_b_h(
3320 // CHECK-NEXT: entry:
3321 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3322 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3323 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
3324 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3325 // CHECK-NEXT: ret void
3327 v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); }
3328 // CHECK-LABEL: @xvsrarn_h_w(
3329 // CHECK-NEXT: entry:
3330 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3331 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3332 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
3333 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3334 // CHECK-NEXT: ret void
3336 v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); }
3337 // CHECK-LABEL: @xvsrarn_w_d(
3338 // CHECK-NEXT: entry:
3339 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3340 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3341 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
3342 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3343 // CHECK-NEXT: ret void
3345 v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); }
3346 // CHECK-LABEL: @xvssrarn_b_h(
3347 // CHECK-NEXT: entry:
3348 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3349 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3350 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
3351 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3352 // CHECK-NEXT: ret void
3354 v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); }
3355 // CHECK-LABEL: @xvssrarn_h_w(
3356 // CHECK-NEXT: entry:
3357 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3358 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3359 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
3360 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3361 // CHECK-NEXT: ret void
3363 v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); }
3364 // CHECK-LABEL: @xvssrarn_w_d(
3365 // CHECK-NEXT: entry:
3366 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3367 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3368 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
3369 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3370 // CHECK-NEXT: ret void
3372 v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); }
3373 // CHECK-LABEL: @xvssrarn_bu_h(
3374 // CHECK-NEXT: entry:
3375 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3376 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3377 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
3378 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3379 // CHECK-NEXT: ret void
3381 v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); }
3382 // CHECK-LABEL: @xvssrarn_hu_w(
3383 // CHECK-NEXT: entry:
3384 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3385 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3386 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
3387 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3388 // CHECK-NEXT: ret void
3390 v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); }
3391 // CHECK-LABEL: @xvssrarn_wu_d(
3392 // CHECK-NEXT: entry:
3393 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3394 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3395 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
3396 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3397 // CHECK-NEXT: ret void
3399 v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); }
3400 // CHECK-LABEL: @xvsrln_b_h(
3401 // CHECK-NEXT: entry:
3402 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3403 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3404 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
3405 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3406 // CHECK-NEXT: ret void
3408 v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); }
3409 // CHECK-LABEL: @xvsrln_h_w(
3410 // CHECK-NEXT: entry:
3411 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3412 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3413 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
3414 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3415 // CHECK-NEXT: ret void
3417 v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); }
3418 // CHECK-LABEL: @xvsrln_w_d(
3419 // CHECK-NEXT: entry:
3420 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3421 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3422 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
3423 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3424 // CHECK-NEXT: ret void
3426 v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); }
3427 // CHECK-LABEL: @xvssrln_bu_h(
3428 // CHECK-NEXT: entry:
3429 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3430 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3431 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
3432 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3433 // CHECK-NEXT: ret void
3435 v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); }
3436 // CHECK-LABEL: @xvssrln_hu_w(
3437 // CHECK-NEXT: entry:
3438 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3439 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3440 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
3441 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3442 // CHECK-NEXT: ret void
3444 v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); }
3445 // CHECK-LABEL: @xvssrln_wu_d(
3446 // CHECK-NEXT: entry:
3447 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3448 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3449 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
3450 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3451 // CHECK-NEXT: ret void
3453 v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); }
3454 // CHECK-LABEL: @xvsrlrn_b_h(
3455 // CHECK-NEXT: entry:
3456 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3457 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3458 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
3459 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3460 // CHECK-NEXT: ret void
3462 v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); }
3463 // CHECK-LABEL: @xvsrlrn_h_w(
3464 // CHECK-NEXT: entry:
3465 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3466 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3467 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
3468 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3469 // CHECK-NEXT: ret void
3471 v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); }
3472 // CHECK-LABEL: @xvsrlrn_w_d(
3473 // CHECK-NEXT: entry:
3474 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3475 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3476 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
3477 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3478 // CHECK-NEXT: ret void
3480 v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); }
3481 // CHECK-LABEL: @xvssrlrn_bu_h(
3482 // CHECK-NEXT: entry:
3483 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3484 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3485 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
3486 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3487 // CHECK-NEXT: ret void
3489 v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); }
3490 // CHECK-LABEL: @xvssrlrn_hu_w(
3491 // CHECK-NEXT: entry:
3492 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3493 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3494 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
3495 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3496 // CHECK-NEXT: ret void
3498 v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); }
3499 // CHECK-LABEL: @xvssrlrn_wu_d(
3500 // CHECK-NEXT: entry:
3501 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3502 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3503 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
3504 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3505 // CHECK-NEXT: ret void
3507 v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); }
3508 // CHECK-LABEL: @xvfrstpi_b(
3509 // CHECK-NEXT: entry:
3510 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3511 // CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3512 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
3513 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3514 // CHECK-NEXT: ret void
3516 v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); }
3517 // CHECK-LABEL: @xvfrstpi_h(
3518 // CHECK-NEXT: entry:
3519 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3520 // CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3521 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
3522 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3523 // CHECK-NEXT: ret void
3525 v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); }
3526 // CHECK-LABEL: @xvfrstp_b(
3527 // CHECK-NEXT: entry:
3528 // CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3529 // CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3530 // CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
3531 // CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
3532 // CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3533 // CHECK-NEXT: ret void
3535 v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); }
3536 // CHECK-LABEL: @xvfrstp_h(
3537 // CHECK-NEXT: entry:
3538 // CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3539 // CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3540 // CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
3541 // CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
3542 // CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3543 // CHECK-NEXT: ret void
3545 v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); }
3546 // CHECK-LABEL: @xvshuf4i_d(
3547 // CHECK-NEXT: entry:
3548 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3549 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3550 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
3551 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3552 // CHECK-NEXT: ret void
3554 v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); }
3555 // CHECK-LABEL: @xvbsrl_v(
3556 // CHECK-NEXT: entry:
3557 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3558 // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1)
3559 // CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3560 // CHECK-NEXT: ret void
3562 v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); }
3563 // CHECK-LABEL: @xvbsll_v(
3564 // CHECK-NEXT: entry:
3565 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3566 // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1)
3567 // CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3568 // CHECK-NEXT: ret void
3570 v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); }
3571 // CHECK-LABEL: @xvextrins_b(
3572 // CHECK-NEXT: entry:
3573 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3574 // CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3575 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
3576 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3577 // CHECK-NEXT: ret void
3579 v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); }
3580 // CHECK-LABEL: @xvextrins_h(
3581 // CHECK-NEXT: entry:
3582 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3583 // CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3584 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
3585 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3586 // CHECK-NEXT: ret void
3588 v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); }
3589 // CHECK-LABEL: @xvextrins_w(
3590 // CHECK-NEXT: entry:
3591 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3592 // CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3593 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
3594 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3595 // CHECK-NEXT: ret void
3597 v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); }
3598 // CHECK-LABEL: @xvextrins_d(
3599 // CHECK-NEXT: entry:
3600 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3601 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3602 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
3603 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3604 // CHECK-NEXT: ret void
3606 v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); }
3607 // CHECK-LABEL: @xvmskltz_b(
3608 // CHECK-NEXT: entry:
3609 // CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3610 // CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_112]])
3611 // CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3612 // CHECK-NEXT: ret void
3614 v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); }
3615 // CHECK-LABEL: @xvmskltz_h(
3616 // CHECK-NEXT: entry:
3617 // CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3618 // CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_112]])
3619 // CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3620 // CHECK-NEXT: ret void
3622 v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); }
3623 // CHECK-LABEL: @xvmskltz_w(
3624 // CHECK-NEXT: entry:
3625 // CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3626 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_112]])
3627 // CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3628 // CHECK-NEXT: ret void
3630 v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); }
3631 // CHECK-LABEL: @xvmskltz_d(
3632 // CHECK-NEXT: entry:
3633 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3634 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]])
3635 // CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3636 // CHECK-NEXT: ret void
3638 v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); }
3639 // CHECK-LABEL: @xvsigncov_b(
3640 // CHECK-NEXT: entry:
3641 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3642 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3643 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
3644 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3645 // CHECK-NEXT: ret void
3647 v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); }
3648 // CHECK-LABEL: @xvsigncov_h(
3649 // CHECK-NEXT: entry:
3650 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3651 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3652 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
3653 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3654 // CHECK-NEXT: ret void
3656 v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); }
3657 // CHECK-LABEL: @xvsigncov_w(
3658 // CHECK-NEXT: entry:
3659 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3660 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3661 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
3662 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3663 // CHECK-NEXT: ret void
3665 v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); }
3666 // CHECK-LABEL: @xvsigncov_d(
3667 // CHECK-NEXT: entry:
3668 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
3669 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
3670 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
3671 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
3672 // CHECK-NEXT: ret void
3674 v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); }
// CHECK-LABEL: @xvfmadd_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); }
// CHECK-LABEL: @xvfmadd_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); }
// CHECK-LABEL: @xvfmsub_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); }
// CHECK-LABEL: @xvfmsub_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); }
// CHECK-LABEL: @xvfnmadd_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); }
// CHECK-LABEL: @xvfnmadd_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); }
// CHECK-LABEL: @xvfnmsub_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); }
// CHECK-LABEL: @xvfnmsub_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); }
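// Float-to-integer conversions with the rounding mode encoded in the
// mnemonic: rne = to nearest even, rp = toward +inf, rm = toward -inf,
// rz = toward zero. The h/l variants convert only the high or low half of
// the source vector, widening each lane to 64 bits.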
// CHECK-LABEL: @xvftintrne_w_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]])
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); }
// CHECK-LABEL: @xvftintrne_l_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); }
// CHECK-LABEL: @xvftintrp_w_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]])
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); }
// CHECK-LABEL: @xvftintrp_l_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); }
// CHECK-LABEL: @xvftintrm_w_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]])
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); }
// CHECK-LABEL: @xvftintrm_l_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); }
// CHECK-LABEL: @xvftint_w_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); }
// CHECK-LABEL: @xvffint_s_l(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); }
// CHECK-LABEL: @xvftintrz_w_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); }
// CHECK-LABEL: @xvftintrp_w_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); }
// CHECK-LABEL: @xvftintrm_w_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); }
// CHECK-LABEL: @xvftintrne_w_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); }
// CHECK-LABEL: @xvftinth_l_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); }
// CHECK-LABEL: @xvftintl_l_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); }
// CHECK-LABEL: @xvffinth_d_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_112]])
// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); }
// CHECK-LABEL: @xvffintl_d_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_112]])
// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); }
// CHECK-LABEL: @xvftintrzh_l_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); }
// CHECK-LABEL: @xvftintrzl_l_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); }
// CHECK-LABEL: @xvftintrph_l_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); }
// CHECK-LABEL: @xvftintrpl_l_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); }
// CHECK-LABEL: @xvftintrmh_l_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); }
// CHECK-LABEL: @xvftintrml_l_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); }
// CHECK-LABEL: @xvftintrneh_l_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); }
// CHECK-LABEL: @xvftintrnel_l_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); }
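// xvfrint* round each element to an integral value while staying in
// floating-point format; the suffixes select the same rounding modes as the
// xvftint* conversions above.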
// CHECK-LABEL: @xvfrintrne_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]])
// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); }
// CHECK-LABEL: @xvfrintrne_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]])
// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); }
// CHECK-LABEL: @xvfrintrz_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]])
// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); }
// CHECK-LABEL: @xvfrintrz_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]])
// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); }
// CHECK-LABEL: @xvfrintrp_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]])
// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); }
// CHECK-LABEL: @xvfrintrp_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]])
// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); }
// CHECK-LABEL: @xvfrintrm_s(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]])
// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); }
// CHECK-LABEL: @xvfrintrm_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]])
// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); }
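// Vector memory ops: xvld/xvst take a base pointer plus a signed immediate
// byte offset, while xvstelm.{b,h,w,d} stores a single element, selected by
// the trailing index immediate, at the given offset.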
// CHECK-LABEL: @xvld(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); }
// CHECK-LABEL: @xvst(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1)
// CHECK-NEXT: ret void
//
void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); }
// CHECK-LABEL: @xvstelm_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1)
// CHECK-NEXT: ret void
//
void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); }
// CHECK-LABEL: @xvstelm_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1)
// CHECK-NEXT: ret void
//
void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); }
// CHECK-LABEL: @xvstelm_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1)
// CHECK-NEXT: ret void
//
void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); }
// CHECK-LABEL: @xvstelm_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1)
// CHECK-NEXT: ret void
//
void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); }
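// xvinsve0.{w,d} inserts element 0 of the second operand into the lane of
// the first operand selected by the immediate; xvpickve.{w,d} extracts the
// selected lane into a new vector.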
// CHECK-LABEL: @xvinsve0_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); }
// CHECK-LABEL: @xvinsve0_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); }
// CHECK-LABEL: @xvpickve_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); }
// CHECK-LABEL: @xvpickve_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); }
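// Saturating shift-right-and-narrow: xvssrlrn rounds before narrowing while
// xvssrln truncates; both halve the element width.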
// CHECK-LABEL: @xvssrlrn_b_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); }
// CHECK-LABEL: @xvssrlrn_h_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); }
// CHECK-LABEL: @xvssrlrn_w_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); }
// CHECK-LABEL: @xvssrln_b_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); }
// CHECK-LABEL: @xvssrln_h_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); }
// CHECK-LABEL: @xvssrln_w_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); }
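// xvorn computes a | ~b, and xvldi materializes an immediate-defined vector.
// The indexed loads/stores xvldx/xvstx take their offset in a 64-bit
// register rather than as an immediate, hence the i64 argument in the IR.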
// CHECK-LABEL: @xvorn_v(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); }
// CHECK-LABEL: @xvldi(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvldi() { return __lasx_xvldi(1); }
// CHECK-LABEL: @xvldx(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1), !noalias [[META5:![0-9]+]]
// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); }
// CHECK-LABEL: @xvstx(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_112]], ptr [[_2:%.*]], i64 1)
// CHECK-NEXT: ret void
//
void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); }
// CHECK-LABEL: @xvextl_qu_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); }
// CHECK-LABEL: @xvinsgr2vr_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); }
// CHECK-LABEL: @xvinsgr2vr_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); }
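// xvreplve0.{b,h,w,d} replicates element 0 across the whole 256-bit vector;
// the .q form replicates the low 128-bit half.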
// CHECK-LABEL: @xvreplve0_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_112]])
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); }
// CHECK-LABEL: @xvreplve0_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_112]])
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); }
// CHECK-LABEL: @xvreplve0_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_112]])
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); }
// CHECK-LABEL: @xvreplve0_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); }
// CHECK-LABEL: @xvreplve0_q(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_112]])
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); }
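// vext2xv.*: widening element conversions over the full 256-bit source,
// sign-extending by default and zero-extending in the u-suffixed forms
// (e.g. h.b widens bytes to halfwords, du.bu zero-extends bytes to
// doublewords).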
// CHECK-LABEL: @vext2xv_h_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_112]])
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); }
// CHECK-LABEL: @vext2xv_w_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_112]])
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); }
// CHECK-LABEL: @vext2xv_d_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_112]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); }
// CHECK-LABEL: @vext2xv_w_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_112]])
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); }
// CHECK-LABEL: @vext2xv_d_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_112]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); }
// CHECK-LABEL: @vext2xv_d_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_112]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); }
// CHECK-LABEL: @vext2xv_hu_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_112]])
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); }
// CHECK-LABEL: @vext2xv_wu_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_112]])
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); }
// CHECK-LABEL: @vext2xv_du_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_112]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); }
// CHECK-LABEL: @vext2xv_wu_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_112]])
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); }
// CHECK-LABEL: @vext2xv_du_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_112]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); }
// CHECK-LABEL: @vext2xv_du_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_112]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); }
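// Permutes: xvpermi.q selects 128-bit lanes by immediate, xvpermi.d selects
// doublewords by immediate, and xvperm.w shuffles words using the second
// vector as indices.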
// CHECK-LABEL: @xvpermi_q(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); }
// CHECK-LABEL: @xvpermi_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); }
// CHECK-LABEL: @xvperm_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); }
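// xvldrepl.{b,h,w,d} loads one scalar from memory and broadcasts it to every
// element.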
// CHECK-LABEL: @xvldrepl_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); }
// CHECK-LABEL: @xvldrepl_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2)
// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); }
// CHECK-LABEL: @xvldrepl_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4)
// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); }
// CHECK-LABEL: @xvldrepl_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8)
// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); }
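// xvpickve2gr moves the selected vector element into a general-purpose
// register; the wu/du forms zero-extend, matching the unsigned C return
// types used here.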
// CHECK-LABEL: @xvpickve2gr_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: ret i32 [[TMP1]]
//
int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); }
// CHECK-LABEL: @xvpickve2gr_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: ret i32 [[TMP1]]
//
unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); }
// CHECK-LABEL: @xvpickve2gr_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: ret i64 [[TMP1]]
//
long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); }
// CHECK-LABEL: @xvpickve2gr_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: ret i64 [[TMP1]]
//
unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); }
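// Widening even/odd arithmetic: xv{add,sub,mul}wev combine the even-numbered
// source elements and xvaddwod the odd-numbered ones, producing results at
// twice the element width; u-suffixed variants treat their sources as
// unsigned.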
4419 // CHECK-LABEL: @xvaddwev_q_d(
4420 // CHECK-NEXT: entry:
4421 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4422 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4423 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
4424 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4425 // CHECK-NEXT: ret void
4427 v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); }
4428 // CHECK-LABEL: @xvaddwev_d_w(
4429 // CHECK-NEXT: entry:
4430 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4431 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4432 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
4433 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4434 // CHECK-NEXT: ret void
4436 v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); }
4437 // CHECK-LABEL: @xvaddwev_w_h(
4438 // CHECK-NEXT: entry:
4439 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4440 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4441 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
4442 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4443 // CHECK-NEXT: ret void
4445 v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); }
4446 // CHECK-LABEL: @xvaddwev_h_b(
4447 // CHECK-NEXT: entry:
4448 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4449 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4450 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
4451 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4452 // CHECK-NEXT: ret void
4454 v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); }
4455 // CHECK-LABEL: @xvaddwev_q_du(
4456 // CHECK-NEXT: entry:
4457 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4458 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4459 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
4460 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4461 // CHECK-NEXT: ret void
4463 v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); }
4464 // CHECK-LABEL: @xvaddwev_d_wu(
4465 // CHECK-NEXT: entry:
4466 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4467 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4468 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
4469 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4470 // CHECK-NEXT: ret void
4472 v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); }
4473 // CHECK-LABEL: @xvaddwev_w_hu(
4474 // CHECK-NEXT: entry:
4475 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4476 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4477 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
4478 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4479 // CHECK-NEXT: ret void
4481 v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); }
4482 // CHECK-LABEL: @xvaddwev_h_bu(
4483 // CHECK-NEXT: entry:
4484 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4485 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4486 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
4487 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4488 // CHECK-NEXT: ret void
4490 v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); }
4491 // CHECK-LABEL: @xvsubwev_q_d(
4492 // CHECK-NEXT: entry:
4493 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4494 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4495 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
4496 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4497 // CHECK-NEXT: ret void
4499 v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); }
4500 // CHECK-LABEL: @xvsubwev_d_w(
4501 // CHECK-NEXT: entry:
4502 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4503 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4504 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
4505 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4506 // CHECK-NEXT: ret void
4508 v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); }
4509 // CHECK-LABEL: @xvsubwev_w_h(
4510 // CHECK-NEXT: entry:
4511 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4512 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4513 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
4514 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4515 // CHECK-NEXT: ret void
4517 v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); }
4518 // CHECK-LABEL: @xvsubwev_h_b(
4519 // CHECK-NEXT: entry:
4520 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4521 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4522 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
4523 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4524 // CHECK-NEXT: ret void
4526 v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); }
4527 // CHECK-LABEL: @xvsubwev_q_du(
4528 // CHECK-NEXT: entry:
4529 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4530 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4531 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
4532 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4533 // CHECK-NEXT: ret void
4535 v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); }
4536 // CHECK-LABEL: @xvsubwev_d_wu(
4537 // CHECK-NEXT: entry:
4538 // CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4539 // CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4540 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
4541 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4542 // CHECK-NEXT: ret void
4544 v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); }
4545 // CHECK-LABEL: @xvsubwev_w_hu(
4546 // CHECK-NEXT: entry:
4547 // CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4548 // CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4549 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
4550 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4551 // CHECK-NEXT: ret void
4553 v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); }
4554 // CHECK-LABEL: @xvsubwev_h_bu(
4555 // CHECK-NEXT: entry:
4556 // CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
4557 // CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
4558 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
4559 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
4560 // CHECK-NEXT: ret void
4562 v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); }
// CHECK-LABEL: @xvmulwev_q_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); }
// CHECK-LABEL: @xvmulwev_d_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); }
// CHECK-LABEL: @xvmulwev_w_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); }
// CHECK-LABEL: @xvmulwev_h_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); }
// CHECK-LABEL: @xvmulwev_q_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); }
// CHECK-LABEL: @xvmulwev_d_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); }
// CHECK-LABEL: @xvmulwev_w_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); }
// CHECK-LABEL: @xvmulwev_h_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); }
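// xvaddwod.* mirrors xvaddwev.*, but operates on the odd-indexed source
// elements: each odd pair is widened (sign- or zero-extended per the "u"
// suffix) and added into the double-width result lane.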
// CHECK-LABEL: @xvaddwod_q_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); }
// CHECK-LABEL: @xvaddwod_d_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); }
// CHECK-LABEL: @xvaddwod_w_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); }
// CHECK-LABEL: @xvaddwod_h_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); }
// CHECK-LABEL: @xvaddwod_q_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); }
// CHECK-LABEL: @xvaddwod_d_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); }
// CHECK-LABEL: @xvaddwod_w_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); }
// CHECK-LABEL: @xvaddwod_h_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); }
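// xvsubwod.* is the odd-element counterpart of xvsubwev.*: odd-indexed
// elements are widened and subtracted (first source minus second).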
// CHECK-LABEL: @xvsubwod_q_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); }
// CHECK-LABEL: @xvsubwod_d_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); }
// CHECK-LABEL: @xvsubwod_w_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); }
// CHECK-LABEL: @xvsubwod_h_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); }
// CHECK-LABEL: @xvsubwod_q_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); }
// CHECK-LABEL: @xvsubwod_d_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); }
// CHECK-LABEL: @xvsubwod_w_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); }
// CHECK-LABEL: @xvsubwod_h_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); }
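// xvmulwod.* likewise multiplies the odd-indexed elements into double-width
// result lanes, matching the xvmulwev.* forms above.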
// CHECK-LABEL: @xvmulwod_q_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); }
// CHECK-LABEL: @xvmulwod_d_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); }
// CHECK-LABEL: @xvmulwod_w_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); }
// CHECK-LABEL: @xvmulwod_h_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); }
// CHECK-LABEL: @xvmulwod_q_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); }
// CHECK-LABEL: @xvmulwod_d_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); }
// CHECK-LABEL: @xvmulwod_w_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); }
// CHECK-LABEL: @xvmulwod_h_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); }
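// The *_wu_w / *_hu_h / *_bu_b add and multiply forms below mix signedness:
// the first source is zero-extended (unsigned) and the second sign-extended
// before the widening op, which is why each prototype pairs an unsigned
// vector type with the matching signed one (e.g. v8u32 with v8i32).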
// CHECK-LABEL: @xvaddwev_d_wu_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); }
// CHECK-LABEL: @xvaddwev_w_hu_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); }
// CHECK-LABEL: @xvaddwev_h_bu_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); }
// CHECK-LABEL: @xvmulwev_d_wu_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); }
// CHECK-LABEL: @xvmulwev_w_hu_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); }
// CHECK-LABEL: @xvmulwev_h_bu_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); }
// CHECK-LABEL: @xvaddwod_d_wu_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); }
// CHECK-LABEL: @xvaddwod_w_hu_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); }
// CHECK-LABEL: @xvaddwod_h_bu_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); }
// CHECK-LABEL: @xvmulwod_d_wu_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); }
// CHECK-LABEL: @xvmulwod_w_hu_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); }
// CHECK-LABEL: @xvmulwod_h_bu_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); }
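// xvhaddw/xvhsubw are horizontal widening ops: per the LASX documentation,
// the odd-indexed elements of the first source are added to (or have
// subtracted from them) the even-indexed elements of the second, with the
// .q.d/.qu.du forms producing one 128-bit result per 128-bit lane.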
// CHECK-LABEL: @xvhaddw_q_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); }
// CHECK-LABEL: @xvhaddw_qu_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); }
// CHECK-LABEL: @xvhsubw_q_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); }
// CHECK-LABEL: @xvhsubw_qu_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); }
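// The xvmaddw{ev,od}.* family is the accumulating variant: the widening
// product of the even (or odd) elements of the second and third operands is
// added to the first operand, which already has the wide element type.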
// CHECK-LABEL: @xvmaddwev_q_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_d_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_w_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_h_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_q_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_d_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_w_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_h_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_q_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_d_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_w_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_h_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_q_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_d_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_w_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_h_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); }
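// The *_q_du_d / *_d_wu_w / *_w_hu_h / *_h_bu_b multiply-accumulate forms
// below again mix signedness: the second operand is treated as unsigned and
// the third as signed when forming the widening product.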
// CHECK-LABEL: @xvmaddwev_q_du_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_d_wu_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_w_hu_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwev_h_bu_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_q_du_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_d_wu_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_w_hu_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); }
// CHECK-LABEL: @xvmaddwod_h_bu_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); }
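// xvrotr.* rotates each element of the first source right by the amount
// given in the corresponding element of the second source (taken modulo the
// element width).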
// CHECK-LABEL: @xvrotr_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); }
// CHECK-LABEL: @xvrotr_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); }
// CHECK-LABEL: @xvrotr_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); }
// CHECK-LABEL: @xvrotr_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); }
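// xvadd.q and xvsub.q perform full 128-bit integer addition and subtraction
// within each 128-bit half of the 256-bit register.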
// CHECK-LABEL: @xvadd_q(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); }
// CHECK-LABEL: @xvsub_q(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); }
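// The .q.du.d widening forms below treat the first 64-bit source element as
// unsigned and the second as signed, producing a signed 128-bit result; this
// covers the even- and odd-element add and multiply variants.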
// CHECK-LABEL: @xvaddwev_q_du_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); }
// CHECK-LABEL: @xvaddwod_q_du_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); }
// CHECK-LABEL: @xvmulwev_q_du_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); }
// CHECK-LABEL: @xvmulwod_q_du_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); }
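// xvmskgez.b and xvmsknz.b compute per-byte predicate masks; per the spec,
// they pack one bit per source byte (set when the byte is >= 0, respectively
// non-zero) into the low bits of each 128-bit lane of the result.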
// CHECK-LABEL: @xvmskgez_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_112]])
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); }
// CHECK-LABEL: @xvmsknz_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_112]])
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); }
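// xvexth.* widens the elements in the high half of each 128-bit lane to the
// next wider type, sign-extending for the plain forms and zero-extending for
// the .{hu,wu,du,qu} unsigned forms.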
// CHECK-LABEL: @xvexth_h_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_112]])
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); }
// CHECK-LABEL: @xvexth_w_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_112]])
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); }
// CHECK-LABEL: @xvexth_d_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_112]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); }
// CHECK-LABEL: @xvexth_q_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); }
// CHECK-LABEL: @xvexth_hu_bu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_112]])
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); }
// CHECK-LABEL: @xvexth_wu_hu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_112]])
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); }
// CHECK-LABEL: @xvexth_du_wu(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_112]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); }
// CHECK-LABEL: @xvexth_qu_du(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); }
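// xvrotri.* rotates each element right by an immediate; the tests use the
// representative immediate 1.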
// CHECK-LABEL: @xvrotri_b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); }
// CHECK-LABEL: @xvrotri_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); }
// CHECK-LABEL: @xvrotri_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); }
// CHECK-LABEL: @xvrotri_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); }
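// xvextl.q.d sign-extends the low 64-bit element of each 128-bit lane to a
// full 128-bit value (the low-half counterpart of xvexth.q.d above).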
// CHECK-LABEL: @xvextl_q_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]])
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); }
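// xvsrlni.* performs a logical right shift by the immediate on the wider
// elements of both sources and packs the truncated results into the
// narrower destination elements.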
// CHECK-LABEL: @xvsrlni_b_h(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); }
// CHECK-LABEL: @xvsrlni_h_w(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); }
// CHECK-LABEL: @xvsrlni_w_d(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); }
// CHECK-LABEL: @xvsrlni_d_q(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
// CHECK-NEXT: ret void
//
v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); }
5481 // CHECK-LABEL: @xvsrlrni_b_h(
5482 // CHECK-NEXT: entry:
5483 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5484 // CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5485 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
5486 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5487 // CHECK-NEXT: ret void
5489 v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); }
5490 // CHECK-LABEL: @xvsrlrni_h_w(
5491 // CHECK-NEXT: entry:
5492 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5493 // CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5494 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
5495 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5496 // CHECK-NEXT: ret void
5498 v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); }
5499 // CHECK-LABEL: @xvsrlrni_w_d(
5500 // CHECK-NEXT: entry:
5501 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5502 // CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5503 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
5504 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5505 // CHECK-NEXT: ret void
5507 v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); }
5508 // CHECK-LABEL: @xvsrlrni_d_q(
5509 // CHECK-NEXT: entry:
5510 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5511 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5512 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
5513 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5514 // CHECK-NEXT: ret void
5516 v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); }
5517 // CHECK-LABEL: @xvssrlni_b_h(
5518 // CHECK-NEXT: entry:
5519 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5520 // CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5521 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
5522 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5523 // CHECK-NEXT: ret void
5525 v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); }
5526 // CHECK-LABEL: @xvssrlni_h_w(
5527 // CHECK-NEXT: entry:
5528 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5529 // CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5530 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
5531 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5532 // CHECK-NEXT: ret void
5534 v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); }
5535 // CHECK-LABEL: @xvssrlni_w_d(
5536 // CHECK-NEXT: entry:
5537 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5538 // CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5539 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
5540 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5541 // CHECK-NEXT: ret void
5543 v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); }
5544 // CHECK-LABEL: @xvssrlni_d_q(
5545 // CHECK-NEXT: entry:
5546 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5547 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5548 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
5549 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5550 // CHECK-NEXT: ret void
5552 v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); }
5553 // CHECK-LABEL: @xvssrlni_bu_h(
5554 // CHECK-NEXT: entry:
5555 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5556 // CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5557 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
5558 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5559 // CHECK-NEXT: ret void
5561 v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); }
5562 // CHECK-LABEL: @xvssrlni_hu_w(
5563 // CHECK-NEXT: entry:
5564 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5565 // CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5566 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
5567 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5568 // CHECK-NEXT: ret void
5570 v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); }
5571 // CHECK-LABEL: @xvssrlni_wu_d(
5572 // CHECK-NEXT: entry:
5573 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5574 // CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5575 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
5576 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5577 // CHECK-NEXT: ret void
5579 v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); }
5580 // CHECK-LABEL: @xvssrlni_du_q(
5581 // CHECK-NEXT: entry:
5582 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5583 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5584 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
5585 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5586 // CHECK-NEXT: ret void
5588 v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); }
5589 // CHECK-LABEL: @xvssrlrni_b_h(
5590 // CHECK-NEXT: entry:
5591 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5592 // CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5593 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
5594 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5595 // CHECK-NEXT: ret void
5597 v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); }
5598 // CHECK-LABEL: @xvssrlrni_h_w(
5599 // CHECK-NEXT: entry:
5600 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5601 // CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5602 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
5603 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5604 // CHECK-NEXT: ret void
5606 v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); }
5607 // CHECK-LABEL: @xvssrlrni_w_d(
5608 // CHECK-NEXT: entry:
5609 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5610 // CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5611 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
5612 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5613 // CHECK-NEXT: ret void
5615 v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); }
5616 // CHECK-LABEL: @xvssrlrni_d_q(
5617 // CHECK-NEXT: entry:
5618 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5619 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5620 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
5621 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5622 // CHECK-NEXT: ret void
5624 v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); }
5625 // CHECK-LABEL: @xvssrlrni_bu_h(
5626 // CHECK-NEXT: entry:
5627 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5628 // CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5629 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
5630 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5631 // CHECK-NEXT: ret void
5633 v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); }
5634 // CHECK-LABEL: @xvssrlrni_hu_w(
5635 // CHECK-NEXT: entry:
5636 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5637 // CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5638 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
5639 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5640 // CHECK-NEXT: ret void
5642 v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); }
5643 // CHECK-LABEL: @xvssrlrni_wu_d(
5644 // CHECK-NEXT: entry:
5645 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5646 // CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5647 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
5648 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5649 // CHECK-NEXT: ret void
5651 v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); }
5652 // CHECK-LABEL: @xvssrlrni_du_q(
5653 // CHECK-NEXT: entry:
5654 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5655 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5656 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
5657 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5658 // CHECK-NEXT: ret void
5660 v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); }
5661 // CHECK-LABEL: @xvsrani_b_h(
5662 // CHECK-NEXT: entry:
5663 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5664 // CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5665 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
5666 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5667 // CHECK-NEXT: ret void
5669 v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); }
5670 // CHECK-LABEL: @xvsrani_h_w(
5671 // CHECK-NEXT: entry:
5672 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5673 // CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5674 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
5675 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5676 // CHECK-NEXT: ret void
5678 v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); }
5679 // CHECK-LABEL: @xvsrani_w_d(
5680 // CHECK-NEXT: entry:
5681 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5682 // CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5683 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
5684 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5685 // CHECK-NEXT: ret void
5687 v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); }
5688 // CHECK-LABEL: @xvsrani_d_q(
5689 // CHECK-NEXT: entry:
5690 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5691 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5692 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
5693 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5694 // CHECK-NEXT: ret void
5696 v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); }
5697 // CHECK-LABEL: @xvsrarni_b_h(
5698 // CHECK-NEXT: entry:
5699 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5700 // CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5701 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
5702 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5703 // CHECK-NEXT: ret void
5705 v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); }
5706 // CHECK-LABEL: @xvsrarni_h_w(
5707 // CHECK-NEXT: entry:
5708 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5709 // CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5710 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
5711 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5712 // CHECK-NEXT: ret void
5714 v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); }
5715 // CHECK-LABEL: @xvsrarni_w_d(
5716 // CHECK-NEXT: entry:
5717 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5718 // CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5719 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
5720 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5721 // CHECK-NEXT: ret void
5723 v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); }
5724 // CHECK-LABEL: @xvsrarni_d_q(
5725 // CHECK-NEXT: entry:
5726 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5727 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5728 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
5729 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5730 // CHECK-NEXT: ret void
5732 v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); }
5733 // CHECK-LABEL: @xvssrani_b_h(
5734 // CHECK-NEXT: entry:
5735 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5736 // CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5737 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
5738 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5739 // CHECK-NEXT: ret void
5741 v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); }
5742 // CHECK-LABEL: @xvssrani_h_w(
5743 // CHECK-NEXT: entry:
5744 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5745 // CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5746 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
5747 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5748 // CHECK-NEXT: ret void
5750 v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); }
5751 // CHECK-LABEL: @xvssrani_w_d(
5752 // CHECK-NEXT: entry:
5753 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5754 // CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5755 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
5756 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5757 // CHECK-NEXT: ret void
5759 v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); }
5760 // CHECK-LABEL: @xvssrani_d_q(
5761 // CHECK-NEXT: entry:
5762 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5763 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5764 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
5765 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5766 // CHECK-NEXT: ret void
5768 v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); }
5769 // CHECK-LABEL: @xvssrani_bu_h(
5770 // CHECK-NEXT: entry:
5771 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5772 // CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5773 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
5774 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5775 // CHECK-NEXT: ret void
5777 v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); }
5778 // CHECK-LABEL: @xvssrani_hu_w(
5779 // CHECK-NEXT: entry:
5780 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5781 // CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5782 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
5783 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5784 // CHECK-NEXT: ret void
5786 v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); }
5787 // CHECK-LABEL: @xvssrani_wu_d(
5788 // CHECK-NEXT: entry:
5789 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5790 // CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5791 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
5792 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5793 // CHECK-NEXT: ret void
5795 v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); }
5796 // CHECK-LABEL: @xvssrani_du_q(
5797 // CHECK-NEXT: entry:
5798 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5799 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5800 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
5801 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5802 // CHECK-NEXT: ret void
5804 v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); }
5805 // CHECK-LABEL: @xvssrarni_b_h(
5806 // CHECK-NEXT: entry:
5807 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5808 // CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5809 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
5810 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5811 // CHECK-NEXT: ret void
5813 v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); }
5814 // CHECK-LABEL: @xvssrarni_h_w(
5815 // CHECK-NEXT: entry:
5816 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5817 // CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5818 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
5819 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5820 // CHECK-NEXT: ret void
5822 v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); }
5823 // CHECK-LABEL: @xvssrarni_w_d(
5824 // CHECK-NEXT: entry:
5825 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5826 // CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5827 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
5828 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5829 // CHECK-NEXT: ret void
5831 v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); }
5832 // CHECK-LABEL: @xvssrarni_d_q(
5833 // CHECK-NEXT: entry:
5834 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5835 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5836 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
5837 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5838 // CHECK-NEXT: ret void
5840 v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); }
5841 // CHECK-LABEL: @xvssrarni_bu_h(
5842 // CHECK-NEXT: entry:
5843 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5844 // CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5845 // CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
5846 // CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5847 // CHECK-NEXT: ret void
5849 v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); }
5850 // CHECK-LABEL: @xvssrarni_hu_w(
5851 // CHECK-NEXT: entry:
5852 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5853 // CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5854 // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
5855 // CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5856 // CHECK-NEXT: ret void
5858 v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); }
5859 // CHECK-LABEL: @xvssrarni_wu_d(
5860 // CHECK-NEXT: entry:
5861 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5862 // CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5863 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
5864 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5865 // CHECK-NEXT: ret void
5867 v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); }
5868 // CHECK-LABEL: @xvssrarni_du_q(
5869 // CHECK-NEXT: entry:
5870 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5871 // CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5872 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
5873 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5874 // CHECK-NEXT: ret void
5876 v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); }
5877 // CHECK-LABEL: @xbnz_b(
5878 // CHECK-NEXT: entry:
5879 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5880 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]])
5881 // CHECK-NEXT: ret i32 [[TMP1]]
5883 int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); }
5884 // CHECK-LABEL: @xbnz_d(
5885 // CHECK-NEXT: entry:
5886 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5887 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]])
5888 // CHECK-NEXT: ret i32 [[TMP1]]
5890 int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); }
5891 // CHECK-LABEL: @xbnz_h(
5892 // CHECK-NEXT: entry:
5893 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5894 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]])
5895 // CHECK-NEXT: ret i32 [[TMP1]]
5897 int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); }
5898 // CHECK-LABEL: @xbnz_v(
5899 // CHECK-NEXT: entry:
5900 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5901 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]])
5902 // CHECK-NEXT: ret i32 [[TMP1]]
5904 int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); }
5905 // CHECK-LABEL: @xbnz_w(
5906 // CHECK-NEXT: entry:
5907 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5908 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]])
5909 // CHECK-NEXT: ret i32 [[TMP1]]
5911 int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); }
5912 // CHECK-LABEL: @xbz_b(
5913 // CHECK-NEXT: entry:
5914 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5915 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]])
5916 // CHECK-NEXT: ret i32 [[TMP1]]
5918 int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); }
5919 // CHECK-LABEL: @xbz_d(
5920 // CHECK-NEXT: entry:
5921 // CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5922 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]])
5923 // CHECK-NEXT: ret i32 [[TMP1]]
5925 int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); }
5926 // CHECK-LABEL: @xbz_h(
5927 // CHECK-NEXT: entry:
5928 // CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5929 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]])
5930 // CHECK-NEXT: ret i32 [[TMP1]]
5932 int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); }
5933 // CHECK-LABEL: @xbz_v(
5934 // CHECK-NEXT: entry:
5935 // CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5936 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]])
5937 // CHECK-NEXT: ret i32 [[TMP1]]
5939 int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); }
5940 // CHECK-LABEL: @xbz_w(
5941 // CHECK-NEXT: entry:
5942 // CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5943 // CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]])
5944 // CHECK-NEXT: ret i32 [[TMP1]]
5946 int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); }
5947 // CHECK-LABEL: @xvfcmp_caf_d(
5948 // CHECK-NEXT: entry:
5949 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5950 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5951 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]])
5952 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5953 // CHECK-NEXT: ret void
5955 v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); }
5956 // CHECK-LABEL: @xvfcmp_caf_s(
5957 // CHECK-NEXT: entry:
5958 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5959 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5960 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]])
5961 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5962 // CHECK-NEXT: ret void
5964 v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); }
5965 // CHECK-LABEL: @xvfcmp_ceq_d(
5966 // CHECK-NEXT: entry:
5967 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5968 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5969 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]])
5970 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5971 // CHECK-NEXT: ret void
5973 v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); }
5974 // CHECK-LABEL: @xvfcmp_ceq_s(
5975 // CHECK-NEXT: entry:
5976 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5977 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5978 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]])
5979 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5980 // CHECK-NEXT: ret void
5982 v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); }
5983 // CHECK-LABEL: @xvfcmp_cle_d(
5984 // CHECK-NEXT: entry:
5985 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5986 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5987 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]])
5988 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5989 // CHECK-NEXT: ret void
5991 v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); }
5992 // CHECK-LABEL: @xvfcmp_cle_s(
5993 // CHECK-NEXT: entry:
5994 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
5995 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
5996 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]])
5997 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
5998 // CHECK-NEXT: ret void
6000 v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); }
6001 // CHECK-LABEL: @xvfcmp_clt_d(
6002 // CHECK-NEXT: entry:
6003 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6004 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6005 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]])
6006 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6007 // CHECK-NEXT: ret void
6009 v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); }
6010 // CHECK-LABEL: @xvfcmp_clt_s(
6011 // CHECK-NEXT: entry:
6012 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6013 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6014 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]])
6015 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6016 // CHECK-NEXT: ret void
6018 v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); }
6019 // CHECK-LABEL: @xvfcmp_cne_d(
6020 // CHECK-NEXT: entry:
6021 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6022 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6023 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]])
6024 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6025 // CHECK-NEXT: ret void
6027 v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); }
6028 // CHECK-LABEL: @xvfcmp_cne_s(
6029 // CHECK-NEXT: entry:
6030 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6031 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6032 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]])
6033 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6034 // CHECK-NEXT: ret void
6036 v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); }
6037 // CHECK-LABEL: @xvfcmp_cor_d(
6038 // CHECK-NEXT: entry:
6039 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6040 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6041 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]])
6042 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6043 // CHECK-NEXT: ret void
6045 v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); }
6046 // CHECK-LABEL: @xvfcmp_cor_s(
6047 // CHECK-NEXT: entry:
6048 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6049 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6050 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]])
6051 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6052 // CHECK-NEXT: ret void
6054 v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); }
6055 // CHECK-LABEL: @xvfcmp_cueq_d(
6056 // CHECK-NEXT: entry:
6057 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6058 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6059 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]])
6060 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6061 // CHECK-NEXT: ret void
6063 v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); }
6064 // CHECK-LABEL: @xvfcmp_cueq_s(
6065 // CHECK-NEXT: entry:
6066 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6067 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6068 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]])
6069 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6070 // CHECK-NEXT: ret void
6072 v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); }
6073 // CHECK-LABEL: @xvfcmp_cule_d(
6074 // CHECK-NEXT: entry:
6075 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6076 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6077 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]])
6078 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6079 // CHECK-NEXT: ret void
6081 v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); }
6082 // CHECK-LABEL: @xvfcmp_cule_s(
6083 // CHECK-NEXT: entry:
6084 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6085 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6086 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]])
6087 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6088 // CHECK-NEXT: ret void
6090 v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); }
6091 // CHECK-LABEL: @xvfcmp_cult_d(
6092 // CHECK-NEXT: entry:
6093 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6094 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6095 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]])
6096 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6097 // CHECK-NEXT: ret void
6099 v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); }
6100 // CHECK-LABEL: @xvfcmp_cult_s(
6101 // CHECK-NEXT: entry:
6102 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6103 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6104 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]])
6105 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6106 // CHECK-NEXT: ret void
6108 v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); }
6109 // CHECK-LABEL: @xvfcmp_cun_d(
6110 // CHECK-NEXT: entry:
6111 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6112 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6113 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]])
6114 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6115 // CHECK-NEXT: ret void
6117 v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); }
6118 // CHECK-LABEL: @xvfcmp_cune_d(
6119 // CHECK-NEXT: entry:
6120 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6121 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6122 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]])
6123 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6124 // CHECK-NEXT: ret void
6126 v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); }
6127 // CHECK-LABEL: @xvfcmp_cune_s(
6128 // CHECK-NEXT: entry:
6129 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6130 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6131 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]])
6132 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6133 // CHECK-NEXT: ret void
6135 v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); }
6136 // CHECK-LABEL: @xvfcmp_cun_s(
6137 // CHECK-NEXT: entry:
6138 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6139 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6140 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]])
6141 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6142 // CHECK-NEXT: ret void
6144 v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); }
6145 // CHECK-LABEL: @xvfcmp_saf_d(
6146 // CHECK-NEXT: entry:
6147 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6148 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6149 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]])
6150 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6151 // CHECK-NEXT: ret void
6153 v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); }
6154 // CHECK-LABEL: @xvfcmp_saf_s(
6155 // CHECK-NEXT: entry:
6156 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6157 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6158 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]])
6159 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6160 // CHECK-NEXT: ret void
6162 v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); }
6163 // CHECK-LABEL: @xvfcmp_seq_d(
6164 // CHECK-NEXT: entry:
6165 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6166 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6167 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]])
6168 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6169 // CHECK-NEXT: ret void
6171 v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); }
6172 // CHECK-LABEL: @xvfcmp_seq_s(
6173 // CHECK-NEXT: entry:
6174 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6175 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6176 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]])
6177 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6178 // CHECK-NEXT: ret void
6180 v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); }
6181 // CHECK-LABEL: @xvfcmp_sle_d(
6182 // CHECK-NEXT: entry:
6183 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6184 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6185 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]])
6186 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6187 // CHECK-NEXT: ret void
6189 v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); }
6190 // CHECK-LABEL: @xvfcmp_sle_s(
6191 // CHECK-NEXT: entry:
6192 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6193 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6194 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]])
6195 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6196 // CHECK-NEXT: ret void
6198 v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); }
6199 // CHECK-LABEL: @xvfcmp_slt_d(
6200 // CHECK-NEXT: entry:
6201 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6202 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6203 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]])
6204 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6205 // CHECK-NEXT: ret void
6207 v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); }
6208 // CHECK-LABEL: @xvfcmp_slt_s(
6209 // CHECK-NEXT: entry:
6210 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6211 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6212 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]])
6213 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6214 // CHECK-NEXT: ret void
6216 v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); }
6217 // CHECK-LABEL: @xvfcmp_sne_d(
6218 // CHECK-NEXT: entry:
6219 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6220 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6221 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]])
6222 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6223 // CHECK-NEXT: ret void
6225 v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); }
6226 // CHECK-LABEL: @xvfcmp_sne_s(
6227 // CHECK-NEXT: entry:
6228 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6229 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6230 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]])
6231 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6232 // CHECK-NEXT: ret void
6234 v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); }
6235 // CHECK-LABEL: @xvfcmp_sor_d(
6236 // CHECK-NEXT: entry:
6237 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6238 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6239 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]])
6240 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6241 // CHECK-NEXT: ret void
6243 v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); }
6244 // CHECK-LABEL: @xvfcmp_sor_s(
6245 // CHECK-NEXT: entry:
6246 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6247 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6248 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]])
6249 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6250 // CHECK-NEXT: ret void
6252 v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); }
6253 // CHECK-LABEL: @xvfcmp_sueq_d(
6254 // CHECK-NEXT: entry:
6255 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6256 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6257 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]])
6258 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6259 // CHECK-NEXT: ret void
6261 v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); }
6262 // CHECK-LABEL: @xvfcmp_sueq_s(
6263 // CHECK-NEXT: entry:
6264 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6265 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6266 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]])
6267 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6268 // CHECK-NEXT: ret void
6270 v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); }
6271 // CHECK-LABEL: @xvfcmp_sule_d(
6272 // CHECK-NEXT: entry:
6273 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6274 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6275 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]])
6276 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6277 // CHECK-NEXT: ret void
6279 v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); }
6280 // CHECK-LABEL: @xvfcmp_sule_s(
6281 // CHECK-NEXT: entry:
6282 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6283 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6284 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]])
6285 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6286 // CHECK-NEXT: ret void
6288 v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); }
6289 // CHECK-LABEL: @xvfcmp_sult_d(
6290 // CHECK-NEXT: entry:
6291 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6292 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6293 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]])
6294 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6295 // CHECK-NEXT: ret void
6297 v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); }
6298 // CHECK-LABEL: @xvfcmp_sult_s(
6299 // CHECK-NEXT: entry:
6300 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6301 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6302 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]])
6303 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6304 // CHECK-NEXT: ret void
6306 v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); }
6307 // CHECK-LABEL: @xvfcmp_sun_d(
6308 // CHECK-NEXT: entry:
6309 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6310 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6311 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]])
6312 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6313 // CHECK-NEXT: ret void
6315 v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); }
6316 // CHECK-LABEL: @xvfcmp_sune_d(
6317 // CHECK-NEXT: entry:
6318 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6319 // CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6320 // CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]])
6321 // CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6322 // CHECK-NEXT: ret void
6324 v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); }
6325 // CHECK-LABEL: @xvfcmp_sune_s(
6326 // CHECK-NEXT: entry:
6327 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6328 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6329 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]])
6330 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6331 // CHECK-NEXT: ret void
6333 v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); }
6334 // CHECK-LABEL: @xvfcmp_sun_s(
6335 // CHECK-NEXT: entry:
6336 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6337 // CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
6338 // CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]])
6339 // CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6340 // CHECK-NEXT: ret void
6342 v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); }
6343 // CHECK-LABEL: @xvpickve_d_f(
6344 // CHECK-NEXT: entry:
6345 // CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6346 // CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1)
6347 // CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6348 // CHECK-NEXT: ret void
6350 v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); }
6351 // CHECK-LABEL: @xvpickve_w_f(
6352 // CHECK-NEXT: entry:
6353 // CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
6354 // CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1)
6355 // CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6356 // CHECK-NEXT: ret void
6358 v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); }
6359 // CHECK-LABEL: @xvrepli_b(
6360 // CHECK-NEXT: entry:
6361 // CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1)
6362 // CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6363 // CHECK-NEXT: ret void
6365 v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); }
6366 // CHECK-LABEL: @xvrepli_d(
6367 // CHECK-NEXT: entry:
6368 // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1)
6369 // CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6370 // CHECK-NEXT: ret void
6372 v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); }
6373 // CHECK-LABEL: @xvrepli_h(
6374 // CHECK-NEXT: entry:
6375 // CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1)
6376 // CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6377 // CHECK-NEXT: ret void
6379 v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); }
6380 // CHECK-LABEL: @xvrepli_w(
6381 // CHECK-NEXT: entry:
6382 // CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1)
6383 // CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
6384 // CHECK-NEXT: ret void
6386 v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); }