1 ; RUN: llc -march=mips -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s
2 ; RUN: llc -march=mipsel -mattr=+msa,+fp64,+mips32r2 < %s | FileCheck %s
; vshf_v16i8_0: reverse the 16 bytes of %a (mask 15..0, second operand undef).
; Expects a control vector loaded through a %lo-formed address and a vshf.b
; whose two source operands are both the register holding %a.
4 define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
5 ; CHECK-LABEL: vshf_v16i8_0:
7 %1 = load <16 x i8>, <16 x i8>* %a
8 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
9 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
10 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
11 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
12 ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
13 store <16 x i8> %2, <16 x i8>* %c
14 ; CHECK-DAG: st.b [[R3]], 0($4)
; vshf_v16i8_1: broadcast element 1 of %a to every lane; expects splati.b with
; index 1 rather than a vshf.b.
19 define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
20 ; CHECK-LABEL: vshf_v16i8_1:
22 %1 = load <16 x i8>, <16 x i8>* %a
23 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
24 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
25 ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
26 store <16 x i8> %2, <16 x i8>* %c
27 ; CHECK-DAG: st.b [[R3]], 0($4)
; vshf_v16i8_2: every mask index (16..30, then 16) selects from %b, so only the
; %b load is matched; expects vshf.b with %b's register as both sources.
32 define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
33 ; CHECK-LABEL: vshf_v16i8_2:
35 %1 = load <16 x i8>, <16 x i8>* %a
36 %2 = load <16 x i8>, <16 x i8>* %b
37 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
38 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
39 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
40 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
41 ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
42 store <16 x i8> %3, <16 x i8>* %c
43 ; CHECK-DAG: st.b [[R3]], 0($4)
; vshf_v16i8_3: the mask mixes elements of %a (indices 0..15) and %b (16..31);
; expects a two-source vshf.b with %b's register listed before %a's.
48 define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
49 ; CHECK-LABEL: vshf_v16i8_3:
51 %1 = load <16 x i8>, <16 x i8>* %a
52 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
53 %2 = load <16 x i8>, <16 x i8>* %b
54 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
55 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
56 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
57 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
58 ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
59 ; the operands to get the right answer.
60 ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R1]]
61 store <16 x i8> %3, <16 x i8>* %c
62 ; CHECK-DAG: st.b [[R3]], 0($4)
; vshf_v16i8_4: both shuffle operands are %1, so mask indices 1 and 17 name the
; same element; expects splati.b with index 1.
67 define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
68 ; CHECK-LABEL: vshf_v16i8_4:
70 %1 = load <16 x i8>, <16 x i8>* %a
71 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
72 %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
73 ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
74 store <16 x i8> %2, <16 x i8>* %c
75 ; CHECK-DAG: st.b [[R3]], 0($4)
; vshf_v8i16_0: reverse the 8 halfwords of %a (mask 7..0, second operand undef).
; Expects a control vector loaded through a %lo-formed address and a vshf.h
; whose two source operands are both the register holding %a.
80 define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
81 ; CHECK-LABEL: vshf_v8i16_0:
83 %1 = load <8 x i16>, <8 x i16>* %a
84 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
85 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
86 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
87 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
88 ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
89 store <8 x i16> %2, <8 x i16>* %c
90 ; CHECK-DAG: st.h [[R3]], 0($4)
; vshf_v8i16_1: broadcast element 1 of %a to every lane; expects splati.h with
; index 1 rather than a vshf.h.
95 define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
96 ; CHECK-LABEL: vshf_v8i16_1:
98 %1 = load <8 x i16>, <8 x i16>* %a
99 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
100 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
101 ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
102 store <8 x i16> %2, <8 x i16>* %c
103 ; CHECK-DAG: st.h [[R3]], 0($4)
; vshf_v8i16_2: every mask index (8..14, then 8) selects from %b, so only the
; %b load is matched; expects vshf.h with %b's register as both sources.
108 define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
109 ; CHECK-LABEL: vshf_v8i16_2:
111 %1 = load <8 x i16>, <8 x i16>* %a
112 %2 = load <8 x i16>, <8 x i16>* %b
113 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
114 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
115 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
116 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
117 ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
118 store <8 x i16> %3, <8 x i16>* %c
119 ; CHECK-DAG: st.h [[R3]], 0($4)
; vshf_v8i16_3: the mask mixes elements of %a (indices 0..7) and %b (8..15);
; expects a two-source vshf.h with %b's register listed before %a's.
124 define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
125 ; CHECK-LABEL: vshf_v8i16_3:
127 %1 = load <8 x i16>, <8 x i16>* %a
128 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
129 %2 = load <8 x i16>, <8 x i16>* %b
130 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
131 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
132 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
133 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
134 ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
135 ; the operands to get the right answer.
136 ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R1]]
137 store <8 x i16> %3, <8 x i16>* %c
138 ; CHECK-DAG: st.h [[R3]], 0($4)
; vshf_v8i16_4: both shuffle operands are %1, so mask indices 1 and 9 name the
; same element; expects splati.h with index 1.
143 define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
144 ; CHECK-LABEL: vshf_v8i16_4:
146 %1 = load <8 x i16>, <8 x i16>* %a
147 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
148 %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
149 ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
150 store <8 x i16> %2, <8 x i16>* %c
151 ; CHECK-DAG: st.h [[R3]], 0($4)
156 ; Note: a v4i32 vector consists of exactly one 4-element group, so a shuffle
157 ; of a single v4i32 vector never produces vshf.w; shf.w or splati.w is used instead.
; vshf_v4i32_0: reverse the 4 words of %a (mask 3,2,1,0, second operand undef).
; Expects shf.w with immediate 27 (0b00011011: the indices 3,2,1,0 packed two
; bits each, lane 0 in the low bits) instead of a vshf.w.
159 define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
160 ; CHECK-LABEL: vshf_v4i32_0:
162 %1 = load <4 x i32>, <4 x i32>* %a
163 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
164 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
165 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
166 store <4 x i32> %2, <4 x i32>* %c
167 ; CHECK-DAG: st.w [[R3]], 0($4)
; vshf_v4i32_1: broadcast element 1 of %a to every lane; expects splati.w with
; index 1.
172 define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
173 ; CHECK-LABEL: vshf_v4i32_1:
175 %1 = load <4 x i32>, <4 x i32>* %a
176 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
177 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
178 ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][1]
179 store <4 x i32> %2, <4 x i32>* %c
180 ; CHECK-DAG: st.w [[R3]], 0($4)
; vshf_v4i32_2: every mask index (4,5,6,4) selects from %b. Expects shf.w on
; %b's register with immediate 36 (0b00100100: the %b-relative indices 0,1,2,0
; packed two bits each, lane 0 in the low bits).
185 define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
186 ; CHECK-LABEL: vshf_v4i32_2:
188 %1 = load <4 x i32>, <4 x i32>* %a
189 %2 = load <4 x i32>, <4 x i32>* %b
190 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
191 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
192 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
193 store <4 x i32> %3, <4 x i32>* %c
194 ; CHECK-DAG: st.w [[R3]], 0($4)
; vshf_v4i32_3: the mask (1,5,6,4) mixes elements of %a and %b; expects a
; two-source vshf.w with %b's register listed before %a's.
199 define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
200 ; CHECK-LABEL: vshf_v4i32_3:
202 %1 = load <4 x i32>, <4 x i32>* %a
203 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
204 %2 = load <4 x i32>, <4 x i32>* %b
205 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
206 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
207 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
208 ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[PTR_A]])
209 ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
210 ; the operands to get the right answer.
211 ; CHECK-DAG: vshf.w [[R3]], [[R2]], [[R1]]
212 store <4 x i32> %3, <4 x i32>* %c
213 ; CHECK-DAG: st.w [[R3]], 0($4)
; vshf_v4i32_4: both shuffle operands are %1 and the mask is 1,5,5,1; expects
; the whole shuffle to collapse to splati.w with index 1.
218 define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
219 ; CHECK-LABEL: vshf_v4i32_4:
221 %1 = load <4 x i32>, <4 x i32>* %a
222 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
223 %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
224 ; The two operand vectors are the same so element 1 and 5 are equivalent.
225 ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][1]
226 store <4 x i32> %2, <4 x i32>* %c
227 ; CHECK-DAG: st.w [[R3]], 0($4)
; vshf_v2i64_0: swap the two doublewords of %a (mask 1,0, second operand undef).
; Expects a control vector loaded through a %lo-formed address and a vshf.d
; whose two source operands are both the register holding %a.
232 define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
233 ; CHECK-LABEL: vshf_v2i64_0:
235 %1 = load <2 x i64>, <2 x i64>* %a
236 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
237 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
238 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
239 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
240 ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
241 store <2 x i64> %2, <2 x i64>* %c
242 ; CHECK-DAG: st.d [[R3]], 0($4)
; vshf_v2i64_1: broadcast element 1 of %a to both lanes; expects splati.d with
; index 1.
247 define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
248 ; CHECK-LABEL: vshf_v2i64_1:
250 %1 = load <2 x i64>, <2 x i64>* %a
251 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
252 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
253 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
254 store <2 x i64> %2, <2 x i64>* %c
255 ; CHECK-DAG: st.d [[R3]], 0($4)
; vshf_v2i64_2: both mask indices (3,2) select from %b, so only the %b load is
; matched; expects vshf.d with %b's register as both sources.
260 define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
261 ; CHECK-LABEL: vshf_v2i64_2:
263 %1 = load <2 x i64>, <2 x i64>* %a
264 %2 = load <2 x i64>, <2 x i64>* %b
265 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
266 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
267 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
268 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
269 ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
270 store <2 x i64> %3, <2 x i64>* %c
271 ; CHECK-DAG: st.d [[R3]], 0($4)
; vshf_v2i64_3: the mask (1,2) takes element 1 of %a and element 0 of %b;
; expects a two-source vshf.d with %b's register listed before %a's.
276 define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
277 ; CHECK-LABEL: vshf_v2i64_3:
279 %1 = load <2 x i64>, <2 x i64>* %a
280 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
281 %2 = load <2 x i64>, <2 x i64>* %b
282 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
283 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
284 ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
285 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
286 ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
287 ; the operands to get the right answer.
288 ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R1]]
289 store <2 x i64> %3, <2 x i64>* %c
290 ; CHECK-DAG: st.d [[R3]], 0($4)
; vshf_v2i64_4: both shuffle operands are %1, so mask indices 1 and 3 name the
; same element; expects splati.d with index 1.
295 define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
296 ; CHECK-LABEL: vshf_v2i64_4:
298 %1 = load <2 x i64>, <2 x i64>* %a
299 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
300 %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
301 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
302 store <2 x i64> %2, <2 x i64>* %c
303 ; CHECK-DAG: st.d [[R3]], 0($4)
; shf_v16i8_0: applies the same permutation (1,3,2,0) inside each group of four
; bytes of %a. Expects shf.b with immediate 45 (0b00101101: the indices 1,3,2,0
; packed two bits each, lane 0 in the low bits).
308 define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
309 ; CHECK-LABEL: shf_v16i8_0:
311 %1 = load <16 x i8>, <16 x i8>* %a
312 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
313 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
314 ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
315 store <16 x i8> %2, <16 x i8>* %c
316 ; CHECK-DAG: st.b [[R3]], 0($4)
; shf_v8i16_0: reverses the halfwords inside each group of four (3,2,1,0 and
; 7,6,5,4). Expects shf.h with immediate 27 (0b00011011: the indices 3,2,1,0
; packed two bits each, lane 0 in the low bits).
321 define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
322 ; CHECK-LABEL: shf_v8i16_0:
324 %1 = load <8 x i16>, <8 x i16>* %a
325 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
326 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
327 ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
328 store <8 x i16> %2, <8 x i16>* %c
329 ; CHECK-DAG: st.h [[R3]], 0($4)
; shf_v4i32_0: reverses the four words of %a (mask 3,2,1,0). Expects shf.w with
; immediate 27 (0b00011011: the indices 3,2,1,0 packed two bits each, lane 0 in
; the low bits).
334 define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
335 ; CHECK-LABEL: shf_v4i32_0:
337 %1 = load <4 x i32>, <4 x i32>* %a
338 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
339 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
340 ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
341 store <4 x i32> %2, <4 x i32>* %c
342 ; CHECK-DAG: st.w [[R3]], 0($4)
347 ; There is no shf.d instruction, so there is no shf test for v2i64.
; ilvev_v16i8_0: interleaves the even-indexed bytes of %a and %b (mask 0,16,2,
; 18,...); expects ilvev.b with %b's register listed before %a's.
349 define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
350 ; CHECK-LABEL: ilvev_v16i8_0:
352 %1 = load <16 x i8>, <16 x i8>* %a
353 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
354 %2 = load <16 x i8>, <16 x i8>* %b
355 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
356 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
357 <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
358 ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
359 store <16 x i8> %3, <16 x i8>* %c
360 ; CHECK-DAG: st.b [[R3]], 0($4)
; ilvev_v8i16_0: interleaves the even-indexed halfwords of %a and %b (mask 0,8,
; 2,10,...); expects ilvev.h with %b's register listed before %a's.
365 define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
366 ; CHECK-LABEL: ilvev_v8i16_0:
368 %1 = load <8 x i16>, <8 x i16>* %a
369 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
370 %2 = load <8 x i16>, <8 x i16>* %b
371 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
372 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
373 ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
374 store <8 x i16> %3, <8 x i16>* %c
375 ; CHECK-DAG: st.h [[R3]], 0($4)
; ilvev_v4i32_0: interleaves the even-indexed words of %a and %b (mask 0,4,2,6);
; expects ilvev.w with %b's register listed before %a's.
380 define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
381 ; CHECK-LABEL: ilvev_v4i32_0:
383 %1 = load <4 x i32>, <4 x i32>* %a
384 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
385 %2 = load <4 x i32>, <4 x i32>* %b
386 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
387 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
388 ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
389 store <4 x i32> %3, <4 x i32>* %c
390 ; CHECK-DAG: st.w [[R3]], 0($4)
; ilvev_v2i64_0: takes element 0 of %a and element 0 of %b (mask 0,2); expects
; ilvev.d with %b's register listed before %a's.
395 define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
396 ; CHECK-LABEL: ilvev_v2i64_0:
398 %1 = load <2 x i64>, <2 x i64>* %a
399 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
400 %2 = load <2 x i64>, <2 x i64>* %b
401 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
402 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
403 ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
404 store <2 x i64> %3, <2 x i64>* %c
405 ; CHECK-DAG: st.d [[R3]], 0($4)
410 ; Interleaving one operand with itself.
; ilvev_v16i8_1: the mask duplicates each even-indexed byte of %a (0,0,2,2,...)
; and never selects from %b; expects ilvev.b with %a's register as both sources.
411 define void @ilvev_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
412 ; CHECK-LABEL: ilvev_v16i8_1:
414 %1 = load <16 x i8>, <16 x i8>* %a
415 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
416 %2 = load <16 x i8>, <16 x i8>* %b
417 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
418 <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
419 ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
420 store <16 x i8> %3, <16 x i8>* %c
421 ; CHECK-DAG: st.b [[R3]], 0($4)
; ilvev_v8i16_1: the mask duplicates each even-indexed halfword of %a and never
; selects from %b; expects ilvev.h with %a's register as both sources.
426 define void @ilvev_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
427 ; CHECK-LABEL: ilvev_v8i16_1:
429 %1 = load <8 x i16>, <8 x i16>* %a
430 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
431 %2 = load <8 x i16>, <8 x i16>* %b
432 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
433 ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
434 store <8 x i16> %3, <8 x i16>* %c
435 ; CHECK-DAG: st.h [[R3]], 0($4)
; ilvev_v4i32_1: the mask duplicates each even-indexed word of %a (0,0,2,2) and
; never selects from %b; expects ilvev.w with %a's register as both sources.
440 define void @ilvev_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
441 ; CHECK-LABEL: ilvev_v4i32_1:
443 %1 = load <4 x i32>, <4 x i32>* %a
444 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
445 %2 = load <4 x i32>, <4 x i32>* %b
446 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
447 ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
448 store <4 x i32> %3, <4 x i32>* %c
449 ; CHECK-DAG: st.w [[R3]], 0($4)
; ilvev_v2i64_1: the mask (0,0) duplicates element 0 of %a; expects splati.d
; with index 0 on %a's register rather than an ilvev.d.
454 define void @ilvev_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
455 ; CHECK-LABEL: ilvev_v2i64_1:
457 %1 = load <2 x i64>, <2 x i64>* %a
458 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
459 %2 = load <2 x i64>, <2 x i64>* %b
460 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 0>
461 ; ilvev.d with two identical operands is equivalent to splati.d
462 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
463 store <2 x i64> %3, <2 x i64>* %c
464 ; CHECK-DAG: st.d [[R3]], 0($4)
; ilvev_v16i8_2: the mask duplicates each even-indexed byte of %b (16,16,18,
; 18,...) and never selects from %a; expects ilvev.b with %b's register as both
; sources.
469 define void @ilvev_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
470 ; CHECK-LABEL: ilvev_v16i8_2:
472 %1 = load <16 x i8>, <16 x i8>* %a
473 %2 = load <16 x i8>, <16 x i8>* %b
474 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
475 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
476 <16 x i32> <i32 16, i32 16, i32 18, i32 18, i32 20, i32 20, i32 22, i32 22, i32 24, i32 24, i32 26, i32 26, i32 28, i32 28, i32 30, i32 30>
477 ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
478 store <16 x i8> %3, <16 x i8>* %c
479 ; CHECK-DAG: st.b [[R3]], 0($4)
; ilvev_v8i16_2: the mask duplicates each even-indexed halfword of %b and never
; selects from %a; expects ilvev.h with %b's register as both sources.
484 define void @ilvev_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
485 ; CHECK-LABEL: ilvev_v8i16_2:
487 %1 = load <8 x i16>, <8 x i16>* %a
488 %2 = load <8 x i16>, <8 x i16>* %b
489 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
490 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
491 ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
492 store <8 x i16> %3, <8 x i16>* %c
493 ; CHECK-DAG: st.h [[R3]], 0($4)
; ilvev_v4i32_2: the mask duplicates each even-indexed word of %b (4,4,6,6) and
; never selects from %a; expects ilvev.w with %b's register as both sources.
498 define void @ilvev_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
499 ; CHECK-LABEL: ilvev_v4i32_2:
501 %1 = load <4 x i32>, <4 x i32>* %a
502 %2 = load <4 x i32>, <4 x i32>* %b
503 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
504 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 4, i32 6, i32 6>
505 ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
506 store <4 x i32> %3, <4 x i32>* %c
507 ; CHECK-DAG: st.w [[R3]], 0($4)
; ilvev_v2i64_2: the mask (2,2) duplicates element 0 of %b; expects splati.d
; with index 0 on %b's register rather than an ilvev.d.
512 define void @ilvev_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
513 ; CHECK-LABEL: ilvev_v2i64_2:
515 %1 = load <2 x i64>, <2 x i64>* %a
516 %2 = load <2 x i64>, <2 x i64>* %b
517 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
518 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
519 ; ilvev.d with two identical operands is equivalent to splati.d
520 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
521 store <2 x i64> %3, <2 x i64>* %c
522 ; CHECK-DAG: st.d [[R3]], 0($4)
; ilvod_v16i8_0: interleaves the odd-indexed bytes of %a and %b (mask 1,17,3,
; 19,...); expects ilvod.b with %a's register listed before %b's.
527 define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
528 ; CHECK-LABEL: ilvod_v16i8_0:
530 %1 = load <16 x i8>, <16 x i8>* %a
531 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
532 %2 = load <16 x i8>, <16 x i8>* %b
533 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
534 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
535 <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
536 ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
537 store <16 x i8> %3, <16 x i8>* %c
538 ; CHECK-DAG: st.b [[R3]], 0($4)
; ilvod_v8i16_0: interleaves the odd-indexed halfwords of %a and %b (mask 1,9,
; 3,11,...); expects ilvod.h with %a's register listed before %b's.
543 define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
544 ; CHECK-LABEL: ilvod_v8i16_0:
546 %1 = load <8 x i16>, <8 x i16>* %a
547 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
548 %2 = load <8 x i16>, <8 x i16>* %b
549 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
550 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
551 ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
552 store <8 x i16> %3, <8 x i16>* %c
553 ; CHECK-DAG: st.h [[R3]], 0($4)
; ilvod_v4i32_0: interleaves the odd-indexed words of %a and %b (mask 1,5,3,7);
; expects ilvod.w with %a's register listed before %b's.
558 define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
559 ; CHECK-LABEL: ilvod_v4i32_0:
561 %1 = load <4 x i32>, <4 x i32>* %a
562 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
563 %2 = load <4 x i32>, <4 x i32>* %b
564 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
565 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
566 ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
567 store <4 x i32> %3, <4 x i32>* %c
568 ; CHECK-DAG: st.w [[R3]], 0($4)
; ilvod_v2i64_0: takes element 1 of %a and element 1 of %b (mask 1,3); expects
; ilvod.d with %a's register listed before %b's.
573 define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
574 ; CHECK-LABEL: ilvod_v2i64_0:
576 %1 = load <2 x i64>, <2 x i64>* %a
577 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
578 %2 = load <2 x i64>, <2 x i64>* %b
579 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
580 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
581 ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
582 store <2 x i64> %3, <2 x i64>* %c
583 ; CHECK-DAG: st.d [[R3]], 0($4)
; ilvod_v16i8_1: the mask duplicates each odd-indexed byte of %a (1,1,3,3,...)
; and never selects from %b; expects ilvod.b with %a's register as both sources.
588 define void @ilvod_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
589 ; CHECK-LABEL: ilvod_v16i8_1:
591 %1 = load <16 x i8>, <16 x i8>* %a
592 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
593 %2 = load <16 x i8>, <16 x i8>* %b
594 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
595 <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
596 ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
597 store <16 x i8> %3, <16 x i8>* %c
598 ; CHECK-DAG: st.b [[R3]], 0($4)
; ilvod_v8i16_1: the mask duplicates each odd-indexed halfword of %a and never
; selects from %b; expects ilvod.h with %a's register as both sources.
603 define void @ilvod_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
604 ; CHECK-LABEL: ilvod_v8i16_1:
606 %1 = load <8 x i16>, <8 x i16>* %a
607 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
608 %2 = load <8 x i16>, <8 x i16>* %b
609 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
610 ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
611 store <8 x i16> %3, <8 x i16>* %c
612 ; CHECK-DAG: st.h [[R3]], 0($4)
; ilvod_v4i32_1: the mask duplicates each odd-indexed word of %a (1,1,3,3) and
; never selects from %b; expects ilvod.w with %a's register as both sources.
617 define void @ilvod_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
618 ; CHECK-LABEL: ilvod_v4i32_1:
620 %1 = load <4 x i32>, <4 x i32>* %a
621 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
622 %2 = load <4 x i32>, <4 x i32>* %b
623 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
624 ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
625 store <4 x i32> %3, <4 x i32>* %c
626 ; CHECK-DAG: st.w [[R3]], 0($4)
; ilvod_v2i64_1: the mask (1,1) duplicates element 1 of %a; expects splati.d
; with index 1 on %a's register rather than an ilvod.d.
631 define void @ilvod_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
632 ; CHECK-LABEL: ilvod_v2i64_1:
634 %1 = load <2 x i64>, <2 x i64>* %a
635 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
636 %2 = load <2 x i64>, <2 x i64>* %b
637 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 1>
638 ; ilvod.d with two identical operands is equivalent to splati.d
639 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
640 store <2 x i64> %3, <2 x i64>* %c
641 ; CHECK-DAG: st.d [[R3]], 0($4)
; ilvod_v16i8_2: the mask duplicates each odd-indexed byte of %b (17,17,19,
; 19,...) and never selects from %a; expects ilvod.b with %b's register as both
; sources.
646 define void @ilvod_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
647 ; CHECK-LABEL: ilvod_v16i8_2:
649 %1 = load <16 x i8>, <16 x i8>* %a
650 %2 = load <16 x i8>, <16 x i8>* %b
651 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
652 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
653 <16 x i32> <i32 17, i32 17, i32 19, i32 19, i32 21, i32 21, i32 23, i32 23, i32 25, i32 25, i32 27, i32 27, i32 29, i32 29, i32 31, i32 31>
654 ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
655 store <16 x i8> %3, <16 x i8>* %c
656 ; CHECK-DAG: st.b [[R3]], 0($4)
; ilvod_v8i16_2: the mask duplicates each odd-indexed halfword of %b and never
; selects from %a; expects ilvod.h with %b's register as both sources.
661 define void @ilvod_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
662 ; CHECK-LABEL: ilvod_v8i16_2:
664 %1 = load <8 x i16>, <8 x i16>* %a
665 %2 = load <8 x i16>, <8 x i16>* %b
666 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
667 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
668 ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
669 store <8 x i16> %3, <8 x i16>* %c
670 ; CHECK-DAG: st.h [[R3]], 0($4)
; ilvod_v4i32_2: the mask duplicates each odd-indexed word of %b (5,5,7,7) and
; never selects from %a; expects ilvod.w with %b's register as both sources.
675 define void @ilvod_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
676 ; CHECK-LABEL: ilvod_v4i32_2:
678 %1 = load <4 x i32>, <4 x i32>* %a
679 %2 = load <4 x i32>, <4 x i32>* %b
680 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
681 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 5, i32 5, i32 7, i32 7>
682 ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
683 store <4 x i32> %3, <4 x i32>* %c
684 ; CHECK-DAG: st.w [[R3]], 0($4)
; ilvod_v2i64_2: the mask (3,3) duplicates element 1 of %b; expects splati.d
; with index 1 on %b's register rather than an ilvod.d.
689 define void @ilvod_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
690 ; CHECK-LABEL: ilvod_v2i64_2:
692 %1 = load <2 x i64>, <2 x i64>* %a
693 %2 = load <2 x i64>, <2 x i64>* %b
694 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
695 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 3>
696 ; ilvod.d with two identical operands is equivalent to splati.d
697 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
698 store <2 x i64> %3, <2 x i64>* %c
699 ; CHECK-DAG: st.d [[R3]], 0($4)
; ilvr_v16i8_0: interleaves elements 0..7 of %a with elements 0..7 of %b (mask
; 0,16,1,17,...,7,23); expects ilvr.b with %b's register listed before %a's.
704 define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
705 ; CHECK-LABEL: ilvr_v16i8_0:
707 %1 = load <16 x i8>, <16 x i8>* %a
708 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
709 %2 = load <16 x i8>, <16 x i8>* %b
710 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
711 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
712 <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
713 ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
714 store <16 x i8> %3, <16 x i8>* %c
715 ; CHECK-DAG: st.b [[R3]], 0($4)
; ilvr_v8i16_0: interleaves elements 0..3 of %a with elements 0..3 of %b (mask
; 0,8,1,9,2,10,3,11); expects ilvr.h with %b's register listed before %a's.
720 define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
721 ; CHECK-LABEL: ilvr_v8i16_0:
723 %1 = load <8 x i16>, <8 x i16>* %a
724 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
725 %2 = load <8 x i16>, <8 x i16>* %b
726 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
727 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
728 ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
729 store <8 x i16> %3, <8 x i16>* %c
730 ; CHECK-DAG: st.h [[R3]], 0($4)
; ilvr_v4i32_0: interleaves elements 0..1 of %a with elements 0..1 of %b (mask
; 0,4,1,5); expects ilvr.w with %b's register listed before %a's.
735 define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
736 ; CHECK-LABEL: ilvr_v4i32_0:
738 %1 = load <4 x i32>, <4 x i32>* %a
739 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
740 %2 = load <4 x i32>, <4 x i32>* %b
741 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
742 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
743 ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
744 store <4 x i32> %3, <4 x i32>* %c
745 ; CHECK-DAG: st.w [[R3]], 0($4)
; ilvr_v2i64_0: takes element 0 of %a and element 0 of %b (mask 0,2); the
; expected instruction is ilvev.d (with %b's register listed before %a's), not
; ilvr.d.
750 define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
751 ; CHECK-LABEL: ilvr_v2i64_0:
753 %1 = load <2 x i64>, <2 x i64>* %a
754 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
755 %2 = load <2 x i64>, <2 x i64>* %b
756 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
757 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
758 ; ilvr.d and ilvev.d are equivalent for v2i64
759 ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
760 store <2 x i64> %3, <2 x i64>* %c
761 ; CHECK-DAG: st.d [[R3]], 0($4)
; ilvr_v16i8_1: the mask duplicates each of elements 0..7 of %b (16,16,17,
; 17,...) and never selects from %a; expects ilvr.b with %b's register as both
; sources.
766 define void @ilvr_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
767 ; CHECK-LABEL: ilvr_v16i8_1:
769 %1 = load <16 x i8>, <16 x i8>* %a
770 %2 = load <16 x i8>, <16 x i8>* %b
771 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
772 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
773 <16 x i32> <i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23>
774 ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
775 store <16 x i8> %3, <16 x i8>* %c
776 ; CHECK-DAG: st.b [[R3]], 0($4)
; ilvr_v8i16_1: the mask duplicates each of elements 0..3 of %b (8,8,9,9,...)
; and never selects from %a; expects ilvr.h with %b's register as both sources.
781 define void @ilvr_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
782 ; CHECK-LABEL: ilvr_v8i16_1:
784 %1 = load <8 x i16>, <8 x i16>* %a
785 %2 = load <8 x i16>, <8 x i16>* %b
786 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
787 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11>
788 ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
789 store <8 x i16> %3, <8 x i16>* %c
790 ; CHECK-DAG: st.h [[R3]], 0($4)
; ilvr_v4i32_1: the mask duplicates elements 0 and 1 of %b (4,4,5,5) and never
; selects from %a; expects ilvr.w with %b's register as both sources.
795 define void @ilvr_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
796 ; CHECK-LABEL: ilvr_v4i32_1:
798 %1 = load <4 x i32>, <4 x i32>* %a
799 %2 = load <4 x i32>, <4 x i32>* %b
800 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
801 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 4, i32 5, i32 5>
802 ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
803 store <4 x i32> %3, <4 x i32>* %c
804 ; CHECK-DAG: st.w [[R3]], 0($4)
; ilvr_v2i64_1: the mask (2,2) duplicates element 0 of %b; expects splati.d
; with index 0 on %b's register rather than an ilvr.d.
809 define void @ilvr_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
810 ; CHECK-LABEL: ilvr_v2i64_1:
812 %1 = load <2 x i64>, <2 x i64>* %a
813 %2 = load <2 x i64>, <2 x i64>* %b
814 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
815 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
816 ; ilvr.d and splati.d are equivalent for v2i64
817 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
818 store <2 x i64> %3, <2 x i64>* %c
819 ; CHECK-DAG: st.d [[R3]], 0($4)
; ilvr_v16i8_2: the mask duplicates each of elements 0..7 of %a (0,0,1,1,...)
; and never selects from %b; expects ilvr.b with %a's register as both sources.
824 define void @ilvr_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
825 ; CHECK-LABEL: ilvr_v16i8_2:
827 %1 = load <16 x i8>, <16 x i8>* %a
828 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
829 %2 = load <16 x i8>, <16 x i8>* %b
830 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
831 <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
832 ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
833 store <16 x i8> %3, <16 x i8>* %c
834 ; CHECK-DAG: st.b [[R3]], 0($4)
; ilvr_v8i16_2: the mask duplicates each of elements 0..3 of %a (0,0,1,1,...)
; and never selects from %b; expects ilvr.h with %a's register as both sources.
839 define void @ilvr_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
840 ; CHECK-LABEL: ilvr_v8i16_2:
842 %1 = load <8 x i16>, <8 x i16>* %a
843 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
844 %2 = load <8 x i16>, <8 x i16>* %b
845 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
846 ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
847 store <8 x i16> %3, <8 x i16>* %c
848 ; CHECK-DAG: st.h [[R3]], 0($4)
; ilvr_v4i32_2: the mask duplicates elements 0 and 1 of %a (0,0,1,1) and never
; selects from %b; expects ilvr.w with %a's register as both sources.
853 define void @ilvr_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
854 ; CHECK-LABEL: ilvr_v4i32_2:
856 %1 = load <4 x i32>, <4 x i32>* %a
857 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
858 %2 = load <4 x i32>, <4 x i32>* %b
859 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
860 ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
861 store <4 x i32> %3, <4 x i32>* %c
862 ; CHECK-DAG: st.w [[R3]], 0($4)
; ilvr_v2i64_2: the mask (0,0) duplicates element 0 of %a; expects splati.d
; with index 0 on %a's register rather than an ilvr.d.
867 define void @ilvr_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
868 ; CHECK-LABEL: ilvr_v2i64_2:
870 %1 = load <2 x i64>, <2 x i64>* %a
871 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
872 %2 = load <2 x i64>, <2 x i64>* %b
873 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 0>
874 ; ilvr.d and splati.d are equivalent for v2i64
875 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
876 store <2 x i64> %3, <2 x i64>* %c
877 ; CHECK-DAG: st.d [[R3]], 0($4)
; ilvl_v16i8_0: interleaves elements 8..15 of %a with elements 8..15 of %b
; (mask 8,24,9,25,...,15,31); expects ilvl.b with %b's register listed before
; %a's.
882 define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
883 ; CHECK-LABEL: ilvl_v16i8_0:
885 %1 = load <16 x i8>, <16 x i8>* %a
886 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
887 %2 = load <16 x i8>, <16 x i8>* %b
888 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
889 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
890 <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
891 ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
892 store <16 x i8> %3, <16 x i8>* %c
893 ; CHECK-DAG: st.b [[R3]], 0($4)
; Test: v8i16 shuffle that interleaves elements 4..7 of %a (indices 4..7)
; with elements 4..7 of %b (indices 12..15), %a's element first in each pair.
; Expected selection: ilvl.h with the %b register listed before the %a register.
898 define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
899 ; CHECK-LABEL: ilvl_v8i16_0:
901 %1 = load <8 x i16>, <8 x i16>* %a
902 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
903 %2 = load <8 x i16>, <8 x i16>* %b
904 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
905 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
906 ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
907 store <8 x i16> %3, <8 x i16>* %c
908 ; CHECK-DAG: st.h [[R3]], 0($4)
; Test: v4i32 shuffle that interleaves elements 2..3 of %a (indices 2, 3)
; with elements 2..3 of %b (indices 6, 7), %a's element first in each pair.
; Expected selection: ilvl.w with the %b register listed before the %a register.
913 define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
914 ; CHECK-LABEL: ilvl_v4i32_0:
916 %1 = load <4 x i32>, <4 x i32>* %a
917 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
918 %2 = load <4 x i32>, <4 x i32>* %b
919 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
920 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
921 ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
922 store <4 x i32> %3, <4 x i32>* %c
923 ; CHECK-DAG: st.w [[R3]], 0($4)
; Test: v2i64 shuffle whose mask <1,3> takes element 1 of %a followed by
; element 1 of %b. Expected selection: ilvod.d rather than ilvl.d.
; NOTE(review): the ilvod.d expectation lists the %a register before the %b
; register, whereas the other two-source expectations in this file (e.g.
; ilvl.w in ilvl_v4i32_0, ilvev.d in pckev_v2i64_0) list %b's register first.
; Confirm this operand order is the intended lowering.
928 define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
929 ; CHECK-LABEL: ilvl_v2i64_0:
931 %1 = load <2 x i64>, <2 x i64>* %a
932 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
933 %2 = load <2 x i64>, <2 x i64>* %b
934 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
935 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
936 ; ilvl.d and ilvod.d are equivalent for v2i64
937 ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
938 store <2 x i64> %3, <2 x i64>* %c
939 ; CHECK-DAG: st.d [[R3]], 0($4)
; Test: v16i8 shuffle whose mask uses only indices 24..31 (elements 8..15 of
; %b), each one duplicated. %a is loaded but no mask index refers to it, and
; no load from %a is expected in the output.
; Expected selection: ilvl.b with the register loaded from %b as both sources.
944 define void @ilvl_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
945 ; CHECK-LABEL: ilvl_v16i8_1:
947 %1 = load <16 x i8>, <16 x i8>* %a
948 %2 = load <16 x i8>, <16 x i8>* %b
949 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
950 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
951 <16 x i32> <i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
952 ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
953 store <16 x i8> %3, <16 x i8>* %c
954 ; CHECK-DAG: st.b [[R3]], 0($4)
; Test: v8i16 shuffle whose mask uses only indices 12..15 (elements 4..7 of
; %b), each one duplicated. %a is loaded but no mask index refers to it.
; Expected selection: ilvl.h with the register loaded from %b as both sources.
959 define void @ilvl_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
960 ; CHECK-LABEL: ilvl_v8i16_1:
962 %1 = load <8 x i16>, <8 x i16>* %a
963 %2 = load <8 x i16>, <8 x i16>* %b
964 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
965 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
966 ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
967 store <8 x i16> %3, <8 x i16>* %c
968 ; CHECK-DAG: st.h [[R3]], 0($4)
; Test: v4i32 shuffle whose mask <6,6,7,7> uses only elements 2..3 of %b,
; each one duplicated. %a is loaded but no mask index refers to it.
; Expected selection: ilvl.w with the register loaded from %b as both sources.
973 define void @ilvl_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
974 ; CHECK-LABEL: ilvl_v4i32_1:
976 %1 = load <4 x i32>, <4 x i32>* %a
977 %2 = load <4 x i32>, <4 x i32>* %b
978 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
979 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 6, i32 6, i32 7, i32 7>
980 ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
981 store <4 x i32> %3, <4 x i32>* %c
982 ; CHECK-DAG: st.w [[R3]], 0($4)
; Test: v2i64 shuffle whose mask <3,3> replicates element 1 of %b into both
; lanes. %a is loaded but no mask index refers to it.
; Expected selection: splati.d of element 1 rather than an ilvl.d.
987 define void @ilvl_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
988 ; CHECK-LABEL: ilvl_v2i64_1:
990 %1 = load <2 x i64>, <2 x i64>* %a
991 %2 = load <2 x i64>, <2 x i64>* %b
992 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
993 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 3>
994 ; ilvl.d and splati.d are equivalent for v2i64
995 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
996 store <2 x i64> %3, <2 x i64>* %c
997 ; CHECK-DAG: st.d [[R3]], 0($4)
; Test: v16i8 shuffle whose mask uses only indices 8..15 (elements 8..15 of
; %a), each one duplicated. %b is loaded but no mask index refers to it.
; Expected selection: ilvl.b with the register loaded from %a as both sources.
1002 define void @ilvl_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
1003 ; CHECK-LABEL: ilvl_v16i8_2:
1005 %1 = load <16 x i8>, <16 x i8>* %a
1006 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1007 %2 = load <16 x i8>, <16 x i8>* %b
1008 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
1009 <16 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
1010 ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
1011 store <16 x i8> %3, <16 x i8>* %c
1012 ; CHECK-DAG: st.b [[R3]], 0($4)
; Test: v8i16 shuffle whose mask <4,4,5,5,6,6,7,7> uses only elements 4..7 of
; %a, each one duplicated. %b is loaded but no mask index refers to it.
; Expected selection: ilvl.h with the register loaded from %a as both sources.
1017 define void @ilvl_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1018 ; CHECK-LABEL: ilvl_v8i16_2:
1020 %1 = load <8 x i16>, <8 x i16>* %a
1021 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1022 %2 = load <8 x i16>, <8 x i16>* %b
1023 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
1024 ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
1025 store <8 x i16> %3, <8 x i16>* %c
1026 ; CHECK-DAG: st.h [[R3]], 0($4)
; Test: v4i32 shuffle whose mask <2,2,3,3> uses only elements 2..3 of %a,
; each one duplicated. %b is loaded but no mask index refers to it.
; Expected selection: ilvl.w with the register loaded from %a as both sources.
1031 define void @ilvl_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
1032 ; CHECK-LABEL: ilvl_v4i32_2:
1034 %1 = load <4 x i32>, <4 x i32>* %a
1035 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1036 %2 = load <4 x i32>, <4 x i32>* %b
1037 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
1038 ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
1039 store <4 x i32> %3, <4 x i32>* %c
1040 ; CHECK-DAG: st.w [[R3]], 0($4)
; Test: v2i64 shuffle whose mask <1,1> replicates element 1 of %a into both
; lanes. %b is loaded but no mask index refers to it.
; Expected selection: splati.d of element 1 rather than an ilvl.d.
1045 define void @ilvl_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
1046 ; CHECK-LABEL: ilvl_v2i64_2:
1048 %1 = load <2 x i64>, <2 x i64>* %a
1049 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1050 %2 = load <2 x i64>, <2 x i64>* %b
1051 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 1>
1052 ; ilvl.d and splati.d are equivalent for v2i64
1053 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
1054 store <2 x i64> %3, <2 x i64>* %c
1055 ; CHECK-DAG: st.d [[R3]], 0($4)
; Test: v16i8 shuffle that packs the even-indexed elements of %a (indices
; 0,2,..,14) followed by the even-indexed elements of %b (indices 16,18,..,30).
; Expected selection: pckev.b with the %b register listed before the %a register.
1060 define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
1061 ; CHECK-LABEL: pckev_v16i8_0:
1063 %1 = load <16 x i8>, <16 x i8>* %a
1064 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1065 %2 = load <16 x i8>, <16 x i8>* %b
1066 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
1067 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
1068 <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1069 ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
1070 store <16 x i8> %3, <16 x i8>* %c
1071 ; CHECK-DAG: st.b [[R3]], 0($4)
; Test: v8i16 shuffle that packs the even-indexed elements of %a (indices
; 0,2,4,6) followed by the even-indexed elements of %b (indices 8,10,12,14).
; Expected selection: pckev.h with the %b register listed before the %a register.
1076 define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1077 ; CHECK-LABEL: pckev_v8i16_0:
1079 %1 = load <8 x i16>, <8 x i16>* %a
1080 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1081 %2 = load <8 x i16>, <8 x i16>* %b
1082 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
1083 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1084 ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
1085 store <8 x i16> %3, <8 x i16>* %c
1086 ; CHECK-DAG: st.h [[R3]], 0($4)
; Test: v4i32 shuffle that packs the even-indexed elements of %a (indices
; 0, 2) followed by the even-indexed elements of %b (indices 4, 6).
; Expected selection: pckev.w with the %b register listed before the %a register.
1091 define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
1092 ; CHECK-LABEL: pckev_v4i32_0:
1094 %1 = load <4 x i32>, <4 x i32>* %a
1095 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1096 %2 = load <4 x i32>, <4 x i32>* %b
1097 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
1098 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1099 ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
1100 store <4 x i32> %3, <4 x i32>* %c
1101 ; CHECK-DAG: st.w [[R3]], 0($4)
; Test: v2i64 shuffle whose mask <0,2> takes element 0 of %a followed by
; element 0 of %b.
; Expected selection: ilvev.d rather than pckev.d, with the %b register listed
; before the %a register.
1106 define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
1107 ; CHECK-LABEL: pckev_v2i64_0:
1109 %1 = load <2 x i64>, <2 x i64>* %a
1110 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1111 %2 = load <2 x i64>, <2 x i64>* %b
1112 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
1113 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
1114 ; pckev.d and ilvev.d are equivalent for v2i64
1115 ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
1116 store <2 x i64> %3, <2 x i64>* %c
1117 ; CHECK-DAG: st.d [[R3]], 0($4)
; Test: v16i8 shuffle whose mask lists the even-indexed elements of %b
; (indices 16,18,..,30) twice. %a is loaded but no mask index refers to it.
; Expected selection: pckev.b with the register loaded from %b as both sources.
1122 define void @pckev_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
1123 ; CHECK-LABEL: pckev_v16i8_1:
1125 %1 = load <16 x i8>, <16 x i8>* %a
1126 %2 = load <16 x i8>, <16 x i8>* %b
1127 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
1128 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
1129 <16 x i32> <i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1130 ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1131 store <16 x i8> %3, <16 x i8>* %c
1132 ; CHECK-DAG: st.b [[R3]], 0($4)
; Test: v8i16 shuffle whose mask lists the even-indexed elements of %b
; (indices 8,10,12,14) twice. %a is loaded but no mask index refers to it.
; Expected selection: pckev.h with the register loaded from %b as both sources.
1137 define void @pckev_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1138 ; CHECK-LABEL: pckev_v8i16_1:
1140 %1 = load <8 x i16>, <8 x i16>* %a
1141 %2 = load <8 x i16>, <8 x i16>* %b
1142 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
1143 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 8, i32 10, i32 12, i32 14>
1144 ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1145 store <8 x i16> %3, <8 x i16>* %c
1146 ; CHECK-DAG: st.h [[R3]], 0($4)
; Test: v4i32 shuffle whose mask <4,6,4,6> lists the even-indexed elements of
; %b twice. %a is loaded but no mask index refers to it.
; Expected selection: pckev.w with the register loaded from %b as both sources.
1151 define void @pckev_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
1152 ; CHECK-LABEL: pckev_v4i32_1:
1154 %1 = load <4 x i32>, <4 x i32>* %a
1155 %2 = load <4 x i32>, <4 x i32>* %b
1156 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
1157 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 6, i32 4, i32 6>
1158 ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1159 store <4 x i32> %3, <4 x i32>* %c
1160 ; CHECK-DAG: st.w [[R3]], 0($4)
; Test: v2i64 shuffle whose mask <2,2> replicates element 0 of %b into both
; lanes. %a is loaded but no mask index refers to it.
; Expected selection: splati.d of element 0 rather than a pckev.d.
1165 define void @pckev_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
1166 ; CHECK-LABEL: pckev_v2i64_1:
1168 %1 = load <2 x i64>, <2 x i64>* %a
1169 %2 = load <2 x i64>, <2 x i64>* %b
1170 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
1171 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
1172 ; pckev.d and splati.d are equivalent for v2i64
1173 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
1174 store <2 x i64> %3, <2 x i64>* %c
1175 ; CHECK-DAG: st.d [[R3]], 0($4)
; Test: v16i8 shuffle whose mask lists the even-indexed elements of %a
; (indices 0,2,..,14) twice. %b is loaded but no mask index refers to it.
; Expected selection: pckev.b with the register loaded from %a as both sources.
1180 define void @pckev_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
1181 ; CHECK-LABEL: pckev_v16i8_2:
1183 %1 = load <16 x i8>, <16 x i8>* %a
1184 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1185 %2 = load <16 x i8>, <16 x i8>* %b
1186 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
1187 <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1188 ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
1189 store <16 x i8> %3, <16 x i8>* %c
1190 ; CHECK-DAG: st.b [[R3]], 0($4)
; Test: v8i16 shuffle whose mask <0,2,4,6,0,2,4,6> lists the even-indexed
; elements of %a twice. %b is loaded but no mask index refers to it.
; Expected selection: pckev.h with the register loaded from %a as both sources.
1195 define void @pckev_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1196 ; CHECK-LABEL: pckev_v8i16_2:
1198 %1 = load <8 x i16>, <8 x i16>* %a
1199 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1200 %2 = load <8 x i16>, <8 x i16>* %b
1201 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 0, i32 2, i32 4, i32 6>
1202 ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
1203 store <8 x i16> %3, <8 x i16>* %c
1204 ; CHECK-DAG: st.h [[R3]], 0($4)
; Test: v4i32 shuffle whose mask <0,2,0,2> lists the even-indexed elements of
; %a twice. %b is loaded but no mask index refers to it.
; Expected selection: pckev.w with the register loaded from %a as both sources.
1209 define void @pckev_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
1210 ; CHECK-LABEL: pckev_v4i32_2:
1212 %1 = load <4 x i32>, <4 x i32>* %a
1213 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1214 %2 = load <4 x i32>, <4 x i32>* %b
1215 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
1216 ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
1217 store <4 x i32> %3, <4 x i32>* %c
1218 ; CHECK-DAG: st.w [[R3]], 0($4)
; Test: v2i64 shuffle whose mask <0,0> replicates element 0 of %a into both
; lanes. %b is loaded but no mask index refers to it.
; Expected selection: splati.d of element 0 rather than a pckev.d.
1223 define void @pckev_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
1224 ; CHECK-LABEL: pckev_v2i64_2:
1226 %1 = load <2 x i64>, <2 x i64>* %a
1227 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1228 %2 = load <2 x i64>, <2 x i64>* %b
1229 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 0>
1230 ; pckev.d and splati.d are equivalent for v2i64
1231 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
1232 store <2 x i64> %3, <2 x i64>* %c
1233 ; CHECK-DAG: st.d [[R3]], 0($4)
; Test: v16i8 shuffle that packs the odd-indexed elements of %a (indices
; 1,3,..,15) followed by the odd-indexed elements of %b (indices 17,19,..,31).
; Expected selection: pckod.b with the %b register listed before the %a register.
1238 define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
1239 ; CHECK-LABEL: pckod_v16i8_0:
1241 %1 = load <16 x i8>, <16 x i8>* %a
1242 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1243 %2 = load <16 x i8>, <16 x i8>* %b
1244 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
1245 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
1246 <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1247 ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
1248 store <16 x i8> %3, <16 x i8>* %c
1249 ; CHECK-DAG: st.b [[R3]], 0($4)
; Test: v8i16 shuffle that packs the odd-indexed elements of %a (indices
; 1,3,5,7) followed by the odd-indexed elements of %b (indices 9,11,13,15).
; Expected selection: pckod.h with the %b register listed before the %a register.
1254 define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1255 ; CHECK-LABEL: pckod_v8i16_0:
1257 %1 = load <8 x i16>, <8 x i16>* %a
1258 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1259 %2 = load <8 x i16>, <8 x i16>* %b
1260 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
1261 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1262 ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
1263 store <8 x i16> %3, <8 x i16>* %c
1264 ; CHECK-DAG: st.h [[R3]], 0($4)
; Test: v4i32 shuffle that packs the odd-indexed elements of %a (indices
; 1, 3) followed by the odd-indexed elements of %b (indices 5, 7).
; Expected selection: pckod.w with the %b register listed before the %a register.
1269 define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
1270 ; CHECK-LABEL: pckod_v4i32_0:
1272 %1 = load <4 x i32>, <4 x i32>* %a
1273 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1274 %2 = load <4 x i32>, <4 x i32>* %b
1275 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
1276 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1277 ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
1278 store <4 x i32> %3, <4 x i32>* %c
1279 ; CHECK-DAG: st.w [[R3]], 0($4)
; Test: v2i64 shuffle whose mask <1,3> takes element 1 of %a followed by
; element 1 of %b. Expected selection: ilvod.d rather than pckod.d.
; NOTE(review): the ilvod.d expectation lists the %a register before the %b
; register, whereas the other two-source expectations in this file (e.g.
; pckod.w in pckod_v4i32_0, ilvev.d in pckev_v2i64_0) list %b's register first.
; Confirm this operand order is the intended lowering.
1284 define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
1285 ; CHECK-LABEL: pckod_v2i64_0:
1287 %1 = load <2 x i64>, <2 x i64>* %a
1288 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1289 %2 = load <2 x i64>, <2 x i64>* %b
1290 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
1291 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
1292 ; pckod.d and ilvod.d are equivalent for v2i64
1293 ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
1294 store <2 x i64> %3, <2 x i64>* %c
1295 ; CHECK-DAG: st.d [[R3]], 0($4)
; Test: v16i8 shuffle whose mask lists the odd-indexed elements of %b
; (indices 17,19,..,31) twice. %a is loaded but no mask index refers to it.
; Expected selection: pckod.b with the register loaded from %b as both sources.
1300 define void @pckod_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
1301 ; CHECK-LABEL: pckod_v16i8_1:
1303 %1 = load <16 x i8>, <16 x i8>* %a
1304 %2 = load <16 x i8>, <16 x i8>* %b
1305 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
1306 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
1307 <16 x i32> <i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1308 ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1309 store <16 x i8> %3, <16 x i8>* %c
1310 ; CHECK-DAG: st.b [[R3]], 0($4)
; Test: v8i16 shuffle whose mask lists the odd-indexed elements of %b
; (indices 9,11,13,15) twice. %a is loaded but no mask index refers to it.
; Expected selection: pckod.h with the register loaded from %b as both sources.
1315 define void @pckod_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1316 ; CHECK-LABEL: pckod_v8i16_1:
1318 %1 = load <8 x i16>, <8 x i16>* %a
1319 %2 = load <8 x i16>, <8 x i16>* %b
1320 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
1321 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 9, i32 11, i32 13, i32 15>
1322 ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1323 store <8 x i16> %3, <8 x i16>* %c
1324 ; CHECK-DAG: st.h [[R3]], 0($4)
; Test: v4i32 shuffle whose mask <5,7,5,7> lists the odd-indexed elements of
; %b twice. %a is loaded but no mask index refers to it.
; Expected selection: pckod.w with the register loaded from %b as both sources.
1329 define void @pckod_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
1330 ; CHECK-LABEL: pckod_v4i32_1:
1332 %1 = load <4 x i32>, <4 x i32>* %a
1333 %2 = load <4 x i32>, <4 x i32>* %b
1334 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
1335 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 5, i32 7, i32 5, i32 7>
1336 ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
1337 store <4 x i32> %3, <4 x i32>* %c
1338 ; CHECK-DAG: st.w [[R3]], 0($4)
; Test: v2i64 shuffle whose mask <3,3> replicates element 1 of %b into both
; lanes. %a is loaded but no mask index refers to it.
; Expected selection: splati.d of element 1 rather than a pckod.d.
1343 define void @pckod_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
1344 ; CHECK-LABEL: pckod_v2i64_1:
1346 %1 = load <2 x i64>, <2 x i64>* %a
1347 %2 = load <2 x i64>, <2 x i64>* %b
1348 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
1349 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 3>
1350 ; pckod.d and splati.d are equivalent for v2i64
1351 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
1352 store <2 x i64> %3, <2 x i64>* %c
1353 ; CHECK-DAG: st.d [[R3]], 0($4)
; Test: v16i8 shuffle whose mask lists the odd-indexed elements of %a
; (indices 1,3,..,15) twice. %b is loaded but no mask index refers to it.
; Expected selection: pckod.b with the register loaded from %a as both sources.
1358 define void @pckod_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
1359 ; CHECK-LABEL: pckod_v16i8_2:
1361 %1 = load <16 x i8>, <16 x i8>* %a
1362 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1363 %2 = load <16 x i8>, <16 x i8>* %b
1364 %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
1365 <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1366 ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
1367 store <16 x i8> %3, <16 x i8>* %c
1368 ; CHECK-DAG: st.b [[R3]], 0($4)
; Test: v8i16 shuffle whose mask <1,3,5,7,1,3,5,7> lists the odd-indexed
; elements of %a twice. %b is loaded but no mask index refers to it.
; Expected selection: pckod.h with the register loaded from %a as both sources.
1373 define void @pckod_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1374 ; CHECK-LABEL: pckod_v8i16_2:
1376 %1 = load <8 x i16>, <8 x i16>* %a
1377 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1378 %2 = load <8 x i16>, <8 x i16>* %b
1379 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 1, i32 3, i32 5, i32 7>
1380 ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
1381 store <8 x i16> %3, <8 x i16>* %c
1382 ; CHECK-DAG: st.h [[R3]], 0($4)
; Test: v4i32 shuffle whose mask <1,3,1,3> lists the odd-indexed elements of
; %a twice. %b is loaded but no mask index refers to it.
; Expected selection: pckod.w with the register loaded from %a as both sources.
1387 define void @pckod_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
1388 ; CHECK-LABEL: pckod_v4i32_2:
1390 %1 = load <4 x i32>, <4 x i32>* %a
1391 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1392 %2 = load <4 x i32>, <4 x i32>* %b
1393 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
1394 ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
1395 store <4 x i32> %3, <4 x i32>* %c
1396 ; CHECK-DAG: st.w [[R3]], 0($4)
; Test: v2i64 shuffle whose mask <1,1> replicates element 1 of %a into both
; lanes. %b is loaded but no mask index refers to it.
; Expected selection: splati.d of element 1 rather than a pckod.d.
1401 define void @pckod_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
1402 ; CHECK-LABEL: pckod_v2i64_2:
1404 %1 = load <2 x i64>, <2 x i64>* %a
1405 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1406 %2 = load <2 x i64>, <2 x i64>* %b
1407 %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 1>
1408 ; pckod.d and splati.d are equivalent for v2i64
1409 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
1410 store <2 x i64> %3, <2 x i64>* %c
1411 ; CHECK-DAG: st.d [[R3]], 0($4)
; Test: single-source v16i8 shuffle (second operand undef) whose mask is
; sixteen copies of index 4, i.e. a splat of element 4 of %a.
; Expected selection: splati.b with element index 4.
1416 define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
1417 ; CHECK-LABEL: splati_v16i8_0:
1419 %1 = load <16 x i8>, <16 x i8>* %a
1420 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1421 %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
1422 <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
1423 ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
1424 store <16 x i8> %2, <16 x i8>* %c
1425 ; CHECK-DAG: st.b [[R3]], 0($4)
; Test: single-source v8i16 shuffle (second operand undef) whose mask is
; eight copies of index 4, i.e. a splat of element 4 of %a.
; Expected selection: splati.h with element index 4.
1430 define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
1431 ; CHECK-LABEL: splati_v8i16_0:
1433 %1 = load <8 x i16>, <8 x i16>* %a
1434 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1435 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
1436 ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
1437 store <8 x i16> %2, <8 x i16>* %c
1438 ; CHECK-DAG: st.h [[R3]], 0($4)
; Test: single-source v4i32 shuffle (second operand undef) whose mask is
; four copies of index 3, i.e. a splat of element 3 of %a.
; Expected selection: splati.w with element index 3.
1443 define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
1444 ; CHECK-LABEL: splati_v4i32_0:
1446 %1 = load <4 x i32>, <4 x i32>* %a
1447 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1448 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
1449 ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][3]
1450 store <4 x i32> %2, <4 x i32>* %c
1451 ; CHECK-DAG: st.w [[R3]], 0($4)
; Test: single-source v2i64 shuffle (second operand undef) whose mask is
; two copies of index 1, i.e. a splat of element 1 of %a.
; Expected selection: splati.d with element index 1.
1456 define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
1457 ; CHECK-LABEL: splati_v2i64_0:
1459 %1 = load <2 x i64>, <2 x i64>* %a
1460 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1461 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
1462 ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
1463 store <2 x i64> %2, <2 x i64>* %c
1464 ; CHECK-DAG: st.d [[R3]], 0($4)