1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; test_vext_s8: two <8 x i8> operands, both copied from the stack slot
; %xS8x8, are shuffled with the sequential mask 1..8. That mask takes lanes
; 1-7 of the first operand followed by lane 0 of the second, i.e. an extract
; starting at byte 1, so the pattern below requires an ext with immediate #1.
; The pattern line was absent from this function (every sibling test has one),
; which left the test passing no matter what code was generated.
3 define void @test_vext_s8() nounwind ssp {
4 ; CHECK-LABEL: test_vext_s8:
5 ; CHECK: {{ext.8.*#1}}
6 %xS8x8 = alloca <8 x i8>, align 8
7 %__a = alloca <8 x i8>, align 8
8 %__b = alloca <8 x i8>, align 8
; %xS8x8 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
9 %tmp = load <8 x i8>, <8 x i8>* %xS8x8, align 8
10 store <8 x i8> %tmp, <8 x i8>* %__a, align 8
11 %tmp1 = load <8 x i8>, <8 x i8>* %xS8x8, align 8
12 store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
; Reload the two operands from their stack copies before shuffling.
13 %tmp2 = load <8 x i8>, <8 x i8>* %__a, align 8
14 %tmp3 = load <8 x i8>, <8 x i8>* %__b, align 8
; Mask indices 0-7 select from %tmp2 and 8-15 from %tmp3.
15 %vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
16 store <8 x i8> %vext, <8 x i8>* %xS8x8, align 8
; test_vext_u8: two <8 x i8> operands, both copied from the stack slot
; %xU8x8, are shuffled with the sequential mask 2..9 (lanes 2-7 of the first
; operand, then lanes 0-1 of the second). The FileCheck pattern requires an
; ext with immediate #2.
20 define void @test_vext_u8() nounwind ssp {
21 ; CHECK-LABEL: test_vext_u8:
22 ; CHECK: {{ext.8.*#2}}
23 %xU8x8 = alloca <8 x i8>, align 8
24 %__a = alloca <8 x i8>, align 8
25 %__b = alloca <8 x i8>, align 8
; %xU8x8 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
26 %tmp = load <8 x i8>, <8 x i8>* %xU8x8, align 8
27 store <8 x i8> %tmp, <8 x i8>* %__a, align 8
28 %tmp1 = load <8 x i8>, <8 x i8>* %xU8x8, align 8
29 store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
; Reload the two operands from their stack copies before shuffling.
30 %tmp2 = load <8 x i8>, <8 x i8>* %__a, align 8
31 %tmp3 = load <8 x i8>, <8 x i8>* %__b, align 8
; Mask indices 0-7 select from %tmp2 and 8-15 from %tmp3.
32 %vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
33 store <8 x i8> %vext, <8 x i8>* %xU8x8, align 8
; test_vext_p8: two <8 x i8> operands, both copied from the stack slot
; %xP8x8, are shuffled with the sequential mask 3..10 (lanes 3-7 of the first
; operand, then lanes 0-2 of the second). The FileCheck pattern requires an
; ext with immediate #3.
37 define void @test_vext_p8() nounwind ssp {
38 ; CHECK-LABEL: test_vext_p8:
39 ; CHECK: {{ext.8.*#3}}
40 %xP8x8 = alloca <8 x i8>, align 8
41 %__a = alloca <8 x i8>, align 8
42 %__b = alloca <8 x i8>, align 8
; %xP8x8 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
43 %tmp = load <8 x i8>, <8 x i8>* %xP8x8, align 8
44 store <8 x i8> %tmp, <8 x i8>* %__a, align 8
45 %tmp1 = load <8 x i8>, <8 x i8>* %xP8x8, align 8
46 store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
; Reload the two operands from their stack copies before shuffling.
47 %tmp2 = load <8 x i8>, <8 x i8>* %__a, align 8
48 %tmp3 = load <8 x i8>, <8 x i8>* %__b, align 8
; Mask indices 0-7 select from %tmp2 and 8-15 from %tmp3.
49 %vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
50 store <8 x i8> %vext, <8 x i8>* %xP8x8, align 8
; test_vext_s16: two <4 x i16> operands, both copied from the stack slot
; %xS16x4, are shuffled with the sequential mask 1..4 (lanes 1-3 of the first
; operand, then lane 0 of the second). Starting at 16-bit lane 1 is a byte
; offset of 2, and the FileCheck pattern requires an ext with immediate #2.
54 define void @test_vext_s16() nounwind ssp {
55 ; CHECK-LABEL: test_vext_s16:
56 ; CHECK: {{ext.8.*#2}}
57 %xS16x4 = alloca <4 x i16>, align 8
58 %__a = alloca <4 x i16>, align 8
59 %__b = alloca <4 x i16>, align 8
; %xS16x4 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
60 %tmp = load <4 x i16>, <4 x i16>* %xS16x4, align 8
61 store <4 x i16> %tmp, <4 x i16>* %__a, align 8
62 %tmp1 = load <4 x i16>, <4 x i16>* %xS16x4, align 8
63 store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
; Each operand is reloaded, bitcast to <8 x i8>, and then bitcast straight
; back to <4 x i16>; the shuffle itself operates on the <4 x i16> values.
64 %tmp2 = load <4 x i16>, <4 x i16>* %__a, align 8
65 %tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
66 %tmp4 = load <4 x i16>, <4 x i16>* %__b, align 8
67 %tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
68 %tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
69 %tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
; Mask indices 0-3 select from %tmp6 and 4-7 from %tmp7.
70 %vext = shufflevector <4 x i16> %tmp6, <4 x i16> %tmp7, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
71 store <4 x i16> %vext, <4 x i16>* %xS16x4, align 8
; test_vext_u16: two <4 x i16> operands, both copied from the stack slot
; %xU16x4, are shuffled with the sequential mask 2..5 (lanes 2-3 of the first
; operand, then lanes 0-1 of the second). Starting at 16-bit lane 2 is a byte
; offset of 4, and the FileCheck pattern requires an ext with immediate #4.
75 define void @test_vext_u16() nounwind ssp {
76 ; CHECK-LABEL: test_vext_u16:
77 ; CHECK: {{ext.8.*#4}}
78 %xU16x4 = alloca <4 x i16>, align 8
79 %__a = alloca <4 x i16>, align 8
80 %__b = alloca <4 x i16>, align 8
; %xU16x4 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
81 %tmp = load <4 x i16>, <4 x i16>* %xU16x4, align 8
82 store <4 x i16> %tmp, <4 x i16>* %__a, align 8
83 %tmp1 = load <4 x i16>, <4 x i16>* %xU16x4, align 8
84 store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
; Each operand is reloaded, bitcast to <8 x i8>, and then bitcast straight
; back to <4 x i16>; the shuffle itself operates on the <4 x i16> values.
85 %tmp2 = load <4 x i16>, <4 x i16>* %__a, align 8
86 %tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
87 %tmp4 = load <4 x i16>, <4 x i16>* %__b, align 8
88 %tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
89 %tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
90 %tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
; Mask indices 0-3 select from %tmp6 and 4-7 from %tmp7.
91 %vext = shufflevector <4 x i16> %tmp6, <4 x i16> %tmp7, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
92 store <4 x i16> %vext, <4 x i16>* %xU16x4, align 8
; test_vext_p16: two <4 x i16> operands, both copied from the stack slot
; %xP16x4, are shuffled with the sequential mask 3..6 (lane 3 of the first
; operand, then lanes 0-2 of the second). Starting at 16-bit lane 3 is a byte
; offset of 6, and the FileCheck pattern requires an ext with immediate #6.
96 define void @test_vext_p16() nounwind ssp {
97 ; CHECK-LABEL: test_vext_p16:
98 ; CHECK: {{ext.8.*#6}}
99 %xP16x4 = alloca <4 x i16>, align 8
100 %__a = alloca <4 x i16>, align 8
101 %__b = alloca <4 x i16>, align 8
; %xP16x4 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
102 %tmp = load <4 x i16>, <4 x i16>* %xP16x4, align 8
103 store <4 x i16> %tmp, <4 x i16>* %__a, align 8
104 %tmp1 = load <4 x i16>, <4 x i16>* %xP16x4, align 8
105 store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
; Each operand is reloaded, bitcast to <8 x i8>, and then bitcast straight
; back to <4 x i16>; the shuffle itself operates on the <4 x i16> values.
106 %tmp2 = load <4 x i16>, <4 x i16>* %__a, align 8
107 %tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
108 %tmp4 = load <4 x i16>, <4 x i16>* %__b, align 8
109 %tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
110 %tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
111 %tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
; Mask indices 0-3 select from %tmp6 and 4-7 from %tmp7.
112 %vext = shufflevector <4 x i16> %tmp6, <4 x i16> %tmp7, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
113 store <4 x i16> %vext, <4 x i16>* %xP16x4, align 8
; test_vext_s32: two <2 x i32> operands, both copied from the stack slot
; %xS32x2, are shuffled with the mask <1, 2> (lane 1 of the first operand,
; then lane 0 of the second). Unlike the 8- and 16-bit tests, the FileCheck
; pattern here requires rev64.2s rather than an ext.
; NOTE(review): presumably rev64 is chosen because both operands are copies
; of the same value, which makes the mask a plain swap of the two lanes --
; confirm against the backend's shuffle lowering.
117 define void @test_vext_s32() nounwind ssp {
118 ; CHECK-LABEL: test_vext_s32:
119 ; CHECK: {{rev64.2s.*}}
120 %xS32x2 = alloca <2 x i32>, align 8
121 %__a = alloca <2 x i32>, align 8
122 %__b = alloca <2 x i32>, align 8
; %xS32x2 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
123 %tmp = load <2 x i32>, <2 x i32>* %xS32x2, align 8
124 store <2 x i32> %tmp, <2 x i32>* %__a, align 8
125 %tmp1 = load <2 x i32>, <2 x i32>* %xS32x2, align 8
126 store <2 x i32> %tmp1, <2 x i32>* %__b, align 8
; Each operand is reloaded, bitcast to <8 x i8>, and then bitcast straight
; back to <2 x i32>; the shuffle itself operates on the <2 x i32> values.
127 %tmp2 = load <2 x i32>, <2 x i32>* %__a, align 8
128 %tmp3 = bitcast <2 x i32> %tmp2 to <8 x i8>
129 %tmp4 = load <2 x i32>, <2 x i32>* %__b, align 8
130 %tmp5 = bitcast <2 x i32> %tmp4 to <8 x i8>
131 %tmp6 = bitcast <8 x i8> %tmp3 to <2 x i32>
132 %tmp7 = bitcast <8 x i8> %tmp5 to <2 x i32>
; Mask indices 0-1 select from %tmp6 and 2-3 from %tmp7.
133 %vext = shufflevector <2 x i32> %tmp6, <2 x i32> %tmp7, <2 x i32> <i32 1, i32 2>
134 store <2 x i32> %vext, <2 x i32>* %xS32x2, align 8
; test_vext_u32: two <2 x i32> operands, both copied from the stack slot
; %xU32x2, are shuffled with the mask <1, 2> (lane 1 of the first operand,
; then lane 0 of the second). The FileCheck pattern requires rev64.2s rather
; than an ext.
; NOTE(review): presumably rev64 is chosen because both operands are copies
; of the same value, which makes the mask a plain swap of the two lanes --
; confirm against the backend's shuffle lowering.
138 define void @test_vext_u32() nounwind ssp {
139 ; CHECK-LABEL: test_vext_u32:
140 ; CHECK: {{rev64.2s.*}}
141 %xU32x2 = alloca <2 x i32>, align 8
142 %__a = alloca <2 x i32>, align 8
143 %__b = alloca <2 x i32>, align 8
; %xU32x2 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
144 %tmp = load <2 x i32>, <2 x i32>* %xU32x2, align 8
145 store <2 x i32> %tmp, <2 x i32>* %__a, align 8
146 %tmp1 = load <2 x i32>, <2 x i32>* %xU32x2, align 8
147 store <2 x i32> %tmp1, <2 x i32>* %__b, align 8
; Each operand is reloaded, bitcast to <8 x i8>, and then bitcast straight
; back to <2 x i32>; the shuffle itself operates on the <2 x i32> values.
148 %tmp2 = load <2 x i32>, <2 x i32>* %__a, align 8
149 %tmp3 = bitcast <2 x i32> %tmp2 to <8 x i8>
150 %tmp4 = load <2 x i32>, <2 x i32>* %__b, align 8
151 %tmp5 = bitcast <2 x i32> %tmp4 to <8 x i8>
152 %tmp6 = bitcast <8 x i8> %tmp3 to <2 x i32>
153 %tmp7 = bitcast <8 x i8> %tmp5 to <2 x i32>
; Mask indices 0-1 select from %tmp6 and 2-3 from %tmp7.
154 %vext = shufflevector <2 x i32> %tmp6, <2 x i32> %tmp7, <2 x i32> <i32 1, i32 2>
155 store <2 x i32> %vext, <2 x i32>* %xU32x2, align 8
; test_vext_f32: two <2 x float> operands, both copied from the stack slot
; %xF32x2, are shuffled with the mask <1, 2> (lane 1 of the first operand,
; then lane 0 of the second). The FileCheck pattern requires rev64.2s rather
; than an ext.
; NOTE(review): presumably rev64 is chosen because both operands are copies
; of the same value, which makes the mask a plain swap of the two lanes --
; confirm against the backend's shuffle lowering.
159 define void @test_vext_f32() nounwind ssp {
160 ; CHECK-LABEL: test_vext_f32:
161 ; CHECK: {{rev64.2s.*}}
162 %xF32x2 = alloca <2 x float>, align 8
163 %__a = alloca <2 x float>, align 8
164 %__b = alloca <2 x float>, align 8
; %xF32x2 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
165 %tmp = load <2 x float>, <2 x float>* %xF32x2, align 8
166 store <2 x float> %tmp, <2 x float>* %__a, align 8
167 %tmp1 = load <2 x float>, <2 x float>* %xF32x2, align 8
168 store <2 x float> %tmp1, <2 x float>* %__b, align 8
; Each operand is reloaded, bitcast to <8 x i8>, and then bitcast straight
; back to <2 x float>; the shuffle itself operates on the <2 x float> values.
169 %tmp2 = load <2 x float>, <2 x float>* %__a, align 8
170 %tmp3 = bitcast <2 x float> %tmp2 to <8 x i8>
171 %tmp4 = load <2 x float>, <2 x float>* %__b, align 8
172 %tmp5 = bitcast <2 x float> %tmp4 to <8 x i8>
173 %tmp6 = bitcast <8 x i8> %tmp3 to <2 x float>
174 %tmp7 = bitcast <8 x i8> %tmp5 to <2 x float>
; Mask indices 0-1 select from %tmp6 and 2-3 from %tmp7.
175 %vext = shufflevector <2 x float> %tmp6, <2 x float> %tmp7, <2 x i32> <i32 1, i32 2>
176 store <2 x float> %vext, <2 x float>* %xF32x2, align 8
; test_vext_s64: two <1 x i64> operands, both copied from the stack slot
; %xS64x1, are shuffled with the single-element mask <1>, which selects
; lane 0 of the second operand. Only the function label is actually checked:
; the CHECK_FIXME prefix below is not a prefix FileCheck is invoked with on
; this file's RUN line, so that pattern is informational only.
; NOTE(review): the disabled pattern here (rev64.2s) differs from the one in
; test_vext_u64 (ext.8 ... #1) although the two bodies perform the same
; shuffle -- confirm which one is intended before enabling either.
180 define void @test_vext_s64() nounwind ssp {
181 ; CHECK-LABEL: test_vext_s64:
182 ; CHECK_FIXME: {{rev64.2s.*}}
183 ; this just turns into a load of the second element
184 %xS64x1 = alloca <1 x i64>, align 8
185 %__a = alloca <1 x i64>, align 8
186 %__b = alloca <1 x i64>, align 8
; %xS64x1 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
187 %tmp = load <1 x i64>, <1 x i64>* %xS64x1, align 8
188 store <1 x i64> %tmp, <1 x i64>* %__a, align 8
189 %tmp1 = load <1 x i64>, <1 x i64>* %xS64x1, align 8
190 store <1 x i64> %tmp1, <1 x i64>* %__b, align 8
; Each operand is reloaded, bitcast to <8 x i8>, and then bitcast straight
; back to <1 x i64>; the shuffle itself operates on the <1 x i64> values.
191 %tmp2 = load <1 x i64>, <1 x i64>* %__a, align 8
192 %tmp3 = bitcast <1 x i64> %tmp2 to <8 x i8>
193 %tmp4 = load <1 x i64>, <1 x i64>* %__b, align 8
194 %tmp5 = bitcast <1 x i64> %tmp4 to <8 x i8>
195 %tmp6 = bitcast <8 x i8> %tmp3 to <1 x i64>
196 %tmp7 = bitcast <8 x i8> %tmp5 to <1 x i64>
; Mask index 0 would select from %tmp6; index 1 selects from %tmp7.
197 %vext = shufflevector <1 x i64> %tmp6, <1 x i64> %tmp7, <1 x i32> <i32 1>
198 store <1 x i64> %vext, <1 x i64>* %xS64x1, align 8
; test_vext_u64: two <1 x i64> operands, both copied from the stack slot
; %xU64x1, are shuffled with the single-element mask <1>, which selects
; lane 0 of the second operand. Only the function label is actually checked:
; the CHECK_FIXME prefix below is not a prefix FileCheck is invoked with on
; this file's RUN line, so that pattern is informational only.
202 define void @test_vext_u64() nounwind ssp {
203 ; CHECK-LABEL: test_vext_u64:
204 ; CHECK_FIXME: {{ext.8.*#1}}
205 ; this is turned into a simple load of the 2nd element
206 %xU64x1 = alloca <1 x i64>, align 8
207 %__a = alloca <1 x i64>, align 8
208 %__b = alloca <1 x i64>, align 8
; %xU64x1 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
209 %tmp = load <1 x i64>, <1 x i64>* %xU64x1, align 8
210 store <1 x i64> %tmp, <1 x i64>* %__a, align 8
211 %tmp1 = load <1 x i64>, <1 x i64>* %xU64x1, align 8
212 store <1 x i64> %tmp1, <1 x i64>* %__b, align 8
; Each operand is reloaded, bitcast to <8 x i8>, and then bitcast straight
; back to <1 x i64>; the shuffle itself operates on the <1 x i64> values.
213 %tmp2 = load <1 x i64>, <1 x i64>* %__a, align 8
214 %tmp3 = bitcast <1 x i64> %tmp2 to <8 x i8>
215 %tmp4 = load <1 x i64>, <1 x i64>* %__b, align 8
216 %tmp5 = bitcast <1 x i64> %tmp4 to <8 x i8>
217 %tmp6 = bitcast <8 x i8> %tmp3 to <1 x i64>
218 %tmp7 = bitcast <8 x i8> %tmp5 to <1 x i64>
; Mask index 0 would select from %tmp6; index 1 selects from %tmp7.
219 %vext = shufflevector <1 x i64> %tmp6, <1 x i64> %tmp7, <1 x i32> <i32 1>
220 store <1 x i64> %vext, <1 x i64>* %xU64x1, align 8
; test_vextq_s8: two <16 x i8> operands, both copied from the stack slot
; %xS8x16, are shuffled with the sequential mask 4..19 (lanes 4-15 of the
; first operand, then lanes 0-3 of the second). The FileCheck pattern requires
; an ext.16 instruction with immediate #4.
224 define void @test_vextq_s8() nounwind ssp {
225 ; CHECK-LABEL: test_vextq_s8:
226 ; CHECK: {{ext.16.*#4}}
227 %xS8x16 = alloca <16 x i8>, align 16
228 %__a = alloca <16 x i8>, align 16
229 %__b = alloca <16 x i8>, align 16
; %xS8x16 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
230 %tmp = load <16 x i8>, <16 x i8>* %xS8x16, align 16
231 store <16 x i8> %tmp, <16 x i8>* %__a, align 16
232 %tmp1 = load <16 x i8>, <16 x i8>* %xS8x16, align 16
233 store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
; Reload the two operands from their stack copies before shuffling.
234 %tmp2 = load <16 x i8>, <16 x i8>* %__a, align 16
235 %tmp3 = load <16 x i8>, <16 x i8>* %__b, align 16
; Mask indices 0-15 select from %tmp2 and 16-31 from %tmp3.
236 %vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
237 store <16 x i8> %vext, <16 x i8>* %xS8x16, align 16
; test_vextq_u8: two <16 x i8> operands, both copied from the stack slot
; %xU8x16, are shuffled with the sequential mask 5..20 (lanes 5-15 of the
; first operand, then lanes 0-4 of the second). The FileCheck pattern requires
; an ext.16 instruction with immediate #5.
241 define void @test_vextq_u8() nounwind ssp {
242 ; CHECK-LABEL: test_vextq_u8:
243 ; CHECK: {{ext.16.*#5}}
244 %xU8x16 = alloca <16 x i8>, align 16
245 %__a = alloca <16 x i8>, align 16
246 %__b = alloca <16 x i8>, align 16
; %xU8x16 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
247 %tmp = load <16 x i8>, <16 x i8>* %xU8x16, align 16
248 store <16 x i8> %tmp, <16 x i8>* %__a, align 16
249 %tmp1 = load <16 x i8>, <16 x i8>* %xU8x16, align 16
250 store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
; Reload the two operands from their stack copies before shuffling.
251 %tmp2 = load <16 x i8>, <16 x i8>* %__a, align 16
252 %tmp3 = load <16 x i8>, <16 x i8>* %__b, align 16
; Mask indices 0-15 select from %tmp2 and 16-31 from %tmp3.
253 %vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
254 store <16 x i8> %vext, <16 x i8>* %xU8x16, align 16
; test_vextq_p8: two <16 x i8> operands, both copied from the stack slot
; %xP8x16, are shuffled with the sequential mask 6..21 (lanes 6-15 of the
; first operand, then lanes 0-5 of the second). The FileCheck pattern requires
; an ext.16 instruction with immediate #6.
258 define void @test_vextq_p8() nounwind ssp {
259 ; CHECK-LABEL: test_vextq_p8:
260 ; CHECK: {{ext.16.*#6}}
261 %xP8x16 = alloca <16 x i8>, align 16
262 %__a = alloca <16 x i8>, align 16
263 %__b = alloca <16 x i8>, align 16
; %xP8x16 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
264 %tmp = load <16 x i8>, <16 x i8>* %xP8x16, align 16
265 store <16 x i8> %tmp, <16 x i8>* %__a, align 16
266 %tmp1 = load <16 x i8>, <16 x i8>* %xP8x16, align 16
267 store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
; Reload the two operands from their stack copies before shuffling.
268 %tmp2 = load <16 x i8>, <16 x i8>* %__a, align 16
269 %tmp3 = load <16 x i8>, <16 x i8>* %__b, align 16
; Mask indices 0-15 select from %tmp2 and 16-31 from %tmp3.
270 %vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21>
271 store <16 x i8> %vext, <16 x i8>* %xP8x16, align 16
; test_vextq_s16: two <8 x i16> operands, both copied from the stack slot
; %xS16x8, are shuffled with the sequential mask 7..14 (lane 7 of the first
; operand, then lanes 0-6 of the second). Starting at 16-bit lane 7 is a byte
; offset of 14, and the FileCheck pattern requires an ext.16 instruction with
; immediate #14.
275 define void @test_vextq_s16() nounwind ssp {
276 ; CHECK-LABEL: test_vextq_s16:
277 ; CHECK: {{ext.16.*#14}}
278 %xS16x8 = alloca <8 x i16>, align 16
279 %__a = alloca <8 x i16>, align 16
280 %__b = alloca <8 x i16>, align 16
; %xS16x8 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
281 %tmp = load <8 x i16>, <8 x i16>* %xS16x8, align 16
282 store <8 x i16> %tmp, <8 x i16>* %__a, align 16
283 %tmp1 = load <8 x i16>, <8 x i16>* %xS16x8, align 16
284 store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
; Each operand is reloaded, bitcast to <16 x i8>, and then bitcast straight
; back to <8 x i16>; the shuffle itself operates on the <8 x i16> values.
285 %tmp2 = load <8 x i16>, <8 x i16>* %__a, align 16
286 %tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
287 %tmp4 = load <8 x i16>, <8 x i16>* %__b, align 16
288 %tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
289 %tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
290 %tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
; Mask indices 0-7 select from %tmp6 and 8-15 from %tmp7.
291 %vext = shufflevector <8 x i16> %tmp6, <8 x i16> %tmp7, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
292 store <8 x i16> %vext, <8 x i16>* %xS16x8, align 16
; test_vextq_u16: two <8 x i16> operands, both copied from the stack slot
; %xU16x8, are shuffled with the sequential mask 4..11 (lanes 4-7 of the
; first operand, then lanes 0-3 of the second). Starting at 16-bit lane 4 is a
; byte offset of 8, and the FileCheck pattern requires an ext.16 instruction
; with immediate #8.
296 define void @test_vextq_u16() nounwind ssp {
297 ; CHECK-LABEL: test_vextq_u16:
298 ; CHECK: {{ext.16.*#8}}
299 %xU16x8 = alloca <8 x i16>, align 16
300 %__a = alloca <8 x i16>, align 16
301 %__b = alloca <8 x i16>, align 16
; %xU16x8 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
302 %tmp = load <8 x i16>, <8 x i16>* %xU16x8, align 16
303 store <8 x i16> %tmp, <8 x i16>* %__a, align 16
304 %tmp1 = load <8 x i16>, <8 x i16>* %xU16x8, align 16
305 store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
; Each operand is reloaded, bitcast to <16 x i8>, and then bitcast straight
; back to <8 x i16>; the shuffle itself operates on the <8 x i16> values.
306 %tmp2 = load <8 x i16>, <8 x i16>* %__a, align 16
307 %tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
308 %tmp4 = load <8 x i16>, <8 x i16>* %__b, align 16
309 %tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
310 %tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
311 %tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
; Mask indices 0-7 select from %tmp6 and 8-15 from %tmp7.
312 %vext = shufflevector <8 x i16> %tmp6, <8 x i16> %tmp7, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
313 store <8 x i16> %vext, <8 x i16>* %xU16x8, align 16
; test_vextq_p16: two <8 x i16> operands, both copied from the stack slot
; %xP16x8, are shuffled with the sequential mask 5..12 (lanes 5-7 of the
; first operand, then lanes 0-4 of the second). Starting at 16-bit lane 5 is a
; byte offset of 10, and the FileCheck pattern requires an ext.16 instruction
; with immediate #10.
317 define void @test_vextq_p16() nounwind ssp {
318 ; CHECK-LABEL: test_vextq_p16:
319 ; CHECK: {{ext.16.*#10}}
320 %xP16x8 = alloca <8 x i16>, align 16
321 %__a = alloca <8 x i16>, align 16
322 %__b = alloca <8 x i16>, align 16
; %xP16x8 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
323 %tmp = load <8 x i16>, <8 x i16>* %xP16x8, align 16
324 store <8 x i16> %tmp, <8 x i16>* %__a, align 16
325 %tmp1 = load <8 x i16>, <8 x i16>* %xP16x8, align 16
326 store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
; Each operand is reloaded, bitcast to <16 x i8>, and then bitcast straight
; back to <8 x i16>; the shuffle itself operates on the <8 x i16> values.
327 %tmp2 = load <8 x i16>, <8 x i16>* %__a, align 16
328 %tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
329 %tmp4 = load <8 x i16>, <8 x i16>* %__b, align 16
330 %tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
331 %tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
332 %tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
; Mask indices 0-7 select from %tmp6 and 8-15 from %tmp7.
333 %vext = shufflevector <8 x i16> %tmp6, <8 x i16> %tmp7, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
334 store <8 x i16> %vext, <8 x i16>* %xP16x8, align 16
; test_vextq_s32: two <4 x i32> operands, both copied from the stack slot
; %xS32x4, are shuffled with the sequential mask 1..4 (lanes 1-3 of the first
; operand, then lane 0 of the second). Starting at 32-bit lane 1 is a byte
; offset of 4, and the FileCheck pattern requires an ext.16 instruction with
; immediate #4.
338 define void @test_vextq_s32() nounwind ssp {
339 ; CHECK-LABEL: test_vextq_s32:
340 ; CHECK: {{ext.16.*#4}}
341 %xS32x4 = alloca <4 x i32>, align 16
342 %__a = alloca <4 x i32>, align 16
343 %__b = alloca <4 x i32>, align 16
; %xS32x4 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
344 %tmp = load <4 x i32>, <4 x i32>* %xS32x4, align 16
345 store <4 x i32> %tmp, <4 x i32>* %__a, align 16
346 %tmp1 = load <4 x i32>, <4 x i32>* %xS32x4, align 16
347 store <4 x i32> %tmp1, <4 x i32>* %__b, align 16
; Each operand is reloaded, bitcast to <16 x i8>, and then bitcast straight
; back to <4 x i32>; the shuffle itself operates on the <4 x i32> values.
348 %tmp2 = load <4 x i32>, <4 x i32>* %__a, align 16
349 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
350 %tmp4 = load <4 x i32>, <4 x i32>* %__b, align 16
351 %tmp5 = bitcast <4 x i32> %tmp4 to <16 x i8>
352 %tmp6 = bitcast <16 x i8> %tmp3 to <4 x i32>
353 %tmp7 = bitcast <16 x i8> %tmp5 to <4 x i32>
; Mask indices 0-3 select from %tmp6 and 4-7 from %tmp7.
354 %vext = shufflevector <4 x i32> %tmp6, <4 x i32> %tmp7, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
355 store <4 x i32> %vext, <4 x i32>* %xS32x4, align 16
; test_vextq_u32: two <4 x i32> operands, both copied from the stack slot
; %xU32x4, are shuffled with the sequential mask 2..5 (lanes 2-3 of the first
; operand, then lanes 0-1 of the second). Starting at 32-bit lane 2 is a byte
; offset of 8, and the FileCheck pattern requires an ext.16 instruction with
; immediate #8.
359 define void @test_vextq_u32() nounwind ssp {
360 ; CHECK-LABEL: test_vextq_u32:
361 ; CHECK: {{ext.16.*#8}}
362 %xU32x4 = alloca <4 x i32>, align 16
363 %__a = alloca <4 x i32>, align 16
364 %__b = alloca <4 x i32>, align 16
; %xU32x4 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
365 %tmp = load <4 x i32>, <4 x i32>* %xU32x4, align 16
366 store <4 x i32> %tmp, <4 x i32>* %__a, align 16
367 %tmp1 = load <4 x i32>, <4 x i32>* %xU32x4, align 16
368 store <4 x i32> %tmp1, <4 x i32>* %__b, align 16
; Each operand is reloaded, bitcast to <16 x i8>, and then bitcast straight
; back to <4 x i32>; the shuffle itself operates on the <4 x i32> values.
369 %tmp2 = load <4 x i32>, <4 x i32>* %__a, align 16
370 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
371 %tmp4 = load <4 x i32>, <4 x i32>* %__b, align 16
372 %tmp5 = bitcast <4 x i32> %tmp4 to <16 x i8>
373 %tmp6 = bitcast <16 x i8> %tmp3 to <4 x i32>
374 %tmp7 = bitcast <16 x i8> %tmp5 to <4 x i32>
; Mask indices 0-3 select from %tmp6 and 4-7 from %tmp7.
375 %vext = shufflevector <4 x i32> %tmp6, <4 x i32> %tmp7, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
376 store <4 x i32> %vext, <4 x i32>* %xU32x4, align 16
; test_vextq_f32: two <4 x float> operands, both copied from the stack slot
; %xF32x4, are shuffled with the sequential mask 3..6 (lane 3 of the first
; operand, then lanes 0-2 of the second). Starting at 32-bit lane 3 is a byte
; offset of 12, and the FileCheck pattern requires an ext.16 instruction with
; immediate #12.
380 define void @test_vextq_f32() nounwind ssp {
381 ; CHECK-LABEL: test_vextq_f32:
382 ; CHECK: {{ext.16.*#12}}
383 %xF32x4 = alloca <4 x float>, align 16
384 %__a = alloca <4 x float>, align 16
385 %__b = alloca <4 x float>, align 16
; %xF32x4 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
386 %tmp = load <4 x float>, <4 x float>* %xF32x4, align 16
387 store <4 x float> %tmp, <4 x float>* %__a, align 16
388 %tmp1 = load <4 x float>, <4 x float>* %xF32x4, align 16
389 store <4 x float> %tmp1, <4 x float>* %__b, align 16
; Each operand is reloaded, bitcast to <16 x i8>, and then bitcast straight
; back to <4 x float>; the shuffle itself operates on the <4 x float> values.
390 %tmp2 = load <4 x float>, <4 x float>* %__a, align 16
391 %tmp3 = bitcast <4 x float> %tmp2 to <16 x i8>
392 %tmp4 = load <4 x float>, <4 x float>* %__b, align 16
393 %tmp5 = bitcast <4 x float> %tmp4 to <16 x i8>
394 %tmp6 = bitcast <16 x i8> %tmp3 to <4 x float>
395 %tmp7 = bitcast <16 x i8> %tmp5 to <4 x float>
; Mask indices 0-3 select from %tmp6 and 4-7 from %tmp7.
396 %vext = shufflevector <4 x float> %tmp6, <4 x float> %tmp7, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
397 store <4 x float> %vext, <4 x float>* %xF32x4, align 16
; test_vextq_s64: two <2 x i64> operands, both copied from the stack slot
; %xS64x2, are shuffled with the mask <1, 2> (lane 1 of the first operand,
; then lane 0 of the second). Starting at 64-bit lane 1 is a byte offset of 8,
; and the FileCheck pattern requires an ext.16 instruction with immediate #8.
401 define void @test_vextq_s64() nounwind ssp {
402 ; CHECK-LABEL: test_vextq_s64:
403 ; CHECK: {{ext.16.*#8}}
404 %xS64x2 = alloca <2 x i64>, align 16
405 %__a = alloca <2 x i64>, align 16
406 %__b = alloca <2 x i64>, align 16
; %xS64x2 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
407 %tmp = load <2 x i64>, <2 x i64>* %xS64x2, align 16
408 store <2 x i64> %tmp, <2 x i64>* %__a, align 16
409 %tmp1 = load <2 x i64>, <2 x i64>* %xS64x2, align 16
410 store <2 x i64> %tmp1, <2 x i64>* %__b, align 16
; Each operand is reloaded, bitcast to <16 x i8>, and then bitcast straight
; back to <2 x i64>; the shuffle itself operates on the <2 x i64> values.
411 %tmp2 = load <2 x i64>, <2 x i64>* %__a, align 16
412 %tmp3 = bitcast <2 x i64> %tmp2 to <16 x i8>
413 %tmp4 = load <2 x i64>, <2 x i64>* %__b, align 16
414 %tmp5 = bitcast <2 x i64> %tmp4 to <16 x i8>
415 %tmp6 = bitcast <16 x i8> %tmp3 to <2 x i64>
416 %tmp7 = bitcast <16 x i8> %tmp5 to <2 x i64>
; Mask indices 0-1 select from %tmp6 and 2-3 from %tmp7.
417 %vext = shufflevector <2 x i64> %tmp6, <2 x i64> %tmp7, <2 x i32> <i32 1, i32 2>
418 store <2 x i64> %vext, <2 x i64>* %xS64x2, align 16
; test_vextq_u64: two <2 x i64> operands, both copied from the stack slot
; %xU64x2, are shuffled with the mask <1, 2> (lane 1 of the first operand,
; then lane 0 of the second). Starting at 64-bit lane 1 is a byte offset of 8,
; and the FileCheck pattern requires an ext.16 instruction with immediate #8.
422 define void @test_vextq_u64() nounwind ssp {
423 ; CHECK-LABEL: test_vextq_u64:
424 ; CHECK: {{ext.16.*#8}}
425 %xU64x2 = alloca <2 x i64>, align 16
426 %__a = alloca <2 x i64>, align 16
427 %__b = alloca <2 x i64>, align 16
; %xU64x2 is read without any earlier store in this function; the same slot
; feeds both %__a and %__b.
428 %tmp = load <2 x i64>, <2 x i64>* %xU64x2, align 16
429 store <2 x i64> %tmp, <2 x i64>* %__a, align 16
430 %tmp1 = load <2 x i64>, <2 x i64>* %xU64x2, align 16
431 store <2 x i64> %tmp1, <2 x i64>* %__b, align 16
; Each operand is reloaded, bitcast to <16 x i8>, and then bitcast straight
; back to <2 x i64>; the shuffle itself operates on the <2 x i64> values.
432 %tmp2 = load <2 x i64>, <2 x i64>* %__a, align 16
433 %tmp3 = bitcast <2 x i64> %tmp2 to <16 x i8>
434 %tmp4 = load <2 x i64>, <2 x i64>* %__b, align 16
435 %tmp5 = bitcast <2 x i64> %tmp4 to <16 x i8>
436 %tmp6 = bitcast <16 x i8> %tmp3 to <2 x i64>
437 %tmp7 = bitcast <16 x i8> %tmp5 to <2 x i64>
; Mask indices 0-1 select from %tmp6 and 2-3 from %tmp7.
438 %vext = shufflevector <2 x i64> %tmp6, <2 x i64> %tmp7, <2 x i32> <i32 1, i32 2>
439 store <2 x i64> %vext, <2 x i64>* %xU64x2, align 16
443 ; Shuffles with an undef second operand can also use an EXT, as long as the
444 ; indices wrap around and stay sequential.
; vext1: single-input shuffle of the <16 x i8> argument %_a (second operand
; undef) whose mask is 8..15 followed by 0..7, i.e. the sequential indices
; wrap around the end of the vector. The FileCheck pattern requires exactly
; "ext.16b v0, v0, v0, #8": one ext using the same register for both sources.
446 define <16 x i8> @vext1(<16 x i8> %_a) nounwind {
447 ; CHECK-LABEL: vext1:
448 ; CHECK: ext.16b v0, v0, v0, #8
; All mask indices are below 16, so every lane is taken from %_a.
449 %vext = shufflevector <16 x i8> %_a, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
453 ; <rdar://problem/12212062>
; vext2: swaps the two 64-bit lanes of each <2 x i64> argument with a
; single-input shuffle (mask <1, 0>, second operand undef) and adds the two
; swapped vectors. The FileCheck patterns require, in order, an ext.16b of v1
; with itself by #8, an ext.16b of v0 with itself by #8, and an add.2d of the
; two results into v0.
454 define <2 x i64> @vext2(<2 x i64> %p0, <2 x i64> %p1) nounwind readnone ssp {
456 ; CHECK-LABEL: vext2:
457 ; CHECK: ext.16b v1, v1, v1, #8
458 ; CHECK: ext.16b v0, v0, v0, #8
459 ; CHECK: add.2d v0, v0, v1
; %t0 is %p1 with its lanes swapped; %t1 is %p0 with its lanes swapped.
460 %t0 = shufflevector <2 x i64> %p1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
461 %t1 = shufflevector <2 x i64> %p0, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
462 %t2 = add <2 x i64> %t1, %t0