1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mattr=+neon -interleaved-access -S | FileCheck %s --check-prefix=CHECK-NEON
3 ; RUN: opt < %s -mattr=+mve.fp -interleaved-access -S | FileCheck %s --check-prefix=CHECK-MVE
4 ; RUN: opt < %s -interleaved-access -S | FileCheck %s --check-prefix=CHECK-NONE
6 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
7 target triple = "arm---eabi"
9 define void @load_factor2(<16 x i8>* %ptr) {
10 ; CHECK-NEON-LABEL: @load_factor2(
11 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <16 x i8>* [[PTR:%.*]] to i8*
12 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* [[TMP1]], i32 4)
13 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLDN]], 1
14 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLDN]], 0
15 ; CHECK-NEON-NEXT: ret void
17 ; CHECK-MVE-LABEL: @load_factor2(
18 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i8>, <16 x i8>* [[PTR:%.*]], align 4
19 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <16 x i8> [[INTERLEAVED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20 ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <16 x i8> [[INTERLEAVED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
21 ; CHECK-MVE-NEXT: ret void
23 ; CHECK-NONE-LABEL: @load_factor2(
24 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i8>, <16 x i8>* [[PTR:%.*]], align 4
25 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <16 x i8> [[INTERLEAVED_VEC]], <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
26 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <16 x i8> [[INTERLEAVED_VEC]], <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
27 ; CHECK-NONE-NEXT: ret void
29 %interleaved.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
30 %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
31 %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
35 define void @load_factor3(<6 x i32>* %ptr) {
36 ; CHECK-NEON-LABEL: @load_factor3(
37 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <6 x i32>* [[PTR:%.*]] to i8*
38 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4)
39 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
40 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
41 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
42 ; CHECK-NEON-NEXT: ret void
44 ; CHECK-MVE-LABEL: @load_factor3(
45 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <6 x i32>, <6 x i32>* [[PTR:%.*]], align 4
46 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> poison, <2 x i32> <i32 0, i32 3>
47 ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> poison, <2 x i32> <i32 1, i32 4>
48 ; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> poison, <2 x i32> <i32 2, i32 5>
49 ; CHECK-MVE-NEXT: ret void
51 ; CHECK-NONE-LABEL: @load_factor3(
52 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <6 x i32>, <6 x i32>* [[PTR:%.*]], align 4
53 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> poison, <2 x i32> <i32 0, i32 3>
54 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> poison, <2 x i32> <i32 1, i32 4>
55 ; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> poison, <2 x i32> <i32 2, i32 5>
56 ; CHECK-NONE-NEXT: ret void
58 %interleaved.vec = load <6 x i32>, <6 x i32>* %ptr, align 4
59 %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <2 x i32> <i32 0, i32 3>
60 %v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <2 x i32> <i32 1, i32 4>
61 %v2 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <2 x i32> <i32 2, i32 5>
65 define void @load_factor4(<16 x i32>* %ptr) {
66 ; CHECK-NEON-LABEL: @load_factor4(
67 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[PTR:%.*]] to i8*
68 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4)
69 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 3
70 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
71 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
72 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
73 ; CHECK-NEON-NEXT: ret void
75 ; CHECK-MVE-LABEL: @load_factor4(
76 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, <16 x i32>* [[PTR:%.*]], align 4
77 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
78 ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
79 ; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
80 ; CHECK-MVE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
81 ; CHECK-MVE-NEXT: ret void
83 ; CHECK-NONE-LABEL: @load_factor4(
84 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, <16 x i32>* [[PTR:%.*]], align 4
85 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
86 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
87 ; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
88 ; CHECK-NONE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
89 ; CHECK-NONE-NEXT: ret void
91 %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
92 %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
93 %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
94 %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
95 %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
99 define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
100 ; CHECK-NEON-LABEL: @store_factor2(
101 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
102 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[V0]], <8 x i8> [[V1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
103 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = bitcast <16 x i8>* [[PTR:%.*]] to i8*
104 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* [[TMP3]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 4)
105 ; CHECK-NEON-NEXT: ret void
107 ; CHECK-MVE-LABEL: @store_factor2(
108 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
109 ; CHECK-MVE-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[PTR:%.*]], align 4
110 ; CHECK-MVE-NEXT: ret void
112 ; CHECK-NONE-LABEL: @store_factor2(
113 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
114 ; CHECK-NONE-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], <16 x i8>* [[PTR:%.*]], align 4
115 ; CHECK-NONE-NEXT: ret void
117 %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
118 store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
122 define void @store_factor3(<12 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
123 ; CHECK-NEON-LABEL: @store_factor3(
124 ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
125 ; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
126 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
127 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
128 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
129 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = bitcast <12 x i32>* [[PTR:%.*]] to i8*
130 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
131 ; CHECK-NEON-NEXT: ret void
133 ; CHECK-MVE-LABEL: @store_factor3(
134 ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
135 ; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
136 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
137 ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
138 ; CHECK-MVE-NEXT: ret void
140 ; CHECK-NONE-LABEL: @store_factor3(
141 ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
142 ; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
143 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
144 ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
145 ; CHECK-NONE-NEXT: ret void
147 %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
148 %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
149 %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
150 store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
154 define void @store_factor4(<16 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
155 ; CHECK-NEON-LABEL: @store_factor4(
156 ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
157 ; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
158 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
159 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
160 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
161 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
162 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = bitcast <16 x i32>* [[PTR:%.*]] to i8*
163 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP5]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
164 ; CHECK-NEON-NEXT: ret void
166 ; CHECK-MVE-LABEL: @store_factor4(
167 ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
168 ; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
169 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
170 ; CHECK-MVE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], <16 x i32>* [[PTR:%.*]], align 4
171 ; CHECK-MVE-NEXT: ret void
173 ; CHECK-NONE-LABEL: @store_factor4(
174 ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
175 ; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
176 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
177 ; CHECK-NONE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], <16 x i32>* [[PTR:%.*]], align 4
178 ; CHECK-NONE-NEXT: ret void
180 %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
181 %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
182 %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
183 store <16 x i32> %interleaved.vec, <16 x i32>* %ptr, align 4
187 define void @load_ptrvec_factor2(<4 x i32*>* %ptr) {
188 ; CHECK-NEON-LABEL: @load_ptrvec_factor2(
189 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <4 x i32*>* [[PTR:%.*]] to i8*
190 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0i8(i8* [[TMP1]], i32 4)
191 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLDN]], 0
192 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x i32*>
193 ; CHECK-NEON-NEXT: ret void
195 ; CHECK-MVE-LABEL: @load_ptrvec_factor2(
196 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x i32*>, <4 x i32*>* [[PTR:%.*]], align 4
197 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x i32*> [[INTERLEAVED_VEC]], <4 x i32*> poison, <2 x i32> <i32 0, i32 2>
198 ; CHECK-MVE-NEXT: ret void
200 ; CHECK-NONE-LABEL: @load_ptrvec_factor2(
201 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x i32*>, <4 x i32*>* [[PTR:%.*]], align 4
202 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x i32*> [[INTERLEAVED_VEC]], <4 x i32*> poison, <2 x i32> <i32 0, i32 2>
203 ; CHECK-NONE-NEXT: ret void
205 %interleaved.vec = load <4 x i32*>, <4 x i32*>* %ptr, align 4
206 %v0 = shufflevector <4 x i32*> %interleaved.vec, <4 x i32*> poison, <2 x i32> <i32 0, i32 2>
210 define void @load_ptrvec_factor3(<6 x i32*>* %ptr) {
211 ; CHECK-NEON-LABEL: @load_ptrvec_factor3(
212 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <6 x i32*>* [[PTR:%.*]] to i8*
213 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4)
214 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
215 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x i32*>
216 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
217 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = inttoptr <2 x i32> [[TMP4]] to <2 x i32*>
218 ; CHECK-NEON-NEXT: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
219 ; CHECK-NEON-NEXT: [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x i32*>
220 ; CHECK-NEON-NEXT: ret void
222 ; CHECK-MVE-LABEL: @load_ptrvec_factor3(
223 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <6 x i32*>, <6 x i32*>* [[PTR:%.*]], align 4
224 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <6 x i32*> [[INTERLEAVED_VEC]], <6 x i32*> poison, <2 x i32> <i32 0, i32 3>
225 ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <6 x i32*> [[INTERLEAVED_VEC]], <6 x i32*> poison, <2 x i32> <i32 1, i32 4>
226 ; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <6 x i32*> [[INTERLEAVED_VEC]], <6 x i32*> poison, <2 x i32> <i32 2, i32 5>
227 ; CHECK-MVE-NEXT: ret void
229 ; CHECK-NONE-LABEL: @load_ptrvec_factor3(
230 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <6 x i32*>, <6 x i32*>* [[PTR:%.*]], align 4
231 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <6 x i32*> [[INTERLEAVED_VEC]], <6 x i32*> poison, <2 x i32> <i32 0, i32 3>
232 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <6 x i32*> [[INTERLEAVED_VEC]], <6 x i32*> poison, <2 x i32> <i32 1, i32 4>
233 ; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <6 x i32*> [[INTERLEAVED_VEC]], <6 x i32*> poison, <2 x i32> <i32 2, i32 5>
234 ; CHECK-NONE-NEXT: ret void
236 %interleaved.vec = load <6 x i32*>, <6 x i32*>* %ptr, align 4
237 %v0 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> poison, <2 x i32> <i32 0, i32 3>
238 %v1 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> poison, <2 x i32> <i32 1, i32 4>
239 %v2 = shufflevector <6 x i32*> %interleaved.vec, <6 x i32*> poison, <2 x i32> <i32 2, i32 5>
243 define void @load_ptrvec_factor4(<8 x i32*>* %ptr) {
244 ; CHECK-NEON-LABEL: @load_ptrvec_factor4(
245 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <8 x i32*>* [[PTR:%.*]] to i8*
246 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0i8(i8* [[TMP1]], i32 4)
247 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 3
248 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x i32*>
249 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
250 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = inttoptr <2 x i32> [[TMP4]] to <2 x i32*>
251 ; CHECK-NEON-NEXT: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
252 ; CHECK-NEON-NEXT: [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x i32*>
253 ; CHECK-NEON-NEXT: [[TMP8:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
254 ; CHECK-NEON-NEXT: [[TMP9:%.*]] = inttoptr <2 x i32> [[TMP8]] to <2 x i32*>
255 ; CHECK-NEON-NEXT: ret void
257 ; CHECK-MVE-LABEL: @load_ptrvec_factor4(
258 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x i32*>, <8 x i32*>* [[PTR:%.*]], align 4
259 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <8 x i32*> [[INTERLEAVED_VEC]], <8 x i32*> poison, <2 x i32> <i32 0, i32 4>
260 ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <8 x i32*> [[INTERLEAVED_VEC]], <8 x i32*> poison, <2 x i32> <i32 1, i32 5>
261 ; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <8 x i32*> [[INTERLEAVED_VEC]], <8 x i32*> poison, <2 x i32> <i32 2, i32 6>
262 ; CHECK-MVE-NEXT: [[V3:%.*]] = shufflevector <8 x i32*> [[INTERLEAVED_VEC]], <8 x i32*> poison, <2 x i32> <i32 3, i32 7>
263 ; CHECK-MVE-NEXT: ret void
265 ; CHECK-NONE-LABEL: @load_ptrvec_factor4(
266 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x i32*>, <8 x i32*>* [[PTR:%.*]], align 4
267 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <8 x i32*> [[INTERLEAVED_VEC]], <8 x i32*> poison, <2 x i32> <i32 0, i32 4>
268 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <8 x i32*> [[INTERLEAVED_VEC]], <8 x i32*> poison, <2 x i32> <i32 1, i32 5>
269 ; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <8 x i32*> [[INTERLEAVED_VEC]], <8 x i32*> poison, <2 x i32> <i32 2, i32 6>
270 ; CHECK-NONE-NEXT: [[V3:%.*]] = shufflevector <8 x i32*> [[INTERLEAVED_VEC]], <8 x i32*> poison, <2 x i32> <i32 3, i32 7>
271 ; CHECK-NONE-NEXT: ret void
273 %interleaved.vec = load <8 x i32*>, <8 x i32*>* %ptr, align 4
274 %v0 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> poison, <2 x i32> <i32 0, i32 4>
275 %v1 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> poison, <2 x i32> <i32 1, i32 5>
276 %v2 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> poison, <2 x i32> <i32 2, i32 6>
277 %v3 = shufflevector <8 x i32*> %interleaved.vec, <8 x i32*> poison, <2 x i32> <i32 3, i32 7>
281 define void @store_ptrvec_factor2(<4 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
282 ; CHECK-NEON-LABEL: @store_ptrvec_factor2(
283 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = ptrtoint <2 x i32*> [[V0:%.*]] to <2 x i32>
284 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = ptrtoint <2 x i32*> [[V1:%.*]] to <2 x i32>
285 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
286 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
287 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = bitcast <4 x i32*>* [[PTR:%.*]] to i8*
288 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP5]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4)
289 ; CHECK-NEON-NEXT: ret void
291 ; CHECK-MVE-LABEL: @store_ptrvec_factor2(
292 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <2 x i32*> [[V0:%.*]], <2 x i32*> [[V1:%.*]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
293 ; CHECK-MVE-NEXT: store <4 x i32*> [[INTERLEAVED_VEC]], <4 x i32*>* [[PTR:%.*]], align 4
294 ; CHECK-MVE-NEXT: ret void
296 ; CHECK-NONE-LABEL: @store_ptrvec_factor2(
297 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <2 x i32*> [[V0:%.*]], <2 x i32*> [[V1:%.*]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
298 ; CHECK-NONE-NEXT: store <4 x i32*> [[INTERLEAVED_VEC]], <4 x i32*>* [[PTR:%.*]], align 4
299 ; CHECK-NONE-NEXT: ret void
301 %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
302 store <4 x i32*> %interleaved.vec, <4 x i32*>* %ptr, align 4
306 define void @store_ptrvec_factor3(<6 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
307 ; CHECK-NEON-LABEL: @store_ptrvec_factor3(
308 ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <2 x i32*> [[V0:%.*]], <2 x i32*> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
309 ; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <2 x i32*> [[V2:%.*]], <2 x i32*> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
310 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = ptrtoint <4 x i32*> [[S0]] to <4 x i32>
311 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = ptrtoint <4 x i32*> [[S1]] to <4 x i32>
312 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
313 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
314 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 4, i32 5>
315 ; CHECK-NEON-NEXT: [[TMP6:%.*]] = bitcast <6 x i32*>* [[PTR:%.*]] to i8*
316 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP6]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
317 ; CHECK-NEON-NEXT: ret void
319 ; CHECK-MVE-LABEL: @store_ptrvec_factor3(
320 ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <2 x i32*> [[V0:%.*]], <2 x i32*> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
321 ; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <2 x i32*> [[V2:%.*]], <2 x i32*> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
322 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32*> [[S0]], <4 x i32*> [[S1]], <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
323 ; CHECK-MVE-NEXT: store <6 x i32*> [[INTERLEAVED_VEC]], <6 x i32*>* [[PTR:%.*]], align 4
324 ; CHECK-MVE-NEXT: ret void
326 ; CHECK-NONE-LABEL: @store_ptrvec_factor3(
327 ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <2 x i32*> [[V0:%.*]], <2 x i32*> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
328 ; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <2 x i32*> [[V2:%.*]], <2 x i32*> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
329 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32*> [[S0]], <4 x i32*> [[S1]], <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
330 ; CHECK-NONE-NEXT: store <6 x i32*> [[INTERLEAVED_VEC]], <6 x i32*>* [[PTR:%.*]], align 4
331 ; CHECK-NONE-NEXT: ret void
333 %s0 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
334 %s1 = shufflevector <2 x i32*> %v2, <2 x i32*> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
335 %interleaved.vec = shufflevector <4 x i32*> %s0, <4 x i32*> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
336 store <6 x i32*> %interleaved.vec, <6 x i32*>* %ptr, align 4
340 define void @store_ptrvec_factor4(<8 x i32*>* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
341 ; CHECK-NEON-LABEL: @store_ptrvec_factor4(
342 ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <2 x i32*> [[V0:%.*]], <2 x i32*> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
343 ; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <2 x i32*> [[V2:%.*]], <2 x i32*> [[V3:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
344 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = ptrtoint <4 x i32*> [[S0]] to <4 x i32>
345 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = ptrtoint <4 x i32*> [[S1]] to <4 x i32>
346 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
347 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
348 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 4, i32 5>
349 ; CHECK-NEON-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 6, i32 7>
350 ; CHECK-NEON-NEXT: [[TMP7:%.*]] = bitcast <8 x i32*>* [[PTR:%.*]] to i8*
351 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP7]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], i32 4)
352 ; CHECK-NEON-NEXT: ret void
354 ; CHECK-MVE-LABEL: @store_ptrvec_factor4(
355 ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <2 x i32*> [[V0:%.*]], <2 x i32*> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
356 ; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <2 x i32*> [[V2:%.*]], <2 x i32*> [[V3:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
357 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32*> [[S0]], <4 x i32*> [[S1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
358 ; CHECK-MVE-NEXT: store <8 x i32*> [[INTERLEAVED_VEC]], <8 x i32*>* [[PTR:%.*]], align 4
359 ; CHECK-MVE-NEXT: ret void
361 ; CHECK-NONE-LABEL: @store_ptrvec_factor4(
362 ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <2 x i32*> [[V0:%.*]], <2 x i32*> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
363 ; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <2 x i32*> [[V2:%.*]], <2 x i32*> [[V3:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
364 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32*> [[S0]], <4 x i32*> [[S1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
365 ; CHECK-NONE-NEXT: store <8 x i32*> [[INTERLEAVED_VEC]], <8 x i32*>* [[PTR:%.*]], align 4
366 ; CHECK-NONE-NEXT: ret void
368 %s0 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
369 %s1 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
370 %interleaved.vec = shufflevector <4 x i32*> %s0, <4 x i32*> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
371 store <8 x i32*> %interleaved.vec, <8 x i32*>* %ptr, align 4
375 define void @load_undef_mask_factor2(<8 x i32>* %ptr) {
376 ; CHECK-NEON-LABEL: @load_undef_mask_factor2(
377 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <8 x i32>* [[PTR:%.*]] to i8*
378 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP1]], i32 4)
379 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
380 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
381 ; CHECK-NEON-NEXT: ret void
383 ; CHECK-MVE-LABEL: @load_undef_mask_factor2(
384 ; CHECK-MVE-NEXT: [[TMP1:%.*]] = bitcast <8 x i32>* [[PTR:%.*]] to i32*
385 ; CHECK-MVE-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0i32(i32* [[TMP1]])
386 ; CHECK-MVE-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
387 ; CHECK-MVE-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
388 ; CHECK-MVE-NEXT: ret void
390 ; CHECK-NONE-LABEL: @load_undef_mask_factor2(
391 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[PTR:%.*]], align 4
392 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> poison, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
393 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> poison, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
394 ; CHECK-NONE-NEXT: ret void
396 %interleaved.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
397 %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
398 %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
402 define void @load_undef_mask_factor3(<12 x i32>* %ptr) {
403 ; CHECK-NEON-LABEL: @load_undef_mask_factor3(
404 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <12 x i32>* [[PTR:%.*]] to i8*
405 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* [[TMP1]], i32 4)
406 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
407 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
408 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
409 ; CHECK-NEON-NEXT: ret void
411 ; CHECK-MVE-LABEL: @load_undef_mask_factor3(
412 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <12 x i32>, <12 x i32>* [[PTR:%.*]], align 4
413 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
414 ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
415 ; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
416 ; CHECK-MVE-NEXT: ret void
418 ; CHECK-NONE-LABEL: @load_undef_mask_factor3(
419 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <12 x i32>, <12 x i32>* [[PTR:%.*]], align 4
420 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
421 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
422 ; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
423 ; CHECK-NONE-NEXT: ret void
425 %interleaved.vec = load <12 x i32>, <12 x i32>* %ptr, align 4
426 %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
427 %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
428 %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
432 define void @load_undef_mask_factor4(<16 x i32>* %ptr) {
433 ; CHECK-NEON-LABEL: @load_undef_mask_factor4(
434 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[PTR:%.*]] to i8*
435 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4)
436 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 3
437 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
438 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
439 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
440 ; CHECK-NEON-NEXT: ret void
442 ; CHECK-MVE-LABEL: @load_undef_mask_factor4(
443 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, <16 x i32>* [[PTR:%.*]], align 4
444 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
445 ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
446 ; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
447 ; CHECK-MVE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
448 ; CHECK-MVE-NEXT: ret void
450 ; CHECK-NONE-LABEL: @load_undef_mask_factor4(
451 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, <16 x i32>* [[PTR:%.*]], align 4
452 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
453 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
454 ; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
455 ; CHECK-NONE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
456 ; CHECK-NONE-NEXT: ret void
458 %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
459 %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
460 %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
461 %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
462 %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
466 define void @store_undef_mask_factor2(<8 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
467 ; CHECK-NEON-LABEL: @store_undef_mask_factor2(
468 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
469 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
470 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = bitcast <8 x i32>* [[PTR:%.*]] to i8*
471 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 4)
472 ; CHECK-NEON-NEXT: ret void
474 ; CHECK-MVE-LABEL: @store_undef_mask_factor2(
475 ; CHECK-MVE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
476 ; CHECK-MVE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
477 ; CHECK-MVE-NEXT: [[TMP3:%.*]] = bitcast <8 x i32>* [[PTR:%.*]] to i32*
478 ; CHECK-MVE-NEXT: call void @llvm.arm.mve.vst2q.p0i32.v4i32(i32* [[TMP3]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 0)
479 ; CHECK-MVE-NEXT: call void @llvm.arm.mve.vst2q.p0i32.v4i32(i32* [[TMP3]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 1)
480 ; CHECK-MVE-NEXT: ret void
482 ; CHECK-NONE-LABEL: @store_undef_mask_factor2(
483 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
484 ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[PTR:%.*]], align 4
485 ; CHECK-NONE-NEXT: ret void
487 %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
488 store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
492 define void @store_undef_mask_factor3(<12 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
493 ; CHECK-NEON-LABEL: @store_undef_mask_factor3(
494 ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
495 ; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
496 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
497 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
498 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
499 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = bitcast <12 x i32>* [[PTR:%.*]] to i8*
500 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
501 ; CHECK-NEON-NEXT: ret void
503 ; CHECK-MVE-LABEL: @store_undef_mask_factor3(
504 ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
505 ; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
506 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
507 ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
508 ; CHECK-MVE-NEXT: ret void
510 ; CHECK-NONE-LABEL: @store_undef_mask_factor3(
511 ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
512 ; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
513 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
514 ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
515 ; CHECK-NONE-NEXT: ret void
517 %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
518 %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
519 %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
520 store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
524 define void @store_undef_mask_factor4(<16 x i32>* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
525 ; CHECK-NEON-LABEL: @store_undef_mask_factor4(
526 ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
527 ; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
528 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
529 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
530 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
531 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
532 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = bitcast <16 x i32>* [[PTR:%.*]] to i8*
533 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP5]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
534 ; CHECK-NEON-NEXT: ret void
536 ; CHECK-MVE-LABEL: @store_undef_mask_factor4(
537 ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
538 ; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
539 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
540 ; CHECK-MVE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], <16 x i32>* [[PTR:%.*]], align 4
541 ; CHECK-MVE-NEXT: ret void
543 ; CHECK-NONE-LABEL: @store_undef_mask_factor4(
544 ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
545 ; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
546 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
547 ; CHECK-NONE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], <16 x i32>* [[PTR:%.*]], align 4
548 ; CHECK-NONE-NEXT: ret void
550 %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
551 %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
552 %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
553 store <16 x i32> %interleaved.vec, <16 x i32>* %ptr, align 4
557 define void @load_address_space(<8 x i32> addrspace(1)* %ptr) {
558 ; CHECK-NEON-LABEL: @load_address_space(
559 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <8 x i32> addrspace(1)* [[PTR:%.*]] to i8 addrspace(1)*
560 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p1i8(i8 addrspace(1)* [[TMP1]], i32 32)
561 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
562 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
563 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
564 ; CHECK-NEON-NEXT: ret void
566 ; CHECK-MVE-LABEL: @load_address_space(
567 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x i32>, <8 x i32> addrspace(1)* [[PTR:%.*]], align 32
568 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> poison, <2 x i32> <i32 0, i32 3>
569 ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> poison, <2 x i32> <i32 1, i32 4>
570 ; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> poison, <2 x i32> <i32 2, i32 5>
571 ; CHECK-MVE-NEXT: ret void
573 ; CHECK-NONE-LABEL: @load_address_space(
574 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x i32>, <8 x i32> addrspace(1)* [[PTR:%.*]], align 32
575 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> poison, <2 x i32> <i32 0, i32 3>
576 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> poison, <2 x i32> <i32 1, i32 4>
577 ; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> poison, <2 x i32> <i32 2, i32 5>
578 ; CHECK-NONE-NEXT: ret void
580 %interleaved.vec = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
581 %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <2 x i32> <i32 0, i32 3>
582 %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <2 x i32> <i32 1, i32 4>
583 %v2 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <2 x i32> <i32 2, i32 5>
587 define void @store_address_space(<4 x i32> addrspace(1)* %ptr, <2 x i32> %v0, <2 x i32> %v1) {
588 ; CHECK-NEON-LABEL: @store_address_space(
589 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> [[V1:%.*]], <2 x i32> <i32 0, i32 1>
590 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[V0]], <2 x i32> [[V1]], <2 x i32> <i32 2, i32 3>
591 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> addrspace(1)* [[PTR:%.*]] to i8 addrspace(1)*
592 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst2.p1i8.v2i32(i8 addrspace(1)* [[TMP3]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], i32 8)
593 ; CHECK-NEON-NEXT: ret void
595 ; CHECK-MVE-LABEL: @store_address_space(
596 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
597 ; CHECK-MVE-NEXT: store <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> addrspace(1)* [[PTR:%.*]], align 8
598 ; CHECK-MVE-NEXT: ret void
600 ; CHECK-NONE-LABEL: @store_address_space(
601 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
602 ; CHECK-NONE-NEXT: store <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> addrspace(1)* [[PTR:%.*]], align 8
603 ; CHECK-NONE-NEXT: ret void
605 %interleaved.vec = shufflevector <2 x i32> %v0, <2 x i32> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
606 store <4 x i32> %interleaved.vec, <4 x i32> addrspace(1)* %ptr
610 define void @load_f16_factor2(<8 x half>* %ptr) {
611 ; CHECK-NEON-LABEL: @load_f16_factor2(
612 ; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x half>, <8 x half>* [[PTR:%.*]], align 4
613 ; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
614 ; CHECK-NEON-NEXT: [[V1:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
615 ; CHECK-NEON-NEXT: ret void
617 ; CHECK-MVE-LABEL: @load_f16_factor2(
618 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x half>, <8 x half>* [[PTR:%.*]], align 4
619 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
620 ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
621 ; CHECK-MVE-NEXT: ret void
623 ; CHECK-NONE-LABEL: @load_f16_factor2(
624 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x half>, <8 x half>* [[PTR:%.*]], align 4
625 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
626 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
627 ; CHECK-NONE-NEXT: ret void
629 %interleaved.vec = load <8 x half>, <8 x half>* %ptr, align 4
630 %v0 = shufflevector <8 x half> %interleaved.vec, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
631 %v1 = shufflevector <8 x half> %interleaved.vec, <8 x half> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
635 define void @store_f16_factor2(<8 x half>* %ptr, <4 x half> %v0, <4 x half> %v1) {
636 ; CHECK-NEON-LABEL: @store_f16_factor2(
637 ; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x half> [[V0:%.*]], <4 x half> [[V1:%.*]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
638 ; CHECK-NEON-NEXT: store <8 x half> [[INTERLEAVED_VEC]], <8 x half>* [[PTR:%.*]], align 4
639 ; CHECK-NEON-NEXT: ret void
641 ; CHECK-MVE-LABEL: @store_f16_factor2(
642 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x half> [[V0:%.*]], <4 x half> [[V1:%.*]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
643 ; CHECK-MVE-NEXT: store <8 x half> [[INTERLEAVED_VEC]], <8 x half>* [[PTR:%.*]], align 4
644 ; CHECK-MVE-NEXT: ret void
646 ; CHECK-NONE-LABEL: @store_f16_factor2(
647 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x half> [[V0:%.*]], <4 x half> [[V1:%.*]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
648 ; CHECK-NONE-NEXT: store <8 x half> [[INTERLEAVED_VEC]], <8 x half>* [[PTR:%.*]], align 4
649 ; CHECK-NONE-NEXT: ret void
651 %interleaved.vec = shufflevector <4 x half> %v0, <4 x half> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
652 store <8 x half> %interleaved.vec, <8 x half>* %ptr, align 4
656 define void @load_illegal_factor2(<3 x float>* %ptr) nounwind {
657 ; CHECK-NEON-LABEL: @load_illegal_factor2(
658 ; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = load <3 x float>, <3 x float>* [[PTR:%.*]], align 16
659 ; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
660 ; CHECK-NEON-NEXT: ret void
662 ; CHECK-MVE-LABEL: @load_illegal_factor2(
663 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <3 x float>, <3 x float>* [[PTR:%.*]], align 16
664 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
665 ; CHECK-MVE-NEXT: ret void
667 ; CHECK-NONE-LABEL: @load_illegal_factor2(
668 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <3 x float>, <3 x float>* [[PTR:%.*]], align 16
669 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
670 ; CHECK-NONE-NEXT: ret void
672 %interleaved.vec = load <3 x float>, <3 x float>* %ptr, align 16
673 %v0 = shufflevector <3 x float> %interleaved.vec, <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
677 define void @store_illegal_factor2(<3 x float>* %ptr, <3 x float> %v0) nounwind {
678 ; CHECK-NEON-LABEL: @store_illegal_factor2(
679 ; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
680 ; CHECK-NEON-NEXT: store <3 x float> [[INTERLEAVED_VEC]], <3 x float>* [[PTR:%.*]], align 16
681 ; CHECK-NEON-NEXT: ret void
683 ; CHECK-MVE-LABEL: @store_illegal_factor2(
684 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
685 ; CHECK-MVE-NEXT: store <3 x float> [[INTERLEAVED_VEC]], <3 x float>* [[PTR:%.*]], align 16
686 ; CHECK-MVE-NEXT: ret void
688 ; CHECK-NONE-LABEL: @store_illegal_factor2(
689 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
690 ; CHECK-NONE-NEXT: store <3 x float> [[INTERLEAVED_VEC]], <3 x float>* [[PTR:%.*]], align 16
691 ; CHECK-NONE-NEXT: ret void
693 %interleaved.vec = shufflevector <3 x float> %v0, <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
694 store <3 x float> %interleaved.vec, <3 x float>* %ptr, align 16
698 define void @store_general_mask_factor4(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
699 ; CHECK-NEON-LABEL: @store_general_mask_factor4(
700 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5>
701 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 16, i32 17>
702 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 32, i32 33>
703 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9>
704 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = bitcast <8 x i32>* [[PTR:%.*]] to i8*
705 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP5]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4)
706 ; CHECK-NEON-NEXT: ret void
708 ; CHECK-MVE-LABEL: @store_general_mask_factor4(
709 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
710 ; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[PTR:%.*]], align 4
711 ; CHECK-MVE-NEXT: ret void
713 ; CHECK-NONE-LABEL: @store_general_mask_factor4(
714 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
715 ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[PTR:%.*]], align 4
716 ; CHECK-NONE-NEXT: ret void
718 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
719 store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
723 define void @store_general_mask_factor4_undefbeg(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
724 ; CHECK-NEON-LABEL: @store_general_mask_factor4_undefbeg(
725 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5>
726 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 16, i32 17>
727 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 32, i32 33>
728 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9>
729 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = bitcast <8 x i32>* [[PTR:%.*]] to i8*
730 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP5]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4)
731 ; CHECK-NEON-NEXT: ret void
733 ; CHECK-MVE-LABEL: @store_general_mask_factor4_undefbeg(
734 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
735 ; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[PTR:%.*]], align 4
736 ; CHECK-MVE-NEXT: ret void
738 ; CHECK-NONE-LABEL: @store_general_mask_factor4_undefbeg(
739 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
740 ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[PTR:%.*]], align 4
741 ; CHECK-NONE-NEXT: ret void
743 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
744 store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
748 define void @store_general_mask_factor4_undefend(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
749 ; CHECK-NEON-LABEL: @store_general_mask_factor4_undefend(
750 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5>
751 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 16, i32 17>
752 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 32, i32 33>
753 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9>
754 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = bitcast <8 x i32>* [[PTR:%.*]] to i8*
755 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP5]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4)
756 ; CHECK-NEON-NEXT: ret void
758 ; CHECK-MVE-LABEL: @store_general_mask_factor4_undefend(
759 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
760 ; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[PTR:%.*]], align 4
761 ; CHECK-MVE-NEXT: ret void
763 ; CHECK-NONE-LABEL: @store_general_mask_factor4_undefend(
764 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
765 ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[PTR:%.*]], align 4
766 ; CHECK-NONE-NEXT: ret void
768 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
769 store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
773 define void @store_general_mask_factor4_undefmid(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
774 ; CHECK-NEON-LABEL: @store_general_mask_factor4_undefmid(
775 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5>
776 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 16, i32 17>
777 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 32, i32 33>
778 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9>
779 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = bitcast <8 x i32>* [[PTR:%.*]] to i8*
780 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP5]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4)
781 ; CHECK-NEON-NEXT: ret void
783 ; CHECK-MVE-LABEL: @store_general_mask_factor4_undefmid(
784 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
785 ; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[PTR:%.*]], align 4
786 ; CHECK-MVE-NEXT: ret void
788 ; CHECK-NONE-LABEL: @store_general_mask_factor4_undefmid(
789 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
790 ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[PTR:%.*]], align 4
791 ; CHECK-NONE-NEXT: ret void
793 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
794 store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
798 define void @store_general_mask_factor4_undefmulti(<8 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
799 ; CHECK-NEON-LABEL: @store_general_mask_factor4_undefmulti(
800 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5>
801 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 0, i32 1>
802 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 0, i32 1>
803 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9>
804 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = bitcast <8 x i32>* [[PTR:%.*]] to i8*
805 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP5]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4)
806 ; CHECK-NEON-NEXT: ret void
808 ; CHECK-MVE-LABEL: @store_general_mask_factor4_undefmulti(
809 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
810 ; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[PTR:%.*]], align 4
811 ; CHECK-MVE-NEXT: ret void
813 ; CHECK-NONE-LABEL: @store_general_mask_factor4_undefmulti(
814 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
815 ; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], <8 x i32>* [[PTR:%.*]], align 4
816 ; CHECK-NONE-NEXT: ret void
818 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
819 store <8 x i32> %interleaved.vec, <8 x i32>* %ptr, align 4
823 define void @store_general_mask_factor3(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
824 ; CHECK-NEON-LABEL: @store_general_mask_factor3(
825 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
826 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
827 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
828 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = bitcast <12 x i32>* [[PTR:%.*]] to i8*
829 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
830 ; CHECK-NEON-NEXT: ret void
832 ; CHECK-MVE-LABEL: @store_general_mask_factor3(
833 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
834 ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
835 ; CHECK-MVE-NEXT: ret void
837 ; CHECK-NONE-LABEL: @store_general_mask_factor3(
838 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
839 ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
840 ; CHECK-NONE-NEXT: ret void
842 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
843 store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
847 define void @store_general_mask_factor3_undefmultimid(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
848 ; CHECK-NEON-LABEL: @store_general_mask_factor3_undefmultimid(
849 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
850 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
851 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
852 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = bitcast <12 x i32>* [[PTR:%.*]] to i8*
853 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
854 ; CHECK-NEON-NEXT: ret void
856 ; CHECK-MVE-LABEL: @store_general_mask_factor3_undefmultimid(
857 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
858 ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
859 ; CHECK-MVE-NEXT: ret void
861 ; CHECK-NONE-LABEL: @store_general_mask_factor3_undefmultimid(
862 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
863 ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
864 ; CHECK-NONE-NEXT: ret void
866 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
867 store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
871 define void @store_general_mask_factor3_undef_fail(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
872 ; CHECK-NEON-LABEL: @store_general_mask_factor3_undef_fail(
873 ; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
874 ; CHECK-NEON-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
875 ; CHECK-NEON-NEXT: ret void
877 ; CHECK-MVE-LABEL: @store_general_mask_factor3_undef_fail(
878 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
879 ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
880 ; CHECK-MVE-NEXT: ret void
882 ; CHECK-NONE-LABEL: @store_general_mask_factor3_undef_fail(
883 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
884 ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
885 ; CHECK-NONE-NEXT: ret void
887 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
888 store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
892 define void @store_general_mask_factor3_undeflane(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
893 ; CHECK-NEON-LABEL: @store_general_mask_factor3_undeflane(
894 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
895 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
896 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
897 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = bitcast <12 x i32>* [[PTR:%.*]] to i8*
898 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
899 ; CHECK-NEON-NEXT: ret void
901 ; CHECK-MVE-LABEL: @store_general_mask_factor3_undeflane(
902 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
903 ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
904 ; CHECK-MVE-NEXT: ret void
906 ; CHECK-NONE-LABEL: @store_general_mask_factor3_undeflane(
907 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
908 ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
909 ; CHECK-NONE-NEXT: ret void
911 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
912 store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
916 define void @store_general_mask_factor3_endstart_fail(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
917 ; CHECK-NEON-LABEL: @store_general_mask_factor3_endstart_fail(
918 ; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
919 ; CHECK-NEON-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
920 ; CHECK-NEON-NEXT: ret void
922 ; CHECK-MVE-LABEL: @store_general_mask_factor3_endstart_fail(
923 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
924 ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
925 ; CHECK-MVE-NEXT: ret void
927 ; CHECK-NONE-LABEL: @store_general_mask_factor3_endstart_fail(
928 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
929 ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
930 ; CHECK-NONE-NEXT: ret void
932 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
933 store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
937 define void @store_general_mask_factor3_endstart_pass(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
938 ; CHECK-NEON-LABEL: @store_general_mask_factor3_endstart_pass(
939 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
940 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
941 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
942 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = bitcast <12 x i32>* [[PTR:%.*]] to i8*
943 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
944 ; CHECK-NEON-NEXT: ret void
946 ; CHECK-MVE-LABEL: @store_general_mask_factor3_endstart_pass(
947 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
948 ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
949 ; CHECK-MVE-NEXT: ret void
951 ; CHECK-NONE-LABEL: @store_general_mask_factor3_endstart_pass(
952 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
953 ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
954 ; CHECK-NONE-NEXT: ret void
956 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
957 store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
961 define void @store_general_mask_factor3_midstart_fail(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
962 ; CHECK-NEON-LABEL: @store_general_mask_factor3_midstart_fail(
963 ; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 undef, i32 32, i32 16, i32 0, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
964 ; CHECK-NEON-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
965 ; CHECK-NEON-NEXT: ret void
967 ; CHECK-MVE-LABEL: @store_general_mask_factor3_midstart_fail(
968 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 undef, i32 32, i32 16, i32 0, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
969 ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
970 ; CHECK-MVE-NEXT: ret void
972 ; CHECK-NONE-LABEL: @store_general_mask_factor3_midstart_fail(
973 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 undef, i32 32, i32 16, i32 0, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
974 ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
975 ; CHECK-NONE-NEXT: ret void
977 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 0, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
978 store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
982 define void @store_general_mask_factor3_midstart_pass(<12 x i32>* %ptr, <32 x i32> %v0, <32 x i32> %v1) {
983 ; CHECK-NEON-LABEL: @store_general_mask_factor3_midstart_pass(
984 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
985 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
986 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
987 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = bitcast <12 x i32>* [[PTR:%.*]] to i8*
988 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
989 ; CHECK-NEON-NEXT: ret void
991 ; CHECK-MVE-LABEL: @store_general_mask_factor3_midstart_pass(
992 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 undef, i32 32, i32 16, i32 1, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
993 ; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
994 ; CHECK-MVE-NEXT: ret void
996 ; CHECK-NONE-LABEL: @store_general_mask_factor3_midstart_pass(
997 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 undef, i32 32, i32 16, i32 1, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
998 ; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], <12 x i32>* [[PTR:%.*]], align 4
999 ; CHECK-NONE-NEXT: ret void
1001 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 1, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
1002 store <12 x i32> %interleaved.vec, <12 x i32>* %ptr, align 4
1006 @g = external global <4 x float>
1008 ; The following does not give a valid interleaved store
1009 define void @no_interleave(<4 x float> %a0) {
1010 ; CHECK-NEON-LABEL: @no_interleave(
1011 ; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> <i32 0, i32 7, i32 1, i32 undef>
1012 ; CHECK-NEON-NEXT: store <4 x float> [[V0]], <4 x float>* @g, align 16
1013 ; CHECK-NEON-NEXT: ret void
1015 ; CHECK-MVE-LABEL: @no_interleave(
1016 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> <i32 0, i32 7, i32 1, i32 undef>
1017 ; CHECK-MVE-NEXT: store <4 x float> [[V0]], <4 x float>* @g, align 16
1018 ; CHECK-MVE-NEXT: ret void
1020 ; CHECK-NONE-LABEL: @no_interleave(
1021 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> <i32 0, i32 7, i32 1, i32 undef>
1022 ; CHECK-NONE-NEXT: store <4 x float> [[V0]], <4 x float>* @g, align 16
1023 ; CHECK-NONE-NEXT: ret void
1025 %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 7, i32 1, i32 undef>
1026 store <4 x float> %v0, <4 x float>* @g, align 16
1030 define void @load_factor2_wide2(<16 x i32>* %ptr) {
1031 ; CHECK-NEON-LABEL: @load_factor2_wide2(
1032 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[PTR:%.*]] to i32*
1033 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8*
1034 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP2]], i32 4)
1035 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
1036 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
1037 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[TMP1]], i32 8
1038 ; CHECK-NEON-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to i8*
1039 ; CHECK-NEON-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP6]], i32 4)
1040 ; CHECK-NEON-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
1041 ; CHECK-NEON-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
1042 ; CHECK-NEON-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1043 ; CHECK-NEON-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1044 ; CHECK-NEON-NEXT: ret void
1046 ; CHECK-MVE-LABEL: @load_factor2_wide2(
1047 ; CHECK-MVE-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[PTR:%.*]] to i32*
1048 ; CHECK-MVE-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0i32(i32* [[TMP1]])
1049 ; CHECK-MVE-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
1050 ; CHECK-MVE-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
1051 ; CHECK-MVE-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[TMP1]], i32 8
1052 ; CHECK-MVE-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0i32(i32* [[TMP4]])
1053 ; CHECK-MVE-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
1054 ; CHECK-MVE-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
1055 ; CHECK-MVE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1056 ; CHECK-MVE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1057 ; CHECK-MVE-NEXT: ret void
1059 ; CHECK-NONE-LABEL: @load_factor2_wide2(
1060 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, <16 x i32>* [[PTR:%.*]], align 4
1061 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1062 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1063 ; CHECK-NONE-NEXT: ret void
1065 %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
1066 %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1067 %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1071 define void @load_factor2_wide3(<24 x i32>* %ptr) {
1072 ; CHECK-NEON-LABEL: @load_factor2_wide3(
1073 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <24 x i32>* [[PTR:%.*]] to i32*
1074 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8*
1075 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP2]], i32 4)
1076 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
1077 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
1078 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[TMP1]], i32 8
1079 ; CHECK-NEON-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to i8*
1080 ; CHECK-NEON-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP6]], i32 4)
1081 ; CHECK-NEON-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
1082 ; CHECK-NEON-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
1083 ; CHECK-NEON-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP5]], i32 8
1084 ; CHECK-NEON-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to i8*
1085 ; CHECK-NEON-NEXT: [[VLDN2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP10]], i32 4)
1086 ; CHECK-NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1
1087 ; CHECK-NEON-NEXT: [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0
1088 ; CHECK-NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1089 ; CHECK-NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1090 ; CHECK-NEON-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1091 ; CHECK-NEON-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1092 ; CHECK-NEON-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1093 ; CHECK-NEON-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP16]], <8 x i32> [[TMP17]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1094 ; CHECK-NEON-NEXT: ret void
1096 ; CHECK-MVE-LABEL: @load_factor2_wide3(
1097 ; CHECK-MVE-NEXT: [[TMP1:%.*]] = bitcast <24 x i32>* [[PTR:%.*]] to i32*
1098 ; CHECK-MVE-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0i32(i32* [[TMP1]])
1099 ; CHECK-MVE-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
1100 ; CHECK-MVE-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
1101 ; CHECK-MVE-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[TMP1]], i32 8
1102 ; CHECK-MVE-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0i32(i32* [[TMP4]])
1103 ; CHECK-MVE-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
1104 ; CHECK-MVE-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
1105 ; CHECK-MVE-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP4]], i32 8
1106 ; CHECK-MVE-NEXT: [[VLDN2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0i32(i32* [[TMP7]])
1107 ; CHECK-MVE-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1
1108 ; CHECK-MVE-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0
1109 ; CHECK-MVE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1110 ; CHECK-MVE-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1111 ; CHECK-MVE-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1112 ; CHECK-MVE-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1113 ; CHECK-MVE-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1114 ; CHECK-MVE-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1115 ; CHECK-MVE-NEXT: ret void
1117 ; CHECK-NONE-LABEL: @load_factor2_wide3(
1118 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <24 x i32>, <24 x i32>* [[PTR:%.*]], align 4
1119 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> poison, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22>
1120 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> poison, <12 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23>
1121 ; CHECK-NONE-NEXT: ret void
1123 %interleaved.vec = load <24 x i32>, <24 x i32>* %ptr, align 4
1124 %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22>
1125 %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <12 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23>
1129 define void @load_factor3_wide(<24 x i32>* %ptr) {
1130 ; CHECK-NEON-LABEL: @load_factor3_wide(
1131 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <24 x i32>* [[PTR:%.*]] to i32*
1132 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8*
1133 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* [[TMP2]], i32 4)
1134 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
1135 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
1136 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
1137 ; CHECK-NEON-NEXT: [[TMP6:%.*]] = getelementptr i32, i32* [[TMP1]], i32 12
1138 ; CHECK-NEON-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to i8*
1139 ; CHECK-NEON-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* [[TMP7]], i32 4)
1140 ; CHECK-NEON-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 2
1141 ; CHECK-NEON-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 1
1142 ; CHECK-NEON-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 0
1143 ; CHECK-NEON-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1144 ; CHECK-NEON-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1145 ; CHECK-NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1146 ; CHECK-NEON-NEXT: ret void
1148 ; CHECK-MVE-LABEL: @load_factor3_wide(
1149 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <24 x i32>, <24 x i32>* [[PTR:%.*]], align 4
1150 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
1151 ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
1152 ; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
1153 ; CHECK-MVE-NEXT: ret void
1155 ; CHECK-NONE-LABEL: @load_factor3_wide(
1156 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <24 x i32>, <24 x i32>* [[PTR:%.*]], align 4
1157 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
1158 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
1159 ; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
1160 ; CHECK-NONE-NEXT: ret void
1162 %interleaved.vec = load <24 x i32>, <24 x i32>* %ptr, align 4
1163 %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
1164 %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
1165 %v2 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
1169 define void @load_factor4_wide(<32 x i32>* %ptr) {
1170 ; CHECK-NEON-LABEL: @load_factor4_wide(
1171 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <32 x i32>* [[PTR:%.*]] to i32*
1172 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8*
1173 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP2]], i32 4)
1174 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 3
1175 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
1176 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
1177 ; CHECK-NEON-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
1178 ; CHECK-NEON-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP1]], i32 16
1179 ; CHECK-NEON-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8*
1180 ; CHECK-NEON-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP8]], i32 4)
1181 ; CHECK-NEON-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 3
1182 ; CHECK-NEON-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 2
1183 ; CHECK-NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 1
1184 ; CHECK-NEON-NEXT: [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 0
1185 ; CHECK-NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1186 ; CHECK-NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1187 ; CHECK-NEON-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1188 ; CHECK-NEON-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1189 ; CHECK-NEON-NEXT: ret void
1191 ; CHECK-MVE-LABEL: @load_factor4_wide(
1192 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <32 x i32>, <32 x i32>* [[PTR:%.*]], align 4
1193 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
1194 ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
1195 ; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
1196 ; CHECK-MVE-NEXT: [[V3:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
1197 ; CHECK-MVE-NEXT: ret void
1199 ; CHECK-NONE-LABEL: @load_factor4_wide(
1200 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <32 x i32>, <32 x i32>* [[PTR:%.*]], align 4
1201 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
1202 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
1203 ; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
1204 ; CHECK-NONE-NEXT: [[V3:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
1205 ; CHECK-NONE-NEXT: ret void
1207 %interleaved.vec = load <32 x i32>, <32 x i32>* %ptr, align 4
1208 %v0 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
1209 %v1 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
1210 %v2 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
1211 %v3 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
1215 define void @store_factor2_wide(<16 x i32>* %ptr, <8 x i32> %v0, <8 x i32> %v1) {
1216 ; CHECK-NEON-LABEL: @store_factor2_wide(
1217 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[PTR:%.*]] to i32*
1218 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1219 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
1220 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP1]] to i8*
1221 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP4]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
1222 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = getelementptr i32, i32* [[TMP1]], i32 8
1223 ; CHECK-NEON-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1224 ; CHECK-NEON-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
1225 ; CHECK-NEON-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP5]] to i8*
1226 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP8]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], i32 4)
1227 ; CHECK-NEON-NEXT: ret void
1229 ; CHECK-MVE-LABEL: @store_factor2_wide(
1230 ; CHECK-MVE-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[PTR:%.*]] to i32*
1231 ; CHECK-MVE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1232 ; CHECK-MVE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
1233 ; CHECK-MVE-NEXT: call void @llvm.arm.mve.vst2q.p0i32.v4i32(i32* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 0)
1234 ; CHECK-MVE-NEXT: call void @llvm.arm.mve.vst2q.p0i32.v4i32(i32* [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 1)
1235 ; CHECK-MVE-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[TMP1]], i32 8
1236 ; CHECK-MVE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1237 ; CHECK-MVE-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
1238 ; CHECK-MVE-NEXT: call void @llvm.arm.mve.vst2q.p0i32.v4i32(i32* [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], i32 0)
1239 ; CHECK-MVE-NEXT: call void @llvm.arm.mve.vst2q.p0i32.v4i32(i32* [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], i32 1)
1240 ; CHECK-MVE-NEXT: ret void
1242 ; CHECK-NONE-LABEL: @store_factor2_wide(
1243 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1244 ; CHECK-NONE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], <16 x i32>* [[PTR:%.*]], align 4
1245 ; CHECK-NONE-NEXT: ret void
1247 %interleaved.vec = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1248 store <16 x i32> %interleaved.vec, <16 x i32>* %ptr, align 4
1252 define void @store_factor3_wide(<24 x i32>* %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2) {
1253 ; CHECK-NEON-LABEL: @store_factor3_wide(
1254 ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1255 ; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1256 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <24 x i32>* [[PTR:%.*]] to i32*
1257 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1258 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
1259 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
1260 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP1]] to i8*
1261 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP5]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
1262 ; CHECK-NEON-NEXT: [[TMP6:%.*]] = getelementptr i32, i32* [[TMP1]], i32 12
1263 ; CHECK-NEON-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1264 ; CHECK-NEON-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
1265 ; CHECK-NEON-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 20, i32 21, i32 22, i32 23>
1266 ; CHECK-NEON-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP6]] to i8*
1267 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP10]], <4 x i32> [[TMP7]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
1268 ; CHECK-NEON-NEXT: ret void
1270 ; CHECK-MVE-LABEL: @store_factor3_wide(
1271 ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1272 ; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1273 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
1274 ; CHECK-MVE-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], <24 x i32>* [[PTR:%.*]], align 4
1275 ; CHECK-MVE-NEXT: ret void
1277 ; CHECK-NONE-LABEL: @store_factor3_wide(
1278 ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1279 ; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1280 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
1281 ; CHECK-NONE-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], <24 x i32>* [[PTR:%.*]], align 4
1282 ; CHECK-NONE-NEXT: ret void
1284 %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1285 %s1 = shufflevector <8 x i32> %v2, <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1286 %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
1287 store <24 x i32> %interleaved.vec, <24 x i32>* %ptr, align 4
1291 define void @store_factor4_wide(<32 x i32>* %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2, <8 x i32> %v3) {
1292 ; CHECK-NEON-LABEL: @store_factor4_wide(
1293 ; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1294 ; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> [[V3:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1295 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <32 x i32>* [[PTR:%.*]] to i32*
1296 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1297 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
1298 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
1299 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 24, i32 25, i32 26, i32 27>
1300 ; CHECK-NEON-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP1]] to i8*
1301 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 4)
1302 ; CHECK-NEON-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP1]], i32 16
1303 ; CHECK-NEON-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1304 ; CHECK-NEON-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
1305 ; CHECK-NEON-NEXT: [[TMP10:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 20, i32 21, i32 22, i32 23>
1306 ; CHECK-NEON-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 28, i32 29, i32 30, i32 31>
1307 ; CHECK-NEON-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP7]] to i8*
1308 ; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP12]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32 4)
1309 ; CHECK-NEON-NEXT: ret void
1311 ; CHECK-MVE-LABEL: @store_factor4_wide(
1312 ; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1313 ; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> [[V3:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1314 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
1315 ; CHECK-MVE-NEXT: store <32 x i32> [[INTERLEAVED_VEC]], <32 x i32>* [[PTR:%.*]], align 4
1316 ; CHECK-MVE-NEXT: ret void
1318 ; CHECK-NONE-LABEL: @store_factor4_wide(
1319 ; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1320 ; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> [[V3:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1321 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
1322 ; CHECK-NONE-NEXT: store <32 x i32> [[INTERLEAVED_VEC]], <32 x i32>* [[PTR:%.*]], align 4
1323 ; CHECK-NONE-NEXT: ret void
1325 %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1326 %s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
1327 %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
1328 store <32 x i32> %interleaved.vec, <32 x i32>* %ptr, align 4
1332 define void @load_factor2_fp128(<4 x fp128>* %ptr) {
1333 ; CHECK-NEON-LABEL: @load_factor2_fp128(
1334 ; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x fp128>, <4 x fp128>* [[PTR:%.*]], align 16
1335 ; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> poison, <2 x i32> <i32 0, i32 2>
1336 ; CHECK-NEON-NEXT: [[V1:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> poison, <2 x i32> <i32 1, i32 3>
1337 ; CHECK-NEON-NEXT: ret void
1339 ; CHECK-MVE-LABEL: @load_factor2_fp128(
1340 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x fp128>, <4 x fp128>* [[PTR:%.*]], align 16
1341 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> poison, <2 x i32> <i32 0, i32 2>
1342 ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> poison, <2 x i32> <i32 1, i32 3>
1343 ; CHECK-MVE-NEXT: ret void
1345 ; CHECK-NONE-LABEL: @load_factor2_fp128(
1346 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x fp128>, <4 x fp128>* [[PTR:%.*]], align 16
1347 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> poison, <2 x i32> <i32 0, i32 2>
1348 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> poison, <2 x i32> <i32 1, i32 3>
1349 ; CHECK-NONE-NEXT: ret void
1351 %interleaved.vec = load <4 x fp128>, <4 x fp128>* %ptr, align 16
1352 %v0 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> poison, <2 x i32> <i32 0, i32 2>
1353 %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> poison, <2 x i32> <i32 1, i32 3>
1357 define void @load_factor2_wide_pointer(<16 x i32*>* %ptr) {
1358 ; CHECK-NEON-LABEL: @load_factor2_wide_pointer(
1359 ; CHECK-NEON-NEXT: [[TMP1:%.*]] = bitcast <16 x i32*>* [[PTR:%.*]] to i32*
1360 ; CHECK-NEON-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8*
1361 ; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP2]], i32 4)
1362 ; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
1363 ; CHECK-NEON-NEXT: [[TMP4:%.*]] = inttoptr <4 x i32> [[TMP3]] to <4 x i32*>
1364 ; CHECK-NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
1365 ; CHECK-NEON-NEXT: [[TMP6:%.*]] = inttoptr <4 x i32> [[TMP5]] to <4 x i32*>
1366 ; CHECK-NEON-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP1]], i32 8
1367 ; CHECK-NEON-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8*
1368 ; CHECK-NEON-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP8]], i32 4)
1369 ; CHECK-NEON-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
1370 ; CHECK-NEON-NEXT: [[TMP10:%.*]] = inttoptr <4 x i32> [[TMP9]] to <4 x i32*>
1371 ; CHECK-NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
1372 ; CHECK-NEON-NEXT: [[TMP12:%.*]] = inttoptr <4 x i32> [[TMP11]] to <4 x i32*>
1373 ; CHECK-NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32*> [[TMP4]], <4 x i32*> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1374 ; CHECK-NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32*> [[TMP6]], <4 x i32*> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1375 ; CHECK-NEON-NEXT: ret void
1377 ; CHECK-MVE-LABEL: @load_factor2_wide_pointer(
1378 ; CHECK-MVE-NEXT: [[TMP1:%.*]] = bitcast <16 x i32*>* [[PTR:%.*]] to i32*
1379 ; CHECK-MVE-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0i32(i32* [[TMP1]])
1380 ; CHECK-MVE-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
1381 ; CHECK-MVE-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x i32*>
1382 ; CHECK-MVE-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
1383 ; CHECK-MVE-NEXT: [[TMP5:%.*]] = inttoptr <4 x i32> [[TMP4]] to <4 x i32*>
1384 ; CHECK-MVE-NEXT: [[TMP6:%.*]] = getelementptr i32, i32* [[TMP1]], i32 8
1385 ; CHECK-MVE-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0i32(i32* [[TMP6]])
1386 ; CHECK-MVE-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
1387 ; CHECK-MVE-NEXT: [[TMP8:%.*]] = inttoptr <4 x i32> [[TMP7]] to <4 x i32*>
1388 ; CHECK-MVE-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
1389 ; CHECK-MVE-NEXT: [[TMP10:%.*]] = inttoptr <4 x i32> [[TMP9]] to <4 x i32*>
1390 ; CHECK-MVE-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32*> [[TMP3]], <4 x i32*> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1391 ; CHECK-MVE-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32*> [[TMP5]], <4 x i32*> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1392 ; CHECK-MVE-NEXT: ret void
1394 ; CHECK-NONE-LABEL: @load_factor2_wide_pointer(
1395 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32*>, <16 x i32*>* [[PTR:%.*]], align 4
1396 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <16 x i32*> [[INTERLEAVED_VEC]], <16 x i32*> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1397 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <16 x i32*> [[INTERLEAVED_VEC]], <16 x i32*> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1398 ; CHECK-NONE-NEXT: ret void
1400 %interleaved.vec = load <16 x i32*>, <16 x i32*>* %ptr, align 4
1401 %v0 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1402 %v1 = shufflevector <16 x i32*> %interleaved.vec, <16 x i32*> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1406 ; This would be a candidate for interleaving, except that load doesn't
1407 ; actually load enough elements to satisfy the shuffle masks. (It would be
1408 ; possible to produce a vld2.v2i32, but that currently isn't implemented.)
1409 define void @load_out_of_range(<4 x i32>* %ptr) {
1410 ; CHECK-NEON-LABEL: @load_out_of_range(
1411 ; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, <4 x i32>* [[PTR:%.*]], align 4
1412 ; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
1413 ; CHECK-NEON-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
1414 ; CHECK-NEON-NEXT: ret void
1416 ; CHECK-MVE-LABEL: @load_out_of_range(
1417 ; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, <4 x i32>* [[PTR:%.*]], align 4
1418 ; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
1419 ; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
1420 ; CHECK-MVE-NEXT: ret void
1422 ; CHECK-NONE-LABEL: @load_out_of_range(
1423 ; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, <4 x i32>* [[PTR:%.*]], align 4
1424 ; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
1425 ; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
1426 ; CHECK-NONE-NEXT: ret void
1428 %interleaved.vec = load <4 x i32>, <4 x i32>* %ptr, align 4
1429 %v0 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
1430 %v1 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>