; RUN: opt -mtriple=nvptx64-nvidia-cuda -passes=load-store-vectorizer -S -o - %s | FileCheck %s

; Vectorize and emit valid code (Issue #54896).
; Three adjacent i8 values are loaded from a 2-byte-aligned base and written
; back in reverse order. Bytes 0-1 are expected to pair up into a <2 x i8>
; access; byte 2 has no partner and is expected to stay a scalar i8 access.
; The expectations are matched order-insensitively.
define void @int8x3a2(ptr nocapture align 2 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2

  %l0 = load i8, ptr %ptr0, align 2
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2

  store i8 %l2, ptr %ptr0, align 2
  store i8 %l1, ptr %ptr1, align 1
  store i8 %l0, ptr %ptr2, align 2

  ret void

; CHECK-LABEL: @int8x3a2
; CHECK-DAG: load <2 x i8>
; CHECK-DAG: load i8
; CHECK-DAG: store <2 x i8>
; CHECK-DAG: store i8
}
; Three adjacent i8 values are loaded from a 4-byte-aligned base and written
; back in reverse order. Only three bytes are touched, so the expected result
; is a <2 x i8> access for bytes 0-1 followed by a scalar i8 access for byte 2.
;
; Per-access alignments follow from the base alignment: offset 0 is 4-aligned,
; offset 1 is 1-aligned, offset 2 is 2-aligned. The stores use the same
; alignments as the loads at the same offsets.
define void @int8x3a4(ptr nocapture align 4 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2

  %l0 = load i8, ptr %ptr0, align 4
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2

  store i8 %l2, ptr %ptr0, align 4
  store i8 %l1, ptr %ptr1, align 1
  store i8 %l0, ptr %ptr2, align 2

  ret void

; CHECK-LABEL: @int8x3a4
; CHECK: load <2 x i8>
; CHECK: load i8
; CHECK: store <2 x i8>
; CHECK: store i8
}
; Twelve adjacent i8 values are loaded from a 4-byte-aligned base and written
; back in reverse order (byte 11 goes to offset 0, ..., byte 0 to offset 11).
; The expected result is three <4 x i8> loads followed by three <4 x i8>
; stores.
define void @int8x12a4(ptr nocapture align 4 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2
  %ptr3 = getelementptr i8, ptr %ptr, i64 3
  %ptr4 = getelementptr i8, ptr %ptr, i64 4
  %ptr5 = getelementptr i8, ptr %ptr, i64 5
  %ptr6 = getelementptr i8, ptr %ptr, i64 6
  %ptr7 = getelementptr i8, ptr %ptr, i64 7
  %ptr8 = getelementptr i8, ptr %ptr, i64 8
  %ptr9 = getelementptr i8, ptr %ptr, i64 9
  %ptra = getelementptr i8, ptr %ptr, i64 10
  %ptrb = getelementptr i8, ptr %ptr, i64 11

  %l0 = load i8, ptr %ptr0, align 4
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2
  %l3 = load i8, ptr %ptr3, align 1
  %l4 = load i8, ptr %ptr4, align 4
  %l5 = load i8, ptr %ptr5, align 1
  %l6 = load i8, ptr %ptr6, align 2
  %l7 = load i8, ptr %ptr7, align 1
  %l8 = load i8, ptr %ptr8, align 4
  %l9 = load i8, ptr %ptr9, align 1
  %la = load i8, ptr %ptra, align 2
  %lb = load i8, ptr %ptrb, align 1

  store i8 %lb, ptr %ptr0, align 4
  store i8 %la, ptr %ptr1, align 1
  store i8 %l9, ptr %ptr2, align 2
  store i8 %l8, ptr %ptr3, align 1
  store i8 %l7, ptr %ptr4, align 4
  store i8 %l6, ptr %ptr5, align 1
  store i8 %l5, ptr %ptr6, align 2
  store i8 %l4, ptr %ptr7, align 1
  store i8 %l3, ptr %ptr8, align 4
  store i8 %l2, ptr %ptr9, align 1
  store i8 %l1, ptr %ptra, align 2
  store i8 %l0, ptr %ptrb, align 1

  ret void

; CHECK-LABEL: @int8x12a4
; CHECK: load <4 x i8>
; CHECK: load <4 x i8>
; CHECK: load <4 x i8>
; CHECK: store <4 x i8>
; CHECK: store <4 x i8>
; CHECK: store <4 x i8>
}
; Sixteen adjacent i8 values are loaded from a 4-byte-aligned base. The stores
; are not issued in address order: offsets 12-15 are written first (receiving
; %lf..%lc), then offsets 0-11 are written (receiving %lb..%l0).
; The expected result is four <4 x i8> loads followed by four <4 x i8> stores.
define void @int8x16a4(ptr nocapture align 4 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2
  %ptr3 = getelementptr i8, ptr %ptr, i64 3
  %ptr4 = getelementptr i8, ptr %ptr, i64 4
  %ptr5 = getelementptr i8, ptr %ptr, i64 5
  %ptr6 = getelementptr i8, ptr %ptr, i64 6
  %ptr7 = getelementptr i8, ptr %ptr, i64 7
  %ptr8 = getelementptr i8, ptr %ptr, i64 8
  %ptr9 = getelementptr i8, ptr %ptr, i64 9
  %ptra = getelementptr i8, ptr %ptr, i64 10
  %ptrb = getelementptr i8, ptr %ptr, i64 11
  %ptrc = getelementptr i8, ptr %ptr, i64 12
  %ptrd = getelementptr i8, ptr %ptr, i64 13
  %ptre = getelementptr i8, ptr %ptr, i64 14
  %ptrf = getelementptr i8, ptr %ptr, i64 15

  %l0 = load i8, ptr %ptr0, align 4
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2
  %l3 = load i8, ptr %ptr3, align 1
  %l4 = load i8, ptr %ptr4, align 4
  %l5 = load i8, ptr %ptr5, align 1
  %l6 = load i8, ptr %ptr6, align 2
  %l7 = load i8, ptr %ptr7, align 1
  %l8 = load i8, ptr %ptr8, align 4
  %l9 = load i8, ptr %ptr9, align 1
  %la = load i8, ptr %ptra, align 2
  %lb = load i8, ptr %ptrb, align 1
  %lc = load i8, ptr %ptrc, align 4
  %ld = load i8, ptr %ptrd, align 1
  %le = load i8, ptr %ptre, align 2
  %lf = load i8, ptr %ptrf, align 1

  ; Highest four offsets are stored first.
  store i8 %lf, ptr %ptrc, align 4
  store i8 %le, ptr %ptrd, align 1
  store i8 %ld, ptr %ptre, align 2
  store i8 %lc, ptr %ptrf, align 1
  store i8 %lb, ptr %ptr0, align 4
  store i8 %la, ptr %ptr1, align 1
  store i8 %l9, ptr %ptr2, align 2
  store i8 %l8, ptr %ptr3, align 1
  store i8 %l7, ptr %ptr4, align 4
  store i8 %l6, ptr %ptr5, align 1
  store i8 %l5, ptr %ptr6, align 2
  store i8 %l4, ptr %ptr7, align 1
  store i8 %l3, ptr %ptr8, align 4
  store i8 %l2, ptr %ptr9, align 1
  store i8 %l1, ptr %ptra, align 2
  store i8 %l0, ptr %ptrb, align 1

  ret void

; CHECK-LABEL: @int8x16a4
; CHECK: load <4 x i8>
; CHECK: load <4 x i8>
; CHECK: load <4 x i8>
; CHECK: load <4 x i8>
; CHECK: store <4 x i8>
; CHECK: store <4 x i8>
; CHECK: store <4 x i8>
; CHECK: store <4 x i8>
}
; Eight adjacent i8 values are loaded from an 8-byte-aligned base and written
; back in reverse order. The expected result is a single <8 x i8> load followed
; by a single <8 x i8> store.
define void @int8x8a8(ptr nocapture align 8 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2
  %ptr3 = getelementptr i8, ptr %ptr, i64 3
  %ptr4 = getelementptr i8, ptr %ptr, i64 4
  %ptr5 = getelementptr i8, ptr %ptr, i64 5
  %ptr6 = getelementptr i8, ptr %ptr, i64 6
  %ptr7 = getelementptr i8, ptr %ptr, i64 7

  %l0 = load i8, ptr %ptr0, align 8
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2
  %l3 = load i8, ptr %ptr3, align 1
  %l4 = load i8, ptr %ptr4, align 4
  %l5 = load i8, ptr %ptr5, align 1
  %l6 = load i8, ptr %ptr6, align 2
  %l7 = load i8, ptr %ptr7, align 1

  store i8 %l7, ptr %ptr0, align 8
  store i8 %l6, ptr %ptr1, align 1
  store i8 %l5, ptr %ptr2, align 2
  store i8 %l4, ptr %ptr3, align 1
  store i8 %l3, ptr %ptr4, align 4
  store i8 %l2, ptr %ptr5, align 1
  store i8 %l1, ptr %ptr6, align 2
  store i8 %l0, ptr %ptr7, align 1

  ret void

; CHECK-LABEL: @int8x8a8
; CHECK: load <8 x i8>
; CHECK: store <8 x i8>
}
; Twelve adjacent i8 values are loaded from an 8-byte-aligned base and written
; back in reverse order. The expected result is one <8 x i8> plus one <4 x i8>
; access for the loads and likewise for the stores; the expectations are
; matched order-insensitively.
define void @int8x12a8(ptr nocapture align 8 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2
  %ptr3 = getelementptr i8, ptr %ptr, i64 3
  %ptr4 = getelementptr i8, ptr %ptr, i64 4
  %ptr5 = getelementptr i8, ptr %ptr, i64 5
  %ptr6 = getelementptr i8, ptr %ptr, i64 6
  %ptr7 = getelementptr i8, ptr %ptr, i64 7
  %ptr8 = getelementptr i8, ptr %ptr, i64 8
  %ptr9 = getelementptr i8, ptr %ptr, i64 9
  %ptra = getelementptr i8, ptr %ptr, i64 10
  %ptrb = getelementptr i8, ptr %ptr, i64 11

  %l0 = load i8, ptr %ptr0, align 8
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2
  %l3 = load i8, ptr %ptr3, align 1
  %l4 = load i8, ptr %ptr4, align 4
  %l5 = load i8, ptr %ptr5, align 1
  %l6 = load i8, ptr %ptr6, align 2
  %l7 = load i8, ptr %ptr7, align 1
  %l8 = load i8, ptr %ptr8, align 8
  %l9 = load i8, ptr %ptr9, align 1
  %la = load i8, ptr %ptra, align 2
  %lb = load i8, ptr %ptrb, align 1

  store i8 %lb, ptr %ptr0, align 8
  store i8 %la, ptr %ptr1, align 1
  store i8 %l9, ptr %ptr2, align 2
  store i8 %l8, ptr %ptr3, align 1
  store i8 %l7, ptr %ptr4, align 4
  store i8 %l6, ptr %ptr5, align 1
  store i8 %l5, ptr %ptr6, align 2
  store i8 %l4, ptr %ptr7, align 1
  store i8 %l3, ptr %ptr8, align 8
  store i8 %l2, ptr %ptr9, align 1
  store i8 %l1, ptr %ptra, align 2
  store i8 %l0, ptr %ptrb, align 1

  ret void

; CHECK-LABEL: @int8x12a8
; CHECK-DAG: load <8 x i8>
; CHECK-DAG: load <4 x i8>
; CHECK-DAG: store <8 x i8>
; CHECK-DAG: store <4 x i8>
}
; Sixteen adjacent i8 values are loaded from an 8-byte-aligned base and written
; back in reverse order (byte 15 goes to offset 0, ..., byte 0 to offset 15).
; The expected result is two <8 x i8> loads followed by two <8 x i8> stores.
define void @int8x16a8(ptr nocapture align 8 %ptr) {
  %ptr0 = getelementptr i8, ptr %ptr, i64 0
  %ptr1 = getelementptr i8, ptr %ptr, i64 1
  %ptr2 = getelementptr i8, ptr %ptr, i64 2
  %ptr3 = getelementptr i8, ptr %ptr, i64 3
  %ptr4 = getelementptr i8, ptr %ptr, i64 4
  %ptr5 = getelementptr i8, ptr %ptr, i64 5
  %ptr6 = getelementptr i8, ptr %ptr, i64 6
  %ptr7 = getelementptr i8, ptr %ptr, i64 7
  %ptr8 = getelementptr i8, ptr %ptr, i64 8
  %ptr9 = getelementptr i8, ptr %ptr, i64 9
  %ptra = getelementptr i8, ptr %ptr, i64 10
  %ptrb = getelementptr i8, ptr %ptr, i64 11
  %ptrc = getelementptr i8, ptr %ptr, i64 12
  %ptrd = getelementptr i8, ptr %ptr, i64 13
  %ptre = getelementptr i8, ptr %ptr, i64 14
  %ptrf = getelementptr i8, ptr %ptr, i64 15

  %l0 = load i8, ptr %ptr0, align 8
  %l1 = load i8, ptr %ptr1, align 1
  %l2 = load i8, ptr %ptr2, align 2
  %l3 = load i8, ptr %ptr3, align 1
  %l4 = load i8, ptr %ptr4, align 4
  %l5 = load i8, ptr %ptr5, align 1
  %l6 = load i8, ptr %ptr6, align 2
  %l7 = load i8, ptr %ptr7, align 1
  %l8 = load i8, ptr %ptr8, align 8
  %l9 = load i8, ptr %ptr9, align 1
  %la = load i8, ptr %ptra, align 2
  %lb = load i8, ptr %ptrb, align 1
  %lc = load i8, ptr %ptrc, align 4
  %ld = load i8, ptr %ptrd, align 1
  %le = load i8, ptr %ptre, align 2
  %lf = load i8, ptr %ptrf, align 1

  store i8 %lf, ptr %ptr0, align 8
  store i8 %le, ptr %ptr1, align 1
  store i8 %ld, ptr %ptr2, align 2
  store i8 %lc, ptr %ptr3, align 1
  store i8 %lb, ptr %ptr4, align 4
  store i8 %la, ptr %ptr5, align 1
  store i8 %l9, ptr %ptr6, align 2
  store i8 %l8, ptr %ptr7, align 1
  store i8 %l7, ptr %ptr8, align 8
  store i8 %l6, ptr %ptr9, align 1
  store i8 %l5, ptr %ptra, align 2
  store i8 %l4, ptr %ptrb, align 1
  store i8 %l3, ptr %ptrc, align 4
  store i8 %l2, ptr %ptrd, align 1
  store i8 %l1, ptr %ptre, align 2
  store i8 %l0, ptr %ptrf, align 1

  ret void

; CHECK-LABEL: @int8x16a8
; CHECK: load <8 x i8>
; CHECK: load <8 x i8>
; CHECK: store <8 x i8>
; CHECK: store <8 x i8>
}