1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
3 ; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; Predicated <4 x i32> load (align 4) from %x+4, masked by the non-zero lanes
; of the <4 x i32> at %m, with the result stored to %y. The +4 byte offset is
; expected to fold into the load's addressing mode ([r0, #4]).
5 define ptr @ldrwu32_4(ptr %x, ptr %y, ptr %m) {
6 ; CHECK-LABEL: ldrwu32_4:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vldrw.u32 q0, [r2]
9 ; CHECK-NEXT: vpt.i32 ne, q0, zr
10 ; CHECK-NEXT: vldrwt.u32 q0, [r0, #4]
11 ; CHECK-NEXT: vstrw.32 q0, [r1]
14 %z = getelementptr inbounds i8, ptr %x, i32 4
15 %mask = load <4 x i32>, ptr %m, align 4
16 %c = icmp ne <4 x i32> %mask, zeroinitializer
17 %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
18 store <4 x i32> %0, ptr %y, align 4
; Predicated <4 x i32> load (align 4) from %x+3, masked by the non-zero lanes
; of the <4 x i32> at %m, with the result stored to %y. The +3 byte offset is
; expected to be added into r3 first, with the load using plain [r3].
; NOTE(review): presumably 3 cannot be folded because it is not a multiple of
; the 4-byte element size -- confirm against the VLDRW immediate encoding.
22 define ptr @ldrwu32_3(ptr %x, ptr %y, ptr %m) {
23 ; CHECK-LABEL: ldrwu32_3:
24 ; CHECK: @ %bb.0: @ %entry
25 ; CHECK-NEXT: vldrw.u32 q0, [r2]
26 ; CHECK-NEXT: adds r3, r0, #3
27 ; CHECK-NEXT: vpt.i32 ne, q0, zr
28 ; CHECK-NEXT: vldrwt.u32 q0, [r3]
29 ; CHECK-NEXT: vstrw.32 q0, [r1]
32 %z = getelementptr inbounds i8, ptr %x, i32 3
33 %mask = load <4 x i32>, ptr %m, align 4
34 %c = icmp ne <4 x i32> %mask, zeroinitializer
35 %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
36 store <4 x i32> %0, ptr %y, align 4
; Predicated <4 x i32> load (align 4) from %x+2, masked by the non-zero lanes
; of the <4 x i32> at %m, with the result stored to %y. The +2 byte offset is
; expected to be added into r3 first, with the load using plain [r3].
; NOTE(review): presumably 2 cannot be folded because it is not a multiple of
; the 4-byte element size -- confirm against the VLDRW immediate encoding.
40 define ptr @ldrwu32_2(ptr %x, ptr %y, ptr %m) {
41 ; CHECK-LABEL: ldrwu32_2:
42 ; CHECK: @ %bb.0: @ %entry
43 ; CHECK-NEXT: vldrw.u32 q0, [r2]
44 ; CHECK-NEXT: adds r3, r0, #2
45 ; CHECK-NEXT: vpt.i32 ne, q0, zr
46 ; CHECK-NEXT: vldrwt.u32 q0, [r3]
47 ; CHECK-NEXT: vstrw.32 q0, [r1]
50 %z = getelementptr inbounds i8, ptr %x, i32 2
51 %mask = load <4 x i32>, ptr %m, align 4
52 %c = icmp ne <4 x i32> %mask, zeroinitializer
53 %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
54 store <4 x i32> %0, ptr %y, align 4
; Predicated <4 x i32> load (align 4) from %x+508, masked by the non-zero lanes
; of the <4 x i32> at %m, with the result stored to %y. The +508 byte offset is
; expected to fold into the load's addressing mode ([r0, #508]).
58 define ptr @ldrwu32_508(ptr %x, ptr %y, ptr %m) {
59 ; CHECK-LABEL: ldrwu32_508:
60 ; CHECK: @ %bb.0: @ %entry
61 ; CHECK-NEXT: vldrw.u32 q0, [r2]
62 ; CHECK-NEXT: vpt.i32 ne, q0, zr
63 ; CHECK-NEXT: vldrwt.u32 q0, [r0, #508]
64 ; CHECK-NEXT: vstrw.32 q0, [r1]
67 %z = getelementptr inbounds i8, ptr %x, i32 508
68 %mask = load <4 x i32>, ptr %m, align 4
69 %c = icmp ne <4 x i32> %mask, zeroinitializer
70 %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
71 store <4 x i32> %0, ptr %y, align 4
; Predicated <4 x i32> load (align 4) from %x+512, masked by the non-zero lanes
; of the <4 x i32> at %m, with the result stored to %y. The +512 byte offset is
; expected to be added into r3 first (add.w), with the load using plain [r3].
; NOTE(review): presumably 512 is just past the encodable immediate range
; (508 still folds in @ldrwu32_508) -- confirm against the VLDRW encoding.
75 define ptr @ldrwu32_512(ptr %x, ptr %y, ptr %m) {
76 ; CHECK-LABEL: ldrwu32_512:
77 ; CHECK: @ %bb.0: @ %entry
78 ; CHECK-NEXT: vldrw.u32 q0, [r2]
79 ; CHECK-NEXT: add.w r3, r0, #512
80 ; CHECK-NEXT: vpt.i32 ne, q0, zr
81 ; CHECK-NEXT: vldrwt.u32 q0, [r3]
82 ; CHECK-NEXT: vstrw.32 q0, [r1]
85 %z = getelementptr inbounds i8, ptr %x, i32 512
86 %mask = load <4 x i32>, ptr %m, align 4
87 %c = icmp ne <4 x i32> %mask, zeroinitializer
88 %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
89 store <4 x i32> %0, ptr %y, align 4
; Predicated <4 x i32> load (align 4) from %x-508, masked by the non-zero lanes
; of the <4 x i32> at %m, with the result stored to %y. The -508 byte offset is
; expected to fold into the load's addressing mode ([r0, #-508]).
93 define ptr @ldrwu32_m508(ptr %x, ptr %y, ptr %m) {
94 ; CHECK-LABEL: ldrwu32_m508:
95 ; CHECK: @ %bb.0: @ %entry
96 ; CHECK-NEXT: vldrw.u32 q0, [r2]
97 ; CHECK-NEXT: vpt.i32 ne, q0, zr
98 ; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508]
99 ; CHECK-NEXT: vstrw.32 q0, [r1]
102 %z = getelementptr inbounds i8, ptr %x, i32 -508
103 %mask = load <4 x i32>, ptr %m, align 4
104 %c = icmp ne <4 x i32> %mask, zeroinitializer
105 %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
106 store <4 x i32> %0, ptr %y, align 4
; Predicated <4 x i32> load (align 4) from %x-512, masked by the non-zero lanes
; of the <4 x i32> at %m, with the result stored to %y. The -512 byte offset is
; expected to be subtracted into r3 first (sub.w), with the load using [r3].
; NOTE(review): presumably -512 is just past the encodable immediate range
; (-508 still folds in @ldrwu32_m508) -- confirm against the VLDRW encoding.
110 define ptr @ldrwu32_m512(ptr %x, ptr %y, ptr %m) {
111 ; CHECK-LABEL: ldrwu32_m512:
112 ; CHECK: @ %bb.0: @ %entry
113 ; CHECK-NEXT: vldrw.u32 q0, [r2]
114 ; CHECK-NEXT: sub.w r3, r0, #512
115 ; CHECK-NEXT: vpt.i32 ne, q0, zr
116 ; CHECK-NEXT: vldrwt.u32 q0, [r3]
117 ; CHECK-NEXT: vstrw.32 q0, [r1]
120 %z = getelementptr inbounds i8, ptr %x, i32 -512
121 %mask = load <4 x i32>, ptr %m, align 4
122 %c = icmp ne <4 x i32> %mask, zeroinitializer
123 %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
124 store <4 x i32> %0, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x+4, zero-extended to <4 x i32> and
; stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The +4
; byte offset is expected to fold into the widening load ([r0, #4]).
128 define ptr @ldrhu32_4(ptr %x, ptr %y, ptr %m) {
129 ; CHECK-LABEL: ldrhu32_4:
130 ; CHECK: @ %bb.0: @ %entry
131 ; CHECK-NEXT: vldrw.u32 q0, [r2]
132 ; CHECK-NEXT: vpt.i32 ne, q0, zr
133 ; CHECK-NEXT: vldrht.u32 q0, [r0, #4]
134 ; CHECK-NEXT: vstrw.32 q0, [r1]
137 %z = getelementptr inbounds i8, ptr %x, i32 4
138 %mask = load <4 x i32>, ptr %m, align 4
139 %c = icmp ne <4 x i32> %mask, zeroinitializer
140 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
141 %1 = zext <4 x i16> %0 to <4 x i32>
142 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x+3, zero-extended to <4 x i32> and
; stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The +3
; byte offset is expected to be added into r3 first, with the load using [r3].
; NOTE(review): presumably 3 cannot be folded because it is not a multiple of
; the 2-byte element size -- confirm against the VLDRH immediate encoding.
146 define ptr @ldrhu32_3(ptr %x, ptr %y, ptr %m) {
147 ; CHECK-LABEL: ldrhu32_3:
148 ; CHECK: @ %bb.0: @ %entry
149 ; CHECK-NEXT: vldrw.u32 q0, [r2]
150 ; CHECK-NEXT: adds r3, r0, #3
151 ; CHECK-NEXT: vpt.i32 ne, q0, zr
152 ; CHECK-NEXT: vldrht.u32 q0, [r3]
153 ; CHECK-NEXT: vstrw.32 q0, [r1]
156 %z = getelementptr inbounds i8, ptr %x, i32 3
157 %mask = load <4 x i32>, ptr %m, align 4
158 %c = icmp ne <4 x i32> %mask, zeroinitializer
159 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
160 %1 = zext <4 x i16> %0 to <4 x i32>
161 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x+2, zero-extended to <4 x i32> and
; stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The +2
; byte offset is expected to fold into the widening load ([r0, #2]).
165 define ptr @ldrhu32_2(ptr %x, ptr %y, ptr %m) {
166 ; CHECK-LABEL: ldrhu32_2:
167 ; CHECK: @ %bb.0: @ %entry
168 ; CHECK-NEXT: vldrw.u32 q0, [r2]
169 ; CHECK-NEXT: vpt.i32 ne, q0, zr
170 ; CHECK-NEXT: vldrht.u32 q0, [r0, #2]
171 ; CHECK-NEXT: vstrw.32 q0, [r1]
174 %z = getelementptr inbounds i8, ptr %x, i32 2
175 %mask = load <4 x i32>, ptr %m, align 4
176 %c = icmp ne <4 x i32> %mask, zeroinitializer
177 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
178 %1 = zext <4 x i16> %0 to <4 x i32>
179 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x+254, zero-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; +254 byte offset is expected to fold into the widening load ([r0, #254]).
183 define ptr @ldrhu32_254(ptr %x, ptr %y, ptr %m) {
184 ; CHECK-LABEL: ldrhu32_254:
185 ; CHECK: @ %bb.0: @ %entry
186 ; CHECK-NEXT: vldrw.u32 q0, [r2]
187 ; CHECK-NEXT: vpt.i32 ne, q0, zr
188 ; CHECK-NEXT: vldrht.u32 q0, [r0, #254]
189 ; CHECK-NEXT: vstrw.32 q0, [r1]
192 %z = getelementptr inbounds i8, ptr %x, i32 254
193 %mask = load <4 x i32>, ptr %m, align 4
194 %c = icmp ne <4 x i32> %mask, zeroinitializer
195 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
196 %1 = zext <4 x i16> %0 to <4 x i32>
197 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x+256, zero-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; +256 byte offset is expected to be added into r3 first (add.w), with the load
; using plain [r3].
; NOTE(review): presumably 256 is just past the encodable immediate range
; (254 still folds in @ldrhu32_254) -- confirm against the VLDRH encoding.
201 define ptr @ldrhu32_256(ptr %x, ptr %y, ptr %m) {
202 ; CHECK-LABEL: ldrhu32_256:
203 ; CHECK: @ %bb.0: @ %entry
204 ; CHECK-NEXT: vldrw.u32 q0, [r2]
205 ; CHECK-NEXT: add.w r3, r0, #256
206 ; CHECK-NEXT: vpt.i32 ne, q0, zr
207 ; CHECK-NEXT: vldrht.u32 q0, [r3]
208 ; CHECK-NEXT: vstrw.32 q0, [r1]
211 %z = getelementptr inbounds i8, ptr %x, i32 256
212 %mask = load <4 x i32>, ptr %m, align 4
213 %c = icmp ne <4 x i32> %mask, zeroinitializer
214 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
215 %1 = zext <4 x i16> %0 to <4 x i32>
216 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x-254, zero-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; -254 byte offset is expected to fold into the widening load ([r0, #-254]).
220 define ptr @ldrhu32_m254(ptr %x, ptr %y, ptr %m) {
221 ; CHECK-LABEL: ldrhu32_m254:
222 ; CHECK: @ %bb.0: @ %entry
223 ; CHECK-NEXT: vldrw.u32 q0, [r2]
224 ; CHECK-NEXT: vpt.i32 ne, q0, zr
225 ; CHECK-NEXT: vldrht.u32 q0, [r0, #-254]
226 ; CHECK-NEXT: vstrw.32 q0, [r1]
229 %z = getelementptr inbounds i8, ptr %x, i32 -254
230 %mask = load <4 x i32>, ptr %m, align 4
231 %c = icmp ne <4 x i32> %mask, zeroinitializer
232 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
233 %1 = zext <4 x i16> %0 to <4 x i32>
234 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x-256, zero-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; -256 byte offset is expected to be subtracted into r3 first (sub.w), with the
; load using plain [r3].
; NOTE(review): presumably -256 is just past the encodable immediate range
; (-254 still folds in @ldrhu32_m254) -- confirm against the VLDRH encoding.
238 define ptr @ldrhu32_m256(ptr %x, ptr %y, ptr %m) {
239 ; CHECK-LABEL: ldrhu32_m256:
240 ; CHECK: @ %bb.0: @ %entry
241 ; CHECK-NEXT: vldrw.u32 q0, [r2]
242 ; CHECK-NEXT: sub.w r3, r0, #256
243 ; CHECK-NEXT: vpt.i32 ne, q0, zr
244 ; CHECK-NEXT: vldrht.u32 q0, [r3]
245 ; CHECK-NEXT: vstrw.32 q0, [r1]
248 %z = getelementptr inbounds i8, ptr %x, i32 -256
249 %mask = load <4 x i32>, ptr %m, align 4
250 %c = icmp ne <4 x i32> %mask, zeroinitializer
251 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
252 %1 = zext <4 x i16> %0 to <4 x i32>
253 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x+4, sign-extended to <4 x i32> and
; stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The +4
; byte offset is expected to fold into the sign-extending load ([r0, #4]).
257 define ptr @ldrhs32_4(ptr %x, ptr %y, ptr %m) {
258 ; CHECK-LABEL: ldrhs32_4:
259 ; CHECK: @ %bb.0: @ %entry
260 ; CHECK-NEXT: vldrw.u32 q0, [r2]
261 ; CHECK-NEXT: vpt.i32 ne, q0, zr
262 ; CHECK-NEXT: vldrht.s32 q0, [r0, #4]
263 ; CHECK-NEXT: vstrw.32 q0, [r1]
266 %z = getelementptr inbounds i8, ptr %x, i32 4
267 %mask = load <4 x i32>, ptr %m, align 4
268 %c = icmp ne <4 x i32> %mask, zeroinitializer
269 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
270 %1 = sext <4 x i16> %0 to <4 x i32>
271 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x+3, sign-extended to <4 x i32> and
; stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The +3
; byte offset is expected to be added into r3 first, with the load using [r3].
; NOTE(review): presumably 3 cannot be folded because it is not a multiple of
; the 2-byte element size -- confirm against the VLDRH immediate encoding.
275 define ptr @ldrhs32_3(ptr %x, ptr %y, ptr %m) {
276 ; CHECK-LABEL: ldrhs32_3:
277 ; CHECK: @ %bb.0: @ %entry
278 ; CHECK-NEXT: vldrw.u32 q0, [r2]
279 ; CHECK-NEXT: adds r3, r0, #3
280 ; CHECK-NEXT: vpt.i32 ne, q0, zr
281 ; CHECK-NEXT: vldrht.s32 q0, [r3]
282 ; CHECK-NEXT: vstrw.32 q0, [r1]
285 %z = getelementptr inbounds i8, ptr %x, i32 3
286 %mask = load <4 x i32>, ptr %m, align 4
287 %c = icmp ne <4 x i32> %mask, zeroinitializer
288 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
289 %1 = sext <4 x i16> %0 to <4 x i32>
290 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x+2, sign-extended to <4 x i32> and
; stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The +2
; byte offset is expected to fold into the sign-extending load ([r0, #2]).
294 define ptr @ldrhs32_2(ptr %x, ptr %y, ptr %m) {
295 ; CHECK-LABEL: ldrhs32_2:
296 ; CHECK: @ %bb.0: @ %entry
297 ; CHECK-NEXT: vldrw.u32 q0, [r2]
298 ; CHECK-NEXT: vpt.i32 ne, q0, zr
299 ; CHECK-NEXT: vldrht.s32 q0, [r0, #2]
300 ; CHECK-NEXT: vstrw.32 q0, [r1]
303 %z = getelementptr inbounds i8, ptr %x, i32 2
304 %mask = load <4 x i32>, ptr %m, align 4
305 %c = icmp ne <4 x i32> %mask, zeroinitializer
306 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
307 %1 = sext <4 x i16> %0 to <4 x i32>
308 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x+254, sign-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; +254 byte offset is expected to fold into the sign-extending load
; ([r0, #254]).
312 define ptr @ldrhs32_254(ptr %x, ptr %y, ptr %m) {
313 ; CHECK-LABEL: ldrhs32_254:
314 ; CHECK: @ %bb.0: @ %entry
315 ; CHECK-NEXT: vldrw.u32 q0, [r2]
316 ; CHECK-NEXT: vpt.i32 ne, q0, zr
317 ; CHECK-NEXT: vldrht.s32 q0, [r0, #254]
318 ; CHECK-NEXT: vstrw.32 q0, [r1]
321 %z = getelementptr inbounds i8, ptr %x, i32 254
322 %mask = load <4 x i32>, ptr %m, align 4
323 %c = icmp ne <4 x i32> %mask, zeroinitializer
324 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
325 %1 = sext <4 x i16> %0 to <4 x i32>
326 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x+256, sign-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; +256 byte offset is expected to be added into r3 first (add.w), with the load
; using plain [r3].
; NOTE(review): presumably 256 is just past the encodable immediate range
; (254 still folds in @ldrhs32_254) -- confirm against the VLDRH encoding.
330 define ptr @ldrhs32_256(ptr %x, ptr %y, ptr %m) {
331 ; CHECK-LABEL: ldrhs32_256:
332 ; CHECK: @ %bb.0: @ %entry
333 ; CHECK-NEXT: vldrw.u32 q0, [r2]
334 ; CHECK-NEXT: add.w r3, r0, #256
335 ; CHECK-NEXT: vpt.i32 ne, q0, zr
336 ; CHECK-NEXT: vldrht.s32 q0, [r3]
337 ; CHECK-NEXT: vstrw.32 q0, [r1]
340 %z = getelementptr inbounds i8, ptr %x, i32 256
341 %mask = load <4 x i32>, ptr %m, align 4
342 %c = icmp ne <4 x i32> %mask, zeroinitializer
343 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
344 %1 = sext <4 x i16> %0 to <4 x i32>
345 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x-254, sign-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; -254 byte offset is expected to fold into the sign-extending load
; ([r0, #-254]).
349 define ptr @ldrhs32_m254(ptr %x, ptr %y, ptr %m) {
350 ; CHECK-LABEL: ldrhs32_m254:
351 ; CHECK: @ %bb.0: @ %entry
352 ; CHECK-NEXT: vldrw.u32 q0, [r2]
353 ; CHECK-NEXT: vpt.i32 ne, q0, zr
354 ; CHECK-NEXT: vldrht.s32 q0, [r0, #-254]
355 ; CHECK-NEXT: vstrw.32 q0, [r1]
358 %z = getelementptr inbounds i8, ptr %x, i32 -254
359 %mask = load <4 x i32>, ptr %m, align 4
360 %c = icmp ne <4 x i32> %mask, zeroinitializer
361 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
362 %1 = sext <4 x i16> %0 to <4 x i32>
363 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i16> load (align 2) from %x-256, sign-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; -256 byte offset is expected to be subtracted into r3 first (sub.w), with the
; load using plain [r3].
; NOTE(review): presumably -256 is just past the encodable immediate range
; (-254 still folds in @ldrhs32_m254) -- confirm against the VLDRH encoding.
367 define ptr @ldrhs32_m256(ptr %x, ptr %y, ptr %m) {
368 ; CHECK-LABEL: ldrhs32_m256:
369 ; CHECK: @ %bb.0: @ %entry
370 ; CHECK-NEXT: vldrw.u32 q0, [r2]
371 ; CHECK-NEXT: sub.w r3, r0, #256
372 ; CHECK-NEXT: vpt.i32 ne, q0, zr
373 ; CHECK-NEXT: vldrht.s32 q0, [r3]
374 ; CHECK-NEXT: vstrw.32 q0, [r1]
377 %z = getelementptr inbounds i8, ptr %x, i32 -256
378 %mask = load <4 x i32>, ptr %m, align 4
379 %c = icmp ne <4 x i32> %mask, zeroinitializer
380 %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
381 %1 = sext <4 x i16> %0 to <4 x i32>
382 store <4 x i32> %1, ptr %y, align 4
; Predicated <8 x i16> load (align 2) from %x+4, masked by the non-zero lanes
; of the <8 x i16> at %m, with the result stored to %y. The +4 byte offset is
; expected to fold into the load's addressing mode ([r0, #4]).
386 define ptr @ldrhu16_4(ptr %x, ptr %y, ptr %m) {
387 ; CHECK-LABEL: ldrhu16_4:
388 ; CHECK: @ %bb.0: @ %entry
389 ; CHECK-NEXT: vldrh.u16 q0, [r2]
390 ; CHECK-NEXT: vpt.i16 ne, q0, zr
391 ; CHECK-NEXT: vldrht.u16 q0, [r0, #4]
392 ; CHECK-NEXT: vstrh.16 q0, [r1]
395 %z = getelementptr inbounds i8, ptr %x, i32 4
396 %mask = load <8 x i16>, ptr %m, align 2
397 %c = icmp ne <8 x i16> %mask, zeroinitializer
398 %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
399 store <8 x i16> %0, ptr %y, align 2
; Predicated <8 x i16> load (align 2) from %x+3, masked by the non-zero lanes
; of the <8 x i16> at %m, with the result stored to %y. The +3 byte offset is
; expected to be added into r3 first, with the load using plain [r3].
; NOTE(review): presumably 3 cannot be folded because it is not a multiple of
; the 2-byte element size -- confirm against the VLDRH immediate encoding.
403 define ptr @ldrhu16_3(ptr %x, ptr %y, ptr %m) {
404 ; CHECK-LABEL: ldrhu16_3:
405 ; CHECK: @ %bb.0: @ %entry
406 ; CHECK-NEXT: vldrh.u16 q0, [r2]
407 ; CHECK-NEXT: adds r3, r0, #3
408 ; CHECK-NEXT: vpt.i16 ne, q0, zr
409 ; CHECK-NEXT: vldrht.u16 q0, [r3]
410 ; CHECK-NEXT: vstrh.16 q0, [r1]
413 %z = getelementptr inbounds i8, ptr %x, i32 3
414 %mask = load <8 x i16>, ptr %m, align 2
415 %c = icmp ne <8 x i16> %mask, zeroinitializer
416 %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
417 store <8 x i16> %0, ptr %y, align 2
; Predicated <8 x i16> load (align 2) from %x+2, masked by the non-zero lanes
; of the <8 x i16> at %m, with the result stored to %y. The +2 byte offset is
; expected to fold into the load's addressing mode ([r0, #2]).
421 define ptr @ldrhu16_2(ptr %x, ptr %y, ptr %m) {
422 ; CHECK-LABEL: ldrhu16_2:
423 ; CHECK: @ %bb.0: @ %entry
424 ; CHECK-NEXT: vldrh.u16 q0, [r2]
425 ; CHECK-NEXT: vpt.i16 ne, q0, zr
426 ; CHECK-NEXT: vldrht.u16 q0, [r0, #2]
427 ; CHECK-NEXT: vstrh.16 q0, [r1]
430 %z = getelementptr inbounds i8, ptr %x, i32 2
431 %mask = load <8 x i16>, ptr %m, align 2
432 %c = icmp ne <8 x i16> %mask, zeroinitializer
433 %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
434 store <8 x i16> %0, ptr %y, align 2
; Predicated <8 x i16> load (align 2) from %x+254, masked by the non-zero lanes
; of the <8 x i16> at %m, with the result stored to %y. The +254 byte offset is
; expected to fold into the load's addressing mode ([r0, #254]).
438 define ptr @ldrhu16_254(ptr %x, ptr %y, ptr %m) {
439 ; CHECK-LABEL: ldrhu16_254:
440 ; CHECK: @ %bb.0: @ %entry
441 ; CHECK-NEXT: vldrh.u16 q0, [r2]
442 ; CHECK-NEXT: vpt.i16 ne, q0, zr
443 ; CHECK-NEXT: vldrht.u16 q0, [r0, #254]
444 ; CHECK-NEXT: vstrh.16 q0, [r1]
447 %z = getelementptr inbounds i8, ptr %x, i32 254
448 %mask = load <8 x i16>, ptr %m, align 2
449 %c = icmp ne <8 x i16> %mask, zeroinitializer
450 %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
451 store <8 x i16> %0, ptr %y, align 2
; Predicated <8 x i16> load (align 2) from %x+256, masked by the non-zero lanes
; of the <8 x i16> at %m, with the result stored to %y. The +256 byte offset is
; expected to be added into r3 first (add.w), with the load using plain [r3].
; NOTE(review): presumably 256 is just past the encodable immediate range
; (254 still folds in @ldrhu16_254) -- confirm against the VLDRH encoding.
455 define ptr @ldrhu16_256(ptr %x, ptr %y, ptr %m) {
456 ; CHECK-LABEL: ldrhu16_256:
457 ; CHECK: @ %bb.0: @ %entry
458 ; CHECK-NEXT: vldrh.u16 q0, [r2]
459 ; CHECK-NEXT: add.w r3, r0, #256
460 ; CHECK-NEXT: vpt.i16 ne, q0, zr
461 ; CHECK-NEXT: vldrht.u16 q0, [r3]
462 ; CHECK-NEXT: vstrh.16 q0, [r1]
465 %z = getelementptr inbounds i8, ptr %x, i32 256
466 %mask = load <8 x i16>, ptr %m, align 2
467 %c = icmp ne <8 x i16> %mask, zeroinitializer
468 %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
469 store <8 x i16> %0, ptr %y, align 2
; Predicated <8 x i16> load (align 2) from %x-254, masked by the non-zero lanes
; of the <8 x i16> at %m, with the result stored to %y. The -254 byte offset is
; expected to fold into the load's addressing mode ([r0, #-254]).
473 define ptr @ldrhu16_m254(ptr %x, ptr %y, ptr %m) {
474 ; CHECK-LABEL: ldrhu16_m254:
475 ; CHECK: @ %bb.0: @ %entry
476 ; CHECK-NEXT: vldrh.u16 q0, [r2]
477 ; CHECK-NEXT: vpt.i16 ne, q0, zr
478 ; CHECK-NEXT: vldrht.u16 q0, [r0, #-254]
479 ; CHECK-NEXT: vstrh.16 q0, [r1]
482 %z = getelementptr inbounds i8, ptr %x, i32 -254
483 %mask = load <8 x i16>, ptr %m, align 2
484 %c = icmp ne <8 x i16> %mask, zeroinitializer
485 %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
486 store <8 x i16> %0, ptr %y, align 2
; Predicated <8 x i16> load (align 2) from %x-256, masked by the non-zero lanes
; of the <8 x i16> at %m, with the result stored to %y. The -256 byte offset is
; expected to be subtracted into r3 first (sub.w), with the load using [r3].
; NOTE(review): presumably -256 is just past the encodable immediate range
; (-254 still folds in @ldrhu16_m254) -- confirm against the VLDRH encoding.
490 define ptr @ldrhu16_m256(ptr %x, ptr %y, ptr %m) {
491 ; CHECK-LABEL: ldrhu16_m256:
492 ; CHECK: @ %bb.0: @ %entry
493 ; CHECK-NEXT: vldrh.u16 q0, [r2]
494 ; CHECK-NEXT: sub.w r3, r0, #256
495 ; CHECK-NEXT: vpt.i16 ne, q0, zr
496 ; CHECK-NEXT: vldrht.u16 q0, [r3]
497 ; CHECK-NEXT: vstrh.16 q0, [r1]
500 %z = getelementptr inbounds i8, ptr %x, i32 -256
501 %mask = load <8 x i16>, ptr %m, align 2
502 %c = icmp ne <8 x i16> %mask, zeroinitializer
503 %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
504 store <8 x i16> %0, ptr %y, align 2
; Predicated <4 x i8> load (align 1) from %x+4, zero-extended to <4 x i32> and
; stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The +4
; byte offset is expected to fold into the widening load ([r0, #4]).
508 define ptr @ldrbu32_4(ptr %x, ptr %y, ptr %m) {
509 ; CHECK-LABEL: ldrbu32_4:
510 ; CHECK: @ %bb.0: @ %entry
511 ; CHECK-NEXT: vldrw.u32 q0, [r2]
512 ; CHECK-NEXT: vpt.i32 ne, q0, zr
513 ; CHECK-NEXT: vldrbt.u32 q0, [r0, #4]
514 ; CHECK-NEXT: vstrw.32 q0, [r1]
517 %z = getelementptr inbounds i8, ptr %x, i32 4
518 %mask = load <4 x i32>, ptr %m, align 4
519 %c = icmp ne <4 x i32> %mask, zeroinitializer
520 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
521 %1 = zext <4 x i8> %0 to <4 x i32>
522 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i8> load (align 1) from %x+3, zero-extended to <4 x i32> and
; stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The +3
; byte offset is expected to fold into the widening load ([r0, #3]).
526 define ptr @ldrbu32_3(ptr %x, ptr %y, ptr %m) {
527 ; CHECK-LABEL: ldrbu32_3:
528 ; CHECK: @ %bb.0: @ %entry
529 ; CHECK-NEXT: vldrw.u32 q0, [r2]
530 ; CHECK-NEXT: vpt.i32 ne, q0, zr
531 ; CHECK-NEXT: vldrbt.u32 q0, [r0, #3]
532 ; CHECK-NEXT: vstrw.32 q0, [r1]
535 %z = getelementptr inbounds i8, ptr %x, i32 3
536 %mask = load <4 x i32>, ptr %m, align 4
537 %c = icmp ne <4 x i32> %mask, zeroinitializer
538 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
539 %1 = zext <4 x i8> %0 to <4 x i32>
540 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i8> load (align 1) from %x+2, zero-extended to <4 x i32> and
; stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The +2
; byte offset is expected to fold into the widening load ([r0, #2]).
544 define ptr @ldrbu32_2(ptr %x, ptr %y, ptr %m) {
545 ; CHECK-LABEL: ldrbu32_2:
546 ; CHECK: @ %bb.0: @ %entry
547 ; CHECK-NEXT: vldrw.u32 q0, [r2]
548 ; CHECK-NEXT: vpt.i32 ne, q0, zr
549 ; CHECK-NEXT: vldrbt.u32 q0, [r0, #2]
550 ; CHECK-NEXT: vstrw.32 q0, [r1]
553 %z = getelementptr inbounds i8, ptr %x, i32 2
554 %mask = load <4 x i32>, ptr %m, align 4
555 %c = icmp ne <4 x i32> %mask, zeroinitializer
556 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
557 %1 = zext <4 x i8> %0 to <4 x i32>
558 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i8> load (align 1) from %x+127, zero-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; +127 byte offset is expected to fold into the widening load ([r0, #127]).
562 define ptr @ldrbu32_127(ptr %x, ptr %y, ptr %m) {
563 ; CHECK-LABEL: ldrbu32_127:
564 ; CHECK: @ %bb.0: @ %entry
565 ; CHECK-NEXT: vldrw.u32 q0, [r2]
566 ; CHECK-NEXT: vpt.i32 ne, q0, zr
567 ; CHECK-NEXT: vldrbt.u32 q0, [r0, #127]
568 ; CHECK-NEXT: vstrw.32 q0, [r1]
571 %z = getelementptr inbounds i8, ptr %x, i32 127
572 %mask = load <4 x i32>, ptr %m, align 4
573 %c = icmp ne <4 x i32> %mask, zeroinitializer
574 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
575 %1 = zext <4 x i8> %0 to <4 x i32>
576 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i8> load (align 1) from %x+128, zero-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; +128 byte offset is expected to be added into r3 first (add.w), with the load
; using plain [r3].
; NOTE(review): presumably 128 is just past the encodable immediate range
; (127 still folds in @ldrbu32_127) -- confirm against the VLDRB encoding.
580 define ptr @ldrbu32_128(ptr %x, ptr %y, ptr %m) {
581 ; CHECK-LABEL: ldrbu32_128:
582 ; CHECK: @ %bb.0: @ %entry
583 ; CHECK-NEXT: vldrw.u32 q0, [r2]
584 ; CHECK-NEXT: add.w r3, r0, #128
585 ; CHECK-NEXT: vpt.i32 ne, q0, zr
586 ; CHECK-NEXT: vldrbt.u32 q0, [r3]
587 ; CHECK-NEXT: vstrw.32 q0, [r1]
590 %z = getelementptr inbounds i8, ptr %x, i32 128
591 %mask = load <4 x i32>, ptr %m, align 4
592 %c = icmp ne <4 x i32> %mask, zeroinitializer
593 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
594 %1 = zext <4 x i8> %0 to <4 x i32>
595 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i8> load (align 1) from %x-127, zero-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; -127 byte offset is expected to fold into the widening load ([r0, #-127]).
599 define ptr @ldrbu32_m127(ptr %x, ptr %y, ptr %m) {
600 ; CHECK-LABEL: ldrbu32_m127:
601 ; CHECK: @ %bb.0: @ %entry
602 ; CHECK-NEXT: vldrw.u32 q0, [r2]
603 ; CHECK-NEXT: vpt.i32 ne, q0, zr
604 ; CHECK-NEXT: vldrbt.u32 q0, [r0, #-127]
605 ; CHECK-NEXT: vstrw.32 q0, [r1]
608 %z = getelementptr inbounds i8, ptr %x, i32 -127
609 %mask = load <4 x i32>, ptr %m, align 4
610 %c = icmp ne <4 x i32> %mask, zeroinitializer
611 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
612 %1 = zext <4 x i8> %0 to <4 x i32>
613 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i8> load (align 1) from %x-128, zero-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; -128 byte offset is expected to be subtracted into r3 first (sub.w), with the
; load using plain [r3].
; NOTE(review): presumably -128 is just past the encodable immediate range
; (-127 still folds in @ldrbu32_m127) -- confirm against the VLDRB encoding.
617 define ptr @ldrbu32_m128(ptr %x, ptr %y, ptr %m) {
618 ; CHECK-LABEL: ldrbu32_m128:
619 ; CHECK: @ %bb.0: @ %entry
620 ; CHECK-NEXT: vldrw.u32 q0, [r2]
621 ; CHECK-NEXT: sub.w r3, r0, #128
622 ; CHECK-NEXT: vpt.i32 ne, q0, zr
623 ; CHECK-NEXT: vldrbt.u32 q0, [r3]
624 ; CHECK-NEXT: vstrw.32 q0, [r1]
627 %z = getelementptr inbounds i8, ptr %x, i32 -128
628 %mask = load <4 x i32>, ptr %m, align 4
629 %c = icmp ne <4 x i32> %mask, zeroinitializer
630 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
631 %1 = zext <4 x i8> %0 to <4 x i32>
632 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i8> load (align 1) from %x+4, sign-extended to <4 x i32> and
; stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The +4
; byte offset is expected to fold into the sign-extending load ([r0, #4]).
636 define ptr @ldrbs32_4(ptr %x, ptr %y, ptr %m) {
637 ; CHECK-LABEL: ldrbs32_4:
638 ; CHECK: @ %bb.0: @ %entry
639 ; CHECK-NEXT: vldrw.u32 q0, [r2]
640 ; CHECK-NEXT: vpt.i32 ne, q0, zr
641 ; CHECK-NEXT: vldrbt.s32 q0, [r0, #4]
642 ; CHECK-NEXT: vstrw.32 q0, [r1]
645 %z = getelementptr inbounds i8, ptr %x, i32 4
646 %mask = load <4 x i32>, ptr %m, align 4
647 %c = icmp ne <4 x i32> %mask, zeroinitializer
648 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
649 %1 = sext <4 x i8> %0 to <4 x i32>
650 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i8> load (align 1) from %x+3, sign-extended to <4 x i32> and
; stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The +3
; byte offset is expected to fold into the sign-extending load ([r0, #3]).
654 define ptr @ldrbs32_3(ptr %x, ptr %y, ptr %m) {
655 ; CHECK-LABEL: ldrbs32_3:
656 ; CHECK: @ %bb.0: @ %entry
657 ; CHECK-NEXT: vldrw.u32 q0, [r2]
658 ; CHECK-NEXT: vpt.i32 ne, q0, zr
659 ; CHECK-NEXT: vldrbt.s32 q0, [r0, #3]
660 ; CHECK-NEXT: vstrw.32 q0, [r1]
663 %z = getelementptr inbounds i8, ptr %x, i32 3
664 %mask = load <4 x i32>, ptr %m, align 4
665 %c = icmp ne <4 x i32> %mask, zeroinitializer
666 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
667 %1 = sext <4 x i8> %0 to <4 x i32>
668 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i8> load (align 1) from %x+2, sign-extended to <4 x i32> and
; stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The +2
; byte offset is expected to fold into the sign-extending load ([r0, #2]).
672 define ptr @ldrbs32_2(ptr %x, ptr %y, ptr %m) {
673 ; CHECK-LABEL: ldrbs32_2:
674 ; CHECK: @ %bb.0: @ %entry
675 ; CHECK-NEXT: vldrw.u32 q0, [r2]
676 ; CHECK-NEXT: vpt.i32 ne, q0, zr
677 ; CHECK-NEXT: vldrbt.s32 q0, [r0, #2]
678 ; CHECK-NEXT: vstrw.32 q0, [r1]
681 %z = getelementptr inbounds i8, ptr %x, i32 2
682 %mask = load <4 x i32>, ptr %m, align 4
683 %c = icmp ne <4 x i32> %mask, zeroinitializer
684 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
685 %1 = sext <4 x i8> %0 to <4 x i32>
686 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i8> load (align 1) from %x+127, sign-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; +127 byte offset is expected to fold into the sign-extending load
; ([r0, #127]).
690 define ptr @ldrbs32_127(ptr %x, ptr %y, ptr %m) {
691 ; CHECK-LABEL: ldrbs32_127:
692 ; CHECK: @ %bb.0: @ %entry
693 ; CHECK-NEXT: vldrw.u32 q0, [r2]
694 ; CHECK-NEXT: vpt.i32 ne, q0, zr
695 ; CHECK-NEXT: vldrbt.s32 q0, [r0, #127]
696 ; CHECK-NEXT: vstrw.32 q0, [r1]
699 %z = getelementptr inbounds i8, ptr %x, i32 127
700 %mask = load <4 x i32>, ptr %m, align 4
701 %c = icmp ne <4 x i32> %mask, zeroinitializer
702 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
703 %1 = sext <4 x i8> %0 to <4 x i32>
704 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i8> load (align 1) from %x+128, sign-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; +128 byte offset is expected to be added into r3 first (add.w), with the load
; using plain [r3].
; NOTE(review): presumably 128 is just past the encodable immediate range
; (127 still folds in @ldrbs32_127) -- confirm against the VLDRB encoding.
708 define ptr @ldrbs32_128(ptr %x, ptr %y, ptr %m) {
709 ; CHECK-LABEL: ldrbs32_128:
710 ; CHECK: @ %bb.0: @ %entry
711 ; CHECK-NEXT: vldrw.u32 q0, [r2]
712 ; CHECK-NEXT: add.w r3, r0, #128
713 ; CHECK-NEXT: vpt.i32 ne, q0, zr
714 ; CHECK-NEXT: vldrbt.s32 q0, [r3]
715 ; CHECK-NEXT: vstrw.32 q0, [r1]
718 %z = getelementptr inbounds i8, ptr %x, i32 128
719 %mask = load <4 x i32>, ptr %m, align 4
720 %c = icmp ne <4 x i32> %mask, zeroinitializer
721 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
722 %1 = sext <4 x i8> %0 to <4 x i32>
723 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i8> load (align 1) from %x-127, sign-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; -127 byte offset is expected to fold into the sign-extending load
; ([r0, #-127]).
727 define ptr @ldrbs32_m127(ptr %x, ptr %y, ptr %m) {
728 ; CHECK-LABEL: ldrbs32_m127:
729 ; CHECK: @ %bb.0: @ %entry
730 ; CHECK-NEXT: vldrw.u32 q0, [r2]
731 ; CHECK-NEXT: vpt.i32 ne, q0, zr
732 ; CHECK-NEXT: vldrbt.s32 q0, [r0, #-127]
733 ; CHECK-NEXT: vstrw.32 q0, [r1]
736 %z = getelementptr inbounds i8, ptr %x, i32 -127
737 %mask = load <4 x i32>, ptr %m, align 4
738 %c = icmp ne <4 x i32> %mask, zeroinitializer
739 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
740 %1 = sext <4 x i8> %0 to <4 x i32>
741 store <4 x i32> %1, ptr %y, align 4
; Predicated <4 x i8> load (align 1) from %x-128, sign-extended to <4 x i32>
; and stored to %y; the mask is the non-zero lanes of the <4 x i32> at %m. The
; -128 byte offset is expected to be subtracted into r3 first (sub.w), with the
; load using plain [r3].
; NOTE(review): presumably -128 is just past the encodable immediate range
; (-127 still folds in @ldrbs32_m127) -- confirm against the VLDRB encoding.
745 define ptr @ldrbs32_m128(ptr %x, ptr %y, ptr %m) {
746 ; CHECK-LABEL: ldrbs32_m128:
747 ; CHECK: @ %bb.0: @ %entry
748 ; CHECK-NEXT: vldrw.u32 q0, [r2]
749 ; CHECK-NEXT: sub.w r3, r0, #128
750 ; CHECK-NEXT: vpt.i32 ne, q0, zr
751 ; CHECK-NEXT: vldrbt.s32 q0, [r3]
752 ; CHECK-NEXT: vstrw.32 q0, [r1]
755 %z = getelementptr inbounds i8, ptr %x, i32 -128
756 %mask = load <4 x i32>, ptr %m, align 4
757 %c = icmp ne <4 x i32> %mask, zeroinitializer
758 %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
759 %1 = sext <4 x i8> %0 to <4 x i32>
760 store <4 x i32> %1, ptr %y, align 4
; Predicated <8 x i8> load (align 1) from %x+4, zero-extended to <8 x i16> and
; stored to %y; the mask is the non-zero lanes of the <8 x i16> at %m. The +4
; byte offset is expected to fold into the widening load ([r0, #4]).
764 define ptr @ldrbu16_4(ptr %x, ptr %y, ptr %m) {
765 ; CHECK-LABEL: ldrbu16_4:
766 ; CHECK: @ %bb.0: @ %entry
767 ; CHECK-NEXT: vldrh.u16 q0, [r2]
768 ; CHECK-NEXT: vpt.i16 ne, q0, zr
769 ; CHECK-NEXT: vldrbt.u16 q0, [r0, #4]
770 ; CHECK-NEXT: vstrh.16 q0, [r1]
773 %z = getelementptr inbounds i8, ptr %x, i32 4
774 %mask = load <8 x i16>, ptr %m, align 2
775 %c = icmp ne <8 x i16> %mask, zeroinitializer
776 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
777 %1 = zext <8 x i8> %0 to <8 x i16>
778 store <8 x i16> %1, ptr %y, align 2
; Predicated <8 x i8> load (align 1) from %x+3, zero-extended to <8 x i16> and
; stored to %y; the mask is the non-zero lanes of the <8 x i16> at %m. The +3
; byte offset is expected to fold into the widening load ([r0, #3]).
782 define ptr @ldrbu16_3(ptr %x, ptr %y, ptr %m) {
783 ; CHECK-LABEL: ldrbu16_3:
784 ; CHECK: @ %bb.0: @ %entry
785 ; CHECK-NEXT: vldrh.u16 q0, [r2]
786 ; CHECK-NEXT: vpt.i16 ne, q0, zr
787 ; CHECK-NEXT: vldrbt.u16 q0, [r0, #3]
788 ; CHECK-NEXT: vstrh.16 q0, [r1]
791 %z = getelementptr inbounds i8, ptr %x, i32 3
792 %mask = load <8 x i16>, ptr %m, align 2
793 %c = icmp ne <8 x i16> %mask, zeroinitializer
794 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
795 %1 = zext <8 x i8> %0 to <8 x i16>
796 store <8 x i16> %1, ptr %y, align 2
; Predicated <8 x i8> load (align 1) from %x+2, zero-extended to <8 x i16> and
; stored to %y; the mask is the non-zero lanes of the <8 x i16> at %m. The +2
; byte offset is expected to fold into the widening load ([r0, #2]).
800 define ptr @ldrbu16_2(ptr %x, ptr %y, ptr %m) {
801 ; CHECK-LABEL: ldrbu16_2:
802 ; CHECK: @ %bb.0: @ %entry
803 ; CHECK-NEXT: vldrh.u16 q0, [r2]
804 ; CHECK-NEXT: vpt.i16 ne, q0, zr
805 ; CHECK-NEXT: vldrbt.u16 q0, [r0, #2]
806 ; CHECK-NEXT: vstrh.16 q0, [r1]
809 %z = getelementptr inbounds i8, ptr %x, i32 2
810 %mask = load <8 x i16>, ptr %m, align 2
811 %c = icmp ne <8 x i16> %mask, zeroinitializer
812 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
813 %1 = zext <8 x i8> %0 to <8 x i16>
814 store <8 x i16> %1, ptr %y, align 2
; Predicated <8 x i8> load (align 1) from %x+127, zero-extended to <8 x i16>
; and stored to %y; the mask is the non-zero lanes of the <8 x i16> at %m. The
; +127 byte offset is expected to fold into the widening load ([r0, #127]).
818 define ptr @ldrbu16_127(ptr %x, ptr %y, ptr %m) {
819 ; CHECK-LABEL: ldrbu16_127:
820 ; CHECK: @ %bb.0: @ %entry
821 ; CHECK-NEXT: vldrh.u16 q0, [r2]
822 ; CHECK-NEXT: vpt.i16 ne, q0, zr
823 ; CHECK-NEXT: vldrbt.u16 q0, [r0, #127]
824 ; CHECK-NEXT: vstrh.16 q0, [r1]
827 %z = getelementptr inbounds i8, ptr %x, i32 127
828 %mask = load <8 x i16>, ptr %m, align 2
829 %c = icmp ne <8 x i16> %mask, zeroinitializer
830 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
831 %1 = zext <8 x i8> %0 to <8 x i16>
832 store <8 x i16> %1, ptr %y, align 2
; Predicated <8 x i8> load (align 1) from %x+128, zero-extended to <8 x i16>
; and stored to %y; the mask is the non-zero lanes of the <8 x i16> at %m. The
; +128 byte offset is expected to be added into r3 first (add.w), with the load
; using plain [r3].
; NOTE(review): presumably 128 is just past the encodable immediate range
; (127 still folds in @ldrbu16_127) -- confirm against the VLDRB encoding.
836 define ptr @ldrbu16_128(ptr %x, ptr %y, ptr %m) {
837 ; CHECK-LABEL: ldrbu16_128:
838 ; CHECK: @ %bb.0: @ %entry
839 ; CHECK-NEXT: vldrh.u16 q0, [r2]
840 ; CHECK-NEXT: add.w r3, r0, #128
841 ; CHECK-NEXT: vpt.i16 ne, q0, zr
842 ; CHECK-NEXT: vldrbt.u16 q0, [r3]
843 ; CHECK-NEXT: vstrh.16 q0, [r1]
846 %z = getelementptr inbounds i8, ptr %x, i32 128
847 %mask = load <8 x i16>, ptr %m, align 2
848 %c = icmp ne <8 x i16> %mask, zeroinitializer
849 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
850 %1 = zext <8 x i8> %0 to <8 x i16>
851 store <8 x i16> %1, ptr %y, align 2
; Predicated <8 x i8> load (align 1) from %x-127, zero-extended to <8 x i16>
; and stored to %y; the mask is the non-zero lanes of the <8 x i16> at %m. The
; -127 byte offset is expected to fold into the widening load ([r0, #-127]).
855 define ptr @ldrbu16_m127(ptr %x, ptr %y, ptr %m) {
856 ; CHECK-LABEL: ldrbu16_m127:
857 ; CHECK: @ %bb.0: @ %entry
858 ; CHECK-NEXT: vldrh.u16 q0, [r2]
859 ; CHECK-NEXT: vpt.i16 ne, q0, zr
860 ; CHECK-NEXT: vldrbt.u16 q0, [r0, #-127]
861 ; CHECK-NEXT: vstrh.16 q0, [r1]
864 %z = getelementptr inbounds i8, ptr %x, i32 -127
865 %mask = load <8 x i16>, ptr %m, align 2
866 %c = icmp ne <8 x i16> %mask, zeroinitializer
867 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
868 %1 = zext <8 x i8> %0 to <8 x i16>
869 store <8 x i16> %1, ptr %y, align 2
; Predicated <8 x i8> load (align 1) from %x-128, zero-extended to <8 x i16>
; and stored to %y; the mask is the non-zero lanes of the <8 x i16> at %m. The
; -128 byte offset is expected to be subtracted into r3 first (sub.w), with the
; load using plain [r3].
; NOTE(review): presumably -128 is just past the encodable immediate range
; (-127 still folds in @ldrbu16_m127) -- confirm against the VLDRB encoding.
873 define ptr @ldrbu16_m128(ptr %x, ptr %y, ptr %m) {
874 ; CHECK-LABEL: ldrbu16_m128:
875 ; CHECK: @ %bb.0: @ %entry
876 ; CHECK-NEXT: vldrh.u16 q0, [r2]
877 ; CHECK-NEXT: sub.w r3, r0, #128
878 ; CHECK-NEXT: vpt.i16 ne, q0, zr
879 ; CHECK-NEXT: vldrbt.u16 q0, [r3]
880 ; CHECK-NEXT: vstrh.16 q0, [r1]
883 %z = getelementptr inbounds i8, ptr %x, i32 -128
884 %mask = load <8 x i16>, ptr %m, align 2
885 %c = icmp ne <8 x i16> %mask, zeroinitializer
886 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
887 %1 = zext <8 x i8> %0 to <8 x i16>
888 store <8 x i16> %1, ptr %y, align 2
; Predicated <8 x i8> load (align 1) from %x+4, sign-extended to <8 x i16> and
; stored to %y; the mask is the non-zero lanes of the <8 x i16> at %m. The +4
; byte offset is expected to fold into the sign-extending load ([r0, #4]).
892 define ptr @ldrbs16_4(ptr %x, ptr %y, ptr %m) {
893 ; CHECK-LABEL: ldrbs16_4:
894 ; CHECK: @ %bb.0: @ %entry
895 ; CHECK-NEXT: vldrh.u16 q0, [r2]
896 ; CHECK-NEXT: vpt.i16 ne, q0, zr
897 ; CHECK-NEXT: vldrbt.s16 q0, [r0, #4]
898 ; CHECK-NEXT: vstrh.16 q0, [r1]
901 %z = getelementptr inbounds i8, ptr %x, i32 4
902 %mask = load <8 x i16>, ptr %m, align 2
903 %c = icmp ne <8 x i16> %mask, zeroinitializer
904 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
905 %1 = sext <8 x i8> %0 to <8 x i16>
906 store <8 x i16> %1, ptr %y, align 2
; Predicated <8 x i8> load (align 1) from %x+3, sign-extended to <8 x i16> and
; stored to %y; the mask is the non-zero lanes of the <8 x i16> at %m. The +3
; byte offset is expected to fold into the sign-extending load ([r0, #3]).
910 define ptr @ldrbs16_3(ptr %x, ptr %y, ptr %m) {
911 ; CHECK-LABEL: ldrbs16_3:
912 ; CHECK: @ %bb.0: @ %entry
913 ; CHECK-NEXT: vldrh.u16 q0, [r2]
914 ; CHECK-NEXT: vpt.i16 ne, q0, zr
915 ; CHECK-NEXT: vldrbt.s16 q0, [r0, #3]
916 ; CHECK-NEXT: vstrh.16 q0, [r1]
919 %z = getelementptr inbounds i8, ptr %x, i32 3
920 %mask = load <8 x i16>, ptr %m, align 2
921 %c = icmp ne <8 x i16> %mask, zeroinitializer
922 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
923 %1 = sext <8 x i8> %0 to <8 x i16>
924 store <8 x i16> %1, ptr %y, align 2
; Predicated <8 x i8> load (align 1) from %x+2, sign-extended to <8 x i16> and
; stored to %y; the mask is the non-zero lanes of the <8 x i16> at %m. The +2
; byte offset is expected to fold into the sign-extending load ([r0, #2]).
928 define ptr @ldrbs16_2(ptr %x, ptr %y, ptr %m) {
929 ; CHECK-LABEL: ldrbs16_2:
930 ; CHECK: @ %bb.0: @ %entry
931 ; CHECK-NEXT: vldrh.u16 q0, [r2]
932 ; CHECK-NEXT: vpt.i16 ne, q0, zr
933 ; CHECK-NEXT: vldrbt.s16 q0, [r0, #2]
934 ; CHECK-NEXT: vstrh.16 q0, [r1]
937 %z = getelementptr inbounds i8, ptr %x, i32 2
938 %mask = load <8 x i16>, ptr %m, align 2
939 %c = icmp ne <8 x i16> %mask, zeroinitializer
940 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
941 %1 = sext <8 x i8> %0 to <8 x i16>
942 store <8 x i16> %1, ptr %y, align 2
; Predicated <8 x i8> load (align 1) from %x+127, sign-extended to <8 x i16>
; and stored to %y; the mask is the non-zero lanes of the <8 x i16> at %m. The
; +127 byte offset is expected to fold into the sign-extending load
; ([r0, #127]).
946 define ptr @ldrbs16_127(ptr %x, ptr %y, ptr %m) {
947 ; CHECK-LABEL: ldrbs16_127:
948 ; CHECK: @ %bb.0: @ %entry
949 ; CHECK-NEXT: vldrh.u16 q0, [r2]
950 ; CHECK-NEXT: vpt.i16 ne, q0, zr
951 ; CHECK-NEXT: vldrbt.s16 q0, [r0, #127]
952 ; CHECK-NEXT: vstrh.16 q0, [r1]
955 %z = getelementptr inbounds i8, ptr %x, i32 127
956 %mask = load <8 x i16>, ptr %m, align 2
957 %c = icmp ne <8 x i16> %mask, zeroinitializer
958 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
959 %1 = sext <8 x i8> %0 to <8 x i16>
960 store <8 x i16> %1, ptr %y, align 2
; Predicated <8 x i8> load (align 1) from %x+128, sign-extended to <8 x i16>
; and stored to %y; the mask is the non-zero lanes of the <8 x i16> at %m. The
; +128 byte offset is expected to be added into r3 first (add.w), with the load
; using plain [r3].
; NOTE(review): presumably 128 is just past the encodable immediate range
; (127 still folds in @ldrbs16_127) -- confirm against the VLDRB encoding.
964 define ptr @ldrbs16_128(ptr %x, ptr %y, ptr %m) {
965 ; CHECK-LABEL: ldrbs16_128:
966 ; CHECK: @ %bb.0: @ %entry
967 ; CHECK-NEXT: vldrh.u16 q0, [r2]
968 ; CHECK-NEXT: add.w r3, r0, #128
969 ; CHECK-NEXT: vpt.i16 ne, q0, zr
970 ; CHECK-NEXT: vldrbt.s16 q0, [r3]
971 ; CHECK-NEXT: vstrh.16 q0, [r1]
974 %z = getelementptr inbounds i8, ptr %x, i32 128
975 %mask = load <8 x i16>, ptr %m, align 2
976 %c = icmp ne <8 x i16> %mask, zeroinitializer
977 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
978 %1 = sext <8 x i8> %0 to <8 x i16>
979 store <8 x i16> %1, ptr %y, align 2
; Masked load of <8 x i8> from %x - 127, sign-extended to <8 x i16> and stored to %y;
; the lane predicate is (%m != 0). The assertions expect the negative byte offset to
; be folded into the predicated load: vldrbt.s16 q0, [r0, #-127].
983 define ptr @ldrbs16_m127(ptr %x, ptr %y, ptr %m) {
984 ; CHECK-LABEL: ldrbs16_m127:
985 ; CHECK: @ %bb.0: @ %entry
986 ; CHECK-NEXT: vldrh.u16 q0, [r2]
987 ; CHECK-NEXT: vpt.i16 ne, q0, zr
988 ; CHECK-NEXT: vldrbt.s16 q0, [r0, #-127]
989 ; CHECK-NEXT: vstrh.16 q0, [r1]
992 %z = getelementptr inbounds i8, ptr %x, i32 -127
993 %mask = load <8 x i16>, ptr %m, align 2
994 %c = icmp ne <8 x i16> %mask, zeroinitializer
995 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
996 %1 = sext <8 x i8> %0 to <8 x i16>
997 store <8 x i16> %1, ptr %y, align 2
; Masked load of <8 x i8> from %x - 128, sign-extended to <8 x i16> and stored to %y;
; the lane predicate is (%m != 0). The assertions expect the offset NOT to be folded:
; the address is formed with sub.w r3, r0, #128 and the load uses plain [r3].
; NOTE(review): presumably -128 is outside the encodable immediate range - confirm
; against the MVE encoding.
1001 define ptr @ldrbs16_m128(ptr %x, ptr %y, ptr %m) {
1002 ; CHECK-LABEL: ldrbs16_m128:
1003 ; CHECK: @ %bb.0: @ %entry
1004 ; CHECK-NEXT: vldrh.u16 q0, [r2]
1005 ; CHECK-NEXT: sub.w r3, r0, #128
1006 ; CHECK-NEXT: vpt.i16 ne, q0, zr
1007 ; CHECK-NEXT: vldrbt.s16 q0, [r3]
1008 ; CHECK-NEXT: vstrh.16 q0, [r1]
1011 %z = getelementptr inbounds i8, ptr %x, i32 -128
1012 %mask = load <8 x i16>, ptr %m, align 2
1013 %c = icmp ne <8 x i16> %mask, zeroinitializer
1014 %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
1015 %1 = sext <8 x i8> %0 to <8 x i16>
1016 store <8 x i16> %1, ptr %y, align 2
; Masked load of <16 x i8> from %x + 4, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the byte offset to be folded into the
; predicated load as an immediate: vldrbt.u8 q0, [r0, #4].
1020 define ptr @ldrbu8_4(ptr %x, ptr %y, ptr %m) {
1021 ; CHECK-LABEL: ldrbu8_4:
1022 ; CHECK: @ %bb.0: @ %entry
1023 ; CHECK-NEXT: vldrb.u8 q0, [r2]
1024 ; CHECK-NEXT: vpt.i8 ne, q0, zr
1025 ; CHECK-NEXT: vldrbt.u8 q0, [r0, #4]
1026 ; CHECK-NEXT: vstrb.8 q0, [r1]
1029 %z = getelementptr inbounds i8, ptr %x, i32 4
1030 %mask = load <16 x i8>, ptr %m, align 1
1031 %c = icmp ne <16 x i8> %mask, zeroinitializer
1032 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef)
1033 store <16 x i8> %0, ptr %y, align 1
; Masked load of <16 x i8> from %x + 3, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the byte offset to be folded into the
; predicated load as an immediate: vldrbt.u8 q0, [r0, #3].
1037 define ptr @ldrbu8_3(ptr %x, ptr %y, ptr %m) {
1038 ; CHECK-LABEL: ldrbu8_3:
1039 ; CHECK: @ %bb.0: @ %entry
1040 ; CHECK-NEXT: vldrb.u8 q0, [r2]
1041 ; CHECK-NEXT: vpt.i8 ne, q0, zr
1042 ; CHECK-NEXT: vldrbt.u8 q0, [r0, #3]
1043 ; CHECK-NEXT: vstrb.8 q0, [r1]
1046 %z = getelementptr inbounds i8, ptr %x, i32 3
1047 %mask = load <16 x i8>, ptr %m, align 1
1048 %c = icmp ne <16 x i8> %mask, zeroinitializer
1049 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef)
1050 store <16 x i8> %0, ptr %y, align 1
; Masked load of <16 x i8> from %x + 2, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the byte offset to be folded into the
; predicated load as an immediate: vldrbt.u8 q0, [r0, #2].
1054 define ptr @ldrbu8_2(ptr %x, ptr %y, ptr %m) {
1055 ; CHECK-LABEL: ldrbu8_2:
1056 ; CHECK: @ %bb.0: @ %entry
1057 ; CHECK-NEXT: vldrb.u8 q0, [r2]
1058 ; CHECK-NEXT: vpt.i8 ne, q0, zr
1059 ; CHECK-NEXT: vldrbt.u8 q0, [r0, #2]
1060 ; CHECK-NEXT: vstrb.8 q0, [r1]
1063 %z = getelementptr inbounds i8, ptr %x, i32 2
1064 %mask = load <16 x i8>, ptr %m, align 1
1065 %c = icmp ne <16 x i8> %mask, zeroinitializer
1066 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef)
1067 store <16 x i8> %0, ptr %y, align 1
; Masked load of <16 x i8> from %x + 127, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the byte offset to be folded into the
; predicated load as an immediate: vldrbt.u8 q0, [r0, #127].
1071 define ptr @ldrbu8_127(ptr %x, ptr %y, ptr %m) {
1072 ; CHECK-LABEL: ldrbu8_127:
1073 ; CHECK: @ %bb.0: @ %entry
1074 ; CHECK-NEXT: vldrb.u8 q0, [r2]
1075 ; CHECK-NEXT: vpt.i8 ne, q0, zr
1076 ; CHECK-NEXT: vldrbt.u8 q0, [r0, #127]
1077 ; CHECK-NEXT: vstrb.8 q0, [r1]
1080 %z = getelementptr inbounds i8, ptr %x, i32 127
1081 %mask = load <16 x i8>, ptr %m, align 1
1082 %c = icmp ne <16 x i8> %mask, zeroinitializer
1083 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef)
1084 store <16 x i8> %0, ptr %y, align 1
; Masked load of <16 x i8> from %x + 128, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the offset NOT to be folded: the address is
; formed with add.w r3, r0, #128 and the load uses plain [r3].
; NOTE(review): presumably 128 is outside the encodable immediate range - confirm
; against the MVE encoding.
1088 define ptr @ldrbu8_128(ptr %x, ptr %y, ptr %m) {
1089 ; CHECK-LABEL: ldrbu8_128:
1090 ; CHECK: @ %bb.0: @ %entry
1091 ; CHECK-NEXT: vldrb.u8 q0, [r2]
1092 ; CHECK-NEXT: add.w r3, r0, #128
1093 ; CHECK-NEXT: vpt.i8 ne, q0, zr
1094 ; CHECK-NEXT: vldrbt.u8 q0, [r3]
1095 ; CHECK-NEXT: vstrb.8 q0, [r1]
1098 %z = getelementptr inbounds i8, ptr %x, i32 128
1099 %mask = load <16 x i8>, ptr %m, align 1
1100 %c = icmp ne <16 x i8> %mask, zeroinitializer
1101 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef)
1102 store <16 x i8> %0, ptr %y, align 1
; Masked load of <16 x i8> from %x - 127, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the negative byte offset to be folded into
; the predicated load: vldrbt.u8 q0, [r0, #-127].
1106 define ptr @ldrbu8_m127(ptr %x, ptr %y, ptr %m) {
1107 ; CHECK-LABEL: ldrbu8_m127:
1108 ; CHECK: @ %bb.0: @ %entry
1109 ; CHECK-NEXT: vldrb.u8 q0, [r2]
1110 ; CHECK-NEXT: vpt.i8 ne, q0, zr
1111 ; CHECK-NEXT: vldrbt.u8 q0, [r0, #-127]
1112 ; CHECK-NEXT: vstrb.8 q0, [r1]
1115 %z = getelementptr inbounds i8, ptr %x, i32 -127
1116 %mask = load <16 x i8>, ptr %m, align 1
1117 %c = icmp ne <16 x i8> %mask, zeroinitializer
1118 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef)
1119 store <16 x i8> %0, ptr %y, align 1
; Masked load of <16 x i8> from %x - 128, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the offset NOT to be folded: the address is
; formed with sub.w r3, r0, #128 and the load uses plain [r3].
; NOTE(review): presumably -128 is outside the encodable immediate range - confirm
; against the MVE encoding.
1123 define ptr @ldrbu8_m128(ptr %x, ptr %y, ptr %m) {
1124 ; CHECK-LABEL: ldrbu8_m128:
1125 ; CHECK: @ %bb.0: @ %entry
1126 ; CHECK-NEXT: vldrb.u8 q0, [r2]
1127 ; CHECK-NEXT: sub.w r3, r0, #128
1128 ; CHECK-NEXT: vpt.i8 ne, q0, zr
1129 ; CHECK-NEXT: vldrbt.u8 q0, [r3]
1130 ; CHECK-NEXT: vstrb.8 q0, [r1]
1133 %z = getelementptr inbounds i8, ptr %x, i32 -128
1134 %mask = load <16 x i8>, ptr %m, align 1
1135 %c = icmp ne <16 x i8> %mask, zeroinitializer
1136 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef)
1137 store <16 x i8> %0, ptr %y, align 1
; Masked load of <4 x float> from %x + 4, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the byte offset to be folded into the
; predicated load as an immediate: vldrwt.u32 q0, [r0, #4].
1141 define ptr @ldrwf32_4(ptr %x, ptr %y, ptr %m) {
1142 ; CHECK-LABEL: ldrwf32_4:
1143 ; CHECK: @ %bb.0: @ %entry
1144 ; CHECK-NEXT: vldrw.u32 q0, [r2]
1145 ; CHECK-NEXT: vpt.i32 ne, q0, zr
1146 ; CHECK-NEXT: vldrwt.u32 q0, [r0, #4]
1147 ; CHECK-NEXT: vstrw.32 q0, [r1]
1150 %z = getelementptr inbounds i8, ptr %x, i32 4
1151 %mask = load <4 x i32>, ptr %m, align 4
1152 %c = icmp ne <4 x i32> %mask, zeroinitializer
1153 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef)
1154 store <4 x float> %0, ptr %y, align 4
; Masked load of <4 x float> from %x + 3, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the offset NOT to be folded: the address is
; formed with adds r3, r0, #3 and the load uses plain [r3].
; NOTE(review): presumably because 3 is not a multiple of the 4-byte element
; size - confirm against the MVE encoding.
1158 define ptr @ldrwf32_3(ptr %x, ptr %y, ptr %m) {
1159 ; CHECK-LABEL: ldrwf32_3:
1160 ; CHECK: @ %bb.0: @ %entry
1161 ; CHECK-NEXT: vldrw.u32 q0, [r2]
1162 ; CHECK-NEXT: adds r3, r0, #3
1163 ; CHECK-NEXT: vpt.i32 ne, q0, zr
1164 ; CHECK-NEXT: vldrwt.u32 q0, [r3]
1165 ; CHECK-NEXT: vstrw.32 q0, [r1]
1168 %z = getelementptr inbounds i8, ptr %x, i32 3
1169 %mask = load <4 x i32>, ptr %m, align 4
1170 %c = icmp ne <4 x i32> %mask, zeroinitializer
1171 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef)
1172 store <4 x float> %0, ptr %y, align 4
; Masked load of <4 x float> from %x + 2, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the offset NOT to be folded: the address is
; formed with adds r3, r0, #2 and the load uses plain [r3].
; NOTE(review): presumably because 2 is not a multiple of the 4-byte element
; size - confirm against the MVE encoding.
1176 define ptr @ldrwf32_2(ptr %x, ptr %y, ptr %m) {
1177 ; CHECK-LABEL: ldrwf32_2:
1178 ; CHECK: @ %bb.0: @ %entry
1179 ; CHECK-NEXT: vldrw.u32 q0, [r2]
1180 ; CHECK-NEXT: adds r3, r0, #2
1181 ; CHECK-NEXT: vpt.i32 ne, q0, zr
1182 ; CHECK-NEXT: vldrwt.u32 q0, [r3]
1183 ; CHECK-NEXT: vstrw.32 q0, [r1]
1186 %z = getelementptr inbounds i8, ptr %x, i32 2
1187 %mask = load <4 x i32>, ptr %m, align 4
1188 %c = icmp ne <4 x i32> %mask, zeroinitializer
1189 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef)
1190 store <4 x float> %0, ptr %y, align 4
; Masked load of <4 x float> from %x + 508, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the byte offset to be folded into the
; predicated load as an immediate: vldrwt.u32 q0, [r0, #508].
1194 define ptr @ldrwf32_508(ptr %x, ptr %y, ptr %m) {
1195 ; CHECK-LABEL: ldrwf32_508:
1196 ; CHECK: @ %bb.0: @ %entry
1197 ; CHECK-NEXT: vldrw.u32 q0, [r2]
1198 ; CHECK-NEXT: vpt.i32 ne, q0, zr
1199 ; CHECK-NEXT: vldrwt.u32 q0, [r0, #508]
1200 ; CHECK-NEXT: vstrw.32 q0, [r1]
1203 %z = getelementptr inbounds i8, ptr %x, i32 508
1204 %mask = load <4 x i32>, ptr %m, align 4
1205 %c = icmp ne <4 x i32> %mask, zeroinitializer
1206 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef)
1207 store <4 x float> %0, ptr %y, align 4
; Masked load of <4 x float> from %x + 512, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the offset NOT to be folded: the address is
; formed with add.w r3, r0, #512 and the load uses plain [r3].
; NOTE(review): presumably 512 is outside the encodable immediate range - confirm
; against the MVE encoding.
1211 define ptr @ldrwf32_512(ptr %x, ptr %y, ptr %m) {
1212 ; CHECK-LABEL: ldrwf32_512:
1213 ; CHECK: @ %bb.0: @ %entry
1214 ; CHECK-NEXT: vldrw.u32 q0, [r2]
1215 ; CHECK-NEXT: add.w r3, r0, #512
1216 ; CHECK-NEXT: vpt.i32 ne, q0, zr
1217 ; CHECK-NEXT: vldrwt.u32 q0, [r3]
1218 ; CHECK-NEXT: vstrw.32 q0, [r1]
1221 %z = getelementptr inbounds i8, ptr %x, i32 512
1222 %mask = load <4 x i32>, ptr %m, align 4
1223 %c = icmp ne <4 x i32> %mask, zeroinitializer
1224 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef)
1225 store <4 x float> %0, ptr %y, align 4
; Masked load of <4 x float> from %x - 508, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the negative byte offset to be folded into
; the predicated load: vldrwt.u32 q0, [r0, #-508].
1229 define ptr @ldrwf32_m508(ptr %x, ptr %y, ptr %m) {
1230 ; CHECK-LABEL: ldrwf32_m508:
1231 ; CHECK: @ %bb.0: @ %entry
1232 ; CHECK-NEXT: vldrw.u32 q0, [r2]
1233 ; CHECK-NEXT: vpt.i32 ne, q0, zr
1234 ; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508]
1235 ; CHECK-NEXT: vstrw.32 q0, [r1]
1238 %z = getelementptr inbounds i8, ptr %x, i32 -508
1239 %mask = load <4 x i32>, ptr %m, align 4
1240 %c = icmp ne <4 x i32> %mask, zeroinitializer
1241 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef)
1242 store <4 x float> %0, ptr %y, align 4
; Masked load of <4 x float> from %x - 512, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the offset NOT to be folded: the address is
; formed with sub.w r3, r0, #512 and the load uses plain [r3].
; NOTE(review): presumably -512 is outside the encodable immediate range - confirm
; against the MVE encoding.
1246 define ptr @ldrwf32_m512(ptr %x, ptr %y, ptr %m) {
1247 ; CHECK-LABEL: ldrwf32_m512:
1248 ; CHECK: @ %bb.0: @ %entry
1249 ; CHECK-NEXT: vldrw.u32 q0, [r2]
1250 ; CHECK-NEXT: sub.w r3, r0, #512
1251 ; CHECK-NEXT: vpt.i32 ne, q0, zr
1252 ; CHECK-NEXT: vldrwt.u32 q0, [r3]
1253 ; CHECK-NEXT: vstrw.32 q0, [r1]
1256 %z = getelementptr inbounds i8, ptr %x, i32 -512
1257 %mask = load <4 x i32>, ptr %m, align 4
1258 %c = icmp ne <4 x i32> %mask, zeroinitializer
1259 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef)
1260 store <4 x float> %0, ptr %y, align 4
; Masked load of <8 x half> from %x + 4, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the byte offset to be folded into the
; predicated load as an immediate: vldrht.u16 q0, [r0, #4].
1264 define ptr @ldrhf16_4(ptr %x, ptr %y, ptr %m) {
1265 ; CHECK-LABEL: ldrhf16_4:
1266 ; CHECK: @ %bb.0: @ %entry
1267 ; CHECK-NEXT: vldrh.u16 q0, [r2]
1268 ; CHECK-NEXT: vpt.i16 ne, q0, zr
1269 ; CHECK-NEXT: vldrht.u16 q0, [r0, #4]
1270 ; CHECK-NEXT: vstrh.16 q0, [r1]
1273 %z = getelementptr inbounds i8, ptr %x, i32 4
1274 %mask = load <8 x i16>, ptr %m, align 2
1275 %c = icmp ne <8 x i16> %mask, zeroinitializer
1276 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef)
1277 store <8 x half> %0, ptr %y, align 2
; Masked load of <8 x half> from %x + 3, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the offset NOT to be folded: the address is
; formed with adds r3, r0, #3 and the load uses plain [r3].
; NOTE(review): presumably because 3 is not a multiple of the 2-byte element
; size - confirm against the MVE encoding.
1281 define ptr @ldrhf16_3(ptr %x, ptr %y, ptr %m) {
1282 ; CHECK-LABEL: ldrhf16_3:
1283 ; CHECK: @ %bb.0: @ %entry
1284 ; CHECK-NEXT: vldrh.u16 q0, [r2]
1285 ; CHECK-NEXT: adds r3, r0, #3
1286 ; CHECK-NEXT: vpt.i16 ne, q0, zr
1287 ; CHECK-NEXT: vldrht.u16 q0, [r3]
1288 ; CHECK-NEXT: vstrh.16 q0, [r1]
1291 %z = getelementptr inbounds i8, ptr %x, i32 3
1292 %mask = load <8 x i16>, ptr %m, align 2
1293 %c = icmp ne <8 x i16> %mask, zeroinitializer
1294 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef)
1295 store <8 x half> %0, ptr %y, align 2
; Masked load of <8 x half> from %x + 2, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the byte offset to be folded into the
; predicated load as an immediate: vldrht.u16 q0, [r0, #2].
1299 define ptr @ldrhf16_2(ptr %x, ptr %y, ptr %m) {
1300 ; CHECK-LABEL: ldrhf16_2:
1301 ; CHECK: @ %bb.0: @ %entry
1302 ; CHECK-NEXT: vldrh.u16 q0, [r2]
1303 ; CHECK-NEXT: vpt.i16 ne, q0, zr
1304 ; CHECK-NEXT: vldrht.u16 q0, [r0, #2]
1305 ; CHECK-NEXT: vstrh.16 q0, [r1]
1308 %z = getelementptr inbounds i8, ptr %x, i32 2
1309 %mask = load <8 x i16>, ptr %m, align 2
1310 %c = icmp ne <8 x i16> %mask, zeroinitializer
1311 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef)
1312 store <8 x half> %0, ptr %y, align 2
; Masked load of <8 x half> from %x + 254, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the byte offset to be folded into the
; predicated load as an immediate: vldrht.u16 q0, [r0, #254].
1316 define ptr @ldrhf16_254(ptr %x, ptr %y, ptr %m) {
1317 ; CHECK-LABEL: ldrhf16_254:
1318 ; CHECK: @ %bb.0: @ %entry
1319 ; CHECK-NEXT: vldrh.u16 q0, [r2]
1320 ; CHECK-NEXT: vpt.i16 ne, q0, zr
1321 ; CHECK-NEXT: vldrht.u16 q0, [r0, #254]
1322 ; CHECK-NEXT: vstrh.16 q0, [r1]
1325 %z = getelementptr inbounds i8, ptr %x, i32 254
1326 %mask = load <8 x i16>, ptr %m, align 2
1327 %c = icmp ne <8 x i16> %mask, zeroinitializer
1328 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef)
1329 store <8 x half> %0, ptr %y, align 2
; Masked load of <8 x half> from %x + 256, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the offset NOT to be folded: the address is
; formed with add.w r3, r0, #256 and the load uses plain [r3].
; NOTE(review): presumably 256 is outside the encodable immediate range - confirm
; against the MVE encoding.
1333 define ptr @ldrhf16_256(ptr %x, ptr %y, ptr %m) {
1334 ; CHECK-LABEL: ldrhf16_256:
1335 ; CHECK: @ %bb.0: @ %entry
1336 ; CHECK-NEXT: vldrh.u16 q0, [r2]
1337 ; CHECK-NEXT: add.w r3, r0, #256
1338 ; CHECK-NEXT: vpt.i16 ne, q0, zr
1339 ; CHECK-NEXT: vldrht.u16 q0, [r3]
1340 ; CHECK-NEXT: vstrh.16 q0, [r1]
1343 %z = getelementptr inbounds i8, ptr %x, i32 256
1344 %mask = load <8 x i16>, ptr %m, align 2
1345 %c = icmp ne <8 x i16> %mask, zeroinitializer
1346 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef)
1347 store <8 x half> %0, ptr %y, align 2
; Masked load of <8 x half> from %x - 254, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the negative byte offset to be folded into
; the predicated load: vldrht.u16 q0, [r0, #-254].
1351 define ptr @ldrhf16_m254(ptr %x, ptr %y, ptr %m) {
1352 ; CHECK-LABEL: ldrhf16_m254:
1353 ; CHECK: @ %bb.0: @ %entry
1354 ; CHECK-NEXT: vldrh.u16 q0, [r2]
1355 ; CHECK-NEXT: vpt.i16 ne, q0, zr
1356 ; CHECK-NEXT: vldrht.u16 q0, [r0, #-254]
1357 ; CHECK-NEXT: vstrh.16 q0, [r1]
1360 %z = getelementptr inbounds i8, ptr %x, i32 -254
1361 %mask = load <8 x i16>, ptr %m, align 2
1362 %c = icmp ne <8 x i16> %mask, zeroinitializer
1363 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef)
1364 store <8 x half> %0, ptr %y, align 2
; Masked load of <8 x half> from %x - 256, stored to %y; the lane predicate is
; (%m != 0). The assertions expect the offset NOT to be folded: the address is
; formed with sub.w r3, r0, #256 and the load uses plain [r3].
; NOTE(review): presumably -256 is outside the encodable immediate range - confirm
; against the MVE encoding.
1368 define ptr @ldrhf16_m256(ptr %x, ptr %y, ptr %m) {
1369 ; CHECK-LABEL: ldrhf16_m256:
1370 ; CHECK: @ %bb.0: @ %entry
1371 ; CHECK-NEXT: vldrh.u16 q0, [r2]
1372 ; CHECK-NEXT: sub.w r3, r0, #256
1373 ; CHECK-NEXT: vpt.i16 ne, q0, zr
1374 ; CHECK-NEXT: vldrht.u16 q0, [r3]
1375 ; CHECK-NEXT: vstrh.16 q0, [r1]
1378 %z = getelementptr inbounds i8, ptr %x, i32 -256
1379 %mask = load <8 x i16>, ptr %m, align 2
1380 %c = icmp ne <8 x i16> %mask, zeroinitializer
1381 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef)
1382 store <8 x half> %0, ptr %y, align 2
; Masked store of a <4 x i32> value (loaded from %x) to %y + 4; the lane
; predicate is (%m != 0). The assertions expect the byte offset to be folded
; into the predicated store as an immediate: vstrwt.32 q0, [r0, #4].
1389 define ptr @strw32_4(ptr %y, ptr %x, ptr %m) {
1390 ; CHECK-LABEL: strw32_4:
1391 ; CHECK: @ %bb.0: @ %entry
1392 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1393 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1394 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1395 ; CHECK-NEXT: vstrwt.32 q0, [r0, #4]
1398 %z = getelementptr inbounds i8, ptr %y, i32 4
1399 %mask = load <4 x i32>, ptr %m, align 4
1400 %c = icmp ne <4 x i32> %mask, zeroinitializer
1401 %0 = load <4 x i32>, ptr %x, align 4
1402 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of a <4 x i32> value (loaded from %x) to %y + 3; the lane
; predicate is (%m != 0). The assertions expect the offset NOT to be folded:
; the address is formed with adds r1, r0, #3 and the store uses plain [r1].
; NOTE(review): presumably because 3 is not a multiple of the 4-byte element
; size - confirm against the MVE encoding.
1406 define ptr @strw32_3(ptr %y, ptr %x, ptr %m) {
1407 ; CHECK-LABEL: strw32_3:
1408 ; CHECK: @ %bb.0: @ %entry
1409 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1410 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1411 ; CHECK-NEXT: adds r1, r0, #3
1412 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1413 ; CHECK-NEXT: vstrwt.32 q0, [r1]
1416 %z = getelementptr inbounds i8, ptr %y, i32 3
1417 %mask = load <4 x i32>, ptr %m, align 4
1418 %c = icmp ne <4 x i32> %mask, zeroinitializer
1419 %0 = load <4 x i32>, ptr %x, align 4
1420 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of a <4 x i32> value (loaded from %x) to %y + 2; the lane
; predicate is (%m != 0). The assertions expect the offset NOT to be folded:
; the address is formed with adds r1, r0, #2 and the store uses plain [r1].
; NOTE(review): presumably because 2 is not a multiple of the 4-byte element
; size - confirm against the MVE encoding.
1424 define ptr @strw32_2(ptr %y, ptr %x, ptr %m) {
1425 ; CHECK-LABEL: strw32_2:
1426 ; CHECK: @ %bb.0: @ %entry
1427 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1428 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1429 ; CHECK-NEXT: adds r1, r0, #2
1430 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1431 ; CHECK-NEXT: vstrwt.32 q0, [r1]
1434 %z = getelementptr inbounds i8, ptr %y, i32 2
1435 %mask = load <4 x i32>, ptr %m, align 4
1436 %c = icmp ne <4 x i32> %mask, zeroinitializer
1437 %0 = load <4 x i32>, ptr %x, align 4
1438 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of a <4 x i32> value (loaded from %x) to %y + 508; the lane
; predicate is (%m != 0). The assertions expect the byte offset to be folded
; into the predicated store as an immediate: vstrwt.32 q0, [r0, #508].
1442 define ptr @strw32_508(ptr %y, ptr %x, ptr %m) {
1443 ; CHECK-LABEL: strw32_508:
1444 ; CHECK: @ %bb.0: @ %entry
1445 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1446 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1447 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1448 ; CHECK-NEXT: vstrwt.32 q0, [r0, #508]
1451 %z = getelementptr inbounds i8, ptr %y, i32 508
1452 %mask = load <4 x i32>, ptr %m, align 4
1453 %c = icmp ne <4 x i32> %mask, zeroinitializer
1454 %0 = load <4 x i32>, ptr %x, align 4
1455 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of a <4 x i32> value (loaded from %x) to %y + 512; the lane
; predicate is (%m != 0). The assertions expect the offset NOT to be folded:
; the address is formed with add.w r1, r0, #512 and the store uses plain [r1].
; NOTE(review): presumably 512 is outside the encodable immediate range - confirm
; against the MVE encoding.
1459 define ptr @strw32_512(ptr %y, ptr %x, ptr %m) {
1460 ; CHECK-LABEL: strw32_512:
1461 ; CHECK: @ %bb.0: @ %entry
1462 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1463 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1464 ; CHECK-NEXT: add.w r1, r0, #512
1465 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1466 ; CHECK-NEXT: vstrwt.32 q0, [r1]
1469 %z = getelementptr inbounds i8, ptr %y, i32 512
1470 %mask = load <4 x i32>, ptr %m, align 4
1471 %c = icmp ne <4 x i32> %mask, zeroinitializer
1472 %0 = load <4 x i32>, ptr %x, align 4
1473 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of a <4 x i32> value (loaded from %x) to %y - 508; the lane
; predicate is (%m != 0). The assertions expect the negative byte offset to be
; folded into the predicated store: vstrwt.32 q0, [r0, #-508].
1477 define ptr @strw32_m508(ptr %y, ptr %x, ptr %m) {
1478 ; CHECK-LABEL: strw32_m508:
1479 ; CHECK: @ %bb.0: @ %entry
1480 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1481 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1482 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1483 ; CHECK-NEXT: vstrwt.32 q0, [r0, #-508]
1486 %z = getelementptr inbounds i8, ptr %y, i32 -508
1487 %mask = load <4 x i32>, ptr %m, align 4
1488 %c = icmp ne <4 x i32> %mask, zeroinitializer
1489 %0 = load <4 x i32>, ptr %x, align 4
1490 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of a <4 x i32> value (loaded from %x) to %y - 512; the lane
; predicate is (%m != 0). The assertions expect the offset NOT to be folded:
; the address is formed with sub.w r1, r0, #512 and the store uses plain [r1].
; NOTE(review): presumably -512 is outside the encodable immediate range - confirm
; against the MVE encoding.
1494 define ptr @strw32_m512(ptr %y, ptr %x, ptr %m) {
1495 ; CHECK-LABEL: strw32_m512:
1496 ; CHECK: @ %bb.0: @ %entry
1497 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1498 ; CHECK-NEXT: vldrw.u32 q0, [r1]
1499 ; CHECK-NEXT: sub.w r1, r0, #512
1500 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1501 ; CHECK-NEXT: vstrwt.32 q0, [r1]
1504 %z = getelementptr inbounds i8, ptr %y, i32 -512
1505 %mask = load <4 x i32>, ptr %m, align 4
1506 %c = icmp ne <4 x i32> %mask, zeroinitializer
1507 %0 = load <4 x i32>, ptr %x, align 4
1508 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of a <4 x i16> value (loaded from %x) to %y + 4; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the byte offset to
; be folded into the predicated store as an immediate: vstrht.32 q0, [r0, #4].
1512 define ptr @strh32_4(ptr %y, ptr %x, ptr %m) {
1513 ; CHECK-LABEL: strh32_4:
1514 ; CHECK: @ %bb.0: @ %entry
1515 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1516 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1517 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1518 ; CHECK-NEXT: vstrht.32 q0, [r0, #4]
1521 %z = getelementptr inbounds i8, ptr %y, i32 4
1522 %mask = load <4 x i32>, ptr %m, align 4
1523 %c = icmp ne <4 x i32> %mask, zeroinitializer
1524 %0 = load <4 x i16>, ptr %x, align 2
1525 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c)
; Masked store of a <4 x i16> value (loaded from %x) to %y + 3; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the offset NOT to
; be folded: the address is formed with adds r1, r0, #3 and the store uses [r1].
; NOTE(review): presumably because 3 is not a multiple of the 2-byte element
; size - confirm against the MVE encoding.
1529 define ptr @strh32_3(ptr %y, ptr %x, ptr %m) {
1530 ; CHECK-LABEL: strh32_3:
1531 ; CHECK: @ %bb.0: @ %entry
1532 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1533 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1534 ; CHECK-NEXT: adds r1, r0, #3
1535 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1536 ; CHECK-NEXT: vstrht.32 q0, [r1]
1539 %z = getelementptr inbounds i8, ptr %y, i32 3
1540 %mask = load <4 x i32>, ptr %m, align 4
1541 %c = icmp ne <4 x i32> %mask, zeroinitializer
1542 %0 = load <4 x i16>, ptr %x, align 2
1543 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c)
; Masked store of a <4 x i16> value (loaded from %x) to %y + 2; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the byte offset to
; be folded into the predicated store as an immediate: vstrht.32 q0, [r0, #2].
1547 define ptr @strh32_2(ptr %y, ptr %x, ptr %m) {
1548 ; CHECK-LABEL: strh32_2:
1549 ; CHECK: @ %bb.0: @ %entry
1550 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1551 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1552 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1553 ; CHECK-NEXT: vstrht.32 q0, [r0, #2]
1556 %z = getelementptr inbounds i8, ptr %y, i32 2
1557 %mask = load <4 x i32>, ptr %m, align 4
1558 %c = icmp ne <4 x i32> %mask, zeroinitializer
1559 %0 = load <4 x i16>, ptr %x, align 2
1560 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c)
; Masked store of a <4 x i16> value (loaded from %x) to %y + 254; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the byte offset to
; be folded into the predicated store as an immediate: vstrht.32 q0, [r0, #254].
1564 define ptr @strh32_254(ptr %y, ptr %x, ptr %m) {
1565 ; CHECK-LABEL: strh32_254:
1566 ; CHECK: @ %bb.0: @ %entry
1567 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1568 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1569 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1570 ; CHECK-NEXT: vstrht.32 q0, [r0, #254]
1573 %z = getelementptr inbounds i8, ptr %y, i32 254
1574 %mask = load <4 x i32>, ptr %m, align 4
1575 %c = icmp ne <4 x i32> %mask, zeroinitializer
1576 %0 = load <4 x i16>, ptr %x, align 2
1577 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c)
; Masked store of a <4 x i16> value (loaded from %x) to %y + 256; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the offset NOT to
; be folded: the address is formed with add.w r1, r0, #256 and the store uses [r1].
; NOTE(review): presumably 256 is outside the encodable immediate range - confirm
; against the MVE encoding.
1581 define ptr @strh32_256(ptr %y, ptr %x, ptr %m) {
1582 ; CHECK-LABEL: strh32_256:
1583 ; CHECK: @ %bb.0: @ %entry
1584 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1585 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1586 ; CHECK-NEXT: add.w r1, r0, #256
1587 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1588 ; CHECK-NEXT: vstrht.32 q0, [r1]
1591 %z = getelementptr inbounds i8, ptr %y, i32 256
1592 %mask = load <4 x i32>, ptr %m, align 4
1593 %c = icmp ne <4 x i32> %mask, zeroinitializer
1594 %0 = load <4 x i16>, ptr %x, align 2
1595 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c)
; Masked store of a <4 x i16> value (loaded from %x) to %y - 254; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the negative byte
; offset to be folded into the predicated store: vstrht.32 q0, [r0, #-254].
1599 define ptr @strh32_m254(ptr %y, ptr %x, ptr %m) {
1600 ; CHECK-LABEL: strh32_m254:
1601 ; CHECK: @ %bb.0: @ %entry
1602 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1603 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1604 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1605 ; CHECK-NEXT: vstrht.32 q0, [r0, #-254]
1608 %z = getelementptr inbounds i8, ptr %y, i32 -254
1609 %mask = load <4 x i32>, ptr %m, align 4
1610 %c = icmp ne <4 x i32> %mask, zeroinitializer
1611 %0 = load <4 x i16>, ptr %x, align 2
1612 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c)
; Masked store of a <4 x i16> value (loaded from %x) to %y - 256; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the offset NOT to
; be folded: the address is formed with sub.w r1, r0, #256 and the store uses [r1].
; NOTE(review): presumably -256 is outside the encodable immediate range - confirm
; against the MVE encoding.
1616 define ptr @strh32_m256(ptr %y, ptr %x, ptr %m) {
1617 ; CHECK-LABEL: strh32_m256:
1618 ; CHECK: @ %bb.0: @ %entry
1619 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1620 ; CHECK-NEXT: vldrh.u32 q0, [r1]
1621 ; CHECK-NEXT: sub.w r1, r0, #256
1622 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1623 ; CHECK-NEXT: vstrht.32 q0, [r1]
1626 %z = getelementptr inbounds i8, ptr %y, i32 -256
1627 %mask = load <4 x i32>, ptr %m, align 4
1628 %c = icmp ne <4 x i32> %mask, zeroinitializer
1629 %0 = load <4 x i16>, ptr %x, align 2
1630 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c)
; Masked store of a <8 x i16> value (loaded from %x) to %y + 4; the lane
; predicate is (%m != 0). The assertions expect the byte offset to be folded
; into the predicated store as an immediate: vstrht.16 q0, [r0, #4].
1634 define ptr @strh16_4(ptr %y, ptr %x, ptr %m) {
1635 ; CHECK-LABEL: strh16_4:
1636 ; CHECK: @ %bb.0: @ %entry
1637 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1638 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1639 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1640 ; CHECK-NEXT: vstrht.16 q0, [r0, #4]
1643 %z = getelementptr inbounds i8, ptr %y, i32 4
1644 %mask = load <8 x i16>, ptr %m, align 2
1645 %c = icmp ne <8 x i16> %mask, zeroinitializer
1646 %0 = load <8 x i16>, ptr %x, align 2
1647 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c)
; Masked store of a <8 x i16> value (loaded from %x) to %y + 3; the lane
; predicate is (%m != 0). The assertions expect the offset NOT to be folded:
; the address is formed with adds r1, r0, #3 and the store uses plain [r1].
; NOTE(review): presumably because 3 is not a multiple of the 2-byte element
; size - confirm against the MVE encoding.
1651 define ptr @strh16_3(ptr %y, ptr %x, ptr %m) {
1652 ; CHECK-LABEL: strh16_3:
1653 ; CHECK: @ %bb.0: @ %entry
1654 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1655 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1656 ; CHECK-NEXT: adds r1, r0, #3
1657 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1658 ; CHECK-NEXT: vstrht.16 q0, [r1]
1661 %z = getelementptr inbounds i8, ptr %y, i32 3
1662 %mask = load <8 x i16>, ptr %m, align 2
1663 %c = icmp ne <8 x i16> %mask, zeroinitializer
1664 %0 = load <8 x i16>, ptr %x, align 2
1665 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c)
; Masked store of a <8 x i16> value (loaded from %x) to %y + 2; the lane
; predicate is (%m != 0). The assertions expect the byte offset to be folded
; into the predicated store as an immediate: vstrht.16 q0, [r0, #2].
1669 define ptr @strh16_2(ptr %y, ptr %x, ptr %m) {
1670 ; CHECK-LABEL: strh16_2:
1671 ; CHECK: @ %bb.0: @ %entry
1672 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1673 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1674 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1675 ; CHECK-NEXT: vstrht.16 q0, [r0, #2]
1678 %z = getelementptr inbounds i8, ptr %y, i32 2
1679 %mask = load <8 x i16>, ptr %m, align 2
1680 %c = icmp ne <8 x i16> %mask, zeroinitializer
1681 %0 = load <8 x i16>, ptr %x, align 2
1682 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c)
; Masked store of a <8 x i16> value (loaded from %x) to %y + 254; the lane
; predicate is (%m != 0). The assertions expect the byte offset to be folded
; into the predicated store as an immediate: vstrht.16 q0, [r0, #254].
1686 define ptr @strh16_254(ptr %y, ptr %x, ptr %m) {
1687 ; CHECK-LABEL: strh16_254:
1688 ; CHECK: @ %bb.0: @ %entry
1689 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1690 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1691 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1692 ; CHECK-NEXT: vstrht.16 q0, [r0, #254]
1695 %z = getelementptr inbounds i8, ptr %y, i32 254
1696 %mask = load <8 x i16>, ptr %m, align 2
1697 %c = icmp ne <8 x i16> %mask, zeroinitializer
1698 %0 = load <8 x i16>, ptr %x, align 2
1699 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c)
; Masked store of a <8 x i16> value (loaded from %x) to %y + 256; the lane
; predicate is (%m != 0). The assertions expect the offset NOT to be folded:
; the address is formed with add.w r1, r0, #256 and the store uses plain [r1].
; NOTE(review): presumably 256 is outside the encodable immediate range - confirm
; against the MVE encoding.
1703 define ptr @strh16_256(ptr %y, ptr %x, ptr %m) {
1704 ; CHECK-LABEL: strh16_256:
1705 ; CHECK: @ %bb.0: @ %entry
1706 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1707 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1708 ; CHECK-NEXT: add.w r1, r0, #256
1709 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1710 ; CHECK-NEXT: vstrht.16 q0, [r1]
1713 %z = getelementptr inbounds i8, ptr %y, i32 256
1714 %mask = load <8 x i16>, ptr %m, align 2
1715 %c = icmp ne <8 x i16> %mask, zeroinitializer
1716 %0 = load <8 x i16>, ptr %x, align 2
1717 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c)
; Masked store of a <8 x i16> value (loaded from %x) to %y - 254; the lane
; predicate is (%m != 0). The assertions expect the negative byte offset to be
; folded into the predicated store: vstrht.16 q0, [r0, #-254].
1721 define ptr @strh16_m254(ptr %y, ptr %x, ptr %m) {
1722 ; CHECK-LABEL: strh16_m254:
1723 ; CHECK: @ %bb.0: @ %entry
1724 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1725 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1726 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1727 ; CHECK-NEXT: vstrht.16 q0, [r0, #-254]
1730 %z = getelementptr inbounds i8, ptr %y, i32 -254
1731 %mask = load <8 x i16>, ptr %m, align 2
1732 %c = icmp ne <8 x i16> %mask, zeroinitializer
1733 %0 = load <8 x i16>, ptr %x, align 2
1734 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c)
; Masked store of a <8 x i16> value (loaded from %x) to %y - 256; the lane
; predicate is (%m != 0). The assertions expect the offset NOT to be folded:
; the address is formed with sub.w r1, r0, #256 and the store uses plain [r1].
; NOTE(review): presumably -256 is outside the encodable immediate range - confirm
; against the MVE encoding.
1738 define ptr @strh16_m256(ptr %y, ptr %x, ptr %m) {
1739 ; CHECK-LABEL: strh16_m256:
1740 ; CHECK: @ %bb.0: @ %entry
1741 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1742 ; CHECK-NEXT: vldrh.u16 q0, [r1]
1743 ; CHECK-NEXT: sub.w r1, r0, #256
1744 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1745 ; CHECK-NEXT: vstrht.16 q0, [r1]
1748 %z = getelementptr inbounds i8, ptr %y, i32 -256
1749 %mask = load <8 x i16>, ptr %m, align 2
1750 %c = icmp ne <8 x i16> %mask, zeroinitializer
1751 %0 = load <8 x i16>, ptr %x, align 2
1752 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c)
; Masked store of a <4 x i8> value (loaded from %x) to %y + 4; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the byte offset to
; be folded into the predicated store as an immediate: vstrbt.32 q0, [r0, #4].
1756 define ptr @strb32_4(ptr %y, ptr %x, ptr %m) {
1757 ; CHECK-LABEL: strb32_4:
1758 ; CHECK: @ %bb.0: @ %entry
1759 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1760 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1761 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1762 ; CHECK-NEXT: vstrbt.32 q0, [r0, #4]
1765 %z = getelementptr inbounds i8, ptr %y, i32 4
1766 %mask = load <4 x i32>, ptr %m, align 4
1767 %c = icmp ne <4 x i32> %mask, zeroinitializer
1768 %0 = load <4 x i8>, ptr %x, align 1
1769 call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c)
; Masked store of a <4 x i8> value (loaded from %x) to %y + 3; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the byte offset to
; be folded into the predicated store as an immediate: vstrbt.32 q0, [r0, #3].
1773 define ptr @strb32_3(ptr %y, ptr %x, ptr %m) {
1774 ; CHECK-LABEL: strb32_3:
1775 ; CHECK: @ %bb.0: @ %entry
1776 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1777 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1778 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1779 ; CHECK-NEXT: vstrbt.32 q0, [r0, #3]
1782 %z = getelementptr inbounds i8, ptr %y, i32 3
1783 %mask = load <4 x i32>, ptr %m, align 4
1784 %c = icmp ne <4 x i32> %mask, zeroinitializer
1785 %0 = load <4 x i8>, ptr %x, align 1
1786 call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c)
; Masked store of a <4 x i8> value (loaded from %x) to %y + 2; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the byte offset to
; be folded into the predicated store as an immediate: vstrbt.32 q0, [r0, #2].
1790 define ptr @strb32_2(ptr %y, ptr %x, ptr %m) {
1791 ; CHECK-LABEL: strb32_2:
1792 ; CHECK: @ %bb.0: @ %entry
1793 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1794 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1795 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1796 ; CHECK-NEXT: vstrbt.32 q0, [r0, #2]
1799 %z = getelementptr inbounds i8, ptr %y, i32 2
1800 %mask = load <4 x i32>, ptr %m, align 4
1801 %c = icmp ne <4 x i32> %mask, zeroinitializer
1802 %0 = load <4 x i8>, ptr %x, align 1
1803 call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c)
; Masked store of a <4 x i8> value (loaded from %x) to %y + 127; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the byte offset to
; be folded into the predicated store as an immediate: vstrbt.32 q0, [r0, #127].
1807 define ptr @strb32_127(ptr %y, ptr %x, ptr %m) {
1808 ; CHECK-LABEL: strb32_127:
1809 ; CHECK: @ %bb.0: @ %entry
1810 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1811 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1812 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1813 ; CHECK-NEXT: vstrbt.32 q0, [r0, #127]
1816 %z = getelementptr inbounds i8, ptr %y, i32 127
1817 %mask = load <4 x i32>, ptr %m, align 4
1818 %c = icmp ne <4 x i32> %mask, zeroinitializer
1819 %0 = load <4 x i8>, ptr %x, align 1
1820 call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c)
; Masked store of a <4 x i8> value (loaded from %x) to %y + 128; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the offset NOT to
; be folded: the address is formed with add.w r1, r0, #128 and the store uses [r1].
; NOTE(review): presumably 128 is outside the encodable immediate range - confirm
; against the MVE encoding.
1824 define ptr @strb32_128(ptr %y, ptr %x, ptr %m) {
1825 ; CHECK-LABEL: strb32_128:
1826 ; CHECK: @ %bb.0: @ %entry
1827 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1828 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1829 ; CHECK-NEXT: add.w r1, r0, #128
1830 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1831 ; CHECK-NEXT: vstrbt.32 q0, [r1]
1834 %z = getelementptr inbounds i8, ptr %y, i32 128
1835 %mask = load <4 x i32>, ptr %m, align 4
1836 %c = icmp ne <4 x i32> %mask, zeroinitializer
1837 %0 = load <4 x i8>, ptr %x, align 1
1838 call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c)
; Masked store of a <4 x i8> value (loaded from %x) to %y - 127; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the negative byte
; offset to be folded into the predicated store: vstrbt.32 q0, [r0, #-127].
1842 define ptr @strb32_m127(ptr %y, ptr %x, ptr %m) {
1843 ; CHECK-LABEL: strb32_m127:
1844 ; CHECK: @ %bb.0: @ %entry
1845 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1846 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1847 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1848 ; CHECK-NEXT: vstrbt.32 q0, [r0, #-127]
1851 %z = getelementptr inbounds i8, ptr %y, i32 -127
1852 %mask = load <4 x i32>, ptr %m, align 4
1853 %c = icmp ne <4 x i32> %mask, zeroinitializer
1854 %0 = load <4 x i8>, ptr %x, align 1
1855 call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c)
; Masked store of a <4 x i8> value (loaded from %x) to %y - 128; the lane
; predicate is (%m != 0, on <4 x i32>). The assertions expect the offset NOT to
; be folded: the address is formed with sub.w r1, r0, #128 and the store uses [r1].
; NOTE(review): presumably -128 is outside the encodable immediate range - confirm
; against the MVE encoding.
1859 define ptr @strb32_m128(ptr %y, ptr %x, ptr %m) {
1860 ; CHECK-LABEL: strb32_m128:
1861 ; CHECK: @ %bb.0: @ %entry
1862 ; CHECK-NEXT: vldrw.u32 q1, [r2]
1863 ; CHECK-NEXT: vldrb.u32 q0, [r1]
1864 ; CHECK-NEXT: sub.w r1, r0, #128
1865 ; CHECK-NEXT: vpt.i32 ne, q1, zr
1866 ; CHECK-NEXT: vstrbt.32 q0, [r1]
1869 %z = getelementptr inbounds i8, ptr %y, i32 -128
1870 %mask = load <4 x i32>, ptr %m, align 4
1871 %c = icmp ne <4 x i32> %mask, zeroinitializer
1872 %0 = load <4 x i8>, ptr %x, align 1
1873 call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c)
; Masked store of a <8 x i8> value (loaded from %x) to %y + 4; the lane
; predicate is (%m != 0, on <8 x i16>). The assertions expect the byte offset to
; be folded into the predicated store as an immediate: vstrbt.16 q0, [r0, #4].
1877 define ptr @strb16_4(ptr %y, ptr %x, ptr %m) {
1878 ; CHECK-LABEL: strb16_4:
1879 ; CHECK: @ %bb.0: @ %entry
1880 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1881 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1882 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1883 ; CHECK-NEXT: vstrbt.16 q0, [r0, #4]
1886 %z = getelementptr inbounds i8, ptr %y, i32 4
1887 %mask = load <8 x i16>, ptr %m, align 2
1888 %c = icmp ne <8 x i16> %mask, zeroinitializer
1889 %0 = load <8 x i8>, ptr %x, align 1
1890 call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
; Masked store of a <8 x i8> value (loaded from %x) to %y + 3; the lane
; predicate is (%m != 0, on <8 x i16>). The assertions expect the byte offset to
; be folded into the predicated store as an immediate: vstrbt.16 q0, [r0, #3].
1894 define ptr @strb16_3(ptr %y, ptr %x, ptr %m) {
1895 ; CHECK-LABEL: strb16_3:
1896 ; CHECK: @ %bb.0: @ %entry
1897 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1898 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1899 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1900 ; CHECK-NEXT: vstrbt.16 q0, [r0, #3]
1903 %z = getelementptr inbounds i8, ptr %y, i32 3
1904 %mask = load <8 x i16>, ptr %m, align 2
1905 %c = icmp ne <8 x i16> %mask, zeroinitializer
1906 %0 = load <8 x i8>, ptr %x, align 1
1907 call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
; Masked store of a <8 x i8> value (loaded from %x) to %y + 2; the lane
; predicate is (%m != 0, on <8 x i16>). The assertions expect the byte offset to
; be folded into the predicated store as an immediate: vstrbt.16 q0, [r0, #2].
1911 define ptr @strb16_2(ptr %y, ptr %x, ptr %m) {
1912 ; CHECK-LABEL: strb16_2:
1913 ; CHECK: @ %bb.0: @ %entry
1914 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1915 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1916 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1917 ; CHECK-NEXT: vstrbt.16 q0, [r0, #2]
1920 %z = getelementptr inbounds i8, ptr %y, i32 2
1921 %mask = load <8 x i16>, ptr %m, align 2
1922 %c = icmp ne <8 x i16> %mask, zeroinitializer
1923 %0 = load <8 x i8>, ptr %x, align 1
1924 call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
; Masked store of a <8 x i8> value (loaded from %x) to %y + 127; the lane
; predicate is (%m != 0, on <8 x i16>). The assertions expect the byte offset to
; be folded into the predicated store as an immediate: vstrbt.16 q0, [r0, #127].
1928 define ptr @strb16_127(ptr %y, ptr %x, ptr %m) {
1929 ; CHECK-LABEL: strb16_127:
1930 ; CHECK: @ %bb.0: @ %entry
1931 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1932 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1933 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1934 ; CHECK-NEXT: vstrbt.16 q0, [r0, #127]
1937 %z = getelementptr inbounds i8, ptr %y, i32 127
1938 %mask = load <8 x i16>, ptr %m, align 2
1939 %c = icmp ne <8 x i16> %mask, zeroinitializer
1940 %0 = load <8 x i8>, ptr %x, align 1
1941 call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
; Masked store of a <8 x i8> value (loaded from %x) to %y + 128; the lane
; predicate is (%m != 0, on <8 x i16>). The assertions expect the offset NOT to
; be folded: the address is formed with add.w r1, r0, #128 and the store uses [r1].
; NOTE(review): presumably 128 is outside the encodable immediate range - confirm
; against the MVE encoding.
1945 define ptr @strb16_128(ptr %y, ptr %x, ptr %m) {
1946 ; CHECK-LABEL: strb16_128:
1947 ; CHECK: @ %bb.0: @ %entry
1948 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1949 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1950 ; CHECK-NEXT: add.w r1, r0, #128
1951 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1952 ; CHECK-NEXT: vstrbt.16 q0, [r1]
1955 %z = getelementptr inbounds i8, ptr %y, i32 128
1956 %mask = load <8 x i16>, ptr %m, align 2
1957 %c = icmp ne <8 x i16> %mask, zeroinitializer
1958 %0 = load <8 x i8>, ptr %x, align 1
1959 call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
; Masked store of a <8 x i8> value (loaded from %x) to %y - 127; the lane
; predicate is (%m != 0, on <8 x i16>). The assertions expect the negative byte
; offset to be folded into the predicated store: vstrbt.16 q0, [r0, #-127].
1963 define ptr @strb16_m127(ptr %y, ptr %x, ptr %m) {
1964 ; CHECK-LABEL: strb16_m127:
1965 ; CHECK: @ %bb.0: @ %entry
1966 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1967 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1968 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1969 ; CHECK-NEXT: vstrbt.16 q0, [r0, #-127]
1972 %z = getelementptr inbounds i8, ptr %y, i32 -127
1973 %mask = load <8 x i16>, ptr %m, align 2
1974 %c = icmp ne <8 x i16> %mask, zeroinitializer
1975 %0 = load <8 x i8>, ptr %x, align 1
1976 call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
; Masked store of the <8 x i8> loaded from %x to %y - 128 bytes (i32 alignment
; operand 1). The predicate is the lane-wise `icmp ne` against zero of the
; <8 x i16> loaded from %m.
; Expected assembly: the address is formed separately (sub.w r1, r0, #128) and
; the store uses plain [r1]; contrast strb16_m127, where #-127 is folded.
; NOTE(review): presumably -128 is outside the store's immediate offset range -
; confirm against the instruction encoding.
1980 define ptr @strb16_m128(ptr %y, ptr %x, ptr %m) {
1981 ; CHECK-LABEL: strb16_m128:
1982 ; CHECK: @ %bb.0: @ %entry
1983 ; CHECK-NEXT: vldrh.u16 q1, [r2]
1984 ; CHECK-NEXT: vldrb.u16 q0, [r1]
1985 ; CHECK-NEXT: sub.w r1, r0, #128
1986 ; CHECK-NEXT: vpt.i16 ne, q1, zr
1987 ; CHECK-NEXT: vstrbt.16 q0, [r1]
1990 %z = getelementptr inbounds i8, ptr %y, i32 -128
1991 %mask = load <8 x i16>, ptr %m, align 2
1992 %c = icmp ne <8 x i16> %mask, zeroinitializer
1993 %0 = load <8 x i8>, ptr %x, align 1
1994 call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
; Masked store of the <16 x i8> loaded from %x to %y + 4 bytes (i32 alignment
; operand 1). The predicate is the lane-wise `icmp ne` against zero of the
; <16 x i8> loaded from %m.
; Expected assembly: the offset is folded into the store as [r0, #4].
1998 define ptr @strb8_4(ptr %y, ptr %x, ptr %m) {
1999 ; CHECK-LABEL: strb8_4:
2000 ; CHECK: @ %bb.0: @ %entry
2001 ; CHECK-NEXT: vldrb.u8 q1, [r2]
2002 ; CHECK-NEXT: vldrb.u8 q0, [r1]
2003 ; CHECK-NEXT: vpt.i8 ne, q1, zr
2004 ; CHECK-NEXT: vstrbt.8 q0, [r0, #4]
2007 %z = getelementptr inbounds i8, ptr %y, i32 4
2008 %mask = load <16 x i8>, ptr %m, align 1
2009 %c = icmp ne <16 x i8> %mask, zeroinitializer
2010 %0 = load <16 x i8>, ptr %x, align 1
2011 call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
; Masked store of the <16 x i8> loaded from %x to %y + 3 bytes (i32 alignment
; operand 1). The predicate is the lane-wise `icmp ne` against zero of the
; <16 x i8> loaded from %m.
; Expected assembly: the odd offset is still folded into the byte store as
; [r0, #3].
2015 define ptr @strb8_3(ptr %y, ptr %x, ptr %m) {
2016 ; CHECK-LABEL: strb8_3:
2017 ; CHECK: @ %bb.0: @ %entry
2018 ; CHECK-NEXT: vldrb.u8 q1, [r2]
2019 ; CHECK-NEXT: vldrb.u8 q0, [r1]
2020 ; CHECK-NEXT: vpt.i8 ne, q1, zr
2021 ; CHECK-NEXT: vstrbt.8 q0, [r0, #3]
2024 %z = getelementptr inbounds i8, ptr %y, i32 3
2025 %mask = load <16 x i8>, ptr %m, align 1
2026 %c = icmp ne <16 x i8> %mask, zeroinitializer
2027 %0 = load <16 x i8>, ptr %x, align 1
2028 call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
; Masked store of the <16 x i8> loaded from %x to %y + 2 bytes (i32 alignment
; operand 1). The predicate is the lane-wise `icmp ne` against zero of the
; <16 x i8> loaded from %m.
; Expected assembly: the offset is folded into the store as [r0, #2].
2032 define ptr @strb8_2(ptr %y, ptr %x, ptr %m) {
2033 ; CHECK-LABEL: strb8_2:
2034 ; CHECK: @ %bb.0: @ %entry
2035 ; CHECK-NEXT: vldrb.u8 q1, [r2]
2036 ; CHECK-NEXT: vldrb.u8 q0, [r1]
2037 ; CHECK-NEXT: vpt.i8 ne, q1, zr
2038 ; CHECK-NEXT: vstrbt.8 q0, [r0, #2]
2041 %z = getelementptr inbounds i8, ptr %y, i32 2
2042 %mask = load <16 x i8>, ptr %m, align 1
2043 %c = icmp ne <16 x i8> %mask, zeroinitializer
2044 %0 = load <16 x i8>, ptr %x, align 1
2045 call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
; Masked store of the <16 x i8> loaded from %x to %y + 127 bytes (i32 alignment
; operand 1). The predicate is the lane-wise `icmp ne` against zero of the
; <16 x i8> loaded from %m.
; Expected assembly: the offset is folded into the store as [r0, #127].
2049 define ptr @strb8_127(ptr %y, ptr %x, ptr %m) {
2050 ; CHECK-LABEL: strb8_127:
2051 ; CHECK: @ %bb.0: @ %entry
2052 ; CHECK-NEXT: vldrb.u8 q1, [r2]
2053 ; CHECK-NEXT: vldrb.u8 q0, [r1]
2054 ; CHECK-NEXT: vpt.i8 ne, q1, zr
2055 ; CHECK-NEXT: vstrbt.8 q0, [r0, #127]
2058 %z = getelementptr inbounds i8, ptr %y, i32 127
2059 %mask = load <16 x i8>, ptr %m, align 1
2060 %c = icmp ne <16 x i8> %mask, zeroinitializer
2061 %0 = load <16 x i8>, ptr %x, align 1
2062 call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
; Masked store of the <16 x i8> loaded from %x to %y + 128 bytes (i32 alignment
; operand 1). The predicate is the lane-wise `icmp ne` against zero of the
; <16 x i8> loaded from %m.
; Expected assembly: the address is formed separately (add.w r1, r0, #128) and
; the store uses plain [r1]; contrast strb8_127, where #127 is folded.
; NOTE(review): presumably 128 is outside the store's immediate offset range -
; confirm against the instruction encoding.
2066 define ptr @strb8_128(ptr %y, ptr %x, ptr %m) {
2067 ; CHECK-LABEL: strb8_128:
2068 ; CHECK: @ %bb.0: @ %entry
2069 ; CHECK-NEXT: vldrb.u8 q1, [r2]
2070 ; CHECK-NEXT: vldrb.u8 q0, [r1]
2071 ; CHECK-NEXT: add.w r1, r0, #128
2072 ; CHECK-NEXT: vpt.i8 ne, q1, zr
2073 ; CHECK-NEXT: vstrbt.8 q0, [r1]
2076 %z = getelementptr inbounds i8, ptr %y, i32 128
2077 %mask = load <16 x i8>, ptr %m, align 1
2078 %c = icmp ne <16 x i8> %mask, zeroinitializer
2079 %0 = load <16 x i8>, ptr %x, align 1
2080 call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
; Masked store of the <16 x i8> loaded from %x to %y - 127 bytes (i32 alignment
; operand 1). The predicate is the lane-wise `icmp ne` against zero of the
; <16 x i8> loaded from %m.
; Expected assembly: the negative offset is folded into the store as
; [r0, #-127].
2084 define ptr @strb8_m127(ptr %y, ptr %x, ptr %m) {
2085 ; CHECK-LABEL: strb8_m127:
2086 ; CHECK: @ %bb.0: @ %entry
2087 ; CHECK-NEXT: vldrb.u8 q1, [r2]
2088 ; CHECK-NEXT: vldrb.u8 q0, [r1]
2089 ; CHECK-NEXT: vpt.i8 ne, q1, zr
2090 ; CHECK-NEXT: vstrbt.8 q0, [r0, #-127]
2093 %z = getelementptr inbounds i8, ptr %y, i32 -127
2094 %mask = load <16 x i8>, ptr %m, align 1
2095 %c = icmp ne <16 x i8> %mask, zeroinitializer
2096 %0 = load <16 x i8>, ptr %x, align 1
2097 call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
; Masked store of the <16 x i8> loaded from %x to %y - 128 bytes (i32 alignment
; operand 1). The predicate is the lane-wise `icmp ne` against zero of the
; <16 x i8> loaded from %m.
; Expected assembly: the address is formed separately (sub.w r1, r0, #128) and
; the store uses plain [r1]; contrast strb8_m127, where #-127 is folded.
; NOTE(review): presumably -128 is outside the store's immediate offset range -
; confirm against the instruction encoding.
2101 define ptr @strb8_m128(ptr %y, ptr %x, ptr %m) {
2102 ; CHECK-LABEL: strb8_m128:
2103 ; CHECK: @ %bb.0: @ %entry
2104 ; CHECK-NEXT: vldrb.u8 q1, [r2]
2105 ; CHECK-NEXT: vldrb.u8 q0, [r1]
2106 ; CHECK-NEXT: sub.w r1, r0, #128
2107 ; CHECK-NEXT: vpt.i8 ne, q1, zr
2108 ; CHECK-NEXT: vstrbt.8 q0, [r1]
2111 %z = getelementptr inbounds i8, ptr %y, i32 -128
2112 %mask = load <16 x i8>, ptr %m, align 1
2113 %c = icmp ne <16 x i8> %mask, zeroinitializer
2114 %0 = load <16 x i8>, ptr %x, align 1
2115 call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
; Masked store of the <4 x float> loaded from %x to %y + 4 bytes (i32 alignment
; operand 4). The predicate is the lane-wise `icmp ne` against zero of the
; <4 x i32> loaded from %m.
; Expected assembly: the offset is folded into the store as [r0, #4].
2119 define ptr @strwf32_4(ptr %y, ptr %x, ptr %m) {
2120 ; CHECK-LABEL: strwf32_4:
2121 ; CHECK: @ %bb.0: @ %entry
2122 ; CHECK-NEXT: vldrw.u32 q1, [r2]
2123 ; CHECK-NEXT: vldrw.u32 q0, [r1]
2124 ; CHECK-NEXT: vpt.i32 ne, q1, zr
2125 ; CHECK-NEXT: vstrwt.32 q0, [r0, #4]
2128 %z = getelementptr inbounds i8, ptr %y, i32 4
2129 %mask = load <4 x i32>, ptr %m, align 4
2130 %c = icmp ne <4 x i32> %mask, zeroinitializer
2131 %0 = load <4 x float>, ptr %x, align 4
2132 call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of the <4 x float> loaded from %x to %y + 3 bytes (i32 alignment
; operand 4). The predicate is the lane-wise `icmp ne` against zero of the
; <4 x i32> loaded from %m.
; Expected assembly: the address is formed separately (adds r1, r0, #3) and the
; store uses plain [r1]; contrast strwf32_4, where #4 is folded.
; NOTE(review): presumably the offset is not folded because 3 is not a multiple
; of the 4-byte element size - confirm against the instruction encoding.
2136 define ptr @strwf32_3(ptr %y, ptr %x, ptr %m) {
2137 ; CHECK-LABEL: strwf32_3:
2138 ; CHECK: @ %bb.0: @ %entry
2139 ; CHECK-NEXT: vldrw.u32 q1, [r2]
2140 ; CHECK-NEXT: vldrw.u32 q0, [r1]
2141 ; CHECK-NEXT: adds r1, r0, #3
2142 ; CHECK-NEXT: vpt.i32 ne, q1, zr
2143 ; CHECK-NEXT: vstrwt.32 q0, [r1]
2146 %z = getelementptr inbounds i8, ptr %y, i32 3
2147 %mask = load <4 x i32>, ptr %m, align 4
2148 %c = icmp ne <4 x i32> %mask, zeroinitializer
2149 %0 = load <4 x float>, ptr %x, align 4
2150 call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of the <4 x float> loaded from %x to %y + 2 bytes (i32 alignment
; operand 4). The predicate is the lane-wise `icmp ne` against zero of the
; <4 x i32> loaded from %m.
; Expected assembly: the address is formed separately (adds r1, r0, #2) and the
; store uses plain [r1]; contrast strwf32_4, where #4 is folded.
; NOTE(review): presumably the offset is not folded because 2 is not a multiple
; of the 4-byte element size - confirm against the instruction encoding.
2154 define ptr @strwf32_2(ptr %y, ptr %x, ptr %m) {
2155 ; CHECK-LABEL: strwf32_2:
2156 ; CHECK: @ %bb.0: @ %entry
2157 ; CHECK-NEXT: vldrw.u32 q1, [r2]
2158 ; CHECK-NEXT: vldrw.u32 q0, [r1]
2159 ; CHECK-NEXT: adds r1, r0, #2
2160 ; CHECK-NEXT: vpt.i32 ne, q1, zr
2161 ; CHECK-NEXT: vstrwt.32 q0, [r1]
2164 %z = getelementptr inbounds i8, ptr %y, i32 2
2165 %mask = load <4 x i32>, ptr %m, align 4
2166 %c = icmp ne <4 x i32> %mask, zeroinitializer
2167 %0 = load <4 x float>, ptr %x, align 4
2168 call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of the <4 x float> loaded from %x to %y + 508 bytes (i32
; alignment operand 4). The predicate is the lane-wise `icmp ne` against zero
; of the <4 x i32> loaded from %m.
; Expected assembly: the offset is folded into the store as [r0, #508].
2172 define ptr @strwf32_508(ptr %y, ptr %x, ptr %m) {
2173 ; CHECK-LABEL: strwf32_508:
2174 ; CHECK: @ %bb.0: @ %entry
2175 ; CHECK-NEXT: vldrw.u32 q1, [r2]
2176 ; CHECK-NEXT: vldrw.u32 q0, [r1]
2177 ; CHECK-NEXT: vpt.i32 ne, q1, zr
2178 ; CHECK-NEXT: vstrwt.32 q0, [r0, #508]
2181 %z = getelementptr inbounds i8, ptr %y, i32 508
2182 %mask = load <4 x i32>, ptr %m, align 4
2183 %c = icmp ne <4 x i32> %mask, zeroinitializer
2184 %0 = load <4 x float>, ptr %x, align 4
2185 call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of the <4 x float> loaded from %x to %y + 512 bytes (i32
; alignment operand 4). The predicate is the lane-wise `icmp ne` against zero
; of the <4 x i32> loaded from %m.
; Expected assembly: the address is formed separately (add.w r1, r0, #512) and
; the store uses plain [r1]; contrast strwf32_508, where #508 is folded.
; NOTE(review): presumably 512 is outside the store's immediate offset range -
; confirm against the instruction encoding.
2189 define ptr @strwf32_512(ptr %y, ptr %x, ptr %m) {
2190 ; CHECK-LABEL: strwf32_512:
2191 ; CHECK: @ %bb.0: @ %entry
2192 ; CHECK-NEXT: vldrw.u32 q1, [r2]
2193 ; CHECK-NEXT: vldrw.u32 q0, [r1]
2194 ; CHECK-NEXT: add.w r1, r0, #512
2195 ; CHECK-NEXT: vpt.i32 ne, q1, zr
2196 ; CHECK-NEXT: vstrwt.32 q0, [r1]
2199 %z = getelementptr inbounds i8, ptr %y, i32 512
2200 %mask = load <4 x i32>, ptr %m, align 4
2201 %c = icmp ne <4 x i32> %mask, zeroinitializer
2202 %0 = load <4 x float>, ptr %x, align 4
2203 call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of the <4 x float> loaded from %x to %y - 508 bytes (i32
; alignment operand 4). The predicate is the lane-wise `icmp ne` against zero
; of the <4 x i32> loaded from %m.
; Expected assembly: the negative offset is folded into the store as
; [r0, #-508].
2207 define ptr @strwf32_m508(ptr %y, ptr %x, ptr %m) {
2208 ; CHECK-LABEL: strwf32_m508:
2209 ; CHECK: @ %bb.0: @ %entry
2210 ; CHECK-NEXT: vldrw.u32 q1, [r2]
2211 ; CHECK-NEXT: vldrw.u32 q0, [r1]
2212 ; CHECK-NEXT: vpt.i32 ne, q1, zr
2213 ; CHECK-NEXT: vstrwt.32 q0, [r0, #-508]
2216 %z = getelementptr inbounds i8, ptr %y, i32 -508
2217 %mask = load <4 x i32>, ptr %m, align 4
2218 %c = icmp ne <4 x i32> %mask, zeroinitializer
2219 %0 = load <4 x float>, ptr %x, align 4
2220 call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of the <4 x float> loaded from %x to %y - 512 bytes (i32
; alignment operand 4). The predicate is the lane-wise `icmp ne` against zero
; of the <4 x i32> loaded from %m.
; Expected assembly: the address is formed separately (sub.w r1, r0, #512) and
; the store uses plain [r1]; contrast strwf32_m508, where #-508 is folded.
; NOTE(review): presumably -512 is outside the store's immediate offset range -
; confirm against the instruction encoding.
2224 define ptr @strwf32_m512(ptr %y, ptr %x, ptr %m) {
2225 ; CHECK-LABEL: strwf32_m512:
2226 ; CHECK: @ %bb.0: @ %entry
2227 ; CHECK-NEXT: vldrw.u32 q1, [r2]
2228 ; CHECK-NEXT: vldrw.u32 q0, [r1]
2229 ; CHECK-NEXT: sub.w r1, r0, #512
2230 ; CHECK-NEXT: vpt.i32 ne, q1, zr
2231 ; CHECK-NEXT: vstrwt.32 q0, [r1]
2234 %z = getelementptr inbounds i8, ptr %y, i32 -512
2235 %mask = load <4 x i32>, ptr %m, align 4
2236 %c = icmp ne <4 x i32> %mask, zeroinitializer
2237 %0 = load <4 x float>, ptr %x, align 4
2238 call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
; Masked store of the <8 x half> loaded from %x to %y + 4 bytes (i32 alignment
; operand 2). The predicate is the lane-wise `icmp ne` against zero of the
; <8 x i16> loaded from %m.
; Expected assembly: the offset is folded into the store as [r0, #4].
2242 define ptr @strhf16_4(ptr %y, ptr %x, ptr %m) {
2243 ; CHECK-LABEL: strhf16_4:
2244 ; CHECK: @ %bb.0: @ %entry
2245 ; CHECK-NEXT: vldrh.u16 q1, [r2]
2246 ; CHECK-NEXT: vldrh.u16 q0, [r1]
2247 ; CHECK-NEXT: vpt.i16 ne, q1, zr
2248 ; CHECK-NEXT: vstrht.16 q0, [r0, #4]
2251 %z = getelementptr inbounds i8, ptr %y, i32 4
2252 %mask = load <8 x i16>, ptr %m, align 2
2253 %c = icmp ne <8 x i16> %mask, zeroinitializer
2254 %0 = load <8 x half>, ptr %x, align 2
2255 call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
; Masked store of the <8 x half> loaded from %x to %y + 3 bytes (i32 alignment
; operand 2). The predicate is the lane-wise `icmp ne` against zero of the
; <8 x i16> loaded from %m.
; Expected assembly: the address is formed separately (adds r1, r0, #3) and the
; store uses plain [r1]; contrast strhf16_2 and strhf16_4, where the offset is
; folded.
; NOTE(review): presumably the offset is not folded because 3 is not a multiple
; of the 2-byte element size - confirm against the instruction encoding.
2259 define ptr @strhf16_3(ptr %y, ptr %x, ptr %m) {
2260 ; CHECK-LABEL: strhf16_3:
2261 ; CHECK: @ %bb.0: @ %entry
2262 ; CHECK-NEXT: vldrh.u16 q1, [r2]
2263 ; CHECK-NEXT: vldrh.u16 q0, [r1]
2264 ; CHECK-NEXT: adds r1, r0, #3
2265 ; CHECK-NEXT: vpt.i16 ne, q1, zr
2266 ; CHECK-NEXT: vstrht.16 q0, [r1]
2269 %z = getelementptr inbounds i8, ptr %y, i32 3
2270 %mask = load <8 x i16>, ptr %m, align 2
2271 %c = icmp ne <8 x i16> %mask, zeroinitializer
2272 %0 = load <8 x half>, ptr %x, align 2
2273 call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
; Masked store of the <8 x half> loaded from %x to %y + 2 bytes (i32 alignment
; operand 2). The predicate is the lane-wise `icmp ne` against zero of the
; <8 x i16> loaded from %m.
; Expected assembly: the offset is folded into the store as [r0, #2].
2277 define ptr @strhf16_2(ptr %y, ptr %x, ptr %m) {
2278 ; CHECK-LABEL: strhf16_2:
2279 ; CHECK: @ %bb.0: @ %entry
2280 ; CHECK-NEXT: vldrh.u16 q1, [r2]
2281 ; CHECK-NEXT: vldrh.u16 q0, [r1]
2282 ; CHECK-NEXT: vpt.i16 ne, q1, zr
2283 ; CHECK-NEXT: vstrht.16 q0, [r0, #2]
2286 %z = getelementptr inbounds i8, ptr %y, i32 2
2287 %mask = load <8 x i16>, ptr %m, align 2
2288 %c = icmp ne <8 x i16> %mask, zeroinitializer
2289 %0 = load <8 x half>, ptr %x, align 2
2290 call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
; Masked store of the <8 x half> loaded from %x to %y + 254 bytes (i32
; alignment operand 2). The predicate is the lane-wise `icmp ne` against zero
; of the <8 x i16> loaded from %m.
; Expected assembly: the offset is folded into the store as [r0, #254].
2294 define ptr @strhf16_254(ptr %y, ptr %x, ptr %m) {
2295 ; CHECK-LABEL: strhf16_254:
2296 ; CHECK: @ %bb.0: @ %entry
2297 ; CHECK-NEXT: vldrh.u16 q1, [r2]
2298 ; CHECK-NEXT: vldrh.u16 q0, [r1]
2299 ; CHECK-NEXT: vpt.i16 ne, q1, zr
2300 ; CHECK-NEXT: vstrht.16 q0, [r0, #254]
2303 %z = getelementptr inbounds i8, ptr %y, i32 254
2304 %mask = load <8 x i16>, ptr %m, align 2
2305 %c = icmp ne <8 x i16> %mask, zeroinitializer
2306 %0 = load <8 x half>, ptr %x, align 2
2307 call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
; Masked store of the <8 x half> loaded from %x to %y + 256 bytes (i32
; alignment operand 2). The predicate is the lane-wise `icmp ne` against zero
; of the <8 x i16> loaded from %m.
; Expected assembly: the address is formed separately (add.w r1, r0, #256) and
; the store uses plain [r1]; contrast strhf16_254, where #254 is folded.
; NOTE(review): presumably 256 is outside the store's immediate offset range -
; confirm against the instruction encoding.
2311 define ptr @strhf16_256(ptr %y, ptr %x, ptr %m) {
2312 ; CHECK-LABEL: strhf16_256:
2313 ; CHECK: @ %bb.0: @ %entry
2314 ; CHECK-NEXT: vldrh.u16 q1, [r2]
2315 ; CHECK-NEXT: vldrh.u16 q0, [r1]
2316 ; CHECK-NEXT: add.w r1, r0, #256
2317 ; CHECK-NEXT: vpt.i16 ne, q1, zr
2318 ; CHECK-NEXT: vstrht.16 q0, [r1]
2321 %z = getelementptr inbounds i8, ptr %y, i32 256
2322 %mask = load <8 x i16>, ptr %m, align 2
2323 %c = icmp ne <8 x i16> %mask, zeroinitializer
2324 %0 = load <8 x half>, ptr %x, align 2
2325 call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
; Masked store of the <8 x half> loaded from %x to %y - 254 bytes (i32
; alignment operand 2). The predicate is the lane-wise `icmp ne` against zero
; of the <8 x i16> loaded from %m.
; Expected assembly: the negative offset is folded into the store as
; [r0, #-254].
2329 define ptr @strhf16_m254(ptr %y, ptr %x, ptr %m) {
2330 ; CHECK-LABEL: strhf16_m254:
2331 ; CHECK: @ %bb.0: @ %entry
2332 ; CHECK-NEXT: vldrh.u16 q1, [r2]
2333 ; CHECK-NEXT: vldrh.u16 q0, [r1]
2334 ; CHECK-NEXT: vpt.i16 ne, q1, zr
2335 ; CHECK-NEXT: vstrht.16 q0, [r0, #-254]
2338 %z = getelementptr inbounds i8, ptr %y, i32 -254
2339 %mask = load <8 x i16>, ptr %m, align 2
2340 %c = icmp ne <8 x i16> %mask, zeroinitializer
2341 %0 = load <8 x half>, ptr %x, align 2
2342 call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
; Masked store of the <8 x half> loaded from %x to %y - 256 bytes (i32
; alignment operand 2). The predicate is the lane-wise `icmp ne` against zero
; of the <8 x i16> loaded from %m.
; Expected assembly: the address is formed separately (sub.w r1, r0, #256) and
; the store uses plain [r1]; contrast strhf16_m254, where #-254 is folded.
; NOTE(review): presumably -256 is outside the store's immediate offset range -
; confirm against the instruction encoding.
2346 define ptr @strhf16_m256(ptr %y, ptr %x, ptr %m) {
2347 ; CHECK-LABEL: strhf16_m256:
2348 ; CHECK: @ %bb.0: @ %entry
2349 ; CHECK-NEXT: vldrh.u16 q1, [r2]
2350 ; CHECK-NEXT: vldrh.u16 q0, [r1]
2351 ; CHECK-NEXT: sub.w r1, r0, #256
2352 ; CHECK-NEXT: vpt.i16 ne, q1, zr
2353 ; CHECK-NEXT: vstrht.16 q0, [r1]
2356 %z = getelementptr inbounds i8, ptr %y, i32 -256
2357 %mask = load <8 x i16>, ptr %m, align 2
2358 %c = icmp ne <8 x i16> %mask, zeroinitializer
2359 %0 = load <8 x half>, ptr %x, align 2
2360 call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
; Declarations of the masked load intrinsics, one per vector type
; (v4i32, v4i16, v8i16, v4i8, v8i8, v16i8, v4f32, v8f16). As declared here each
; takes (ptr, i32, <N x i1>, <N x T>) and returns <N x T>.
2364 declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>)
2365 declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>)
2366 declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>)
2367 declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32, <4 x i1>, <4 x i8>)
2368 declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>)
2369 declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>)
2370 declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)
2371 declare <8 x half> @llvm.masked.load.v8f16.p0(ptr, i32, <8 x i1>, <8 x half>)
; Declarations of the masked store intrinsics for the same eight vector types.
; As declared here each takes (<N x T>, ptr, i32, <N x i1>) and returns void.
2373 declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)
2374 declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>)
2375 declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>)
2376 declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>)
2377 declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>)
2378 declare void @llvm.masked.store.v4i8.p0(<4 x i8>, ptr, i32, <4 x i1>)
2379 declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)
2380 declare void @llvm.masked.store.v8f16.p0(<8 x half>, ptr, i32, <8 x i1>)