1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
4 ; VSTRB.32 Qd, [base, offs]
5 define arm_aapcs_vfpcc void @ext_unscaled_i8_i32(ptr %base, ptr %offptr, <4 x i32> %input) {
6 ; CHECK-LABEL: ext_unscaled_i8_i32:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vldrw.u32 q1, [r1]
9 ; CHECK-NEXT: vstrb.32 q0, [r0, q1]
; Truncating byte scatter: each i32 lane of %input is narrowed to i8 and stored
; at %base plus the matching i32 offset loaded from %offptr. The CHECK lines
; above expect one offset load followed by a single vstrb.32.
12 %offs = load <4 x i32>, ptr %offptr, align 4
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
13 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
14 %t = trunc <4 x i32> %input to <4 x i8>
; All-true mask, so every lane is written.
; NOTE(review): the alignment operand is 2 although the stored element is i8 - confirm this is intentional.
15 call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
19 ; VSTRH.32 Qd, [base, offs]
20 define arm_aapcs_vfpcc void @ext_unscaled_i16_i32(ptr %base, ptr %offptr, <4 x i32> %input) {
21 ; CHECK-LABEL: ext_unscaled_i16_i32:
22 ; CHECK: @ %bb.0: @ %entry
23 ; CHECK-NEXT: vldrw.u32 q1, [r1]
24 ; CHECK-NEXT: vstrh.32 q0, [r0, q1]
; Truncating halfword scatter: each i32 lane of %input is narrowed to i16 and
; stored at %base plus the matching i32 offset loaded from %offptr. The CHECK
; lines above expect one offset load followed by a single vstrh.32.
27 %offs = load <4 x i32>, ptr %offptr, align 4
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
28 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
29 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
30 %t = trunc <4 x i32> %input to <4 x i16>
; All-true mask, so every lane is written.
31 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
35 ; VSTRW.32 Qd, [base, offs]
36 define arm_aapcs_vfpcc void @unscaled_i32_i32(ptr %base, ptr %offptr, <4 x i32> %input) {
37 ; CHECK-LABEL: unscaled_i32_i32:
38 ; CHECK: @ %bb.0: @ %entry
39 ; CHECK-NEXT: vldrw.u32 q1, [r1]
40 ; CHECK-NEXT: vstrw.32 q0, [r0, q1]
; Word scatter with no narrowing: the i32 lanes of %input are stored at %base
; plus the matching i32 offset loaded from %offptr. The CHECK lines above
; expect one offset load followed by a single vstrw.32.
43 %offs = load <4 x i32>, ptr %offptr, align 4
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
44 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
45 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
; All-true mask, so every lane is written.
46 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
50 ; VSTRW.32 Qd, [base, offs]
51 define arm_aapcs_vfpcc void @unscaled_f32_i32(ptr %base, ptr %offptr, <4 x float> %input) {
52 ; CHECK-LABEL: unscaled_f32_i32:
53 ; CHECK: @ %bb.0: @ %entry
54 ; CHECK-NEXT: vldrw.u32 q1, [r1]
55 ; CHECK-NEXT: vstrw.32 q0, [r0, q1]
; Float variant of the word scatter: the four float lanes of %input are stored
; at %base plus the matching i32 offset loaded from %offptr. The CHECK lines
; above expect the same two instructions as the integer word scatter.
58 %offs = load <4 x i32>, ptr %offptr, align 4
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
59 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
60 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
; All-true mask, so every lane is written.
61 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
65 ; VSTRW.32 Qd, [base, offs.zext]
66 define arm_aapcs_vfpcc void @unsigned_unscaled_b_i32_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
67 ; CHECK-LABEL: unsigned_unscaled_b_i32_i16:
68 ; CHECK: @ %bb.0: @ %entry
69 ; CHECK-NEXT: vldrh.u32 q1, [r1]
70 ; CHECK-NEXT: vstrw.32 q0, [r0, q1]
; Word scatter whose offsets are stored as i16 and zero-extended to i32. The
; CHECK lines above expect a widening vldrh.u32 for the offsets and no
; separate extend instruction before the vstrw.32.
73 %offs = load <4 x i16>, ptr %offptr, align 2
74 %offs.zext = zext <4 x i16> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
75 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
76 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
; All-true mask, so every lane is written.
77 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
81 ; VSTRW.32 Qd, [base, offs.sext]
82 define arm_aapcs_vfpcc void @signed_unscaled_i32_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
83 ; CHECK-LABEL: signed_unscaled_i32_i16:
84 ; CHECK: @ %bb.0: @ %entry
85 ; CHECK-NEXT: vldrh.s32 q1, [r1]
86 ; CHECK-NEXT: vstrw.32 q0, [r0, q1]
; Word scatter whose offsets are stored as i16 and sign-extended to i32. The
; CHECK lines above expect a widening vldrh.s32 for the offsets and no
; separate extend instruction before the vstrw.32.
89 %offs = load <4 x i16>, ptr %offptr, align 2
90 %offs.sext = sext <4 x i16> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
91 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
92 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
; All-true mask, so every lane is written.
93 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
97 ; VSTRW.32 Qd, [base, offs.zext]
98 define arm_aapcs_vfpcc void @a_unsigned_unscaled_f32_i16(ptr %base, ptr %offptr, <4 x float> %input) {
99 ; CHECK-LABEL: a_unsigned_unscaled_f32_i16:
100 ; CHECK: @ %bb.0: @ %entry
101 ; CHECK-NEXT: vldrh.u32 q1, [r1]
102 ; CHECK-NEXT: vstrw.32 q0, [r0, q1]
; Float word scatter whose offsets are stored as i16 and zero-extended to i32.
; The CHECK lines above expect a widening vldrh.u32 for the offsets and no
; separate extend instruction before the vstrw.32.
105 %offs = load <4 x i16>, ptr %offptr, align 2
106 %offs.zext = zext <4 x i16> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
107 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
108 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
; All-true mask, so every lane is written.
109 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
113 ; VSTRW.32 Qd, [base, offs.sext]
114 define arm_aapcs_vfpcc void @b_signed_unscaled_f32_i16(ptr %base, ptr %offptr, <4 x float> %input) {
115 ; CHECK-LABEL: b_signed_unscaled_f32_i16:
116 ; CHECK: @ %bb.0: @ %entry
117 ; CHECK-NEXT: vldrh.s32 q1, [r1]
118 ; CHECK-NEXT: vstrw.32 q0, [r0, q1]
; Float word scatter whose offsets are stored as i16 and sign-extended to i32.
; The CHECK lines above expect a widening vldrh.s32 for the offsets and no
; separate extend instruction before the vstrw.32.
121 %offs = load <4 x i16>, ptr %offptr, align 2
122 %offs.sext = sext <4 x i16> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
123 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
124 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
; All-true mask, so every lane is written.
125 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
129 ; VSTRH.32 Qd, [base, offs.sext]
130 define arm_aapcs_vfpcc void @ext_signed_unscaled_i16_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
131 ; CHECK-LABEL: ext_signed_unscaled_i16_i16:
132 ; CHECK: @ %bb.0: @ %entry
133 ; CHECK-NEXT: vldrh.s32 q1, [r1]
134 ; CHECK-NEXT: vstrh.32 q0, [r0, q1]
; Truncating halfword scatter with i16 offsets sign-extended to i32. The CHECK
; lines above expect a widening vldrh.s32 for the offsets followed by a single
; vstrh.32, with no separate extend or truncate instruction.
137 %offs = load <4 x i16>, ptr %offptr, align 2
138 %offs.sext = sext <4 x i16> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
139 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
140 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
141 %t = trunc <4 x i32> %input to <4 x i16>
; All-true mask, so every lane is written.
142 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
146 ; VSTRH.32 Qd, [base, offs.zext]
147 define arm_aapcs_vfpcc void @ext_unsigned_unscaled_i16_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
148 ; CHECK-LABEL: ext_unsigned_unscaled_i16_i16:
149 ; CHECK: @ %bb.0: @ %entry
150 ; CHECK-NEXT: vldrh.u32 q1, [r1]
151 ; CHECK-NEXT: vstrh.32 q0, [r0, q1]
; Truncating halfword scatter with i16 offsets zero-extended to i32. The CHECK
; lines above expect a widening vldrh.u32 for the offsets followed by a single
; vstrh.32, with no separate extend or truncate instruction.
154 %offs = load <4 x i16>, ptr %offptr, align 2
155 %offs.zext = zext <4 x i16> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
156 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
157 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
158 %t = trunc <4 x i32> %input to <4 x i16>
; All-true mask, so every lane is written.
159 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
163 ; VSTRB.32 Qd, [base, offs.sext]
164 define arm_aapcs_vfpcc void @ext_signed_unscaled_i8_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
165 ; CHECK-LABEL: ext_signed_unscaled_i8_i16:
166 ; CHECK: @ %bb.0: @ %entry
167 ; CHECK-NEXT: vldrh.s32 q1, [r1]
168 ; CHECK-NEXT: vstrb.32 q0, [r0, q1]
; Truncating byte scatter with i16 offsets sign-extended to i32. The CHECK
; lines above expect a widening vldrh.s32 for the offsets followed by a single
; vstrb.32, with no separate extend or truncate instruction.
171 %offs = load <4 x i16>, ptr %offptr, align 2
172 %offs.sext = sext <4 x i16> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
173 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
174 %t = trunc <4 x i32> %input to <4 x i8>
; All-true mask, so every lane is written.
; NOTE(review): the alignment operand is 2 although the stored element is i8 - confirm this is intentional.
175 call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
179 ; VSTRB.32 Qd, [base, offs.zext]
180 define arm_aapcs_vfpcc void @ext_unsigned_unscaled_i8_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
181 ; CHECK-LABEL: ext_unsigned_unscaled_i8_i16:
182 ; CHECK: @ %bb.0: @ %entry
183 ; CHECK-NEXT: vldrh.u32 q1, [r1]
184 ; CHECK-NEXT: vstrb.32 q0, [r0, q1]
; Truncating byte scatter with i16 offsets zero-extended to i32. The CHECK
; lines above expect a widening vldrh.u32 for the offsets followed by a single
; vstrb.32, with no separate extend or truncate instruction.
187 %offs = load <4 x i16>, ptr %offptr, align 2
188 %offs.zext = zext <4 x i16> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
189 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
190 %t = trunc <4 x i32> %input to <4 x i8>
; All-true mask, so every lane is written.
; NOTE(review): the alignment operand is 2 although the stored element is i8 - confirm this is intentional.
191 call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
195 ; VSTRW.32 Qd, [base, offs.zext]
196 define arm_aapcs_vfpcc void @unsigned_unscaled_b_i32_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
197 ; CHECK-LABEL: unsigned_unscaled_b_i32_i8:
198 ; CHECK: @ %bb.0: @ %entry
199 ; CHECK-NEXT: vldrb.u32 q1, [r1]
200 ; CHECK-NEXT: vstrw.32 q0, [r0, q1]
; Word scatter whose offsets are stored as i8 and zero-extended to i32. The
; CHECK lines above expect a widening vldrb.u32 for the offsets and no
; separate extend instruction before the vstrw.32.
203 %offs = load <4 x i8>, ptr %offptr, align 1
204 %offs.zext = zext <4 x i8> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
205 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
206 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
; All-true mask, so every lane is written.
207 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
211 ; VSTRW.32 Qd, [base, offs.sext]
212 define arm_aapcs_vfpcc void @signed_unscaled_i32_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
213 ; CHECK-LABEL: signed_unscaled_i32_i8:
214 ; CHECK: @ %bb.0: @ %entry
215 ; CHECK-NEXT: vldrb.s32 q1, [r1]
216 ; CHECK-NEXT: vstrw.32 q0, [r0, q1]
; Word scatter whose offsets are stored as i8 and sign-extended to i32. The
; CHECK lines above expect a widening vldrb.s32 for the offsets and no
; separate extend instruction before the vstrw.32.
219 %offs = load <4 x i8>, ptr %offptr, align 1
220 %offs.sext = sext <4 x i8> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
221 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
222 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
; All-true mask, so every lane is written.
223 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
227 ; VSTRW.32 Qd, [base, offs.zext]
228 define arm_aapcs_vfpcc void @a_unsigned_unscaled_f32_i8(ptr %base, ptr %offptr, <4 x float> %input) {
229 ; CHECK-LABEL: a_unsigned_unscaled_f32_i8:
230 ; CHECK: @ %bb.0: @ %entry
231 ; CHECK-NEXT: vldrb.u32 q1, [r1]
232 ; CHECK-NEXT: vstrw.32 q0, [r0, q1]
; Float word scatter whose offsets are stored as i8 and zero-extended to i32.
; The CHECK lines above expect a widening vldrb.u32 for the offsets and no
; separate extend instruction before the vstrw.32.
235 %offs = load <4 x i8>, ptr %offptr, align 1
236 %offs.zext = zext <4 x i8> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
237 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
238 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
; All-true mask, so every lane is written.
239 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
243 ; VSTRW.32 Qd, [base, offs.sext]
244 define arm_aapcs_vfpcc void @b_signed_unscaled_f32_i8(ptr %base, ptr %offptr, <4 x float> %input) {
245 ; CHECK-LABEL: b_signed_unscaled_f32_i8:
246 ; CHECK: @ %bb.0: @ %entry
247 ; CHECK-NEXT: vldrb.s32 q1, [r1]
248 ; CHECK-NEXT: vstrw.32 q0, [r0, q1]
; Float word scatter whose offsets are stored as i8 and sign-extended to i32.
; The CHECK lines above expect a widening vldrb.s32 for the offsets and no
; separate extend instruction before the vstrw.32.
251 %offs = load <4 x i8>, ptr %offptr, align 1
252 %offs.sext = sext <4 x i8> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
253 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
254 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
; All-true mask, so every lane is written.
255 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
259 ; VSTRB.32 Qd, [base, offs.sext]
260 define arm_aapcs_vfpcc void @ext_signed_unscaled_i8_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
261 ; CHECK-LABEL: ext_signed_unscaled_i8_i8:
262 ; CHECK: @ %bb.0: @ %entry
263 ; CHECK-NEXT: vldrb.s32 q1, [r1]
264 ; CHECK-NEXT: vstrb.32 q0, [r0, q1]
; Truncating byte scatter with i8 offsets sign-extended to i32. The CHECK
; lines above expect a widening vldrb.s32 for the offsets followed by a single
; vstrb.32, with no separate extend or truncate instruction.
267 %offs = load <4 x i8>, ptr %offptr, align 1
268 %offs.sext = sext <4 x i8> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
269 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
270 %t = trunc <4 x i32> %input to <4 x i8>
; All-true mask, so every lane is written.
; NOTE(review): the alignment operand is 2 although the stored element is i8 - confirm this is intentional.
271 call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
275 ; VSTRB.32 Qd, [base, offs.zext]
276 define arm_aapcs_vfpcc void @ext_unsigned_unscaled_i8_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
277 ; CHECK-LABEL: ext_unsigned_unscaled_i8_i8:
278 ; CHECK: @ %bb.0: @ %entry
279 ; CHECK-NEXT: vldrb.u32 q1, [r1]
280 ; CHECK-NEXT: vstrb.32 q0, [r0, q1]
; Truncating byte scatter with i8 offsets zero-extended to i32. The CHECK
; lines above expect a widening vldrb.u32 for the offsets followed by a single
; vstrb.32, with no separate extend or truncate instruction.
283 %offs = load <4 x i8>, ptr %offptr, align 1
284 %offs.zext = zext <4 x i8> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
285 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
286 %t = trunc <4 x i32> %input to <4 x i8>
; All-true mask, so every lane is written.
; NOTE(review): the alignment operand is 2 although the stored element is i8 - confirm this is intentional.
287 call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
291 define arm_aapcs_vfpcc void @trunc_signed_unscaled_i64_i8(ptr %base, ptr %offptr, <4 x i64> %input) {
292 ; CHECK-LABEL: trunc_signed_unscaled_i64_i8:
293 ; CHECK: @ %bb.0: @ %entry
294 ; CHECK-NEXT: vldrb.s32 q2, [r1]
295 ; CHECK-NEXT: vmov.f32 s1, s2
296 ; CHECK-NEXT: vmov.f32 s2, s4
297 ; CHECK-NEXT: vmov.f32 s3, s6
298 ; CHECK-NEXT: vstrw.32 q0, [r0, q2]
; Word scatter of a <4 x i64> input truncated to <4 x i32>, with i8 offsets
; sign-extended to i32. The CHECK lines above expect three vmov.f32 lane moves
; before the vstrw.32 - presumably packing the low word of each i64 lane into
; q0; confirm against the register layout.
301 %offs = load <4 x i8>, ptr %offptr, align 1
302 %offs.sext = sext <4 x i8> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
303 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
304 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
305 %input.trunc = trunc <4 x i64> %input to <4 x i32>
; All-true mask, so every lane is written.
306 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input.trunc, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
310 define arm_aapcs_vfpcc void @trunc_unsigned_unscaled_i64_i8(ptr %base, ptr %offptr, <4 x i64> %input) {
311 ; CHECK-LABEL: trunc_unsigned_unscaled_i64_i8:
312 ; CHECK: @ %bb.0: @ %entry
313 ; CHECK-NEXT: vldrb.u32 q2, [r1]
314 ; CHECK-NEXT: vmov.f32 s1, s2
315 ; CHECK-NEXT: vmov.f32 s2, s4
316 ; CHECK-NEXT: vmov.f32 s3, s6
317 ; CHECK-NEXT: vstrw.32 q0, [r0, q2]
; Word scatter of a <4 x i64> input truncated to <4 x i32>, with i8 offsets
; zero-extended to i32. The CHECK lines above expect three vmov.f32 lane moves
; before the vstrw.32 - presumably packing the low word of each i64 lane into
; q0; confirm against the register layout.
320 %offs = load <4 x i8>, ptr %offptr, align 1
321 %offs.zext = zext <4 x i8> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
322 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
323 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
324 %input.trunc = trunc <4 x i64> %input to <4 x i32>
; All-true mask, so every lane is written.
325 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input.trunc, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
329 define arm_aapcs_vfpcc void @trunc_signed_unscaled_i32_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
330 ; CHECK-LABEL: trunc_signed_unscaled_i32_i8:
331 ; CHECK: @ %bb.0: @ %entry
332 ; CHECK-NEXT: vldrb.s32 q1, [r1]
333 ; CHECK-NEXT: vstrh.32 q0, [r0, q1]
; Halfword scatter of a <4 x i32> input truncated to <4 x i16>, with i8
; offsets sign-extended to i32. The CHECK lines above expect a widening
; vldrb.s32 for the offsets followed by a single vstrh.32.
336 %offs = load <4 x i8>, ptr %offptr, align 1
337 %offs.sext = sext <4 x i8> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
338 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
339 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
340 %input.trunc = trunc <4 x i32> %input to <4 x i16>
; All-true mask, so every lane is written.
; NOTE(review): the alignment operand is 4 although the stored element is i16 - confirm this is intentional.
341 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %input.trunc, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
345 define arm_aapcs_vfpcc void @trunc_unsigned_unscaled_i32_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
346 ; CHECK-LABEL: trunc_unsigned_unscaled_i32_i8:
347 ; CHECK: @ %bb.0: @ %entry
348 ; CHECK-NEXT: vldrb.u32 q1, [r1]
349 ; CHECK-NEXT: vstrh.32 q0, [r0, q1]
; Halfword scatter of a <4 x i32> input truncated to <4 x i16>, with i8
; offsets zero-extended to i32. The CHECK lines above expect a widening
; vldrb.u32 for the offsets followed by a single vstrh.32.
352 %offs = load <4 x i8>, ptr %offptr, align 1
353 %offs.zext = zext <4 x i8> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
354 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
; Same-type bitcast: a no-op that only renames %byte_ptrs to %ptrs.
355 %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
356 %input.trunc = trunc <4 x i32> %input to <4 x i16>
; All-true mask, so every lane is written.
; NOTE(review): the alignment operand is 4 although the stored element is i16 - confirm this is intentional.
357 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %input.trunc, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
361 define arm_aapcs_vfpcc void @trunc_signed_unscaled_i16_i8(ptr %base, ptr %offptr, <4 x i16> %input) {
362 ; CHECK-LABEL: trunc_signed_unscaled_i16_i8:
363 ; CHECK: @ %bb.0: @ %entry
364 ; CHECK-NEXT: vmov.i32 q1, #0xff
365 ; CHECK-NEXT: vldrb.s32 q2, [r1]
366 ; CHECK-NEXT: vand q0, q0, q1
367 ; CHECK-NEXT: vstrb.32 q0, [r0, q2]
; Byte scatter of a <4 x i16> input truncated to <4 x i8>, with i8 offsets
; sign-extended to i32. Unlike the <4 x i32> input tests, the CHECK lines
; above expect the input to be masked with 0xff (vmov.i32 + vand) before the
; vstrb.32.
370 %offs = load <4 x i8>, ptr %offptr, align 1
371 %offs.sext = sext <4 x i8> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
372 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
373 %input.trunc = trunc <4 x i16> %input to <4 x i8>
; All-true mask, so every lane is written.
; NOTE(review): the alignment operand is 4 although the stored element is i8 - confirm this is intentional.
374 call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %input.trunc, <4 x ptr> %byte_ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
378 define arm_aapcs_vfpcc void @trunc_unsigned_unscaled_i16_i8(ptr %base, ptr %offptr, <4 x i16> %input) {
379 ; CHECK-LABEL: trunc_unsigned_unscaled_i16_i8:
380 ; CHECK: @ %bb.0: @ %entry
381 ; CHECK-NEXT: vmov.i32 q1, #0xff
382 ; CHECK-NEXT: vldrb.u32 q2, [r1]
383 ; CHECK-NEXT: vand q0, q0, q1
384 ; CHECK-NEXT: vstrb.32 q0, [r0, q2]
; Byte scatter of a <4 x i16> input truncated to <4 x i8>, with i8 offsets
; zero-extended to i32. Unlike the <4 x i32> input tests, the CHECK lines
; above expect the input to be masked with 0xff (vmov.i32 + vand) before the
; vstrb.32.
387 %offs = load <4 x i8>, ptr %offptr, align 1
388 %offs.zext = zext <4 x i8> %offs to <4 x i32>
; The i8 GEP element type keeps the offsets unscaled (plain byte offsets).
389 %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
390 %input.trunc = trunc <4 x i16> %input to <4 x i8>
; All-true mask, so every lane is written.
; NOTE(review): the alignment operand is 4 although the stored element is i8 - confirm this is intentional.
391 call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %input.trunc, <4 x ptr> %byte_ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; Declarations of the llvm.masked.scatter overloads, one per stored element
; type. Operands are: value vector, vector of destination pointers, alignment,
; per-lane mask.
; NOTE(review): no call to the v4f16 overload appears in the functions visible
; here - confirm whether it is still needed.
395 declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>)
396 declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
397 declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)
398 declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
399 declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>)