1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
4 ; VSTRH.32 Qd, [base, offs, uxtw #1]
5 define arm_aapcs_vfpcc void @ext_scaled_i16_i32(ptr %base, ptr %offptr, <4 x i32> %input) {
6 ; CHECK-LABEL: ext_scaled_i16_i32:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vldrw.u32 q1, [r1]
9 ; CHECK-NEXT: vstrh.32 q0, [r0, q1, uxtw #1]
; The <4 x i32> offsets are loaded directly and applied through an i16-typed
; GEP. The i32 lanes of %input are truncated to i16 and scattered under an
; all-true mask. The expected output is a single vstrh.32 that uses the loaded
; offsets with uxtw #1.
12 %offs = load <4 x i32>, ptr %offptr, align 4
13 %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs
14 %t = trunc <4 x i32> %input to <4 x i16>
15 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
19 ; VSTRW.32 Qd, [base, offs, uxtw #2]
20 define arm_aapcs_vfpcc void @scaled_i32_i32(ptr %base, ptr %offptr, <4 x i32> %input) {
21 ; CHECK-LABEL: scaled_i32_i32:
22 ; CHECK: @ %bb.0: @ %entry
23 ; CHECK-NEXT: vldrw.u32 q1, [r1]
24 ; CHECK-NEXT: vstrw.32 q0, [r0, q1, uxtw #2]
; The <4 x i32> offsets are loaded directly and applied through an i32-typed
; GEP. %input is scattered unmodified under an all-true mask. The expected
; output is a single vstrw.32 that uses the loaded offsets with uxtw #2.
27 %offs = load <4 x i32>, ptr %offptr, align 4
28 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs
29 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
33 ; VSTRW.32 Qd, [base, offs, uxtw #2]
35 define arm_aapcs_vfpcc void @scaled_f32_i32(ptr %base, ptr %offptr, <4 x float> %input) {
36 ; CHECK-LABEL: scaled_f32_i32:
37 ; CHECK: @ %bb.0: @ %entry
38 ; CHECK-NEXT: vldrw.u32 q1, [r1]
39 ; CHECK-NEXT: vstrw.32 q0, [r0, q1, uxtw #2]
; Float variant of the i32/i32 case: the <4 x i32> offsets go through an
; i32-typed GEP and the <4 x float> value is scattered under an all-true mask.
; The bitcast below has identical source and destination types
; (<4 x ptr> to <4 x ptr>). The expected output is the same vstrw.32 with
; uxtw #2 as for the integer case.
41 %offs = load <4 x i32>, ptr %offptr, align 4
42 %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs
43 %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr>
44 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
48 ; VSTRW.32 Qd, [base, offs.zext, uxtw #2]
49 define arm_aapcs_vfpcc void @unsigned_scaled_b_i32_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
50 ; CHECK-LABEL: unsigned_scaled_b_i32_i16:
51 ; CHECK: @ %bb.0: @ %entry
52 ; CHECK-NEXT: vldrh.u32 q1, [r1]
53 ; CHECK-NEXT: vstrw.32 q0, [r0, q1, uxtw #2]
; The offsets are stored as <4 x i16> and zero-extended to i32 before the
; i32-typed GEP. The expected output folds the zext into a widening
; vldrh.u32 load, followed by vstrw.32 with uxtw #2.
56 %offs = load <4 x i16>, ptr %offptr, align 2
57 %offs.zext = zext <4 x i16> %offs to <4 x i32>
58 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext
59 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
63 ; VSTRW.32 Qd, [base, offs.sext, uxtw #2]
64 define arm_aapcs_vfpcc void @signed_scaled_i32_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
65 ; CHECK-LABEL: signed_scaled_i32_i16:
66 ; CHECK: @ %bb.0: @ %entry
67 ; CHECK-NEXT: vldrh.s32 q1, [r1]
68 ; CHECK-NEXT: vstrw.32 q0, [r0, q1, uxtw #2]
; The offsets are stored as <4 x i16> and sign-extended to i32 before the
; i32-typed GEP. The expected output folds the sext into a widening
; vldrh.s32 load, followed by vstrw.32 with uxtw #2.
71 %offs = load <4 x i16>, ptr %offptr, align 2
72 %offs.sext = sext <4 x i16> %offs to <4 x i32>
73 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext
74 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
78 ; VSTRW.32 Qd, [base, offs.zext, uxtw #2]
79 define arm_aapcs_vfpcc void @a_unsigned_scaled_f32_i16(ptr %base, ptr %offptr, <4 x float> %input) {
80 ; CHECK-LABEL: a_unsigned_scaled_f32_i16:
81 ; CHECK: @ %bb.0: @ %entry
82 ; CHECK-NEXT: vldrh.u32 q1, [r1]
83 ; CHECK-NEXT: vstrw.32 q0, [r0, q1, uxtw #2]
; Float value with zero-extended i16 offsets: the offsets are widened to i32,
; applied through an i32-typed GEP, and the <4 x float> value is scattered
; under an all-true mask. The bitcast below has identical source and
; destination types. The expected output is vldrh.u32 followed by vstrw.32
; with uxtw #2.
86 %offs = load <4 x i16>, ptr %offptr, align 2
87 %offs.zext = zext <4 x i16> %offs to <4 x i32>
88 %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext
89 %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr>
90 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
94 ; VSTRW.32 Qd, [base, offs.sext, uxtw #2]
95 define arm_aapcs_vfpcc void @b_signed_scaled_f32_i16(ptr %base, ptr %offptr, <4 x float> %input) {
96 ; CHECK-LABEL: b_signed_scaled_f32_i16:
97 ; CHECK: @ %bb.0: @ %entry
98 ; CHECK-NEXT: vldrh.s32 q1, [r1]
99 ; CHECK-NEXT: vstrw.32 q0, [r0, q1, uxtw #2]
; Float value with sign-extended i16 offsets: the offsets are widened to i32,
; applied through an i32-typed GEP, and the <4 x float> value is scattered
; under an all-true mask. The bitcast below has identical source and
; destination types. The expected output is vldrh.s32 followed by vstrw.32
; with uxtw #2.
102 %offs = load <4 x i16>, ptr %offptr, align 2
103 %offs.sext = sext <4 x i16> %offs to <4 x i32>
104 %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext
105 %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr>
106 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
110 ; VSTRH.32 Qd, [base, offs.sext, uxtw #1]
111 define arm_aapcs_vfpcc void @ext_signed_scaled_i16_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
112 ; CHECK-LABEL: ext_signed_scaled_i16_i16:
113 ; CHECK: @ %bb.0: @ %entry
114 ; CHECK-NEXT: vldrh.s32 q1, [r1]
115 ; CHECK-NEXT: vstrh.32 q0, [r0, q1, uxtw #1]
; Truncating store with sign-extended i16 offsets: the offsets are widened to
; i32 and applied through an i16-typed GEP, while the i32 lanes of %input are
; truncated to i16 before the all-true-mask scatter. The expected output is
; vldrh.s32 followed by vstrh.32 with uxtw #1.
118 %offs = load <4 x i16>, ptr %offptr, align 2
119 %offs.sext = sext <4 x i16> %offs to <4 x i32>
120 %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs.sext
121 %t = trunc <4 x i32> %input to <4 x i16>
122 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
126 ; VSTRH.32 Qd, [base, offs.zext, uxtw #1]
127 define arm_aapcs_vfpcc void @ext_unsigned_scaled_i16_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
128 ; CHECK-LABEL: ext_unsigned_scaled_i16_i16:
129 ; CHECK: @ %bb.0: @ %entry
130 ; CHECK-NEXT: vldrh.u32 q1, [r1]
131 ; CHECK-NEXT: vstrh.32 q0, [r0, q1, uxtw #1]
; Truncating store with zero-extended i16 offsets: the offsets are widened to
; i32 and applied through an i16-typed GEP, while the i32 lanes of %input are
; truncated to i16 before the all-true-mask scatter. The expected output is
; vldrh.u32 followed by vstrh.32 with uxtw #1.
134 %offs = load <4 x i16>, ptr %offptr, align 2
135 %offs.zext = zext <4 x i16> %offs to <4 x i32>
136 %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs.zext
137 %t = trunc <4 x i32> %input to <4 x i16>
138 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
142 ; VSTRW.32 Qd, [base, offs.zext, uxtw #2]
143 define arm_aapcs_vfpcc void @unsigned_scaled_b_i32_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
144 ; CHECK-LABEL: unsigned_scaled_b_i32_i8:
145 ; CHECK: @ %bb.0: @ %entry
146 ; CHECK-NEXT: vldrb.u32 q1, [r1]
147 ; CHECK-NEXT: vstrw.32 q0, [r0, q1, uxtw #2]
; The offsets are stored as <4 x i8> and zero-extended to i32 before the
; i32-typed GEP. The expected output folds the zext into a widening
; vldrb.u32 load, followed by vstrw.32 with uxtw #2.
150 %offs = load <4 x i8>, ptr %offptr, align 1
151 %offs.zext = zext <4 x i8> %offs to <4 x i32>
152 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext
153 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
157 ; VSTRW.32 Qd, [base, offs.sext, uxtw #2]
158 define arm_aapcs_vfpcc void @signed_scaled_i32_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
159 ; CHECK-LABEL: signed_scaled_i32_i8:
160 ; CHECK: @ %bb.0: @ %entry
161 ; CHECK-NEXT: vldrb.s32 q1, [r1]
162 ; CHECK-NEXT: vstrw.32 q0, [r0, q1, uxtw #2]
; The offsets are stored as <4 x i8> and sign-extended to i32 before the
; i32-typed GEP. The expected output folds the sext into a widening
; vldrb.s32 load, followed by vstrw.32 with uxtw #2.
165 %offs = load <4 x i8>, ptr %offptr, align 1
166 %offs.sext = sext <4 x i8> %offs to <4 x i32>
167 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext
168 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
172 ; VSTRW.32 Qd, [base, offs.zext, uxtw #2]
173 define arm_aapcs_vfpcc void @a_unsigned_scaled_f32_i8(ptr %base, ptr %offptr, <4 x float> %input) {
174 ; CHECK-LABEL: a_unsigned_scaled_f32_i8:
175 ; CHECK: @ %bb.0: @ %entry
176 ; CHECK-NEXT: vldrb.u32 q1, [r1]
177 ; CHECK-NEXT: vstrw.32 q0, [r0, q1, uxtw #2]
; Float value with zero-extended i8 offsets: the offsets are widened to i32,
; applied through an i32-typed GEP, and the <4 x float> value is scattered
; under an all-true mask. The bitcast below has identical source and
; destination types. The expected output is vldrb.u32 followed by vstrw.32
; with uxtw #2.
180 %offs = load <4 x i8>, ptr %offptr, align 1
181 %offs.zext = zext <4 x i8> %offs to <4 x i32>
182 %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext
183 %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr>
184 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
188 ; VSTRW.32 Qd, [base, offs.sext, uxtw #2]
189 define arm_aapcs_vfpcc void @b_signed_scaled_f32_i8(ptr %base, ptr %offptr, <4 x float> %input) {
190 ; CHECK-LABEL: b_signed_scaled_f32_i8:
191 ; CHECK: @ %bb.0: @ %entry
192 ; CHECK-NEXT: vldrb.s32 q1, [r1]
193 ; CHECK-NEXT: vstrw.32 q0, [r0, q1, uxtw #2]
; Float value with sign-extended i8 offsets: the offsets are widened to i32,
; applied through an i32-typed GEP, and the <4 x float> value is scattered
; under an all-true mask. The bitcast below has identical source and
; destination types. The expected output is vldrb.s32 followed by vstrw.32
; with uxtw #2.
196 %offs = load <4 x i8>, ptr %offptr, align 1
197 %offs.sext = sext <4 x i8> %offs to <4 x i32>
198 %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext
199 %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr>
200 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
204 ; VSTRH.32 Qd, [base, offs.sext, uxtw #1]
205 define arm_aapcs_vfpcc void @ext_signed_scaled_i16_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
206 ; CHECK-LABEL: ext_signed_scaled_i16_i8:
207 ; CHECK: @ %bb.0: @ %entry
208 ; CHECK-NEXT: vldrb.s32 q1, [r1]
209 ; CHECK-NEXT: vstrh.32 q0, [r0, q1, uxtw #1]
; Truncating store with sign-extended i8 offsets: the offsets are widened to
; i32 and applied through an i16-typed GEP, while the i32 lanes of %input are
; truncated to i16 before the all-true-mask scatter. The expected output is
; vldrb.s32 followed by vstrh.32 with uxtw #1.
212 %offs = load <4 x i8>, ptr %offptr, align 1
213 %offs.sext = sext <4 x i8> %offs to <4 x i32>
214 %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs.sext
215 %t = trunc <4 x i32> %input to <4 x i16>
216 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
220 ; VSTRH.32 Qd, [base, offs.zext, uxtw #1]
221 define arm_aapcs_vfpcc void @ext_unsigned_scaled_i16_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
222 ; CHECK-LABEL: ext_unsigned_scaled_i16_i8:
223 ; CHECK: @ %bb.0: @ %entry
224 ; CHECK-NEXT: vldrb.u32 q1, [r1]
225 ; CHECK-NEXT: vstrh.32 q0, [r0, q1, uxtw #1]
; Truncating store with zero-extended i8 offsets: the offsets are widened to
; i32 and applied through an i16-typed GEP, while the i32 lanes of %input are
; truncated to i16 before the all-true-mask scatter. The expected output is
; vldrb.u32 followed by vstrh.32 with uxtw #1.
228 %offs = load <4 x i8>, ptr %offptr, align 1
229 %offs.zext = zext <4 x i8> %offs to <4 x i32>
230 %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs.zext
231 %t = trunc <4 x i32> %input to <4 x i16>
232 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
236 define arm_aapcs_vfpcc void @ext_scaled_i16_i32_2gep(ptr %base, ptr %offptr, <4 x i32> %input) {
237 ; CHECK-LABEL: ext_scaled_i16_i32_2gep:
238 ; CHECK: @ %bb.0: @ %entry
239 ; CHECK-NEXT: vldrw.u32 q1, [r1]
240 ; CHECK-NEXT: movs r2, #10
241 ; CHECK-NEXT: movs r3, #0
242 ; CHECK-NEXT: vshl.i32 q1, q1, #1
243 ; CHECK-NEXT: vadd.i32 q1, q1, r0
244 ; CHECK-NEXT: vadd.i32 q1, q1, r2
245 ; CHECK-NEXT: vstrh.32 q0, [r3, q1]
; Two chained GEPs: the first applies the loaded <4 x i32> offsets to %base
; with i16 scaling, the second advances every lane pointer by a further
; constant 5 i16 elements. The expected output above does not use the
; scaled-offset store form. Instead the offsets are shifted left by 1, the
; base (r0) and the constant 10 (r2) are added lane-wise, and the vstrh.32
; uses a zero base register (r3) with the resulting vector as unscaled offsets.
248 %offs = load <4 x i32>, ptr %offptr, align 4
249 %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs
250 %ptrs2 = getelementptr inbounds i16, <4 x ptr> %ptrs, i16 5
251 %t = trunc <4 x i32> %input to <4 x i16>
252 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs2, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
256 define arm_aapcs_vfpcc void @ext_scaled_i16_i32_2gep2(ptr %base, ptr %offptr, <4 x i32> %input) {
257 ; CHECK-LABEL: ext_scaled_i16_i32_2gep2:
258 ; CHECK: @ %bb.0: @ %entry
259 ; CHECK-NEXT: adr r1, .LCPI16_0
260 ; CHECK-NEXT: vldrw.u32 q1, [r1]
261 ; CHECK-NEXT: vstrh.32 q0, [r0, q1]
263 ; CHECK-NEXT: .p2align 4
264 ; CHECK-NEXT: @ %bb.1:
265 ; CHECK-NEXT: .LCPI16_0:
266 ; CHECK-NEXT: .long 10 @ 0xa
267 ; CHECK-NEXT: .long 16 @ 0x10
268 ; CHECK-NEXT: .long 22 @ 0x16
269 ; CHECK-NEXT: .long 28 @ 0x1c
; Two chained GEPs with compile-time-constant indices; %offptr is not read.
; The element offsets <0, 3, 6, 9> plus the extra 5 elements from the second
; GEP give i16 element indices 5, 8, 11, 14, i.e. byte offsets 10, 16, 22, 28.
; These are the constant-pool values expected above, and the vstrh.32 uses
; them as unscaled offsets from r0.
271 %ptrs = getelementptr inbounds i16, ptr %base, <4 x i16> <i16 0, i16 3, i16 6, i16 9>
272 %ptrs2 = getelementptr inbounds i16, <4 x ptr> %ptrs, i16 5
273 %t = trunc <4 x i32> %input to <4 x i16>
274 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs2, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; Declarations of the masked scatter intrinsics for 4-lane vectors. The
; operands are (value vector, vector of lane pointers, i32 alignment, i1 lane
; mask). Only the v4i16, v4i32 and v4f32 variants are called by the functions
; visible in this part of the file.
278 declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>)
279 declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
280 declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)
281 declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
282 declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>)