; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s

; <rdar://problem/14486451>

%struct.a = type [256 x i16]
%struct.b = type [256 x i32]
%struct.c = type [256 x i64]
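
; In the functions below, the (lshr + zext + and) index computation should
; become a single ubfx, and the scaled index should then fold into the
; register-offset addressing mode of the load or store, as the CHECK lines
; verify.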

define i16 @load_halfword(%struct.a* %ctx, i32 %xor72) nounwind {
; CHECK-LABEL: load_halfword:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: ldrh w0, [x0, [[REG]], lsl #1]
  %shr81 = lshr i32 %xor72, 9
  %conv82 = zext i32 %shr81 to i64
  %idxprom83 = and i64 %conv82, 255
  %arrayidx86 = getelementptr inbounds %struct.a, %struct.a* %ctx, i64 0, i64 %idxprom83
  %result = load i16, i16* %arrayidx86, align 2
  ret i16 %result
}

define i32 @load_word(%struct.b* %ctx, i32 %xor72) nounwind {
; CHECK-LABEL: load_word:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: ldr w0, [x0, [[REG]], lsl #2]
  %shr81 = lshr i32 %xor72, 9
  %conv82 = zext i32 %shr81 to i64
  %idxprom83 = and i64 %conv82, 255
  %arrayidx86 = getelementptr inbounds %struct.b, %struct.b* %ctx, i64 0, i64 %idxprom83
  %result = load i32, i32* %arrayidx86, align 4
  ret i32 %result
}

define i64 @load_doubleword(%struct.c* %ctx, i32 %xor72) nounwind {
; CHECK-LABEL: load_doubleword:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: ldr x0, [x0, [[REG]], lsl #3]
  %shr81 = lshr i32 %xor72, 9
  %conv82 = zext i32 %shr81 to i64
  %idxprom83 = and i64 %conv82, 255
  %arrayidx86 = getelementptr inbounds %struct.c, %struct.c* %ctx, i64 0, i64 %idxprom83
  %result = load i64, i64* %arrayidx86, align 8
  ret i64 %result
}

define void @store_halfword(%struct.a* %ctx, i32 %xor72, i16 %val) nounwind {
; CHECK-LABEL: store_halfword:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: strh w2, [x0, [[REG]], lsl #1]
  %shr81 = lshr i32 %xor72, 9
  %conv82 = zext i32 %shr81 to i64
  %idxprom83 = and i64 %conv82, 255
  %arrayidx86 = getelementptr inbounds %struct.a, %struct.a* %ctx, i64 0, i64 %idxprom83
  store i16 %val, i16* %arrayidx86, align 8
  ret void
}

define void @store_word(%struct.b* %ctx, i32 %xor72, i32 %val) nounwind {
; CHECK-LABEL: store_word:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: str w2, [x0, [[REG]], lsl #2]
  %shr81 = lshr i32 %xor72, 9
  %conv82 = zext i32 %shr81 to i64
  %idxprom83 = and i64 %conv82, 255
  %arrayidx86 = getelementptr inbounds %struct.b, %struct.b* %ctx, i64 0, i64 %idxprom83
  store i32 %val, i32* %arrayidx86, align 8
  ret void
}

define void @store_doubleword(%struct.c* %ctx, i32 %xor72, i64 %val) nounwind {
; CHECK-LABEL: store_doubleword:
; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
; CHECK: str x2, [x0, [[REG]], lsl #3]
  %shr81 = lshr i32 %xor72, 9
  %conv82 = zext i32 %shr81 to i64
  %idxprom83 = and i64 %conv82, 255
  %arrayidx86 = getelementptr inbounds %struct.c, %struct.c* %ctx, i64 0, i64 %idxprom83
  store i64 %val, i64* %arrayidx86, align 8
  ret void
}

; Check that we combine a shift into the offset instead of using a narrower
; load when we have a load followed by a trunc.
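; (The trunc itself is free: it just reads the low bits of the loaded
; register. A narrower load would instead need a separate lsl, because the
; shift can only fold into the addressing mode when it matches the access
; size, e.g. ldrh/ldrb cannot scale a register offset by 8.)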

define i32 @load_doubleword_trunc_word(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_word:
; CHECK: ldr x0, [x0, x1, lsl #3]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i32
  ret i32 %trunc
}

define i16 @load_doubleword_trunc_halfword(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_halfword:
; CHECK: ldr x0, [x0, x1, lsl #3]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i16
  ret i16 %trunc
}

define i8 @load_doubleword_trunc_byte(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_byte:
; CHECK: ldr x0, [x0, x1, lsl #3]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i8
  ret i8 %trunc
}

define i16 @load_word_trunc_halfword(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_halfword:
; CHECK: ldr w0, [x0, x1, lsl #2]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i16
  ret i16 %trunc
}

define i8 @load_word_trunc_byte(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_byte:
; CHECK: ldr w0, [x0, x1, lsl #2]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i8
  ret i8 %trunc
}

define i8 @load_halfword_trunc_byte(i16* %ptr, i64 %off) {
; CHECK-LABEL: load_halfword_trunc_byte:
; CHECK: ldrh w0, [x0, x1, lsl #1]
entry:
  %idx = getelementptr inbounds i16, i16* %ptr, i64 %off
  %x = load i16, i16* %idx, align 8
  %trunc = trunc i16 %x to i8
  ret i8 %trunc
}

; Check that we do use a narrower load, and so don't combine the shift, when
; the loaded value is zero-extended.
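; (ldrb/ldrh/ldr w zero-extend into the full register for free, so the
; narrower extending load wins even though the original shift amount no
; longer matches the access size and must stay as a separate lsl.)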

define i64 @load_doubleword_trunc_word_zext(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_word_zext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #3
; CHECK: ldr w0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i32
  %ext = zext i32 %trunc to i64
  ret i64 %ext
}

define i64 @load_doubleword_trunc_halfword_zext(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_halfword_zext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #3
; CHECK: ldrh w0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i16
  %ext = zext i16 %trunc to i64
  ret i64 %ext
}

define i64 @load_doubleword_trunc_byte_zext(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_byte_zext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #3
; CHECK: ldrb w0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i8
  %ext = zext i8 %trunc to i64
  ret i64 %ext
}

define i64 @load_word_trunc_halfword_zext(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_halfword_zext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #2
; CHECK: ldrh w0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i16
  %ext = zext i16 %trunc to i64
  ret i64 %ext
}

define i64 @load_word_trunc_byte_zext(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_byte_zext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #2
; CHECK: ldrb w0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i8
  %ext = zext i8 %trunc to i64
  ret i64 %ext
}

define i64 @load_halfword_trunc_byte_zext(i16* %ptr, i64 %off) {
; CHECK-LABEL: load_halfword_trunc_byte_zext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #1
; CHECK: ldrb w0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i16, i16* %ptr, i64 %off
  %x = load i16, i16* %idx, align 8
  %trunc = trunc i16 %x to i8
  %ext = zext i8 %trunc to i64
  ret i64 %ext
}

; Check that we do use a narrower load, and so don't combine the shift, when
; the loaded value is sign-extended.
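; (The sign extension requires ldrsb/ldrsh/ldrsw anyway, and as above the
; original shift amount does not match the narrower access size, so the
; shift stays as a separate lsl.)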

define i64 @load_doubleword_trunc_word_sext(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_word_sext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #3
; CHECK: ldrsw x0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i32
  %ext = sext i32 %trunc to i64
  ret i64 %ext
}

define i64 @load_doubleword_trunc_halfword_sext(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_halfword_sext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #3
; CHECK: ldrsh x0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i16
  %ext = sext i16 %trunc to i64
  ret i64 %ext
}

define i64 @load_doubleword_trunc_byte_sext(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_byte_sext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #3
; CHECK: ldrsb x0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i8
  %ext = sext i8 %trunc to i64
  ret i64 %ext
}

define i64 @load_word_trunc_halfword_sext(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_halfword_sext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #2
; CHECK: ldrsh x0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i16
  %ext = sext i16 %trunc to i64
  ret i64 %ext
}

define i64 @load_word_trunc_byte_sext(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_byte_sext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #2
; CHECK: ldrsb x0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i8
  %ext = sext i8 %trunc to i64
  ret i64 %ext
}

define i64 @load_halfword_trunc_byte_sext(i16* %ptr, i64 %off) {
; CHECK-LABEL: load_halfword_trunc_byte_sext:
; CHECK: lsl [[REG:x[0-9]+]], x1, #1
; CHECK: ldrsb x0, [x0, [[REG]]]
entry:
  %idx = getelementptr inbounds i16, i16* %ptr, i64 %off
  %x = load i16, i16* %idx, align 8
  %trunc = trunc i16 %x to i8
  %ext = sext i8 %trunc to i64
  ret i64 %ext
}

; Check that we don't combine the shift, and so will use a narrower load,
; when the shift is used more than once.
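; (Folding the shift into the load would not remove the lsl here, since its
; result is also needed as a value for the add.)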

define i32 @load_doubleword_trunc_word_reuse_shift(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_word_reuse_shift:
; CHECK: lsl x[[REG1:[0-9]+]], x1, #3
; CHECK: ldr w[[REG2:[0-9]+]], [x0, x[[REG1]]]
; CHECK: add w0, w[[REG2]], w[[REG1]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i32
  %lsl = shl i64 %off, 3
  %lsl.trunc = trunc i64 %lsl to i32
  %add = add i32 %trunc, %lsl.trunc
  ret i32 %add
}

define i16 @load_doubleword_trunc_halfword_reuse_shift(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_halfword_reuse_shift:
; CHECK: lsl x[[REG1:[0-9]+]], x1, #3
; CHECK: ldrh w[[REG2:[0-9]+]], [x0, x[[REG1]]]
; CHECK: add w0, w[[REG2]], w[[REG1]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i16
  %lsl = shl i64 %off, 3
  %lsl.trunc = trunc i64 %lsl to i16
  %add = add i16 %trunc, %lsl.trunc
  ret i16 %add
}

define i8 @load_doubleword_trunc_byte_reuse_shift(i64* %ptr, i64 %off) {
; CHECK-LABEL: load_doubleword_trunc_byte_reuse_shift:
; CHECK: lsl x[[REG1:[0-9]+]], x1, #3
; CHECK: ldrb w[[REG2:[0-9]+]], [x0, x[[REG1]]]
; CHECK: add w0, w[[REG2]], w[[REG1]]
entry:
  %idx = getelementptr inbounds i64, i64* %ptr, i64 %off
  %x = load i64, i64* %idx, align 8
  %trunc = trunc i64 %x to i8
  %lsl = shl i64 %off, 3
  %lsl.trunc = trunc i64 %lsl to i8
  %add = add i8 %trunc, %lsl.trunc
  ret i8 %add
}

define i16 @load_word_trunc_halfword_reuse_shift(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_halfword_reuse_shift:
; CHECK: lsl x[[REG1:[0-9]+]], x1, #2
; CHECK: ldrh w[[REG2:[0-9]+]], [x0, x[[REG1]]]
; CHECK: add w0, w[[REG2]], w[[REG1]]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i16
  %lsl = shl i64 %off, 2
  %lsl.trunc = trunc i64 %lsl to i16
  %add = add i16 %trunc, %lsl.trunc
  ret i16 %add
}

define i8 @load_word_trunc_byte_reuse_shift(i32* %ptr, i64 %off) {
; CHECK-LABEL: load_word_trunc_byte_reuse_shift:
; CHECK: lsl x[[REG1:[0-9]+]], x1, #2
; CHECK: ldrb w[[REG2:[0-9]+]], [x0, x[[REG1]]]
; CHECK: add w0, w[[REG2]], w[[REG1]]
entry:
  %idx = getelementptr inbounds i32, i32* %ptr, i64 %off
  %x = load i32, i32* %idx, align 8
  %trunc = trunc i32 %x to i8
  %lsl = shl i64 %off, 2
  %lsl.trunc = trunc i64 %lsl to i8
  %add = add i8 %trunc, %lsl.trunc
  ret i8 %add
}

define i8 @load_halfword_trunc_byte_reuse_shift(i16* %ptr, i64 %off) {
; CHECK-LABEL: load_halfword_trunc_byte_reuse_shift:
; CHECK: lsl x[[REG1:[0-9]+]], x1, #1
; CHECK: ldrb w[[REG2:[0-9]+]], [x0, x[[REG1]]]
; CHECK: add w0, w[[REG2]], w[[REG1]]
entry:
  %idx = getelementptr inbounds i16, i16* %ptr, i64 %off
  %x = load i16, i16* %idx, align 8
  %trunc = trunc i16 %x to i8
  %lsl = shl i64 %off, 1
  %lsl.trunc = trunc i64 %lsl to i8
  %add = add i8 %trunc, %lsl.trunc
  ret i8 %add
}