1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=armeb-unknown | FileCheck %s
3 ; RUN: llc < %s -mtriple=armv6eb-unknown | FileCheck %s --check-prefix=CHECK-ARMv6
4 ; RUN: llc < %s -mtriple=thumbv6meb-none-eabi | FileCheck %s --check-prefix=CHECK-THUMBv6
5 ; RUN: llc < %s -mtriple=thumbv7eb-none-eabi | FileCheck %s --check-prefix=CHECK-THUMBv7
7 ; i8* p; // p is 4 byte aligned
8 ; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
; Four i8 loads OR'd MSB-first match big-endian memory order, so on every
; target above the combine should collapse this to a single aligned word
; load (one `ldr`, no shifts or byte swaps).
9 define i32 @load_i32_by_i8_big_endian(i32* %arg) {
10 ; CHECK-LABEL: load_i32_by_i8_big_endian:
12 ; CHECK-NEXT: ldr r0, [r0]
13 ; CHECK-NEXT: mov pc, lr
15 ; CHECK-ARMv6-LABEL: load_i32_by_i8_big_endian:
16 ; CHECK-ARMv6: @ %bb.0:
17 ; CHECK-ARMv6-NEXT: ldr r0, [r0]
18 ; CHECK-ARMv6-NEXT: bx lr
20 ; CHECK-THUMBv6-LABEL: load_i32_by_i8_big_endian:
21 ; CHECK-THUMBv6: @ %bb.0:
22 ; CHECK-THUMBv6-NEXT: ldr r0, [r0]
23 ; CHECK-THUMBv6-NEXT: bx lr
25 ; CHECK-THUMBv7-LABEL: load_i32_by_i8_big_endian:
26 ; CHECK-THUMBv7: @ %bb.0:
27 ; CHECK-THUMBv7-NEXT: ldr r0, [r0]
28 ; CHECK-THUMBv7-NEXT: bx lr
30 %tmp = bitcast i32* %arg to i8*
31 %tmp1 = load i8, i8* %tmp, align 4
32 %tmp2 = zext i8 %tmp1 to i32
33 %tmp3 = shl nuw nsw i32 %tmp2, 24
34 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
35 %tmp5 = load i8, i8* %tmp4, align 1
36 %tmp6 = zext i8 %tmp5 to i32
37 %tmp7 = shl nuw nsw i32 %tmp6, 16
38 %tmp8 = or i32 %tmp7, %tmp3
39 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
40 %tmp10 = load i8, i8* %tmp9, align 1
41 %tmp11 = zext i8 %tmp10 to i32
42 %tmp12 = shl nuw nsw i32 %tmp11, 8
43 %tmp13 = or i32 %tmp8, %tmp12
44 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
45 %tmp15 = load i8, i8* %tmp14, align 1
46 %tmp16 = zext i8 %tmp15 to i32
47 %tmp17 = or i32 %tmp13, %tmp16
51 ; i8* p; // p is 4 byte aligned
52 ; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
; Bytes OR'd LSB-first on a big-endian target: expected to fold to a word
; load plus a byte reverse. v6+/Thumb targets use `rev`; the base armeb
; target has no REV, hence the shift/and/orr expansion (see note below).
53 define i32 @load_i32_by_i8_bswap(i32* %arg) {
54 ; BSWAP is not supported by 32 bit target
55 ; CHECK-LABEL: load_i32_by_i8_bswap:
57 ; CHECK-NEXT: ldr r0, [r0]
58 ; CHECK-NEXT: mov r1, #65280
59 ; CHECK-NEXT: mov r2, #16711680
60 ; CHECK-NEXT: and r1, r1, r0, lsr #8
61 ; CHECK-NEXT: and r2, r2, r0, lsl #8
62 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
63 ; CHECK-NEXT: orr r0, r2, r0, lsl #24
64 ; CHECK-NEXT: orr r0, r0, r1
65 ; CHECK-NEXT: mov pc, lr
67 ; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
68 ; CHECK-ARMv6: @ %bb.0:
69 ; CHECK-ARMv6-NEXT: ldr r0, [r0]
70 ; CHECK-ARMv6-NEXT: rev r0, r0
71 ; CHECK-ARMv6-NEXT: bx lr
73 ; CHECK-THUMBv6-LABEL: load_i32_by_i8_bswap:
74 ; CHECK-THUMBv6: @ %bb.0:
75 ; CHECK-THUMBv6-NEXT: ldr r0, [r0]
76 ; CHECK-THUMBv6-NEXT: rev r0, r0
77 ; CHECK-THUMBv6-NEXT: bx lr
79 ; CHECK-THUMBv7-LABEL: load_i32_by_i8_bswap:
80 ; CHECK-THUMBv7: @ %bb.0:
81 ; CHECK-THUMBv7-NEXT: ldr r0, [r0]
82 ; CHECK-THUMBv7-NEXT: rev r0, r0
83 ; CHECK-THUMBv7-NEXT: bx lr
85 %tmp = bitcast i32* %arg to i8*
86 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
87 %tmp2 = load i8, i8* %tmp1, align 4
88 %tmp3 = zext i8 %tmp2 to i32
89 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
90 %tmp5 = load i8, i8* %tmp4, align 1
91 %tmp6 = zext i8 %tmp5 to i32
92 %tmp7 = shl nuw nsw i32 %tmp6, 8
93 %tmp8 = or i32 %tmp7, %tmp3
94 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
95 %tmp10 = load i8, i8* %tmp9, align 1
96 %tmp11 = zext i8 %tmp10 to i32
97 %tmp12 = shl nuw nsw i32 %tmp11, 16
98 %tmp13 = or i32 %tmp8, %tmp12
99 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
100 %tmp15 = load i8, i8* %tmp14, align 1
101 %tmp16 = zext i8 %tmp15 to i32
102 %tmp17 = shl nuw nsw i32 %tmp16, 24
103 %tmp18 = or i32 %tmp13, %tmp17
107 ; i8* p; // p is 4 byte aligned
108 ; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
; Two big-endian i16 halves are each assembled from byte loads, then
; combined MSB-first; the whole tree covers 4 contiguous bytes in memory
; order, so every prefix expects a single 32-bit `ldr`.
109 define i32 @load_i32_by_i16_by_i8_big_endian(i32* %arg) {
110 ; CHECK-LABEL: load_i32_by_i16_by_i8_big_endian:
112 ; CHECK-NEXT: ldr r0, [r0]
113 ; CHECK-NEXT: mov pc, lr
115 ; CHECK-ARMv6-LABEL: load_i32_by_i16_by_i8_big_endian:
116 ; CHECK-ARMv6: @ %bb.0:
117 ; CHECK-ARMv6-NEXT: ldr r0, [r0]
118 ; CHECK-ARMv6-NEXT: bx lr
120 ; CHECK-THUMBv6-LABEL: load_i32_by_i16_by_i8_big_endian:
121 ; CHECK-THUMBv6: @ %bb.0:
122 ; CHECK-THUMBv6-NEXT: ldr r0, [r0]
123 ; CHECK-THUMBv6-NEXT: bx lr
125 ; CHECK-THUMBv7-LABEL: load_i32_by_i16_by_i8_big_endian:
126 ; CHECK-THUMBv7: @ %bb.0:
127 ; CHECK-THUMBv7-NEXT: ldr r0, [r0]
128 ; CHECK-THUMBv7-NEXT: bx lr
130 %tmp = bitcast i32* %arg to i8*
131 %tmp1 = load i8, i8* %tmp, align 4
132 %tmp2 = zext i8 %tmp1 to i16
133 %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
134 %tmp4 = load i8, i8* %tmp3, align 1
135 %tmp5 = zext i8 %tmp4 to i16
136 %tmp6 = shl nuw nsw i16 %tmp2, 8
137 %tmp7 = or i16 %tmp6, %tmp5
138 %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
139 %tmp9 = load i8, i8* %tmp8, align 1
140 %tmp10 = zext i8 %tmp9 to i16
141 %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
142 %tmp12 = load i8, i8* %tmp11, align 1
143 %tmp13 = zext i8 %tmp12 to i16
144 %tmp14 = shl nuw nsw i16 %tmp10, 8
145 %tmp15 = or i16 %tmp14, %tmp13
146 %tmp16 = zext i16 %tmp7 to i32
147 %tmp17 = zext i16 %tmp15 to i32
148 %tmp18 = shl nuw nsw i32 %tmp16, 16
149 %tmp19 = or i32 %tmp18, %tmp17
153 ; i16* p; // p is 4 byte aligned
154 ; ((i32) p[0] << 16) | (i32) p[1]
; Two aligned i16 loads combined MSB-first (big-endian halfword order):
; expected to fold to a single 32-bit `ldr` on all targets.
155 define i32 @load_i32_by_i16(i32* %arg) {
156 ; CHECK-LABEL: load_i32_by_i16:
158 ; CHECK-NEXT: ldr r0, [r0]
159 ; CHECK-NEXT: mov pc, lr
161 ; CHECK-ARMv6-LABEL: load_i32_by_i16:
162 ; CHECK-ARMv6: @ %bb.0:
163 ; CHECK-ARMv6-NEXT: ldr r0, [r0]
164 ; CHECK-ARMv6-NEXT: bx lr
166 ; CHECK-THUMBv6-LABEL: load_i32_by_i16:
167 ; CHECK-THUMBv6: @ %bb.0:
168 ; CHECK-THUMBv6-NEXT: ldr r0, [r0]
169 ; CHECK-THUMBv6-NEXT: bx lr
171 ; CHECK-THUMBv7-LABEL: load_i32_by_i16:
172 ; CHECK-THUMBv7: @ %bb.0:
173 ; CHECK-THUMBv7-NEXT: ldr r0, [r0]
174 ; CHECK-THUMBv7-NEXT: bx lr
176 %tmp = bitcast i32* %arg to i16*
177 %tmp1 = load i16, i16* %tmp, align 4
178 %tmp2 = zext i16 %tmp1 to i32
179 %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
180 %tmp4 = load i16, i16* %tmp3, align 1
181 %tmp5 = zext i16 %tmp4 to i32
182 %tmp6 = shl nuw nsw i32 %tmp2, 16
183 %tmp7 = or i32 %tmp6, %tmp5
187 ; i16* p_16; // p_16 is 4 byte aligned
188 ; i8* p_8 = (i8*) p_16;
189 ; ((i32) p_16[0] << 16) | ((i32) p_8[2] << 8) | (i32) p_8[3]
; Mixed widths: one i16 load (bytes 0-1) plus two i8 loads (bytes 2-3)
; still cover 4 contiguous bytes in big-endian order, so the combine
; should still produce a single `ldr`.
190 define i32 @load_i32_by_i16_i8(i32* %arg) {
191 ; CHECK-LABEL: load_i32_by_i16_i8:
193 ; CHECK-NEXT: ldr r0, [r0]
194 ; CHECK-NEXT: mov pc, lr
196 ; CHECK-ARMv6-LABEL: load_i32_by_i16_i8:
197 ; CHECK-ARMv6: @ %bb.0:
198 ; CHECK-ARMv6-NEXT: ldr r0, [r0]
199 ; CHECK-ARMv6-NEXT: bx lr
201 ; CHECK-THUMBv6-LABEL: load_i32_by_i16_i8:
202 ; CHECK-THUMBv6: @ %bb.0:
203 ; CHECK-THUMBv6-NEXT: ldr r0, [r0]
204 ; CHECK-THUMBv6-NEXT: bx lr
206 ; CHECK-THUMBv7-LABEL: load_i32_by_i16_i8:
207 ; CHECK-THUMBv7: @ %bb.0:
208 ; CHECK-THUMBv7-NEXT: ldr r0, [r0]
209 ; CHECK-THUMBv7-NEXT: bx lr
211 %tmp = bitcast i32* %arg to i16*
212 %tmp1 = bitcast i32* %arg to i8*
213 %tmp2 = load i16, i16* %tmp, align 4
214 %tmp3 = zext i16 %tmp2 to i32
215 %tmp4 = shl nuw nsw i32 %tmp3, 16
216 %tmp5 = getelementptr inbounds i8, i8* %tmp1, i32 2
217 %tmp6 = load i8, i8* %tmp5, align 1
218 %tmp7 = zext i8 %tmp6 to i32
219 %tmp8 = shl nuw nsw i32 %tmp7, 8
220 %tmp9 = getelementptr inbounds i8, i8* %tmp1, i32 3
221 %tmp10 = load i8, i8* %tmp9, align 1
222 %tmp11 = zext i8 %tmp10 to i32
223 %tmp12 = or i32 %tmp8, %tmp11
224 %tmp13 = or i32 %tmp12, %tmp4
228 ; i8* p; // p is 8 byte aligned
229 ; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
; Eight bytes OR'd LSB-first on a big-endian target: expected to become a
; 64-bit load plus a byte swap of each 32-bit half. ARMv6 uses `ldrd` +
; two `rev`s; Thumb uses two `ldr`s + `rev`s; base armeb (no REV) expands
; the swap with shifts/masks. Note the halves are also exchanged
; (low word of the result comes from the high address).
230 define i64 @load_i64_by_i8_bswap(i64* %arg) {
231 ; CHECK-LABEL: load_i64_by_i8_bswap:
233 ; CHECK-NEXT: push {r11, lr}
234 ; CHECK-NEXT: ldr r1, [r0]
235 ; CHECK-NEXT: mov r12, #65280
236 ; CHECK-NEXT: ldr r0, [r0, #4]
237 ; CHECK-NEXT: mov lr, #16711680
238 ; CHECK-NEXT: and r3, r12, r0, lsr #8
239 ; CHECK-NEXT: and r2, lr, r0, lsl #8
240 ; CHECK-NEXT: orr r3, r3, r0, lsr #24
241 ; CHECK-NEXT: orr r0, r2, r0, lsl #24
242 ; CHECK-NEXT: and r2, r12, r1, lsr #8
243 ; CHECK-NEXT: orr r0, r0, r3
244 ; CHECK-NEXT: and r3, lr, r1, lsl #8
245 ; CHECK-NEXT: orr r2, r2, r1, lsr #24
246 ; CHECK-NEXT: orr r1, r3, r1, lsl #24
247 ; CHECK-NEXT: orr r1, r1, r2
248 ; CHECK-NEXT: pop {r11, lr}
249 ; CHECK-NEXT: mov pc, lr
251 ; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
252 ; CHECK-ARMv6: @ %bb.0:
253 ; CHECK-ARMv6-NEXT: ldrd r2, r3, [r0]
254 ; CHECK-ARMv6-NEXT: rev r0, r3
255 ; CHECK-ARMv6-NEXT: rev r1, r2
256 ; CHECK-ARMv6-NEXT: bx lr
258 ; CHECK-THUMBv6-LABEL: load_i64_by_i8_bswap:
259 ; CHECK-THUMBv6: @ %bb.0:
260 ; CHECK-THUMBv6-NEXT: ldr r1, [r0]
261 ; CHECK-THUMBv6-NEXT: ldr r0, [r0, #4]
262 ; CHECK-THUMBv6-NEXT: rev r0, r0
263 ; CHECK-THUMBv6-NEXT: rev r1, r1
264 ; CHECK-THUMBv6-NEXT: bx lr
266 ; CHECK-THUMBv7-LABEL: load_i64_by_i8_bswap:
267 ; CHECK-THUMBv7: @ %bb.0:
268 ; CHECK-THUMBv7-NEXT: ldr r1, [r0]
269 ; CHECK-THUMBv7-NEXT: ldr r0, [r0, #4]
270 ; CHECK-THUMBv7-NEXT: rev r0, r0
271 ; CHECK-THUMBv7-NEXT: rev r1, r1
272 ; CHECK-THUMBv7-NEXT: bx lr
274 %tmp = bitcast i64* %arg to i8*
275 %tmp1 = load i8, i8* %tmp, align 8
276 %tmp2 = zext i8 %tmp1 to i64
277 %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
278 %tmp4 = load i8, i8* %tmp3, align 1
279 %tmp5 = zext i8 %tmp4 to i64
280 %tmp6 = shl nuw nsw i64 %tmp5, 8
281 %tmp7 = or i64 %tmp6, %tmp2
282 %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
283 %tmp9 = load i8, i8* %tmp8, align 1
284 %tmp10 = zext i8 %tmp9 to i64
285 %tmp11 = shl nuw nsw i64 %tmp10, 16
286 %tmp12 = or i64 %tmp7, %tmp11
287 %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
288 %tmp14 = load i8, i8* %tmp13, align 1
289 %tmp15 = zext i8 %tmp14 to i64
290 %tmp16 = shl nuw nsw i64 %tmp15, 24
291 %tmp17 = or i64 %tmp12, %tmp16
292 %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
293 %tmp19 = load i8, i8* %tmp18, align 1
294 %tmp20 = zext i8 %tmp19 to i64
295 %tmp21 = shl nuw nsw i64 %tmp20, 32
296 %tmp22 = or i64 %tmp17, %tmp21
297 %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
298 %tmp24 = load i8, i8* %tmp23, align 1
299 %tmp25 = zext i8 %tmp24 to i64
300 %tmp26 = shl nuw nsw i64 %tmp25, 40
301 %tmp27 = or i64 %tmp22, %tmp26
302 %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
303 %tmp29 = load i8, i8* %tmp28, align 1
304 %tmp30 = zext i8 %tmp29 to i64
305 %tmp31 = shl nuw nsw i64 %tmp30, 48
306 %tmp32 = or i64 %tmp27, %tmp31
307 %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
308 %tmp34 = load i8, i8* %tmp33, align 1
309 %tmp35 = zext i8 %tmp34 to i64
310 %tmp36 = shl nuw i64 %tmp35, 56
311 %tmp37 = or i64 %tmp32, %tmp36
315 ; i8* p; // p is 8 byte aligned
316 ; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
; Eight bytes OR'd MSB-first match big-endian memory layout exactly:
; expected to fold to a plain 64-bit load (`ldrd` on ARMv6, an `ldr` pair
; elsewhere) with no byte swapping.
317 define i64 @load_i64_by_i8(i64* %arg) {
318 ; CHECK-LABEL: load_i64_by_i8:
320 ; CHECK-NEXT: ldr r2, [r0]
321 ; CHECK-NEXT: ldr r1, [r0, #4]
322 ; CHECK-NEXT: mov r0, r2
323 ; CHECK-NEXT: mov pc, lr
325 ; CHECK-ARMv6-LABEL: load_i64_by_i8:
326 ; CHECK-ARMv6: @ %bb.0:
327 ; CHECK-ARMv6-NEXT: ldrd r0, r1, [r0]
328 ; CHECK-ARMv6-NEXT: bx lr
330 ; CHECK-THUMBv6-LABEL: load_i64_by_i8:
331 ; CHECK-THUMBv6: @ %bb.0:
332 ; CHECK-THUMBv6-NEXT: ldr r2, [r0]
333 ; CHECK-THUMBv6-NEXT: ldr r1, [r0, #4]
334 ; CHECK-THUMBv6-NEXT: mov r0, r2
335 ; CHECK-THUMBv6-NEXT: bx lr
337 ; CHECK-THUMBv7-LABEL: load_i64_by_i8:
338 ; CHECK-THUMBv7: @ %bb.0:
339 ; CHECK-THUMBv7-NEXT: ldr r2, [r0]
340 ; CHECK-THUMBv7-NEXT: ldr r1, [r0, #4]
341 ; CHECK-THUMBv7-NEXT: mov r0, r2
342 ; CHECK-THUMBv7-NEXT: bx lr
344 %tmp = bitcast i64* %arg to i8*
345 %tmp1 = load i8, i8* %tmp, align 8
346 %tmp2 = zext i8 %tmp1 to i64
347 %tmp3 = shl nuw i64 %tmp2, 56
348 %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
349 %tmp5 = load i8, i8* %tmp4, align 1
350 %tmp6 = zext i8 %tmp5 to i64
351 %tmp7 = shl nuw nsw i64 %tmp6, 48
352 %tmp8 = or i64 %tmp7, %tmp3
353 %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
354 %tmp10 = load i8, i8* %tmp9, align 1
355 %tmp11 = zext i8 %tmp10 to i64
356 %tmp12 = shl nuw nsw i64 %tmp11, 40
357 %tmp13 = or i64 %tmp8, %tmp12
358 %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
359 %tmp15 = load i8, i8* %tmp14, align 1
360 %tmp16 = zext i8 %tmp15 to i64
361 %tmp17 = shl nuw nsw i64 %tmp16, 32
362 %tmp18 = or i64 %tmp13, %tmp17
363 %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
364 %tmp20 = load i8, i8* %tmp19, align 1
365 %tmp21 = zext i8 %tmp20 to i64
366 %tmp22 = shl nuw nsw i64 %tmp21, 24
367 %tmp23 = or i64 %tmp18, %tmp22
368 %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
369 %tmp25 = load i8, i8* %tmp24, align 1
370 %tmp26 = zext i8 %tmp25 to i64
371 %tmp27 = shl nuw nsw i64 %tmp26, 16
372 %tmp28 = or i64 %tmp23, %tmp27
373 %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
374 %tmp30 = load i8, i8* %tmp29, align 1
375 %tmp31 = zext i8 %tmp30 to i64
376 %tmp32 = shl nuw nsw i64 %tmp31, 8
377 %tmp33 = or i64 %tmp28, %tmp32
378 %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
379 %tmp35 = load i8, i8* %tmp34, align 1
380 %tmp36 = zext i8 %tmp35 to i64
381 %tmp37 = or i64 %tmp33, %tmp36
385 ; i8* p; // p[1] is 4 byte aligned
386 ; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
; LSB-first byte pattern starting at p[1]: expected to fold to a word load
; at offset 1 plus a byte reverse. Thumb targets materialize the #1 offset
; in a register (per the checks) instead of an immediate-offset load.
387 define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
388 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
390 ; CHECK-NEXT: ldr r0, [r0, #1]
391 ; CHECK-NEXT: mov r1, #65280
392 ; CHECK-NEXT: mov r2, #16711680
393 ; CHECK-NEXT: and r1, r1, r0, lsr #8
394 ; CHECK-NEXT: and r2, r2, r0, lsl #8
395 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
396 ; CHECK-NEXT: orr r0, r2, r0, lsl #24
397 ; CHECK-NEXT: orr r0, r0, r1
398 ; CHECK-NEXT: mov pc, lr
400 ; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
401 ; CHECK-ARMv6: @ %bb.0:
402 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #1]
403 ; CHECK-ARMv6-NEXT: rev r0, r0
404 ; CHECK-ARMv6-NEXT: bx lr
406 ; CHECK-THUMBv6-LABEL: load_i32_by_i8_nonzero_offset:
407 ; CHECK-THUMBv6: @ %bb.0:
408 ; CHECK-THUMBv6-NEXT: movs r1, #1
409 ; CHECK-THUMBv6-NEXT: ldr r0, [r0, r1]
410 ; CHECK-THUMBv6-NEXT: rev r0, r0
411 ; CHECK-THUMBv6-NEXT: bx lr
413 ; CHECK-THUMBv7-LABEL: load_i32_by_i8_nonzero_offset:
414 ; CHECK-THUMBv7: @ %bb.0:
415 ; CHECK-THUMBv7-NEXT: movs r1, #1
416 ; CHECK-THUMBv7-NEXT: ldr r0, [r0, r1]
417 ; CHECK-THUMBv7-NEXT: rev r0, r0
418 ; CHECK-THUMBv7-NEXT: bx lr
421 %tmp = bitcast i32* %arg to i8*
422 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
423 %tmp2 = load i8, i8* %tmp1, align 4
424 %tmp3 = zext i8 %tmp2 to i32
425 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
426 %tmp5 = load i8, i8* %tmp4, align 1
427 %tmp6 = zext i8 %tmp5 to i32
428 %tmp7 = shl nuw nsw i32 %tmp6, 8
429 %tmp8 = or i32 %tmp7, %tmp3
430 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
431 %tmp10 = load i8, i8* %tmp9, align 1
432 %tmp11 = zext i8 %tmp10 to i32
433 %tmp12 = shl nuw nsw i32 %tmp11, 16
434 %tmp13 = or i32 %tmp8, %tmp12
435 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
436 %tmp15 = load i8, i8* %tmp14, align 1
437 %tmp16 = zext i8 %tmp15 to i32
438 %tmp17 = shl nuw nsw i32 %tmp16, 24
439 %tmp18 = or i32 %tmp13, %tmp17
443 ; i8* p; // p[-4] is 4 byte aligned
444 ; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
; LSB-first bytes at p[-4..-1]: expected word load at offset -4 plus byte
; reverse. Thumb targets have no negative immediate offsets, so they
; subtract 4 from the base first (per the checks).
445 define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
446 ; CHECK-LABEL: load_i32_by_i8_neg_offset:
448 ; CHECK-NEXT: ldr r0, [r0, #-4]
449 ; CHECK-NEXT: mov r1, #65280
450 ; CHECK-NEXT: mov r2, #16711680
451 ; CHECK-NEXT: and r1, r1, r0, lsr #8
452 ; CHECK-NEXT: and r2, r2, r0, lsl #8
453 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
454 ; CHECK-NEXT: orr r0, r2, r0, lsl #24
455 ; CHECK-NEXT: orr r0, r0, r1
456 ; CHECK-NEXT: mov pc, lr
458 ; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
459 ; CHECK-ARMv6: @ %bb.0:
460 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #-4]
461 ; CHECK-ARMv6-NEXT: rev r0, r0
462 ; CHECK-ARMv6-NEXT: bx lr
464 ; CHECK-THUMBv6-LABEL: load_i32_by_i8_neg_offset:
465 ; CHECK-THUMBv6: @ %bb.0:
466 ; CHECK-THUMBv6-NEXT: subs r0, r0, #4
467 ; CHECK-THUMBv6-NEXT: ldr r0, [r0]
468 ; CHECK-THUMBv6-NEXT: rev r0, r0
469 ; CHECK-THUMBv6-NEXT: bx lr
471 ; CHECK-THUMBv7-LABEL: load_i32_by_i8_neg_offset:
472 ; CHECK-THUMBv7: @ %bb.0:
473 ; CHECK-THUMBv7-NEXT: subs r0, r0, #4
474 ; CHECK-THUMBv7-NEXT: ldr r0, [r0]
475 ; CHECK-THUMBv7-NEXT: rev r0, r0
476 ; CHECK-THUMBv7-NEXT: bx lr
479 %tmp = bitcast i32* %arg to i8*
480 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
481 %tmp2 = load i8, i8* %tmp1, align 4
482 %tmp3 = zext i8 %tmp2 to i32
483 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
484 %tmp5 = load i8, i8* %tmp4, align 1
485 %tmp6 = zext i8 %tmp5 to i32
486 %tmp7 = shl nuw nsw i32 %tmp6, 8
487 %tmp8 = or i32 %tmp7, %tmp3
488 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
489 %tmp10 = load i8, i8* %tmp9, align 1
490 %tmp11 = zext i8 %tmp10 to i32
491 %tmp12 = shl nuw nsw i32 %tmp11, 16
492 %tmp13 = or i32 %tmp8, %tmp12
493 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
494 %tmp15 = load i8, i8* %tmp14, align 1
495 %tmp16 = zext i8 %tmp15 to i32
496 %tmp17 = shl nuw nsw i32 %tmp16, 24
497 %tmp18 = or i32 %tmp13, %tmp17
501 ; i8* p; // p[1] is 4 byte aligned
502 ; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
; Bytes p[1..4] OR'd MSB-first (p[1] in the top byte) match big-endian
; memory order, so the expected codegen is a single `ldr` at offset 1 with
; no byte reverse.
503 define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
504 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
506 ; CHECK-NEXT: ldr r0, [r0, #1]
507 ; CHECK-NEXT: mov pc, lr
509 ; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
510 ; CHECK-ARMv6: @ %bb.0:
511 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #1]
512 ; CHECK-ARMv6-NEXT: bx lr
514 ; CHECK-THUMBv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
515 ; CHECK-THUMBv6: @ %bb.0:
516 ; CHECK-THUMBv6-NEXT: movs r1, #1
517 ; CHECK-THUMBv6-NEXT: ldr r0, [r0, r1]
518 ; CHECK-THUMBv6-NEXT: bx lr
520 ; CHECK-THUMBv7-LABEL: load_i32_by_i8_nonzero_offset_bswap:
521 ; CHECK-THUMBv7: @ %bb.0:
522 ; CHECK-THUMBv7-NEXT: movs r1, #1
523 ; CHECK-THUMBv7-NEXT: ldr r0, [r0, r1]
524 ; CHECK-THUMBv7-NEXT: bx lr
527 %tmp = bitcast i32* %arg to i8*
528 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
529 %tmp2 = load i8, i8* %tmp1, align 1
530 %tmp3 = zext i8 %tmp2 to i32
531 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
532 %tmp5 = load i8, i8* %tmp4, align 1
533 %tmp6 = zext i8 %tmp5 to i32
534 %tmp7 = shl nuw nsw i32 %tmp6, 8
535 %tmp8 = or i32 %tmp7, %tmp3
536 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
537 %tmp10 = load i8, i8* %tmp9, align 1
538 %tmp11 = zext i8 %tmp10 to i32
539 %tmp12 = shl nuw nsw i32 %tmp11, 16
540 %tmp13 = or i32 %tmp8, %tmp12
541 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
542 %tmp15 = load i8, i8* %tmp14, align 4
543 %tmp16 = zext i8 %tmp15 to i32
544 %tmp17 = shl nuw nsw i32 %tmp16, 24
545 %tmp18 = or i32 %tmp13, %tmp17
549 ; i8* p; // p[-4] is 4 byte aligned
550 ; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
; Bytes p[-4..-1] OR'd MSB-first (p[-4] in the top byte) match big-endian
; order: a single `ldr` at offset -4, no swap. Thumb subtracts 4 first
; since it lacks negative immediate offsets.
551 define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
552 ; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
554 ; CHECK-NEXT: ldr r0, [r0, #-4]
555 ; CHECK-NEXT: mov pc, lr
557 ; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
558 ; CHECK-ARMv6: @ %bb.0:
559 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #-4]
560 ; CHECK-ARMv6-NEXT: bx lr
562 ; CHECK-THUMBv6-LABEL: load_i32_by_i8_neg_offset_bswap:
563 ; CHECK-THUMBv6: @ %bb.0:
564 ; CHECK-THUMBv6-NEXT: subs r0, r0, #4
565 ; CHECK-THUMBv6-NEXT: ldr r0, [r0]
566 ; CHECK-THUMBv6-NEXT: bx lr
568 ; CHECK-THUMBv7-LABEL: load_i32_by_i8_neg_offset_bswap:
569 ; CHECK-THUMBv7: @ %bb.0:
570 ; CHECK-THUMBv7-NEXT: subs r0, r0, #4
571 ; CHECK-THUMBv7-NEXT: ldr r0, [r0]
572 ; CHECK-THUMBv7-NEXT: bx lr
575 %tmp = bitcast i32* %arg to i8*
576 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
577 %tmp2 = load i8, i8* %tmp1, align 1
578 %tmp3 = zext i8 %tmp2 to i32
579 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
580 %tmp5 = load i8, i8* %tmp4, align 1
581 %tmp6 = zext i8 %tmp5 to i32
582 %tmp7 = shl nuw nsw i32 %tmp6, 8
583 %tmp8 = or i32 %tmp7, %tmp3
584 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
585 %tmp10 = load i8, i8* %tmp9, align 1
586 %tmp11 = zext i8 %tmp10 to i32
587 %tmp12 = shl nuw nsw i32 %tmp11, 16
588 %tmp13 = or i32 %tmp8, %tmp12
589 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
590 %tmp15 = load i8, i8* %tmp14, align 4
591 %tmp16 = zext i8 %tmp15 to i32
592 %tmp17 = shl nuw nsw i32 %tmp16, 24
593 %tmp18 = or i32 %tmp13, %tmp17
597 declare i16 @llvm.bswap.i16(i16)
599 ; i16* p; // p is 4 byte aligned
600 ; (i32) bswap(p[0]) | ((i32) bswap(p[1]) << 16)
; Two explicitly byte-swapped halfwords combined low-half-first amount to
; a full 32-bit byte swap of the word at %arg on this big-endian target:
; expected `ldr` + `rev` (shift/mask expansion on base armeb, no REV).
601 define i32 @load_i32_by_bswap_i16(i32* %arg) {
602 ; CHECK-LABEL: load_i32_by_bswap_i16:
604 ; CHECK-NEXT: ldr r0, [r0]
605 ; CHECK-NEXT: mov r1, #65280
606 ; CHECK-NEXT: mov r2, #16711680
607 ; CHECK-NEXT: and r1, r1, r0, lsr #8
608 ; CHECK-NEXT: and r2, r2, r0, lsl #8
609 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
610 ; CHECK-NEXT: orr r0, r2, r0, lsl #24
611 ; CHECK-NEXT: orr r0, r0, r1
612 ; CHECK-NEXT: mov pc, lr
614 ; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
615 ; CHECK-ARMv6: @ %bb.0:
616 ; CHECK-ARMv6-NEXT: ldr r0, [r0]
617 ; CHECK-ARMv6-NEXT: rev r0, r0
618 ; CHECK-ARMv6-NEXT: bx lr
620 ; CHECK-THUMBv6-LABEL: load_i32_by_bswap_i16:
621 ; CHECK-THUMBv6: @ %bb.0:
622 ; CHECK-THUMBv6-NEXT: ldr r0, [r0]
623 ; CHECK-THUMBv6-NEXT: rev r0, r0
624 ; CHECK-THUMBv6-NEXT: bx lr
626 ; CHECK-THUMBv7-LABEL: load_i32_by_bswap_i16:
627 ; CHECK-THUMBv7: @ %bb.0:
628 ; CHECK-THUMBv7-NEXT: ldr r0, [r0]
629 ; CHECK-THUMBv7-NEXT: rev r0, r0
630 ; CHECK-THUMBv7-NEXT: bx lr
633 %tmp = bitcast i32* %arg to i16*
634 %tmp1 = load i16, i16* %tmp, align 4
635 %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
636 %tmp2 = zext i16 %tmp11 to i32
637 %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
638 %tmp4 = load i16, i16* %tmp3, align 1
639 %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
640 %tmp5 = zext i16 %tmp41 to i32
641 %tmp6 = shl nuw nsw i32 %tmp5, 16
642 %tmp7 = or i32 %tmp6, %tmp2
646 ; i16* p; // p is 4 byte aligned
647 ; (i32) p[1] | ((sext(p[0]) to i32) << 16)
; The sign-extended half is shifted entirely into the top 16 bits, so the
; extension kind (sext vs zext) cannot affect the result and the pattern
; should still fold to a single word `ldr`.
648 define i32 @load_i32_by_sext_i16(i32* %arg) {
649 ; CHECK-LABEL: load_i32_by_sext_i16:
651 ; CHECK-NEXT: ldr r0, [r0]
652 ; CHECK-NEXT: mov pc, lr
654 ; CHECK-ARMv6-LABEL: load_i32_by_sext_i16:
655 ; CHECK-ARMv6: @ %bb.0:
656 ; CHECK-ARMv6-NEXT: ldr r0, [r0]
657 ; CHECK-ARMv6-NEXT: bx lr
659 ; CHECK-THUMBv6-LABEL: load_i32_by_sext_i16:
660 ; CHECK-THUMBv6: @ %bb.0:
661 ; CHECK-THUMBv6-NEXT: ldr r0, [r0]
662 ; CHECK-THUMBv6-NEXT: bx lr
664 ; CHECK-THUMBv7-LABEL: load_i32_by_sext_i16:
665 ; CHECK-THUMBv7: @ %bb.0:
666 ; CHECK-THUMBv7-NEXT: ldr r0, [r0]
667 ; CHECK-THUMBv7-NEXT: bx lr
668 %tmp = bitcast i32* %arg to i16*
669 %tmp1 = load i16, i16* %tmp, align 4
670 %tmp2 = sext i16 %tmp1 to i32
671 %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
672 %tmp4 = load i16, i16* %tmp3, align 1
673 %tmp5 = zext i16 %tmp4 to i32
674 %tmp6 = shl nuw nsw i32 %tmp2, 16
675 %tmp7 = or i32 %tmp6, %tmp5
681 ; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
; LSB-first bytes at arg + 12 + i (variable index): expected to fold to an
; add of the index, a word load at immediate offset 12, and a `rev`.
; The zexts of the i+1/i+2/i+3 adds must be proven consecutive for the
; combine to fire.
682 define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
683 ; CHECK-LABEL: load_i32_by_i8_base_offset_index:
685 ; CHECK-NEXT: add r0, r0, r1
686 ; CHECK-NEXT: mov r1, #65280
687 ; CHECK-NEXT: mov r2, #16711680
688 ; CHECK-NEXT: ldr r0, [r0, #12]
689 ; CHECK-NEXT: and r1, r1, r0, lsr #8
690 ; CHECK-NEXT: and r2, r2, r0, lsl #8
691 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
692 ; CHECK-NEXT: orr r0, r2, r0, lsl #24
693 ; CHECK-NEXT: orr r0, r0, r1
694 ; CHECK-NEXT: mov pc, lr
696 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
697 ; CHECK-ARMv6: @ %bb.0:
698 ; CHECK-ARMv6-NEXT: add r0, r0, r1
699 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #12]
700 ; CHECK-ARMv6-NEXT: rev r0, r0
701 ; CHECK-ARMv6-NEXT: bx lr
703 ; CHECK-THUMBv6-LABEL: load_i32_by_i8_base_offset_index:
704 ; CHECK-THUMBv6: @ %bb.0:
705 ; CHECK-THUMBv6-NEXT: adds r0, r0, r1
706 ; CHECK-THUMBv6-NEXT: ldr r0, [r0, #12]
707 ; CHECK-THUMBv6-NEXT: rev r0, r0
708 ; CHECK-THUMBv6-NEXT: bx lr
710 ; CHECK-THUMBv7-LABEL: load_i32_by_i8_base_offset_index:
711 ; CHECK-THUMBv7: @ %bb.0:
712 ; CHECK-THUMBv7-NEXT: adds r0, r0, r1
713 ; CHECK-THUMBv7-NEXT: ldr r0, [r0, #12]
714 ; CHECK-THUMBv7-NEXT: rev r0, r0
715 ; CHECK-THUMBv7-NEXT: bx lr
716 %tmp = add nuw nsw i32 %i, 3
717 %tmp2 = add nuw nsw i32 %i, 2
718 %tmp3 = add nuw nsw i32 %i, 1
719 %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
720 %tmp5 = zext i32 %i to i64
721 %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
722 %tmp7 = load i8, i8* %tmp6, align 4
723 %tmp8 = zext i8 %tmp7 to i32
724 %tmp9 = zext i32 %tmp3 to i64
725 %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
726 %tmp11 = load i8, i8* %tmp10, align 1
727 %tmp12 = zext i8 %tmp11 to i32
728 %tmp13 = shl nuw nsw i32 %tmp12, 8
729 %tmp14 = or i32 %tmp13, %tmp8
730 %tmp15 = zext i32 %tmp2 to i64
731 %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
732 %tmp17 = load i8, i8* %tmp16, align 1
733 %tmp18 = zext i8 %tmp17 to i32
734 %tmp19 = shl nuw nsw i32 %tmp18, 16
735 %tmp20 = or i32 %tmp14, %tmp19
736 %tmp21 = zext i32 %tmp to i64
737 %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
738 %tmp23 = load i8, i8* %tmp22, align 1
739 %tmp24 = zext i8 %tmp23 to i32
740 %tmp25 = shl nuw i32 %tmp24, 24
741 %tmp26 = or i32 %tmp20, %tmp25
747 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
; Same as load_i32_by_i8_base_offset_index but starting at i + 1, giving a
; combined immediate offset of 13. Per the checks, Thumb targets
; materialize #13 in a register instead of using an immediate-offset load.
748 define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
749 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
751 ; CHECK-NEXT: add r0, r1, r0
752 ; CHECK-NEXT: mov r1, #65280
753 ; CHECK-NEXT: mov r2, #16711680
754 ; CHECK-NEXT: ldr r0, [r0, #13]
755 ; CHECK-NEXT: and r1, r1, r0, lsr #8
756 ; CHECK-NEXT: and r2, r2, r0, lsl #8
757 ; CHECK-NEXT: orr r1, r1, r0, lsr #24
758 ; CHECK-NEXT: orr r0, r2, r0, lsl #24
759 ; CHECK-NEXT: orr r0, r0, r1
760 ; CHECK-NEXT: mov pc, lr
762 ; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
763 ; CHECK-ARMv6: @ %bb.0:
764 ; CHECK-ARMv6-NEXT: add r0, r1, r0
765 ; CHECK-ARMv6-NEXT: ldr r0, [r0, #13]
766 ; CHECK-ARMv6-NEXT: rev r0, r0
767 ; CHECK-ARMv6-NEXT: bx lr
769 ; CHECK-THUMBv6-LABEL: load_i32_by_i8_base_offset_index_2:
770 ; CHECK-THUMBv6: @ %bb.0:
771 ; CHECK-THUMBv6-NEXT: adds r0, r1, r0
772 ; CHECK-THUMBv6-NEXT: movs r1, #13
773 ; CHECK-THUMBv6-NEXT: ldr r0, [r0, r1]
774 ; CHECK-THUMBv6-NEXT: rev r0, r0
775 ; CHECK-THUMBv6-NEXT: bx lr
777 ; CHECK-THUMBv7-LABEL: load_i32_by_i8_base_offset_index_2:
778 ; CHECK-THUMBv7: @ %bb.0:
779 ; CHECK-THUMBv7-NEXT: adds r0, r1, r0
780 ; CHECK-THUMBv7-NEXT: movs r1, #13
781 ; CHECK-THUMBv7-NEXT: ldr r0, [r0, r1]
782 ; CHECK-THUMBv7-NEXT: rev r0, r0
783 ; CHECK-THUMBv7-NEXT: bx lr
785 %tmp = add nuw nsw i32 %i, 4
786 %tmp2 = add nuw nsw i32 %i, 3
787 %tmp3 = add nuw nsw i32 %i, 2
788 %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
789 %tmp5 = add nuw nsw i32 %i, 1
790 %tmp27 = zext i32 %tmp5 to i64
791 %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
792 %tmp29 = load i8, i8* %tmp28, align 4
793 %tmp30 = zext i8 %tmp29 to i32
794 %tmp31 = zext i32 %tmp3 to i64
795 %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
796 %tmp33 = load i8, i8* %tmp32, align 1
797 %tmp34 = zext i8 %tmp33 to i32
798 %tmp35 = shl nuw nsw i32 %tmp34, 8
799 %tmp36 = or i32 %tmp35, %tmp30
800 %tmp37 = zext i32 %tmp2 to i64
801 %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
802 %tmp39 = load i8, i8* %tmp38, align 1
803 %tmp40 = zext i8 %tmp39 to i32
804 %tmp41 = shl nuw nsw i32 %tmp40, 16
805 %tmp42 = or i32 %tmp36, %tmp41
806 %tmp43 = zext i32 %tmp to i64
807 %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
808 %tmp45 = load i8, i8* %tmp44, align 1
809 %tmp46 = zext i8 %tmp45 to i32
810 %tmp47 = shl nuw i32 %tmp46, 24
811 %tmp48 = or i32 %tmp42, %tmp47
815 ; i8* p; // p is 2 byte aligned
816 ; (i32) p[0] | ((i32) p[1] << 8)
; Only two bytes, OR'd LSB-first with 2-byte alignment. Per the checks,
; v6+ targets fold this to a halfword load plus shift and `rev`; the base
; armeb target keeps two byte loads and an orr.
817 define i32 @zext_load_i32_by_i8(i32* %arg) {
818 ; CHECK-LABEL: zext_load_i32_by_i8:
820 ; CHECK-NEXT: ldrb r1, [r0]
821 ; CHECK-NEXT: ldrb r0, [r0, #1]
822 ; CHECK-NEXT: orr r0, r1, r0, lsl #8
823 ; CHECK-NEXT: mov pc, lr
825 ; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
826 ; CHECK-ARMv6: @ %bb.0:
827 ; CHECK-ARMv6-NEXT: ldrh r0, [r0]
828 ; CHECK-ARMv6-NEXT: lsl r0, r0, #16
829 ; CHECK-ARMv6-NEXT: rev r0, r0
830 ; CHECK-ARMv6-NEXT: bx lr
832 ; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
833 ; CHECK-THUMBv6: @ %bb.0:
834 ; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
835 ; CHECK-THUMBv6-NEXT: lsls r0, r0, #16
836 ; CHECK-THUMBv6-NEXT: rev r0, r0
837 ; CHECK-THUMBv6-NEXT: bx lr
839 ; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
840 ; CHECK-THUMBv7: @ %bb.0:
841 ; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
842 ; CHECK-THUMBv7-NEXT: lsls r0, r0, #16
843 ; CHECK-THUMBv7-NEXT: rev r0, r0
844 ; CHECK-THUMBv7-NEXT: bx lr
846 %tmp = bitcast i32* %arg to i8*
847 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
848 %tmp2 = load i8, i8* %tmp1, align 2
849 %tmp3 = zext i8 %tmp2 to i32
850 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
851 %tmp5 = load i8, i8* %tmp4, align 1
852 %tmp6 = zext i8 %tmp5 to i32
853 %tmp7 = shl nuw nsw i32 %tmp6, 8
854 %tmp8 = or i32 %tmp7, %tmp3
858 ; i8* p; // p is 2 byte aligned
859 ; ((i32) p[0] << 8) | ((i32) p[1] << 16)
; Same two-byte pattern but pre-shifted by 8 (result occupies bits 8-23):
; per the checks, no halfword-load fold fires and every target keeps two
; `ldrb`s combined with shifts.
860 define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
861 ; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
863 ; CHECK-NEXT: ldrb r1, [r0]
864 ; CHECK-NEXT: ldrb r0, [r0, #1]
865 ; CHECK-NEXT: lsl r0, r0, #16
866 ; CHECK-NEXT: orr r0, r0, r1, lsl #8
867 ; CHECK-NEXT: mov pc, lr
869 ; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_8:
870 ; CHECK-ARMv6: @ %bb.0:
871 ; CHECK-ARMv6-NEXT: ldrb r1, [r0]
872 ; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
873 ; CHECK-ARMv6-NEXT: lsl r0, r0, #16
874 ; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
875 ; CHECK-ARMv6-NEXT: bx lr
877 ; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_shl_8:
878 ; CHECK-THUMBv6: @ %bb.0:
879 ; CHECK-THUMBv6-NEXT: ldrb r1, [r0]
880 ; CHECK-THUMBv6-NEXT: lsls r1, r1, #8
881 ; CHECK-THUMBv6-NEXT: ldrb r0, [r0, #1]
882 ; CHECK-THUMBv6-NEXT: lsls r0, r0, #16
883 ; CHECK-THUMBv6-NEXT: adds r0, r0, r1
884 ; CHECK-THUMBv6-NEXT: bx lr
886 ; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_shl_8:
887 ; CHECK-THUMBv7: @ %bb.0:
888 ; CHECK-THUMBv7-NEXT: ldrb r1, [r0]
889 ; CHECK-THUMBv7-NEXT: lsls r1, r1, #8
890 ; CHECK-THUMBv7-NEXT: ldrb r0, [r0, #1]
891 ; CHECK-THUMBv7-NEXT: lsls r0, r0, #16
892 ; CHECK-THUMBv7-NEXT: adds r0, r0, r1
893 ; CHECK-THUMBv7-NEXT: bx lr
895 %tmp = bitcast i32* %arg to i8*
896 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
897 %tmp2 = load i8, i8* %tmp1, align 2
898 %tmp3 = zext i8 %tmp2 to i32
899 %tmp30 = shl nuw nsw i32 %tmp3, 8
900 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
901 %tmp5 = load i8, i8* %tmp4, align 1
902 %tmp6 = zext i8 %tmp5 to i32
903 %tmp7 = shl nuw nsw i32 %tmp6, 16
904 %tmp8 = or i32 %tmp7, %tmp30
908 ; i8* p; // p is 2 byte aligned
909 ; ((i32) p[0] << 16) | ((i32) p[1] << 24)
; Same two-byte pattern pre-shifted by 16 (result occupies bits 16-31):
; again no halfword fold, all targets keep two `ldrb`s plus shifts.
910 define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
911 ; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
913 ; CHECK-NEXT: ldrb r1, [r0]
914 ; CHECK-NEXT: ldrb r0, [r0, #1]
915 ; CHECK-NEXT: lsl r0, r0, #24
916 ; CHECK-NEXT: orr r0, r0, r1, lsl #16
917 ; CHECK-NEXT: mov pc, lr
919 ; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_16:
920 ; CHECK-ARMv6: @ %bb.0:
921 ; CHECK-ARMv6-NEXT: ldrb r1, [r0]
922 ; CHECK-ARMv6-NEXT: ldrb r0, [r0, #1]
923 ; CHECK-ARMv6-NEXT: lsl r0, r0, #24
924 ; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #16
925 ; CHECK-ARMv6-NEXT: bx lr
927 ; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_shl_16:
928 ; CHECK-THUMBv6: @ %bb.0:
929 ; CHECK-THUMBv6-NEXT: ldrb r1, [r0]
930 ; CHECK-THUMBv6-NEXT: lsls r1, r1, #16
931 ; CHECK-THUMBv6-NEXT: ldrb r0, [r0, #1]
932 ; CHECK-THUMBv6-NEXT: lsls r0, r0, #24
933 ; CHECK-THUMBv6-NEXT: adds r0, r0, r1
934 ; CHECK-THUMBv6-NEXT: bx lr
936 ; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_shl_16:
937 ; CHECK-THUMBv7: @ %bb.0:
938 ; CHECK-THUMBv7-NEXT: ldrb r1, [r0]
939 ; CHECK-THUMBv7-NEXT: lsls r1, r1, #16
940 ; CHECK-THUMBv7-NEXT: ldrb r0, [r0, #1]
941 ; CHECK-THUMBv7-NEXT: lsls r0, r0, #24
942 ; CHECK-THUMBv7-NEXT: adds r0, r0, r1
943 ; CHECK-THUMBv7-NEXT: bx lr
945 %tmp = bitcast i32* %arg to i8*
946 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
947 %tmp2 = load i8, i8* %tmp1, align 2
948 %tmp3 = zext i8 %tmp2 to i32
949 %tmp30 = shl nuw nsw i32 %tmp3, 16
950 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
951 %tmp5 = load i8, i8* %tmp4, align 1
952 %tmp6 = zext i8 %tmp5 to i32
953 %tmp7 = shl nuw nsw i32 %tmp6, 24
954 %tmp8 = or i32 %tmp7, %tmp30
; i8* p; // p is 2 byte aligned
; (i32) p[1] | ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    mov pc, lr
;
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
; CHECK-ARMv6:       @ %bb.0:
; CHECK-ARMv6-NEXT:    ldrh r0, [r0]
; CHECK-ARMv6-NEXT:    bx lr
;
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
; CHECK-THUMBv6:       @ %bb.0:
; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
; CHECK-THUMBv6-NEXT:    bx lr
;
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
; CHECK-THUMBv7:       @ %bb.0:
; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
; CHECK-THUMBv7-NEXT:    bx lr
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}
; i8* p; // p is 2 byte aligned
; ((i32) p[1] << 8) | ((i32) p[0] << 16)
define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrb r1, [r0]
; CHECK-NEXT:    ldrb r0, [r0, #1]
; CHECK-NEXT:    lsl r1, r1, #16
; CHECK-NEXT:    orr r0, r1, r0, lsl #8
; CHECK-NEXT:    mov pc, lr
;
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK-ARMv6:       @ %bb.0:
; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
; CHECK-ARMv6-NEXT:    lsl r1, r1, #16
; CHECK-ARMv6-NEXT:    orr r0, r1, r0, lsl #8
; CHECK-ARMv6-NEXT:    bx lr
;
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK-THUMBv6:       @ %bb.0:
; CHECK-THUMBv6-NEXT:    ldrb r1, [r0, #1]
; CHECK-THUMBv6-NEXT:    lsls r1, r1, #8
; CHECK-THUMBv6-NEXT:    ldrb r0, [r0]
; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
; CHECK-THUMBv6-NEXT:    bx lr
;
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK-THUMBv7:       @ %bb.0:
; CHECK-THUMBv7-NEXT:    ldrb r1, [r0, #1]
; CHECK-THUMBv7-NEXT:    lsls r1, r1, #8
; CHECK-THUMBv7-NEXT:    ldrb r0, [r0]
; CHECK-THUMBv7-NEXT:    lsls r0, r0, #16
; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
; CHECK-THUMBv7-NEXT:    bx lr
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}
; i8* p; // p is 2 byte aligned
; ((i32) p[1] << 16) | ((i32) p[0] << 24)
define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrb r1, [r0]
; CHECK-NEXT:    ldrb r0, [r0, #1]
; CHECK-NEXT:    lsl r1, r1, #24
; CHECK-NEXT:    orr r0, r1, r0, lsl #16
; CHECK-NEXT:    mov pc, lr
;
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK-ARMv6:       @ %bb.0:
; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
; CHECK-ARMv6-NEXT:    lsl r1, r1, #24
; CHECK-ARMv6-NEXT:    orr r0, r1, r0, lsl #16
; CHECK-ARMv6-NEXT:    bx lr
;
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK-THUMBv6:       @ %bb.0:
; CHECK-THUMBv6-NEXT:    ldrb r1, [r0, #1]
; CHECK-THUMBv6-NEXT:    lsls r1, r1, #16
; CHECK-THUMBv6-NEXT:    ldrb r0, [r0]
; CHECK-THUMBv6-NEXT:    lsls r0, r0, #24
; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
; CHECK-THUMBv6-NEXT:    bx lr
;
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK-THUMBv7:       @ %bb.0:
; CHECK-THUMBv7-NEXT:    ldrb r1, [r0, #1]
; CHECK-THUMBv7-NEXT:    lsls r1, r1, #16
; CHECK-THUMBv7-NEXT:    ldrb r0, [r0]
; CHECK-THUMBv7-NEXT:    lsls r0, r0, #24
; CHECK-THUMBv7-NEXT:    adds r0, r0, r1
; CHECK-THUMBv7-NEXT:    bx lr
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}
; i16* p1.i16 = (i16*) p;
; (p1.i16[0] << 8) | ((i16) p[2])
;
; This is essentially a i16 load from p[1], but we don't fold the pattern now
; because in the original DAG we don't have p[1] address available
1099 define i16 @load_i16_from_nonzero_offset(i8* %p) {
1100 ; CHECK-LABEL: load_i16_from_nonzero_offset:
1102 ; CHECK-NEXT: ldrh r1, [r0]
1103 ; CHECK-NEXT: ldrb r0, [r0, #2]
1104 ; CHECK-NEXT: orr r0, r0, r1, lsl #8
1105 ; CHECK-NEXT: mov pc, lr
1107 ; CHECK-ARMv6-LABEL: load_i16_from_nonzero_offset:
1108 ; CHECK-ARMv6: @ %bb.0:
1109 ; CHECK-ARMv6-NEXT: ldrh r1, [r0]
1110 ; CHECK-ARMv6-NEXT: ldrb r0, [r0, #2]
1111 ; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #8
1112 ; CHECK-ARMv6-NEXT: bx lr
1114 ; CHECK-THUMBv6-LABEL: load_i16_from_nonzero_offset:
1115 ; CHECK-THUMBv6: @ %bb.0:
1116 ; CHECK-THUMBv6-NEXT: ldrb r1, [r0, #2]
1117 ; CHECK-THUMBv6-NEXT: ldrh r0, [r0]
1118 ; CHECK-THUMBv6-NEXT: lsls r0, r0, #8
1119 ; CHECK-THUMBv6-NEXT: adds r0, r0, r1
1120 ; CHECK-THUMBv6-NEXT: bx lr
1122 ; CHECK-THUMBv7-LABEL: load_i16_from_nonzero_offset:
1123 ; CHECK-THUMBv7: @ %bb.0:
1124 ; CHECK-THUMBv7-NEXT: ldrb r1, [r0, #2]
1125 ; CHECK-THUMBv7-NEXT: ldrh r0, [r0]
1126 ; CHECK-THUMBv7-NEXT: lsls r0, r0, #8
1127 ; CHECK-THUMBv7-NEXT: adds r0, r0, r1
1128 ; CHECK-THUMBv7-NEXT: bx lr
1130 %p1.i16 = bitcast i8* %p to i16*
1131 %p2.i8 = getelementptr i8, i8* %p, i64 2
1132 %v1 = load i16, i16* %p1.i16
1133 %v2.i8 = load i8, i8* %p2.i8
1134 %v2 = zext i8 %v2.i8 to i16
1135 %v1.shl = shl i16 %v1, 8
1136 %res = or i16 %v1.shl, %v2