1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=BSWAP
3 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=MOVBE
4 ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64 --check-prefix=BSWAP64
5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK64 --check-prefix=MOVBE64
8 ; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
9 define i32 @load_i32_by_i8(i32* %arg) {
10 ; CHECK-LABEL: load_i32_by_i8:
12 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
13 ; CHECK-NEXT: movl (%eax), %eax
16 ; CHECK64-LABEL: load_i32_by_i8:
18 ; CHECK64-NEXT: movl (%rdi), %eax
20 %tmp = bitcast i32* %arg to i8*
21 %tmp1 = load i8, i8* %tmp, align 1
22 %tmp2 = zext i8 %tmp1 to i32
23 %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
24 %tmp4 = load i8, i8* %tmp3, align 1
25 %tmp5 = zext i8 %tmp4 to i32
26 %tmp6 = shl nuw nsw i32 %tmp5, 8
27 %tmp7 = or i32 %tmp6, %tmp2
28 %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
29 %tmp9 = load i8, i8* %tmp8, align 1
30 %tmp10 = zext i8 %tmp9 to i32
31 %tmp11 = shl nuw nsw i32 %tmp10, 16
32 %tmp12 = or i32 %tmp7, %tmp11
33 %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 3
34 %tmp14 = load i8, i8* %tmp13, align 1
35 %tmp15 = zext i8 %tmp14 to i32
36 %tmp16 = shl nuw nsw i32 %tmp15, 24
37 %tmp17 = or i32 %tmp12, %tmp16
42 ; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
43 define i32 @load_i32_by_i8_bswap(i32* %arg) {
44 ; BSWAP-LABEL: load_i32_by_i8_bswap:
46 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
47 ; BSWAP-NEXT: movl (%eax), %eax
48 ; BSWAP-NEXT: bswapl %eax
51 ; MOVBE-LABEL: load_i32_by_i8_bswap:
53 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
54 ; MOVBE-NEXT: movbel (%eax), %eax
57 ; BSWAP64-LABEL: load_i32_by_i8_bswap:
59 ; BSWAP64-NEXT: movl (%rdi), %eax
60 ; BSWAP64-NEXT: bswapl %eax
63 ; MOVBE64-LABEL: load_i32_by_i8_bswap:
65 ; MOVBE64-NEXT: movbel (%rdi), %eax
67 %tmp = bitcast i32* %arg to i8*
68 %tmp1 = load i8, i8* %tmp, align 1
69 %tmp2 = zext i8 %tmp1 to i32
70 %tmp3 = shl nuw nsw i32 %tmp2, 24
71 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
72 %tmp5 = load i8, i8* %tmp4, align 1
73 %tmp6 = zext i8 %tmp5 to i32
74 %tmp7 = shl nuw nsw i32 %tmp6, 16
75 %tmp8 = or i32 %tmp7, %tmp3
76 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
77 %tmp10 = load i8, i8* %tmp9, align 1
78 %tmp11 = zext i8 %tmp10 to i32
79 %tmp12 = shl nuw nsw i32 %tmp11, 8
80 %tmp13 = or i32 %tmp8, %tmp12
81 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
82 %tmp15 = load i8, i8* %tmp14, align 1
83 %tmp16 = zext i8 %tmp15 to i32
84 %tmp17 = or i32 %tmp13, %tmp16
89 ; (i32) p[0] | ((i32) p[1] << 16)
90 define i32 @load_i32_by_i16(i32* %arg) {
91 ; CHECK-LABEL: load_i32_by_i16:
93 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
94 ; CHECK-NEXT: movl (%eax), %eax
97 ; CHECK64-LABEL: load_i32_by_i16:
99 ; CHECK64-NEXT: movl (%rdi), %eax
101 %tmp = bitcast i32* %arg to i16*
102 %tmp1 = load i16, i16* %tmp, align 1
103 %tmp2 = zext i16 %tmp1 to i32
104 %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
105 %tmp4 = load i16, i16* %tmp3, align 1
106 %tmp5 = zext i16 %tmp4 to i32
107 %tmp6 = shl nuw nsw i32 %tmp5, 16
108 %tmp7 = or i32 %tmp6, %tmp2
113 ; i8* p_8 = (i8*) p_16;
114 ; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
115 define i32 @load_i32_by_i16_i8(i32* %arg) {
116 ; CHECK-LABEL: load_i32_by_i16_i8:
118 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
119 ; CHECK-NEXT: movl (%eax), %eax
122 ; CHECK64-LABEL: load_i32_by_i16_i8:
124 ; CHECK64-NEXT: movl (%rdi), %eax
126 %tmp = bitcast i32* %arg to i16*
127 %tmp1 = bitcast i32* %arg to i8*
128 %tmp2 = load i16, i16* %tmp, align 1
129 %tmp3 = zext i16 %tmp2 to i32
130 %tmp4 = getelementptr inbounds i8, i8* %tmp1, i32 2
131 %tmp5 = load i8, i8* %tmp4, align 1
132 %tmp6 = zext i8 %tmp5 to i32
133 %tmp7 = shl nuw nsw i32 %tmp6, 16
134 %tmp8 = getelementptr inbounds i8, i8* %tmp1, i32 3
135 %tmp9 = load i8, i8* %tmp8, align 1
136 %tmp10 = zext i8 %tmp9 to i32
137 %tmp11 = shl nuw nsw i32 %tmp10, 24
138 %tmp12 = or i32 %tmp7, %tmp11
139 %tmp13 = or i32 %tmp12, %tmp3
145 ; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
146 define i32 @load_i32_by_i16_by_i8(i32* %arg) {
147 ; CHECK-LABEL: load_i32_by_i16_by_i8:
149 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
150 ; CHECK-NEXT: movl (%eax), %eax
153 ; CHECK64-LABEL: load_i32_by_i16_by_i8:
155 ; CHECK64-NEXT: movl (%rdi), %eax
157 %tmp = bitcast i32* %arg to i8*
158 %tmp1 = load i8, i8* %tmp, align 1
159 %tmp2 = zext i8 %tmp1 to i16
160 %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
161 %tmp4 = load i8, i8* %tmp3, align 1
162 %tmp5 = zext i8 %tmp4 to i16
163 %tmp6 = shl nuw nsw i16 %tmp5, 8
164 %tmp7 = or i16 %tmp6, %tmp2
165 %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
166 %tmp9 = load i8, i8* %tmp8, align 1
167 %tmp10 = zext i8 %tmp9 to i16
168 %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
169 %tmp12 = load i8, i8* %tmp11, align 1
170 %tmp13 = zext i8 %tmp12 to i16
171 %tmp14 = shl nuw nsw i16 %tmp13, 8
172 %tmp15 = or i16 %tmp14, %tmp10
173 %tmp16 = zext i16 %tmp7 to i32
174 %tmp17 = zext i16 %tmp15 to i32
175 %tmp18 = shl nuw nsw i32 %tmp17, 16
176 %tmp19 = or i32 %tmp18, %tmp16
181 ; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
182 define i32 @load_i32_by_i16_by_i8_bswap(i32* %arg) {
183 ; BSWAP-LABEL: load_i32_by_i16_by_i8_bswap:
185 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
186 ; BSWAP-NEXT: movl (%eax), %eax
187 ; BSWAP-NEXT: bswapl %eax
190 ; MOVBE-LABEL: load_i32_by_i16_by_i8_bswap:
192 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
193 ; MOVBE-NEXT: movbel (%eax), %eax
196 ; BSWAP64-LABEL: load_i32_by_i16_by_i8_bswap:
198 ; BSWAP64-NEXT: movl (%rdi), %eax
199 ; BSWAP64-NEXT: bswapl %eax
202 ; MOVBE64-LABEL: load_i32_by_i16_by_i8_bswap:
204 ; MOVBE64-NEXT: movbel (%rdi), %eax
206 %tmp = bitcast i32* %arg to i8*
207 %tmp1 = load i8, i8* %tmp, align 1
208 %tmp2 = zext i8 %tmp1 to i16
209 %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
210 %tmp4 = load i8, i8* %tmp3, align 1
211 %tmp5 = zext i8 %tmp4 to i16
212 %tmp6 = shl nuw nsw i16 %tmp2, 8
213 %tmp7 = or i16 %tmp6, %tmp5
214 %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
215 %tmp9 = load i8, i8* %tmp8, align 1
216 %tmp10 = zext i8 %tmp9 to i16
217 %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
218 %tmp12 = load i8, i8* %tmp11, align 1
219 %tmp13 = zext i8 %tmp12 to i16
220 %tmp14 = shl nuw nsw i16 %tmp10, 8
221 %tmp15 = or i16 %tmp14, %tmp13
222 %tmp16 = zext i16 %tmp7 to i32
223 %tmp17 = zext i16 %tmp15 to i32
224 %tmp18 = shl nuw nsw i32 %tmp16, 16
225 %tmp19 = or i32 %tmp18, %tmp17
230 ; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
231 define i64 @load_i64_by_i8(i64* %arg) {
232 ; CHECK-LABEL: load_i64_by_i8:
234 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
235 ; CHECK-NEXT: movl (%ecx), %eax
236 ; CHECK-NEXT: movl 4(%ecx), %edx
239 ; CHECK64-LABEL: load_i64_by_i8:
241 ; CHECK64-NEXT: movq (%rdi), %rax
243 %tmp = bitcast i64* %arg to i8*
244 %tmp1 = load i8, i8* %tmp, align 1
245 %tmp2 = zext i8 %tmp1 to i64
246 %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
247 %tmp4 = load i8, i8* %tmp3, align 1
248 %tmp5 = zext i8 %tmp4 to i64
249 %tmp6 = shl nuw nsw i64 %tmp5, 8
250 %tmp7 = or i64 %tmp6, %tmp2
251 %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
252 %tmp9 = load i8, i8* %tmp8, align 1
253 %tmp10 = zext i8 %tmp9 to i64
254 %tmp11 = shl nuw nsw i64 %tmp10, 16
255 %tmp12 = or i64 %tmp7, %tmp11
256 %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
257 %tmp14 = load i8, i8* %tmp13, align 1
258 %tmp15 = zext i8 %tmp14 to i64
259 %tmp16 = shl nuw nsw i64 %tmp15, 24
260 %tmp17 = or i64 %tmp12, %tmp16
261 %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
262 %tmp19 = load i8, i8* %tmp18, align 1
263 %tmp20 = zext i8 %tmp19 to i64
264 %tmp21 = shl nuw nsw i64 %tmp20, 32
265 %tmp22 = or i64 %tmp17, %tmp21
266 %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
267 %tmp24 = load i8, i8* %tmp23, align 1
268 %tmp25 = zext i8 %tmp24 to i64
269 %tmp26 = shl nuw nsw i64 %tmp25, 40
270 %tmp27 = or i64 %tmp22, %tmp26
271 %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
272 %tmp29 = load i8, i8* %tmp28, align 1
273 %tmp30 = zext i8 %tmp29 to i64
274 %tmp31 = shl nuw nsw i64 %tmp30, 48
275 %tmp32 = or i64 %tmp27, %tmp31
276 %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
277 %tmp34 = load i8, i8* %tmp33, align 1
278 %tmp35 = zext i8 %tmp34 to i64
279 %tmp36 = shl nuw i64 %tmp35, 56
280 %tmp37 = or i64 %tmp32, %tmp36
285 ; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
286 define i64 @load_i64_by_i8_bswap(i64* %arg) {
287 ; BSWAP-LABEL: load_i64_by_i8_bswap:
289 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
290 ; BSWAP-NEXT: movl (%eax), %edx
291 ; BSWAP-NEXT: movl 4(%eax), %eax
292 ; BSWAP-NEXT: bswapl %eax
293 ; BSWAP-NEXT: bswapl %edx
296 ; MOVBE-LABEL: load_i64_by_i8_bswap:
298 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
299 ; MOVBE-NEXT: movbel 4(%ecx), %eax
300 ; MOVBE-NEXT: movbel (%ecx), %edx
303 ; BSWAP64-LABEL: load_i64_by_i8_bswap:
305 ; BSWAP64-NEXT: movq (%rdi), %rax
306 ; BSWAP64-NEXT: bswapq %rax
309 ; MOVBE64-LABEL: load_i64_by_i8_bswap:
311 ; MOVBE64-NEXT: movbeq (%rdi), %rax
313 %tmp = bitcast i64* %arg to i8*
314 %tmp1 = load i8, i8* %tmp, align 1
315 %tmp2 = zext i8 %tmp1 to i64
316 %tmp3 = shl nuw i64 %tmp2, 56
317 %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
318 %tmp5 = load i8, i8* %tmp4, align 1
319 %tmp6 = zext i8 %tmp5 to i64
320 %tmp7 = shl nuw nsw i64 %tmp6, 48
321 %tmp8 = or i64 %tmp7, %tmp3
322 %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
323 %tmp10 = load i8, i8* %tmp9, align 1
324 %tmp11 = zext i8 %tmp10 to i64
325 %tmp12 = shl nuw nsw i64 %tmp11, 40
326 %tmp13 = or i64 %tmp8, %tmp12
327 %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
328 %tmp15 = load i8, i8* %tmp14, align 1
329 %tmp16 = zext i8 %tmp15 to i64
330 %tmp17 = shl nuw nsw i64 %tmp16, 32
331 %tmp18 = or i64 %tmp13, %tmp17
332 %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
333 %tmp20 = load i8, i8* %tmp19, align 1
334 %tmp21 = zext i8 %tmp20 to i64
335 %tmp22 = shl nuw nsw i64 %tmp21, 24
336 %tmp23 = or i64 %tmp18, %tmp22
337 %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
338 %tmp25 = load i8, i8* %tmp24, align 1
339 %tmp26 = zext i8 %tmp25 to i64
340 %tmp27 = shl nuw nsw i64 %tmp26, 16
341 %tmp28 = or i64 %tmp23, %tmp27
342 %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
343 %tmp30 = load i8, i8* %tmp29, align 1
344 %tmp31 = zext i8 %tmp30 to i64
345 %tmp32 = shl nuw nsw i64 %tmp31, 8
346 %tmp33 = or i64 %tmp28, %tmp32
347 %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
348 %tmp35 = load i8, i8* %tmp34, align 1
349 %tmp36 = zext i8 %tmp35 to i64
350 %tmp37 = or i64 %tmp33, %tmp36
354 ; Part of the load by bytes pattern is used outside of the pattern
357 ; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
359 define i32 @load_i32_by_i8_bswap_uses(i32* %arg) {
360 ; CHECK-LABEL: load_i32_by_i8_bswap_uses:
362 ; CHECK-NEXT: pushl %esi
363 ; CHECK-NEXT: .cfi_def_cfa_offset 8
364 ; CHECK-NEXT: .cfi_offset %esi, -8
365 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
366 ; CHECK-NEXT: movzbl (%eax), %ecx
367 ; CHECK-NEXT: shll $24, %ecx
368 ; CHECK-NEXT: movzbl 1(%eax), %edx
369 ; CHECK-NEXT: movl %edx, %esi
370 ; CHECK-NEXT: shll $16, %esi
371 ; CHECK-NEXT: orl %ecx, %esi
372 ; CHECK-NEXT: movzbl 2(%eax), %ecx
373 ; CHECK-NEXT: shll $8, %ecx
374 ; CHECK-NEXT: orl %esi, %ecx
375 ; CHECK-NEXT: movzbl 3(%eax), %eax
376 ; CHECK-NEXT: orl %ecx, %eax
377 ; CHECK-NEXT: orl %edx, %eax
378 ; CHECK-NEXT: popl %esi
379 ; CHECK-NEXT: .cfi_def_cfa_offset 4
382 ; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
384 ; CHECK64-NEXT: movzbl (%rdi), %eax
385 ; CHECK64-NEXT: shll $24, %eax
386 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx
387 ; CHECK64-NEXT: movl %ecx, %edx
388 ; CHECK64-NEXT: shll $16, %edx
389 ; CHECK64-NEXT: orl %eax, %edx
390 ; CHECK64-NEXT: movzbl 2(%rdi), %esi
391 ; CHECK64-NEXT: shll $8, %esi
392 ; CHECK64-NEXT: orl %edx, %esi
393 ; CHECK64-NEXT: movzbl 3(%rdi), %eax
394 ; CHECK64-NEXT: orl %esi, %eax
395 ; CHECK64-NEXT: orl %ecx, %eax
397 %tmp = bitcast i32* %arg to i8*
398 %tmp1 = load i8, i8* %tmp, align 1
399 %tmp2 = zext i8 %tmp1 to i32
400 %tmp3 = shl nuw nsw i32 %tmp2, 24
401 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
402 %tmp5 = load i8, i8* %tmp4, align 1
403 %tmp6 = zext i8 %tmp5 to i32
404 %tmp7 = shl nuw nsw i32 %tmp6, 16
405 %tmp8 = or i32 %tmp7, %tmp3
406 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
407 %tmp10 = load i8, i8* %tmp9, align 1
408 %tmp11 = zext i8 %tmp10 to i32
409 %tmp12 = shl nuw nsw i32 %tmp11, 8
410 %tmp13 = or i32 %tmp8, %tmp12
411 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
412 %tmp15 = load i8, i8* %tmp14, align 1
413 %tmp16 = zext i8 %tmp15 to i32
414 %tmp17 = or i32 %tmp13, %tmp16
415 ; Use individual part of the pattern outside of the pattern
416 %tmp18 = or i32 %tmp6, %tmp17
420 ; One of the loads is volatile
423 ; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
424 define i32 @load_i32_by_i8_bswap_volatile(i32* %arg) {
425 ; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
427 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
428 ; CHECK-NEXT: movzbl (%eax), %ecx
429 ; CHECK-NEXT: shll $24, %ecx
430 ; CHECK-NEXT: movzbl 1(%eax), %edx
431 ; CHECK-NEXT: shll $16, %edx
432 ; CHECK-NEXT: orl %ecx, %edx
433 ; CHECK-NEXT: movzbl 2(%eax), %ecx
434 ; CHECK-NEXT: shll $8, %ecx
435 ; CHECK-NEXT: orl %edx, %ecx
436 ; CHECK-NEXT: movzbl 3(%eax), %eax
437 ; CHECK-NEXT: orl %ecx, %eax
440 ; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
442 ; CHECK64-NEXT: movzbl (%rdi), %eax
443 ; CHECK64-NEXT: shll $24, %eax
444 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx
445 ; CHECK64-NEXT: shll $16, %ecx
446 ; CHECK64-NEXT: orl %eax, %ecx
447 ; CHECK64-NEXT: movzbl 2(%rdi), %edx
448 ; CHECK64-NEXT: shll $8, %edx
449 ; CHECK64-NEXT: orl %ecx, %edx
450 ; CHECK64-NEXT: movzbl 3(%rdi), %eax
451 ; CHECK64-NEXT: orl %edx, %eax
453 %tmp = bitcast i32* %arg to i8*
454 %tmp1 = load volatile i8, i8* %tmp, align 1
455 %tmp2 = zext i8 %tmp1 to i32
456 %tmp3 = shl nuw nsw i32 %tmp2, 24
457 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
458 %tmp5 = load i8, i8* %tmp4, align 1
459 %tmp6 = zext i8 %tmp5 to i32
460 %tmp7 = shl nuw nsw i32 %tmp6, 16
461 %tmp8 = or i32 %tmp7, %tmp3
462 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
463 %tmp10 = load i8, i8* %tmp9, align 1
464 %tmp11 = zext i8 %tmp10 to i32
465 %tmp12 = shl nuw nsw i32 %tmp11, 8
466 %tmp13 = or i32 %tmp8, %tmp12
467 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
468 %tmp15 = load i8, i8* %tmp14, align 1
469 %tmp16 = zext i8 %tmp15 to i32
470 %tmp17 = or i32 %tmp13, %tmp16
474 ; There is a store in between individual loads
476 ; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
478 ; res2 = ((i32) p[2] << 8) | (i32) p[3]
480 define i32 @load_i32_by_i8_bswap_store_in_between(i32* %arg, i32* %arg1) {
481 ; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
483 ; CHECK-NEXT: pushl %esi
484 ; CHECK-NEXT: .cfi_def_cfa_offset 8
485 ; CHECK-NEXT: .cfi_offset %esi, -8
486 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
487 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
488 ; CHECK-NEXT: movzbl (%ecx), %edx
489 ; CHECK-NEXT: shll $24, %edx
490 ; CHECK-NEXT: movzbl 1(%ecx), %esi
491 ; CHECK-NEXT: movl $0, (%eax)
492 ; CHECK-NEXT: shll $16, %esi
493 ; CHECK-NEXT: orl %edx, %esi
494 ; CHECK-NEXT: movzbl 2(%ecx), %edx
495 ; CHECK-NEXT: shll $8, %edx
496 ; CHECK-NEXT: orl %esi, %edx
497 ; CHECK-NEXT: movzbl 3(%ecx), %eax
498 ; CHECK-NEXT: orl %edx, %eax
499 ; CHECK-NEXT: popl %esi
500 ; CHECK-NEXT: .cfi_def_cfa_offset 4
503 ; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
505 ; CHECK64-NEXT: movzbl (%rdi), %eax
506 ; CHECK64-NEXT: shll $24, %eax
507 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx
508 ; CHECK64-NEXT: movl $0, (%rsi)
509 ; CHECK64-NEXT: shll $16, %ecx
510 ; CHECK64-NEXT: orl %eax, %ecx
511 ; CHECK64-NEXT: movzbl 2(%rdi), %edx
512 ; CHECK64-NEXT: shll $8, %edx
513 ; CHECK64-NEXT: orl %ecx, %edx
514 ; CHECK64-NEXT: movzbl 3(%rdi), %eax
515 ; CHECK64-NEXT: orl %edx, %eax
517 %tmp = bitcast i32* %arg to i8*
518 %tmp2 = load i8, i8* %tmp, align 1
519 %tmp3 = zext i8 %tmp2 to i32
520 %tmp4 = shl nuw nsw i32 %tmp3, 24
521 %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
522 %tmp6 = load i8, i8* %tmp5, align 1
523 ; This store will prevent folding of the pattern
524 store i32 0, i32* %arg1
525 %tmp7 = zext i8 %tmp6 to i32
526 %tmp8 = shl nuw nsw i32 %tmp7, 16
527 %tmp9 = or i32 %tmp8, %tmp4
528 %tmp10 = getelementptr inbounds i8, i8* %tmp, i32 2
529 %tmp11 = load i8, i8* %tmp10, align 1
530 %tmp12 = zext i8 %tmp11 to i32
531 %tmp13 = shl nuw nsw i32 %tmp12, 8
532 %tmp14 = or i32 %tmp9, %tmp13
533 %tmp15 = getelementptr inbounds i8, i8* %tmp, i32 3
534 %tmp16 = load i8, i8* %tmp15, align 1
535 %tmp17 = zext i8 %tmp16 to i32
536 %tmp18 = or i32 %tmp14, %tmp17
540 ; One of the loads is from an unrelated location
542 ; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
543 define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
544 ; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
546 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
547 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
548 ; CHECK-NEXT: movzbl (%ecx), %edx
549 ; CHECK-NEXT: shll $24, %edx
550 ; CHECK-NEXT: movzbl 1(%eax), %eax
551 ; CHECK-NEXT: shll $16, %eax
552 ; CHECK-NEXT: orl %edx, %eax
553 ; CHECK-NEXT: movzbl 2(%ecx), %edx
554 ; CHECK-NEXT: shll $8, %edx
555 ; CHECK-NEXT: orl %eax, %edx
556 ; CHECK-NEXT: movzbl 3(%ecx), %eax
557 ; CHECK-NEXT: orl %edx, %eax
560 ; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
562 ; CHECK64-NEXT: movzbl (%rdi), %eax
563 ; CHECK64-NEXT: shll $24, %eax
564 ; CHECK64-NEXT: movzbl 1(%rsi), %ecx
565 ; CHECK64-NEXT: shll $16, %ecx
566 ; CHECK64-NEXT: orl %eax, %ecx
567 ; CHECK64-NEXT: movzbl 2(%rdi), %edx
568 ; CHECK64-NEXT: shll $8, %edx
569 ; CHECK64-NEXT: orl %ecx, %edx
570 ; CHECK64-NEXT: movzbl 3(%rdi), %eax
571 ; CHECK64-NEXT: orl %edx, %eax
573 %tmp = bitcast i32* %arg to i8*
574 %tmp2 = bitcast i32* %arg1 to i8*
575 %tmp3 = load i8, i8* %tmp, align 1
576 %tmp4 = zext i8 %tmp3 to i32
577 %tmp5 = shl nuw nsw i32 %tmp4, 24
578 ; Load from an unrelated address
579 %tmp6 = getelementptr inbounds i8, i8* %tmp2, i32 1
580 %tmp7 = load i8, i8* %tmp6, align 1
581 %tmp8 = zext i8 %tmp7 to i32
582 %tmp9 = shl nuw nsw i32 %tmp8, 16
583 %tmp10 = or i32 %tmp9, %tmp5
584 %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 2
585 %tmp12 = load i8, i8* %tmp11, align 1
586 %tmp13 = zext i8 %tmp12 to i32
587 %tmp14 = shl nuw nsw i32 %tmp13, 8
588 %tmp15 = or i32 %tmp10, %tmp14
589 %tmp16 = getelementptr inbounds i8, i8* %tmp, i32 3
590 %tmp17 = load i8, i8* %tmp16, align 1
591 %tmp18 = zext i8 %tmp17 to i32
592 %tmp19 = or i32 %tmp15, %tmp18
597 ; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
598 define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
599 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
601 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
602 ; CHECK-NEXT: movl 1(%eax), %eax
605 ; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
607 ; CHECK64-NEXT: movl 1(%rdi), %eax
609 %tmp = bitcast i32* %arg to i8*
610 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
611 %tmp2 = load i8, i8* %tmp1, align 1
612 %tmp3 = zext i8 %tmp2 to i32
613 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
614 %tmp5 = load i8, i8* %tmp4, align 1
615 %tmp6 = zext i8 %tmp5 to i32
616 %tmp7 = shl nuw nsw i32 %tmp6, 8
617 %tmp8 = or i32 %tmp7, %tmp3
618 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
619 %tmp10 = load i8, i8* %tmp9, align 1
620 %tmp11 = zext i8 %tmp10 to i32
621 %tmp12 = shl nuw nsw i32 %tmp11, 16
622 %tmp13 = or i32 %tmp8, %tmp12
623 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
624 %tmp15 = load i8, i8* %tmp14, align 1
625 %tmp16 = zext i8 %tmp15 to i32
626 %tmp17 = shl nuw nsw i32 %tmp16, 24
627 %tmp18 = or i32 %tmp13, %tmp17
632 ; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
633 define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
634 ; CHECK-LABEL: load_i32_by_i8_neg_offset:
636 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
637 ; CHECK-NEXT: movl -4(%eax), %eax
640 ; CHECK64-LABEL: load_i32_by_i8_neg_offset:
642 ; CHECK64-NEXT: movl -4(%rdi), %eax
644 %tmp = bitcast i32* %arg to i8*
645 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
646 %tmp2 = load i8, i8* %tmp1, align 1
647 %tmp3 = zext i8 %tmp2 to i32
648 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
649 %tmp5 = load i8, i8* %tmp4, align 1
650 %tmp6 = zext i8 %tmp5 to i32
651 %tmp7 = shl nuw nsw i32 %tmp6, 8
652 %tmp8 = or i32 %tmp7, %tmp3
653 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
654 %tmp10 = load i8, i8* %tmp9, align 1
655 %tmp11 = zext i8 %tmp10 to i32
656 %tmp12 = shl nuw nsw i32 %tmp11, 16
657 %tmp13 = or i32 %tmp8, %tmp12
658 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
659 %tmp15 = load i8, i8* %tmp14, align 1
660 %tmp16 = zext i8 %tmp15 to i32
661 %tmp17 = shl nuw nsw i32 %tmp16, 24
662 %tmp18 = or i32 %tmp13, %tmp17
667 ; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
668 define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
669 ; BSWAP-LABEL: load_i32_by_i8_nonzero_offset_bswap:
671 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
672 ; BSWAP-NEXT: movl 1(%eax), %eax
673 ; BSWAP-NEXT: bswapl %eax
676 ; MOVBE-LABEL: load_i32_by_i8_nonzero_offset_bswap:
678 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
679 ; MOVBE-NEXT: movbel 1(%eax), %eax
682 ; BSWAP64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
684 ; BSWAP64-NEXT: movl 1(%rdi), %eax
685 ; BSWAP64-NEXT: bswapl %eax
688 ; MOVBE64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
690 ; MOVBE64-NEXT: movbel 1(%rdi), %eax
692 %tmp = bitcast i32* %arg to i8*
693 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
694 %tmp2 = load i8, i8* %tmp1, align 1
695 %tmp3 = zext i8 %tmp2 to i32
696 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
697 %tmp5 = load i8, i8* %tmp4, align 1
698 %tmp6 = zext i8 %tmp5 to i32
699 %tmp7 = shl nuw nsw i32 %tmp6, 8
700 %tmp8 = or i32 %tmp7, %tmp3
701 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
702 %tmp10 = load i8, i8* %tmp9, align 1
703 %tmp11 = zext i8 %tmp10 to i32
704 %tmp12 = shl nuw nsw i32 %tmp11, 16
705 %tmp13 = or i32 %tmp8, %tmp12
706 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
707 %tmp15 = load i8, i8* %tmp14, align 1
708 %tmp16 = zext i8 %tmp15 to i32
709 %tmp17 = shl nuw nsw i32 %tmp16, 24
710 %tmp18 = or i32 %tmp13, %tmp17
715 ; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
716 define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
717 ; BSWAP-LABEL: load_i32_by_i8_neg_offset_bswap:
719 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
720 ; BSWAP-NEXT: movl -4(%eax), %eax
721 ; BSWAP-NEXT: bswapl %eax
724 ; MOVBE-LABEL: load_i32_by_i8_neg_offset_bswap:
726 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
727 ; MOVBE-NEXT: movbel -4(%eax), %eax
730 ; BSWAP64-LABEL: load_i32_by_i8_neg_offset_bswap:
732 ; BSWAP64-NEXT: movl -4(%rdi), %eax
733 ; BSWAP64-NEXT: bswapl %eax
736 ; MOVBE64-LABEL: load_i32_by_i8_neg_offset_bswap:
738 ; MOVBE64-NEXT: movbel -4(%rdi), %eax
740 %tmp = bitcast i32* %arg to i8*
741 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
742 %tmp2 = load i8, i8* %tmp1, align 1
743 %tmp3 = zext i8 %tmp2 to i32
744 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
745 %tmp5 = load i8, i8* %tmp4, align 1
746 %tmp6 = zext i8 %tmp5 to i32
747 %tmp7 = shl nuw nsw i32 %tmp6, 8
748 %tmp8 = or i32 %tmp7, %tmp3
749 %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
750 %tmp10 = load i8, i8* %tmp9, align 1
751 %tmp11 = zext i8 %tmp10 to i32
752 %tmp12 = shl nuw nsw i32 %tmp11, 16
753 %tmp13 = or i32 %tmp8, %tmp12
754 %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
755 %tmp15 = load i8, i8* %tmp14, align 1
756 %tmp16 = zext i8 %tmp15 to i32
757 %tmp17 = shl nuw nsw i32 %tmp16, 24
758 %tmp18 = or i32 %tmp13, %tmp17
763 ; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
764 define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {
765 ; BSWAP-LABEL: load_i32_by_i8_bswap_base_index_offset:
767 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
768 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %ecx
769 ; BSWAP-NEXT: movl (%ecx,%eax), %eax
770 ; BSWAP-NEXT: bswapl %eax
773 ; MOVBE-LABEL: load_i32_by_i8_bswap_base_index_offset:
775 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
776 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
777 ; MOVBE-NEXT: movbel (%ecx,%eax), %eax
780 ; BSWAP64-LABEL: load_i32_by_i8_bswap_base_index_offset:
782 ; BSWAP64-NEXT: movslq %esi, %rax
783 ; BSWAP64-NEXT: movl (%rdi,%rax), %eax
784 ; BSWAP64-NEXT: bswapl %eax
787 ; MOVBE64-LABEL: load_i32_by_i8_bswap_base_index_offset:
789 ; MOVBE64-NEXT: movslq %esi, %rax
790 ; MOVBE64-NEXT: movbel (%rdi,%rax), %eax
792 %tmp = bitcast i32* %arg to i8*
793 %tmp2 = getelementptr inbounds i8, i8* %tmp, i32 %arg1
794 %tmp3 = load i8, i8* %tmp2, align 1
795 %tmp4 = zext i8 %tmp3 to i32
796 %tmp5 = shl nuw nsw i32 %tmp4, 24
797 %tmp6 = add nuw nsw i32 %arg1, 1
798 %tmp7 = getelementptr inbounds i8, i8* %tmp, i32 %tmp6
799 %tmp8 = load i8, i8* %tmp7, align 1
800 %tmp9 = zext i8 %tmp8 to i32
801 %tmp10 = shl nuw nsw i32 %tmp9, 16
802 %tmp11 = or i32 %tmp10, %tmp5
803 %tmp12 = add nuw nsw i32 %arg1, 2
804 %tmp13 = getelementptr inbounds i8, i8* %tmp, i32 %tmp12
805 %tmp14 = load i8, i8* %tmp13, align 1
806 %tmp15 = zext i8 %tmp14 to i32
807 %tmp16 = shl nuw nsw i32 %tmp15, 8
808 %tmp17 = or i32 %tmp11, %tmp16
809 %tmp18 = add nuw nsw i32 %arg1, 3
810 %tmp19 = getelementptr inbounds i8, i8* %tmp, i32 %tmp18
811 %tmp20 = load i8, i8* %tmp19, align 1
812 %tmp21 = zext i8 %tmp20 to i32
813 %tmp22 = or i32 %tmp17, %tmp21
817 ; Verify that we don't crash handling shl i32 %conv57, 32
818 define void @shift_i32_by_32(i8* %src1, i8* %src2, i64* %dst) {
819 ; CHECK-LABEL: shift_i32_by_32:
820 ; CHECK: # %bb.0: # %entry
821 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
822 ; CHECK-NEXT: movl $-1, 4(%eax)
823 ; CHECK-NEXT: movl $-1, (%eax)
826 ; CHECK64-LABEL: shift_i32_by_32:
827 ; CHECK64: # %bb.0: # %entry
828 ; CHECK64-NEXT: movq $-1, (%rdx)
831 %load1 = load i8, i8* %src1, align 1
832 %conv46 = zext i8 %load1 to i32
833 %shl47 = shl i32 %conv46, 56
834 %or55 = or i32 %shl47, 0
835 %load2 = load i8, i8* %src2, align 1
836 %conv57 = zext i8 %load2 to i32
837 %shl58 = shl i32 %conv57, 32
838 %or59 = or i32 %or55, %shl58
839 %or74 = or i32 %or59, 0
840 %conv75 = sext i32 %or74 to i64
841 store i64 %conv75, i64* %dst, align 8
845 declare i16 @llvm.bswap.i16(i16)
848 ; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
849 define i32 @load_i32_by_bswap_i16(i32* %arg) {
850 ; BSWAP-LABEL: load_i32_by_bswap_i16:
852 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
853 ; BSWAP-NEXT: movl (%eax), %eax
854 ; BSWAP-NEXT: bswapl %eax
857 ; MOVBE-LABEL: load_i32_by_bswap_i16:
859 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
860 ; MOVBE-NEXT: movbel (%eax), %eax
863 ; BSWAP64-LABEL: load_i32_by_bswap_i16:
865 ; BSWAP64-NEXT: movl (%rdi), %eax
866 ; BSWAP64-NEXT: bswapl %eax
869 ; MOVBE64-LABEL: load_i32_by_bswap_i16:
871 ; MOVBE64-NEXT: movbel (%rdi), %eax
873 %tmp = bitcast i32* %arg to i16*
874 %tmp1 = load i16, i16* %tmp, align 4
875 %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
876 %tmp2 = zext i16 %tmp11 to i32
877 %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
878 %tmp4 = load i16, i16* %tmp3, align 1
879 %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
880 %tmp5 = zext i16 %tmp41 to i32
881 %tmp6 = shl nuw nsw i32 %tmp2, 16
882 %tmp7 = or i32 %tmp6, %tmp5
887 ; (i32) p[0] | ((sext(p[1]) to i32) << 16)
888 define i32 @load_i32_by_sext_i16(i32* %arg) {
889 ; CHECK-LABEL: load_i32_by_sext_i16:
891 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
892 ; CHECK-NEXT: movl (%eax), %eax
895 ; CHECK64-LABEL: load_i32_by_sext_i16:
897 ; CHECK64-NEXT: movl (%rdi), %eax
899 %tmp = bitcast i32* %arg to i16*
900 %tmp1 = load i16, i16* %tmp, align 1
901 %tmp2 = zext i16 %tmp1 to i32
902 %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
903 %tmp4 = load i16, i16* %tmp3, align 1
904 %tmp5 = sext i16 %tmp4 to i32
905 %tmp6 = shl nuw nsw i32 %tmp5, 16
906 %tmp7 = or i32 %tmp6, %tmp2
912 ; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
913 define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
914 ; CHECK-LABEL: load_i32_by_i8_base_offset_index:
916 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
917 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
918 ; CHECK-NEXT: movl 12(%eax,%ecx), %eax
921 ; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
923 ; CHECK64-NEXT: movl %esi, %eax
924 ; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
926 %tmp = add nuw nsw i32 %i, 3
927 %tmp2 = add nuw nsw i32 %i, 2
928 %tmp3 = add nuw nsw i32 %i, 1
929 %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
930 %tmp5 = zext i32 %i to i64
931 %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
932 %tmp7 = load i8, i8* %tmp6, align 1
933 %tmp8 = zext i8 %tmp7 to i32
934 %tmp9 = zext i32 %tmp3 to i64
935 %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
936 %tmp11 = load i8, i8* %tmp10, align 1
937 %tmp12 = zext i8 %tmp11 to i32
938 %tmp13 = shl nuw nsw i32 %tmp12, 8
939 %tmp14 = or i32 %tmp13, %tmp8
940 %tmp15 = zext i32 %tmp2 to i64
941 %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
942 %tmp17 = load i8, i8* %tmp16, align 1
943 %tmp18 = zext i8 %tmp17 to i32
944 %tmp19 = shl nuw nsw i32 %tmp18, 16
945 %tmp20 = or i32 %tmp14, %tmp19
946 %tmp21 = zext i32 %tmp to i64
947 %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
948 %tmp23 = load i8, i8* %tmp22, align 1
949 %tmp24 = zext i8 %tmp23 to i32
950 %tmp25 = shl nuw i32 %tmp24, 24
951 %tmp26 = or i32 %tmp20, %tmp25
957 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
958 define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
959 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
961 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
962 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
963 ; CHECK-NEXT: movl 13(%eax,%ecx), %eax
966 ; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
968 ; CHECK64-NEXT: movl %esi, %eax
969 ; CHECK64-NEXT: movl 13(%rax,%rdi), %eax
971 %tmp = add nuw nsw i32 %i, 4
972 %tmp2 = add nuw nsw i32 %i, 3
973 %tmp3 = add nuw nsw i32 %i, 2
974 %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
975 %tmp5 = add nuw nsw i32 %i, 1
976 %tmp27 = zext i32 %tmp5 to i64
977 %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
978 %tmp29 = load i8, i8* %tmp28, align 1
979 %tmp30 = zext i8 %tmp29 to i32
980 %tmp31 = zext i32 %tmp3 to i64
981 %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
982 %tmp33 = load i8, i8* %tmp32, align 1
983 %tmp34 = zext i8 %tmp33 to i32
984 %tmp35 = shl nuw nsw i32 %tmp34, 8
985 %tmp36 = or i32 %tmp35, %tmp30
986 %tmp37 = zext i32 %tmp2 to i64
987 %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
988 %tmp39 = load i8, i8* %tmp38, align 1
989 %tmp40 = zext i8 %tmp39 to i32
990 %tmp41 = shl nuw nsw i32 %tmp40, 16
991 %tmp42 = or i32 %tmp36, %tmp41
992 %tmp43 = zext i32 %tmp to i64
993 %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
994 %tmp45 = load i8, i8* %tmp44, align 1
995 %tmp46 = zext i8 %tmp45 to i32
996 %tmp47 = shl nuw i32 %tmp46, 24
997 %tmp48 = or i32 %tmp42, %tmp47
1008 ; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
1010 ; This test excercises zero and any extend loads as a part of load combine pattern.
1011 ; In order to fold the pattern above we need to reassociate the address computation
1012 ; first. By the time the address computation is reassociated, loads are combined to
1013 ; zext and aext loads.
1014 define i32 @load_i32_by_i8_zaext_loads(i8* %arg, i32 %arg1) {
1015 ; CHECK-LABEL: load_i32_by_i8_zaext_loads:
1017 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1018 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1019 ; CHECK-NEXT: movl 12(%eax,%ecx), %eax
1022 ; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
1024 ; CHECK64-NEXT: movl %esi, %eax
1025 ; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
1026 ; CHECK64-NEXT: retq
; Assembles an i32 from the four bytes at %arg + %arg1 + 12 .. + 15, lowest
; address in bits 0-7. The addresses are deliberately built in two shapes:
; byte 0 adds the constant 12 first and the index second, while bytes 1-3 add
; the index (%arg1 + k) first and the constant 12 second. The expectations
; above are a single 32-bit movl with displacement 12 for both triples.
;
; Indices %arg1 + 3, + 2, + 1, each zero-extended to i64.
1027 %tmp = add nuw nsw i32 %arg1, 3
1028 %tmp2 = add nuw nsw i32 %arg1, 2
1029 %tmp3 = add nuw nsw i32 %arg1, 1
1030 %tmp4 = zext i32 %tmp to i64
1031 %tmp5 = zext i32 %tmp2 to i64
1032 %tmp6 = zext i32 %tmp3 to i64
; Index-first pointers for bytes 3, 2 and 1; the constant 12 is added later.
1033 %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
1034 %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
1035 %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
; Bits 0-7: constant-first addressing, (%arg + 12) + zext(%arg1).
1036 %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
1037 %tmp33 = zext i32 %arg1 to i64
1038 %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
1039 %tmp35 = load i8, i8* %tmp34, align 1
1040 %tmp36 = zext i8 %tmp35 to i32
; Bits 8-15: (%arg + zext(%arg1 + 1)) + 12.
1041 %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
1042 %tmp38 = load i8, i8* %tmp37, align 1
1043 %tmp39 = zext i8 %tmp38 to i32
1044 %tmp40 = shl nuw nsw i32 %tmp39, 8
1045 %tmp41 = or i32 %tmp40, %tmp36
; Bits 16-23: (%arg + zext(%arg1 + 2)) + 12.
1046 %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
1047 %tmp43 = load i8, i8* %tmp42, align 1
1048 %tmp44 = zext i8 %tmp43 to i32
1049 %tmp45 = shl nuw nsw i32 %tmp44, 16
1050 %tmp46 = or i32 %tmp41, %tmp45
; Bits 24-31: (%arg + zext(%arg1 + 3)) + 12. This shift carries only nuw.
1051 %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
1052 %tmp48 = load i8, i8* %tmp47, align 1
1053 %tmp49 = zext i8 %tmp48 to i32
1054 %tmp50 = shl nuw i32 %tmp49, 24
1055 %tmp51 = or i32 %tmp46, %tmp50
1059 ; The same as load_i32_by_i8_zaext_loads but the last load is combined to
1069 ; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
1070 define i32 @load_i32_by_i8_zsext_loads(i8* %arg, i32 %arg1) {
1071 ; CHECK-LABEL: load_i32_by_i8_zsext_loads:
1073 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1074 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1075 ; CHECK-NEXT: movl 12(%eax,%ecx), %eax
1078 ; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
1080 ; CHECK64-NEXT: movl %esi, %eax
1081 ; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
1082 ; CHECK64-NEXT: retq
; Same address pattern as load_i32_by_i8_zaext_loads: four bytes at
; %arg + %arg1 + 12 .. + 15, with byte 0 using constant-first addressing and
; bytes 1-3 using index-first addressing. The difference is the most
; significant byte, which is sign-extended from i8 to i16 and only then
; zero-extended to i32. The expectations above are still a single 32-bit movl
; with displacement 12 for both triples.
;
; Indices %arg1 + 3, + 2, + 1, each zero-extended to i64.
1083 %tmp = add nuw nsw i32 %arg1, 3
1084 %tmp2 = add nuw nsw i32 %arg1, 2
1085 %tmp3 = add nuw nsw i32 %arg1, 1
1086 %tmp4 = zext i32 %tmp to i64
1087 %tmp5 = zext i32 %tmp2 to i64
1088 %tmp6 = zext i32 %tmp3 to i64
; Index-first pointers for bytes 3, 2 and 1; the constant 12 is added later.
1089 %tmp24 = getelementptr inbounds i8, i8* %arg, i64 %tmp4
1090 %tmp30 = getelementptr inbounds i8, i8* %arg, i64 %tmp5
1091 %tmp31 = getelementptr inbounds i8, i8* %arg, i64 %tmp6
; Bits 0-7: constant-first addressing, (%arg + 12) + zext(%arg1).
1092 %tmp32 = getelementptr inbounds i8, i8* %arg, i64 12
1093 %tmp33 = zext i32 %arg1 to i64
1094 %tmp34 = getelementptr inbounds i8, i8* %tmp32, i64 %tmp33
1095 %tmp35 = load i8, i8* %tmp34, align 1
1096 %tmp36 = zext i8 %tmp35 to i32
; Bits 8-15: (%arg + zext(%arg1 + 1)) + 12.
1097 %tmp37 = getelementptr inbounds i8, i8* %tmp31, i64 12
1098 %tmp38 = load i8, i8* %tmp37, align 1
1099 %tmp39 = zext i8 %tmp38 to i32
1100 %tmp40 = shl nuw nsw i32 %tmp39, 8
1101 %tmp41 = or i32 %tmp40, %tmp36
; Bits 16-23: (%arg + zext(%arg1 + 2)) + 12.
1102 %tmp42 = getelementptr inbounds i8, i8* %tmp30, i64 12
1103 %tmp43 = load i8, i8* %tmp42, align 1
1104 %tmp44 = zext i8 %tmp43 to i32
1105 %tmp45 = shl nuw nsw i32 %tmp44, 16
1106 %tmp46 = or i32 %tmp41, %tmp45
; Bits 24-31: (%arg + zext(%arg1 + 3)) + 12, widened via sext to i16 and then
; zext to i32. The copies of the sign bit land in bits 8-15 of the i32 and are
; pushed out of the value by the shift of 24 below.
; NOTE(review): for a byte with its top bit set those shifted-out bits are
; non-zero, which the nuw flag on the shl does not allow - confirm that this
; flag is intended in the test input.
1107 %tmp47 = getelementptr inbounds i8, i8* %tmp24, i64 12
1108 %tmp48 = load i8, i8* %tmp47, align 1
1109 %tmp49 = sext i8 %tmp48 to i16
1110 %tmp50 = zext i16 %tmp49 to i32
1111 %tmp51 = shl nuw i32 %tmp50, 24
1112 %tmp52 = or i32 %tmp46, %tmp51
1117 ; (i32) p[0] | ((i32) p[1] << 8)
1118 define i32 @zext_load_i32_by_i8(i32* %arg) {
1119 ; CHECK-LABEL: zext_load_i32_by_i8:
1121 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1122 ; CHECK-NEXT: movzbl (%eax), %ecx
1123 ; CHECK-NEXT: movzbl 1(%eax), %eax
1124 ; CHECK-NEXT: shll $8, %eax
1125 ; CHECK-NEXT: orl %ecx, %eax
1128 ; CHECK64-LABEL: zext_load_i32_by_i8:
1130 ; CHECK64-NEXT: movzbl (%rdi), %ecx
1131 ; CHECK64-NEXT: movzbl 1(%rdi), %eax
1132 ; CHECK64-NEXT: shll $8, %eax
1133 ; CHECK64-NEXT: orl %ecx, %eax
1134 ; CHECK64-NEXT: retq
; Builds an i32 from only two bytes: p[0] in bits 0-7 and p[1] in bits 8-15,
; leaving bits 16-31 zero. The expectations above keep two separate byte
; loads (movzbl) combined with a shift and an or, not one wider load.
;
; View the i32 pointer as a byte pointer.
1135 %tmp = bitcast i32* %arg to i8*
; Bits 0-7: byte at offset 0.
1136 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
1137 %tmp2 = load i8, i8* %tmp1, align 1
1138 %tmp3 = zext i8 %tmp2 to i32
; Bits 8-15: byte at offset 1.
1139 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
1140 %tmp5 = load i8, i8* %tmp4, align 1
1141 %tmp6 = zext i8 %tmp5 to i32
1142 %tmp7 = shl nuw nsw i32 %tmp6, 8
1143 %tmp8 = or i32 %tmp7, %tmp3
1148 ; ((i32) p[0] << 8) | ((i32) p[1] << 16)
1149 define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
1150 ; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
1152 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1153 ; CHECK-NEXT: movzbl (%eax), %ecx
1154 ; CHECK-NEXT: shll $8, %ecx
1155 ; CHECK-NEXT: movzbl 1(%eax), %eax
1156 ; CHECK-NEXT: shll $16, %eax
1157 ; CHECK-NEXT: orl %ecx, %eax
1160 ; CHECK64-LABEL: zext_load_i32_by_i8_shl_8:
1162 ; CHECK64-NEXT: movzbl (%rdi), %ecx
1163 ; CHECK64-NEXT: shll $8, %ecx
1164 ; CHECK64-NEXT: movzbl 1(%rdi), %eax
1165 ; CHECK64-NEXT: shll $16, %eax
1166 ; CHECK64-NEXT: orl %ecx, %eax
1167 ; CHECK64-NEXT: retq
; Two-byte pattern placed one byte higher in the result: p[0] goes to bits
; 8-15 and p[1] to bits 16-23. The expectations above keep two separate byte
; loads (movzbl), each shifted on its own, followed by an or.
;
; View the i32 pointer as a byte pointer.
1168 %tmp = bitcast i32* %arg to i8*
; Bits 8-15: byte at offset 0, shifted left by 8.
1169 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
1170 %tmp2 = load i8, i8* %tmp1, align 1
1171 %tmp3 = zext i8 %tmp2 to i32
1172 %tmp30 = shl nuw nsw i32 %tmp3, 8
; Bits 16-23: byte at offset 1, shifted left by 16.
1173 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
1174 %tmp5 = load i8, i8* %tmp4, align 1
1175 %tmp6 = zext i8 %tmp5 to i32
1176 %tmp7 = shl nuw nsw i32 %tmp6, 16
1177 %tmp8 = or i32 %tmp7, %tmp30
1182 ; ((i32) p[0] << 16) | ((i32) p[1] << 24)
1183 define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
1184 ; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
1186 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1187 ; CHECK-NEXT: movzbl (%eax), %ecx
1188 ; CHECK-NEXT: shll $16, %ecx
1189 ; CHECK-NEXT: movzbl 1(%eax), %eax
1190 ; CHECK-NEXT: shll $24, %eax
1191 ; CHECK-NEXT: orl %ecx, %eax
1194 ; CHECK64-LABEL: zext_load_i32_by_i8_shl_16:
1196 ; CHECK64-NEXT: movzbl (%rdi), %ecx
1197 ; CHECK64-NEXT: shll $16, %ecx
1198 ; CHECK64-NEXT: movzbl 1(%rdi), %eax
1199 ; CHECK64-NEXT: shll $24, %eax
1200 ; CHECK64-NEXT: orl %ecx, %eax
1201 ; CHECK64-NEXT: retq
; Two-byte pattern placed in the upper half of the result: p[0] goes to bits
; 16-23 and p[1] to bits 24-31. The expectations above keep two separate byte
; loads (movzbl), each shifted on its own, followed by an or.
;
; View the i32 pointer as a byte pointer.
1202 %tmp = bitcast i32* %arg to i8*
; Bits 16-23: byte at offset 0, shifted left by 16.
1203 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
1204 %tmp2 = load i8, i8* %tmp1, align 1
1205 %tmp3 = zext i8 %tmp2 to i32
1206 %tmp30 = shl nuw nsw i32 %tmp3, 16
; Bits 24-31: byte at offset 1, shifted left by 24. Only nuw is valid here:
; a byte with its top bit set moves a 1 into the sign bit, so an nsw flag
; would turn the result into poison for every byte value of 0x80 or above.
1207 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
1208 %tmp5 = load i8, i8* %tmp4, align 1
1209 %tmp6 = zext i8 %tmp5 to i32
1210 %tmp7 = shl nuw i32 %tmp6, 24
1211 %tmp8 = or i32 %tmp7, %tmp30
1216 ; (i32) p[1] | ((i32) p[0] << 8)
1217 define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
1218 ; CHECK-LABEL: zext_load_i32_by_i8_bswap:
1220 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1221 ; CHECK-NEXT: movzbl 1(%eax), %ecx
1222 ; CHECK-NEXT: movzbl (%eax), %eax
1223 ; CHECK-NEXT: shll $8, %eax
1224 ; CHECK-NEXT: orl %ecx, %eax
1227 ; CHECK64-LABEL: zext_load_i32_by_i8_bswap:
1229 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx
1230 ; CHECK64-NEXT: movzbl (%rdi), %eax
1231 ; CHECK64-NEXT: shll $8, %eax
1232 ; CHECK64-NEXT: orl %ecx, %eax
1233 ; CHECK64-NEXT: retq
; Byte-reversed variant of the two-byte pattern: p[1] goes to bits 0-7 and
; p[0] to bits 8-15, leaving bits 16-31 zero. The expectations above keep two
; separate byte loads (movzbl) combined with a shift and an or.
;
; View the i32 pointer as a byte pointer.
1234 %tmp = bitcast i32* %arg to i8*
; Bits 0-7: byte at offset 1.
1235 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
1236 %tmp2 = load i8, i8* %tmp1, align 1
1237 %tmp3 = zext i8 %tmp2 to i32
; Bits 8-15: byte at offset 0.
1238 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
1239 %tmp5 = load i8, i8* %tmp4, align 1
1240 %tmp6 = zext i8 %tmp5 to i32
1241 %tmp7 = shl nuw nsw i32 %tmp6, 8
1242 %tmp8 = or i32 %tmp7, %tmp3
1247 ; ((i32) p[1] << 8) | ((i32) p[0] << 16)
1248 define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
1249 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
1251 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1252 ; CHECK-NEXT: movzbl 1(%eax), %ecx
1253 ; CHECK-NEXT: shll $8, %ecx
1254 ; CHECK-NEXT: movzbl (%eax), %eax
1255 ; CHECK-NEXT: shll $16, %eax
1256 ; CHECK-NEXT: orl %ecx, %eax
1259 ; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_8:
1261 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx
1262 ; CHECK64-NEXT: shll $8, %ecx
1263 ; CHECK64-NEXT: movzbl (%rdi), %eax
1264 ; CHECK64-NEXT: shll $16, %eax
1265 ; CHECK64-NEXT: orl %ecx, %eax
1266 ; CHECK64-NEXT: retq
; Byte-reversed two-byte pattern placed one byte higher in the result: p[1]
; goes to bits 8-15 and p[0] to bits 16-23. The expectations above keep two
; separate byte loads (movzbl), each shifted on its own, followed by an or.
;
; View the i32 pointer as a byte pointer.
1267 %tmp = bitcast i32* %arg to i8*
; Bits 8-15: byte at offset 1, shifted left by 8.
1268 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
1269 %tmp2 = load i8, i8* %tmp1, align 1
1270 %tmp3 = zext i8 %tmp2 to i32
1271 %tmp30 = shl nuw nsw i32 %tmp3, 8
; Bits 16-23: byte at offset 0, shifted left by 16.
1272 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
1273 %tmp5 = load i8, i8* %tmp4, align 1
1274 %tmp6 = zext i8 %tmp5 to i32
1275 %tmp7 = shl nuw nsw i32 %tmp6, 16
1276 %tmp8 = or i32 %tmp7, %tmp30
1281 ; ((i32) p[1] << 16) | ((i32) p[0] << 24)
1282 define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
1283 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1285 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1286 ; CHECK-NEXT: movzbl 1(%eax), %ecx
1287 ; CHECK-NEXT: shll $16, %ecx
1288 ; CHECK-NEXT: movzbl (%eax), %eax
1289 ; CHECK-NEXT: shll $24, %eax
1290 ; CHECK-NEXT: orl %ecx, %eax
1293 ; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1295 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx
1296 ; CHECK64-NEXT: shll $16, %ecx
1297 ; CHECK64-NEXT: movzbl (%rdi), %eax
1298 ; CHECK64-NEXT: shll $24, %eax
1299 ; CHECK64-NEXT: orl %ecx, %eax
1300 ; CHECK64-NEXT: retq
; Byte-reversed two-byte pattern placed in the upper half of the result: p[1]
; goes to bits 16-23 and p[0] to bits 24-31. The expectations above keep two
; separate byte loads (movzbl), each shifted on its own, followed by an or.
;
; View the i32 pointer as a byte pointer.
1301 %tmp = bitcast i32* %arg to i8*
; Bits 16-23: byte at offset 1, shifted left by 16.
1302 %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
1303 %tmp2 = load i8, i8* %tmp1, align 1
1304 %tmp3 = zext i8 %tmp2 to i32
1305 %tmp30 = shl nuw nsw i32 %tmp3, 16
; Bits 24-31: byte at offset 0, shifted left by 24. Only nuw is valid here:
; a byte with its top bit set moves a 1 into the sign bit, so an nsw flag
; would turn the result into poison for every byte value of 0x80 or above.
1306 %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
1307 %tmp5 = load i8, i8* %tmp4, align 1
1308 %tmp6 = zext i8 %tmp5 to i32
1309 %tmp7 = shl nuw i32 %tmp6, 24
1310 %tmp8 = or i32 %tmp7, %tmp30