1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=BSWAP
3 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=MOVBE
4 ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64 --check-prefix=BSWAP64
5 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK64 --check-prefix=MOVBE64
8 ; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
; Builds an i32 from four consecutive i8 loads, placing the byte at offset k
; in bits 8*k .. 8*k+7 (least-significant byte first). The assertions expect
; the whole sequence to become a single 32-bit movl from the pointer.
9 define i32 @load_i32_by_i8(ptr %arg) {
10 ; CHECK-LABEL: load_i32_by_i8:
12 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
13 ; CHECK-NEXT: movl (%eax), %eax
16 ; CHECK64-LABEL: load_i32_by_i8:
18 ; CHECK64-NEXT: movl (%rdi), %eax
; Byte at offset 0 -> bits 0-7.
20 %tmp1 = load i8, ptr %arg, align 1
21 %tmp2 = zext i8 %tmp1 to i32
; Byte at offset 1 -> bits 8-15.
22 %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
23 %tmp4 = load i8, ptr %tmp3, align 1
24 %tmp5 = zext i8 %tmp4 to i32
25 %tmp6 = shl nuw nsw i32 %tmp5, 8
26 %tmp7 = or i32 %tmp6, %tmp2
; Byte at offset 2 -> bits 16-23.
27 %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
28 %tmp9 = load i8, ptr %tmp8, align 1
29 %tmp10 = zext i8 %tmp9 to i32
30 %tmp11 = shl nuw nsw i32 %tmp10, 16
31 %tmp12 = or i32 %tmp7, %tmp11
; Byte at offset 3 -> bits 24-31.
32 %tmp13 = getelementptr inbounds i8, ptr %arg, i32 3
33 %tmp14 = load i8, ptr %tmp13, align 1
34 %tmp15 = zext i8 %tmp14 to i32
35 %tmp16 = shl nuw nsw i32 %tmp15, 24
36 %tmp17 = or i32 %tmp12, %tmp16
41 ; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
; Builds an i32 from four consecutive i8 loads with the byte at offset 0 in
; the top bits (most-significant byte first). The assertions expect one
; 32-bit load followed by bswapl, or a single movbel when +movbe is enabled.
42 define i32 @load_i32_by_i8_bswap(ptr %arg) {
43 ; BSWAP-LABEL: load_i32_by_i8_bswap:
45 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
46 ; BSWAP-NEXT: movl (%eax), %eax
47 ; BSWAP-NEXT: bswapl %eax
50 ; MOVBE-LABEL: load_i32_by_i8_bswap:
52 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
53 ; MOVBE-NEXT: movbel (%eax), %eax
56 ; BSWAP64-LABEL: load_i32_by_i8_bswap:
58 ; BSWAP64-NEXT: movl (%rdi), %eax
59 ; BSWAP64-NEXT: bswapl %eax
62 ; MOVBE64-LABEL: load_i32_by_i8_bswap:
64 ; MOVBE64-NEXT: movbel (%rdi), %eax
; Byte at offset 0 -> bits 24-31.
66 %tmp1 = load i8, ptr %arg, align 1
67 %tmp2 = zext i8 %tmp1 to i32
68 %tmp3 = shl nuw nsw i32 %tmp2, 24
; Byte at offset 1 -> bits 16-23.
69 %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
70 %tmp5 = load i8, ptr %tmp4, align 1
71 %tmp6 = zext i8 %tmp5 to i32
72 %tmp7 = shl nuw nsw i32 %tmp6, 16
73 %tmp8 = or i32 %tmp7, %tmp3
; Byte at offset 2 -> bits 8-15.
74 %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
75 %tmp10 = load i8, ptr %tmp9, align 1
76 %tmp11 = zext i8 %tmp10 to i32
77 %tmp12 = shl nuw nsw i32 %tmp11, 8
78 %tmp13 = or i32 %tmp8, %tmp12
; Byte at offset 3 -> bits 0-7 (no shift).
79 %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
80 %tmp15 = load i8, ptr %tmp14, align 1
81 %tmp16 = zext i8 %tmp15 to i32
82 %tmp17 = or i32 %tmp13, %tmp16
87 ; (i32) p[0] | ((i32) p[1] << 16)
; Builds an i32 from two consecutive i16 loads (both only 1-byte aligned):
; element 0 supplies bits 0-15 and element 1 supplies bits 16-31. The
; assertions expect a single 32-bit movl.
88 define i32 @load_i32_by_i16(ptr %arg) {
89 ; CHECK-LABEL: load_i32_by_i16:
91 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
92 ; CHECK-NEXT: movl (%eax), %eax
95 ; CHECK64-LABEL: load_i32_by_i16:
97 ; CHECK64-NEXT: movl (%rdi), %eax
; i16 element 0 -> low half.
99 %tmp1 = load i16, ptr %arg, align 1
100 %tmp2 = zext i16 %tmp1 to i32
; i16 element 1 (byte offset 2) -> high half.
101 %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
102 %tmp4 = load i16, ptr %tmp3, align 1
103 %tmp5 = zext i16 %tmp4 to i32
104 %tmp6 = shl nuw nsw i32 %tmp5, 16
105 %tmp7 = or i32 %tmp6, %tmp2
110 ; ptr p_8 = (ptr) p_16;
111 ; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
; Mixes load widths: one i16 load at offset 0 supplies bits 0-15, and two i8
; loads at byte offsets 2 and 3 supply bits 16-23 and 24-31. The assertions
; expect a single 32-bit movl.
112 define i32 @load_i32_by_i16_i8(ptr %arg) {
113 ; CHECK-LABEL: load_i32_by_i16_i8:
115 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
116 ; CHECK-NEXT: movl (%eax), %eax
119 ; CHECK64-LABEL: load_i32_by_i16_i8:
121 ; CHECK64-NEXT: movl (%rdi), %eax
; 16-bit load covering byte offsets 0 and 1.
123 %tmp2 = load i16, ptr %arg, align 1
124 %tmp3 = zext i16 %tmp2 to i32
; Byte at offset 2 -> bits 16-23.
125 %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
126 %tmp5 = load i8, ptr %tmp4, align 1
127 %tmp6 = zext i8 %tmp5 to i32
128 %tmp7 = shl nuw nsw i32 %tmp6, 16
; Byte at offset 3 -> bits 24-31.
129 %tmp8 = getelementptr inbounds i8, ptr %arg, i32 3
130 %tmp9 = load i8, ptr %tmp8, align 1
131 %tmp10 = zext i8 %tmp9 to i32
132 %tmp11 = shl nuw nsw i32 %tmp10, 24
; The two upper bytes are combined first, then OR-ed with the 16-bit low part.
133 %tmp12 = or i32 %tmp7, %tmp11
134 %tmp13 = or i32 %tmp12, %tmp3
140 ; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
; Two-level pattern: each 16-bit half is first assembled in i16 from two i8
; loads (low byte first), then both halves are zero-extended to i32 and the
; second half is shifted left by 16. The assertions expect a single 32-bit
; movl.
141 define i32 @load_i32_by_i16_by_i8(ptr %arg) {
142 ; CHECK-LABEL: load_i32_by_i16_by_i8:
144 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
145 ; CHECK-NEXT: movl (%eax), %eax
148 ; CHECK64-LABEL: load_i32_by_i16_by_i8:
150 ; CHECK64-NEXT: movl (%rdi), %eax
; Low half: byte 0 | (byte 1 << 8), computed in i16.
152 %tmp1 = load i8, ptr %arg, align 1
153 %tmp2 = zext i8 %tmp1 to i16
154 %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
155 %tmp4 = load i8, ptr %tmp3, align 1
156 %tmp5 = zext i8 %tmp4 to i16
157 %tmp6 = shl nuw nsw i16 %tmp5, 8
158 %tmp7 = or i16 %tmp6, %tmp2
; High half: byte 2 | (byte 3 << 8), computed in i16.
159 %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
160 %tmp9 = load i8, ptr %tmp8, align 1
161 %tmp10 = zext i8 %tmp9 to i16
162 %tmp11 = getelementptr inbounds i8, ptr %arg, i32 3
163 %tmp12 = load i8, ptr %tmp11, align 1
164 %tmp13 = zext i8 %tmp12 to i16
165 %tmp14 = shl nuw nsw i16 %tmp13, 8
166 %tmp15 = or i16 %tmp14, %tmp10
; Widen both halves and place the high half in bits 16-31.
167 %tmp16 = zext i16 %tmp7 to i32
168 %tmp17 = zext i16 %tmp15 to i32
169 %tmp18 = shl nuw nsw i32 %tmp17, 16
170 %tmp19 = or i32 %tmp18, %tmp16
175 ; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
; Two-level most-significant-byte-first pattern: each 16-bit half is built in
; i16 with the lower-addressed byte shifted left by 8, and the half built from
; bytes 0-1 is then shifted into bits 16-31. The assertions expect one 32-bit
; load followed by bswapl, or a single movbel when +movbe is enabled.
176 define i32 @load_i32_by_i16_by_i8_bswap(ptr %arg) {
177 ; BSWAP-LABEL: load_i32_by_i16_by_i8_bswap:
179 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
180 ; BSWAP-NEXT: movl (%eax), %eax
181 ; BSWAP-NEXT: bswapl %eax
184 ; MOVBE-LABEL: load_i32_by_i16_by_i8_bswap:
186 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
187 ; MOVBE-NEXT: movbel (%eax), %eax
190 ; BSWAP64-LABEL: load_i32_by_i16_by_i8_bswap:
192 ; BSWAP64-NEXT: movl (%rdi), %eax
193 ; BSWAP64-NEXT: bswapl %eax
196 ; MOVBE64-LABEL: load_i32_by_i16_by_i8_bswap:
198 ; MOVBE64-NEXT: movbel (%rdi), %eax
; Upper half: (byte 0 << 8) | byte 1, computed in i16.
200 %tmp1 = load i8, ptr %arg, align 1
201 %tmp2 = zext i8 %tmp1 to i16
202 %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
203 %tmp4 = load i8, ptr %tmp3, align 1
204 %tmp5 = zext i8 %tmp4 to i16
205 %tmp6 = shl nuw nsw i16 %tmp2, 8
206 %tmp7 = or i16 %tmp6, %tmp5
; Lower half: (byte 2 << 8) | byte 3, computed in i16.
207 %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
208 %tmp9 = load i8, ptr %tmp8, align 1
209 %tmp10 = zext i8 %tmp9 to i16
210 %tmp11 = getelementptr inbounds i8, ptr %arg, i32 3
211 %tmp12 = load i8, ptr %tmp11, align 1
212 %tmp13 = zext i8 %tmp12 to i16
213 %tmp14 = shl nuw nsw i16 %tmp10, 8
214 %tmp15 = or i16 %tmp14, %tmp13
; Widen both halves; the half built from bytes 0-1 goes to bits 16-31.
215 %tmp16 = zext i16 %tmp7 to i32
216 %tmp17 = zext i16 %tmp15 to i32
217 %tmp18 = shl nuw nsw i32 %tmp16, 16
218 %tmp19 = or i32 %tmp18, %tmp17
223 ; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
; 64-bit version of the least-significant-byte-first pattern: eight
; consecutive i8 loads, byte k shifted to bit 8*k. The assertions expect two
; 32-bit loads (offsets 0 and 4) on the 32-bit target and a single movq on
; x86-64.
224 define i64 @load_i64_by_i8(ptr %arg) {
225 ; CHECK-LABEL: load_i64_by_i8:
227 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
228 ; CHECK-NEXT: movl (%ecx), %eax
229 ; CHECK-NEXT: movl 4(%ecx), %edx
232 ; CHECK64-LABEL: load_i64_by_i8:
234 ; CHECK64-NEXT: movq (%rdi), %rax
; Byte 0 -> bits 0-7.
236 %tmp1 = load i8, ptr %arg, align 1
237 %tmp2 = zext i8 %tmp1 to i64
; Byte 1 -> bits 8-15.
238 %tmp3 = getelementptr inbounds i8, ptr %arg, i64 1
239 %tmp4 = load i8, ptr %tmp3, align 1
240 %tmp5 = zext i8 %tmp4 to i64
241 %tmp6 = shl nuw nsw i64 %tmp5, 8
242 %tmp7 = or i64 %tmp6, %tmp2
; Byte 2 -> bits 16-23.
243 %tmp8 = getelementptr inbounds i8, ptr %arg, i64 2
244 %tmp9 = load i8, ptr %tmp8, align 1
245 %tmp10 = zext i8 %tmp9 to i64
246 %tmp11 = shl nuw nsw i64 %tmp10, 16
247 %tmp12 = or i64 %tmp7, %tmp11
; Byte 3 -> bits 24-31.
248 %tmp13 = getelementptr inbounds i8, ptr %arg, i64 3
249 %tmp14 = load i8, ptr %tmp13, align 1
250 %tmp15 = zext i8 %tmp14 to i64
251 %tmp16 = shl nuw nsw i64 %tmp15, 24
252 %tmp17 = or i64 %tmp12, %tmp16
; Byte 4 -> bits 32-39.
253 %tmp18 = getelementptr inbounds i8, ptr %arg, i64 4
254 %tmp19 = load i8, ptr %tmp18, align 1
255 %tmp20 = zext i8 %tmp19 to i64
256 %tmp21 = shl nuw nsw i64 %tmp20, 32
257 %tmp22 = or i64 %tmp17, %tmp21
; Byte 5 -> bits 40-47.
258 %tmp23 = getelementptr inbounds i8, ptr %arg, i64 5
259 %tmp24 = load i8, ptr %tmp23, align 1
260 %tmp25 = zext i8 %tmp24 to i64
261 %tmp26 = shl nuw nsw i64 %tmp25, 40
262 %tmp27 = or i64 %tmp22, %tmp26
; Byte 6 -> bits 48-55.
263 %tmp28 = getelementptr inbounds i8, ptr %arg, i64 6
264 %tmp29 = load i8, ptr %tmp28, align 1
265 %tmp30 = zext i8 %tmp29 to i64
266 %tmp31 = shl nuw nsw i64 %tmp30, 48
267 %tmp32 = or i64 %tmp27, %tmp31
; Byte 7 -> bits 56-63.
268 %tmp33 = getelementptr inbounds i8, ptr %arg, i64 7
269 %tmp34 = load i8, ptr %tmp33, align 1
270 %tmp35 = zext i8 %tmp34 to i64
271 %tmp36 = shl nuw i64 %tmp35, 56
272 %tmp37 = or i64 %tmp32, %tmp36
277 ; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
; 64-bit version of the most-significant-byte-first pattern: eight
; consecutive i8 loads with byte 0 in bits 56-63 and byte 7 in bits 0-7.
; On the 32-bit target the assertions expect two 32-bit loads that are each
; byte-swapped, with the word at offset 4 ending up in eax and the word at
; offset 0 in edx. On x86-64 they expect movq + bswapq, or a single movbeq
; with +movbe.
278 define i64 @load_i64_by_i8_bswap(ptr %arg) {
279 ; BSWAP-LABEL: load_i64_by_i8_bswap:
281 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
282 ; BSWAP-NEXT: movl (%eax), %edx
283 ; BSWAP-NEXT: movl 4(%eax), %eax
284 ; BSWAP-NEXT: bswapl %eax
285 ; BSWAP-NEXT: bswapl %edx
288 ; MOVBE-LABEL: load_i64_by_i8_bswap:
290 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
291 ; MOVBE-NEXT: movbel 4(%ecx), %eax
292 ; MOVBE-NEXT: movbel (%ecx), %edx
295 ; BSWAP64-LABEL: load_i64_by_i8_bswap:
297 ; BSWAP64-NEXT: movq (%rdi), %rax
298 ; BSWAP64-NEXT: bswapq %rax
301 ; MOVBE64-LABEL: load_i64_by_i8_bswap:
303 ; MOVBE64-NEXT: movbeq (%rdi), %rax
; Byte 0 -> bits 56-63.
305 %tmp1 = load i8, ptr %arg, align 1
306 %tmp2 = zext i8 %tmp1 to i64
307 %tmp3 = shl nuw i64 %tmp2, 56
; Byte 1 -> bits 48-55.
308 %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
309 %tmp5 = load i8, ptr %tmp4, align 1
310 %tmp6 = zext i8 %tmp5 to i64
311 %tmp7 = shl nuw nsw i64 %tmp6, 48
312 %tmp8 = or i64 %tmp7, %tmp3
; Byte 2 -> bits 40-47.
313 %tmp9 = getelementptr inbounds i8, ptr %arg, i64 2
314 %tmp10 = load i8, ptr %tmp9, align 1
315 %tmp11 = zext i8 %tmp10 to i64
316 %tmp12 = shl nuw nsw i64 %tmp11, 40
317 %tmp13 = or i64 %tmp8, %tmp12
; Byte 3 -> bits 32-39.
318 %tmp14 = getelementptr inbounds i8, ptr %arg, i64 3
319 %tmp15 = load i8, ptr %tmp14, align 1
320 %tmp16 = zext i8 %tmp15 to i64
321 %tmp17 = shl nuw nsw i64 %tmp16, 32
322 %tmp18 = or i64 %tmp13, %tmp17
; Byte 4 -> bits 24-31.
323 %tmp19 = getelementptr inbounds i8, ptr %arg, i64 4
324 %tmp20 = load i8, ptr %tmp19, align 1
325 %tmp21 = zext i8 %tmp20 to i64
326 %tmp22 = shl nuw nsw i64 %tmp21, 24
327 %tmp23 = or i64 %tmp18, %tmp22
; Byte 5 -> bits 16-23.
328 %tmp24 = getelementptr inbounds i8, ptr %arg, i64 5
329 %tmp25 = load i8, ptr %tmp24, align 1
330 %tmp26 = zext i8 %tmp25 to i64
331 %tmp27 = shl nuw nsw i64 %tmp26, 16
332 %tmp28 = or i64 %tmp23, %tmp27
; Byte 6 -> bits 8-15.
333 %tmp29 = getelementptr inbounds i8, ptr %arg, i64 6
334 %tmp30 = load i8, ptr %tmp29, align 1
335 %tmp31 = zext i8 %tmp30 to i64
336 %tmp32 = shl nuw nsw i64 %tmp31, 8
337 %tmp33 = or i64 %tmp28, %tmp32
; Byte 7 -> bits 0-7 (no shift).
338 %tmp34 = getelementptr inbounds i8, ptr %arg, i64 7
339 %tmp35 = load i8, ptr %tmp34, align 1
340 %tmp36 = zext i8 %tmp35 to i64
341 %tmp37 = or i64 %tmp33, %tmp36
345 ; Part of the load by bytes pattern is used outside of the pattern
348 ; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
; Negative test: the same most-significant-byte-first pattern as
; load_i32_by_i8_bswap, but the zero-extended byte at offset 1 (%tmp6) has a
; second use outside the pattern. The assertions expect the four individual
; movzbl byte loads to remain instead of one combined load.
350 define i32 @load_i32_by_i8_bswap_uses(ptr %arg) {
351 ; CHECK-LABEL: load_i32_by_i8_bswap_uses:
353 ; CHECK-NEXT: pushl %esi
354 ; CHECK-NEXT: .cfi_def_cfa_offset 8
355 ; CHECK-NEXT: .cfi_offset %esi, -8
356 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
357 ; CHECK-NEXT: movzbl (%eax), %ecx
358 ; CHECK-NEXT: shll $24, %ecx
359 ; CHECK-NEXT: movzbl 1(%eax), %edx
360 ; CHECK-NEXT: movl %edx, %esi
361 ; CHECK-NEXT: shll $16, %esi
362 ; CHECK-NEXT: orl %ecx, %esi
363 ; CHECK-NEXT: movzbl 2(%eax), %ecx
364 ; CHECK-NEXT: shll $8, %ecx
365 ; CHECK-NEXT: orl %esi, %ecx
366 ; CHECK-NEXT: movzbl 3(%eax), %eax
367 ; CHECK-NEXT: orl %ecx, %eax
368 ; CHECK-NEXT: orl %edx, %eax
369 ; CHECK-NEXT: popl %esi
370 ; CHECK-NEXT: .cfi_def_cfa_offset 4
373 ; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
375 ; CHECK64-NEXT: movzbl (%rdi), %eax
376 ; CHECK64-NEXT: shll $24, %eax
377 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx
378 ; CHECK64-NEXT: movl %ecx, %edx
379 ; CHECK64-NEXT: shll $16, %edx
380 ; CHECK64-NEXT: orl %eax, %edx
381 ; CHECK64-NEXT: movzbl 2(%rdi), %esi
382 ; CHECK64-NEXT: shll $8, %esi
383 ; CHECK64-NEXT: orl %edx, %esi
384 ; CHECK64-NEXT: movzbl 3(%rdi), %eax
385 ; CHECK64-NEXT: orl %esi, %eax
386 ; CHECK64-NEXT: orl %ecx, %eax
; Byte at offset 0 -> bits 24-31.
388 %tmp1 = load i8, ptr %arg, align 1
389 %tmp2 = zext i8 %tmp1 to i32
390 %tmp3 = shl nuw nsw i32 %tmp2, 24
; Byte at offset 1 -> bits 16-23; %tmp6 is reused at the end of the function.
391 %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
392 %tmp5 = load i8, ptr %tmp4, align 1
393 %tmp6 = zext i8 %tmp5 to i32
394 %tmp7 = shl nuw nsw i32 %tmp6, 16
395 %tmp8 = or i32 %tmp7, %tmp3
; Byte at offset 2 -> bits 8-15.
396 %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
397 %tmp10 = load i8, ptr %tmp9, align 1
398 %tmp11 = zext i8 %tmp10 to i32
399 %tmp12 = shl nuw nsw i32 %tmp11, 8
400 %tmp13 = or i32 %tmp8, %tmp12
; Byte at offset 3 -> bits 0-7.
401 %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
402 %tmp15 = load i8, ptr %tmp14, align 1
403 %tmp16 = zext i8 %tmp15 to i32
404 %tmp17 = or i32 %tmp13, %tmp16
405 ; Use individual part of the pattern outside of the pattern
406 %tmp18 = or i32 %tmp6, %tmp17
410 ; One of the loads is volatile
413 ; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
; Negative test: the most-significant-byte-first pattern with the first byte
; load marked volatile. The assertions expect four separate movzbl byte loads
; rather than one combined 32-bit load.
414 define i32 @load_i32_by_i8_bswap_volatile(ptr %arg) {
415 ; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
417 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
418 ; CHECK-NEXT: movzbl (%eax), %ecx
419 ; CHECK-NEXT: shll $24, %ecx
420 ; CHECK-NEXT: movzbl 1(%eax), %edx
421 ; CHECK-NEXT: shll $16, %edx
422 ; CHECK-NEXT: orl %ecx, %edx
423 ; CHECK-NEXT: movzbl 2(%eax), %ecx
424 ; CHECK-NEXT: shll $8, %ecx
425 ; CHECK-NEXT: orl %edx, %ecx
426 ; CHECK-NEXT: movzbl 3(%eax), %eax
427 ; CHECK-NEXT: orl %ecx, %eax
430 ; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
432 ; CHECK64-NEXT: movzbl (%rdi), %eax
433 ; CHECK64-NEXT: shll $24, %eax
434 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx
435 ; CHECK64-NEXT: shll $16, %ecx
436 ; CHECK64-NEXT: orl %eax, %ecx
437 ; CHECK64-NEXT: movzbl 2(%rdi), %edx
438 ; CHECK64-NEXT: shll $8, %edx
439 ; CHECK64-NEXT: orl %ecx, %edx
440 ; CHECK64-NEXT: movzbl 3(%rdi), %eax
441 ; CHECK64-NEXT: orl %edx, %eax
; The volatile load: byte at offset 0 -> bits 24-31.
443 %tmp1 = load volatile i8, ptr %arg, align 1
444 %tmp2 = zext i8 %tmp1 to i32
445 %tmp3 = shl nuw nsw i32 %tmp2, 24
; Remaining bytes use ordinary loads: offset 1 -> bits 16-23.
446 %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
447 %tmp5 = load i8, ptr %tmp4, align 1
448 %tmp6 = zext i8 %tmp5 to i32
449 %tmp7 = shl nuw nsw i32 %tmp6, 16
450 %tmp8 = or i32 %tmp7, %tmp3
; Offset 2 -> bits 8-15.
451 %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
452 %tmp10 = load i8, ptr %tmp9, align 1
453 %tmp11 = zext i8 %tmp10 to i32
454 %tmp12 = shl nuw nsw i32 %tmp11, 8
455 %tmp13 = or i32 %tmp8, %tmp12
; Offset 3 -> bits 0-7.
456 %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
457 %tmp15 = load i8, ptr %tmp14, align 1
458 %tmp16 = zext i8 %tmp15 to i32
459 %tmp17 = or i32 %tmp13, %tmp16
463 ; There is a store in between individual loads
465 ; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
467 ; res2 = ((i32) p[2] << 8) | (i32) p[3]
; Negative test: the most-significant-byte-first pattern with an i32 store
; through the second pointer (%arg1) placed between the byte loads. The
; assertions expect four separate movzbl loads, with the store of 0 emitted
; after the load of byte 1 and before the load of byte 2.
469 define i32 @load_i32_by_i8_bswap_store_in_between(ptr %arg, ptr %arg1) {
470 ; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
472 ; CHECK-NEXT: pushl %esi
473 ; CHECK-NEXT: .cfi_def_cfa_offset 8
474 ; CHECK-NEXT: .cfi_offset %esi, -8
475 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
476 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
477 ; CHECK-NEXT: movzbl (%eax), %edx
478 ; CHECK-NEXT: shll $24, %edx
479 ; CHECK-NEXT: movzbl 1(%eax), %esi
480 ; CHECK-NEXT: movl $0, (%ecx)
481 ; CHECK-NEXT: shll $16, %esi
482 ; CHECK-NEXT: orl %edx, %esi
483 ; CHECK-NEXT: movzbl 2(%eax), %ecx
484 ; CHECK-NEXT: shll $8, %ecx
485 ; CHECK-NEXT: orl %esi, %ecx
486 ; CHECK-NEXT: movzbl 3(%eax), %eax
487 ; CHECK-NEXT: orl %ecx, %eax
488 ; CHECK-NEXT: popl %esi
489 ; CHECK-NEXT: .cfi_def_cfa_offset 4
492 ; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
494 ; CHECK64-NEXT: movzbl (%rdi), %eax
495 ; CHECK64-NEXT: shll $24, %eax
496 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx
497 ; CHECK64-NEXT: movl $0, (%rsi)
498 ; CHECK64-NEXT: shll $16, %ecx
499 ; CHECK64-NEXT: orl %eax, %ecx
500 ; CHECK64-NEXT: movzbl 2(%rdi), %edx
501 ; CHECK64-NEXT: shll $8, %edx
502 ; CHECK64-NEXT: orl %ecx, %edx
503 ; CHECK64-NEXT: movzbl 3(%rdi), %eax
504 ; CHECK64-NEXT: orl %edx, %eax
; Byte at offset 0 -> bits 24-31.
506 %tmp2 = load i8, ptr %arg, align 1
507 %tmp3 = zext i8 %tmp2 to i32
508 %tmp4 = shl nuw nsw i32 %tmp3, 24
; Byte at offset 1 is loaded before the intervening store.
509 %tmp5 = getelementptr inbounds i8, ptr %arg, i32 1
510 %tmp6 = load i8, ptr %tmp5, align 1
511 ; This store will prevent folding of the pattern
512 store i32 0, ptr %arg1
513 %tmp7 = zext i8 %tmp6 to i32
514 %tmp8 = shl nuw nsw i32 %tmp7, 16
515 %tmp9 = or i32 %tmp8, %tmp4
; Bytes at offsets 2 and 3 are loaded after the store.
516 %tmp10 = getelementptr inbounds i8, ptr %arg, i32 2
517 %tmp11 = load i8, ptr %tmp10, align 1
518 %tmp12 = zext i8 %tmp11 to i32
519 %tmp13 = shl nuw nsw i32 %tmp12, 8
520 %tmp14 = or i32 %tmp9, %tmp13
521 %tmp15 = getelementptr inbounds i8, ptr %arg, i32 3
522 %tmp16 = load i8, ptr %tmp15, align 1
523 %tmp17 = zext i8 %tmp16 to i32
524 %tmp18 = or i32 %tmp14, %tmp17
528 ; One of the loads is from an unrelated location
530 ; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
; Negative test: the most-significant-byte-first pattern where the byte for
; bits 16-23 is read from %arg1 + 1 instead of %arg + 1. The assertions
; expect four separate movzbl loads, one of them based on the second pointer.
531 define i32 @load_i32_by_i8_bswap_unrelated_load(ptr %arg, ptr %arg1) {
532 ; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
534 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
535 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
536 ; CHECK-NEXT: movzbl (%ecx), %edx
537 ; CHECK-NEXT: shll $24, %edx
538 ; CHECK-NEXT: movzbl 1(%eax), %eax
539 ; CHECK-NEXT: shll $16, %eax
540 ; CHECK-NEXT: orl %edx, %eax
541 ; CHECK-NEXT: movzbl 2(%ecx), %edx
542 ; CHECK-NEXT: shll $8, %edx
543 ; CHECK-NEXT: orl %eax, %edx
544 ; CHECK-NEXT: movzbl 3(%ecx), %eax
545 ; CHECK-NEXT: orl %edx, %eax
548 ; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
550 ; CHECK64-NEXT: movzbl (%rdi), %eax
551 ; CHECK64-NEXT: shll $24, %eax
552 ; CHECK64-NEXT: movzbl 1(%rsi), %ecx
553 ; CHECK64-NEXT: shll $16, %ecx
554 ; CHECK64-NEXT: orl %eax, %ecx
555 ; CHECK64-NEXT: movzbl 2(%rdi), %edx
556 ; CHECK64-NEXT: shll $8, %edx
557 ; CHECK64-NEXT: orl %ecx, %edx
558 ; CHECK64-NEXT: movzbl 3(%rdi), %eax
559 ; CHECK64-NEXT: orl %edx, %eax
; Byte at %arg + 0 -> bits 24-31.
561 %tmp3 = load i8, ptr %arg, align 1
562 %tmp4 = zext i8 %tmp3 to i32
563 %tmp5 = shl nuw nsw i32 %tmp4, 24
564 ; Load from an unrelated address
565 %tmp6 = getelementptr inbounds i8, ptr %arg1, i32 1
566 %tmp7 = load i8, ptr %tmp6, align 1
567 %tmp8 = zext i8 %tmp7 to i32
568 %tmp9 = shl nuw nsw i32 %tmp8, 16
569 %tmp10 = or i32 %tmp9, %tmp5
; Back to %arg: offset 2 -> bits 8-15.
570 %tmp11 = getelementptr inbounds i8, ptr %arg, i32 2
571 %tmp12 = load i8, ptr %tmp11, align 1
572 %tmp13 = zext i8 %tmp12 to i32
573 %tmp14 = shl nuw nsw i32 %tmp13, 8
574 %tmp15 = or i32 %tmp10, %tmp14
; %arg offset 3 -> bits 0-7.
575 %tmp16 = getelementptr inbounds i8, ptr %arg, i32 3
576 %tmp17 = load i8, ptr %tmp16, align 1
577 %tmp18 = zext i8 %tmp17 to i32
578 %tmp19 = or i32 %tmp15, %tmp18
583 ; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
; Least-significant-byte-first pattern whose first byte is at offset 1
; (bytes at offsets 1..4). The assertions expect a single 32-bit movl with
; displacement 1.
584 define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
585 ; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
587 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
588 ; CHECK-NEXT: movl 1(%eax), %eax
591 ; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
593 ; CHECK64-NEXT: movl 1(%rdi), %eax
; Offset 1 -> bits 0-7.
595 %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
596 %tmp2 = load i8, ptr %tmp1, align 1
597 %tmp3 = zext i8 %tmp2 to i32
; Offset 2 -> bits 8-15.
598 %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
599 %tmp5 = load i8, ptr %tmp4, align 1
600 %tmp6 = zext i8 %tmp5 to i32
601 %tmp7 = shl nuw nsw i32 %tmp6, 8
602 %tmp8 = or i32 %tmp7, %tmp3
; Offset 3 -> bits 16-23.
603 %tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
604 %tmp10 = load i8, ptr %tmp9, align 1
605 %tmp11 = zext i8 %tmp10 to i32
606 %tmp12 = shl nuw nsw i32 %tmp11, 16
607 %tmp13 = or i32 %tmp8, %tmp12
; Offset 4 -> bits 24-31.
608 %tmp14 = getelementptr inbounds i8, ptr %arg, i32 4
609 %tmp15 = load i8, ptr %tmp14, align 1
610 %tmp16 = zext i8 %tmp15 to i32
611 %tmp17 = shl nuw nsw i32 %tmp16, 24
612 %tmp18 = or i32 %tmp13, %tmp17
617 ; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
; Least-significant-byte-first pattern over negative offsets (bytes at
; offsets -4..-1). The assertions expect a single 32-bit movl with
; displacement -4.
618 define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
619 ; CHECK-LABEL: load_i32_by_i8_neg_offset:
621 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
622 ; CHECK-NEXT: movl -4(%eax), %eax
625 ; CHECK64-LABEL: load_i32_by_i8_neg_offset:
627 ; CHECK64-NEXT: movl -4(%rdi), %eax
; Offset -4 -> bits 0-7.
629 %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -4
630 %tmp2 = load i8, ptr %tmp1, align 1
631 %tmp3 = zext i8 %tmp2 to i32
; Offset -3 -> bits 8-15.
632 %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -3
633 %tmp5 = load i8, ptr %tmp4, align 1
634 %tmp6 = zext i8 %tmp5 to i32
635 %tmp7 = shl nuw nsw i32 %tmp6, 8
636 %tmp8 = or i32 %tmp7, %tmp3
; Offset -2 -> bits 16-23.
637 %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -2
638 %tmp10 = load i8, ptr %tmp9, align 1
639 %tmp11 = zext i8 %tmp10 to i32
640 %tmp12 = shl nuw nsw i32 %tmp11, 16
641 %tmp13 = or i32 %tmp8, %tmp12
; Offset -1 -> bits 24-31.
642 %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -1
643 %tmp15 = load i8, ptr %tmp14, align 1
644 %tmp16 = zext i8 %tmp15 to i32
645 %tmp17 = shl nuw nsw i32 %tmp16, 24
646 %tmp18 = or i32 %tmp13, %tmp17
651 ; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
; Byte-swapped pattern over offsets 1..4: the byte at offset 4 lands in bits
; 0-7 and the byte at offset 1 in bits 24-31. The assertions expect a 32-bit
; load at displacement 1 followed by bswapl, or a single movbel with +movbe.
652 define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
653 ; BSWAP-LABEL: load_i32_by_i8_nonzero_offset_bswap:
655 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
656 ; BSWAP-NEXT: movl 1(%eax), %eax
657 ; BSWAP-NEXT: bswapl %eax
660 ; MOVBE-LABEL: load_i32_by_i8_nonzero_offset_bswap:
662 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
663 ; MOVBE-NEXT: movbel 1(%eax), %eax
666 ; BSWAP64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
668 ; BSWAP64-NEXT: movl 1(%rdi), %eax
669 ; BSWAP64-NEXT: bswapl %eax
672 ; MOVBE64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
674 ; MOVBE64-NEXT: movbel 1(%rdi), %eax
; Offset 4 -> bits 0-7.
676 %tmp1 = getelementptr inbounds i8, ptr %arg, i32 4
677 %tmp2 = load i8, ptr %tmp1, align 1
678 %tmp3 = zext i8 %tmp2 to i32
; Offset 3 -> bits 8-15.
679 %tmp4 = getelementptr inbounds i8, ptr %arg, i32 3
680 %tmp5 = load i8, ptr %tmp4, align 1
681 %tmp6 = zext i8 %tmp5 to i32
682 %tmp7 = shl nuw nsw i32 %tmp6, 8
683 %tmp8 = or i32 %tmp7, %tmp3
; Offset 2 -> bits 16-23.
684 %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
685 %tmp10 = load i8, ptr %tmp9, align 1
686 %tmp11 = zext i8 %tmp10 to i32
687 %tmp12 = shl nuw nsw i32 %tmp11, 16
688 %tmp13 = or i32 %tmp8, %tmp12
; Offset 1 -> bits 24-31.
689 %tmp14 = getelementptr inbounds i8, ptr %arg, i32 1
690 %tmp15 = load i8, ptr %tmp14, align 1
691 %tmp16 = zext i8 %tmp15 to i32
692 %tmp17 = shl nuw nsw i32 %tmp16, 24
693 %tmp18 = or i32 %tmp13, %tmp17
698 ; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
; Byte-swapped pattern over negative offsets: the byte at offset -1 lands in
; bits 0-7 and the byte at offset -4 in bits 24-31. The assertions expect a
; 32-bit load at displacement -4 followed by bswapl, or a single movbel with
; +movbe.
699 define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
700 ; BSWAP-LABEL: load_i32_by_i8_neg_offset_bswap:
702 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
703 ; BSWAP-NEXT: movl -4(%eax), %eax
704 ; BSWAP-NEXT: bswapl %eax
707 ; MOVBE-LABEL: load_i32_by_i8_neg_offset_bswap:
709 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
710 ; MOVBE-NEXT: movbel -4(%eax), %eax
713 ; BSWAP64-LABEL: load_i32_by_i8_neg_offset_bswap:
715 ; BSWAP64-NEXT: movl -4(%rdi), %eax
716 ; BSWAP64-NEXT: bswapl %eax
719 ; MOVBE64-LABEL: load_i32_by_i8_neg_offset_bswap:
721 ; MOVBE64-NEXT: movbel -4(%rdi), %eax
; Offset -1 -> bits 0-7.
723 %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -1
724 %tmp2 = load i8, ptr %tmp1, align 1
725 %tmp3 = zext i8 %tmp2 to i32
; Offset -2 -> bits 8-15.
726 %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -2
727 %tmp5 = load i8, ptr %tmp4, align 1
728 %tmp6 = zext i8 %tmp5 to i32
729 %tmp7 = shl nuw nsw i32 %tmp6, 8
730 %tmp8 = or i32 %tmp7, %tmp3
; Offset -3 -> bits 16-23.
731 %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -3
732 %tmp10 = load i8, ptr %tmp9, align 1
733 %tmp11 = zext i8 %tmp10 to i32
734 %tmp12 = shl nuw nsw i32 %tmp11, 16
735 %tmp13 = or i32 %tmp8, %tmp12
; Offset -4 -> bits 24-31.
736 %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -4
737 %tmp15 = load i8, ptr %tmp14, align 1
738 %tmp16 = zext i8 %tmp15 to i32
739 %tmp17 = shl nuw nsw i32 %tmp16, 24
740 %tmp18 = or i32 %tmp13, %tmp17
745 ; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
; Most-significant-byte-first pattern addressed as base + variable index:
; bytes at %arg + %arg1 + k for k = 0..3, each index computed with a separate
; i32 add. The assertions expect one base+index 32-bit load followed by
; bswapl (or a single movbel with +movbe); on x86-64 the index is first
; sign-extended with movslq.
746 define i32 @load_i32_by_i8_bswap_base_index_offset(ptr %arg, i32 %arg1) {
747 ; BSWAP-LABEL: load_i32_by_i8_bswap_base_index_offset:
749 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
750 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %ecx
751 ; BSWAP-NEXT: movl (%ecx,%eax), %eax
752 ; BSWAP-NEXT: bswapl %eax
755 ; MOVBE-LABEL: load_i32_by_i8_bswap_base_index_offset:
757 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
758 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
759 ; MOVBE-NEXT: movbel (%ecx,%eax), %eax
762 ; BSWAP64-LABEL: load_i32_by_i8_bswap_base_index_offset:
764 ; BSWAP64-NEXT: movslq %esi, %rax
765 ; BSWAP64-NEXT: movl (%rdi,%rax), %eax
766 ; BSWAP64-NEXT: bswapl %eax
769 ; MOVBE64-LABEL: load_i32_by_i8_bswap_base_index_offset:
771 ; MOVBE64-NEXT: movslq %esi, %rax
772 ; MOVBE64-NEXT: movbel (%rdi,%rax), %eax
; Index %arg1 + 0 -> bits 24-31.
774 %tmp2 = getelementptr inbounds i8, ptr %arg, i32 %arg1
775 %tmp3 = load i8, ptr %tmp2, align 1
776 %tmp4 = zext i8 %tmp3 to i32
777 %tmp5 = shl nuw nsw i32 %tmp4, 24
; Index %arg1 + 1 -> bits 16-23.
778 %tmp6 = add nuw nsw i32 %arg1, 1
779 %tmp7 = getelementptr inbounds i8, ptr %arg, i32 %tmp6
780 %tmp8 = load i8, ptr %tmp7, align 1
781 %tmp9 = zext i8 %tmp8 to i32
782 %tmp10 = shl nuw nsw i32 %tmp9, 16
783 %tmp11 = or i32 %tmp10, %tmp5
; Index %arg1 + 2 -> bits 8-15.
784 %tmp12 = add nuw nsw i32 %arg1, 2
785 %tmp13 = getelementptr inbounds i8, ptr %arg, i32 %tmp12
786 %tmp14 = load i8, ptr %tmp13, align 1
787 %tmp15 = zext i8 %tmp14 to i32
788 %tmp16 = shl nuw nsw i32 %tmp15, 8
789 %tmp17 = or i32 %tmp11, %tmp16
; Index %arg1 + 3 -> bits 0-7.
790 %tmp18 = add nuw nsw i32 %arg1, 3
791 %tmp19 = getelementptr inbounds i8, ptr %arg, i32 %tmp18
792 %tmp20 = load i8, ptr %tmp19, align 1
793 %tmp21 = zext i8 %tmp20 to i32
794 %tmp22 = or i32 %tmp17, %tmp21
798 ; Verify that we don't crash handling shl i32 %conv57, 32
; Crash regression test: both shl amounts below (56 and 32) are not smaller
; than the i32 bit width. The OR-ed result is sign-extended to i64 and stored
; to %dst. The assertions only pin the currently generated code, which stores
; all-ones to the destination.
799 define void @shift_i32_by_32(ptr %src1, ptr %src2, ptr %dst) {
800 ; CHECK-LABEL: shift_i32_by_32:
801 ; CHECK: # %bb.0: # %entry
802 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
803 ; CHECK-NEXT: movl $-1, 4(%eax)
804 ; CHECK-NEXT: movl $-1, (%eax)
807 ; CHECK64-LABEL: shift_i32_by_32:
808 ; CHECK64: # %bb.0: # %entry
809 ; CHECK64-NEXT: movq $-1, (%rdx)
812 %load1 = load i8, ptr %src1, align 1
813 %conv46 = zext i8 %load1 to i32
; Shift amount 56 exceeds the 32-bit width of the operand.
814 %shl47 = shl i32 %conv46, 56
815 %or55 = or i32 %shl47, 0
816 %load2 = load i8, ptr %src2, align 1
817 %conv57 = zext i8 %load2 to i32
; Shift amount 32 equals the bit width; this is the shift named in the test's
; description.
818 %shl58 = shl i32 %conv57, 32
819 %or59 = or i32 %or55, %shl58
820 %or74 = or i32 %or59, 0
821 %conv75 = sext i32 %or74 to i64
822 store i64 %conv75, ptr %dst, align 8
826 declare i16 @llvm.bswap.i16(i16)
829 ; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
; Builds an i32 from two i16 loads that are each passed through
; llvm.bswap.i16: the swapped element 0 goes to bits 16-31 and the swapped
; element 1 to bits 0-15. The assertions expect one 32-bit load followed by
; bswapl, or a single movbel with +movbe.
830 define i32 @load_i32_by_bswap_i16(ptr %arg) {
831 ; BSWAP-LABEL: load_i32_by_bswap_i16:
833 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
834 ; BSWAP-NEXT: movl (%eax), %eax
835 ; BSWAP-NEXT: bswapl %eax
838 ; MOVBE-LABEL: load_i32_by_bswap_i16:
840 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
841 ; MOVBE-NEXT: movbel (%eax), %eax
844 ; BSWAP64-LABEL: load_i32_by_bswap_i16:
846 ; BSWAP64-NEXT: movl (%rdi), %eax
847 ; BSWAP64-NEXT: bswapl %eax
850 ; MOVBE64-LABEL: load_i32_by_bswap_i16:
852 ; MOVBE64-NEXT: movbel (%rdi), %eax
; i16 element 0, byte-swapped; shifted into the high half further down.
854 %tmp1 = load i16, ptr %arg, align 4
855 %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
856 %tmp2 = zext i16 %tmp11 to i32
; i16 element 1, byte-swapped; stays in the low half.
857 %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
858 %tmp4 = load i16, ptr %tmp3, align 1
859 %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
860 %tmp5 = zext i16 %tmp41 to i32
861 %tmp6 = shl nuw nsw i32 %tmp2, 16
862 %tmp7 = or i32 %tmp6, %tmp5
867 ; (i32) p[0] | ((sext p[1] to i32) << 16)
; Two-i16 least-significant-half-first pattern in which the upper half is
; sign-extended (sext) rather than zero-extended before being shifted left by
; 16; the shift moves the extension bits out of the i32. The assertions
; expect a single 32-bit movl.
868 define i32 @load_i32_by_sext_i16(ptr %arg) {
869 ; CHECK-LABEL: load_i32_by_sext_i16:
871 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
872 ; CHECK-NEXT: movl (%eax), %eax
875 ; CHECK64-LABEL: load_i32_by_sext_i16:
877 ; CHECK64-NEXT: movl (%rdi), %eax
; i16 element 0, zero-extended -> bits 0-15.
879 %tmp1 = load i16, ptr %arg, align 1
880 %tmp2 = zext i16 %tmp1 to i32
; i16 element 1, sign-extended, then shifted to bits 16-31.
881 %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
882 %tmp4 = load i16, ptr %tmp3, align 1
883 %tmp5 = sext i16 %tmp4 to i32
884 %tmp6 = shl nuw nsw i32 %tmp5, 16
885 %tmp7 = or i32 %tmp6, %tmp2
891 ; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
; Least-significant-byte-first pattern addressed as (base + 12) + index: the
; constant 12 is added to %arg first (%tmp4), then the zero-extended indices
; %i + k (k = 0..3) are applied. The assertions expect a single 32-bit movl
; with displacement 12 and a base+index address.
892 define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
893 ; CHECK-LABEL: load_i32_by_i8_base_offset_index:
895 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
896 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
897 ; CHECK-NEXT: movl 12(%eax,%ecx), %eax
900 ; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
902 ; CHECK64-NEXT: movl %esi, %eax
903 ; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; Indices i+3, i+2, i+1 are computed in i32 before being widened.
905 %tmp = add nuw nsw i32 %i, 3
906 %tmp2 = add nuw nsw i32 %i, 2
907 %tmp3 = add nuw nsw i32 %i, 1
; Shared base: %arg + 12.
908 %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
; Index i -> bits 0-7.
909 %tmp5 = zext i32 %i to i64
910 %tmp6 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp5
911 %tmp7 = load i8, ptr %tmp6, align 1
912 %tmp8 = zext i8 %tmp7 to i32
; Index i+1 -> bits 8-15.
913 %tmp9 = zext i32 %tmp3 to i64
914 %tmp10 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp9
915 %tmp11 = load i8, ptr %tmp10, align 1
916 %tmp12 = zext i8 %tmp11 to i32
917 %tmp13 = shl nuw nsw i32 %tmp12, 8
918 %tmp14 = or i32 %tmp13, %tmp8
; Index i+2 -> bits 16-23.
919 %tmp15 = zext i32 %tmp2 to i64
920 %tmp16 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp15
921 %tmp17 = load i8, ptr %tmp16, align 1
922 %tmp18 = zext i8 %tmp17 to i32
923 %tmp19 = shl nuw nsw i32 %tmp18, 16
924 %tmp20 = or i32 %tmp14, %tmp19
; Index i+3 -> bits 24-31.
925 %tmp21 = zext i32 %tmp to i64
926 %tmp22 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp21
927 %tmp23 = load i8, ptr %tmp22, align 1
928 %tmp24 = zext i8 %tmp23 to i32
929 %tmp25 = shl nuw i32 %tmp24, 24
930 %tmp26 = or i32 %tmp20, %tmp25
936 ; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
; Same addressing scheme as the (base + 12) + index pattern, but the four
; bytes start at index %i + 1 (indices i+1 .. i+4). The assertions expect a
; single 32-bit movl with displacement 13 and a base+index address.
937 define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
938 ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
940 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
941 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
942 ; CHECK-NEXT: movl 13(%eax,%ecx), %eax
945 ; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
947 ; CHECK64-NEXT: movl %esi, %eax
948 ; CHECK64-NEXT: movl 13(%rax,%rdi), %eax
; Indices i+4, i+3, i+2 are computed in i32 before being widened.
950 %tmp = add nuw nsw i32 %i, 4
951 %tmp2 = add nuw nsw i32 %i, 3
952 %tmp3 = add nuw nsw i32 %i, 2
; Shared base: %arg + 12.
953 %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
; Index i+1 -> bits 0-7.
954 %tmp5 = add nuw nsw i32 %i, 1
955 %tmp27 = zext i32 %tmp5 to i64
956 %tmp28 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp27
957 %tmp29 = load i8, ptr %tmp28, align 1
958 %tmp30 = zext i8 %tmp29 to i32
; Index i+2 -> bits 8-15.
959 %tmp31 = zext i32 %tmp3 to i64
960 %tmp32 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp31
961 %tmp33 = load i8, ptr %tmp32, align 1
962 %tmp34 = zext i8 %tmp33 to i32
963 %tmp35 = shl nuw nsw i32 %tmp34, 8
964 %tmp36 = or i32 %tmp35, %tmp30
; Index i+3 -> bits 16-23.
965 %tmp37 = zext i32 %tmp2 to i64
966 %tmp38 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp37
967 %tmp39 = load i8, ptr %tmp38, align 1
968 %tmp40 = zext i8 %tmp39 to i32
969 %tmp41 = shl nuw nsw i32 %tmp40, 16
970 %tmp42 = or i32 %tmp36, %tmp41
; Index i+4 -> bits 24-31.
971 %tmp43 = zext i32 %tmp to i64
972 %tmp44 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp43
973 %tmp45 = load i8, ptr %tmp44, align 1
974 %tmp46 = zext i8 %tmp45 to i32
975 %tmp47 = shl nuw i32 %tmp46, 24
976 %tmp48 = or i32 %tmp42, %tmp47
987 ; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
989 ; This test exercises zero and any extend loads as a part of load combine pattern.
990 ; In order to fold the pattern above we need to reassociate the address computation
991 ; first. By the time the address computation is reassociated loads are combined
992 ; to zext and aext loads.
; Least-significant-byte-first pattern in which the constant 12 and the
; variable index are applied in different orders: the first byte uses
; (%arg + 12) + index, while the other three use (%arg + index + k) + 12.
; The assertions expect a single 32-bit movl with displacement 12 and a
; base+index address.
993 define i32 @load_i32_by_i8_zaext_loads(ptr %arg, i32 %arg1) {
994 ; CHECK-LABEL: load_i32_by_i8_zaext_loads:
996 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
997 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
998 ; CHECK-NEXT: movl 12(%eax,%ecx), %eax
1001 ; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
1003 ; CHECK64-NEXT: movl %esi, %eax
1004 ; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
1005 ; CHECK64-NEXT: retq
; Indices arg1+3, arg1+2, arg1+1, widened to i64.
1006 %tmp = add nuw nsw i32 %arg1, 3
1007 %tmp2 = add nuw nsw i32 %arg1, 2
1008 %tmp3 = add nuw nsw i32 %arg1, 1
1009 %tmp4 = zext i32 %tmp to i64
1010 %tmp5 = zext i32 %tmp2 to i64
1011 %tmp6 = zext i32 %tmp3 to i64
; Index-first addresses (the +12 is added later, just before each load).
1012 %tmp24 = getelementptr inbounds i8, ptr %arg, i64 %tmp4
1013 %tmp30 = getelementptr inbounds i8, ptr %arg, i64 %tmp5
1014 %tmp31 = getelementptr inbounds i8, ptr %arg, i64 %tmp6
; Constant-first address for the first byte: (%arg + 12) + arg1 -> bits 0-7.
1015 %tmp32 = getelementptr inbounds i8, ptr %arg, i64 12
1016 %tmp33 = zext i32 %arg1 to i64
1017 %tmp34 = getelementptr inbounds i8, ptr %tmp32, i64 %tmp33
1018 %tmp35 = load i8, ptr %tmp34, align 1
1019 %tmp36 = zext i8 %tmp35 to i32
; (%arg + arg1 + 1) + 12 -> bits 8-15.
1020 %tmp37 = getelementptr inbounds i8, ptr %tmp31, i64 12
1021 %tmp38 = load i8, ptr %tmp37, align 1
1022 %tmp39 = zext i8 %tmp38 to i32
1023 %tmp40 = shl nuw nsw i32 %tmp39, 8
1024 %tmp41 = or i32 %tmp40, %tmp36
; (%arg + arg1 + 2) + 12 -> bits 16-23.
1025 %tmp42 = getelementptr inbounds i8, ptr %tmp30, i64 12
1026 %tmp43 = load i8, ptr %tmp42, align 1
1027 %tmp44 = zext i8 %tmp43 to i32
1028 %tmp45 = shl nuw nsw i32 %tmp44, 16
1029 %tmp46 = or i32 %tmp41, %tmp45
; (%arg + arg1 + 3) + 12 -> bits 24-31.
1030 %tmp47 = getelementptr inbounds i8, ptr %tmp24, i64 12
1031 %tmp48 = load i8, ptr %tmp47, align 1
1032 %tmp49 = zext i8 %tmp48 to i32
1033 %tmp50 = shl nuw i32 %tmp49, 24
1034 %tmp51 = or i32 %tmp46, %tmp50
1038 ; The same as load_i32_by_i8_zaext_loads but the last load is combined to
1048 ; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
1049 define i32 @load_i32_by_i8_zsext_loads(ptr %arg, i32 %arg1) { ; same four-byte OR pattern at %arg + %arg1 + 12..15, but byte 3 goes through sext-to-i16 before widening; the assertions still expect a single movl
1050 ; CHECK-LABEL: load_i32_by_i8_zsext_loads:
1052 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1053 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1054 ; CHECK-NEXT: movl 12(%eax,%ecx), %eax
1057 ; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
1059 ; CHECK64-NEXT: movl %esi, %eax
1060 ; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
1061 ; CHECK64-NEXT: retq
1062 %tmp = add nuw nsw i32 %arg1, 3 ; index used for byte 3
1063 %tmp2 = add nuw nsw i32 %arg1, 2 ; index used for byte 2
1064 %tmp3 = add nuw nsw i32 %arg1, 1 ; index used for byte 1
1065 %tmp4 = zext i32 %tmp to i64
1066 %tmp5 = zext i32 %tmp2 to i64
1067 %tmp6 = zext i32 %tmp3 to i64
1068 %tmp24 = getelementptr inbounds i8, ptr %arg, i64 %tmp4 ; %arg + (%arg1 + 3), constant 12 added later
1069 %tmp30 = getelementptr inbounds i8, ptr %arg, i64 %tmp5 ; %arg + (%arg1 + 2), constant 12 added later
1070 %tmp31 = getelementptr inbounds i8, ptr %arg, i64 %tmp6 ; %arg + (%arg1 + 1), constant 12 added later
1071 %tmp32 = getelementptr inbounds i8, ptr %arg, i64 12 ; byte 0 applies the constant 12 first, then the variable index
1072 %tmp33 = zext i32 %arg1 to i64
1073 %tmp34 = getelementptr inbounds i8, ptr %tmp32, i64 %tmp33
1074 %tmp35 = load i8, ptr %tmp34, align 1 ; byte 0
1075 %tmp36 = zext i8 %tmp35 to i32
1076 %tmp37 = getelementptr inbounds i8, ptr %tmp31, i64 12 ; bytes 1..3 apply the variable index first, then the constant 12
1077 %tmp38 = load i8, ptr %tmp37, align 1 ; byte 1
1078 %tmp39 = zext i8 %tmp38 to i32
1079 %tmp40 = shl nuw nsw i32 %tmp39, 8 ; byte 1 -> bits 8..15
1080 %tmp41 = or i32 %tmp40, %tmp36
1081 %tmp42 = getelementptr inbounds i8, ptr %tmp30, i64 12
1082 %tmp43 = load i8, ptr %tmp42, align 1 ; byte 2
1083 %tmp44 = zext i8 %tmp43 to i32
1084 %tmp45 = shl nuw nsw i32 %tmp44, 16 ; byte 2 -> bits 16..23
1085 %tmp46 = or i32 %tmp41, %tmp45
1086 %tmp47 = getelementptr inbounds i8, ptr %tmp24, i64 12
1087 %tmp48 = load i8, ptr %tmp47, align 1 ; byte 3
1088 %tmp49 = sext i8 %tmp48 to i16 ; sign-extend byte 3 to 16 bits ...
1089 %tmp50 = zext i16 %tmp49 to i32 ; ... then zero-extend to 32 bits
1090 %tmp51 = shl nuw i32 %tmp50, 24 ; only the low 8 bits of %tmp50 can land in bits 24..31 of an i32
1091 %tmp52 = or i32 %tmp46, %tmp51 ; final combined i32 value
1096 ; (i32) p[0] | ((i32) p[1] << 8)
1097 define i32 @zext_load_i32_by_i8(ptr %arg) { ; builds an i32 from two adjacent bytes (p[0] low, p[1] shifted by 8); the assertions expect one zero-extending 16-bit load (movzwl)
1098 ; CHECK-LABEL: zext_load_i32_by_i8:
1100 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1101 ; CHECK-NEXT: movzwl (%eax), %eax
1104 ; CHECK64-LABEL: zext_load_i32_by_i8:
1106 ; CHECK64-NEXT: movzwl (%rdi), %eax
1107 ; CHECK64-NEXT: retq
1108 %tmp2 = load i8, ptr %arg, align 1 ; p[0]
1109 %tmp3 = zext i8 %tmp2 to i32
1110 %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
1111 %tmp5 = load i8, ptr %tmp4, align 1 ; p[1]
1112 %tmp6 = zext i8 %tmp5 to i32
1113 %tmp7 = shl nuw nsw i32 %tmp6, 8 ; p[1] -> bits 8..15
1114 %tmp8 = or i32 %tmp7, %tmp3 ; bits 16..31 of the result are zero
1119 ; ((i32) p[0] << 8) | ((i32) p[1] << 16)
1120 define i32 @zext_load_i32_by_i8_shl_8(ptr %arg) { ; two adjacent bytes placed at bits 8..23; the assertions expect two separate movzbl loads with shifts (no combined load)
1121 ; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
1123 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1124 ; CHECK-NEXT: movzbl (%eax), %ecx
1125 ; CHECK-NEXT: shll $8, %ecx
1126 ; CHECK-NEXT: movzbl 1(%eax), %eax
1127 ; CHECK-NEXT: shll $16, %eax
1128 ; CHECK-NEXT: orl %ecx, %eax
1131 ; CHECK64-LABEL: zext_load_i32_by_i8_shl_8:
1133 ; CHECK64-NEXT: movzbl (%rdi), %ecx
1134 ; CHECK64-NEXT: shll $8, %ecx
1135 ; CHECK64-NEXT: movzbl 1(%rdi), %eax
1136 ; CHECK64-NEXT: shll $16, %eax
1137 ; CHECK64-NEXT: orl %ecx, %eax
1138 ; CHECK64-NEXT: retq
1139 %tmp2 = load i8, ptr %arg, align 1 ; p[0]
1140 %tmp3 = zext i8 %tmp2 to i32
1141 %tmp30 = shl nuw nsw i32 %tmp3, 8 ; p[0] -> bits 8..15
1142 %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
1143 %tmp5 = load i8, ptr %tmp4, align 1 ; p[1]
1144 %tmp6 = zext i8 %tmp5 to i32
1145 %tmp7 = shl nuw nsw i32 %tmp6, 16 ; p[1] -> bits 16..23
1146 %tmp8 = or i32 %tmp7, %tmp30
1151 ; ((i32) p[0] << 16) | ((i32) p[1] << 24)
1152 define i32 @zext_load_i32_by_i8_shl_16(ptr %arg) { ; two adjacent bytes placed at bits 16..31; the assertions expect two separate movzbl loads with shifts (no combined load)
1153 ; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
1155 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1156 ; CHECK-NEXT: movzbl (%eax), %ecx
1157 ; CHECK-NEXT: shll $16, %ecx
1158 ; CHECK-NEXT: movzbl 1(%eax), %eax
1159 ; CHECK-NEXT: shll $24, %eax
1160 ; CHECK-NEXT: orl %ecx, %eax
1163 ; CHECK64-LABEL: zext_load_i32_by_i8_shl_16:
1165 ; CHECK64-NEXT: movzbl (%rdi), %ecx
1166 ; CHECK64-NEXT: shll $16, %ecx
1167 ; CHECK64-NEXT: movzbl 1(%rdi), %eax
1168 ; CHECK64-NEXT: shll $24, %eax
1169 ; CHECK64-NEXT: orl %ecx, %eax
1170 ; CHECK64-NEXT: retq
1171 %tmp2 = load i8, ptr %arg, align 1 ; p[0]
1172 %tmp3 = zext i8 %tmp2 to i32
1173 %tmp30 = shl nuw nsw i32 %tmp3, 16 ; p[0] -> bits 16..23
1174 %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
1175 %tmp5 = load i8, ptr %tmp4, align 1 ; p[1]
1176 %tmp6 = zext i8 %tmp5 to i32
1177 %tmp7 = shl nuw nsw i32 %tmp6, 24 ; p[1] -> bits 24..31
1178 %tmp8 = or i32 %tmp7, %tmp30
1183 ; (i32) p[1] | ((i32) p[0] << 8)
1184 define i32 @zext_load_i32_by_i8_bswap(ptr %arg) { ; two adjacent bytes in swapped order (p[1] low, p[0] shifted by 8); the assertions expect a 16-bit load plus rolw $8, or movbew with +movbe, followed by movzwl
1185 ; BSWAP-LABEL: zext_load_i32_by_i8_bswap:
1187 ; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
1188 ; BSWAP-NEXT: movzwl (%eax), %eax
1189 ; BSWAP-NEXT: rolw $8, %ax
1190 ; BSWAP-NEXT: movzwl %ax, %eax
1193 ; MOVBE-LABEL: zext_load_i32_by_i8_bswap:
1195 ; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
1196 ; MOVBE-NEXT: movbew (%eax), %ax
1197 ; MOVBE-NEXT: movzwl %ax, %eax
1200 ; BSWAP64-LABEL: zext_load_i32_by_i8_bswap:
1202 ; BSWAP64-NEXT: movzwl (%rdi), %eax
1203 ; BSWAP64-NEXT: rolw $8, %ax
1204 ; BSWAP64-NEXT: movzwl %ax, %eax
1205 ; BSWAP64-NEXT: retq
1207 ; MOVBE64-LABEL: zext_load_i32_by_i8_bswap:
1209 ; MOVBE64-NEXT: movbew (%rdi), %ax
1210 ; MOVBE64-NEXT: movzwl %ax, %eax
1211 ; MOVBE64-NEXT: retq
1212 %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
1213 %tmp2 = load i8, ptr %tmp1, align 1 ; p[1]
1214 %tmp3 = zext i8 %tmp2 to i32 ; p[1] stays in bits 0..7
1215 %tmp5 = load i8, ptr %arg, align 1 ; p[0]
1216 %tmp6 = zext i8 %tmp5 to i32
1217 %tmp7 = shl nuw nsw i32 %tmp6, 8 ; p[0] -> bits 8..15
1218 %tmp8 = or i32 %tmp7, %tmp3
1223 ; ((i32) p[1] << 8) | ((i32) p[0] << 16)
1224 define i32 @zext_load_i32_by_i8_bswap_shl_8(ptr %arg) { ; swapped-order byte pair placed at bits 8..23; the assertions expect two separate movzbl loads with shifts (no combined load)
1225 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
1227 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1228 ; CHECK-NEXT: movzbl 1(%eax), %ecx
1229 ; CHECK-NEXT: shll $8, %ecx
1230 ; CHECK-NEXT: movzbl (%eax), %eax
1231 ; CHECK-NEXT: shll $16, %eax
1232 ; CHECK-NEXT: orl %ecx, %eax
1235 ; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_8:
1237 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx
1238 ; CHECK64-NEXT: shll $8, %ecx
1239 ; CHECK64-NEXT: movzbl (%rdi), %eax
1240 ; CHECK64-NEXT: shll $16, %eax
1241 ; CHECK64-NEXT: orl %ecx, %eax
1242 ; CHECK64-NEXT: retq
1243 %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
1244 %tmp2 = load i8, ptr %tmp1, align 1 ; p[1]
1245 %tmp3 = zext i8 %tmp2 to i32
1246 %tmp30 = shl nuw nsw i32 %tmp3, 8 ; p[1] -> bits 8..15
1247 %tmp5 = load i8, ptr %arg, align 1 ; p[0]
1248 %tmp6 = zext i8 %tmp5 to i32
1249 %tmp7 = shl nuw nsw i32 %tmp6, 16 ; p[0] -> bits 16..23
1250 %tmp8 = or i32 %tmp7, %tmp30
1255 ; ((i32) p[1] << 16) | ((i32) p[0] << 24)
1256 define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) { ; swapped-order byte pair placed at bits 16..31; the assertions expect two separate movzbl loads with shifts (no combined load)
1257 ; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1259 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
1260 ; CHECK-NEXT: movzbl 1(%eax), %ecx
1261 ; CHECK-NEXT: shll $16, %ecx
1262 ; CHECK-NEXT: movzbl (%eax), %eax
1263 ; CHECK-NEXT: shll $24, %eax
1264 ; CHECK-NEXT: orl %ecx, %eax
1267 ; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_16:
1269 ; CHECK64-NEXT: movzbl 1(%rdi), %ecx
1270 ; CHECK64-NEXT: shll $16, %ecx
1271 ; CHECK64-NEXT: movzbl (%rdi), %eax
1272 ; CHECK64-NEXT: shll $24, %eax
1273 ; CHECK64-NEXT: orl %ecx, %eax
1274 ; CHECK64-NEXT: retq
1275 %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
1276 %tmp2 = load i8, ptr %tmp1, align 1 ; p[1]
1277 %tmp3 = zext i8 %tmp2 to i32
1278 %tmp30 = shl nuw nsw i32 %tmp3, 16 ; p[1] -> bits 16..23
1279 %tmp5 = load i8, ptr %arg, align 1 ; p[0]
1280 %tmp6 = zext i8 %tmp5 to i32
1281 %tmp7 = shl nuw nsw i32 %tmp6, 24 ; p[0] -> bits 24..31
1282 %tmp8 = or i32 %tmp7, %tmp30
1286 define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind { ; a <4 x i8> load with several uses: stored back after an intervening store, and elements 0 and 1 combined into an i32
1287 ; CHECK-LABEL: pr80911_vector_load_multiuse:
1289 ; CHECK-NEXT: pushl %esi
1290 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
1291 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
1292 ; CHECK-NEXT: movl (%edx), %esi
1293 ; CHECK-NEXT: movzwl (%edx), %eax
1294 ; CHECK-NEXT: movl $0, (%ecx)
1295 ; CHECK-NEXT: movl %esi, (%edx)
1296 ; CHECK-NEXT: popl %esi
1299 ; CHECK64-LABEL: pr80911_vector_load_multiuse:
1301 ; CHECK64-NEXT: movl (%rdi), %ecx
1302 ; CHECK64-NEXT: movzwl (%rdi), %eax
1303 ; CHECK64-NEXT: movl $0, (%rsi)
1304 ; CHECK64-NEXT: movl %ecx, (%rdi)
1305 ; CHECK64-NEXT: retq
1306 %load = load <4 x i8>, ptr %ptr, align 16 ; the vector value reused by the store-back and both extracts below
1307 store i32 0, ptr %clobber ; intervening store through a second pointer; in the assertions both loads from %ptr precede it
1308 store <4 x i8> %load, ptr %ptr, align 16 ; writes the loaded vector back to %ptr
1309 %e1 = extractelement <4 x i8> %load, i64 1 ; element 1
1310 %e1.ext = zext i8 %e1 to i32
1311 %e1.ext.shift = shl nuw nsw i32 %e1.ext, 8 ; element 1 -> bits 8..15
1312 %e0 = extractelement <4 x i8> %load, i64 0 ; element 0
1313 %e0.ext = zext i8 %e0 to i32
1314 %res = or i32 %e1.ext.shift, %e0.ext ; element 0 | (element 1 << 8)