1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -debugify-and-strip-all-safe -mtriple aarch64 -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombiner-only-enable-rule="load_or_combine" -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=LITTLE
3 # RUN: llc -debugify-and-strip-all-safe -mtriple arm64eb -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombiner-only-enable-rule="load_or_combine" -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=BIG
7 # Test that we can combine patterns like
10 # s32 y = (x[0] | (x[1] << 8)) | ((x[2] << 16) | (x[3] << 24))
12 # Into either a load, or a load with a bswap.
16 name: s8_loads_to_s32_little_endian_pat
17 tracksRegLiveness: true
23 ; s32 y = (x[0] | (x[1] << 8)) | ((x[2] << 16) | (x[3] << 24))
25 ; -> Little endian: Load from x[0]
26 ; -> Big endian: Load from x[0] + BSWAP
28 ; LITTLE-LABEL: name: s8_loads_to_s32_little_endian_pat
29 ; LITTLE: liveins: $x0, $x1
30 ; LITTLE: %ptr:_(p0) = COPY $x1
31 ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
32 ; LITTLE: $w1 = COPY %full_load(s32)
33 ; LITTLE: RET_ReallyLR implicit $w1
34 ; BIG-LABEL: name: s8_loads_to_s32_little_endian_pat
35 ; BIG: liveins: $x0, $x1
36 ; BIG: %ptr:_(p0) = COPY $x1
37 ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
38 ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
39 ; BIG: $w1 = COPY %full_load(s32)
40 ; BIG: RET_ReallyLR implicit $w1
; Byte offsets of x[1]..x[3] relative to %ptr.
41 %cst_1:_(s32) = G_CONSTANT i32 1
42 %cst_2:_(s32) = G_CONSTANT i32 2
43 %cst_3:_(s32) = G_CONSTANT i32 3
; Shift amounts (in bits) that position each byte within the s32.
45 %cst_8:_(s32) = G_CONSTANT i32 8
46 %cst_16:_(s32) = G_CONSTANT i32 16
47 %cst_24:_(s32) = G_CONSTANT i32 24
50 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
51 %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
52 %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
54 %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
56 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
57 %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
58 %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
60 %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
61 %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
62 %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)
64 ; Note the shape of the tree:
; Balanced tree: or1 = x[0] | (x[1] << 8), or2 = (x[2] << 16) | (x[3] << 24).
73 %or1:_(s32) = G_OR %byte0, %byte1
74 %or2:_(s32) = G_OR %byte2, %byte3
75 %full_load:_(s32) = G_OR %or1, %or2
77 $w1 = COPY %full_load(s32)
78 RET_ReallyLR implicit $w1
82 name: s8_loads_to_s32_big_endian_pat
83 tracksRegLiveness: true
89 ; s32 y = ((x[0] << 24) | (x[1] << 16)) | ((x[2] << 8) | x[3])
91 ; -> Little endian: Load from x[0] + BSWAP
92 ; -> Big endian: Load from x[0]
94 ; LITTLE-LABEL: name: s8_loads_to_s32_big_endian_pat
95 ; LITTLE: liveins: $x0, $x1
96 ; LITTLE: %ptr:_(p0) = COPY $x1
97 ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
98 ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
99 ; LITTLE: $w1 = COPY %full_load(s32)
100 ; LITTLE: RET_ReallyLR implicit $w1
101 ; BIG-LABEL: name: s8_loads_to_s32_big_endian_pat
102 ; BIG: liveins: $x0, $x1
103 ; BIG: %ptr:_(p0) = COPY $x1
104 ; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
105 ; BIG: $w1 = COPY %full_load(s32)
106 ; BIG: RET_ReallyLR implicit $w1
107 %cst_1:_(s32) = G_CONSTANT i32 1
108 %cst_2:_(s32) = G_CONSTANT i32 2
109 %cst_3:_(s32) = G_CONSTANT i32 3
111 %cst_8:_(s32) = G_CONSTANT i32 8
112 %cst_16:_(s32) = G_CONSTANT i32 16
113 %cst_24:_(s32) = G_CONSTANT i32 24
115 %ptr:_(p0) = COPY $x1
116 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
117 %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
118 %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
120 %elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
121 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
122 %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
; Lowest-addressed byte lands in the highest bits: big-endian byte order.
124 %byte0:_(s32) = nuw G_SHL %elt0, %cst_24(s32)
125 %byte1:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
126 %byte2:_(s32) = nuw G_SHL %elt2, %cst_8(s32)
127 %byte3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
129 %or1:_(s32) = G_OR %byte0, %byte1
130 %or2:_(s32) = G_OR %byte2, %byte3
131 %full_load:_(s32) = G_OR %or1, %or2
133 $w1 = COPY %full_load(s32)
134 RET_ReallyLR implicit $w1
138 name: different_or_pattern
139 tracksRegLiveness: true
144 ; Slightly different OR tree.
147 ; s32 y = (((x[0] | (x[1] << 8)) | (x[2] << 16)) | (x[3] << 24))
149 ; -> Little endian: Load from x[0]
150 ; -> Big endian: Load from x[0] + BSWAP
152 ; LITTLE-LABEL: name: different_or_pattern
153 ; LITTLE: liveins: $x0, $x1
154 ; LITTLE: %ptr:_(p0) = COPY $x1
155 ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
156 ; LITTLE: $w1 = COPY %full_load(s32)
157 ; LITTLE: RET_ReallyLR implicit $w1
158 ; BIG-LABEL: name: different_or_pattern
159 ; BIG: liveins: $x0, $x1
160 ; BIG: %ptr:_(p0) = COPY $x1
161 ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1)
162 ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
163 ; BIG: $w1 = COPY %full_load(s32)
164 ; BIG: RET_ReallyLR implicit $w1
165 %cst_1:_(s32) = G_CONSTANT i32 1
166 %cst_2:_(s32) = G_CONSTANT i32 2
167 %cst_3:_(s32) = G_CONSTANT i32 3
169 %cst_8:_(s32) = G_CONSTANT i32 8
170 %cst_16:_(s32) = G_CONSTANT i32 16
171 %cst_24:_(s32) = G_CONSTANT i32 24
173 %ptr:_(p0) = COPY $x1
174 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
175 %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
176 %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
178 %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
180 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
181 %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
182 %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
184 %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
185 %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
186 %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)
188 ; Note the shape of the tree:
; Left-leaning chain: ((byte0 | byte1) | byte2) | byte3.
198 %or1:_(s32) = G_OR %byte0, %byte1
199 %or2:_(s32) = G_OR %or1, %byte2
200 %full_load:_(s32) = G_OR %or2, %byte3
202 $w1 = COPY %full_load(s32)
203 RET_ReallyLR implicit $w1
207 name: s16_loads_to_s32_little_endian_pat
208 tracksRegLiveness: true
214 ; s32 y = x[0] | (x[1] << 16)
216 ; -> Little endian: Load from x[0]
217 ; -> Big endian: Load from x[0] + BSWAP
219 ; LITTLE-LABEL: name: s16_loads_to_s32_little_endian_pat
220 ; LITTLE: liveins: $x0, $x1
221 ; LITTLE: %ptr:_(p0) = COPY $x1
222 ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
223 ; LITTLE: $w1 = COPY %full_load(s32)
224 ; LITTLE: RET_ReallyLR implicit $w1
225 ; BIG-LABEL: name: s16_loads_to_s32_little_endian_pat
226 ; BIG: liveins: $x0, $x1
227 ; BIG: %ptr:_(p0) = COPY $x1
228 ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
229 ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
230 ; BIG: $w1 = COPY %full_load(s32)
231 ; BIG: RET_ReallyLR implicit $w1
; x[] is an array of s16, so element 1 lives at byte offset 2 (G_PTR_ADD
; offsets are in bytes). With offset 2 the two s16 loads cover bytes 0-3
; contiguously, which is what lets the combine form the
; (load (s32), align 2) checked above; a byte offset of 1 would overlap
; the first load and the combine could not fire.
232 %cst_1:_(s64) = G_CONSTANT i64 2
233 %cst_16:_(s32) = G_CONSTANT i32 16
235 %ptr:_(p0) = COPY $x1
236 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
238 %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
239 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
240 %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
242 %full_load:_(s32) = G_OR %low_half, %high_half
243 $w1 = COPY %full_load(s32)
244 RET_ReallyLR implicit $w1
248 name: s16_loads_to_s32_big_endian_pat
249 tracksRegLiveness: true
255 ; s32 y = x[1] | (x[0] << 16)
257 ; -> Little endian: Load from x[0] + BSWAP
258 ; -> Big endian: Load from x[0]
260 ; LITTLE-LABEL: name: s16_loads_to_s32_big_endian_pat
261 ; LITTLE: liveins: $x0, $x1
262 ; LITTLE: %ptr:_(p0) = COPY $x1
263 ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
264 ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
265 ; LITTLE: $w1 = COPY %full_load(s32)
266 ; LITTLE: RET_ReallyLR implicit $w1
267 ; BIG-LABEL: name: s16_loads_to_s32_big_endian_pat
268 ; BIG: liveins: $x0, $x1
269 ; BIG: %ptr:_(p0) = COPY $x1
270 ; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
271 ; BIG: $w1 = COPY %full_load(s32)
272 ; BIG: RET_ReallyLR implicit $w1
; x[] is an array of s16: element 1 is at byte offset 2 (G_PTR_ADD offsets
; are bytes, not elements). Offset 2 makes the two s16 loads contiguous so
; the combine can produce the (load (s32), align 2) (+ BSWAP on little
; endian) checked above.
273 %cst_1:_(s64) = G_CONSTANT i64 2
274 %cst_16:_(s32) = G_CONSTANT i32 16
276 %ptr:_(p0) = COPY $x1
277 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
279 %elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
280 %high_half:_(s32) = nuw G_SHL %elt0, %cst_16(s32)
281 %low_half:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
283 %full_load:_(s32) = G_OR %low_half, %high_half
284 $w1 = COPY %full_load(s32)
285 RET_ReallyLR implicit $w1
289 name: s16_loads_to_s64_little_endian_pat
290 tracksRegLiveness: true
296 ; s64 y = (x[0] | (x[1] << 16)) | ((x[2] << 32) | (x[3] << 48))
298 ; -> Little endian: Load from x[0]
299 ; -> Big endian: Load from x[0] + BSWAP
301 ; LITTLE-LABEL: name: s16_loads_to_s64_little_endian_pat
302 ; LITTLE: liveins: $x0, $x1
303 ; LITTLE: %ptr:_(p0) = COPY $x1
304 ; LITTLE: %full_load:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2)
305 ; LITTLE: $x1 = COPY %full_load(s64)
306 ; LITTLE: RET_ReallyLR implicit $x1
307 ; BIG-LABEL: name: s16_loads_to_s64_little_endian_pat
308 ; BIG: liveins: $x0, $x1
309 ; BIG: %ptr:_(p0) = COPY $x1
310 ; BIG: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2)
311 ; BIG: %full_load:_(s64) = G_BSWAP [[LOAD]]
312 ; BIG: $x1 = COPY %full_load(s64)
313 ; BIG: RET_ReallyLR implicit $x1
; x[] is an array of s16, so elements 1..3 live at byte offsets 2, 4, and 6.
; With these offsets the four s16 loads tile bytes 0-7 contiguously and the
; combine can emit the single (load (s64), align 2) checked above.
314 %cst_1:_(s64) = G_CONSTANT i64 2
315 %cst_2:_(s64) = G_CONSTANT i64 4
316 %cst_3:_(s64) = G_CONSTANT i64 6
318 %cst_16:_(s64) = G_CONSTANT i64 16
319 %cst_32:_(s64) = G_CONSTANT i64 32
320 %cst_48:_(s64) = G_CONSTANT i64 48
322 %ptr:_(p0) = COPY $x1
323 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
324 %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
325 %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
327 %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
329 %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
330 %elt2:_(s64) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s16))
331 %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16))
333 %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64)
334 %byte4_byte5:_(s64) = nuw G_SHL %elt2, %cst_32(s64)
335 %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64)
337 %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3
338 %or2:_(s64) = G_OR %byte4_byte5, %byte6_byte7
339 %full_load:_(s64) = G_OR %or1, %or2
341 $x1 = COPY %full_load(s64)
342 RET_ReallyLR implicit $x1
346 name: s16_loads_to_s64_big_endian_pat
347 tracksRegLiveness: true
353 ; s64 y = (x[3] | (x[2] << 16)) | ((x[1] << 32) | (x[0] << 48))
355 ; -> Little endian: Load from x[0] + BSWAP
356 ; -> Big endian: Load from x[0]
358 ; LITTLE-LABEL: name: s16_loads_to_s64_big_endian_pat
359 ; LITTLE: liveins: $x0, $x1
360 ; LITTLE: %ptr:_(p0) = COPY $x1
361 ; LITTLE: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2)
362 ; LITTLE: %full_load:_(s64) = G_BSWAP [[LOAD]]
363 ; LITTLE: $x1 = COPY %full_load(s64)
364 ; LITTLE: RET_ReallyLR implicit $x1
365 ; BIG-LABEL: name: s16_loads_to_s64_big_endian_pat
366 ; BIG: liveins: $x0, $x1
367 ; BIG: %ptr:_(p0) = COPY $x1
368 ; BIG: %full_load:_(s64) = G_LOAD %ptr(p0) :: (load (s64), align 2)
369 ; BIG: $x1 = COPY %full_load(s64)
370 ; BIG: RET_ReallyLR implicit $x1
; x[] is an array of s16, so elements 1..3 live at byte offsets 2, 4, and 6;
; the four loads must tile bytes 0-7 for the combine to fire and produce the
; (load (s64), align 2) checked above.
371 %cst_1:_(s64) = G_CONSTANT i64 2
372 %cst_2:_(s64) = G_CONSTANT i64 4
373 %cst_3:_(s64) = G_CONSTANT i64 6
375 %cst_16:_(s64) = G_CONSTANT i64 16
376 %cst_32:_(s64) = G_CONSTANT i64 32
377 %cst_48:_(s64) = G_CONSTANT i64 48
379 %ptr:_(p0) = COPY $x1
380 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
381 %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64)
382 %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
384 %elt0:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
385 %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
386 %elt2:_(s64) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s16))
388 %byte0_byte1:_(s64) = nuw G_SHL %elt0, %cst_48(s64)
389 %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_32(s64)
390 %byte4_byte5:_(s64) = nuw G_SHL %elt2, %cst_16(s64)
391 %byte6_byte7:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16))
393 %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3
394 %or2:_(s64) = G_OR %byte4_byte5, %byte6_byte7
395 %full_load:_(s64) = G_OR %or1, %or2
397 $x1 = COPY %full_load(s64)
398 RET_ReallyLR implicit $x1
403 name: nonzero_start_idx_positive_little_endian_pat
404 tracksRegLiveness: true
410 ; s32 y = (x[1] | (x[2] << 8)) | ((x[3] << 16) | (x[4] << 24))
412 ; -> Little endian: Load from x[1]
413 ; -> Big endian: Load from x[1] + BSWAP
415 ; LITTLE-LABEL: name: nonzero_start_idx_positive_little_endian_pat
416 ; LITTLE: liveins: $x0, $x1
417 ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
418 ; LITTLE: %ptr:_(p0) = COPY $x0
419 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
420 ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
421 ; LITTLE: $w1 = COPY %full_load(s32)
422 ; LITTLE: RET_ReallyLR implicit $w1
423 ; BIG-LABEL: name: nonzero_start_idx_positive_little_endian_pat
424 ; BIG: liveins: $x0, $x1
425 ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
426 ; BIG: %ptr:_(p0) = COPY $x0
427 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
428 ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
429 ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
430 ; BIG: $w1 = COPY %full_load(s32)
431 ; BIG: RET_ReallyLR implicit $w1
432 %cst_1:_(s32) = G_CONSTANT i32 1
433 %cst_2:_(s32) = G_CONSTANT i32 2
434 %cst_3:_(s32) = G_CONSTANT i32 3
435 %cst_4:_(s32) = G_CONSTANT i32 4
437 %cst_8:_(s32) = G_CONSTANT i32 8
438 %cst_16:_(s32) = G_CONSTANT i32 16
439 %cst_24:_(s32) = G_CONSTANT i32 24
441 %ptr:_(p0) = COPY $x0
442 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
443 %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
444 %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
445 %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32)
447 %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
448 %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
449 %elt4:_(s32) = G_ZEXTLOAD %ptr_elt_4(p0) :: (load (s8))
; The lowest byte of the result comes from x[1], so the combined load is
; based at %ptr_elt_1 rather than %ptr.
451 %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
452 %byte1:_(s32) = nuw G_SHL %elt2, %cst_8(s32)
453 %byte2:_(s32) = nuw G_SHL %elt3, %cst_16(s32)
454 %byte3:_(s32) = nuw G_SHL %elt4, %cst_24(s32)
456 %or1:_(s32) = G_OR %byte0, %byte1
457 %or2:_(s32) = G_OR %byte2, %byte3
458 %full_load:_(s32) = G_OR %or1, %or2
460 $w1 = COPY %full_load(s32)
461 RET_ReallyLR implicit $w1
465 name: nonzero_start_idx_positive_big_endian_pat
466 tracksRegLiveness: true
472 ; s32 y = (x[4] | (x[3] << 8)) | ((x[2] << 16) | (x[1] << 24))
474 ; -> Little endian: Load from x[1] + BSWAP
475 ; -> Big endian: Load from x[1]
477 ; LITTLE-LABEL: name: nonzero_start_idx_positive_big_endian_pat
478 ; LITTLE: liveins: $x0, $x1
479 ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
480 ; LITTLE: %ptr:_(p0) = COPY $x0
481 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
482 ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
483 ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
484 ; LITTLE: $w1 = COPY %full_load(s32)
485 ; LITTLE: RET_ReallyLR implicit $w1
486 ; BIG-LABEL: name: nonzero_start_idx_positive_big_endian_pat
487 ; BIG: liveins: $x0, $x1
488 ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
489 ; BIG: %ptr:_(p0) = COPY $x0
490 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
491 ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1)
492 ; BIG: $w1 = COPY %full_load(s32)
493 ; BIG: RET_ReallyLR implicit $w1
494 %cst_1:_(s32) = G_CONSTANT i32 1
495 %cst_2:_(s32) = G_CONSTANT i32 2
496 %cst_3:_(s32) = G_CONSTANT i32 3
497 %cst_4:_(s32) = G_CONSTANT i32 4
499 %cst_8:_(s32) = G_CONSTANT i32 8
500 %cst_16:_(s32) = G_CONSTANT i32 16
501 %cst_24:_(s32) = G_CONSTANT i32 24
503 %ptr:_(p0) = COPY $x0
504 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
505 %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
506 %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
507 %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32)
509 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
510 %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
511 %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
; Big-endian order: x[4] supplies the low byte and x[1] the high byte, so
; the combined access still loads bytes starting at %ptr_elt_1.
513 %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_4(p0) :: (load (s8))
514 %byte1:_(s32) = nuw G_SHL %elt3, %cst_8(s32)
515 %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
516 %byte3:_(s32) = nuw G_SHL %elt1, %cst_24(s32)
518 %or1:_(s32) = G_OR %byte0, %byte1
519 %or2:_(s32) = G_OR %byte2, %byte3
520 %full_load:_(s32) = G_OR %or1, %or2
522 $w1 = COPY %full_load(s32)
523 RET_ReallyLR implicit $w1
527 name: nonzero_start_idx_negative_little_endian_pat
528 tracksRegLiveness: true
534 ; s32 y = (x[-3] | (x[-2] << 8)) | ((x[-1] << 16) | (x[0] << 24))
536 ; -> Little endian: Load from x[-3]
537 ; -> Big endian: Load from x[-3] + BSWAP
539 ; LITTLE-LABEL: name: nonzero_start_idx_negative_little_endian_pat
540 ; LITTLE: liveins: $x0, $x1
541 ; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
542 ; LITTLE: %ptr:_(p0) = COPY $x0
543 ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
544 ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
545 ; LITTLE: $w1 = COPY %full_load(s32)
546 ; LITTLE: RET_ReallyLR implicit $w1
547 ; BIG-LABEL: name: nonzero_start_idx_negative_little_endian_pat
548 ; BIG: liveins: $x0, $x1
549 ; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
550 ; BIG: %ptr:_(p0) = COPY $x0
551 ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
552 ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
553 ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
554 ; BIG: $w1 = COPY %full_load(s32)
555 ; BIG: RET_ReallyLR implicit $w1
556 %cst_neg_1:_(s32) = G_CONSTANT i32 -1
557 %cst_neg_2:_(s32) = G_CONSTANT i32 -2
558 %cst_neg_3:_(s32) = G_CONSTANT i32 -3
560 %cst_8:_(s32) = G_CONSTANT i32 8
561 %cst_16:_(s32) = G_CONSTANT i32 16
562 %cst_24:_(s32) = G_CONSTANT i32 24
564 %ptr:_(p0) = COPY $x0
565 %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
566 %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32)
567 %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32)
569 %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8))
570 %elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load (s8))
571 %elt_0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
; Negative start index: the lowest byte comes from x[-3], so the combined
; load is based at %ptr_elt_neg_3.
573 %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load (s8))
574 %byte1:_(s32) = nuw G_SHL %elt_neg_2, %cst_8(s32)
575 %byte2:_(s32) = nuw G_SHL %elt_neg_1, %cst_16(s32)
576 %byte3:_(s32) = nuw G_SHL %elt_0, %cst_24(s32)
578 %or1:_(s32) = G_OR %byte0, %byte1
579 %or2:_(s32) = G_OR %byte2, %byte3
580 %full_load:_(s32) = G_OR %or1, %or2
582 $w1 = COPY %full_load(s32)
583 RET_ReallyLR implicit $w1
587 name: nonzero_start_idx_negative_big_endian_pat
588 tracksRegLiveness: true
594 ; s32 y = (x[0] | (x[-1] << 8)) | ((x[-2] << 16) | (x[-3] << 24))
596 ; -> Little endian: Load from x[-3] + BSWAP
597 ; -> Big endian: Load from x[-3]
599 ; LITTLE-LABEL: name: nonzero_start_idx_negative_big_endian_pat
600 ; LITTLE: liveins: $x0, $x1
601 ; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
602 ; LITTLE: %ptr:_(p0) = COPY $x0
603 ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
604 ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
605 ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]]
606 ; LITTLE: $w1 = COPY %full_load(s32)
607 ; LITTLE: RET_ReallyLR implicit $w1
608 ; BIG-LABEL: name: nonzero_start_idx_negative_big_endian_pat
609 ; BIG: liveins: $x0, $x1
610 ; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3
611 ; BIG: %ptr:_(p0) = COPY $x0
612 ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
613 ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1)
614 ; BIG: $w1 = COPY %full_load(s32)
615 ; BIG: RET_ReallyLR implicit $w1
616 %cst_neg_1:_(s32) = G_CONSTANT i32 -1
617 %cst_neg_2:_(s32) = G_CONSTANT i32 -2
618 %cst_neg_3:_(s32) = G_CONSTANT i32 -3
620 %cst_8:_(s32) = G_CONSTANT i32 8
621 %cst_16:_(s32) = G_CONSTANT i32 16
622 %cst_24:_(s32) = G_CONSTANT i32 24
624 %ptr:_(p0) = COPY $x0
625 %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32)
626 %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32)
627 %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32)
629 %elt_neg_3:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load (s8))
630 %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8))
631 %elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load (s8))
; NOTE(review): %elt_0 has no users below (%byte0 re-loads from %ptr
; directly), so this looks like a dead load — confirm it is intentional.
632 %elt_0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
634 %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
635 %byte1:_(s32) = nuw G_SHL %elt_neg_1, %cst_8(s32)
636 %byte2:_(s32) = nuw G_SHL %elt_neg_2, %cst_16(s32)
637 %byte3:_(s32) = nuw G_SHL %elt_neg_3, %cst_24(s32)
639 %or1:_(s32) = G_OR %byte0, %byte1
640 %or2:_(s32) = G_OR %byte2, %byte3
641 %full_load:_(s32) = G_OR %or1, %or2
643 $w1 = COPY %full_load(s32)
644 RET_ReallyLR implicit $w1
648 name: dont_combine_volatile
649 tracksRegLiveness: true
654 ; Combine should only happen with unordered loads.
; The layout (two contiguous s16 loads at byte offsets 0 and 2) would
; normally combine into one s32 load; the volatile memory operand on the
; second load must be the only thing that blocks it.
656 ; LITTLE-LABEL: name: dont_combine_volatile
657 ; LITTLE: liveins: $x0, $x1
658 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 2
659 ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
660 ; LITTLE: %ptr:_(p0) = COPY $x1
661 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
662 ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
663 ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load (s16))
664 ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
665 ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
666 ; LITTLE: $w1 = COPY %full_load(s32)
667 ; LITTLE: RET_ReallyLR implicit $w1
668 ; BIG-LABEL: name: dont_combine_volatile
669 ; BIG: liveins: $x0, $x1
670 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 2
671 ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
672 ; BIG: %ptr:_(p0) = COPY $x1
673 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
674 ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
675 ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load (s16))
676 ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
677 ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
678 ; BIG: $w1 = COPY %full_load(s32)
679 ; BIG: RET_ReallyLR implicit $w1
; Byte offset 2 = s16 element 1; keeps the pattern otherwise combinable.
680 %cst_1:_(s64) = G_CONSTANT i64 2
681 %cst_16:_(s32) = G_CONSTANT i32 16
683 %ptr:_(p0) = COPY $x1
684 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
686 %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
687 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (volatile load (s16))
688 %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
690 %full_load:_(s32) = G_OR %low_half, %high_half
691 $w1 = COPY %full_load(s32)
692 RET_ReallyLR implicit $w1
696 name: dont_wrong_memop_size
697 tracksRegLiveness: true
702 ; Combine should only happen when the loads load the same size.
; Offset 2 with a shift of 16 matches what a combinable pair of s16 loads
; would use, so the differing memory size (s8 vs s16) is what must block
; the combine here.
704 ; LITTLE-LABEL: name: dont_wrong_memop_size
705 ; LITTLE: liveins: $x0, $x1
706 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 2
707 ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
708 ; LITTLE: %ptr:_(p0) = COPY $x1
709 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
710 ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
711 ; LITTLE: %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
712 ; LITTLE: %high_half:_(s32) = nuw G_SHL %wrong_size_load, %cst_16(s32)
713 ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
714 ; LITTLE: $w1 = COPY %full_load(s32)
715 ; LITTLE: RET_ReallyLR implicit $w1
716 ; BIG-LABEL: name: dont_wrong_memop_size
717 ; BIG: liveins: $x0, $x1
718 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 2
719 ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
720 ; BIG: %ptr:_(p0) = COPY $x1
721 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
722 ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
723 ; BIG: %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
724 ; BIG: %high_half:_(s32) = nuw G_SHL %wrong_size_load, %cst_16(s32)
725 ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
726 ; BIG: $w1 = COPY %full_load(s32)
727 ; BIG: RET_ReallyLR implicit $w1
728 %cst_1:_(s64) = G_CONSTANT i64 2
729 %cst_16:_(s32) = G_CONSTANT i32 16
731 %ptr:_(p0) = COPY $x1
732 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
734 %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
735 %wrong_size_load:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
736 %high_half:_(s32) = nuw G_SHL %wrong_size_load, %cst_16(s32)
738 %full_load:_(s32) = G_OR %low_half, %high_half
739 $w1 = COPY %full_load(s32)
740 RET_ReallyLR implicit $w1
744 name: dont_combine_wrong_offset
745 tracksRegLiveness: true
750 ; This is not equivalent to a 32-bit load with/without a BSWAP:
753 ; s32 y = x[0] | (x[1] << 24)
; The s16 load at byte offset 1 is shifted by 24 bits, so the loaded bytes
; cannot tile a contiguous s32 in either endianness; no combine expected.
755 ; LITTLE-LABEL: name: dont_combine_wrong_offset
756 ; LITTLE: liveins: $x0, $x1
757 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
758 ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24
759 ; LITTLE: %ptr:_(p0) = COPY $x1
760 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
761 ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
762 ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
763 ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_24(s32)
764 ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
765 ; LITTLE: $w1 = COPY %full_load(s32)
766 ; LITTLE: RET_ReallyLR implicit $w1
767 ; BIG-LABEL: name: dont_combine_wrong_offset
768 ; BIG: liveins: $x0, $x1
769 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
770 ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24
771 ; BIG: %ptr:_(p0) = COPY $x1
772 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
773 ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
774 ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
775 ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_24(s32)
776 ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
777 ; BIG: $w1 = COPY %full_load(s32)
778 ; BIG: RET_ReallyLR implicit $w1
779 %cst_1:_(s64) = G_CONSTANT i64 1
780 %cst_24:_(s32) = G_CONSTANT i32 24
782 %ptr:_(p0) = COPY $x1
783 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
785 %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
786 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
787 %high_half:_(s32) = nuw G_SHL %elt1, %cst_24(s32)
789 %full_load:_(s32) = G_OR %low_half, %high_half
790 $w1 = COPY %full_load(s32)
791 RET_ReallyLR implicit $w1
795 name: dont_combine_wrong_offset_2
796 tracksRegLiveness: true
801 ; This does not correspond to a 32-bit load with/without a BSWAP:
804 ; s32 y = x[0] | (x[1] << 8)
; Both s16 loads cover memory byte 1 (loads at byte offsets 0 and 1) and no
; load covers byte 3, so the bytes do not tile a contiguous s32; no combine
; expected.
806 ; LITTLE-LABEL: name: dont_combine_wrong_offset_2
807 ; LITTLE: liveins: $x0, $x1
808 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
809 ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
810 ; LITTLE: %ptr:_(p0) = COPY $x1
811 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
812 ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
813 ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
814 ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
815 ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
816 ; LITTLE: $w1 = COPY %full_load(s32)
817 ; LITTLE: RET_ReallyLR implicit $w1
818 ; BIG-LABEL: name: dont_combine_wrong_offset_2
819 ; BIG: liveins: $x0, $x1
820 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
821 ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
822 ; BIG: %ptr:_(p0) = COPY $x1
823 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
824 ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
825 ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
826 ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
827 ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
828 ; BIG: $w1 = COPY %full_load(s32)
829 ; BIG: RET_ReallyLR implicit $w1
830 %cst_1:_(s64) = G_CONSTANT i64 1
831 %cst_8:_(s32) = G_CONSTANT i32 8
833 %ptr:_(p0) = COPY $x1
834 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
836 %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
837 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
838 %high_half:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
840 %full_load:_(s32) = G_OR %low_half, %high_half
841 $w1 = COPY %full_load(s32)
842 RET_ReallyLR implicit $w1
846 name: dont_combine_missing_load
847 tracksRegLiveness: true
852 ; This is missing x[2], so we shouldn't combine:
855 ; s64 y = (x[0] | (x[1] << 16)) | (x[3] << 48)
; x[] is an array of s16, so x[1] and x[3] live at byte offsets 2 and 6.
; Bytes 4-5 (x[2]) are never loaded; that gap must be the only reason the
; combine does not fire.
857 ; LITTLE-LABEL: name: dont_combine_missing_load
858 ; LITTLE: liveins: $x0, $x1
859 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 2
860 ; LITTLE: %cst_3:_(s64) = G_CONSTANT i64 6
861 ; LITTLE: %cst_16:_(s64) = G_CONSTANT i64 16
862 ; LITTLE: %cst_48:_(s64) = G_CONSTANT i64 48
863 ; LITTLE: %ptr:_(p0) = COPY $x1
864 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
865 ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
866 ; LITTLE: %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
867 ; LITTLE: %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
868 ; LITTLE: %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16))
869 ; LITTLE: %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64)
870 ; LITTLE: %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64)
871 ; LITTLE: %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3
872 ; LITTLE: %full_load:_(s64) = G_OR %or1, %byte6_byte7
873 ; LITTLE: $x1 = COPY %full_load(s64)
874 ; LITTLE: RET_ReallyLR implicit $x1
875 ; BIG-LABEL: name: dont_combine_missing_load
876 ; BIG: liveins: $x0, $x1
877 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 2
878 ; BIG: %cst_3:_(s64) = G_CONSTANT i64 6
879 ; BIG: %cst_16:_(s64) = G_CONSTANT i64 16
880 ; BIG: %cst_48:_(s64) = G_CONSTANT i64 48
881 ; BIG: %ptr:_(p0) = COPY $x1
882 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
883 ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
884 ; BIG: %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
885 ; BIG: %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
886 ; BIG: %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16))
887 ; BIG: %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64)
888 ; BIG: %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64)
889 ; BIG: %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3
890 ; BIG: %full_load:_(s64) = G_OR %or1, %byte6_byte7
891 ; BIG: $x1 = COPY %full_load(s64)
892 ; BIG: RET_ReallyLR implicit $x1
893 %cst_1:_(s64) = G_CONSTANT i64 2
894 %cst_3:_(s64) = G_CONSTANT i64 6
896 %cst_16:_(s64) = G_CONSTANT i64 16
897 %cst_48:_(s64) = G_CONSTANT i64 48
899 %ptr:_(p0) = COPY $x1
900 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
901 %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64)
903 %byte0_byte1:_(s64) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
905 %elt1:_(s64) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
906 %elt3:_(s64) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s16))
908 %byte2_byte3:_(s64) = nuw G_SHL %elt1, %cst_16(s64)
909 %byte6_byte7:_(s64) = nuw G_SHL %elt3, %cst_48(s64)
911 %or1:_(s64) = G_OR %byte0_byte1, %byte2_byte3
912 %full_load:_(s64) = G_OR %or1, %byte6_byte7
914 $x1 = COPY %full_load(s64)
915 RET_ReallyLR implicit $x1
919 name: dont_combine_different_addr_spaces
920 tracksRegLiveness: true
925 ; When the loads are from different address spaces, don't combine.
; Offsets/shifts match a combinable s16 pair (bytes 0-1 and 2-3), so the
; address-space mismatch is the only reason the combine must not fire.
927 ; LITTLE-LABEL: name: dont_combine_different_addr_spaces
928 ; LITTLE: liveins: $x0, $x1
929 ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 2
930 ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
931 ; LITTLE: %ptr:_(p0) = COPY $x1
932 ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
933 ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
934 ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), addrspace 1)
935 ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
936 ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
937 ; LITTLE: $w1 = COPY %full_load(s32)
938 ; LITTLE: RET_ReallyLR implicit $w1
939 ; BIG-LABEL: name: dont_combine_different_addr_spaces
940 ; BIG: liveins: $x0, $x1
941 ; BIG: %cst_1:_(s64) = G_CONSTANT i64 2
942 ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
943 ; BIG: %ptr:_(p0) = COPY $x1
944 ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
945 ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
946 ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), addrspace 1)
947 ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
948 ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
949 ; BIG: $w1 = COPY %full_load(s32)
950 ; BIG: RET_ReallyLR implicit $w1
951 %cst_1:_(s64) = G_CONSTANT i64 2
952 %cst_16:_(s32) = G_CONSTANT i32 16
954 %ptr:_(p0) = COPY $x1
955 %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
957 %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16), addrspace 0)
958 %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16), addrspace 1)
959 %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
961 %full_load:_(s32) = G_OR %low_half, %high_half
962 $w1 = COPY %full_load(s32)
963 RET_ReallyLR implicit $w1
name: dont_combine_duplicate_idx
tracksRegLiveness: true
    ; If two of the G_PTR_ADDs have the same index, then don't combine.
    ; sM y = (x[i] << A) | (x[i] << B) ...
    ; NOTE(review): %uses_idx_2 and %also_uses_idx_2 are both %ptr + 2, so byte
    ; 3 of the wide value is never loaded; the checks expect no combine.
    ; LITTLE-LABEL: name: dont_combine_duplicate_idx
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
    ; LITTLE: %reused_idx:_(s32) = G_CONSTANT i32 2
    ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; LITTLE: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
    ; LITTLE: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
    ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8))
    ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load (s8))
    ; LITTLE: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    ; LITTLE: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    ; LITTLE: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)
    ; LITTLE: %or1:_(s32) = G_OR %byte0, %byte1
    ; LITTLE: %or2:_(s32) = G_OR %byte2, %byte3
    ; LITTLE: %full_load:_(s32) = G_OR %or1, %or2
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_duplicate_idx
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
    ; BIG: %reused_idx:_(s32) = G_CONSTANT i32 2
    ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; BIG: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
    ; BIG: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
    ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; BIG: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8))
    ; BIG: %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load (s8))
    ; BIG: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    ; BIG: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    ; BIG: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)
    ; BIG: %or1:_(s32) = G_OR %byte0, %byte1
    ; BIG: %or2:_(s32) = G_OR %byte2, %byte3
    ; BIG: %full_load:_(s32) = G_OR %or1, %or2
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s32) = G_CONSTANT i32 1
    %reused_idx:_(s32) = G_CONSTANT i32 2
    %cst_8:_(s32) = G_CONSTANT i32 8
    %cst_16:_(s32) = G_CONSTANT i32 16
    %cst_24:_(s32) = G_CONSTANT i32 24
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; Both of these pointers address element 2 — the duplicate index.
    %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
    %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32)
    %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8))
    %elt3:_(s32) = G_ZEXTLOAD %also_uses_idx_2(p0) :: (load (s8))
    %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)
    %or1:_(s32) = G_OR %byte0, %byte1
    %or2:_(s32) = G_OR %byte2, %byte3
    %full_load:_(s32) = G_OR %or1, %or2
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1
name: dont_combine_duplicate_offset
tracksRegLiveness: true
    ; If two of the G_SHLs have the same constant, then we should not combine.
    ; sM y = (x[i] << A) | (x[i+1] << A) ...
    ; NOTE(review): %duplicate_shl_1 and %duplicate_shl_2 both shift by
    ; %duplicate_shl_cst (16), so two loaded bytes target the same bit offset
    ; and no single wide load can reproduce the value.
    ; LITTLE-LABEL: name: dont_combine_duplicate_offset
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
    ; LITTLE: %cst_2:_(s32) = G_CONSTANT i32 2
    ; LITTLE: %cst_3:_(s32) = G_CONSTANT i32 3
    ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
    ; LITTLE: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
    ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
    ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
    ; LITTLE: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    ; LITTLE: %duplicate_shl_1:_(s32) = nuw G_SHL %elt2, %duplicate_shl_cst(s32)
    ; LITTLE: %duplicate_shl_2:_(s32) = nuw G_SHL %elt3, %duplicate_shl_cst(s32)
    ; LITTLE: %or1:_(s32) = G_OR %byte0, %byte1
    ; LITTLE: %or2:_(s32) = G_OR %duplicate_shl_1, %duplicate_shl_2
    ; LITTLE: %full_load:_(s32) = G_OR %or1, %or2
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_duplicate_offset
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
    ; BIG: %cst_2:_(s32) = G_CONSTANT i32 2
    ; BIG: %cst_3:_(s32) = G_CONSTANT i32 3
    ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
    ; BIG: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
    ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
    ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    ; BIG: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
    ; BIG: %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    ; BIG: %duplicate_shl_1:_(s32) = nuw G_SHL %elt2, %duplicate_shl_cst(s32)
    ; BIG: %duplicate_shl_2:_(s32) = nuw G_SHL %elt3, %duplicate_shl_cst(s32)
    ; BIG: %or1:_(s32) = G_OR %byte0, %byte1
    ; BIG: %or2:_(s32) = G_OR %duplicate_shl_1, %duplicate_shl_2
    ; BIG: %full_load:_(s32) = G_OR %or1, %or2
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s32) = G_CONSTANT i32 1
    %cst_2:_(s32) = G_CONSTANT i32 2
    %cst_3:_(s32) = G_CONSTANT i32 3
    %cst_8:_(s32) = G_CONSTANT i32 8
    %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
    %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
    %byte1:_(s32) = nuw G_SHL %elt1, %cst_8(s32)
    ; Both shifts below use the same amount — the duplicate offset.
    %duplicate_shl_1:_(s32) = nuw G_SHL %elt2, %duplicate_shl_cst(s32)
    %duplicate_shl_2:_(s32) = nuw G_SHL %elt3, %duplicate_shl_cst(s32)
    %or1:_(s32) = G_OR %byte0, %byte1
    %or2:_(s32) = G_OR %duplicate_shl_1, %duplicate_shl_2
    %full_load:_(s32) = G_OR %or1, %or2
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1
name: dont_combine_lowest_index_not_zero_offset
tracksRegLiveness: true
    ; In this case, the lowest index load (e.g. x[0]) does not end up at byte
    ; offset 0. We shouldn't combine.
    ; s32 y = (x[0] << 8) | (x[1]) | (x[2] << 16) ...
    ; NOTE(review): %lowest_idx_load reads x[0] but is shifted left by 8, while
    ; the unshifted byte comes from x[1]; the byte order does not match any
    ; single wide load, so the checks expect the pattern to be kept.
    ; LITTLE-LABEL: name: dont_combine_lowest_index_not_zero_offset
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1
    ; LITTLE: %cst_2:_(s32) = G_CONSTANT i32 2
    ; LITTLE: %cst_3:_(s32) = G_CONSTANT i32 3
    ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
    ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
    ; LITTLE: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    ; LITTLE: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
    ; LITTLE: %byte1:_(s32) = nuw G_SHL %lowest_idx_load, %cst_8(s32)
    ; LITTLE: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    ; LITTLE: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)
    ; LITTLE: %or1:_(s32) = G_OR %byte0, %byte1
    ; LITTLE: %or2:_(s32) = G_OR %byte2, %byte3
    ; LITTLE: %full_load:_(s32) = G_OR %or1, %or2
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_lowest_index_not_zero_offset
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1
    ; BIG: %cst_2:_(s32) = G_CONSTANT i32 2
    ; BIG: %cst_3:_(s32) = G_CONSTANT i32 3
    ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
    ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
    ; BIG: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    ; BIG: %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
    ; BIG: %byte1:_(s32) = nuw G_SHL %lowest_idx_load, %cst_8(s32)
    ; BIG: %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    ; BIG: %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)
    ; BIG: %or1:_(s32) = G_OR %byte0, %byte1
    ; BIG: %or2:_(s32) = G_OR %byte2, %byte3
    ; BIG: %full_load:_(s32) = G_OR %or1, %or2
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s32) = G_CONSTANT i32 1
    %cst_2:_(s32) = G_CONSTANT i32 2
    %cst_3:_(s32) = G_CONSTANT i32 3
    %cst_8:_(s32) = G_CONSTANT i32 8
    %cst_16:_(s32) = G_CONSTANT i32 16
    %cst_24:_(s32) = G_CONSTANT i32 24
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32)
    %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32)
    %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32)
    ; This load is index 0
    %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8))
    %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8))
    %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8))
    %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8))
    ; ... But it ends up being shifted, so we shouldn't combine.
    %byte1:_(s32) = nuw G_SHL %lowest_idx_load, %cst_8(s32)
    %byte2:_(s32) = nuw G_SHL %elt2, %cst_16(s32)
    %byte3:_(s32) = nuw G_SHL %elt3, %cst_24(s32)
    %or1:_(s32) = G_OR %byte0, %byte1
    %or2:_(s32) = G_OR %byte2, %byte3
    %full_load:_(s32) = G_OR %or1, %or2
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1
name: dont_combine_more_than_one_use_load
tracksRegLiveness: true
    ; If any load is used more than once, don't combine. We want to remove the
    ; NOTE(review): %low_half feeds both the G_OR chain and the trailing G_AND,
    ; giving it a second use; the checks expect no combine on either target.
    ; LITTLE-LABEL: name: dont_combine_more_than_one_use_load
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: %extra_use:_(s32) = G_AND %full_load, %low_half
    ; LITTLE: $w1 = COPY %extra_use(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_more_than_one_use_load
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG: %extra_use:_(s32) = G_AND %full_load, %high_half
    ; BIG: $w1 = COPY %extra_use(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %full_load:_(s32) = G_OR %low_half, %high_half
    ; Second use of %low_half — this is what blocks the combine.
    %extra_use:_(s32) = G_AND %full_load, %low_half
    $w1 = COPY %extra_use(s32)
    RET_ReallyLR implicit $w1
name: dont_combine_more_than_one_use_shl
tracksRegLiveness: true
    ; If anything feeding into any of the ors is used more than once, don't
    ; NOTE(review): here the extra use is on %high_half (the G_SHL result)
    ; rather than on a load; the checks expect no combine on either target.
    ; LITTLE-LABEL: name: dont_combine_more_than_one_use_shl
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: %extra_use:_(s32) = G_AND %full_load, %high_half
    ; LITTLE: $w1 = COPY %extra_use(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_more_than_one_use_shl
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG: %extra_use:_(s32) = G_AND %full_load, %high_half
    ; BIG: $w1 = COPY %extra_use(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %full_load:_(s32) = G_OR %low_half, %high_half
    ; Second use of %high_half — this is what blocks the combine.
    %extra_use:_(s32) = G_AND %full_load, %high_half
    $w1 = COPY %extra_use(s32)
    RET_ReallyLR implicit $w1
name: dont_combine_store_between_same_mbb
tracksRegLiveness: true
    ; If there is a store between any of the loads, then do not combine.
    ; NOTE(review): the G_STORE sits between the two component loads in the
    ; same block; %other_ptr may alias %ptr, so the checks expect no combine.
    ; LITTLE-LABEL: name: dont_combine_store_between_same_mbb
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: %other_ptr:_(p0) = COPY $x1
    ; LITTLE: %some_val:_(s32) = G_CONSTANT i32 12
    ; LITTLE: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16))
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_store_between_same_mbb
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: %other_ptr:_(p0) = COPY $x1
    ; BIG: %some_val:_(s32) = G_CONSTANT i32 12
    ; BIG: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16))
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; Memory could be modified here, so don't combine!
    %other_ptr:_(p0) = COPY $x1
    %some_val:_(s32) = G_CONSTANT i32 12
    G_STORE %some_val, %other_ptr :: (store (s16))
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1
name: dont_combine_store_between_different_mbb
tracksRegLiveness: true
    ; LITTLE-LABEL: name: dont_combine_store_between_different_mbb
    ; LITTLE: successors: %bb.1(0x80000000)
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: successors: %bb.2(0x80000000)
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %other_ptr:_(p0) = COPY $x1
    ; LITTLE: %some_val:_(s32) = G_CONSTANT i32 12
    ; LITTLE: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16))
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: dont_combine_store_between_different_mbb
    ; BIG: successors: %bb.1(0x80000000)
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: successors: %bb.2(0x80000000)
    ; BIG: liveins: $x0, $x1
    ; BIG: %other_ptr:_(p0) = COPY $x1
    ; BIG: %some_val:_(s32) = G_CONSTANT i32 12
    ; BIG: G_STORE %some_val(s32), %other_ptr(p0) :: (store (s16))
    ; BIG: liveins: $x0, $x1
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    ; There is a store between the two loads, hidden away in a different MBB.
    ; We should not combine here.
    successors: %bb.1(0x80000000)
    ; If there is a store between any of the loads, then do not combine.
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    successors: %bb.2(0x80000000)
    ; Memory could be modified here, so don't combine!
    %other_ptr:_(p0) = COPY $x1
    %some_val:_(s32) = G_CONSTANT i32 12
    G_STORE %some_val, %other_ptr :: (store (s16))
    ; Final block: the second component load and the G_OR that would be combined.
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1
tracksRegLiveness: true
    ; NOTE(review): unlike dont_combine_store_between_different_mbb above,
    ; there is no intervening store here — the loads merely live in different
    ; blocks. The checks still expect no combine (cross-block combining is
    ; unsupported, per the comment below).
    ; LITTLE-LABEL: name: different_mbb
    ; LITTLE: successors: %bb.1(0x80000000)
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1
    ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; LITTLE: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; LITTLE: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; LITTLE: %full_load:_(s32) = G_OR %low_half, %high_half
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: different_mbb
    ; BIG: successors: %bb.1(0x80000000)
    ; BIG: liveins: $x0, $x1
    ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1
    ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    ; BIG: %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; BIG: liveins: $x0, $x1
    ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    ; BIG: %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    ; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    ; It should be possible to combine here, but it's not supported right now.
    successors: %bb.1(0x80000000)
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    ; Second block: the rest of the pattern.
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1
tracksRegLiveness: true
    ; Test for a bug fix for predecessor-checking code.
    ; NOTE(review): the low-half load is deliberately the first instruction in
    ; the body, before %ptr's COPY; the checks expect the combine to still
    ; fire (single s32 load, plus G_BSWAP on big-endian).
    ; LITTLE-LABEL: name: load_first
    ; LITTLE: liveins: $x0, $x1
    ; LITTLE: %ptr:_(p0) = COPY $x1
    ; LITTLE: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
    ; LITTLE: $w1 = COPY %full_load(s32)
    ; LITTLE: RET_ReallyLR implicit $w1
    ; BIG-LABEL: name: load_first
    ; BIG: liveins: $x0, $x1
    ; BIG: %ptr:_(p0) = COPY $x1
    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 2)
    ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]]
    ; BIG: $w1 = COPY %full_load(s32)
    ; BIG: RET_ReallyLR implicit $w1
    %low_half:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
    %cst_1:_(s64) = G_CONSTANT i64 1
    %cst_16:_(s32) = G_CONSTANT i32 16
    %ptr:_(p0) = COPY $x1
    %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64)
    %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s16))
    %high_half:_(s32) = nuw G_SHL %elt1, %cst_16(s32)
    %full_load:_(s32) = G_OR %low_half, %high_half
    $w1 = COPY %full_load(s32)
    RET_ReallyLR implicit $w1
name: store_between_loads_and_or
tracksRegLiveness: true
    ; Check that we build the G_LOAD at the point of the last load, instead of place of the G_OR.
    ; We could have a G_STORE in between which may not be safe to move the load across.
    ; NOTE(review): the G_STORE here comes after all four component loads but
    ; before the shifts/ors; the checks confirm the combined G_LOAD is emitted
    ; before the G_STORE, not at the G_OR's position.
    ; LITTLE-LABEL: name: store_between_loads_and_or
    ; LITTLE: liveins: $x0, $x1, $x0, $x1
    ; LITTLE: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; LITTLE: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
    ; LITTLE: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
    ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
    ; LITTLE: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
    ; LITTLE: $w0 = COPY [[LOAD]](s32)
    ; LITTLE: RET_ReallyLR implicit $w0
    ; BIG-LABEL: name: store_between_loads_and_or
    ; BIG: liveins: $x0, $x1, $x0, $x1
    ; BIG: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; BIG: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
    ; BIG: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
    ; BIG: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[LOAD]]
    ; BIG: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
    ; BIG: $w0 = COPY [[BSWAP]](s32)
    ; BIG: RET_ReallyLR implicit $w0
    %12:_(s8) = G_CONSTANT i8 1
    %15:_(s32) = G_CONSTANT i32 8
    %19:_(s32) = G_CONSTANT i32 16
    %23:_(s32) = G_CONSTANT i32 24
    %13:_(s32) = G_ZEXTLOAD %0:_(p0) :: (load (s8))
    %3:_(s64) = G_CONSTANT i64 1
    %4:_(p0) = G_PTR_ADD %0:_, %3:_(s64)
    %14:_(s32) = G_ZEXTLOAD %4:_(p0) :: (load (s8))
    %6:_(s64) = G_CONSTANT i64 2
    %7:_(p0) = G_PTR_ADD %0:_, %6:_(s64)
    %18:_(s32) = G_ZEXTLOAD %7:_(p0) :: (load (s8))
    %9:_(s64) = G_CONSTANT i64 3
    %10:_(p0) = G_PTR_ADD %0:_, %9:_(s64)
    %22:_(s32) = G_ZEXTLOAD %10:_(p0) :: (load (s8))
    ; Store between the last component load and the OR tree below.
    G_STORE %12:_(s8), %1:_(p0) :: (store (s8))
    %16:_(s32) = nuw nsw G_SHL %14:_, %15:_(s32)
    %17:_(s32) = G_OR %16:_, %13:_
    %20:_(s32) = nuw nsw G_SHL %18:_, %19:_(s32)
    %21:_(s32) = G_OR %17:_, %20:_
    %24:_(s32) = nuw G_SHL %22:_, %23:_(s32)
    %25:_(s32) = G_OR %21:_, %24:_
    $w0 = COPY %25:_(s32)
    RET_ReallyLR implicit $w0