; The regression tests need to test for the order of emitted instructions, and
; are therefore a bit fragile/reliant on instruction scheduling. The test cases
; have been minimized as much as possible, but most of them could still break
; if the instruction scheduling heuristics for cortex-a53 change.
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-fix-cortex-a53-835769=1 -stats 2>&1 \
; RUN:   | FileCheck %s --check-prefix CHECK
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-fix-cortex-a53-835769=0 -stats 2>&1 \
; RUN:   | FileCheck %s --check-prefix CHECK-NOWORKAROUND
; The following run lines are just to verify whether or not this pass runs by
; default for the given CPUs. Given the fragility of the tests, this is only
; run on a test case where the scheduler has no freedom at all to reschedule
; the instructions, so potentially very different scheduling heuristics will
; not break the test case.
; RUN: llc < %s -mcpu=generic    | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
; RUN: llc < %s -mcpu=cortex-a53 | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
; RUN: llc < %s -mcpu=cortex-a57 | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
; RUN: llc < %s -mcpu=cyclone   | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
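;
; Background (summary added for readers of this test; not itself checked):
; Cortex-A53 erratum 835769 can produce wrong results when a 64-bit
; multiply-accumulate (madd, msub, smaddl, smsubl, umaddl, umsubl) directly
; follows a load, store, or prefetch. The workaround pass breaks up such
; sequences by inserting a nop between the two instructions. For example
; (register names here are illustrative only; the actual registers depend on
; register allocation), a sequence like
;     ldr   x1, [x2]
;     madd  x0, x1, x3, x0    ; erratum-prone back-to-back pair
; becomes, with -aarch64-fix-cortex-a53-835769=1:
;     ldr   x1, [x2]
;     nop                     ; inserted by the workaround pass
;     madd  x0, x1, x3, x0
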
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

define i64 @f_load_madd_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64, i64* %c, align 8
  %mul = mul nsw i64 %0, %b
  %add = add nsw i64 %mul, %a
  ret i64 %add
}
; CHECK-LABEL: f_load_madd_64:
; CHECK: ldr
; CHECK-NEXT: nop
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: f_load_madd_64:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: madd
; CHECK-BASIC-PASS-DISABLED-LABEL: f_load_madd_64:
; CHECK-BASIC-PASS-DISABLED: ldr
; CHECK-BASIC-PASS-DISABLED-NEXT: madd

define i32 @f_load_madd_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
  %0 = load i32, i32* %c, align 4
  %mul = mul nsw i32 %0, %b
  %add = add nsw i32 %mul, %a
  ret i32 %add
}
; CHECK-LABEL: f_load_madd_32:
; CHECK: ldr
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: f_load_madd_32:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: madd
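; (No nop is expected for the 32-bit variant: the erratum only affects the
; 64-bit multiply-accumulate forms.)
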
define i64 @f_load_msub_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64, i64* %c, align 8
  %mul = mul nsw i64 %0, %b
  %sub = sub nsw i64 %a, %mul
  ret i64 %sub
}
; CHECK-LABEL: f_load_msub_64:
; CHECK: ldr
; CHECK-NEXT: nop
; CHECK-NEXT: msub
; CHECK-NOWORKAROUND-LABEL: f_load_msub_64:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: msub

define i32 @f_load_msub_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
  %0 = load i32, i32* %c, align 4
  %mul = mul nsw i32 %0, %b
  %sub = sub nsw i32 %a, %mul
  ret i32 %sub
}
; CHECK-LABEL: f_load_msub_32:
; CHECK: ldr
; CHECK-NEXT: msub
; CHECK-NOWORKAROUND-LABEL: f_load_msub_32:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: msub

define i64 @f_load_mul_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64, i64* %c, align 8
  %mul = mul nsw i64 %0, %b
  ret i64 %mul
}
; CHECK-LABEL: f_load_mul_64:
; CHECK: ldr
; CHECK-NEXT: mul
; CHECK-NOWORKAROUND-LABEL: f_load_mul_64:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: mul
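; (No nop is expected here either: a plain mul has no accumulator operand, so
; it does not form the erratum-prone multiply-accumulate sequence.)
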
define i32 @f_load_mul_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
  %0 = load i32, i32* %c, align 4
  %mul = mul nsw i32 %0, %b
  ret i32 %mul
}
; CHECK-LABEL: f_load_mul_32:
; CHECK: ldr
; CHECK-NEXT: mul
; CHECK-NOWORKAROUND-LABEL: f_load_mul_32:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: mul

define i64 @f_load_mneg_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64, i64* %c, align 8
  %mul = sub nsw i64 0, %b
  %sub = mul i64 %0, %mul
  ret i64 %sub
}
; CHECK-LABEL: f_load_mneg_64:
; CHECK-NOWORKAROUND-LABEL: f_load_mneg_64:
; FIXME: only add further checks here once LLVM actually produces
;        mneg instructions
; FIXME-CHECK: ldr
; FIXME-CHECK-NEXT: nop
; FIXME-CHECK-NEXT: mneg
; FIXME-CHECK-NOWORKAROUND: ldr
; FIXME-CHECK-NOWORKAROUND-NEXT: mneg

define i32 @f_load_mneg_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
  %0 = load i32, i32* %c, align 4
  %mul = sub nsw i32 0, %b
  %sub = mul i32 %0, %mul
  ret i32 %sub
}
; CHECK-LABEL: f_load_mneg_32:
; CHECK-NOWORKAROUND-LABEL: f_load_mneg_32:
; FIXME: only add further checks here once LLVM actually produces
;        mneg instructions
; FIXME-CHECK: ldr
; FIXME-CHECK-NEXT: mneg
; FIXME-CHECK-NOWORKAROUND: ldr
; FIXME-CHECK-NOWORKAROUND-NEXT: mneg

define i64 @f_load_smaddl(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = mul nsw i64 %conv1, %conv
  %add = add nsw i64 %mul, %a
  %0 = load i32, i32* %d, align 4
  %conv2 = sext i32 %0 to i64
  %add3 = add nsw i64 %add, %conv2
  ret i64 %add3
}
; CHECK-LABEL: f_load_smaddl:
; CHECK: ldrsw
; CHECK-NEXT: nop
; CHECK-NEXT: smaddl
; CHECK-NOWORKAROUND-LABEL: f_load_smaddl:
; CHECK-NOWORKAROUND: ldrsw
; CHECK-NOWORKAROUND-NEXT: smaddl

define i64 @f_load_smsubl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = mul nsw i64 %conv1, %conv
  %sub = sub i64 %a, %mul
  %0 = load i32, i32* %d, align 4
  %conv2 = sext i32 %0 to i64
  %add = add nsw i64 %sub, %conv2
  ret i64 %add
}
; CHECK-LABEL: f_load_smsubl_64:
; CHECK: ldrsw
; CHECK-NEXT: nop
; CHECK-NEXT: smsubl
; CHECK-NOWORKAROUND-LABEL: f_load_smsubl_64:
; CHECK-NOWORKAROUND: ldrsw
; CHECK-NOWORKAROUND-NEXT: smsubl

define i64 @f_load_smull(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = mul nsw i64 %conv1, %conv
  %0 = load i32, i32* %d, align 4
  %conv2 = sext i32 %0 to i64
  %div = sdiv i64 %mul, %conv2
  ret i64 %div
}
; CHECK-LABEL: f_load_smull:
; CHECK: ldrsw
; CHECK-NEXT: smull
; CHECK-NOWORKAROUND-LABEL: f_load_smull:
; CHECK-NOWORKAROUND: ldrsw
; CHECK-NOWORKAROUND-NEXT: smull

define i64 @f_load_smnegl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = sext i32 %b to i64
  %conv1 = sext i32 %c to i64
  %mul = sub nsw i64 0, %conv
  %sub = mul i64 %conv1, %mul
  %0 = load i32, i32* %d, align 4
  %conv2 = sext i32 %0 to i64
  %div = sdiv i64 %sub, %conv2
  ret i64 %div
}
; CHECK-LABEL: f_load_smnegl_64:
; CHECK-NOWORKAROUND-LABEL: f_load_smnegl_64:
; FIXME: only add further checks here once LLVM actually produces
;        smnegl instructions

define i64 @f_load_umaddl(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %c to i64
  %mul = mul i64 %conv1, %conv
  %add = add i64 %mul, %a
  %0 = load i32, i32* %d, align 4
  %conv2 = zext i32 %0 to i64
  %add3 = add i64 %add, %conv2
  ret i64 %add3
}
; CHECK-LABEL: f_load_umaddl:
; CHECK: ldr
; CHECK-NEXT: nop
; CHECK-NEXT: umaddl
; CHECK-NOWORKAROUND-LABEL: f_load_umaddl:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: umaddl

define i64 @f_load_umsubl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %c to i64
  %mul = mul i64 %conv1, %conv
  %sub = sub i64 %a, %mul
  %0 = load i32, i32* %d, align 4
  %conv2 = zext i32 %0 to i64
  %add = add i64 %sub, %conv2
  ret i64 %add
}
; CHECK-LABEL: f_load_umsubl_64:
; CHECK: ldr
; CHECK-NEXT: nop
; CHECK-NEXT: umsubl
; CHECK-NOWORKAROUND-LABEL: f_load_umsubl_64:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: umsubl

define i64 @f_load_umull(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %c to i64
  %mul = mul i64 %conv1, %conv
  %0 = load i32, i32* %d, align 4
  %conv2 = zext i32 %0 to i64
  %div = udiv i64 %mul, %conv2
  ret i64 %div
}
; CHECK-LABEL: f_load_umull:
; CHECK: ldr
; CHECK-NEXT: umull
; CHECK-NOWORKAROUND-LABEL: f_load_umull:
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: umull

define i64 @f_load_umnegl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
entry:
  %conv = zext i32 %b to i64
  %conv1 = zext i32 %c to i64
  %mul = sub nsw i64 0, %conv
  %sub = mul i64 %conv1, %mul
  %0 = load i32, i32* %d, align 4
  %conv2 = zext i32 %0 to i64
  %div = udiv i64 %sub, %conv2
  ret i64 %div
}
; CHECK-LABEL: f_load_umnegl_64:
; CHECK-NOWORKAROUND-LABEL: f_load_umnegl_64:
; FIXME: only add further checks here once LLVM actually produces
;        umnegl instructions

define i64 @f_store_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64, i64* %cp, align 8
  store i64 %a, i64* %e, align 8
  %mul = mul nsw i64 %0, %b
  %add = add nsw i64 %mul, %a
  ret i64 %add
}
; CHECK-LABEL: f_store_madd_64:
; CHECK: str
; CHECK-NEXT: nop
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: f_store_madd_64:
; CHECK-NOWORKAROUND: str
; CHECK-NOWORKAROUND-NEXT: madd

define i32 @f_store_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32, i32* %cp, align 4
  store i32 %a, i32* %e, align 4
  %mul = mul nsw i32 %0, %b
  %add = add nsw i32 %mul, %a
  ret i32 %add
}
; CHECK-LABEL: f_store_madd_32:
; CHECK: str
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: f_store_madd_32:
; CHECK-NOWORKAROUND: str
; CHECK-NOWORKAROUND-NEXT: madd

define i64 @f_store_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64, i64* %cp, align 8
  store i64 %a, i64* %e, align 8
  %mul = mul nsw i64 %0, %b
  %sub = sub nsw i64 %a, %mul
  ret i64 %sub
}
; CHECK-LABEL: f_store_msub_64:
; CHECK: str
; CHECK-NEXT: nop
; CHECK-NEXT: msub
; CHECK-NOWORKAROUND-LABEL: f_store_msub_64:
; CHECK-NOWORKAROUND: str
; CHECK-NOWORKAROUND-NEXT: msub

define i32 @f_store_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32, i32* %cp, align 4
  store i32 %a, i32* %e, align 4
  %mul = mul nsw i32 %0, %b
  %sub = sub nsw i32 %a, %mul
  ret i32 %sub
}
; CHECK-LABEL: f_store_msub_32:
; CHECK: str
; CHECK-NEXT: msub
; CHECK-NOWORKAROUND-LABEL: f_store_msub_32:
; CHECK-NOWORKAROUND: str
; CHECK-NOWORKAROUND-NEXT: msub

define i64 @f_store_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64, i64* %cp, align 8
  store i64 %a, i64* %e, align 8
  %mul = mul nsw i64 %0, %b
  ret i64 %mul
}
; CHECK-LABEL: f_store_mul_64:
; CHECK: str
; CHECK-NEXT: mul
; CHECK-NOWORKAROUND-LABEL: f_store_mul_64:
; CHECK-NOWORKAROUND: str
; CHECK-NOWORKAROUND-NEXT: mul

define i32 @f_store_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32, i32* %cp, align 4
  store i32 %a, i32* %e, align 4
  %mul = mul nsw i32 %0, %b
  ret i32 %mul
}
; CHECK-LABEL: f_store_mul_32:
; CHECK: str
; CHECK-NEXT: mul
; CHECK-NOWORKAROUND-LABEL: f_store_mul_32:
; CHECK-NOWORKAROUND: str
; CHECK-NOWORKAROUND-NEXT: mul

define i64 @f_prefetch_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64, i64* %cp, align 8
  %1 = bitcast i64* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 0, i32 0, i32 1)
  %mul = mul nsw i64 %0, %b
  %add = add nsw i64 %mul, %a
  ret i64 %add
}
; CHECK-LABEL: f_prefetch_madd_64:
; CHECK: prfm
; CHECK-NEXT: nop
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: f_prefetch_madd_64:
; CHECK-NOWORKAROUND: prfm
; CHECK-NOWORKAROUND-NEXT: madd

declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) #2
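; For reference when reading the constant arguments in the calls above and
; below: the intrinsic is @llvm.prefetch(address, rw, locality, cache type),
; where rw is 0 for a read and 1 for a write prefetch, locality ranges from 0
; (no temporal locality) to 3 (keep in cache), and cache type 1 selects the
; data cache. Each of these calls lowers to a prfm instruction.
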
define i32 @f_prefetch_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32, i32* %cp, align 4
  %1 = bitcast i32* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 1, i32 0, i32 1)
  %mul = mul nsw i32 %0, %b
  %add = add nsw i32 %mul, %a
  ret i32 %add
}
; CHECK-LABEL: f_prefetch_madd_32:
; CHECK: prfm
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: f_prefetch_madd_32:
; CHECK-NOWORKAROUND: prfm
; CHECK-NOWORKAROUND-NEXT: madd

define i64 @f_prefetch_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64, i64* %cp, align 8
  %1 = bitcast i64* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 0, i32 1, i32 1)
  %mul = mul nsw i64 %0, %b
  %sub = sub nsw i64 %a, %mul
  ret i64 %sub
}
; CHECK-LABEL: f_prefetch_msub_64:
; CHECK: prfm
; CHECK-NEXT: nop
; CHECK-NEXT: msub
; CHECK-NOWORKAROUND-LABEL: f_prefetch_msub_64:
; CHECK-NOWORKAROUND: prfm
; CHECK-NOWORKAROUND-NEXT: msub

define i32 @f_prefetch_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32, i32* %cp, align 4
  %1 = bitcast i32* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 1, i32 1, i32 1)
  %mul = mul nsw i32 %0, %b
  %sub = sub nsw i32 %a, %mul
  ret i32 %sub
}
; CHECK-LABEL: f_prefetch_msub_32:
; CHECK: prfm
; CHECK-NEXT: msub
; CHECK-NOWORKAROUND-LABEL: f_prefetch_msub_32:
; CHECK-NOWORKAROUND: prfm
; CHECK-NOWORKAROUND-NEXT: msub

define i64 @f_prefetch_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
  %0 = load i64, i64* %cp, align 8
  %1 = bitcast i64* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1)
  %mul = mul nsw i64 %0, %b
  ret i64 %mul
}
; CHECK-LABEL: f_prefetch_mul_64:
; CHECK: prfm
; CHECK-NEXT: mul
; CHECK-NOWORKAROUND-LABEL: f_prefetch_mul_64:
; CHECK-NOWORKAROUND: prfm
; CHECK-NOWORKAROUND-NEXT: mul

define i32 @f_prefetch_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
  %0 = load i32, i32* %cp, align 4
  %1 = bitcast i32* %e to i8*
  tail call void @llvm.prefetch(i8* %1, i32 1, i32 3, i32 1)
  %mul = mul nsw i32 %0, %b
  ret i32 %mul
}
; CHECK-LABEL: f_prefetch_mul_32:
; CHECK: prfm
; CHECK-NEXT: mul
; CHECK-NOWORKAROUND-LABEL: f_prefetch_mul_32:
; CHECK-NOWORKAROUND: prfm
; CHECK-NOWORKAROUND-NEXT: mul

define i64 @fall_through(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
  %0 = load i64, i64* %c, align 8
  br label %block1

block1:
  %mul = mul nsw i64 %0, %b
  %add = add nsw i64 %mul, %a
  %tmp = ptrtoint i8* blockaddress(@fall_through, %block1) to i64
  %ret = add nsw i64 %tmp, %add
  ret i64 %ret
}
; CHECK-LABEL: fall_through
; CHECK: ldr
; CHECK-NEXT: nop
; CHECK-NEXT: .Ltmp
; CHECK-NEXT: %bb.
; CHECK-NEXT: madd
; CHECK-NOWORKAROUND-LABEL: fall_through
; CHECK-NOWORKAROUND: ldr
; CHECK-NOWORKAROUND-NEXT: .Ltmp
; CHECK-NOWORKAROUND-NEXT: %bb.
; CHECK-NOWORKAROUND-NEXT: madd
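; (For the fall-through case the nop has to be inserted at the end of the
; preceding basic block, i.e. before the .Ltmp/%bb. labels that start the
; block containing the madd; that placement is what the checks above verify.)
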
; No checks for this, just check it doesn't crash
define i32 @crash_check(i8** nocapture readnone %data) #0 {
entry:
  br label %while.cond

while.cond:
  br label %while.cond
}

attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }

; CHECK-LABEL: ... Statistics Collected ...
; CHECK: 11 aarch64-fix-cortex-a53-835769 - Number of Nops added to work around erratum 835769