1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc < %s | FileCheck %s
4 ; Verify that the following code can be compiled without +sme: if the
5 ; function is not entered in streaming-SVE mode at runtime, the code path
6 ; containing the smstop/smstart pair is never executed either.
8 target triple = "aarch64"
; Streaming-compatible caller (attribute group #0) that calls @non_streaming,
; which is declared without any attributes. The assertions below expect:
;   - d8-d15, x30 and x19 to be saved in the prologue, with the result of
;     __arm_get_current_vg stored next to x19;
;   - the streaming state to be queried through __arm_sme_state, with bit 0 of
;     the result kept in x19;
;   - the call to be wrapped in "smstop sm" / "smstart sm", each of which is
;     skipped by a tbz when bit 0 of x19 is clear.
; NOTE(review): the d8-d15 saves are presumably needed because the mode switch
; does not preserve them -- confirm against the SME ABI.
10 define void @streaming_compatible() #0 {
11 ; CHECK-LABEL: streaming_compatible:
13 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
14 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
15 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
16 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
17 ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
18 ; CHECK-NEXT: bl __arm_get_current_vg
19 ; CHECK-NEXT: stp x0, x19, [sp, #72] // 16-byte Folded Spill
20 ; CHECK-NEXT: bl __arm_sme_state
21 ; CHECK-NEXT: and x19, x0, #0x1
22 ; CHECK-NEXT: tbz w19, #0, .LBB0_2
23 ; CHECK-NEXT: // %bb.1:
24 ; CHECK-NEXT: smstop sm
25 ; CHECK-NEXT: .LBB0_2:
26 ; CHECK-NEXT: bl non_streaming
27 ; CHECK-NEXT: tbz w19, #0, .LBB0_4
28 ; CHECK-NEXT: // %bb.3:
29 ; CHECK-NEXT: smstart sm
30 ; CHECK-NEXT: .LBB0_4:
31 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
32 ; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
33 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
34 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
35 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
36 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
38 call void @non_streaming()
; External callee used by the tests in this file. It is declared with no
; attribute group, i.e. it carries no streaming or streaming-compatible marker.
42 declare void @non_streaming()
45 ; Verify that COALESCER_BARRIER is also supported without +sme.
; Same call pattern as @streaming_compatible, but with a float argument that is
; forwarded to the callee. In addition to the conditional smstop/smstart pair,
; the assertions below expect the argument (s0) to be spilled to [sp, #12]
; before the __arm_sme_state call and reloaded after the conditional
; "smstop sm", immediately before the call to non_streaming.
; NOTE(review): the call passes a float although @non_streaming is declared
; with no parameters -- presumably deliberate, to keep a value live across the
; mode switch; confirm before "fixing" the signature.
47 define void @streaming_compatible_arg(float %f) #0 {
48 ; CHECK-LABEL: streaming_compatible_arg:
50 ; CHECK-NEXT: sub sp, sp, #112
51 ; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
52 ; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
53 ; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
54 ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
55 ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
56 ; CHECK-NEXT: bl __arm_get_current_vg
57 ; CHECK-NEXT: stp x0, x19, [sp, #88] // 16-byte Folded Spill
58 ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
59 ; CHECK-NEXT: bl __arm_sme_state
60 ; CHECK-NEXT: and x19, x0, #0x1
61 ; CHECK-NEXT: tbz w19, #0, .LBB1_2
62 ; CHECK-NEXT: // %bb.1:
63 ; CHECK-NEXT: smstop sm
64 ; CHECK-NEXT: .LBB1_2:
65 ; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
66 ; CHECK-NEXT: bl non_streaming
67 ; CHECK-NEXT: tbz w19, #0, .LBB1_4
68 ; CHECK-NEXT: // %bb.3:
69 ; CHECK-NEXT: smstart sm
70 ; CHECK-NEXT: .LBB1_4:
71 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
72 ; CHECK-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
73 ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
74 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
75 ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
76 ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
77 ; CHECK-NEXT: add sp, sp, #112
79 call void @non_streaming(float %f)
; Attribute group used by @streaming_compatible and @streaming_compatible_arg:
; nounwind plus the streaming-compatible marker. No target feature such as
; +sme is requested here or on the llc command line.
84 attributes #0 = { nounwind "aarch64_pstate_sm_compatible" }