1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s
; External callees used by the tests below. They differ only in their
; streaming-mode attribute:
;   @normal_callee               - no streaming attribute
;   @streaming_callee            - "aarch64_pstate_sm_enabled"
;   @streaming_compatible_callee - "aarch64_pstate_sm_compatible"
; NOTE(review): @streaming_callee is not called in the part of the file
; visible here - confirm whether it is still needed.
4 declare void @normal_callee();
5 declare void @streaming_callee() "aarch64_pstate_sm_enabled";
6 declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";
; Simplest case: a function that is both "aarch64_pstate_sm_compatible" and
; "aarch64_pstate_sm_body", makes no calls of its own and returns a constant
; zero float.
;
; The assertions below expect the generated code to:
;   * call __arm_sme_state and keep bit 0 of its x0 result in x8
;     (presumably the caller's PSTATE.SM - confirm against the SME ABI);
;   * execute 'smstart sm' only when that bit is clear;
;   * test the same bit again before the epilogue and execute 'smstop sm'
;     only when it is clear, i.e. only undo a mode switch that was made;
;   * materialise the return value with 'fmov s0, wzr' after that point.
8 define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind {
9 ; CHECK-LABEL: sm_body_sm_compatible_simple:
11 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
12 ; CHECK-NEXT: rdsvl x9, #1
13 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
14 ; CHECK-NEXT: lsr x9, x9, #3
15 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
16 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
17 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
19 ; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill
20 ; CHECK-NEXT: bl __arm_sme_state
21 ; CHECK-NEXT: and x8, x0, #0x1
22 ; CHECK-NEXT: tbnz w8, #0, .LBB0_2
23 ; CHECK-NEXT: // %bb.1:
24 ; CHECK-NEXT: smstart sm
25 ; CHECK-NEXT: .LBB0_2:
26 ; CHECK-NEXT: tbnz w8, #0, .LBB0_4
27 ; CHECK-NEXT: // %bb.3:
28 ; CHECK-NEXT: smstop sm
29 ; CHECK-NEXT: .LBB0_4:
30 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
31 ; CHECK-NEXT: fmov s0, wzr
32 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
33 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
34 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
35 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
37 ret float zeroinitializer
; A streaming-compatible function with a streaming body that calls
; @normal_callee, which carries no streaming attribute.
;
; The assertions below expect the generated code to:
;   * keep bit 0 of the __arm_sme_state result in x19 rather than a scratch
;     register; x19 is spilled at [sp, #88] in the prologue and reloaded in
;     the epilogue, and its value is tested again after the call;
;   * conditionally execute 'smstart sm' on entry when that bit is clear;
;   * wrap the call unconditionally as
;     'smstop sm' / 'bl normal_callee' / 'smstart sm';
;   * conditionally execute 'smstop sm' before the epilogue when the bit
;     held in x19 is clear.
40 define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind {
41 ; CHECK-LABEL: sm_body_caller_sm_compatible_caller_normal_callee:
43 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
44 ; CHECK-NEXT: rdsvl x9, #1
45 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
46 ; CHECK-NEXT: lsr x9, x9, #3
47 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
48 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
49 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
51 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
52 ; CHECK-NEXT: bl __arm_sme_state
53 ; CHECK-NEXT: and x19, x0, #0x1
54 ; CHECK-NEXT: tbnz w19, #0, .LBB1_2
55 ; CHECK-NEXT: // %bb.1:
56 ; CHECK-NEXT: smstart sm
57 ; CHECK-NEXT: .LBB1_2:
58 ; CHECK-NEXT: smstop sm
59 ; CHECK-NEXT: bl normal_callee
60 ; CHECK-NEXT: smstart sm
61 ; CHECK-NEXT: tbnz w19, #0, .LBB1_4
62 ; CHECK-NEXT: // %bb.3:
63 ; CHECK-NEXT: smstop sm
64 ; CHECK-NEXT: .LBB1_4:
65 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
66 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
67 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
68 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
69 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
70 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
72 call void @normal_callee()
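76 ; Function Attrs: nounwind uwtable vscale_range(1,16) (stale: the definition below carries only nounwind plus the aarch64_pstate_sm_compatible and aarch64_pstate_sm_body attributes)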
; Multi-basic-block variant: a streaming-compatible function with a streaming
; body that branches on %x == 0. The %if.then block calls @normal_callee and
; the %if.else block calls @streaming_compatible_callee.
;
; The assertions below expect the generated code to:
;   * copy the argument from w0 to w8 before 'bl __arm_sme_state' and test
;     w8 with cbz afterwards (presumably relying on the register-preserving
;     convention of __arm_sme_state - confirm against the SME ABI);
;   * keep bit 0 of the __arm_sme_state result in x19 and conditionally
;     execute 'smstart sm' once, in the entry block;
;   * call streaming_compatible_callee directly, with no mode switch around
;     the call;
;   * wrap the normal_callee call as
;     'smstop sm' / 'bl normal_callee' / 'smstart sm';
;   * give each of the two paths its own conditional 'smstop sm' and its own
;     copy of the epilogue.
77 define void @streaming_body_and_streaming_compatible_interface_multi_basic_block(i32 noundef %x) "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind {
78 ; CHECK-LABEL: streaming_body_and_streaming_compatible_interface_multi_basic_block:
79 ; CHECK: // %bb.0: // %entry
80 ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
81 ; CHECK-NEXT: rdsvl x9, #1
82 ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
83 ; CHECK-NEXT: mov w8, w0
84 ; CHECK-NEXT: lsr x9, x9, #3
85 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
86 ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
87 ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
89 ; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
90 ; CHECK-NEXT: bl __arm_sme_state
91 ; CHECK-NEXT: and x19, x0, #0x1
92 ; CHECK-NEXT: tbnz w19, #0, .LBB2_2
93 ; CHECK-NEXT: // %bb.1: // %entry
94 ; CHECK-NEXT: smstart sm
95 ; CHECK-NEXT: .LBB2_2: // %entry
96 ; CHECK-NEXT: cbz w8, .LBB2_6
97 ; CHECK-NEXT: // %bb.3: // %if.else
98 ; CHECK-NEXT: bl streaming_compatible_callee
99 ; CHECK-NEXT: tbnz w19, #0, .LBB2_5
100 ; CHECK-NEXT: // %bb.4: // %if.else
101 ; CHECK-NEXT: smstop sm
102 ; CHECK-NEXT: .LBB2_5: // %if.else
103 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
104 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
105 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
106 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
107 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
108 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
110 ; CHECK-NEXT: .LBB2_6: // %if.then
111 ; CHECK-NEXT: smstop sm
112 ; CHECK-NEXT: bl normal_callee
113 ; CHECK-NEXT: smstart sm
114 ; CHECK-NEXT: tbnz w19, #0, .LBB2_8
115 ; CHECK-NEXT: // %bb.7: // %if.then
116 ; CHECK-NEXT: smstop sm
117 ; CHECK-NEXT: .LBB2_8: // %if.then
118 ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
119 ; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
120 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
121 ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
122 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
123 ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
126 %cmp = icmp eq i32 %x, 0
127 br i1 %cmp, label %if.then, label %if.else
129 if.then: ; preds = %entry
130 tail call void @normal_callee()
133 if.else: ; preds = %entry
134 tail call void @streaming_compatible_callee()
137 return: ; preds = %if.else, %if.then