//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Functions that implement common sequences in function prologues and epilogues
// used to save code size

        .macro FUNCTION_BEGIN name
        .text
        .globl \name
        .type  \name, @function
        .falign
\name:
        .endm

        .macro FUNCTION_END name
        .size  \name, . - \name
        .endm

        .macro FALLTHROUGH_TAIL_CALL name0 name1
        .size \name0, . - \name0
        .globl \name1
        .type \name1, @function
        .falign
\name1:
        .endm
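
// For illustration, an invocation such as
//   FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18
// expands roughly to
//   .size  __save_r27_through_r16, . - __save_r27_through_r16
//   .globl __save_r27_through_r18
//   .type  __save_r27_through_r18, @function
//   .falign
// __save_r27_through_r18:
// so the second entry point starts where the first body ends (modulo .falign
// padding) and execution simply falls through into it.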

// Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at
// fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48.

// The compiler knows that the __save_* functions clobber LR.  No other
// registers should be used without informing the compiler.

// Since we can only issue one store per packet, we don't hurt performance by
// simply jumping to the right point in this sequence of stores.
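
// As a rough, hypothetical sketch (the frame size and register count are made
// up for illustration), a prologue that needs r16..r27 preserved would allocate
// the frame and then call the matching entry point:
//
//     foo:
//             allocframe(#48)               // saves lr:fp and sets fp
//             call __save_r27_through_r16   // stores r17:16 .. r27:26 below fp; clobbers lr
//             ...                           // function body
//
// A function that only needs r20..r27 preserved would call
// __save_r27_through_r20 instead, skipping the first two stores.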

FUNCTION_BEGIN __save_r27_through_r16
                memd(fp+#-48) = r17:16
FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18
                memd(fp+#-40) = r19:18
FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20
                memd(fp+#-32) = r21:20
FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22
                memd(fp+#-24) = r23:22
FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24
                memd(fp+#-16) = r25:24
        {
                memd(fp+#-8) = r27:26
                jumpr lr
        }
FUNCTION_END __save_r27_through_r24

// For each of the *_before_sibcall functions, jumpr lr is executed in parallel
// with deallocframe.  That way, the return gets the old value of lr, which is
// where these functions need to return, and at the same time, lr gets the value
// it needs going into the sibcall.
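
// As a hypothetical usage sketch (the tail callee "bar" is made up), a sibcall
// epilogue calls the matching entry point and then jumps to the tail callee;
// deallocframe has already put the original return address back in lr, so the
// callee returns directly to this function's caller:
//
//             call __restore_r27_through_r16_and_deallocframe_before_sibcall
//             jump bar                      // tail call; bar returns via the restored lr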

FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall
        {
                r21:20 = memd(fp+#-32)
                r23:22 = memd(fp+#-24)
        }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall
        {
                r25:24 = memd(fp+#-16)
                jump __restore_r27_through_r26_and_deallocframe_before_sibcall
        }
FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall

FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall
                r17:16 = memd(fp+#-48)
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall
        {
                r19:18 = memd(fp+#-40)
                r21:20 = memd(fp+#-32)
        }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall
        {
                r23:22 = memd(fp+#-24)
                r25:24 = memd(fp+#-16)
        }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall
        {
                r27:26 = memd(fp+#-8)
                deallocframe
                jumpr lr
        }
FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall

// Here we use the extra load bandwidth to restore LR early, allowing the return
// to occur in parallel with the deallocframe.
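
// As a hypothetical sketch, a normal (non-sibcall) epilogue does not call these
// routines; it simply tail-jumps to the matching entry point, which reloads the
// saved registers, restores lr, deallocates the frame, and returns to the
// original caller itself:
//
//             jump __restore_r27_through_r16_and_deallocframe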

FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe
        {
                r17:16 = memd(fp+#-48)
                r19:18 = memd(fp+#-40)
        }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe
        {
                r21:20 = memd(fp+#-32)
                r23:22 = memd(fp+#-24)
        }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe
        {
                lr = memw(fp+#4)
                r25:24 = memd(fp+#-16)
        }
        {
                r27:26 = memd(fp+#-8)
                deallocframe
                jumpr lr
        }
FUNCTION_END __restore_r27_through_r24_and_deallocframe

// Here the load bandwidth is maximized for all three functions.

FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe
        {
                r19:18 = memd(fp+#-40)
                r21:20 = memd(fp+#-32)
        }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe
        {
                r23:22 = memd(fp+#-24)
                r25:24 = memd(fp+#-16)
        }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe
        {
                r27:26 = memd(fp+#-8)
                deallocframe
        }
                jumpr lr
FUNCTION_END __restore_r27_through_r26_and_deallocframe