compiler-rt/lib/builtins/hexagon/common_entry_exit_abi1.S

   1 //===----------------------Hexagon builtin routine ------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 // Functions that implement common sequences in function prologues and
  10 // epilogues; sharing them out of line saves code size.
  11 // FUNCTION_BEGIN name: switch to .text and open the global function `name`.
  12         .macro FUNCTION_BEGIN name
  13         .text
  14         .globl \name                    // make the symbol visible to the linker
  15         .type  \name, @function         // mark the symbol as a function
  16         .falign                         // Hexagon alignment directive, applied ahead of the label
  17 \name:
  18         .endm
  19 // FUNCTION_END name: set the size of `name` to the bytes from its label to here.
  20         .macro FUNCTION_END name
  21         .size  \name, . - \name
  22         .endm
  23 // FALLTHROUGH_TAIL_CALL name0 name1: end `name0` here and open global `name1`.
  24         .macro FALLTHROUGH_TAIL_CALL name0 name1
  25         .size \name0, . - \name0        // name0's recorded size stops at this point
  26         .globl \name1
  27         .type \name1, @function
  28         .falign                         // Hexagon alignment directive, applied ahead of the label
  29 \name1:
  30         .endm
  31
  32
  33
  34
  35 // Save r25:24 at fp+#-8 and r27:26 at fp+#-16.
  36 // Two entry points share one store sequence:
  37 //   __save_r24_through_r27 stores r27:26, then falls through into the next entry.
  38 //   __save_r24_through_r25 stores r25:24 and returns with jumpr lr.
  39 //
  40 // The compiler knows that the __save_* functions clobber LR.  No other
  41 // registers should be used without informing the compiler.
  42
  43 // Since we can only issue one store per packet, we don't hurt performance by
  44 // simply jumping to the right point in this sequence of stores.
  45
  46 FUNCTION_BEGIN __save_r24_through_r27
  47                 memd(fp+#-16) = r27:26          // store the r27:26 pair at fp-16
  48 FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25
  49         {
  50                 memd(fp+#-8) = r25:24           // store the r25:24 pair at fp-8
  51                 jumpr lr                        // return; shares the packet with the store
  52         }
  53 FUNCTION_END __save_r24_through_r25
  54
  55
  56 // The r24_through_r27 entry reloads r27:26 and falls through into the
  57 // r24_through_r25 entry, which reloads r25:24, runs deallocframe and returns.
  58 // For each of the *_before_tailcall functions, jumpr lr is executed in parallel
  59 // with deallocframe.  That way, the return gets the old value of lr, which is
  60 // where these functions need to return, and at the same time, lr gets the value
  61 // it needs going into the tail call.
  62
  63 FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall
  64                 r27:26 = memd(fp+#-16)          // reload the r27:26 pair from fp-16
  65 FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall
  66         {
  67                 r25:24 = memd(fp+#-8)           // reload the r25:24 pair from fp-8
  68                 deallocframe                    // pops the frame; lr is updated for the tail call
  69                 jumpr lr                        // same packet: uses the lr from before deallocframe
  70         }
  71 FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall
  72
  73
  74 // __restore_r24_through_r27_and_deallocframe: reload r27:26 and r25:24 from the
  75 // frame, run deallocframe, and jump to the lr value loaded from fp+#4.
  76 // Here we use the extra load bandwidth to restore LR early, allowing the return
  77 // to occur in parallel with the deallocframe.
  78
  79 FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe
  80         {
  81                 lr = memw(fp+#4)                // load lr from fp+4 one packet ahead of the return
  82                 r27:26 = memd(fp+#-16)          // reload the r27:26 pair from fp-16
  83         }
  84         {
  85                 r25:24 = memd(fp+#-8)           // reload the r25:24 pair from fp-8
  86                 deallocframe                    // pop the frame
  87                 jumpr lr                        // same packet: uses the lr loaded in the packet above
  88         }
  89 FUNCTION_END __restore_r24_through_r27_and_deallocframe
  90
  91
  92 // __restore_r24_through_r25_and_deallocframe: reload r25:24 from fp+#-8, run
  93 // deallocframe, then return with a jumpr lr issued after that packet.
  94 // Here the load bandwidth is maximized.
  95
  96 FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe
  97         {
  98                 r25:24 = memd(fp+#-8)           // reload the r25:24 pair from fp-8
  99                 deallocframe                    // pop the frame in the same packet as the reload
 100         }
 101                 jumpr lr                        // outside the packet: runs after deallocframe has updated lr
 102 FUNCTION_END __restore_r24_through_r25_and_deallocframe