1 // z_Linux_asm.S: - microtasking routines specifically
2 // written for Intel platforms running Linux* OS
5 ////===----------------------------------------------------------------------===//
7 //// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 //// See https://llvm.org/LICENSE.txt for license information.
9 //// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 ////===----------------------------------------------------------------------===//
14 // -----------------------------------------------------------------------
16 // -----------------------------------------------------------------------
18 #include "kmp_config.h"
20 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
23 // the 'delay r16/r32/r64' should be used instead of the 'pause'.
24 // The delay operation has the effect of removing the current thread from
25 // the round-robin HT mechanism, and therefore speeds up the issue rate of
26 // the other threads on the same core.
28 // A value of 0 works fine for <= 2 threads per core, but causes the EPCC
29 // barrier time to increase greatly for 3 or more threads per core.
31 // A value of 100 works pretty well for up to 4 threads per core, but isn't
32 // quite as fast as 0 for 2 threads per core.
34 // We need to check what happens for oversubscription / > 4 threads per core.
35 // It is possible that we need to pass the delay value in as a parameter
36 // that the caller determines based on the total # threads / # cores.
43 # define pause_op .byte 0xf3,0x90
47 # define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
48 # define KMP_LABEL(x) L_##x // form the name of label
49 .macro KMP_CFI_DEF_OFFSET
53 .macro KMP_CFI_REGISTER
61 /* Not sure what .size does in icc, not sure if we need to do something
67 .globl KMP_PREFIX_UNDERSCORE($0)
68 KMP_PREFIX_UNDERSCORE($0):
70 # else // KMP_OS_DARWIN
71 # define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols
72 // Format labels so that they don't override function names in gdb's backtraces
73 // MIC assembler doesn't accept .L syntax, the L works fine there (as well as
76 # define KMP_LABEL(x) L_##x // local label
78 # define KMP_LABEL(x) .L_##x // local label hidden from backtraces
83 .macro DEBUG_INFO proc
85 // Not sure why we need .type and .size for the functions
92 .globl KMP_PREFIX_UNDERSCORE(\proc)
93 KMP_PREFIX_UNDERSCORE(\proc):
96 .macro KMP_CFI_DEF_OFFSET sz
97 .cfi_def_cfa_offset \sz
99 .macro KMP_CFI_OFFSET reg, sz
102 .macro KMP_CFI_REGISTER reg
103 .cfi_def_cfa_register \reg
105 .macro KMP_CFI_DEF reg, sz
106 .cfi_def_cfa \reg,\sz
108 # endif // KMP_OS_DARWIN
109 #endif // KMP_ARCH_X86 || KMP_ARCH_x86_64
111 #if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
114 # define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
115 # define KMP_LABEL(x) L_##x // form the name of label
122 /* Not sure what .size does in icc, not sure if we need to do something
129 .globl KMP_PREFIX_UNDERSCORE($0)
130 KMP_PREFIX_UNDERSCORE($0):
132 # elif KMP_OS_WINDOWS
133 # define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Windows/ARM64 symbols
134 // Format labels so that they don't override function names in gdb's backtraces
135 # define KMP_LABEL(x) .L_##x // local label hidden from backtraces
141 .macro DEBUG_INFO proc
147 .globl KMP_PREFIX_UNDERSCORE(\proc)
148 KMP_PREFIX_UNDERSCORE(\proc):
150 # else // KMP_OS_DARWIN || KMP_OS_WINDOWS
151 # define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols
152 // Format labels so that they don't override function names in gdb's backtraces
153 # define KMP_LABEL(x) .L_##x // local label hidden from backtraces
159 .macro DEBUG_INFO proc
161 // Not sure why we need .type and .size for the functions
164 .type \proc,%function
166 .type \proc,@function
173 .globl KMP_PREFIX_UNDERSCORE(\proc)
174 KMP_PREFIX_UNDERSCORE(\proc):
177 # endif // KMP_OS_DARWIN
180 // BTI and PAC gnu property note
181 # define NT_GNU_PROPERTY_TYPE_0 5
182 # define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
183 # define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1
184 # define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2
186 # define GNU_PROPERTY(type, value) \
187 .pushsection .note.gnu.property, "a"; \
191 .word NT_GNU_PROPERTY_TYPE_0; \
200 # if defined(__ARM_FEATURE_BTI_DEFAULT)
201 # define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI
205 # if __ARM_FEATURE_PAC_DEFAULT & 3
206 # define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC
211 # if (BTI_FLAG | PAC_FLAG) != 0
213 # define PACBTI_C hint #25
214 # define PACBTI_RET hint #29
216 # define PACBTI_C hint #34
219 # define GNU_PROPERTY_BTI_PAC \
220 GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
224 # define GNU_PROPERTY_BTI_PAC
226 #endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
228 .macro COMMON name, size, align_power
232 .comm \name, \size, \align_power
233 #else // !KMP_OS_DARWIN && !KMP_OS_WINDOWS
234 .comm \name, \size, (1<<(\align_power))
238 // -----------------------------------------------------------------------
240 // -----------------------------------------------------------------------
242 #ifdef KMP_GOMP_COMPAT
244 // Support for unnamed common blocks.
246 // Because the symbol ".gomp_critical_user_" contains a ".", we have to
247 // put this stuff in assembly.
252 .comm .gomp_critical_user_,32
254 .globl ___kmp_unnamed_critical_addr
255 ___kmp_unnamed_critical_addr:
256 .long .gomp_critical_user_
257 # else /* Linux* OS */
259 .comm .gomp_critical_user_,32,8
262 .global __kmp_unnamed_critical_addr
263 __kmp_unnamed_critical_addr:
264 .4byte .gomp_critical_user_
265 .type __kmp_unnamed_critical_addr,@object
266 .size __kmp_unnamed_critical_addr,4
267 # endif /* KMP_OS_DARWIN */
268 # endif /* KMP_ARCH_X86 */
273 .comm .gomp_critical_user_,32
275 .globl ___kmp_unnamed_critical_addr
276 ___kmp_unnamed_critical_addr:
277 .quad .gomp_critical_user_
278 # else /* Linux* OS */
280 .comm .gomp_critical_user_,32,8
283 .global __kmp_unnamed_critical_addr
284 __kmp_unnamed_critical_addr:
285 .8byte .gomp_critical_user_
286 .type __kmp_unnamed_critical_addr,@object
287 .size __kmp_unnamed_critical_addr,8
288 # endif /* KMP_OS_DARWIN */
289 # endif /* KMP_ARCH_X86_64 */
291 #endif /* KMP_GOMP_COMPAT */
294 #if KMP_ARCH_X86 && !KMP_ARCH_PPC64
296 // -----------------------------------------------------------------------
297 // microtasking routines specifically written for IA-32 architecture
299 // -----------------------------------------------------------------------
301 .ident "Intel Corporation"
305 // __kmp_x86_pause( void );
313 DEBUG_INFO __kmp_x86_pause
315 # if !KMP_ASM_INTRINS
317 //------------------------------------------------------------------------
319 // __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
321 PROC __kmp_test_then_add32
329 DEBUG_INFO __kmp_test_then_add32
331 //------------------------------------------------------------------------
332 // FUNCTION __kmp_xchg_fixed8
335 // __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
342 PROC __kmp_xchg_fixed8
344 movl 4(%esp), %ecx // "p"
345 movb 8(%esp), %al // "d"
351 DEBUG_INFO __kmp_xchg_fixed8
354 //------------------------------------------------------------------------
355 // FUNCTION __kmp_xchg_fixed16
358 // __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
364 PROC __kmp_xchg_fixed16
366 movl 4(%esp), %ecx // "p"
367 movw 8(%esp), %ax // "d"
373 DEBUG_INFO __kmp_xchg_fixed16
376 //------------------------------------------------------------------------
377 // FUNCTION __kmp_xchg_fixed32
380 // __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
387 PROC __kmp_xchg_fixed32
389 movl 4(%esp), %ecx // "p"
390 movl 8(%esp), %eax // "d"
396 DEBUG_INFO __kmp_xchg_fixed32
400 // __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
401 PROC __kmp_compare_and_store8
408 sete %al // if %al == (%ecx) set %al = 1 else set %al = 0
409 and $1, %eax // zero-extend the sete result: clear bits 1-31 of %eax so it is exactly 0 or 1
412 DEBUG_INFO __kmp_compare_and_store8
415 // __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv);
416 PROC __kmp_compare_and_store16
423 sete %al // if %ax == (%ecx) set %al = 1 else set %al = 0
424 and $1, %eax // zero-extend the sete result: clear bits 1-31 of %eax so it is exactly 0 or 1
427 DEBUG_INFO __kmp_compare_and_store16
430 // __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv);
431 PROC __kmp_compare_and_store32
438 sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0
439 and $1, %eax // zero-extend the sete result: clear bits 1-31 of %eax so it is exactly 0 or 1
442 DEBUG_INFO __kmp_compare_and_store32
445 // __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
446 PROC __kmp_compare_and_store64
453 movl 12(%ebp), %eax // "cv" low order word
454 movl 16(%ebp), %edx // "cv" high order word
455 movl 20(%ebp), %ebx // "sv" low order word
456 movl 24(%ebp), %ecx // "sv" high order word
459 sete %al // if %edx:%eax == (%edi) set %al = 1 else set %al = 0
460 and $1, %eax // zero-extend the sete result: clear bits 1-31 of %eax so it is exactly 0 or 1
467 DEBUG_INFO __kmp_compare_and_store64
470 // __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv);
471 PROC __kmp_compare_and_store_ret8
480 DEBUG_INFO __kmp_compare_and_store_ret8
483 // __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv,
485 PROC __kmp_compare_and_store_ret16
494 DEBUG_INFO __kmp_compare_and_store_ret16
497 // __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv,
499 PROC __kmp_compare_and_store_ret32
508 DEBUG_INFO __kmp_compare_and_store_ret32
511 // __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv,
513 PROC __kmp_compare_and_store_ret64
520 movl 12(%ebp), %eax // "cv" low order word
521 movl 16(%ebp), %edx // "cv" high order word
522 movl 20(%ebp), %ebx // "sv" low order word
523 movl 24(%ebp), %ecx // "sv" high order word
532 DEBUG_INFO __kmp_compare_and_store_ret64
535 //------------------------------------------------------------------------
536 // FUNCTION __kmp_xchg_real32
539 // __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
546 PROC __kmp_xchg_real32
572 DEBUG_INFO __kmp_xchg_real32
574 # endif /* !KMP_ASM_INTRINS */
576 //------------------------------------------------------------------------
578 // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
579 // int gtid, int tid,
580 // int argc, void *p_argv[]
583 // void **exit_frame_ptr
587 // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
590 // (*pkfn)( & gtid, & tid, argv[0], ... );
594 // -- Begin __kmp_invoke_microtask
596 PROC __kmp_invoke_microtask
600 KMP_CFI_OFFSET ebp,-8
601 movl %esp,%ebp // establish the base pointer for this routine.
603 subl $8,%esp // allocate space for two local variables.
604 // These variables are:
608 pushl %ebx // save %ebx to use during this routine
611 movl 28(%ebp),%ebx // get exit_frame address
612 movl %ebp,(%ebx) // save exit_frame
615 movl 20(%ebp),%ebx // Stack alignment - # args
616 addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid)
617 shll $2,%ebx // Number of bytes used on stack: (#args+2)*4
619 subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this
620 movl %eax,%ebx // Save to %ebx
621 andl $0xFFFFFF80,%eax // clear the low 7 bits (round down to a multiple of 128)
622 subl %eax,%ebx // Amount to subtract from %esp
623 subl %ebx,%esp // Prepare the stack ptr --
624 // now it will be aligned on 128-byte boundary at the call
626 movl 24(%ebp),%eax // copy from p_argv[]
627 movl %eax,-4(%ebp) // into the local variable *argv.
629 movl 20(%ebp),%ebx // argc is 20(%ebp)
634 jg KMP_LABEL(invoke_4)
635 jmp KMP_LABEL(invoke_3)
639 subl $4,%ebx // decrement argc.
640 addl %ebx,%eax // index into argv.
644 jmp KMP_LABEL(invoke_2)
647 leal 16(%ebp),%eax // push & tid
650 leal 12(%ebp),%eax // push & gtid
654 call *%ebx // call (*pkfn)();
656 movl $1,%eax // return 1;
658 movl -12(%ebp),%ebx // restore %ebx (it was pushed just below the two 4-byte locals)
663 DEBUG_INFO __kmp_invoke_microtask
664 // -- End __kmp_invoke_microtask
668 // __kmp_hardware_timestamp(void)
669 PROC __kmp_hardware_timestamp
673 DEBUG_INFO __kmp_hardware_timestamp
674 // -- End __kmp_hardware_timestamp
676 #endif /* KMP_ARCH_X86 */
681 // -----------------------------------------------------------------------
682 // microtasking routines specifically written for IA-32 architecture and
683 // Intel(R) 64 running Linux* OS
684 // -----------------------------------------------------------------------
687 // mark_description "Intel Corporation";
688 .ident "Intel Corporation"
689 // -- .file "z_Linux_asm.S"
693 // To prevent getting our code into .data section .text added to every routine
694 // definition for x86_64.
695 //------------------------------------------------------------------------
696 # if !KMP_ASM_INTRINS
698 //------------------------------------------------------------------------
699 // FUNCTION __kmp_test_then_add32
702 // __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
710 PROC __kmp_test_then_add32
712 movl %esi, %eax // "d"
717 DEBUG_INFO __kmp_test_then_add32
720 //------------------------------------------------------------------------
721 // FUNCTION __kmp_test_then_add64
724 // __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
731 PROC __kmp_test_then_add64
733 movq %rsi, %rax // "d"
738 DEBUG_INFO __kmp_test_then_add64
741 //------------------------------------------------------------------------
742 // FUNCTION __kmp_xchg_fixed8
745 // __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
753 PROC __kmp_xchg_fixed8
755 movb %sil, %al // "d"
761 DEBUG_INFO __kmp_xchg_fixed8
764 //------------------------------------------------------------------------
765 // FUNCTION __kmp_xchg_fixed16
768 // __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
775 PROC __kmp_xchg_fixed16
783 DEBUG_INFO __kmp_xchg_fixed16
786 //------------------------------------------------------------------------
787 // FUNCTION __kmp_xchg_fixed32
790 // __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
798 PROC __kmp_xchg_fixed32
800 movl %esi, %eax // "d"
806 DEBUG_INFO __kmp_xchg_fixed32
809 //------------------------------------------------------------------------
810 // FUNCTION __kmp_xchg_fixed64
813 // __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
820 PROC __kmp_xchg_fixed64
822 movq %rsi, %rax // "d"
828 DEBUG_INFO __kmp_xchg_fixed64
831 //------------------------------------------------------------------------
832 // FUNCTION __kmp_compare_and_store8
835 // __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
844 PROC __kmp_compare_and_store8
846 movb %sil, %al // "cv"
849 sete %al // if %al == (%rdi) set %al = 1 else set %al = 0
850 andq $1, %rax // zero-extend the sete result: clear bits 1-63 of %rax so the return value is exactly 0 or 1
853 DEBUG_INFO __kmp_compare_and_store8
856 //------------------------------------------------------------------------
857 // FUNCTION __kmp_compare_and_store16
860 // __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
869 PROC __kmp_compare_and_store16
871 movw %si, %ax // "cv"
874 sete %al // if %ax == (%rdi) set %al = 1 else set %al = 0
875 andq $1, %rax // zero-extend the sete result: clear bits 1-63 of %rax so the return value is exactly 0 or 1
878 DEBUG_INFO __kmp_compare_and_store16
881 //------------------------------------------------------------------------
882 // FUNCTION __kmp_compare_and_store32
885 // __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
894 PROC __kmp_compare_and_store32
896 movl %esi, %eax // "cv"
899 sete %al // if %eax == (%rdi) set %al = 1 else set %al = 0
900 andq $1, %rax // zero-extend the sete result: clear bits 1-63 of %rax so the return value is exactly 0 or 1
903 DEBUG_INFO __kmp_compare_and_store32
906 //------------------------------------------------------------------------
907 // FUNCTION __kmp_compare_and_store64
910 // __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
918 PROC __kmp_compare_and_store64
920 movq %rsi, %rax // "cv"
923 sete %al // if %rax == (%rdi) set %al = 1 else set %al = 0
924 andq $1, %rax // zero-extend the sete result: clear bits 1-63 of %rax so the return value is exactly 0 or 1
927 DEBUG_INFO __kmp_compare_and_store64
929 //------------------------------------------------------------------------
930 // FUNCTION __kmp_compare_and_store_ret8
933 // __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
942 PROC __kmp_compare_and_store_ret8
944 movb %sil, %al // "cv"
949 DEBUG_INFO __kmp_compare_and_store_ret8
952 //------------------------------------------------------------------------
953 // FUNCTION __kmp_compare_and_store_ret16
956 // __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
965 PROC __kmp_compare_and_store_ret16
967 movw %si, %ax // "cv"
972 DEBUG_INFO __kmp_compare_and_store_ret16
975 //------------------------------------------------------------------------
976 // FUNCTION __kmp_compare_and_store_ret32
979 // __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
988 PROC __kmp_compare_and_store_ret32
990 movl %esi, %eax // "cv"
995 DEBUG_INFO __kmp_compare_and_store_ret32
998 //------------------------------------------------------------------------
999 // FUNCTION __kmp_compare_and_store_ret64
1002 // __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
1010 PROC __kmp_compare_and_store_ret64
1012 movq %rsi, %rax // "cv"
1014 cmpxchgq %rdx,(%rdi)
1017 DEBUG_INFO __kmp_compare_and_store_ret64
1019 # endif /* !KMP_ASM_INTRINS */
1024 # if !KMP_ASM_INTRINS
1026 //------------------------------------------------------------------------
1027 // FUNCTION __kmp_xchg_real32
1030 // __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
1034 // data: %xmm0 (lower 4 bytes)
1036 // return: %xmm0 (lower 4 bytes)
1038 PROC __kmp_xchg_real32
1040 movd %xmm0, %eax // load "data" to eax
1045 movd %eax, %xmm0 // load old value into return register
1049 DEBUG_INFO __kmp_xchg_real32
1052 //------------------------------------------------------------------------
1053 // FUNCTION __kmp_xchg_real64
1056 // __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
1060 // data: %xmm0 (lower 8 bytes)
1061 // return: %xmm0 (lower 8 bytes)
1063 PROC __kmp_xchg_real64
1065 movd %xmm0, %rax // load "data" to rax
1070 movd %rax, %xmm0 // load old value into return register
1073 DEBUG_INFO __kmp_xchg_real64
1076 # endif /* !KMP_MIC */
1078 # endif /* !KMP_ASM_INTRINS */
1080 //------------------------------------------------------------------------
1082 // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
1083 // int gtid, int tid,
1084 // int argc, void *p_argv[]
1087 // void **exit_frame_ptr
1091 // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
1094 // (*pkfn)( & gtid, & tid, argv[0], ... );
1098 // note: at call to pkfn must have %rsp 128-byte aligned for compiler
1109 // __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
1110 // __tid: tid parm pushed on stack so can pass &tid to pkfn
1113 // %rax: used all over the place
1114 // %rdx: used in stack pointer alignment calculation
1115 // %r11: used to traverse p_argv array
1116 // %rsi: used as temporary for stack parameters
1117 // used as temporary for number of pkfn parms to push
1118 // %rbx: used to hold pkfn address, and zero constant, callee-save
1120 // return: %eax (always 1/TRUE)
1124 // -- Begin __kmp_invoke_microtask
1127 PROC __kmp_invoke_microtask
1129 pushq %rbp // save base pointer
1130 KMP_CFI_DEF_OFFSET 16
1131 KMP_CFI_OFFSET rbp,-16
1132 movq %rsp,%rbp // establish the base pointer for this routine.
1133 KMP_CFI_REGISTER rbp
1136 movq %rbp, (%r9) // save exit_frame
1139 pushq %rbx // %rbx is callee-saved register
1140 pushq %rsi // Put gtid on stack so can pass &gtid to pkfn
1141 pushq %rdx // Put tid on stack so can pass &tid to pkfn
1143 movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax
1144 movq $0, %rbx // constant for cmovs later
1145 subq $4, %rax // subtract four args passed in registers to pkfn
1147 js KMP_LABEL(kmp_0) // jump to movq
1148 jmp KMP_LABEL(kmp_0_exit) // jump ahead
1150 movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
1151 KMP_LABEL(kmp_0_exit):
1153 cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
1156 movq %rax, %rsi // save max(0, argc-4) -> %rsi for later
1157 shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8
1160 subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx --
1161 // without align, stack ptr would be this
1162 movq %rdx, %rax // Save to %rax
1164 andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align)
1165 subq %rax, %rdx // Amount to subtract from %rsp
1166 subq %rdx, %rsp // Prepare the stack ptr --
1167 // now %rsp will align to 128-byte boundary at call site
1169 // setup pkfn parameter reg and stack
1170 movq %rcx, %rax // argc -> %rax
1172 je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push
1173 shlq $3, %rcx // argc*8 -> %rcx
1174 movq %r8, %rdx // p_argv -> %rdx
1175 addq %rcx, %rdx // &p_argv[argc] -> %rdx
1177 movq %rsi, %rcx // max (0, argc-4) -> %rcx
1179 KMP_LABEL(kmp_invoke_push_parms):
1180 // push nth - 7th parms to pkfn on stack
1181 subq $8, %rdx // decrement p_argv pointer to previous parm
1182 movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi
1183 pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order)
1186 // C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
1187 // if the name of the label that is an operand of this jecxz starts with a dot (".");
1188 // Apple's linker does not support 1-byte length relocation;
1189 // Resolution: replace all .labelX entries with L_labelX.
1191 jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left
1192 jmp KMP_LABEL(kmp_invoke_push_parms)
1194 KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers.
1195 // order here is important to avoid trashing
1196 // registers used for both input and output parms!
1197 movq %rdi, %rbx // pkfn -> %rbx
1198 leaq __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
1199 leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn)
1200 // Check if argc is 0
1202 je KMP_LABEL(kmp_no_args) // Jump ahead
1204 movq %r8, %r11 // p_argv -> %r11
1207 cmpq $4, %rax // argc >= 4?
1208 jns KMP_LABEL(kmp_4) // jump to movq
1209 jmp KMP_LABEL(kmp_4_exit) // jump ahead
1211 movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
1212 KMP_LABEL(kmp_4_exit):
1214 cmpq $3, %rax // argc >= 3?
1215 jns KMP_LABEL(kmp_3) // jump to movq
1216 jmp KMP_LABEL(kmp_3_exit) // jump ahead
1218 movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
1219 KMP_LABEL(kmp_3_exit):
1221 cmpq $2, %rax // argc >= 2?
1222 jns KMP_LABEL(kmp_2) // jump to movq
1223 jmp KMP_LABEL(kmp_2_exit) // jump ahead
1225 movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
1226 KMP_LABEL(kmp_2_exit):
1228 cmpq $1, %rax // argc >= 1?
1229 jns KMP_LABEL(kmp_1) // jump to movq
1230 jmp KMP_LABEL(kmp_1_exit) // jump ahead
1232 movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
1233 KMP_LABEL(kmp_1_exit):
1235 cmpq $4, %rax // argc >= 4?
1236 cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
1238 cmpq $3, %rax // argc >= 3?
1239 cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
1241 cmpq $2, %rax // argc >= 2?
1242 cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
1244 cmpq $1, %rax // argc >= 1?
1245 cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
1248 KMP_LABEL(kmp_no_args):
1249 call *%rbx // call (*pkfn)();
1250 movq $1, %rax // move 1 into return register;
1252 movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified
1253 movq %rbp, %rsp // restore stack pointer
1254 popq %rbp // restore frame pointer
1258 DEBUG_INFO __kmp_invoke_microtask
1259 // -- End __kmp_invoke_microtask
1262 // __kmp_hardware_timestamp(void)
1264 PROC __kmp_hardware_timestamp
1270 DEBUG_INFO __kmp_hardware_timestamp
1271 // -- End __kmp_hardware_timestamp
1273 //------------------------------------------------------------------------
1274 // FUNCTION __kmp_bsr32
1277 // __kmp_bsr32( int );
1284 DEBUG_INFO __kmp_bsr32
1286 // -----------------------------------------------------------------------
1287 #endif /* KMP_ARCH_X86_64 */
1290 #if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)
1292 //------------------------------------------------------------------------
1294 // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
1295 // int gtid, int tid,
1296 // int argc, void *p_argv[]
1299 // void **exit_frame_ptr
1303 // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
1306 // (*pkfn)( & gtid, & tid, argv[0], ... );
1308 // // FIXME: This is done at call-site and can be removed here.
1310 // *exit_frame_ptr = 0;
1325 // __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
1326 // __tid: tid parm pushed on stack so can pass &tid to pkfn
1329 // x8: used to hold pkfn address
1330 // w9: used as temporary for number of pkfn parms
1331 // x10: used to traverse p_argv array
1332 // x11: used as temporary for stack placement calculation
1333 // x12: used as temporary for stack parameters
1334 // x19: used to preserve exit_frame_ptr, callee-save
1336 // return: w0 (always 1/TRUE)
1342 // -- Begin __kmp_invoke_microtask
1345 PROC __kmp_invoke_microtask
1348 stp x29, x30, [sp, #-16]!
1350 stp x19, x20, [sp, #-16]!
1355 add w9, w9, w3, lsr #1
1356 sub sp, sp, w9, uxtw #4
1360 str w1, [x29, #-__gtid]
1361 str w2, [x29, #-__tid]
1369 sub x0, x29, #__gtid
1372 cbz w9, KMP_LABEL(kmp_1)
1376 cbz w9, KMP_LABEL(kmp_1)
1380 cbz w9, KMP_LABEL(kmp_1)
1384 cbz w9, KMP_LABEL(kmp_1)
1388 cbz w9, KMP_LABEL(kmp_1)
1392 cbz w9, KMP_LABEL(kmp_1)
1397 cbz w9, KMP_LABEL(kmp_1)
1407 ldp x19, x20, [sp], #16
1409 ldp x29, x30, [sp], #16
1413 DEBUG_INFO __kmp_invoke_microtask
1414 // -- End __kmp_invoke_microtask
1416 #endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) */
1418 #if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM
1420 //------------------------------------------------------------------------
1422 // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
1423 // int gtid, int tid,
1424 // int argc, void *p_argv[]
1427 // void **exit_frame_ptr
1431 // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
1434 // (*pkfn)( & gtid, & tid, argv[0], ... );
1436 // // FIXME: This is done at call-site and can be removed here.
1438 // *exit_frame_ptr = 0;
1449 // r4(stack): p_argv
1450 // r5(stack): &exit_frame
1453 // __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
1454 // __tid: tid parm pushed on stack so can pass &tid to pkfn
1457 // r4: used to hold pkfn address
1458 // r5: used as temporary for number of pkfn parms
1459 // r6: used to traverse p_argv array
1460 // r7: frame pointer (in some configurations)
1461 // r8: used as temporary for stack placement calculation
1462 // and as pointer to base of callee saved area
1463 // r9: used as temporary for stack parameters
1464 // r10: used to preserve exit_frame_ptr, callee-save
1465 // r11: frame pointer (in some configurations)
1467 // return: r0 (always 1/TRUE)
1473 // -- Begin __kmp_invoke_microtask
1476 PROC __kmp_invoke_microtask
1478 // Pushing one extra register (r3) to keep the stack aligned
1479 // for when we call pkfn below
1481 // Load p_argv and &exit_frame
1487 # if KMP_OS_DARWIN || (defined(__thumb__) && !KMP_OS_WINDOWS)
1501 // Calculate how much stack to allocate, in increments of 8 bytes.
1502 // We strictly need 4*(argc-2) bytes (2 arguments are passed in
1503 // registers) but allocate 4*argc for simplicity (to avoid needing
1504 // to handle the argc<2 cases). We align the number of bytes
1505 // allocated to 8 bytes, to keep the stack aligned. (Since we
1506 // already allocate more than enough, it's ok to round down
1507 // instead of up for the alignment.) We allocate another extra
1508 // 8 bytes for gtid and tid.
1510 add r5, r5, r3, lsr #1
1511 sub sp, sp, r5, lsl #3
1513 str r1, [r8, #-__gtid]
1514 str r2, [r8, #-__tid]
1519 // Prepare the first 2 parameters to pkfn - pointers to gtid and tid
1520 // in our stack frame.
1526 // Load p_argv[0] and p_argv[1] into r2 and r3, if argc >= 1/2
1528 beq KMP_LABEL(kmp_1)
1532 beq KMP_LABEL(kmp_1)
1535 // Loop, loading the rest of p_argv and writing the elements on the
1539 beq KMP_LABEL(kmp_1)
1558 DEBUG_INFO __kmp_invoke_microtask
1559 // -- End __kmp_invoke_microtask
1561 #endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM */
1565 //------------------------------------------------------------------------
1567 // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
1568 // int gtid, int tid,
1569 // int argc, void *p_argv[]
1572 // void **exit_frame_ptr
1576 // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
1579 // (*pkfn)( & gtid, & tid, argv[0], ... );
1581 // // FIXME: This is done at call-site and can be removed here.
1583 // *exit_frame_ptr = 0;
1597 // return: r3 (always 1/TRUE)
1600 # if KMP_ARCH_PPC64_ELFv2
1603 .globl __kmp_invoke_microtask
1605 # if KMP_ARCH_PPC64_ELFv2
1611 .type __kmp_invoke_microtask,@function
1613 # if KMP_ARCH_PPC64_ELFv2
1614 __kmp_invoke_microtask:
1617 addis 2, 12, .TOC.-.Lfunc_gep0@ha
1618 addi 2, 2, .TOC.-.Lfunc_gep0@l
1620 .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
1622 .section .opd,"aw",@progbits
1623 __kmp_invoke_microtask:
1632 // -- Begin __kmp_invoke_microtask
1635 // We need to allocate a stack frame large enough to hold all of the parameters
1636 // on the stack for the microtask plus what this function needs. That's 48
1637 // bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
1638 // parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
1639 // and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
1640 // to save r30 to hold a copy of r8.
1647 // This is unusual because normally we'd set r31 equal to r1 after the stack
1648 // frame is established. In this case, however, we need to dynamically compute
1649 // the stack frame size, and so we keep a direct copy of r1 to access our
1650 // register save areas and restore the r1 value before returning.
1652 .cfi_def_cfa_register r31
1656 // Compute the size necessary for the local stack frame.
1657 # if KMP_ARCH_PPC64_ELFv2
1666 // We need to make sure that the stack frame stays aligned (to 16 bytes).
1670 // Establish the local stack frame.
1674 .cfi_offset r30, -16
1680 // Store gtid and tid to the stack because they're passed by reference to the microtask.
1720 // There are more than 6 microtask parameters, so we need to store the
1721 // remainder to the stack.
1725 // These are set to 8 bytes before the first desired store address (we're using
1726 // pre-increment loads and stores in the loop below). The parameter save area
1727 // for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
1728 // 32 + 8*8 == 96 bytes above r1 for ELFv2.
1730 # if KMP_ARCH_PPC64_ELFv2
1742 # if KMP_ARCH_PPC64_ELFv2
1747 // For ELFv1, we need to load the actual function address from the function descriptor.
1758 # if KMP_ARCH_PPC64_ELFv2
1784 .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
1787 // -- End __kmp_invoke_microtask
1789 #endif /* KMP_ARCH_PPC64 */
1791 #if KMP_ARCH_RISCV64
1793 //------------------------------------------------------------------------
1795 // typedef void (*microtask_t)(int *gtid, int *tid, ...);
1797 // int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
1801 // void **exit_frame_ptr
1805 // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
1808 // (*pkfn)(&gtid, &tid, argv[0], ...);
1819 // a5: exit_frame_ptr
1822 // __gtid: gtid param pushed on stack so can pass &gtid to pkfn
1823 // __tid: tid param pushed on stack so can pass &tid to pkfn
1827 // t0: used to calculate the dynamic stack size / used to hold pkfn address
1828 // t1: used as temporary for stack placement calculation
1829 // t2: used as temporary for stack arguments
1830 // t3: used as temporary for number of remaining pkfn parms
1831 // t4: used to traverse p_argv array
1833 // return: a0 (always 1/TRUE)
1839 // -- Begin __kmp_invoke_microtask
1842 .globl __kmp_invoke_microtask // make the entry point visible to the linker
1844 .type __kmp_invoke_microtask,@function // mark the symbol as a function
1845 __kmp_invoke_microtask:
1848 // First, save ra and fp
1857 // Compute the dynamic stack size:
1859 // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
1861 // - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
1862 // function by register. Given that we have 8 of such registers (a[0-7])
1863 // and two + 'argc' arguments (consider &gtid and &tid), we need to
1864 // reserve max(0, argc - 6)*8 extra bytes
1866 // The total number of bytes is then max(0, argc - 6)*8 + 8 (the final + 8 is the gtid/tid slot)
1868 // Compute max(0, argc - 6) using the following bithack:
1869 // max(0, x) = x - (x & (x >> 31)), where x := argc - 6 (arithmetic shift: x >> 31 is all ones when x < 0, else 0)
1870 // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
1881 // Align the stack to 16 bytes
1889 // Save frame pointer into exit_frame
1893 // Prepare arguments for the pkfn function (first 8 using a0-a7 registers)
1924 // Prepare any additional argument passed through the stack
1938 // Call pkfn function
1941 // Restore stack and return
1951 .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask // symbol size = distance from the entry label to .Lfunc_end0
1954 // -- End __kmp_invoke_microtask
1956 #endif /* KMP_ARCH_RISCV64 */
1958 #if KMP_ARCH_LOONGARCH64
1960 //------------------------------------------------------------------------
1962 // typedef void (*microtask_t)(int *gtid, int *tid, ...);
1964 // int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
1968 // void **exit_frame_ptr
1972 // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
1975 // (*pkfn)(&gtid, &tid, argv[0], ...);
1986 // a5: exit_frame_ptr
1989 // __gtid: gtid param pushed on stack so can pass &gtid to pkfn
1990 // __tid: tid param pushed on stack so can pass &tid to pkfn
1994 // t0: used to calculate the dynamic stack size / used to hold pkfn address
1995 // t1: used as temporary for stack placement calculation
1996 // t2: used as temporary for stack arguments
1997 // t3: used as temporary for number of remaining pkfn parms
1998 // t4: used to traverse p_argv array
2000 // return: a0 (always 1/TRUE)
2003 // -- Begin __kmp_invoke_microtask
2006 .globl __kmp_invoke_microtask // make the entry point visible to the linker
2008 .type __kmp_invoke_microtask,@function // mark the symbol as a function
2009 __kmp_invoke_microtask:
2012 // First, save ra and fp
2013 addi.d $sp, $sp, -16 // sp -= 16: room for the ra and fp saves
2021 // Compute the dynamic stack size:
2023 // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
2025 // - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
2026 // function by register. Given that we have 8 of such registers (a[0-7])
2027 // and two + 'argc' arguments (consider &gtid and &tid), we need to
2028 // reserve max(0, argc - 6)*8 extra bytes
2030 // The total number of bytes is then max(0, argc - 6)*8 + 8 (the final + 8 is the gtid/tid slot)
2034 masknez $t0, $t0, $t1 // t0 = (t1 != 0) ? 0 : t0; presumably the max(0, argc - 6) clamp (t1's setup is not visible here - confirm)
2039 // Align the stack to 16 bytes
2040 bstrins.d $sp, $zero, 3, 0 // overwrite sp[3:0] with zero bits, rounding sp down to a multiple of 16
2047 // Save frame pointer into exit_frame
2051 // Prepare arguments for the pkfn function (first 8 using a0-a7 registers)
2056 addi.d $a0, $fp, -20 // a0 = fp - 20; presumably &__gtid, pkfn's first argument (store not visible here)
2057 addi.d $a1, $fp, -24 // a1 = fp - 24; presumably &__tid, pkfn's second argument (store not visible here)
2082 // Prepare any additional argument passed through the stack
2096 // Call pkfn function
2099 // Restore stack and return
2101 addi.d $a0, $zero, 1 // return value: always 1 (TRUE)
2103 addi.d $sp, $fp, -16 // sp = fp - 16, dropping the dynamically sized area
2109 .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask // symbol size = distance from the entry label to .Lfunc_end0
2112 // -- End __kmp_invoke_microtask
2114 #endif /* KMP_ARCH_LOONGARCH64 */
2118 //------------------------------------------------------------------------
2120 // typedef void (*microtask_t)(int *gtid, int *tid, ...);
2122 // int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
2126 // void **exit_frame_ptr
2130 // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
2133 // (*pkfn)(&gtid, &tid, argv[0], ...);
2144 // s5: exit_frame_ptr
2147 // __gtid: gtid param pushed on stack so can pass &gtid to pkfn
2148 // __tid: tid param pushed on stack so can pass &tid to pkfn
2152 // s34: used to calculate the dynamic stack size
2153 // s35: used as temporary for stack placement calculation
2154 // s36: used as temporary for stack arguments
2155 // s37: used as temporary for number of remaining pkfn parms
2156 // s38: used to traverse p_argv array
2158 // return: s0 (always 1/TRUE)
2164 // -- Begin __kmp_invoke_microtask
2167 .globl __kmp_invoke_microtask
2168 // Functions must be aligned to 8 bytes.
2170 .type __kmp_invoke_microtask,@function
2171 __kmp_invoke_microtask:
2174 // First, save fp and lr. VE stores them in the caller's stack frame.
2182 // Compute the dynamic stack size:
2184 // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them
2186 // - We need 8 bytes for whole arguments. We have two + 'argc'
2187 // arguments (consider &gtid and &tid). We need to reserve
2188 // (argc + 2) * 8 bytes.
2189 // - We need 176 bytes for RSA and others
2191 // The total number of bytes is then (argc + 2) * 8 + 8 + 176.
2193 // |------------------------------|
2194 // | return address of callee | 8(%fp)
2195 // |------------------------------|
2196 // | frame pointer of callee | 0(%fp)
2197 // |------------------------------| <------------------ %fp
2198 // | __tid / __gtid | -8(%fp) / -4(%fp)
2199 // |------------------------------|
2200 // | argc+2 for arguments | 176(%sp)
2201 // |------------------------------|
2203 // |------------------------------|
2204 // | return address |
2205 // |------------------------------|
2206 // | frame pointer |
2207 // |------------------------------| <------------------ %sp
2209 adds.w.sx %s34, 2, %s3 // s34 = argc + 2 (32-bit add, result sign-extended)
2211 lea %s34, 184(, %s34) // s34 += 184, i.e. the 8 + 176 fixed part of the frame size
2212 subs.l %sp, %sp, %s34 // sp -= s34: carve out the frame
2214 // Align the stack to 16 bytes.
2220 // Call host to allocate stack if it is necessary.
2221 brge.l %sp, %sl, .L_kmp_pass // sp >= sl: branch past the host call
2230 lea %s35, 176(, %sp) // s35 = sp + 176: start of the outgoing argument area
2231 adds.w.sx %s37, 0, %s3 // s37 = argc, sign-extended; used as the remaining-parameter count
2235 // Save frame pointer into exit_frame.
2239 // Prepare arguments for the pkfn function (first 8 using s0-s7
2240 // registers, but need to store stack also because of varargs).
2242 stl %s1, __gtid(%fp) // write gtid (low 32 bits of s1) to its stack slot
2245 adds.l %s0, __gtid, %fp // s0 = fp + __gtid = &gtid (first pkfn argument)
2247 adds.l %s1, __tid, %fp // s1 = fp + __tid = &tid (second pkfn argument)
2250 breq.l 0, %s37, .L_kmp_call // argc == 0: go straight to the call
2254 breq.l 1, %s37, .L_kmp_call // argc == 1: go to the call
2258 breq.l 2, %s37, .L_kmp_call // argc == 2: go to the call
2262 breq.l 3, %s37, .L_kmp_call // argc == 3: go to the call
2266 breq.l 4, %s37, .L_kmp_call // argc == 4: go to the call
2270 breq.l 5, %s37, .L_kmp_call // argc == 5: go to the call
2274 breq.l 6, %s37, .L_kmp_call // argc == 6: go to the call
2276 // Prepare any additional argument passed through the stack.
2277 adds.l %s37, -6, %s37 // s37 = argc - 6: count left for the loop
2278 lea %s38, 48(, %s38) // advance s38 by 48 bytes (six 8-byte entries)
2279 lea %s35, 64(, %s35) // advance s35 by 64 bytes (eight 8-byte slots)
2283 adds.l %s37, -1, %s37 // one fewer argument remaining
2284 adds.l %s38, 8, %s38 // step s38 to the next 8-byte entry
2285 adds.l %s35, 8, %s35 // step s35 to the next 8-byte slot
2286 brne.l 0, %s37, .L_kmp_loop // repeat while s37 != 0
2289 // Call pkfn function.
2295 // Restore stack and return.
2304 // -- End __kmp_invoke_microtask
2306 #endif /* KMP_ARCH_VE */
2310 //------------------------------------------------------------------------
2312 // typedef void (*microtask_t)(int *gtid, int *tid, ...);
2314 // int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
2318 // void **exit_frame_ptr
2322 // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
2325 // (*pkfn)(&gtid, &tid, argv[0], ...);
2336 // SP+160: exit_frame_ptr
2339 // __gtid: gtid param pushed on stack so can pass &gtid to pkfn
2340 // __tid: tid param pushed on stack so can pass &tid to pkfn
2344 // r0: used to fetch argv slots
2345 // r7: used as temporary for number of remaining pkfn parms
2350 // r12: stack parameter area
2353 // return: r2 (always 1/TRUE)
2356 // -- Begin __kmp_invoke_microtask
2359 .globl __kmp_invoke_microtask
2361 .type __kmp_invoke_microtask,@function
2362 __kmp_invoke_microtask:
2365 stmg %r6,%r14,48(%r15) // store r6..r14 to consecutive 8-byte slots starting at 48(r15)
2366 .cfi_offset %r6, -112
2367 .cfi_offset %r7, -104
2368 .cfi_offset %r8, -96
2369 .cfi_offset %r9, -88
2370 .cfi_offset %r10, -80
2371 .cfi_offset %r11, -72
2372 .cfi_offset %r12, -64
2373 .cfi_offset %r13, -56
2374 .cfi_offset %r14, -48
2375 .cfi_offset %r15, -40 // NOTE(review): the stmg above stores r6..r14 only; no visible instruction writes the r15 slot - confirm
2377 .cfi_def_cfa %r11, 160 // unwinder computes the CFA as r11 + 160 from here on
2379 // Compute the dynamic stack size:
2381 // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
2383 // - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
2384 // function by register. Given that we have 5 of such registers (r[2-6])
2385 // and two + 'argc' arguments (consider &gtid and &tid), we need to
2386 // reserve max(0, argc - 3)*8 extra bytes
2388 // The total number of bytes is then max(0, argc - 3)*8 + 8 (the final + 8 is the gtid/tid slot)
2405 // Save frame pointer into exit_frame
2410 // Prepare arguments for the pkfn function (first 5 using r2-r6 registers)
2413 la %r2,164(%r12) // r2 = r12 + 164: address passed as the gtid pointer
2415 la %r3,172(%r12) // r3 = r12 + 172: address passed as the tid pointer
2422 lg %r4,0(%r8) // r4 = argv[0]
2427 lg %r5,8(%r8) // r5 = argv[1]
2432 lg %r6,16(%r8) // r6 = argv[2]
2436 lghi %r13,0 // r13 = 0: offset applied to both the argv load and the stack store below
2438 lg %r0,24(%r13,%r8) // r0 = 8 bytes at r8 + r13 + 24 (argv[3] when r13 == 0)
2439 stg %r0,160(%r13,%r15) // store r0 at r15 + r13 + 160 (parameter slot for the value just loaded)
2445 basr %r14,%r9 // Call pkfn: branch to the address in r9, return address in r14
2447 // Restore stack and return
2450 lmg %r6,%r14,48(%r15) // reload r6..r14 from 48(r15) onward, mirroring the stmg at entry
2454 .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
2457 // -- End __kmp_invoke_microtask
2459 #endif /* KMP_ARCH_S390X */
2461 #if KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32
2462 #ifndef KMP_PREFIX_UNDERSCORE
2463 # define KMP_PREFIX_UNDERSCORE(x) x
2466 COMMON .gomp_critical_user_, 32, 3 // common symbol for the unnamed critical section; presumably 32 bytes, third operand an alignment (COMMON is defined elsewhere - confirm)
2469 .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
2470 KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
2471 .4byte .gomp_critical_user_ // 4-byte data word holding the address of .gomp_critical_user_
2473 .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),4 // object size matches the 4-byte pointer above
2475 #endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 */
2477 #if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \
2478 KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || \
2480 #ifndef KMP_PREFIX_UNDERSCORE
2481 # define KMP_PREFIX_UNDERSCORE(x) x
2484 COMMON .gomp_critical_user_, 32, 3 // common symbol for the unnamed critical section; presumably 32 bytes, third operand an alignment (COMMON is defined elsewhere - confirm)
2487 .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
2488 KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
2489 .8byte .gomp_critical_user_ // 8-byte data word holding the address of .gomp_critical_user_
2491 .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 // object size matches the 8-byte pointer above
2493 #endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
2494 KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE ||
2498 # if KMP_ARCH_ARM || KMP_ARCH_AARCH64
2499 .section .note.GNU-stack,"",%progbits
2500 # elif !KMP_ARCH_WASM
2501 .section .note.GNU-stack,"",@progbits
2505 #if KMP_OS_LINUX && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)
2506 GNU_PROPERTY_BTI_PAC