1 // z_AIX_asm.S: - microtasking routines specifically
2 // written for Power platforms running AIX OS
5 ////===----------------------------------------------------------------------===//
7 //// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 //// See https://llvm.org/LICENSE.txt for license information.
9 //// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 ////===----------------------------------------------------------------------===//
14 // -----------------------------------------------------------------------
16 // -----------------------------------------------------------------------
18 #include "kmp_config.h"
21 //------------------------------------------------------------------------
23 // __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
25 // int argc, void *p_argv[]
28 // void **exit_frame_ptr
32 // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
35 // (*pkfn)( & gtid, & tid, p_argv[0], ... );
37 // // FIXME: This is done at call-site and can be removed here.
39 // *exit_frame_ptr = 0;
53 // return: r3 (always 1/TRUE)
56 #if KMP_ARCH_PPC64_XCOFF
// 64-bit XCOFF variant: pointers are 8 bytes wide, so the register saves and
// the p_argv loads below use std/ld with an 8-byte stride.
58 .globl __kmp_invoke_microtask[DS]
59 .globl .__kmp_invoke_microtask
// Descriptor csect for the routine; the doubleword emitted here is the
// address of the code entry point .__kmp_invoke_microtask.
61 .csect __kmp_invoke_microtask[DS],3
62 .vbyte 8, .__kmp_invoke_microtask
67 .__kmp_invoke_microtask:
70 // -- Begin __kmp_invoke_microtask
// Registers on entry, as consumed by the code below:
//   r3 = pkfn (address of the microtask's function descriptor)
//   r4 = gtid, r5 = tid, r6 = argc, r7 = p_argv
73 // We need to allocate a stack frame large enough to hold all of the parameters
74 // on the stack for the microtask plus what this function needs. That's 48
75 // bytes under the XCOFF64 ABI, plus max(64, 8*(2 + argc)) for
76 // the parameters to the microtask (gtid, tid, argc elements of p_argv),
77 // plus 8 bytes to store the values of r4 and r5, and 8 bytes to store r31.
78 // With OMP-T support, we need an additional 8 bytes to save r30 to hold
80 // Stack offsets relative to the stack pointer on entry:
81 // r31: -8, r30: -16, gtid: -20, tid: -24
84 std 31, -8(1) # Save r31 to the stack
85 std 0, 16(1) # Save LR to the linkage area
87 // This is unusual because normally we'd set r31 equal to r1 after the stack
88 // frame is established. In this case, however, we need to dynamically compute
89 // the stack frame size, and so we keep a direct copy of r1 to access our
90 // register save areas and restore the r1 value before returning.
93 // Compute the size of the "argc" portion of the parameter save area.
94 // The parameter save area is always at least 64 bytes long (i.e. 8 regs)
95 // The microtask has (2 + argc) parameters, so if argc <= 6, we need to
96 // allocate 8*6 bytes, not 8*argc.
99 iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6
100 sldi 0, 0, 3 # r0 = 8 * max(argc, 6)
102 // Compute the size necessary for the local stack frame.
103 // 88 = 48 + 4 (for r4) + 4 (for r5) + 8 (for r31) + 8 (for OMP-T r30) +
104 // 8 (parameter gtid) + 8 (parameter tid)
109 // We need to make sure that the stack frame stays aligned (to 16 bytes).
113 // Establish the local stack frame.
117 std 30, -16(31) # Save r30 to the stack
122 // Store gtid and tid to the stack because they're passed by reference to the microtask.
// Both are stored as 32-bit words (stw), 4 bytes apart.
123 stw 4, -20(31) # Save gtid to the stack
124 stw 5, -24(31) # Save tid to the stack
// r4 is about to be reused as the p_argv cursor, so argc and p_argv are
// moved out of the argument registers that the microtask call will need.
126 mr 12, 6 # r12 = argc
127 mr 4, 7 # r4 = p_argv
// Load up to six p_argv elements into r5..r10, leaving for .Lcall as soon as
// argc is exhausted.
130 blt 0, .Lcall # if (argc < 1) goto .Lcall
132 ld 5, 0(4) # r5 = p_argv[0]
135 blt 0, .Lcall # if (argc < 2) goto .Lcall
137 ld 6, 8(4) # r6 = p_argv[1]
140 blt 0, .Lcall # if (argc < 3) goto .Lcall
142 ld 7, 16(4) # r7 = p_argv[2]
145 blt 0, .Lcall # if (argc < 4) goto .Lcall
147 ld 8, 24(4) # r8 = p_argv[3]
150 blt 0, .Lcall # if (argc < 5) goto .Lcall
152 ld 9, 32(4) # r9 = p_argv[4]
155 blt 0, .Lcall # if (argc < 6) goto .Lcall
157 ld 10, 40(4) # r10 = p_argv[5]
160 blt 0, .Lcall # if (argc < 7) goto .Lcall
162 // There are more than 6 microtask parameters, so we need to store the
163 // remainder to the stack.
164 addi 12, 12, -6 # argc -= 6
167 // These are set to 8 bytes before the first desired store address (we're using
168 // pre-increment loads and stores in the loop below). The parameter save area
169 // for the microtask begins 48 + 8*8 == 112 bytes above r1 for XCOFF64.
170 addi 4, 4, 40 # p_argv = p_argv + 5
171 # (one element before p_argv[6]; see the pre-increment note above)
172 addi 12, 1, 104 # r12 = stack offset (112 - 8)
180 std 2, 40(1) # Save the TOC pointer to the linkage area
181 // Load the actual function address from the function descriptor.
182 ld 12, 0(3) # Function address
183 ld 2, 8(3) # TOC pointer
184 ld 11, 16(3) # Environment pointer
// The first two microtask arguments are the addresses of the stack copies of
// gtid and tid written above.
186 addi 3, 31, -20 # r3 = &gtid
187 addi 4, 31, -24 # r4 = &tid
189 mtctr 12 # CTR = function address
190 bctrl # Branch to CTR
191 ld 2, 40(1) # Restore TOC pointer from linkage area
201 ld 30, -16(31) # Restore r30 from the saved value on the stack
205 ld 31, -8(1) # Restore r31 from the saved value on the stack
207 mtlr 0 # Restore LR from the linkage area
210 #else // KMP_ARCH_PPC_XCOFF
// 32-bit XCOFF variant: pointers are 4 bytes wide, so the register saves and
// the p_argv loads below use stw/lwz with a 4-byte stride.
212 .globl __kmp_invoke_microtask[DS]
213 .globl .__kmp_invoke_microtask
// Descriptor csect for the routine; the word emitted here is the address of
// the code entry point .__kmp_invoke_microtask.
215 .csect __kmp_invoke_microtask[DS],2
216 .vbyte 4, .__kmp_invoke_microtask
221 .__kmp_invoke_microtask:
224 // -- Begin __kmp_invoke_microtask
// Registers on entry, as consumed by the code below:
//   r3 = pkfn (address of the microtask's function descriptor)
//   r4 = gtid, r5 = tid, r6 = argc, r7 = p_argv
227 // We need to allocate a stack frame large enough to hold all of the parameters
228 // on the stack for the microtask plus what this function needs. That's 24
229 // bytes under the XCOFF ABI, plus max(32, 4*(2 + argc)) for
230 // the parameters to the microtask (gtid, tid, argc elements of p_argv),
231 // plus 8 bytes to store the values of r4 and r5, and 4 bytes to store r31.
232 // With OMP-T support, we need an additional 4 bytes to save r30 to hold
234 // Stack offsets relative to the stack pointer on entry:
235 // r31: -4, r30: -8, gtid: -12, tid: -16
238 stw 31, -4(1) # Save r31 to the stack
239 stw 0, 8(1) # Save LR to the linkage area
241 // This is unusual because normally we'd set r31 equal to r1 after the stack
242 // frame is established. In this case, however, we need to dynamically compute
243 // the stack frame size, and so we keep a direct copy of r1 to access our
244 // register save areas and restore the r1 value before returning.
247 // Compute the size of the "argc" portion of the parameter save area.
248 // The parameter save area is always at least 32 bytes long (i.e. 8 regs)
249 // The microtask has (2 + argc) parameters, so if argc <= 6, we need to
250 // allocate 4*6 bytes, not 4*argc.
253 iselgt 0, 6, 0 # r0 = (argc > 6)? argc : 6
254 slwi 0, 0, 2 # r0 = 4 * max(argc, 6)
256 // Compute the size necessary for the local stack frame.
257 // 56 = 32 + 4 (for r4) + 4 (for r5) + 4 (for r31) + 4 (for OMP-T r30) +
258 // 4 (parameter gtid) + 4 (parameter tid)
263 // We need to make sure that the stack frame stays aligned (to 16 bytes).
267 // Establish the local stack frame.
271 stw 30, -8(31) # Save r30 to the stack
276 // Store gtid and tid to the stack because they're passed by reference to the microtask.
277 stw 4, -12(31) # Save gtid to the stack
278 stw 5, -16(31) # Save tid to the stack
// r4 is about to be reused as the p_argv cursor, so argc and p_argv are
// moved out of the argument registers that the microtask call will need.
280 mr 12, 6 # r12 = argc
281 mr 4, 7 # r4 = p_argv
// Load up to six p_argv elements into r5..r10, leaving for .Lcall as soon as
// argc is exhausted.
284 blt 0, .Lcall # if (argc < 1) goto .Lcall
286 lwz 5, 0(4) # r5 = p_argv[0]
289 blt 0, .Lcall # if (argc < 2) goto .Lcall
291 lwz 6, 4(4) # r6 = p_argv[1]
294 blt 0, .Lcall # if (argc < 3) goto .Lcall
296 lwz 7, 8(4) # r7 = p_argv[2]
299 blt 0, .Lcall # if (argc < 4) goto .Lcall
301 lwz 8, 12(4) # r8 = p_argv[3]
304 blt 0, .Lcall # if (argc < 5) goto .Lcall
306 lwz 9, 16(4) # r9 = p_argv[4]
309 blt 0, .Lcall # if (argc < 6) goto .Lcall
311 lwz 10, 20(4) # r10 = p_argv[5]
314 blt 0, .Lcall # if (argc < 7) goto .Lcall
316 // There are more than 6 microtask parameters, so we need to store the
317 // remainder to the stack.
318 addi 12, 12, -6 # argc -= 6
321 // These are set to 4 bytes before the first desired store address (we're using
322 // pre-increment loads and stores in the loop below). The parameter save area
323 // for the microtask begins 24 + 4*8 == 56 bytes above r1 for XCOFF.
324 addi 4, 4, 20 # p_argv = p_argv + 5
325 # (one element before p_argv[6]; see the pre-increment note above)
326 addi 12, 1, 52 # r12 = stack offset (56 - 4)
334 stw 2, 20(1) # Save the TOC pointer to the linkage area
335 // Load the actual function address from the function descriptor.
336 lwz 12, 0(3) # Function address
337 lwz 2, 4(3) # TOC pointer
338 lwz 11, 8(3) # Environment pointer
// The first two microtask arguments are the addresses of the stack copies of
// gtid and tid written above.
340 addi 3, 31, -12 # r3 = &gtid
341 addi 4, 31, -16 # r4 = &tid
343 mtctr 12 # CTR = function address
344 bctrl # Branch to CTR
345 lwz 2, 20(1) # Restore TOC pointer from linkage area
355 lwz 30, -8(31) # Restore r30 from the saved value on the stack
359 lwz 31, -4(1) # Restore r31 from the saved value on the stack
361 mtlr 0 # Restore LR from the linkage area
364 #endif // KMP_ARCH_PPC64_XCOFF
// Hand-encoded traceback table for __kmp_invoke_microtask. It is emitted as
// raw bytes; the comment on each directive decodes the field it encodes.
// The table is introduced by a zero word.
367 .vbyte 4, 0x00000000 # Traceback table begin
368 .byte 0x00 # Version = 0
369 .byte 0x09 # Language = CPlusPlus
370 .byte 0x20 # -IsGlobaLinkage, -IsOutOfLineEpilogOrPrologue
371 # +HasTraceBackTableOffset, -IsInternalProcedure
372 # -HasControlledStorage, -IsTOCless
373 # -IsFloatingPointPresent
374 # -IsFloatingPointOperationLogOrAbortEnabled
375 .byte 0x61 # -IsInterruptHandler, +IsFunctionNamePresent, +IsAllocaUsed
376 # OnConditionDirective = 0, -IsCRSaved, +IsLRSaved
377 .byte 0x80 # +IsBackChainStored, -IsFixup, NumOfFPRsSaved = 0
// NOTE(review): the next two byte pairs describe alternative layouts
// (2 saved GPRs / 6 fixed parms versus 1 saved GPR / 5 fixed parms).
// Presumably only one pair is assembled, selected by a preprocessor
// conditional that is not visible here -- confirm.
379 .byte 0x02 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 2
380 .byte 0x06 # NumberOfFixedParms = 6
382 .byte 0x01 # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 1
383 .byte 0x05 # NumberOfFixedParms = 5
385 .byte 0x01 # NumberOfFPParms = 0, +HasParmsOnStack
386 .vbyte 4, 0x00000000 # Parameter type = i, i, i, i, i
// Function size is the distance from the entry label to .Lfunc_end0.
387 .vbyte 4, .Lfunc_end0-.__kmp_invoke_microtask # Function size
// 0x0016 == 22, the length of the name string that follows.
388 .vbyte 2, 0x0016 # Function name len = 22
389 .byte "__kmp_invoke_microtask" # Function Name
// Matches the use of r31 in the routine as the copy of the entry stack pointer.
390 .byte 0x1f # AllocaRegister = 31
393 // -- End __kmp_invoke_microtask
395 // Support for unnamed common blocks.
// Reserve a 32-byte common symbol .gomp_critical_user_ and export a
// read-write csect, __kmp_unnamed_critical_addr, whose content is a single
// pointer to that symbol.
// The trailing number on .comm/.csect is the alignment operand (presumably
// log2 of the byte alignment, per the AIX assembler reference -- confirm).
397 .comm .gomp_critical_user_, 32, 3
// The csect alignment operand is 3 for 64-bit XCOFF; the ",2" line is
// presumably the 32-bit alternative of a conditional whose #else/#endif are
// not visible here -- confirm.
398 #if KMP_ARCH_PPC64_XCOFF
399 .csect __kmp_unnamed_critical_addr[RW],3
401 .csect __kmp_unnamed_critical_addr[RW],2
403 .globl __kmp_unnamed_critical_addr[RW]
// Pointer-sized data item holding the address of .gomp_critical_user_.
404 .ptr .gomp_critical_user_
406 // -- End unnamed common block