Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / openmp / runtime / src / z_Windows_NT-586_asm.asm
blob7d0e32e107e4de3a9376e27292a9ae2060ad71be
1 ; z_Windows_NT-586_asm.asm: - microtasking routines specifically
2 ; written for IA-32 architecture and Intel(R) 64 running Windows* OS
5 ;//===----------------------------------------------------------------------===//
6 ;//
7 ;// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 ;// See https://llvm.org/LICENSE.txt for license information.
9 ;// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 ;//
11 ;//===----------------------------------------------------------------------===//
TITLE   z_Windows_NT-586_asm.asm

; ============================= IA-32 architecture ==========================
; Everything from here to the matching endif is assembled only when
; _M_IA32 is defined.
ifdef   _M_IA32

        .586P

; Assembler versions above 5.10 use the .model directive; for older versions
; the segments and the FLAT group are declared explicitly instead.
if      @Version gt 510
        .model  HUGE
else
_TEXT           SEGMENT PARA USE32 PUBLIC 'CODE'
_TEXT           ENDS
_DATA           SEGMENT DWORD USE32 PUBLIC 'DATA'
_DATA           ENDS
CONST           SEGMENT DWORD USE32 PUBLIC 'CONST'
CONST           ENDS
_BSS            SEGMENT DWORD USE32 PUBLIC 'BSS'
_BSS            ENDS
$$SYMBOLS       SEGMENT BYTE USE32 'DEBSYM'
$$SYMBOLS       ENDS
$$TYPES         SEGMENT BYTE USE32 'DEBTYP'
$$TYPES         ENDS
_TLS            SEGMENT DWORD USE32 PUBLIC 'TLS'
_TLS            ENDS
FLAT            GROUP   _DATA, CONST, _BSS
                ASSUME  CS: FLAT, DS: FLAT, SS: FLAT
endif
;------------------------------------------------------------------------
; FUNCTION ___kmp_x86_pause
;
; void
; __kmp_x86_pause( void );
;
; Executes a single PAUSE instruction and returns.  Takes no arguments and
; produces no result.
PUBLIC  ___kmp_x86_pause
_p$ = 4                                 ; not referenced by this routine
_d$ = 8                                 ; not referenced by this routine
_TEXT   SEGMENT
        ALIGN 16
___kmp_x86_pause PROC NEAR

        db      0f3H, 090H              ; F3 90 = pause, emitted as raw bytes
        ret

___kmp_x86_pause ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; Executes CPUID with eax = mode and ecx = mode2, then stores the resulting
; eax, ebx, ecx and edx at byte offsets 0, 4, 8 and 12 of *p.
;
; parameters (relative to ebp once the frame is set up):
;       mode:   8[ebp]
;       mode2:  12[ebp]
;       p:      16[ebp]
PUBLIC  ___kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN 16
_mode$  = 8
_mode2$ = 12
_p$     = 16
_eax$   = 0
_ebx$   = 4
_ecx$   = 8
_edx$   = 12

___kmp_x86_cpuid PROC NEAR

        push    ebp
        mov     ebp, esp

        push    edi                     ; edi and ebx are overwritten below
        push    ebx
        push    ecx
        push    edx

        mov     eax, DWORD PTR _mode$[ebp]
        mov     ecx, DWORD PTR _mode2$[ebp]
        cpuid                           ; Query the CPUID for the current processor

        mov     edi, DWORD PTR _p$[ebp]
        mov     DWORD PTR _eax$[ edi ], eax
        mov     DWORD PTR _ebx$[ edi ], ebx
        mov     DWORD PTR _ecx$[ edi ], ecx
        mov     DWORD PTR _edx$[ edi ], edx

        pop     edx
        pop     ecx
        pop     ebx
        pop     edi

        mov     esp, ebp
        pop     ebp
        ret                             ; return to the caller instead of running into the next procedure

___kmp_x86_cpuid ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically adds d to *p and returns the value *p held before the add.
;
; parameters:
;       p:      4[esp]
;       d:      8[esp]
;
; return:       eax
PUBLIC  ___kmp_test_then_add32
_p$ = 4
_d$ = 8
_TEXT   SEGMENT
        ALIGN 16
___kmp_test_then_add32 PROC NEAR

        mov     eax, DWORD PTR _d$[esp]
        mov     ecx, DWORD PTR _p$[esp]
        lock xadd DWORD PTR [ecx], eax  ; eax <= old *p; *p <= old *p + d
        ret

___kmp_test_then_add32 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomically: if *p == cv, store sv into *p.
;
; parameters:
;       p:      4[esp]
;       cv:     8[esp]
;       sv:     12[esp]
;
; return:       eax = 1 if the store happened, 0 otherwise
PUBLIC  ___kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store8 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     al, BYTE PTR _cv$[esp]
        mov     dl, BYTE PTR _sv$[esp]
        lock cmpxchg BYTE PTR [ecx], dl
        sete    al                      ; if al == [ecx] set al = 1 else set al = 0
        and     eax, 1                  ; clear the stale upper bits of eax (zero-extend al)
        ret

___kmp_compare_and_store8 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomically: if *p == cv, store sv into *p.
;
; parameters:
;       p:      4[esp]
;       cv:     8[esp]
;       sv:     12[esp]
;
; return:       eax = 1 if the store happened, 0 otherwise
PUBLIC  ___kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store16 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     ax, WORD PTR _cv$[esp]
        mov     dx, WORD PTR _sv$[esp]
        lock cmpxchg WORD PTR [ecx], dx
        sete    al                      ; if ax == [ecx] set al = 1 else set al = 0
        and     eax, 1                  ; clear the stale upper bits of eax (zero-extend al)
        ret

___kmp_compare_and_store16 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomically: if *p == cv, store sv into *p.
;
; parameters:
;       p:      4[esp]
;       cv:     8[esp]
;       sv:     12[esp]
;
; return:       eax = 1 if the store happened, 0 otherwise
PUBLIC  ___kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store32 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     eax, DWORD PTR _cv$[esp]
        mov     edx, DWORD PTR _sv$[esp]
        lock cmpxchg DWORD PTR [ecx], edx
        sete    al                      ; if eax == [ecx] set al = 1 else set al = 0
        and     eax, 1                  ; clear the stale upper bits of eax (zero-extend al)
        ret

___kmp_compare_and_store32 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomically: if *p == cv, store sv into *p.  The 64-bit operands are passed
; as low/high dword pairs.
;
; parameters (relative to ebp once the frame is set up):
;       p:              8[ebp]
;       cv (low, high): 12[ebp], 16[ebp]
;       sv (low, high): 20[ebp], 24[ebp]
;
; return:       eax = 1 if the store happened, 0 otherwise
PUBLIC  ___kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_cv_low$ = 12
_cv_high$ = 16
_sv_low$ = 20
_sv_high$ = 24

___kmp_compare_and_store64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx                     ; ebx and edi are overwritten below
        push    edi
        mov     edi, DWORD PTR _p$[ebp]
        mov     eax, DWORD PTR _cv_low$[ebp]    ; edx:eax <= cv
        mov     edx, DWORD PTR _cv_high$[ebp]
        mov     ebx, DWORD PTR _sv_low$[ebp]    ; ecx:ebx <= sv
        mov     ecx, DWORD PTR _sv_high$[ebp]
        lock cmpxchg8b QWORD PTR [edi]
        sete    al                      ; if edx:eax == [edi] set al = 1 else set al = 0
        and     eax, 1                  ; clear the stale upper bits of eax (zero-extend al)
        pop     edi
        pop     ebx
        mov     esp, ebp
        pop     ebp
        ret

___kmp_compare_and_store64 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
;
; Atomically stores d into *p and returns the byte *p held before.
;
; parameters:
;       p:      4[esp]
;       d:      8[esp]
;
; return:       al
PUBLIC  ___kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed8 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     al, BYTE PTR _d$[esp]
        lock xchg BYTE PTR [ecx], al    ; al <= old *p; *p <= d
        ret

___kmp_xchg_fixed8 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; Atomically stores d into *p and returns the word *p held before.
;
; parameters:
;       p:      4[esp]
;       d:      8[esp]
;
; return:       ax
PUBLIC  ___kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed16 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     ax, WORD PTR _d$[esp]
        lock xchg WORD PTR [ecx], ax    ; ax <= old *p; *p <= d
        ret

___kmp_xchg_fixed16 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically stores d into *p and returns the dword *p held before.
;
; parameters:
;       p:      4[esp]
;       d:      8[esp]
;
; return:       eax
PUBLIC  ___kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed32 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     eax, DWORD PTR _d$[esp]
        lock xchg DWORD PTR [ecx], eax  ; eax <= old *p; *p <= d
        ret

___kmp_xchg_fixed32 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; Atomically stores the 32-bit pattern of d into *p and returns the value
; *p held before, taken from the result of the exchange itself.
;
; parameters (relative to ebp once the frame is set up):
;       p:      8[ebp]
;       d:      12[ebp]
;
; return:       st(0); exactly one x87 register is pushed per call
PUBLIC  ___kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_d$ = 12
_old_value$ = -4

___kmp_xchg_real32 PROC NEAR

        push    ebp
        mov     ebp, esp
        sub     esp, 4                  ; room for old_value
        mov     ecx, DWORD PTR _p$[ebp]
        mov     eax, DWORD PTR _d$[ebp]

        lock xchg DWORD PTR [ecx], eax  ; eax <= old *p; *p <= d

        mov     DWORD PTR _old_value$[ebp], eax
                        ;; the x87 unit can only load from memory, so spill first
        fld     DWORD PTR _old_value$[ebp]
                        ;; return old_value
        mov     esp, ebp
        pop     ebp
        ret

___kmp_xchg_real32 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomically: if *p == cv, store sv into *p.  Returns the byte *p held
; before the operation (equal to cv exactly when the store happened).
;
; parameters:
;       p:      4[esp]
;       cv:     8[esp]
;       sv:     12[esp]
;
; return:       al
PUBLIC  ___kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret8 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     al, BYTE PTR _cv$[esp]
        mov     dl, BYTE PTR _sv$[esp]
        lock cmpxchg BYTE PTR [ecx], dl ; on mismatch al is reloaded from [ecx]
        ret

___kmp_compare_and_store_ret8 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomically: if *p == cv, store sv into *p.  Returns the word *p held
; before the operation (equal to cv exactly when the store happened).
;
; parameters:
;       p:      4[esp]
;       cv:     8[esp]
;       sv:     12[esp]
;
; return:       ax
PUBLIC  ___kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret16 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     ax, WORD PTR _cv$[esp]
        mov     dx, WORD PTR _sv$[esp]
        lock cmpxchg WORD PTR [ecx], dx ; on mismatch ax is reloaded from [ecx]
        ret

___kmp_compare_and_store_ret16 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomically: if *p == cv, store sv into *p.  Returns the dword *p held
; before the operation (equal to cv exactly when the store happened).
;
; parameters:
;       p:      4[esp]
;       cv:     8[esp]
;       sv:     12[esp]
;
; return:       eax
PUBLIC  ___kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret32 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     eax, DWORD PTR _cv$[esp]
        mov     edx, DWORD PTR _sv$[esp]
        lock cmpxchg DWORD PTR [ecx], edx       ; on mismatch eax is reloaded from [ecx]
        ret

___kmp_compare_and_store_ret32 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomically: if *p == cv, store sv into *p.  Returns the 64-bit value *p
; held before the operation (equal to cv exactly when the store happened).
; The 64-bit operands are passed as low/high dword pairs.
;
; parameters (relative to ebp once the frame is set up):
;       p:              8[ebp]
;       cv (low, high): 12[ebp], 16[ebp]
;       sv (low, high): 20[ebp], 24[ebp]
;
; return:       edx:eax
PUBLIC  ___kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_cv_low$ = 12
_cv_high$ = 16
_sv_low$ = 20
_sv_high$ = 24

___kmp_compare_and_store_ret64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx                     ; ebx and edi are overwritten below
        push    edi
        mov     edi, DWORD PTR _p$[ebp]
        mov     eax, DWORD PTR _cv_low$[ebp]    ; edx:eax <= cv
        mov     edx, DWORD PTR _cv_high$[ebp]
        mov     ebx, DWORD PTR _sv_low$[ebp]    ; ecx:ebx <= sv
        mov     ecx, DWORD PTR _sv_high$[ebp]
        lock cmpxchg8b QWORD PTR [edi]  ; on mismatch edx:eax is reloaded from [edi]
        pop     edi
        pop     ebx
        mov     esp, ebp
        pop     ebp
        ret

___kmp_compare_and_store_ret64 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; Loads the x87 FPU control word from the 16-bit value at *p.
;
; parameters:
;       p:      4(%esp)
PUBLIC  ___kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4

___kmp_load_x87_fpu_control_word PROC NEAR

        mov     eax, DWORD PTR _p$[esp]
        fldcw   WORD PTR [eax]
        ret

___kmp_load_x87_fpu_control_word ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; Stores the current x87 FPU control word into the 16-bit location *p.
;
; parameters:
;       p:      4(%esp)
PUBLIC  ___kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4

___kmp_store_x87_fpu_control_word PROC NEAR

        mov     eax, DWORD PTR _p$[esp]
        fstcw   WORD PTR [eax]
        ret

___kmp_store_x87_fpu_control_word ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word();
;
; Executes FNCLEX and returns.  Takes no arguments and produces no result.
PUBLIC  ___kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN 16

___kmp_clear_x87_fpu_status_word PROC NEAR

        fnclex
        ret

___kmp_clear_x87_fpu_status_word ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION ___kmp_invoke_microtask
;
; typedef void (*microtask_t)( int *gtid, int *tid, ... );
;
; int
; __kmp_invoke_microtask( microtask_t pkfn,
;                         int gtid, int tid,
;                         int argc, void *p_argv[] )
;
; Calls (*pkfn)( &gtid, &tid, p_argv[0], ..., p_argv[argc-1] ) with esp on a
; 128-byte boundary at the call instruction, then returns 1.
;
; When OMPT_SUPPORT is non-zero a sixth argument (exit_frame, 28[ebp]) is
; read and the frame pointer is stored through it before pkfn is called.
;
; parameters (relative to ebp once the frame is set up):
;       pkfn:   8[ebp]
;       gtid:   12[ebp]
;       tid:    16[ebp]
;       argc:   20[ebp]
;       p_argv: 24[ebp]
;
; return:       eax (always 1)
PUBLIC  ___kmp_invoke_microtask
_TEXT   SEGMENT
        ALIGN 16
_pkfn$ = 8
_gtid$ = 12
_tid$ = 16
_argc$ = 20
_argv$ = 24
if OMPT_SUPPORT
_exit_frame$ = 28
endif
_stk_adj$ = -16

___kmp_invoke_microtask PROC NEAR
        push    ebp
        mov     ebp, esp
        sub     esp, 16                 ; local storage; stk_adj lives at -16[ebp]
        push    ebx
        push    esi
        push    edi
if OMPT_SUPPORT
        mov     eax, DWORD PTR _exit_frame$[ebp]
        mov     DWORD PTR [eax], ebp    ; *exit_frame <= this frame's ebp
endif

        ;; ------------------------------------------------------------
        ;; Pad esp so that, after argc + 2 dwords have been pushed, it sits
        ;; on a 128-byte boundary.
        mov     eax, DWORD PTR _argc$[ebp]
        lea     eax, DWORD PTR [eax*4+8]        ; eax <= (argc + 2) * 4, bytes pushed below
        mov     edx, esp
        sub     edx, eax                ; where esp would land without padding
        mov     ecx, edx
        and     ecx, -128               ; that address rounded down to 128 bytes
        sub     edx, ecx                ; edx <= padding needed
        sub     esp, edx
        add     edx, eax                ; padding + pushed bytes
        mov     DWORD PTR _stk_adj$[ebp], edx   ; kept in memory: must survive the call
        ;; ------------------------------------------------------------

        ;; Push p_argv[argc-1] down to p_argv[0].
        mov     ecx, DWORD PTR _argc$[ebp]
        mov     eax, DWORD PTR _argv$[ebp]
$kmp_ia32_push_arg:
        cmp     ecx, 0
        jle     SHORT $kmp_ia32_args_done
        push    DWORD PTR [eax+ecx*4-4] ; p_argv[ecx-1]
        sub     ecx, 1
        jmp     SHORT $kmp_ia32_push_arg

$kmp_ia32_args_done:
        lea     edx, DWORD PTR _tid$[ebp]
        push    edx                     ; 2nd argument to pkfn: &tid
        lea     eax, DWORD PTR _gtid$[ebp]
        push    eax                     ; 1st argument to pkfn: &gtid
        call    DWORD PTR _pkfn$[ebp]
        add     esp, DWORD PTR _stk_adj$[ebp]   ; drop the arguments and the padding

        mov     eax, 1

        pop     edi
        pop     esi
        pop     ebx
        mov     esp, ebp
        pop     ebp
        ret     0
___kmp_invoke_microtask ENDP
_TEXT   ENDS
endif   ; _M_IA32

; ==================================== Intel(R) 64 ===================================
; Everything from here to the matching endif is assembled only when
; _M_AMD64 is defined.

ifdef   _M_AMD64
;------------------------------------------------------------------------
; FUNCTION __kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; Executes CPUID with eax = mode and ecx = mode2, then stores the resulting
; eax, ebx, ecx and edx at byte offsets 0, 4, 8 and 12 of *p.
;
; parameters:
;       mode:           ecx
;       mode2:          edx
;       cpuid_buffer:   r8
PUBLIC  __kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN 16

__kmp_x86_cpuid PROC FRAME ;NEAR

        push    rbp
        .pushreg rbp
        mov     rbp, rsp
        .setframe rbp, 0
        push    rbx                     ; callee-save register
        .pushreg rbx
        .ENDPROLOG

        mov     r10, r8                 ; p parameter
        mov     eax, ecx                ; mode parameter
        mov     ecx, edx                ; mode2 parameter
        cpuid                           ; Query the CPUID for the current processor

        mov     DWORD PTR 0[ r10 ], eax ; store results into buffer
        mov     DWORD PTR 4[ r10 ], ebx
        mov     DWORD PTR 8[ r10 ], ecx
        mov     DWORD PTR 12[ r10 ], edx

        pop     rbx                     ; callee-save register
        mov     rsp, rbp
        pop     rbp
        ret

__kmp_x86_cpuid ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically adds d to *p and returns the value *p held before the add.
;
; parameters:
;       p:      rcx
;       d:      edx
;
; return:       eax
PUBLIC  __kmp_test_then_add32
_TEXT   SEGMENT
        ALIGN 16
__kmp_test_then_add32 PROC ;NEAR

        mov     eax, edx
        lock xadd DWORD PTR [rcx], eax  ; eax <= old *p; *p <= old *p + d
        ret

__kmp_test_then_add32 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_test_then_add64
;
; kmp_int64
; __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
;
; Atomically adds d to *p and returns the value *p held before the add.
;
; parameters:
;       p:      rcx
;       d:      rdx
;
; return:       rax
PUBLIC  __kmp_test_then_add64
_TEXT   SEGMENT
        ALIGN 16
__kmp_test_then_add64 PROC ;NEAR

        mov     rax, rdx
        lock xadd QWORD PTR [rcx], rax  ; rax <= old *p; *p <= old *p + d
        ret

__kmp_test_then_add64 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomically: if *p == cv, store sv into *p.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax = 1 if the store happened, 0 otherwise
PUBLIC  __kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store8 PROC ;NEAR

        mov     al, dl                  ; "cv"
        mov     edx, r8d                ; "sv"
        lock cmpxchg BYTE PTR [rcx], dl
        sete    al                      ; if al == [rcx] set al = 1 else set al = 0
        and     rax, 1                  ; clear the stale upper bits of rax (zero-extend al)
        ret

__kmp_compare_and_store8 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomically: if *p == cv, store sv into *p.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax = 1 if the store happened, 0 otherwise
PUBLIC  __kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store16 PROC ;NEAR

        mov     ax, dx                  ; "cv"
        mov     edx, r8d                ; "sv"
        lock cmpxchg WORD PTR [rcx], dx
        sete    al                      ; if ax == [rcx] set al = 1 else set al = 0
        and     rax, 1                  ; clear the stale upper bits of rax (zero-extend al)
        ret

__kmp_compare_and_store16 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomically: if *p == cv, store sv into *p.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax = 1 if the store happened, 0 otherwise
PUBLIC  __kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store32 PROC ;NEAR

        mov     eax, edx                ; "cv"
        mov     edx, r8d                ; "sv"
        lock cmpxchg DWORD PTR [rcx], edx
        sete    al                      ; if eax == [rcx] set al = 1 else set al = 0
        and     rax, 1                  ; clear the stale upper bits of rax (zero-extend al)
        ret

__kmp_compare_and_store32 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomically: if *p == cv, store sv into *p.
;
; parameters:
;       p:      rcx
;       cv:     rdx
;       sv:     r8
;
; return:       eax = 1 if the store happened, 0 otherwise
PUBLIC  __kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store64 PROC ;NEAR

        mov     rax, rdx                ; "cv"
        mov     rdx, r8                 ; "sv"
        lock cmpxchg QWORD PTR [rcx], rdx
        sete    al                      ; if rax == [rcx] set al = 1 else set al = 0
        and     rax, 1                  ; clear the stale upper bits of rax (zero-extend al)
        ret

__kmp_compare_and_store64 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
;
; Atomically stores d into *p and returns the byte *p held before.
;
; parameters:
;       p:      rcx
;       d:      dl
;
; return:       al
PUBLIC  __kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN 16

__kmp_xchg_fixed8 PROC ;NEAR

        mov     al, dl
        lock xchg BYTE PTR [rcx], al    ; al <= old *p; *p <= d
        ret

__kmp_xchg_fixed8 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; Atomically stores d into *p and returns the word *p held before.
;
; parameters:
;       p:      rcx
;       d:      dx
;
; return:       ax
PUBLIC  __kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN 16

__kmp_xchg_fixed16 PROC ;NEAR

        mov     ax, dx
        lock xchg WORD PTR [rcx], ax    ; ax <= old *p; *p <= d
        ret

__kmp_xchg_fixed16 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically stores d into *p and returns the dword *p held before.
;
; parameters:
;       p:      rcx
;       d:      edx
;
; return:       eax
PUBLIC  __kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_fixed32 PROC ;NEAR

        mov     eax, edx
        lock xchg DWORD PTR [rcx], eax  ; eax <= old *p; *p <= d
        ret

__kmp_xchg_fixed32 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed64
;
; kmp_int64
; __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
;
; Atomically stores d into *p and returns the qword *p held before.
;
; parameters:
;       p:      rcx
;       d:      rdx
;
; return:       rax
PUBLIC  __kmp_xchg_fixed64
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_fixed64 PROC ;NEAR

        mov     rax, rdx
        lock xchg QWORD PTR [rcx], rax  ; rax <= old *p; *p <= d
        ret

__kmp_xchg_fixed64 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomically: if *p == cv, store sv into *p.  Returns the byte *p held
; before the operation (equal to cv exactly when the store happened).
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       al
PUBLIC  __kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret8 PROC ;NEAR
        mov     al, dl                  ; "cv"
        mov     edx, r8d                ; "sv"
        lock cmpxchg BYTE PTR [rcx], dl
                        ; Compare AL with [rcx].  If equal set
                        ; ZF and exchange DL with [rcx].  Else, clear
                        ; ZF and load [rcx] into AL.
        ret

__kmp_compare_and_store_ret8 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomically: if *p == cv, store sv into *p.  Returns the word *p held
; before the operation (equal to cv exactly when the store happened).
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       ax
PUBLIC  __kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret16 PROC ;NEAR

        mov     ax, dx                  ; "cv"
        mov     edx, r8d                ; "sv"
        lock cmpxchg WORD PTR [rcx], dx ; on mismatch ax is reloaded from [rcx]
        ret

__kmp_compare_and_store_ret16 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomically: if *p == cv, store sv into *p.  Returns the dword *p held
; before the operation (equal to cv exactly when the store happened).
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax
PUBLIC  __kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret32 PROC ;NEAR

        mov     eax, edx                ; "cv"
        mov     edx, r8d                ; "sv"
        lock cmpxchg DWORD PTR [rcx], edx       ; on mismatch eax is reloaded from [rcx]
        ret

__kmp_compare_and_store_ret32 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomically: if *p == cv, store sv into *p.  Returns the qword *p held
; before the operation (equal to cv exactly when the store happened).
;
; parameters:
;       p:      rcx
;       cv:     rdx
;       sv:     r8
;
; return:       rax
PUBLIC  __kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret64 PROC ;NEAR

        mov     rax, rdx                ; "cv"
        mov     rdx, r8                 ; "sv"
        lock cmpxchg QWORD PTR [rcx], rdx       ; on mismatch rax is reloaded from [rcx]
        ret

__kmp_compare_and_store_ret64 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_loop8
;
; kmp_int8
; __kmp_compare_and_store_loop8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Repeats an atomic "if *p == cv, store sv into *p" until the store
; happens, executing PAUSE between attempts.  Every attempt compares
; against the original cv.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; reg temps:
;       r9:     holds cv across attempts
;
; return:       al (the value that was replaced, i.e. cv)
PUBLIC  __kmp_compare_and_store_loop8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_loop8 PROC ;NEAR
        mov     r9d, edx                ; keep "cv": a failed cmpxchg overwrites al
$__kmp_loop:
        mov     al, r9b                 ; "cv", reloaded for every attempt
        lock cmpxchg BYTE PTR [rcx], r8b
                        ; Compare AL with [rcx].  If equal set
                        ; ZF and store "sv" (r8b) into [rcx].  Else, clear
                        ; ZF and load [rcx] into AL.
        jz      SHORT $__kmp_success

        db      0f3H
        db      090H                    ; pause

        jmp     SHORT $__kmp_loop

$__kmp_success:
        ret

__kmp_compare_and_store_loop8 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; Atomically stores the 32-bit pattern of d into *p and returns the value
; *p held before.
;
; parameters:
;       p:      rcx
;       d:      xmm1 (lower 4 bytes)
;
; return:       xmm0 (lower 4 bytes)
PUBLIC  __kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_real32 PROC ;NEAR

        movd    eax, xmm1               ; load d

        lock xchg DWORD PTR [rcx], eax  ; eax <= old *p; *p <= d

        movd    xmm0, eax               ; load old value into return register
        ret

__kmp_xchg_real32 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_real64
;
; kmp_real64
; __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 d );
;
; Atomically stores the 64-bit pattern of d into *p and returns the value
; *p held before.
;
; parameters:
;       p:      rcx
;       d:      xmm1 (lower 8 bytes)
;
; return:       xmm0 (lower 8 bytes)
PUBLIC  __kmp_xchg_real64
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_real64 PROC ;NEAR

        movd    rax, xmm1               ; load "d"

        lock xchg QWORD PTR [rcx], rax  ; rax <= old *p; *p <= d

        movd    xmm0, rax               ; load old value into return register
        ret

__kmp_xchg_real64 ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; Loads the x87 FPU control word from the 16-bit value at *p.
;
; parameters:
;       p:      rcx
PUBLIC  __kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_load_x87_fpu_control_word PROC ;NEAR

        fldcw   WORD PTR [rcx]
        ret

__kmp_load_x87_fpu_control_word ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; Stores the current x87 FPU control word into the 16-bit location *p.
;
; parameters:
;       p:      rcx
PUBLIC  __kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_store_x87_fpu_control_word PROC ;NEAR

        fstcw   WORD PTR [rcx]
        ret

__kmp_store_x87_fpu_control_word ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word()
;
; Executes FNCLEX and returns.  Takes no arguments and produces no result.
PUBLIC  __kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_clear_x87_fpu_status_word PROC ;NEAR

        fnclex
        ret

__kmp_clear_x87_fpu_status_word ENDP
_TEXT   ENDS
;------------------------------------------------------------------------
; FUNCTION __kmp_invoke_microtask
;
; typedef void (*microtask_t)( int *gtid, int *tid, ... );
;
; int
; __kmp_invoke_microtask( microtask_t pkfn,
;                         int gtid, int tid,
;                         int argc, void *p_argv[] ) {
;
;     (*pkfn) ( &gtid, &tid, argv[0], ... );
;     return 1;
; }
;
; When OMPT_SUPPORT is non-zero a sixth argument (exit_frame, 56[rbp]) is
; read and the frame pointer is stored through it before pkfn is called.
;
; note:
;      just before call to pkfn must have rsp 128-byte aligned for compiler
;
; parameters:
;      rcx:   pkfn      16[rbp]
;      edx:   gtid      24[rbp]
;      r8d:   tid       32[rbp]
;      r9d:   argc      40[rbp]
;      [st]:  p_argv    48[rbp]
;
; reg temps:
;      rax:   used all over the place
;      rdx:   used all over the place
;      rcx:   used as argument counter for push parms loop
;      r10:   used to hold pkfn function pointer argument
;
; return:      eax    (always 1/TRUE)
$_pkfn   = 16
$_gtid   = 24
$_tid    = 32
$_argc   = 40
$_p_argv = 48
if OMPT_SUPPORT
$_exit_frame = 56
endif

PUBLIC  __kmp_invoke_microtask
_TEXT   SEGMENT
        ALIGN 16

__kmp_invoke_microtask PROC FRAME ;NEAR
        mov     QWORD PTR 16[rsp], rdx  ; home gtid parameter
        mov     QWORD PTR 24[rsp], r8   ; home tid parameter
        push    rbp                     ; save base pointer
        .pushreg rbp
        sub     rsp, 0                  ; no fixed allocation necessary - end prolog

        lea     rbp, QWORD PTR [rsp]    ; establish the base pointer
        .setframe rbp, 0
        .ENDPROLOG
if OMPT_SUPPORT
        mov     rax, QWORD PTR $_exit_frame[rbp]
        mov     QWORD PTR [rax], rbp    ; *exit_frame <= this frame's rbp
endif
        mov     r10, rcx                ; save pkfn pointer for later

;; ------------------------------------------------------------
        ; argc is a 32-bit int: only r9d is defined by the caller, so widen it
        ; explicitly instead of reading all 64 bits of r9.
        movsxd  rax, r9d                ; rax <= argc
        cmp     rax, 2
        jge     SHORT $_kmp_invoke_stack_align
        mov     rax, 2                  ; set 4 homes if less than 2 parms
$_kmp_invoke_stack_align:
        lea     rdx, QWORD PTR [rax*8+16] ; rdx <= (argc + 2) * 8
        mov     rax, rsp                ; Save current SP into rax
        sub     rax, rdx                ; rsp - ((argc+2)*8) -> rax
                                        ; without align, rsp would be this
        and     rax, -128               ; Mask off 7 bits (128-byte align)
        add     rax, rdx                ; add space for push's in a loop below
        mov     rsp, rax                ; Prepare the stack ptr
                                        ; Now it will align to 128-byte at the call
;; ------------------------------------------------------------
                                        ; setup pkfn parameter stack
        movsxd  rax, r9d                ; rax <= argc
        shl     rax, 3                  ; rax <= argc*8
        mov     rdx, QWORD PTR $_p_argv[rbp] ; rdx <= p_argv
        add     rdx, rax                ; rdx <= &p_argv[argc]
        mov     ecx, r9d                ; ecx <= argc
        jecxz   SHORT $_kmp_invoke_pass_parms ; nothing to push if argc=0
        cmp     ecx, 1                  ; if argc=1 branch ahead
        je      SHORT $_kmp_invoke_one_parm
        sub     ecx, 2                  ; if argc=2 branch ahead, subtract two from
        je      SHORT $_kmp_invoke_two_parms

$_kmp_invoke_push_parms:                ; push last - 5th parms to pkfn on stack
        sub     rdx, 8                  ; decrement p_argv pointer to previous parm
        mov     r8, QWORD PTR [rdx]     ; r8 <= p_argv[rcx-1]
        push    r8                      ; push p_argv[rcx-1] onto stack (reverse order)
        sub     ecx, 1
        jecxz   SHORT $_kmp_invoke_two_parms
        jmp     SHORT $_kmp_invoke_push_parms

$_kmp_invoke_two_parms:
        sub     rdx, 8                  ; put 4th parm to pkfn in r9
        mov     r9, QWORD PTR [rdx]     ; r9 <= p_argv[1]

$_kmp_invoke_one_parm:
        sub     rdx, 8                  ; put 3rd parm to pkfn in r8
        mov     r8, QWORD PTR [rdx]     ; r8 <= p_argv[0]

$_kmp_invoke_pass_parms:                ; put 1st & 2nd parms to pkfn in registers
        lea     rdx, QWORD PTR $_tid[rbp]  ; rdx <= &tid (2nd parm to pkfn)
        lea     rcx, QWORD PTR $_gtid[rbp] ; rcx <= &gtid (1st parm to pkfn)
        sub     rsp, 32                 ; add stack space for first four parms
        mov     rax, r10                ; rax <= pkfn
        call    rax                     ; call (*pkfn)()
        mov     rax, 1                  ; move 1 into return register;

        lea     rsp, QWORD PTR [rbp]    ; restore stack pointer

;       add     rsp, 0                  ; no fixed allocation necessary - start epilog
        pop     rbp                     ; restore frame pointer
        ret
__kmp_invoke_microtask ENDP
_TEXT   ENDS
endif   ; _M_AMD64