/*
 * Copyright 2011 Austin English
 * Copyright 2012 Dan Kegel
 * Copyright 2015-2016 Sebastian Lackner
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
#include <stdarg.h>
#include <assert.h>

#include "windef.h"
#include "winbase.h"
#include "winternl.h"
#include "wine/asm.h"
#include "wine/debug.h"
#include "wine/list.h"

WINE_DEFAULT_DEBUG_CHANNEL(vcomp);

#define MAX_VECT_PARALLEL_CALLBACK_ARGS 128
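
/* OpenMP locks map directly onto Win32 critical sections. Critical sections
 * are recursive by design, so omp_nest_lock_t gets its nesting behaviour for
 * free, while the plain omp_set_lock()/omp_test_lock() below must explicitly
 * reject recursive acquisition. */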
typedef CRITICAL_SECTION *omp_lock_t;
typedef CRITICAL_SECTION *omp_nest_lock_t;
static struct list vcomp_idle_threads = LIST_INIT(vcomp_idle_threads);
static DWORD   vcomp_context_tls = TLS_OUT_OF_INDEXES;
static HMODULE vcomp_module;
static int     vcomp_max_threads;
static int     vcomp_num_threads;
static BOOL    vcomp_nested_fork = FALSE;

static RTL_CRITICAL_SECTION vcomp_section;
static RTL_CRITICAL_SECTION_DEBUG critsect_debug =
{
    0, 0, &vcomp_section,
    { &critsect_debug.ProcessLocksList, &critsect_debug.ProcessLocksList },
      0, 0, { (DWORD_PTR)(__FILE__ ": vcomp_section") }
};
static RTL_CRITICAL_SECTION vcomp_section = { &critsect_debug, -1, 0, 0, 0, 0 };
#define VCOMP_DYNAMIC_FLAGS_STATIC    0x01
#define VCOMP_DYNAMIC_FLAGS_CHUNKED   0x02
#define VCOMP_DYNAMIC_FLAGS_GUIDED    0x03
#define VCOMP_DYNAMIC_FLAGS_INCREMENT 0x40
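
/* The runtime state is split over three structures: vcomp_thread_data is
 * private to a thread, vcomp_team_data is shared by all threads forked for
 * one parallel region, and vcomp_task_data describes the current
 * work-sharing construct. The single/section/dynamic fields are generation
 * counters: a thread may execute a construct only when its private counter
 * overtakes the shared one in the task. */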
struct vcomp_thread_data
{
    struct vcomp_team_data *team;
    struct vcomp_task_data *task;
    int                     thread_num;
    BOOL                    parallel;
    int                     fork_threads;

    /* only used for concurrent tasks */
    struct list             entry;
    CONDITION_VARIABLE      cond;

    unsigned int            single;
    unsigned int            section;
    unsigned int            dynamic;
    unsigned int            dynamic_type;
    unsigned int            dynamic_begin;
    unsigned int            dynamic_end;
};
struct vcomp_team_data
{
    CONDITION_VARIABLE  cond;
    int                 num_threads;
    int                 finished_threads;

    /* callback arguments */
    void                *wrapper;
    int                 nargs;
    __ms_va_list        valist;

    /* barrier */
    unsigned int        barrier;
    int                 barrier_count;
};
struct vcomp_task_data
{
    unsigned int        single;

    unsigned int        section;
    int                 num_sections;
    int                 section_index;

    unsigned int        dynamic;
    unsigned int        dynamic_first;
    unsigned int        dynamic_last;
    unsigned int        dynamic_iterations;
    int                 dynamic_step;
    unsigned int        dynamic_chunksize;
};
static void **ptr_from_va_list(__ms_va_list valist)
{
    return *(void ***)&valist;
}
static void copy_va_list_data(void **args, __ms_va_list valist, int args_count)
{
    int i;

    for (i = 0; i < args_count; ++i)
        args[i] = va_arg(valist, void *);
}
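
/* The vcomp calling convention passes the arguments of a parallel region as
 * variadic arguments of _vcomp_fork(), one pointer-sized slot each. The two
 * helpers above therefore treat a __ms_va_list as a flat void *[] that the
 * architecture-specific wrappers below spill back onto the callee's stack.
 * A rough illustration (hypothetical compiler output, not part of this
 * file):
 *
 *     // #pragma omp parallel       =>  _vcomp_fork(TRUE, 2, fn, &x, &y);
 *     // the wrapper then performs      fn(args[0], args[1]);
 */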
#if defined(__i386__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, void **args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushl %ebp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset 4\n\t")
                   __ASM_CFI(".cfi_rel_offset %ebp,0\n\t")
                   "movl %esp,%ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa_register %ebp\n\t")
                   "pushl %esi\n\t"
                   __ASM_CFI(".cfi_rel_offset %esi,-4\n\t")
                   "pushl %edi\n\t"
                   __ASM_CFI(".cfi_rel_offset %edi,-8\n\t")
                   "movl 12(%ebp),%edx\n\t"
                   "shll $2,%edx\n\t"
                   "jz 1f\n\t"
                   "subl %edx,%esp\n\t"
                   "andl $~15,%esp\n\t"
                   "movl 12(%ebp),%ecx\n\t"
                   "movl 16(%ebp),%esi\n\t"
                   "movl %esp,%edi\n\t"
                   "cld\n\t"
                   "rep; movsl\n"
                   "1:\tcall *8(%ebp)\n\t"
                   "leal -8(%ebp),%esp\n\t"
                   "popl %edi\n\t"
                   __ASM_CFI(".cfi_same_value %edi\n\t")
                   "popl %esi\n\t"
                   __ASM_CFI(".cfi_same_value %esi\n\t")
                   "popl %ebp\n\t"
                   __ASM_CFI(".cfi_def_cfa %esp,4\n\t")
                   __ASM_CFI(".cfi_same_value %ebp\n\t")
                   "ret" )
#elif defined(__x86_64__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, void **args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "pushq %rbp\n\t"
                   __ASM_SEH(".seh_pushreg %rbp\n\t")
                   __ASM_CFI(".cfi_adjust_cfa_offset 8\n\t")
                   __ASM_CFI(".cfi_rel_offset %rbp,0\n\t")
                   "movq %rsp,%rbp\n\t"
                   __ASM_SEH(".seh_setframe %rbp,0\n\t")
                   __ASM_CFI(".cfi_def_cfa_register %rbp\n\t")
                   "pushq %rsi\n\t"
                   __ASM_SEH(".seh_pushreg %rsi\n\t")
                   __ASM_CFI(".cfi_rel_offset %rsi,-8\n\t")
                   "pushq %rdi\n\t"
                   __ASM_SEH(".seh_pushreg %rdi\n\t")
                   __ASM_SEH(".seh_endprologue\n\t")
                   __ASM_CFI(".cfi_rel_offset %rdi,-16\n\t")
                   "movq %rcx,%rax\n\t"
                   "movq $4,%rcx\n\t"
                   "cmpq %rcx,%rdx\n\t"
                   "cmovgq %rdx,%rcx\n\t"
                   "leaq 0(,%rcx,8),%rdx\n\t"
                   "subq %rdx,%rsp\n\t"
                   "andq $~15,%rsp\n\t"
                   "movq %r8,%rsi\n\t"
                   "movq %rsp,%rdi\n\t"
                   "rep; movsq\n\t"
                   "movq 0(%rsp),%rcx\n\t"
                   "movq 8(%rsp),%rdx\n\t"
                   "movq 16(%rsp),%r8\n\t"
                   "movq 24(%rsp),%r9\n\t"
                   "callq *%rax\n\t"
                   "leaq -16(%rbp),%rsp\n\t"
                   "popq %rdi\n\t"
                   __ASM_CFI(".cfi_same_value %rdi\n\t")
                   "popq %rsi\n\t"
                   __ASM_CFI(".cfi_same_value %rsi\n\t")
                   __ASM_CFI(".cfi_def_cfa_register %rsp\n\t")
                   "popq %rbp\n\t"
                   __ASM_CFI(".cfi_adjust_cfa_offset -8\n\t")
                   __ASM_CFI(".cfi_same_value %rbp\n\t")
                   "ret" )
#elif defined(__arm__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, void **args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "push {r4, r5, LR}\n\t"
                   "mov r4, r0\n\t"
                   "mov r5, SP\n\t"
                   "lsl r3, r1, #2\n\t"
                   "subs r3, r3, #16\n\t"
                   "ble 2f\n\t"
                   "tst r3, #4\n\t"
                   "subeq SP, SP, #4\n\t"
                   "sub SP, SP, r3\n\t"
                   "add r2, r2, #16\n\t"
                   "1:\tsub r3, r3, #4\n\t"
                   "ldr r0, [r2, r3]\n\t"
                   "str r0, [SP, r3]\n\t"
                   "cmp r3, #0\n\t"
                   "bgt 1b\n\t"
                   "sub r2, r2, #16\n\t"
                   "b 3f\n"
                   "2:\tsub SP, SP, #4\n"
                   "3:\tldm r2, {r0, r1, r2, r3}\n\t"
                   "push {r0, r1, r2, r3}\n"
                   "4:\tpop {r0-r3}\n\t"
                   "blx r4\n\t"
                   "mov SP, r5\n\t"
                   "pop {r4, r5, PC}" )
#elif defined(__aarch64__)

extern void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, void **args);
__ASM_GLOBAL_FUNC( _vcomp_fork_call_wrapper,
                   "stp x29, x30, [SP,#-16]!\n\t"
                   "mov x29, SP\n\t"
                   "mov x9, x0\n\t"
                   "cbz w1, 4f\n\t"
                   "lsl w8, w1, #3\n\t"
                   "cmp w8, #64\n\t"
                   "b.ge 1f\n\t"
                   "mov w8, #64\n"
                   "1:\ttbz w8, #3, 2f\n\t"
                   "add w8, w8, #8\n"
                   "2:\tsub x10, x29, x8\n\t"
                   "mov SP, x10\n"
                   "3:\tldr x0, [x2], #8\n\t"
                   "str x0, [x10], #8\n\t"
                   "subs w1, w1, #1\n\t"
                   "b.ne 3b\n\t"
                   "ldp x0, x1, [sp], #16\n\t"
                   "ldp x2, x3, [sp], #16\n\t"
                   "ldp x4, x5, [sp], #16\n\t"
                   "ldp x6, x7, [sp], #16\n"
                   "4:\tblr x9\n\t"
                   "mov SP, x29\n\t"
                   "ldp x29, x30, [SP], #16\n\t"
                   "ret" )
#else

static void CDECL _vcomp_fork_call_wrapper(void *wrapper, int nargs, void **args)
{
    ERR("Not implemented for this architecture\n");
}

#endif
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))

static inline char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    char ret;
    __asm__ __volatile__( "lock; cmpxchgb %2,(%1)"
                          : "=a" (ret) : "r" (dest), "q" (xchg), "0" (compare) : "memory" );
    return ret;
}

static inline short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    short ret;
    __asm__ __volatile__( "lock; cmpxchgw %2,(%1)"
                          : "=a" (ret) : "r" (dest), "r" (xchg), "0" (compare) : "memory" );
    return ret;
}

static inline char interlocked_xchg_add8(char *dest, char incr)
{
    char ret;
    __asm__ __volatile__( "lock; xaddb %0,(%1)"
                          : "=q" (ret) : "r" (dest), "0" (incr) : "memory" );
    return ret;
}

static inline short interlocked_xchg_add16(short *dest, short incr)
{
    short ret;
    __asm__ __volatile__( "lock; xaddw %0,(%1)"
                          : "=r" (ret) : "r" (dest), "0" (incr) : "memory" );
    return ret;
}

#else
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1
static inline char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    return __sync_val_compare_and_swap(dest, compare, xchg);
}

static inline char interlocked_xchg_add8(char *dest, char incr)
{
    return __sync_fetch_and_add(dest, incr);
}
#else
static char interlocked_cmpxchg8(char *dest, char xchg, char compare)
{
    EnterCriticalSection(&vcomp_section);
    if (*dest == compare) *dest = xchg; else compare = *dest;
    LeaveCriticalSection(&vcomp_section);
    return compare;
}

static char interlocked_xchg_add8(char *dest, char incr)
{
    char ret;
    EnterCriticalSection(&vcomp_section);
    ret = *dest; *dest += incr;
    LeaveCriticalSection(&vcomp_section);
    return ret;
}
#endif
#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2
static inline short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    return __sync_val_compare_and_swap(dest, compare, xchg);
}

static inline short interlocked_xchg_add16(short *dest, short incr)
{
    return __sync_fetch_and_add(dest, incr);
}
#else
static short interlocked_cmpxchg16(short *dest, short xchg, short compare)
{
    EnterCriticalSection(&vcomp_section);
    if (*dest == compare) *dest = xchg; else compare = *dest;
    LeaveCriticalSection(&vcomp_section);
    return compare;
}

static short interlocked_xchg_add16(short *dest, short incr)
{
    short ret;
    EnterCriticalSection(&vcomp_section);
    ret = *dest; *dest += incr;
    LeaveCriticalSection(&vcomp_section);
    return ret;
}
#endif

#endif  /* __GNUC__ */
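
/* Note the fallback ladder above: the 8- and 16-bit interlocked primitives
 * use inline assembly on x86, GCC's __sync builtins where available, and
 * otherwise serialize through vcomp_section. Only the last variant takes a
 * global lock, so it is correct but comparatively slow. */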
static inline struct vcomp_thread_data *vcomp_get_thread_data(void)
{
    return (struct vcomp_thread_data *)TlsGetValue(vcomp_context_tls);
}

static inline void vcomp_set_thread_data(struct vcomp_thread_data *thread_data)
{
    TlsSetValue(vcomp_context_tls, thread_data);
}
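
/* Thread data is created lazily: every exported entry point goes through
 * vcomp_init_thread_data(), so even a thread that was not created by this
 * runtime receives a default context (thread_num 0, no team) on first use. */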
static struct vcomp_thread_data *vcomp_init_thread_data(void)
{
    struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
    struct
    {
        struct vcomp_thread_data thread;
        struct vcomp_task_data   task;
    } *data;

    if (thread_data) return thread_data;
    if (!(data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data))))
    {
        ERR("could not create thread data\n");
        ExitProcess(1);
    }

    data->task.single  = 0;
    data->task.section = 0;
    data->task.dynamic = 0;

    thread_data = &data->thread;
    thread_data->team         = NULL;
    thread_data->task         = &data->task;
    thread_data->thread_num   = 0;
    thread_data->parallel     = FALSE;
    thread_data->fork_threads = 0;
    thread_data->single       = 1;
    thread_data->section      = 1;
    thread_data->dynamic      = 1;
    thread_data->dynamic_type = 0;

    vcomp_set_thread_data(thread_data);
    return thread_data;
}
static void vcomp_free_thread_data(void)
{
    struct vcomp_thread_data *thread_data = vcomp_get_thread_data();
    if (!thread_data) return;

    HeapFree(GetProcessHeap(), 0, thread_data);
    vcomp_set_thread_data(NULL);
}
void CDECL _vcomp_atomic_add_i1(char *dest, char val)
{
    interlocked_xchg_add8(dest, val);
}

void CDECL _vcomp_atomic_and_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i1(signed char *dest, signed char val)
{
    signed char old;
    do old = *dest; while ((signed char)interlocked_cmpxchg8((char *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui1(unsigned char *dest, unsigned char val)
{
    unsigned char old;
    do old = *dest; while ((unsigned char)interlocked_cmpxchg8((char *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i1(char *dest, unsigned int val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i1(signed char *dest, unsigned int val)
{
    signed char old;
    do old = *dest; while ((signed char)interlocked_cmpxchg8((char *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui1(unsigned char *dest, unsigned int val)
{
    unsigned char old;
    do old = *dest; while ((unsigned char)interlocked_cmpxchg8((char *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i1(char *dest, char val)
{
    interlocked_xchg_add8(dest, -val);
}

void CDECL _vcomp_atomic_xor_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old ^ val, old) != old);
}
static void CDECL _vcomp_atomic_bool_and_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i1(char *dest, char val)
{
    char old;
    do old = *dest; while (interlocked_cmpxchg8(dest, old ? old : (val != 0), old) != old);
}
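
/* The _vcomp_reduction_* functions dispatch on bits 8-11 of the flags word:
 * 0/1 add, 2 multiply, 3 and, 4 or, 5 xor, 6 logical and, 7 logical or;
 * larger values are clamped to the last entry. For example,
 * _vcomp_reduction_i1(0x500, &x, mask) performs an atomic x ^= mask. */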
void CDECL _vcomp_reduction_i1(unsigned int flags, char *dest, char val)
{
    static void (CDECL * const funcs[])(char *, char) =
    {
        _vcomp_atomic_add_i1,
        _vcomp_atomic_add_i1,
        _vcomp_atomic_mul_i1,
        _vcomp_atomic_and_i1,
        _vcomp_atomic_or_i1,
        _vcomp_atomic_xor_i1,
        _vcomp_atomic_bool_and_i1,
        _vcomp_atomic_bool_or_i1,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_i2(short *dest, short val)
{
    interlocked_xchg_add16(dest, val);
}

void CDECL _vcomp_atomic_and_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui2(unsigned short *dest, unsigned short val)
{
    unsigned short old;
    do old = *dest; while ((unsigned short)interlocked_cmpxchg16((short *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i2(short *dest, unsigned int val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i2(short *dest, unsigned int val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui2(unsigned short *dest, unsigned int val)
{
    unsigned short old;
    do old = *dest; while ((unsigned short)interlocked_cmpxchg16((short *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i2(short *dest, short val)
{
    interlocked_xchg_add16(dest, -val);
}

void CDECL _vcomp_atomic_xor_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i2(short *dest, short val)
{
    short old;
    do old = *dest; while (interlocked_cmpxchg16(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i2(unsigned int flags, short *dest, short val)
{
    static void (CDECL * const funcs[])(short *, short) =
    {
        _vcomp_atomic_add_i2,
        _vcomp_atomic_add_i2,
        _vcomp_atomic_mul_i2,
        _vcomp_atomic_and_i2,
        _vcomp_atomic_or_i2,
        _vcomp_atomic_xor_i2,
        _vcomp_atomic_bool_and_i2,
        _vcomp_atomic_bool_or_i2,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_i4(int *dest, int val)
{
    InterlockedExchangeAdd(dest, val);
}

void CDECL _vcomp_atomic_and_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui4(unsigned int *dest, unsigned int val)
{
    unsigned int old;
    do old = *dest; while (InterlockedCompareExchange((int *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui4(unsigned int *dest, unsigned int val)
{
    unsigned int old;
    do old = *dest; while (InterlockedCompareExchange((int *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i4(int *dest, int val)
{
    InterlockedExchangeAdd(dest, -val);
}

void CDECL _vcomp_atomic_xor_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i4(int *dest, int val)
{
    int old;
    do old = *dest; while (InterlockedCompareExchange(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i4(unsigned int flags, int *dest, int val)
{
    static void (CDECL * const funcs[])(int *, int) =
    {
        _vcomp_atomic_add_i4,
        _vcomp_atomic_add_i4,
        _vcomp_atomic_mul_i4,
        _vcomp_atomic_and_i4,
        _vcomp_atomic_or_i4,
        _vcomp_atomic_xor_i4,
        _vcomp_atomic_bool_and_i4,
        _vcomp_atomic_bool_or_i4,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old + val, old) != old);
}

void CDECL _vcomp_atomic_and_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old & val, old) != old);
}

void CDECL _vcomp_atomic_div_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_div_ui8(ULONG64 *dest, ULONG64 val)
{
    ULONG64 old;
    do old = *dest; while (InterlockedCompareExchange64((LONG64 *)dest, old / val, old) != old);
}

void CDECL _vcomp_atomic_mul_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old * val, old) != old);
}

void CDECL _vcomp_atomic_or_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old | val, old) != old);
}

void CDECL _vcomp_atomic_shl_i8(LONG64 *dest, unsigned int val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old << val, old) != old);
}

void CDECL _vcomp_atomic_shr_i8(LONG64 *dest, unsigned int val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_shr_ui8(ULONG64 *dest, unsigned int val)
{
    ULONG64 old;
    do old = *dest; while (InterlockedCompareExchange64((LONG64 *)dest, old >> val, old) != old);
}

void CDECL _vcomp_atomic_sub_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old - val, old) != old);
}

void CDECL _vcomp_atomic_xor_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old ^ val, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old && val, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_i8(LONG64 *dest, LONG64 val)
{
    LONG64 old;
    do old = *dest; while (InterlockedCompareExchange64(dest, old ? old : (val != 0), old) != old);
}

void CDECL _vcomp_reduction_i8(unsigned int flags, LONG64 *dest, LONG64 val)
{
    static void (CDECL * const funcs[])(LONG64 *, LONG64) =
    {
        _vcomp_atomic_add_i8,
        _vcomp_atomic_add_i8,
        _vcomp_atomic_mul_i8,
        _vcomp_atomic_and_i8,
        _vcomp_atomic_or_i8,
        _vcomp_atomic_xor_i8,
        _vcomp_atomic_bool_and_i8,
        _vcomp_atomic_bool_or_i8,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old + val;
    }
    while (InterlockedCompareExchange((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_div_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old / val;
    }
    while (InterlockedCompareExchange((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_mul_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old * val;
    }
    while (InterlockedCompareExchange((int *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_sub_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = *(float *)&old - val;
    }
    while (InterlockedCompareExchange((int *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = (*(float *)&old != 0.0) ? (val != 0.0) : 0.0;
    }
    while (InterlockedCompareExchange((int *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_r4(float *dest, float val)
{
    int old, new;
    do
    {
        old = *(int *)dest;
        *(float *)&new = (*(float *)&old != 0.0) ? *(float *)&old : (val != 0.0);
    }
    while (InterlockedCompareExchange((int *)dest, new, old) != old);
}

void CDECL _vcomp_reduction_r4(unsigned int flags, float *dest, float val)
{
    static void (CDECL * const funcs[])(float *, float) =
    {
        _vcomp_atomic_add_r4,
        _vcomp_atomic_add_r4,
        _vcomp_atomic_mul_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_or_r4,
        _vcomp_atomic_bool_and_r4,
        _vcomp_atomic_bool_or_r4,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
void CDECL _vcomp_atomic_add_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old + val;
    }
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_div_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old / val;
    }
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_mul_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old * val;
    }
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_atomic_sub_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = *(double *)&old - val;
    }
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_and_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = (*(double *)&old != 0.0) ? (val != 0.0) : 0.0;
    }
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);
}

static void CDECL _vcomp_atomic_bool_or_r8(double *dest, double val)
{
    LONG64 old, new;
    do
    {
        old = *(LONG64 *)dest;
        *(double *)&new = (*(double *)&old != 0.0) ? *(double *)&old : (val != 0.0);
    }
    while (InterlockedCompareExchange64((LONG64 *)dest, new, old) != old);
}

void CDECL _vcomp_reduction_r8(unsigned int flags, double *dest, double val)
{
    static void (CDECL * const funcs[])(double *, double) =
    {
        _vcomp_atomic_add_r8,
        _vcomp_atomic_add_r8,
        _vcomp_atomic_mul_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_or_r8,
        _vcomp_atomic_bool_and_r8,
        _vcomp_atomic_bool_or_r8,
    };
    unsigned int op = (flags >> 8) & 0xf;
    op = min(op, ARRAY_SIZE(funcs) - 1);
    funcs[op](dest, val);
}
int CDECL omp_get_dynamic(void)
{
    TRACE("stub\n");
    return 0;
}

int CDECL omp_get_max_threads(void)
{
    TRACE("()\n");
    return vcomp_max_threads;
}

int CDECL omp_get_nested(void)
{
    TRACE("()\n");
    return vcomp_nested_fork;
}

int CDECL omp_get_num_procs(void)
{
    TRACE("stub\n");
    return 1;
}

int CDECL omp_get_num_threads(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;
    TRACE("()\n");
    return team_data ? team_data->num_threads : 1;
}

int CDECL omp_get_thread_num(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->thread_num;
}

int CDECL _vcomp_get_thread_num(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->thread_num;
}
/* Time in seconds since "some time in the past" */
double CDECL omp_get_wtime(void)
{
    return GetTickCount() / 1000.0;
}
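
/* Note: GetTickCount() has millisecond units but limited resolution, and it
 * wraps around after roughly 49.7 days; this simple implementation inherits
 * both properties. */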
void CDECL omp_set_dynamic(int val)
{
    TRACE("(%d): stub\n", val);
}

void CDECL omp_set_nested(int nested)
{
    TRACE("(%d)\n", nested);
    vcomp_nested_fork = (nested != 0);
}

void CDECL omp_set_num_threads(int num_threads)
{
    TRACE("(%d)\n", num_threads);
    if (num_threads >= 1)
        vcomp_num_threads = num_threads;
}

void CDECL _vcomp_flush(void)
{
    TRACE("(): stub\n");
}
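
/* The barrier below uses team_data->barrier as a generation counter: the
 * last thread to arrive resets barrier_count and bumps the generation, and
 * waiters leave only when the generation changes. This keeps a thread that
 * already entered the next barrier from consuming wakeups of the previous
 * one. */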
void CDECL _vcomp_barrier(void)
{
    struct vcomp_team_data *team_data = vcomp_init_thread_data()->team;

    TRACE("()\n");

    if (!team_data)
        return;

    EnterCriticalSection(&vcomp_section);
    if (++team_data->barrier_count >= team_data->num_threads)
    {
        team_data->barrier++;
        team_data->barrier_count = 0;
        WakeAllConditionVariable(&team_data->cond);
    }
    else
    {
        unsigned int barrier = team_data->barrier;
        while (team_data->barrier == barrier)
            SleepConditionVariableCS(&team_data->cond, &vcomp_section, INFINITE);
    }
    LeaveCriticalSection(&vcomp_section);
}
void CDECL _vcomp_set_num_threads(int num_threads)
{
    TRACE("(%d)\n", num_threads);
    if (num_threads >= 1)
        vcomp_init_thread_data()->fork_threads = num_threads;
}

int CDECL _vcomp_master_begin(void)
{
    TRACE("()\n");
    return !vcomp_init_thread_data()->thread_num;
}

void CDECL _vcomp_master_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}
int CDECL _vcomp_single_begin(int flags)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int ret = FALSE;

    TRACE("(%x): semi-stub\n", flags);

    EnterCriticalSection(&vcomp_section);
    thread_data->single++;
    if ((int)(thread_data->single - task_data->single) > 0)
    {
        task_data->single = thread_data->single;
        ret = TRUE;
    }
    LeaveCriticalSection(&vcomp_section);

    return ret;
}

void CDECL _vcomp_single_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}
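
/* Work-sharing constructs compare the thread-local counter with the shared
 * task counter. With four threads reaching the same "#pragma omp single",
 * for instance, each increments thread_data->single to the same value, but
 * only the first one through vcomp_section sees a value greater than
 * task_data->single, publishes it, and returns TRUE to run the block. */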
void CDECL _vcomp_sections_init(int n)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;

    TRACE("(%d)\n", n);

    EnterCriticalSection(&vcomp_section);
    thread_data->section++;
    if ((int)(thread_data->section - task_data->section) > 0)
    {
        task_data->section       = thread_data->section;
        task_data->num_sections  = n;
        task_data->section_index = 0;
    }
    LeaveCriticalSection(&vcomp_section);
}

int CDECL _vcomp_sections_next(void)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    int i = -1;

    TRACE("()\n");

    EnterCriticalSection(&vcomp_section);
    if (thread_data->section == task_data->section &&
        task_data->section_index != task_data->num_sections)
    {
        i = task_data->section_index++;
    }
    LeaveCriticalSection(&vcomp_section);
    return i;
}
void CDECL _vcomp_for_static_simple_init(unsigned int first, unsigned int last, int step,
                                         BOOL increment, unsigned int *begin, unsigned int *end)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;

    TRACE("(%u, %u, %d, %u, %p, %p)\n", first, last, step, increment, begin, end);

    if (num_threads == 1)
    {
        *begin = first;
        *end   = last;
        return;
    }

    if (step <= 0)
    {
        *begin = 0;
        *end   = increment ? -1 : 1;
        return;
    }

    if (increment)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;
    }

    per_thread = iterations / num_threads;
    remaining  = iterations - per_thread * num_threads;

    if (thread_num < remaining)
        per_thread++;
    else if (per_thread)
        first += remaining * step;
    else
    {
        *begin = first;
        *end   = first - step;
        return;
    }

    *begin = first + per_thread * thread_num * step;
    *end   = *begin + (per_thread - 1) * step;
}
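
/* Example partitioning: 10 iterations on a 4-thread team give per_thread 2
 * and remaining 2, so threads 0 and 1 run 3 iterations each and threads 2
 * and 3 run 2 each, covering the range exactly once. */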
void CDECL _vcomp_for_static_init(int first, int last, int step, int chunksize, unsigned int *loops,
                                  int *begin, int *end, int *next, int *lastchunk)
{
    unsigned int iterations, num_chunks, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    int no_begin, no_lastchunk;

    TRACE("(%d, %d, %d, %d, %p, %p, %p, %p, %p)\n",
          first, last, step, chunksize, loops, begin, end, next, lastchunk);

    if (!begin)
    {
        begin     = &no_begin;
        lastchunk = &no_lastchunk;
    }

    if (num_threads == 1 && chunksize != 1)
    {
        *loops     = 1;
        *begin     = first;
        *end       = last;
        *next      = 0;
        *lastchunk = first;
        return;
    }

    if (first == last)
    {
        *loops = !thread_num;
        if (!thread_num)
        {
            *begin     = first;
            *end       = last;
            *next      = 0;
            *lastchunk = first;
        }
        return;
    }

    if (step <= 0)
    {
        *loops = 0;
        return;
    }

    if (first < last)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;
    }

    if (chunksize < 1)
        chunksize = 1;

    num_chunks = ((DWORD64)iterations + chunksize - 1) / chunksize;
    per_thread = num_chunks / num_threads;
    remaining  = num_chunks - per_thread * num_threads;

    *loops     = per_thread + (thread_num < remaining);
    *begin     = first + thread_num * chunksize * step;
    *end       = *begin + (chunksize - 1) * step;
    *next      = chunksize * num_threads * step;
    *lastchunk = first + (num_chunks - 1) * chunksize * step;
}
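
/* For the chunked static schedule, *begin/*end describe the thread's first
 * chunk, *next is the stride from one of its chunks to the next
 * (chunksize * num_threads * step), and *lastchunk marks the start of the
 * final, possibly shorter, chunk so the generated code can clamp it. */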
void CDECL _vcomp_for_static_end(void)
{
    TRACE("()\n");
    /* nothing to do here */
}

void CDECL _vcomp_for_dynamic_init(unsigned int flags, unsigned int first, unsigned int last,
                                   int step, unsigned int chunksize)
{
    unsigned int iterations, per_thread, remaining;
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_team_data *team_data = thread_data->team;
    struct vcomp_task_data *task_data = thread_data->task;
    int num_threads = team_data ? team_data->num_threads : 1;
    int thread_num = thread_data->thread_num;
    unsigned int type = flags & ~VCOMP_DYNAMIC_FLAGS_INCREMENT;

    TRACE("(%u, %u, %u, %d, %u)\n", flags, first, last, step, chunksize);

    if (step <= 0)
    {
        thread_data->dynamic_type = 0;
        return;
    }

    if (flags & VCOMP_DYNAMIC_FLAGS_INCREMENT)
        iterations = 1 + (last - first) / step;
    else
    {
        iterations = 1 + (first - last) / step;
        step *= -1;
    }

    if (type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        per_thread = iterations / num_threads;
        remaining  = iterations - per_thread * num_threads;

        if (thread_num < remaining)
            per_thread++;
        else if (per_thread)
            first += remaining * step;
        else
        {
            thread_data->dynamic_type = 0;
            return;
        }

        thread_data->dynamic_type  = VCOMP_DYNAMIC_FLAGS_STATIC;
        thread_data->dynamic_begin = first + per_thread * thread_num * step;
        thread_data->dynamic_end   = thread_data->dynamic_begin + (per_thread - 1) * step;
    }
    else
    {
        if (type != VCOMP_DYNAMIC_FLAGS_CHUNKED &&
            type != VCOMP_DYNAMIC_FLAGS_GUIDED)
        {
            FIXME("unsupported flags %u\n", flags);
            type = VCOMP_DYNAMIC_FLAGS_GUIDED;
        }

        EnterCriticalSection(&vcomp_section);
        thread_data->dynamic++;
        thread_data->dynamic_type = type;
        if ((int)(thread_data->dynamic - task_data->dynamic) > 0)
        {
            task_data->dynamic            = thread_data->dynamic;
            task_data->dynamic_first      = first;
            task_data->dynamic_last       = last;
            task_data->dynamic_iterations = iterations;
            task_data->dynamic_step       = step;
            task_data->dynamic_chunksize  = chunksize;
        }
        LeaveCriticalSection(&vcomp_section);
    }
}
int CDECL _vcomp_for_dynamic_next(unsigned int *begin, unsigned int *end)
{
    struct vcomp_thread_data *thread_data = vcomp_init_thread_data();
    struct vcomp_task_data *task_data = thread_data->task;
    struct vcomp_team_data *team_data = thread_data->team;
    int num_threads = team_data ? team_data->num_threads : 1;

    TRACE("(%p, %p)\n", begin, end);

    if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_STATIC)
    {
        *begin = thread_data->dynamic_begin;
        *end   = thread_data->dynamic_end;
        thread_data->dynamic_type = 0;
        return 1;
    }
    else if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_CHUNKED ||
             thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED)
    {
        unsigned int iterations = 0;
        EnterCriticalSection(&vcomp_section);
        if (thread_data->dynamic == task_data->dynamic &&
            task_data->dynamic_iterations != 0)
        {
            iterations = min(task_data->dynamic_iterations, task_data->dynamic_chunksize);
            if (thread_data->dynamic_type == VCOMP_DYNAMIC_FLAGS_GUIDED &&
                task_data->dynamic_iterations > num_threads * task_data->dynamic_chunksize)
            {
                iterations = (task_data->dynamic_iterations + num_threads - 1) / num_threads;
            }
            *begin = task_data->dynamic_first;
            *end   = task_data->dynamic_first + (iterations - 1) * task_data->dynamic_step;
            task_data->dynamic_iterations -= iterations;
            task_data->dynamic_first      += iterations * task_data->dynamic_step;
            if (!task_data->dynamic_iterations)
                *end = task_data->dynamic_last;
        }
        LeaveCriticalSection(&vcomp_section);
        return iterations != 0;
    }

    return 0;
}
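
/* Under the guided schedule the chunk handed out above grows with the
 * remaining work: while more than num_threads * chunksize iterations are
 * left, a thread claims roughly remaining / num_threads of them, and the
 * chunk size decays toward chunksize as the loop drains. */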
int CDECL omp_in_parallel(void)
{
    TRACE("()\n");
    return vcomp_init_thread_data()->parallel;
}

static DWORD WINAPI _vcomp_fork_worker(void *param)
{
    struct vcomp_thread_data *thread_data = param;
    vcomp_set_thread_data(thread_data);

    TRACE("starting worker thread for %p\n", thread_data);

    EnterCriticalSection(&vcomp_section);
    for (;;)
    {
        struct vcomp_team_data *team = thread_data->team;
        if (team != NULL)
        {
            LeaveCriticalSection(&vcomp_section);
            _vcomp_fork_call_wrapper(team->wrapper, team->nargs, ptr_from_va_list(team->valist));
            EnterCriticalSection(&vcomp_section);

            thread_data->team = NULL;
            list_remove(&thread_data->entry);
            list_add_tail(&vcomp_idle_threads, &thread_data->entry);
            if (++team->finished_threads >= team->num_threads)
                WakeAllConditionVariable(&team->cond);
        }

        if (!SleepConditionVariableCS(&thread_data->cond, &vcomp_section, 5000) &&
            GetLastError() == ERROR_TIMEOUT && !thread_data->team)
        {
            break;
        }
    }
    list_remove(&thread_data->entry);
    LeaveCriticalSection(&vcomp_section);

    TRACE("terminating worker thread for %p\n", thread_data);

    HeapFree(GetProcessHeap(), 0, thread_data);
    vcomp_set_thread_data(NULL);
    FreeLibraryAndExitThread(vcomp_module, 0);
    return 0;
}
void WINAPIV _vcomp_fork(BOOL ifval, int nargs, void *wrapper, ...)
{
    struct vcomp_thread_data *prev_thread_data = vcomp_init_thread_data();
    struct vcomp_thread_data thread_data;
    struct vcomp_team_data team_data;
    struct vcomp_task_data task_data;
    int num_threads;

    TRACE("(%d, %d, %p, ...)\n", ifval, nargs, wrapper);

    if (prev_thread_data->parallel && !vcomp_nested_fork)
        ifval = FALSE;

    if (!ifval)
        num_threads = 1;
    else if (prev_thread_data->fork_threads)
        num_threads = prev_thread_data->fork_threads;
    else
        num_threads = vcomp_num_threads;

    InitializeConditionVariable(&team_data.cond);
    team_data.num_threads      = 1;
    team_data.finished_threads = 0;
    team_data.nargs            = nargs;
    team_data.wrapper          = wrapper;
    __ms_va_start(team_data.valist, wrapper);
    team_data.barrier          = 0;
    team_data.barrier_count    = 0;

    task_data.single  = 0;
    task_data.section = 0;
    task_data.dynamic = 0;

    thread_data.team         = &team_data;
    thread_data.task         = &task_data;
    thread_data.thread_num   = 0;
    thread_data.parallel     = ifval || prev_thread_data->parallel;
    thread_data.fork_threads = 0;
    thread_data.single       = 1;
    thread_data.section      = 1;
    thread_data.dynamic      = 1;
    thread_data.dynamic_type = 0;
    list_init(&thread_data.entry);
    InitializeConditionVariable(&thread_data.cond);

    if (num_threads > 1)
    {
        struct list *ptr;
        EnterCriticalSection(&vcomp_section);

        /* reuse existing threads (if any) */
        while (team_data.num_threads < num_threads && (ptr = list_head(&vcomp_idle_threads)))
        {
            struct vcomp_thread_data *data = LIST_ENTRY(ptr, struct vcomp_thread_data, entry);
            data->team         = &team_data;
            data->task         = &task_data;
            data->thread_num   = team_data.num_threads++;
            data->parallel     = thread_data.parallel;
            data->fork_threads = 0;
            data->single       = 1;
            data->section      = 1;
            data->dynamic      = 1;
            data->dynamic_type = 0;
            list_remove(&data->entry);
            list_add_tail(&thread_data.entry, &data->entry);
            WakeAllConditionVariable(&data->cond);
        }

        /* spawn additional threads */
        while (team_data.num_threads < num_threads)
        {
            struct vcomp_thread_data *data;
            HMODULE module;
            HANDLE thread;

            data = HeapAlloc(GetProcessHeap(), 0, sizeof(*data));
            if (!data) break;

            data->team         = &team_data;
            data->task         = &task_data;
            data->thread_num   = team_data.num_threads;
            data->parallel     = thread_data.parallel;
            data->fork_threads = 0;
            data->single       = 1;
            data->section      = 1;
            data->dynamic      = 1;
            data->dynamic_type = 0;
            InitializeConditionVariable(&data->cond);

            thread = CreateThread(NULL, 0, _vcomp_fork_worker, data, 0, NULL);
            if (!thread)
            {
                HeapFree(GetProcessHeap(), 0, data);
                break;
            }

            GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS,
                               (const WCHAR *)vcomp_module, &module);
            team_data.num_threads++;
            list_add_tail(&thread_data.entry, &data->entry);
            CloseHandle(thread);
        }

        LeaveCriticalSection(&vcomp_section);
    }

    vcomp_set_thread_data(&thread_data);
    _vcomp_fork_call_wrapper(team_data.wrapper, team_data.nargs, ptr_from_va_list(team_data.valist));
    vcomp_set_thread_data(prev_thread_data);
    prev_thread_data->fork_threads = 0;

    if (team_data.num_threads > 1)
    {
        EnterCriticalSection(&vcomp_section);

        team_data.finished_threads++;
        while (team_data.finished_threads < team_data.num_threads)
            SleepConditionVariableCS(&team_data.cond, &vcomp_section, INFINITE);

        LeaveCriticalSection(&vcomp_section);
        assert(list_empty(&thread_data.entry));
    }

    __ms_va_end(team_data.valist);
}
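
/* Sketch of how a compiled parallel region drives _vcomp_fork() (hypothetical
 * names, assuming MSVC-style /openmp code generation; not part of this file):
 *
 *     static void wrapper(int *sum)      // body of #pragma omp parallel
 *     {
 *         _vcomp_atomic_add_i4(sum, 1);
 *     }
 *     ...
 *     _vcomp_fork(TRUE, 1, wrapper, &sum);
 *
 * The forking thread runs wrapper itself and recruits up to num_threads - 1
 * workers, reusing idle ones before spawning new threads. */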
static CRITICAL_SECTION *alloc_critsect(void)
{
    CRITICAL_SECTION *critsect;
    if (!(critsect = HeapAlloc(GetProcessHeap(), 0, sizeof(*critsect))))
    {
        ERR("could not allocate critical section\n");
        ExitProcess(1);
    }

    InitializeCriticalSection(critsect);
    critsect->DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": critsect");
    return critsect;
}

static void destroy_critsect(CRITICAL_SECTION *critsect)
{
    if (!critsect) return;
    critsect->DebugInfo->Spare[0] = 0;
    DeleteCriticalSection(critsect);
    HeapFree(GetProcessHeap(), 0, critsect);
}

void CDECL omp_init_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    *lock = alloc_critsect();
}

void CDECL omp_destroy_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    destroy_critsect(*lock);
}

void CDECL omp_set_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);

    if (RtlIsCriticalSectionLockedByThread(*lock))
    {
        ERR("omp_set_lock called while holding lock %p\n", *lock);
        ExitProcess(1);
    }

    EnterCriticalSection(*lock);
}

void CDECL omp_unset_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    LeaveCriticalSection(*lock);
}

int CDECL omp_test_lock(omp_lock_t *lock)
{
    TRACE("(%p)\n", lock);

    if (RtlIsCriticalSectionLockedByThread(*lock))
        return 0;

    return TryEnterCriticalSection(*lock);
}
void CDECL omp_set_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    EnterCriticalSection(*lock);
}

void CDECL omp_unset_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    LeaveCriticalSection(*lock);
}

int CDECL omp_test_nest_lock(omp_nest_lock_t *lock)
{
    TRACE("(%p)\n", lock);
    return TryEnterCriticalSection(*lock) ? (*lock)->RecursionCount : 0;
}
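
/* omp_test_nest_lock() exposes the critical section's RecursionCount, which
 * matches the OpenMP requirement that a successful test of a nestable lock
 * return the new nesting count. */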
void CDECL _vcomp_enter_critsect(CRITICAL_SECTION **critsect)
{
    TRACE("(%p)\n", critsect);

    if (!*critsect)
    {
        CRITICAL_SECTION *new_critsect = alloc_critsect();
        if (InterlockedCompareExchangePointer((void **)critsect, new_critsect, NULL) != NULL)
            destroy_critsect(new_critsect);  /* someone beat us to it */
    }

    EnterCriticalSection(*critsect);
}

void CDECL _vcomp_leave_critsect(CRITICAL_SECTION *critsect)
{
    TRACE("(%p)\n", critsect);
    LeaveCriticalSection(critsect);
}
static unsigned int get_step_count(int start, int end, int range_offset, int step)
{
    int range = end - start + step - range_offset;

    if (step < 0)
        return (unsigned)-range / -step;
    else
        return (unsigned)range / step;
}
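
/* get_step_count() computes how many loop steps fit between start and end in
 * the direction of step; range_offset (+-1 for an exclusive bound, 0 for an
 * inclusive one) trims the open end. E.g. start 0, end 10, step 3 with an
 * exclusive end gives (10 - 0 + 3 - 1) / 3 = 4 steps: 0, 3, 6 and 9. */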
static void CDECL c2vectparallel_wrapper(int start, int end, int step, int end_included, BOOL dynamic_distribution,
        int volatile *dynamic_start, void *function, int nargs, __ms_va_list valist)
{
    void *wrapper_args[MAX_VECT_PARALLEL_CALLBACK_ARGS];
    unsigned int step_count, steps_per_call, remainder;
    int thread_count = omp_get_num_threads();
    int curr_start, curr_end, range_offset;
    int thread = _vcomp_get_thread_num();
    int step_sign;

    copy_va_list_data(&wrapper_args[2], valist, nargs - 2);

    step_sign = step > 0 ? 1 : -1;
    range_offset = step_sign * !end_included;

    if (dynamic_distribution)
    {
        int next_start, new_start, end_value;

        start = *dynamic_start;
        end_value = end + !!end_included * step;
        while (start != end_value)
        {
            step_count = get_step_count(start, end, range_offset, step);

            curr_end = start + (step_count + thread_count - 1) / thread_count * step
                    + range_offset;

            if ((curr_end - end) * step_sign > 0)
            {
                next_start = end_value;
                curr_end = end;
            }
            else
            {
                next_start = curr_end - range_offset;
                curr_end -= step;
            }

            if ((new_start = InterlockedCompareExchange(dynamic_start, next_start, start)) != start)
            {
                start = new_start;
                continue;
            }

            wrapper_args[0] = (void *)(ULONG_PTR)start;
            wrapper_args[1] = (void *)(ULONG_PTR)curr_end;
            _vcomp_fork_call_wrapper(function, nargs, wrapper_args);
            start = *dynamic_start;
        }
        return;
    }

    step_count = get_step_count(start, end, range_offset, step);

    /* According to the tests native vcomp still makes extra calls
     * with empty range from excessive threads under certain conditions
     * for unclear reason. */
    if (thread >= step_count && (end_included || (step != 1 && step != -1)))
        return;

    steps_per_call = step_count / thread_count;
    remainder = step_count % thread_count;

    if (thread < remainder)
    {
        curr_start = thread * (steps_per_call + 1);
        curr_end = curr_start + steps_per_call + 1;
    }
    else if (thread < step_count)
    {
        curr_start = remainder + steps_per_call * thread;
        curr_end = curr_start + steps_per_call;
    }
    else
    {
        curr_start = curr_end = 0;
    }

    curr_start = start + curr_start * step;
    curr_end = start + (curr_end - 1) * step + range_offset;

    wrapper_args[0] = (void *)(ULONG_PTR)curr_start;
    wrapper_args[1] = (void *)(ULONG_PTR)curr_end;
    _vcomp_fork_call_wrapper(function, nargs, wrapper_args);
}
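
/* Under dynamic distribution each thread bids for the next slice with an
 * InterlockedCompareExchange on *dynamic_start; a losing thread retries from
 * the value the winner published, so the work queue needs no extra lock. */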
void WINAPIV C2VectParallel(int start, int end, int step, BOOL end_included, int thread_count,
        BOOL dynamic_distribution, void *function, int nargs, ...)
{
    struct vcomp_thread_data *thread_data;
    int volatile dynamic_start;
    int prev_thread_count;
    __ms_va_list valist;

    TRACE("start %d, end %d, step %d, end_included %d, thread_count %d, dynamic_distribution %#x,"
            " function %p, nargs %d.\n", start, end, step, end_included, thread_count,
            dynamic_distribution, function, nargs);

    if (nargs > MAX_VECT_PARALLEL_CALLBACK_ARGS)
    {
        FIXME("Number of arguments %u exceeds supported maximum %u"
                " (not calling the loop code, expect problems).\n",
                nargs, MAX_VECT_PARALLEL_CALLBACK_ARGS);
        return;
    }

    __ms_va_start(valist, nargs);

    /* This expression can result in integer overflow. According to the tests,
     * native vcomp runs the function as a single thread both for empty range
     * and (end - start) not fitting the integer range. */
    if ((step > 0 && end < start) || (step < 0 && end > start)
            || (end - start) / step < 2 || thread_count < 0)
    {
        void *wrapper_args[MAX_VECT_PARALLEL_CALLBACK_ARGS];

        wrapper_args[0] = (void *)(ULONG_PTR)start;
        wrapper_args[1] = (void *)(ULONG_PTR)end;
        copy_va_list_data(&wrapper_args[2], valist, nargs - 2);
        _vcomp_fork_call_wrapper(function, nargs, wrapper_args);
        __ms_va_end(valist);
        return;
    }

    thread_data = vcomp_init_thread_data();
    prev_thread_count = thread_data->fork_threads;
    thread_data->fork_threads = thread_count;

    dynamic_start = start;

    _vcomp_fork(TRUE, 9, c2vectparallel_wrapper, start, end, step, end_included, dynamic_distribution,
            &dynamic_start, function, nargs, valist);

    thread_data->fork_threads = prev_thread_count;
    __ms_va_end(valist);
}
BOOL WINAPI DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved)
{
    TRACE("(%p, %d, %p)\n", instance, reason, reserved);

    switch (reason)
    {
        case DLL_PROCESS_ATTACH:
        {
            SYSTEM_INFO sysinfo;

            if ((vcomp_context_tls = TlsAlloc()) == TLS_OUT_OF_INDEXES)
            {
                ERR("Failed to allocate TLS index\n");
                return FALSE;
            }

            GetSystemInfo(&sysinfo);
            vcomp_module      = instance;
            vcomp_max_threads = sysinfo.dwNumberOfProcessors;
            vcomp_num_threads = sysinfo.dwNumberOfProcessors;
            break;
        }

        case DLL_PROCESS_DETACH:
        {
            if (reserved) break;
            if (vcomp_context_tls != TLS_OUT_OF_INDEXES)
            {
                vcomp_free_thread_data();
                TlsFree(vcomp_context_tls);
            }
            break;
        }

        case DLL_THREAD_DETACH:
        {
            vcomp_free_thread_data();
            break;
        }
    }

    return TRUE;
}