/* Copyright (C) 2005-2024 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
/* This file handles the maintenance of threads in response to team
   creation and termination.  */
#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;
#endif


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif
/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  unsigned int num_teams;
  unsigned int team_num;
  bool nested;
  pthread_t handle;
};
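
/* The master fills one of these per launched thread (from a stack-allocated
   array; see the gomp_alloca call in gomp_team_start below), and each new
   thread copies out the fields it needs at the top of gomp_thread_start,
   so the array only has to remain live while the team is being started.  */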
/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
  thr->num_teams = data->num_teams;
  thr->team_num = data->team_num;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif
#if !(defined HAVE_TLS || defined USE_EMUTLS)
  pthread_setspecific (gomp_tls_key, thr);
#endif

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait_final (&team->barrier);
	  gomp_finish_task (task);

	  gomp_simple_barrier_wait (&pool->threads_dock);

	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
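
/* Two paths above: threads of a nested team run exactly one outlined
   region and terminate, while threads belonging to the non-nested pool
   dock on pool->threads_dock and loop, picking up a new fn/data pair from
   their gomp_thread struct each time the master releases the dock, until
   fn is left NULL (see gomp_free_pool_helper and gomp_pause_pool_helper
   below).  */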
static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
	{
	  pool->last_team = NULL;
	  return last_team;
	}
    }
  return NULL;
}
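
/* gomp_team_end below stashes the finished team in pool->last_team instead
   of freeing it immediately; get_last_team lets the next non-nested
   parallel region with the same thread count reuse that allocation (and
   its already initialized barrier and locks).  */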
/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
		     + sizeof (team->implicit_task[0]);
#ifdef GOMP_USE_ALIGNED_WORK_SHARES
      team = gomp_aligned_alloc (__alignof (struct gomp_team),
				 sizeof (*team) + nthreads * extra);
#else
      team = team_malloc (sizeof (*team) + nthreads * extra);
#endif

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;
  team->task_detach_count = 0;

  return team;
}
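
/* Layout note: the team, its nthreads implicit_task entries and the
   nthreads ordered_release pointers all live in the single allocation
   made above; ordered_release is simply pointed at the memory right
   behind implicit_task[nthreads].  E.g. for nthreads == 4 the block is
   sizeof (struct gomp_team)
   + 4 * (sizeof (gomp_sem_t *) + sizeof (struct gomp_task)) bytes.  */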
/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  team_free (team);
}
static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#elif defined(__AMDGCN__)
  asm ("s_dcache_wb\n\t"
       "s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}
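
/* The helper runs on every pool thread being shut down, so after the last
   dock barrier it must not return into the idle loop: each supported
   target needs some way of terminating the calling thread here, hence the
   #error fallback above.  */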
/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	}
      if (pool->last_team)
	free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
      thr->task = NULL;
    }
}
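
/* gomp_free_thread doubles as the pthread TLS destructor: initialize_team
   below registers it via pthread_key_create (&gomp_thread_destructor,
   gomp_free_thread), so it also runs when a thread that used libgomp
   exits.  */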
#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team,
		 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data = NULL;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
	gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
				      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
	 for the master thread and initialize helper variables
	 P and optionally S, K and/or REST used by later place
	 computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
	{
	case omp_proc_bind_true:
	case omp_proc_bind_close:
	  if (nthreads > thr->ts.place_partition_len)
	    {
	      /* T > P.  S threads will be placed in each place,
		 and the final REM threads placed one by one
		 into the already occupied places.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	    }
	  else
	    s = 1;
	  k = 1;
	  break;
	case omp_proc_bind_master:
	  /* Each thread will be bound to master's place.  */
	  break;
	case omp_proc_bind_spread:
	  if (nthreads <= thr->ts.place_partition_len)
	    {
	      /* T <= P.  Each subpartition will have in between s
		 and s+1 places (subpartitions starting at or
		 after rest will have s places, earlier s+1 places),
		 each thread will be bound to the first place in
		 its subpartition (except for the master thread
		 that can be bound to another place in its
		 subpartition).  */
	      s = thr->ts.place_partition_len / nthreads;
	      rest = thr->ts.place_partition_len % nthreads;
	      rest = (s + 1) * rest + thr->ts.place_partition_off;
	      if (p < rest)
		{
		  p -= (p - thr->ts.place_partition_off) % (s + 1);
		  thr->ts.place_partition_len = s + 1;
		}
	      else
		{
		  p -= (p - rest) % s;
		  thr->ts.place_partition_len = s;
		}
	      thr->ts.place_partition_off = p;
	    }
	  else
	    {
	      /* T > P.  Each subpartition will have just a single
		 place and we'll place between s and s+1
		 threads into each subpartition.  */
	      s = nthreads / thr->ts.place_partition_len;
	      rest = nthreads % thr->ts.place_partition_len;
	      thr->ts.place_partition_off = p;
	      thr->ts.place_partition_len = 1;
	      k = 1;
	    }
	  break;
	}
    }
  else
    bind = omp_proc_bind_false;
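
  /* Worked example for the helper variables above: with T = nthreads = 8
     threads and P = place_partition_len = 3 places, omp_proc_bind_close
     gives s = 8 / 3 = 2 and rest = 8 % 3 = 2, i.e. two threads in every
     place plus one extra thread in each of the first two places (3/3/2).
     For omp_proc_bind_spread with T = 2 and P = 5, s = 5 / 2 = 2 and
     rest = 5 % 2 = 1, so the first subpartition gets s + 1 = 3 places and
     the second gets s = 2.  */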
  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
	n = nthreads;
      else if (old_threads_used == 0)
	{
	  n = 0;
	  gomp_simple_barrier_init (&pool->threads_dock, nthreads);
	}
      else
	{
	  n = old_threads_used;

	  /* Increase the barrier threshold to make sure all new
	     threads arrive before the team is released.  */
	  gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
	}

      /* Not true yet, but soon will be.  We're going to release all
	 threads from the dock, and those that aren't part of the
	 team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
	 expected that changes in the number of threads are rare, thus we
	 make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
	{
	  pool->threads_size = nthreads + 1;
	  pool->threads
	    = gomp_realloc (pool->threads,
			    pool->threads_size
			    * sizeof (struct gomp_thread *));
	  /* Add current (master) thread to threads[].  */
	  pool->threads[0] = thr;
	}
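
      /* At this point pool->threads_dock has been (re)initialized for
	 NTHREADS participants, so the idle threads released below, the
	 master and any newly created threads all meet at the same dock
	 barrier; the only later adjustments are the affinity_count cases
	 handled further down.  */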
      /* Release existing idle threads.  */
      for (; i < n; ++i)
	{
	  unsigned int place_partition_off = thr->ts.place_partition_off;
	  unsigned int place_partition_len = thr->ts.place_partition_len;
	  unsigned int place = 0;
	  if (__builtin_expect (gomp_places_list != NULL, 0))
	    switch (bind)
	      {
	      case omp_proc_bind_true:
	      case omp_proc_bind_close:
		    if (p == (team->prev_ts.place_partition_off
			      + team->prev_ts.place_partition_len))
		      p = team->prev_ts.place_partition_off;
		    if (i == nthreads - rest)
	      case omp_proc_bind_master:
	      case omp_proc_bind_spread:
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      place_partition_off = p;
			place_partition_len = s + 1;
			place_partition_len = s;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      if (i == nthreads - rest)
		      place_partition_off = p;
		      place_partition_len = 1;
	  if (affinity_thr != NULL
	      || (bind != omp_proc_bind_true
		  && pool->threads[i]->place != p + 1)
	      || pool->threads[i]->place <= place_partition_off
	      || pool->threads[i]->place > (place_partition_off
					    + place_partition_len))
	      force_display = true;
	      if (affinity_thr == NULL)
		{
		  if (team->prev_ts.place_partition_len > 64)
		    affinity_thr
		      = gomp_malloc (team->prev_ts.place_partition_len
				     * sizeof (struct gomp_thread *));
		  else
		    affinity_thr
		      = gomp_alloca (team->prev_ts.place_partition_len
				     * sizeof (struct gomp_thread *));
		  memset (affinity_thr, '\0',
			  team->prev_ts.place_partition_len
			  * sizeof (struct gomp_thread *));
		  for (j = i; j < old_threads_used; j++)
		      if (pool->threads[j]->place
			  > team->prev_ts.place_partition_off
			  && (pool->threads[j]->place
			      <= (team->prev_ts.place_partition_off
				  + team->prev_ts.place_partition_len)))
			  l = pool->threads[j]->place - 1
			      - team->prev_ts.place_partition_off;
			  pool->threads[j]->data = affinity_thr[l];
			  affinity_thr[l] = pool->threads[j];
		      pool->threads[j] = NULL;
		  if (nthreads > old_threads_used)
		    memset (&pool->threads[old_threads_used],
			    '\0', ((nthreads - old_threads_used)
				   * sizeof (struct gomp_thread *)));
		  affinity_count = old_threads_used - i;
		}
	      if (affinity_count == 0)
	      if (affinity_thr[l - team->prev_ts.place_partition_off]
		  if (bind != omp_proc_bind_true)
		  for (l = place_partition_off;
		       l < place_partition_off + place_partition_len;
		    if (affinity_thr[l - team->prev_ts.place_partition_off]
		  if (l == place_partition_off + place_partition_len)
	      nthr = affinity_thr[l - team->prev_ts.place_partition_off];
	      affinity_thr[l - team->prev_ts.place_partition_off]
		= (struct gomp_thread *) nthr->data;
	      pool->threads[i] = nthr;
	    }
	  else
	    nthr = pool->threads[i];
	}
      else
	nthr = pool->threads[i];
	  nthr->ts.team = team;
	  nthr->ts.work_share = &team->work_shares[0];
	  nthr->ts.last_work_share = NULL;
	  nthr->ts.team_id = i;
	  nthr->ts.level = team->prev_ts.level + 1;
	  nthr->ts.active_level = thr->ts.active_level;
	  nthr->ts.place_partition_off = place_partition_off;
	  nthr->ts.place_partition_len = place_partition_len;
	  nthr->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
	  nthr->ts.single_count = 0;
#endif
	  nthr->ts.static_trip = 0;
	  nthr->num_teams = thr->num_teams;
	  nthr->team_num = thr->team_num;
	  nthr->task = &team->implicit_task[i];
	  gomp_init_task (nthr->task, task, icv);
	  team->implicit_task[i].icv.nthreads_var = nthreads_var;
	  team->implicit_task[i].icv.bind_var = bind_var;
	  nthr->task->taskgroup = taskgroup;
	  team->ordered_release[i] = &nthr->release;
      if (__builtin_expect (affinity_thr != NULL, 0))
	{
	  /* If AFFINITY_THR is non-NULL just because we had to
	     permute some threads in the pool, but we've managed
	     to find exactly as many old threads as we'd find
	     without affinity, we don't need to handle this
	     specially anymore.  */
	  if (nthreads <= old_threads_used
	      ? (affinity_count == old_threads_used - nthreads)
	      : (i == old_threads_used))
	      if (team->prev_ts.place_partition_len > 64)
		free (affinity_thr);
	      /* We are going to compute the places/subpartitions
		 again from the beginning.  So, we need to reinitialize
		 vars modified by the switch (bind) above inside
		 of the loop, to the state they had after the initial
		 switch (bind).  */
	      switch (bind)
		{
		case omp_proc_bind_true:
		case omp_proc_bind_close:
		  if (nthreads > thr->ts.place_partition_len)
		    /* T > P.  S has been changed, so needs
		       to be recomputed.  */
		    s = nthreads / thr->ts.place_partition_len;
		case omp_proc_bind_master:
		  /* No vars have been changed.  */
		case omp_proc_bind_spread:
		  p = thr->ts.place_partition_off;
		      s = nthreads / team->prev_ts.place_partition_len;
	      /* Increase the barrier threshold to make sure all new
		 threads and all the threads we're going to let die
		 arrive before the team is released.  */
		gomp_simple_barrier_reinit (&pool->threads_dock,
					    nthreads + affinity_count);
	}

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
	--diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
	pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }
  if (i >= nthreads)
    __builtin_unreachable ();
  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
			    * (nthreads - i));

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
	{
	  switch (bind)
	    {
	    case omp_proc_bind_true:
	    case omp_proc_bind_close:
		  if (p == (team->prev_ts.place_partition_off
			    + team->prev_ts.place_partition_len))
		    p = team->prev_ts.place_partition_off;
		  if (i == nthreads - rest)
	    case omp_proc_bind_master:
	    case omp_proc_bind_spread:
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      start_data->ts.place_partition_off = p;
			start_data->ts.place_partition_len = s + 1;
			start_data->ts.place_partition_len = s;
		      if (p == (team->prev_ts.place_partition_off
				+ team->prev_ts.place_partition_len))
			p = team->prev_ts.place_partition_off;
		      if (i == nthreads - rest)
		      start_data->ts.place_partition_off = p;
		      start_data->ts.place_partition_len = 1;
	  start_data->place = p + 1;
	  if (affinity_thr != NULL && pool->threads[i] != NULL)
	  gomp_init_thread_affinity (attr, p);
	}

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
      start_data->ts.def_allocator = thr->ts.def_allocator;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->num_teams = thr->num_teams;
      start_data->team_num = thr->team_num;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
			    start_data);
      start_data++;
      if (err != 0)
	gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);

  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_COUNT,
     AFFINITY_COUNT if non-zero will be always at least
     OLD_THREADS_COUNT - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
	diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  if (__builtin_expect (gomp_display_affinity_var, 0))
	  || nthreads != old_threads_used
	  gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
					thr->place);
	      start_data -= nthreads - 1;
	      for (i = 1; i < nthreads; ++i)
		  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
	      for (i = 1; i < nthreads; ++i)
		  gomp_thread_handle handle
		    = gomp_thread_to_pthread_t (pool->threads[i]);
		  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
						pool->threads[i]->place);
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif
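
/* On return from gomp_team_start the master has passed the team barrier
   (or the dock barrier) together with all team members, see the waits
   above and in gomp_thread_start; it then typically runs its own share of
   the parallel region before calling gomp_team_end below.  */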
/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     As #pragma omp cancel parallel might get awaited count in
     team->barrier in a inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
	{
	  struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
	  if (next_ws == NULL)
	    gomp_ptrlock_set (&ws->next_ws, ws);
	  gomp_fini_work_share (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}
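
/* Rather than freeing the team here, the non-nested path above parks it
   in pool->last_team so that get_last_team can hand it back to the next
   parallel region of the same size; only the previously cached team is
   released.  */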
#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}
/* Similar to gomp_free_pool_helper, but don't detach itself,
   gomp_pause_host will pthread_join those threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}
/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  pthread_t *thrs
	    = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_pause_pool_helper;
	      nthr->data = pool;
	      thrs[i] = gomp_thread_to_pthread_t (nthr);
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_pause_pool_helper.  */
	  gomp_simple_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
	  __sync_fetch_and_add (&gomp_managed_threads,
				1L - pool->threads_used);
#else
	  gomp_mutex_lock (&gomp_managed_threads_lock);
	  gomp_managed_threads -= pool->threads_used - 1L;
	  gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
	  for (i = 1; i < pool->threads_used; i++)
	    pthread_join (thrs[i], NULL);
	}
      if (pool->last_team)
	free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}
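
/* Unlike gomp_free_thread, the pause path keeps the worker threads
   joinable: gomp_pause_pool_helper ends with pthread_exit instead of
   detaching, and the pthread_join loop above reaps every worker before
   the pool memory is freed.  */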
struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}