#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_dispatch_hier.h"

#include "ompt-specific.h"
#include "ompd-specific.h"

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif

#define KMP_USE_PRCTL 0

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;

void __kmp_cleanup(void);
static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
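/* Determine the global thread id (gtid) of the calling thread.  Depending on
   __kmp_gtid_mode this uses thread-private data (TDATA), keyed TLS, or an
   internal search that matches the current stack address against the stack
   extents recorded for the threads in __kmp_threads. */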
int __kmp_get_global_thread_id() {
  kmp_info_t **other_threads;

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
            __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));

  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();

  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
                         __kmp_gtid_get_specific() == i);

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_SYNC_PTR(other_threads[i]))

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
int __kmp_get_global_thread_id_reg() {

  if (!__kmp_init_serial) {

  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));

  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();

    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();

  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }
  KMP_DEBUG_ASSERT(gtid >= 0);
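/* Check that the stack assigned to thread th does not overlap the stack of
   any other registered thread.  With KMP_STORAGE_MAP set the stack extents
   are printed; with extensive checking (__kmp_env_checks) an overlap raises a
   fatal StackOverlap error. */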
void __kmp_check_stack_overlap(kmp_info_t *th) {
  char *stack_beg = NULL;
  char *stack_end = NULL;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),

  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
void __kmp_infinite_loop(void) {
  static int done = FALSE;

#define MAX_MESSAGE 512
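/* Print one "OMP storage map:" line for the address range [p1, p2] under the
   stdio bootstrap lock; with KMP_PRINT_DATA_PLACEMENT it can additionally
   report the memory node of each page in the range. */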
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT

  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);

      __kmp_storage_map_verbose = FALSE;

      int localProc = __kmp_get_cpu_from_gtid(gtid);

      const int page_size = KMP_GET_PAGE_SIZE();

      p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
      p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));

      __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,

      __kmp_printf_no_lock(" GTID %d\n", gtid);

          (char *)p1 += page_size;
        } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
        __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,

      __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
                           (char *)p1 + (page_size - 1),
                           __kmp_get_host_node(p1));
        __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
                             (char *)p2 + (page_size - 1),
                             __kmp_get_host_node(p2));

      __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));

  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
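/* Emit an "OMP warning:" message on kmp_err unless warnings have been
   disabled (kmp_warnings_off). */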
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];

  if (__kmp_generate_warnings == kmp_warnings_off) {

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();

  __kmp_global.g.g_abort = SIGABRT;

  __kmp_unregister_library();

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
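/* Debug helpers: dump the storage map (address ranges) of a thread descriptor
   and of a team descriptor, block by block, via __kmp_print_storage_map_gtid. */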
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();

static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() {
  __kmp_init_target_task();

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
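/* Entry (deo) and exit (dxo) hooks for the ordered construct under
   BUILD_PARALLEL_ORDERED: threads are admitted in thread-id order by waiting
   on and then advancing team->t.t_ordered.dt.t_value; consistency checking
   pushes and pops the corresponding sync-stack entries. */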
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif

#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);

#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {

    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
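/* Decide whether the calling thread executes a SINGLE construct.  The winner
   is the thread that succeeds in the atomic compare-and-store on
   team->t.t_construct; a serialized team is handled separately. */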
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {

    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;

    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);

    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {

      __kmp_itt_metadata_single(id_ref);

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }

    __kmp_itt_single_start(gtid);

void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
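/* Work out how many threads can actually be reserved for a new team: apply
   dynamic adjustment (load balance, thread limit, random), then clip against
   KMP_DEVICE_THREAD_LIMIT, the contention-group OMP_THREAD_LIMIT, and the
   capacity of the __kmp_threads array.  A result of 1 means the region will
   be serialized. */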
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {

#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",

    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));

  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",

    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",

      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));

  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);

    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",

    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);

    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",

    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {

  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;

  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >

    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -

    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {

      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                __kmp_get_gtid(), new_nthreads, set_nthreads));
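/* Populate a newly allocated (or reused hot) team: install the primary thread
   in slot 0, allocate or reuse worker threads, copy teams-construct state and
   barrier arrival counters into each worker, and redo place partitioning when
   required. */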
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;

  int level = team->t.t_active_level - 1;
  if (master_th->th.th_teams_microtask) {
    if (master_th->th.th_teams_size.nteams > 1) {

    if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
        master_th->th.th_teams_level == team->t.t_level) {

  if (level < __kmp_hot_teams_max_level) {
    if (hot_teams[level].hot_team) {

      KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);

      hot_teams[level].hot_team = team;
      hot_teams[level].hot_team_nth = team->t.t_nproc;

  use_hot_team = team == root->r.r_hot_team;

    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    for (i = 1; i < team->t.t_nproc; i++) {

      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);

      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;

      kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
      for (b = 0; b < bs_last_barrier; ++b) {
        balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
        KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);

        balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;

#if KMP_AFFINITY_SUPPORTED

    if (!fork_teams_workers) {
      __kmp_partition_places(team);

  if (team->t.t_nproc > 1 &&
      __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
    team->t.b->update_num_threads(team->t.t_nproc);
    __kmp_add_threads_to_team(team, team->t.t_nproc);
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
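/* On x86/x86_64, propagateFPControl records the primary thread's x87 control
   word and MXCSR in the team when __kmp_inherit_fp_control is set, and
   updateHWFPControl restores them in a worker if they have changed; on other
   architectures both are no-op macros. */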
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);

    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);

    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {

    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);

#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
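/* __kmp_serialized_parallel: execute a parallel region with a team of one.
   Reuses (or allocates) the thread's serial team, pushes ICVs and a dispatch
   buffer for the nested level, and notifies OMPT of the begin of the
   serialized region. */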
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;

  this_thr->th.th_set_proc_bind = proc_bind_default;

  this_thr->th.th_set_nproc = 0;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);

  if (this_thr->th.th_team != serial_team) {

    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {

      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));

          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));

    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];

    serial_team->t.t_pkfn = (microtask_t)(~0);

    this_thr->th.th_info.ds.ds_tid = 0;

    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;

    propagateFPControl(serial_team);

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    int level = this_thr->th.th_team->t.t_level;

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);

    dispatch_private_info_t *disp_buffer =
        (dispatch_private_info_t *)__kmp_allocate(
            sizeof(dispatch_private_info_t));
    disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;

    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {

      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);

  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);

    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);

    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
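/* Helpers used by __kmp_fork_call to distinguish a fork that happens from
   inside a teams construct (forking the team workers) from the fork that
   enters the teams construct itself. */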
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
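/* Fork a team of workers from inside a teams construct.  The parent (teams)
   team is reused: its size may be adjusted to the requested number of
   threads, proc-bind and ITT/OMPT state are updated, and the workers are
   released through __kmp_internal_fork before the primary thread invokes the
   microtask. */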
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
                    ompt_data_t ompt_parallel_data, void *return_address,

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);

  if (parent_team == master_th->th.th_serial_team) {

    KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

    if (call_context == fork_context_gnu) {

      parent_team->t.t_serialized--;

    parent_team->t.t_pkfn = microtask;

    void **exit_frame_p;
    ompt_data_t *implicit_task_data;
    ompt_lw_taskteam_t lw_taskteam;

    if (ompt_enabled.enabled) {
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

      implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
      if (ompt_enabled.ompt_callback_implicit_task) {
        OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
            1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

      master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

      exit_frame_p = &dummy;

    parent_team->t.t_serialized--;

      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv

    if (ompt_enabled.enabled) {
      *exit_frame_p = NULL;
      OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, implicit_task_data, 1,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
      if (ompt_enabled.ompt_callback_parallel_end) {
        ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
            &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
            OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);

      master_th->th.ompt_thread_info.state = ompt_state_overhead;

  parent_team->t.t_pkfn = microtask;
  parent_team->t.t_invoke = invoker;
  KMP_ATOMIC_INC(&root->r.r_in_parallel);
  parent_team->t.t_active_level++;
  parent_team->t.t_level++;
  parent_team->t.t_def_allocator = master_th->th.th_def_allocator;

  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;

  if (ompt_enabled.enabled) {
    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);

  if (master_set_numthreads) {
    if (master_set_numthreads <= master_th->th.th_teams_size.nth) {

      kmp_info_t **other_threads = parent_team->t.t_threads;

      int old_proc = master_th->th.th_teams_size.nth;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
        __kmp_add_threads_to_team(parent_team, master_set_numthreads);

      parent_team->t.t_nproc = master_set_numthreads;
      for (i = 0; i < master_set_numthreads; ++i) {
        other_threads[i]->th.th_team_nproc = master_set_numthreads;

    master_th->th.th_set_nproc = 0;

  if (__kmp_debugging) {
    int nth = __kmp_omp_num_threads(loc);

      master_set_numthreads = nth;

  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;

  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;

    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;

    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];

  KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);

  if (proc_bind_icv != proc_bind_default &&
      master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
    kmp_info_t **other_threads = parent_team->t.t_threads;
    for (i = 0; i < master_th->th.th_team_nproc; ++i) {
      other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;

  master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1 &&
      master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;

  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);

    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();

#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(parent_team);

  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)

  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");

  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
                       kmp_int32 argc, microtask_t microtask, launch_t invoker,
                       kmp_info_t *master_th, kmp_team_t *parent_team,
                       ompt_data_t *ompt_parallel_data, void **return_address,
                       ompt_data_t **parent_task_data,

#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));

      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));

  master_th->th.th_serial_team->t.t_pkfn = microtask;

  if (call_context == fork_context_intel) {

    master_th->th.th_serial_team->t.t_ident = loc;

      master_th->th.th_serial_team->t.t_level--;

      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

        exit_frame_p = &dummy;

        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);

        master_th->th.ompt_thread_info.state = ompt_state_overhead;

    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
      team = master_th->th.th_team;

      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;

        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);

        for (i = 0; i < argc; ++i)

          argv[i] = parent_team->t.t_argv[i];

      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);

        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,

        master_th->th.ompt_thread_info.state = ompt_state_overhead;

      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);

        void **exit_frame_p;
        ompt_task_info_t *task_info;
        ompt_lw_taskteam_t lw_taskteam;
        ompt_data_t *implicit_task_data;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  ompt_parallel_data, *return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);

          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);

          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

          exit_frame_p = &dummy;

          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args

        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

          *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                ompt_parallel_data, *parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,
                *return_address);

          master_th->th.ompt_thread_info.state = ompt_state_overhead;

  } else if (call_context == fork_context_gnu) {

    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);

    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));

    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context,
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,

  int master_this_cons;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  int master_set_numthreads;
  int task_thread_limit = 0;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {

    void *dummy = KMP_ALLOCA(__kmp_stkpadding);

    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  master_th = __kmp_threads[gtid];

  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;
      master_th->th.th_current_task->td_icvs.task_thread_limit;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);

    __kmp_assign_root_init_mask();

  level = parent_team->t.t_level;

  active_level = parent_team->t.t_active_level;

  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;

    (*p_hot_teams)[0].hot_team_nth = 1;

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,

    master_th->th.ompt_thread_info.state = ompt_state_overhead;

  master_th->th.th_ident = loc;

  if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
    return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
                               call_context, microtask, invoker,
                               master_set_numthreads, level,
                               ompt_parallel_data, return_address,

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);

      __kmp_is_entering_teams(active_level, level, teams_level, ap);
  if ((!enter_teams &&
       (parent_team->t.t_active_level >=
        master_th->th.th_current_task->td_icvs.max_active_levels)) ||
      (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));

    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);

    nthreads = task_thread_limit > 0 && task_thread_limit < nthreads

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                     nthreads, enter_teams);
    if (nthreads == 1) {

      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  KMP_DEBUG_ASSERT(nthreads > 0);

  master_th->th.th_set_nproc = 0;

  if (nthreads == 1) {
    return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
                                  invoker, master_th, parent_team,
                                  &ompt_parallel_data, &return_address,
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));

  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {

    KMP_ATOMIC_INC(&root->r.r_in_parallel);

  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];

  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;

  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;

    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;

    if (master_th->th.th_teams_microtask &&
        microtask == (microtask_t)__kmp_teams_master) {
      proc_bind = __kmp_teams_proc_bind;

    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {

      if (!master_th->th.th_teams_microtask ||
          !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];

  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);

      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);

  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,

  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);

    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);

  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);

  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  propagateFPControl(team);
2191 if (ompd_state & OMPD_ENABLE_BP)
2192 ompd_bp_parallel_begin();
2195 if (__kmp_tasking_mode != tskm_immediate_exec) {
2198 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2199 parent_team->t.t_task_team[master_th->th.th_task_state]);
2200 KA_TRACE(20, (
"__kmp_fork_call: Primary T#%d pushing task_team %p / team "
2201 "%p, new task_team %p / team %p\n",
2202 __kmp_gtid_from_thread(master_th),
2203 master_th->th.th_task_team, parent_team,
2204 team->t.t_task_team[master_th->th.th_task_state], team));
2206 if (active_level || master_th->th.th_task_team) {
2208 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2209 if (master_th->th.th_task_state_top >=
2210 master_th->th.th_task_state_stack_sz) {
2211 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2212 kmp_uint8 *old_stack, *new_stack;
2214 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2215 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2216 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2218 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2222 old_stack = master_th->th.th_task_state_memo_stack;
2223 master_th->th.th_task_state_memo_stack = new_stack;
2224 master_th->th.th_task_state_stack_sz = new_size;
2225 __kmp_free(old_stack);
2229 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2230 master_th->th.th_task_state;
2231 master_th->th.th_task_state_top++;
2232#if KMP_NESTED_HOT_TEAMS
2233 if (master_th->th.th_hot_teams &&
2234 active_level < __kmp_hot_teams_max_level &&
2235 team == master_th->th.th_hot_teams[active_level].hot_team) {
2237 master_th->th.th_task_state =
2239 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2242 master_th->th.th_task_state = 0;
2243#if KMP_NESTED_HOT_TEAMS
2247#if !KMP_NESTED_HOT_TEAMS
2248 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2249 (team == root->r.r_hot_team));
2255 (
"__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2256 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2258 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2259 (team->t.t_master_tid == 0 &&
2260 (team->t.t_parent == root->r.r_root_team ||
2261 team->t.t_parent->t.t_serialized)));
2265 argv = (
void **)team->t.t_argv;
2267 for (i = argc - 1; i >= 0; --i) {
2268 void *new_argv = va_arg(kmp_va_deref(ap),
void *);
2269 KMP_CHECK_UPDATE(*argv, new_argv);
2273 for (i = 0; i < argc; ++i) {
2275 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
2280 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2281 if (!root->r.r_active)
2282 root->r.r_active = TRUE;
2284 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2285 __kmp_setup_icv_copy(team, nthreads,
2286 &master_th->th.th_current_task->td_icvs, loc);
2289 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2292 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2295 if (team->t.t_active_level == 1
2296 && !master_th->th.th_teams_microtask) {
2298 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2299 (__kmp_forkjoin_frames_mode == 3 ||
2300 __kmp_forkjoin_frames_mode == 1)) {
2301 kmp_uint64 tmp_time = 0;
2302 if (__itt_get_timestamp_ptr)
2303 tmp_time = __itt_get_timestamp();
2305 master_th->th.th_frame_time = tmp_time;
2306 if (__kmp_forkjoin_frames_mode == 3)
2307 team->t.t_region_time = tmp_time;
2311 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2312 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2314 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2320 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2323 (
"__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2324 root, team, master_th, gtid));
2327 if (__itt_stack_caller_create_ptr) {
2330 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2331 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2332 }
else if (parent_team->t.t_serialized) {
2337 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2338 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2346 __kmp_internal_fork(loc, gtid, team);
2347 KF_TRACE(10, (
"__kmp_internal_fork : after : root=%p, team=%p, "
2348 "master_th=%p, gtid=%d\n",
2349 root, team, master_th, gtid));
2352 if (call_context == fork_context_gnu) {
2353 KA_TRACE(20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid));
2358 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2359 team->t.t_id, team->t.t_pkfn));
2362#if KMP_STATS_ENABLED
2366 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
2370 if (!team->t.t_invoke(gtid)) {
2371 KMP_ASSERT2(0,
"cannot invoke microtask for PRIMARY thread");
2374#if KMP_STATS_ENABLED
2377 KMP_SET_THREAD_STATE(previous_state);
2381 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2382 team->t.t_id, team->t.t_pkfn));
2385 KA_TRACE(20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid));
2387 if (ompt_enabled.enabled) {
2388 master_th->th.ompt_thread_info.state = ompt_state_overhead;
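// OMPT support for the join path: restore the thread's state once the region
// is left and, if a tool registered a parallel_end callback, report the end
// of the parallel region first.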
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // Restore state outside the region.
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
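// __kmp_join_call: tear down the parallel region started by __kmp_fork_call.
// The primary thread joins the team at the join barrier, emits the configured
// ITT frames and OMPT callbacks, restores its ICVs, task team and dispatch
// pointer from the parent team, and releases (or keeps, for the hot team) the
// finished team under __kmp_forkjoin_lock.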
2418void __kmp_join_call(
ident_t *loc,
int gtid
2421 enum fork_context_e fork_context
2425 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2427 kmp_team_t *parent_team;
2428 kmp_info_t *master_th;
2432 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2435 master_th = __kmp_threads[gtid];
2436 root = master_th->th.th_root;
2437 team = master_th->th.th_team;
2438 parent_team = team->t.t_parent;
2440 master_th->th.th_ident = loc;
2443 void *team_microtask = (
void *)team->t.t_pkfn;
2447 if (ompt_enabled.enabled &&
2448 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2449 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2454 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2455 KA_TRACE(20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2456 "th_task_team = %p\n",
2457 __kmp_gtid_from_thread(master_th), team,
2458 team->t.t_task_team[master_th->th.th_task_state],
2459 master_th->th.th_task_team));
2460 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2461 team->t.t_task_team[master_th->th.th_task_state]);
2465 if (team->t.t_serialized) {
2466 if (master_th->th.th_teams_microtask) {
2468 int level = team->t.t_level;
2469 int tlevel = master_th->th.th_teams_level;
2470 if (level == tlevel) {
2474 }
else if (level == tlevel + 1) {
2478 team->t.t_serialized++;
2484 if (ompt_enabled.enabled) {
2485 if (fork_context == fork_context_gnu) {
2486 __ompt_lw_taskteam_unlink(master_th);
2488 __kmp_join_restore_state(master_th, parent_team);
2495 master_active = team->t.t_master_active;
2500 __kmp_internal_join(loc, gtid, team);
2502 if (__itt_stack_caller_create_ptr) {
2503 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2505 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2506 team->t.t_stack_id = NULL;
2510 master_th->th.th_task_state =
2513 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2514 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2518 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2519 parent_team->t.t_stack_id = NULL;
2527 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2528 void *codeptr = team->t.ompt_team_info.master_return_address;
2533 if (team->t.t_active_level == 1 &&
2534 (!master_th->th.th_teams_microtask ||
2535 master_th->th.th_teams_size.nteams == 1)) {
2536 master_th->th.th_ident = loc;
2539 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2540 __kmp_forkjoin_frames_mode == 3)
2541 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2542 master_th->th.th_frame_time, 0, loc,
2543 master_th->th.th_team_nproc, 1);
2544 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2545 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2546 __kmp_itt_region_joined(gtid);
2550#if KMP_AFFINITY_SUPPORTED
2553 master_th->th.th_first_place = team->t.t_first_place;
2554 master_th->th.th_last_place = team->t.t_last_place;
2558 if (master_th->th.th_teams_microtask && !exit_teams &&
2559 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2560 team->t.t_level == master_th->th.th_teams_level + 1) {
2565 ompt_data_t ompt_parallel_data = ompt_data_none;
2566 if (ompt_enabled.enabled) {
2567 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2568 if (ompt_enabled.ompt_callback_implicit_task) {
2569 int ompt_team_size = team->t.t_nproc;
2570 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2571 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2572 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2574 task_info->frame.exit_frame = ompt_data_none;
2575 task_info->task_data = ompt_data_none;
2576 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2577 __ompt_lw_taskteam_unlink(master_th);
2582 team->t.t_active_level--;
2583 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2589 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2590 int old_num = master_th->th.th_team_nproc;
2591 int new_num = master_th->th.th_teams_size.nth;
2592 kmp_info_t **other_threads = team->t.t_threads;
2593 team->t.t_nproc = new_num;
2594 for (
int i = 0; i < old_num; ++i) {
2595 other_threads[i]->th.th_team_nproc = new_num;
2598 for (
int i = old_num; i < new_num; ++i) {
2600 KMP_DEBUG_ASSERT(other_threads[i]);
2601 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2602 for (
int b = 0; b < bs_last_barrier; ++b) {
2603 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2604 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2606 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2609 if (__kmp_tasking_mode != tskm_immediate_exec) {
2611 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2617 if (ompt_enabled.enabled) {
2618 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2619 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2627 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2628 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2630 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2635 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2637 if (!master_th->th.th_teams_microtask ||
2638 team->t.t_level > master_th->th.th_teams_level) {
2640 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2642 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2645 if (ompt_enabled.enabled) {
2646 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2647 if (ompt_enabled.ompt_callback_implicit_task) {
2648 int flags = (team_microtask == (
void *)__kmp_teams_master)
2650 : ompt_task_implicit;
2651 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2652 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2653 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2654 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2656 task_info->frame.exit_frame = ompt_data_none;
2657 task_info->task_data = ompt_data_none;
2661 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2663 __kmp_pop_current_task_from_thread(master_th);
2665 master_th->th.th_def_allocator = team->t.t_def_allocator;
2668 if (ompd_state & OMPD_ENABLE_BP)
2669 ompd_bp_parallel_end();
2671 updateHWFPControl(team);
2673 if (root->r.r_active != master_active)
2674 root->r.r_active = master_active;
2676 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2684 master_th->th.th_team = parent_team;
2685 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2686 master_th->th.th_team_master = parent_team->t.t_threads[0];
2687 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2690 if (parent_team->t.t_serialized &&
2691 parent_team != master_th->th.th_serial_team &&
2692 parent_team != root->r.r_root_team) {
2693 __kmp_free_team(root,
2694 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2695 master_th->th.th_serial_team = parent_team;
2698 if (__kmp_tasking_mode != tskm_immediate_exec) {
2699 if (master_th->th.th_task_state_top >
2701 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2703 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2704 master_th->th.th_task_state;
2705 --master_th->th.th_task_state_top;
2707 master_th->th.th_task_state =
2709 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2710 }
else if (team != root->r.r_hot_team) {
2715 master_th->th.th_task_state = 0;
2718 master_th->th.th_task_team =
2719 parent_team->t.t_task_team[master_th->th.th_task_state];
2721 (
"__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2722 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2729 master_th->th.th_current_task->td_flags.executing = 1;
2731 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2733#if KMP_AFFINITY_SUPPORTED
2734 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2735 __kmp_reset_root_init_mask(gtid);
2740 OMPT_INVOKER(fork_context) |
2741 ((team_microtask == (
void *)__kmp_teams_master) ? ompt_parallel_league
2742 : ompt_parallel_team);
2743 if (ompt_enabled.enabled) {
2744 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2750 KA_TRACE(20, (
"__kmp_join_call: exit T#%d\n", gtid));
void __kmp_save_internal_controls(kmp_info_t *thread) {
  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;
    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { // push a record on the serial team's stack
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));
      copy_icvs(control, &thread->th.th_current_task->td_icvs);
      control->serial_nesting_level = thread->th.th_team->t.t_serialized;
      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
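// __kmp_set_num_threads: back end of omp_set_num_threads() for this thread
// (roughly, a user call omp_set_num_threads(4) arrives here as
// __kmp_set_num_threads(4, gtid of the calling thread)). The request is
// clamped to __kmp_max_nth, the current internal controls are saved, the
// nproc ICV is updated, and an oversized, inactive hot team is trimmed down
// to the new thread count.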
2787void __kmp_set_num_threads(
int new_nth,
int gtid) {
2791 KF_TRACE(10, (
"__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2792 KMP_DEBUG_ASSERT(__kmp_init_serial);
2796 else if (new_nth > __kmp_max_nth)
2797 new_nth = __kmp_max_nth;
2800 thread = __kmp_threads[gtid];
2801 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2804 __kmp_save_internal_controls(thread);
2806 set__nproc(thread, new_nth);
2811 root = thread->th.th_root;
2812 if (__kmp_init_parallel && (!root->r.r_active) &&
2813 (root->r.r_hot_team->t.t_nproc > new_nth)
2814#
if KMP_NESTED_HOT_TEAMS
2815 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2818 kmp_team_t *hot_team = root->r.r_hot_team;
2821 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2823 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2824 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2827 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2828 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2829 if (__kmp_tasking_mode != tskm_immediate_exec) {
2832 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2834 __kmp_free_thread(hot_team->t.t_threads[f]);
2835 hot_team->t.t_threads[f] = NULL;
2837 hot_team->t.t_nproc = new_nth;
2838#if KMP_NESTED_HOT_TEAMS
2839 if (thread->th.th_hot_teams) {
2840 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2841 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2845 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2846 hot_team->t.b->update_num_threads(new_nth);
2847 __kmp_add_threads_to_team(hot_team, new_nth);
2850 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2853 for (f = 0; f < new_nth; f++) {
2854 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2855 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2858 hot_team->t.t_size_changed = -1;
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10,
           ("__kmp_set_max_active_levels: new max_active_levels for thread "
            "%d = (%d)\n",
            gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // Ignore the call and keep the last valid setting; a warning is issued.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // value is within the valid range [0; KMP_MAX_ACTIVE_LEVELS_LIMIT]
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
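// Per-device ICVs for the teams construct: nteams-var and
// teams-thread-limit-var, exposed through omp_set_num_teams() and
// omp_set_teams_thread_limit().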
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }

void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
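// __kmp_set_schedule: back end of omp_set_schedule(). Validates the requested
// kind (warning and falling back to the default schedule when it is out of
// range), maps it through __kmp_sch_map, reapplies any schedule modifiers from
// the original kind, and stores the resulting kind and chunk in the current
// task's sched ICV.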
2940void __kmp_set_schedule(
int gtid, kmp_sched_t kind,
int chunk) {
2942 kmp_sched_t orig_kind;
2945 KF_TRACE(10, (
"__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2946 gtid, (
int)kind, chunk));
2947 KMP_DEBUG_ASSERT(__kmp_init_serial);
2954 kind = __kmp_sched_without_mods(kind);
2956 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2957 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2959 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2960 KMP_HNT(DefaultScheduleKindUsed,
"static, no chunk"),
2962 kind = kmp_sched_default;
2966 thread = __kmp_threads[gtid];
2968 __kmp_save_internal_controls(thread);
2970 if (kind < kmp_sched_upper_std) {
2971 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2974 thread->th.th_current_task->td_icvs.sched.r_sched_type =
kmp_sch_static;
2976 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2977 __kmp_sch_map[kind - kmp_sched_lower - 1];
2982 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2983 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2984 kmp_sched_lower - 2];
2986 __kmp_sched_apply_mods_intkind(
2987 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2988 if (kind == kmp_sched_auto || chunk < 1) {
2990 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2992 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
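// __kmp_get_schedule: inverse of the mapping above; translates the internal
// sched_type stored in the ICV back into a user-visible kmp_sched_t and chunk
// for omp_get_schedule().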
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, show this via a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
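// __kmp_get_ancestor_thread_num: walk up the team hierarchy, counting
// serialized levels, to recover the thread number this thread had at the
// requested nesting level (omp_get_ancestor_thread_num()).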
3045int __kmp_get_ancestor_thread_num(
int gtid,
int level) {
3051 KF_TRACE(10, (
"__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3052 KMP_DEBUG_ASSERT(__kmp_init_serial);
3059 thr = __kmp_threads[gtid];
3060 team = thr->th.th_team;
3061 ii = team->t.t_level;
3065 if (thr->th.th_teams_microtask) {
3067 int tlevel = thr->th.th_teams_level;
3070 KMP_DEBUG_ASSERT(ii >= tlevel);
3082 return __kmp_tid_from_gtid(gtid);
3084 dd = team->t.t_serialized;
3086 while (ii > level) {
3087 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3089 if ((team->t.t_serialized) && (!dd)) {
3090 team = team->t.t_parent;
3094 team = team->t.t_parent;
3095 dd = team->t.t_serialized;
3100 return (dd > 1) ? (0) : (team->t.t_master_tid);
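// __kmp_get_team_size: same walk as above, but returns the size of the team
// at the requested level (omp_get_team_size()).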
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (thr->th.th_teams_microtask) {
    // We are in a teams region where nested teams share the same level.
    int tlevel = thr->th.th_teams_level; // level of the teams construct
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // Artificially increase ii so the walk passes the teams league.
      ii += (ii == tlevel) ? 2 : 1;
    }
  }
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }
  return team->t.t_nproc;
}
kmp_r_sched_t __kmp_get_schedule_global() {
  kmp_r_sched_t r_sched;
  // Build the run-time schedule from the globals __kmp_sched, __kmp_chunk,
  // __kmp_static and __kmp_guided.
  enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
  enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
  if (s == kmp_sch_static) {
    r_sched.r_sched_type = __kmp_static; // detailed static (balanced or greedy)
  } else if (s == kmp_sch_guided_chunked) {
    r_sched.r_sched_type = __kmp_guided; // detailed guided
  } else { // STATIC_CHUNKED, DYNAMIC_CHUNKED, or other
    r_sched.r_sched_type = __kmp_sched;
  }
  SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }
  return r_sched;
}
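// __kmp_alloc_argv_entries: make sure team->t.t_argv has room for argc
// microtask arguments, using the inline buffer for small counts and a
// page-allocated array otherwise.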
3191static void __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc) {
3193 KMP_DEBUG_ASSERT(team);
3194 if (!realloc || argc > team->t.t_max_argc) {
3196 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3197 "current entries=%d\n",
3198 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3200 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3201 __kmp_free((
void *)team->t.t_argv);
3203 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3205 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3206 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d "
3208 team->t.t_id, team->t.t_max_argc));
3209 team->t.t_argv = &team->t.t_inline_argv[0];
3210 if (__kmp_storage_map) {
3211 __kmp_print_storage_map_gtid(
3212 -1, &team->t.t_inline_argv[0],
3213 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3214 (
sizeof(
void *) * KMP_INLINE_ARGV_ENTRIES),
"team_%d.t_inline_argv",
3219 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3220 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3222 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3224 team->t.t_id, team->t.t_max_argc));
3226 (
void **)__kmp_page_allocate(
sizeof(
void *) * team->t.t_max_argc);
3227 if (__kmp_storage_map) {
3228 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3229 &team->t.t_argv[team->t.t_max_argc],
3230 sizeof(
void *) * team->t.t_max_argc,
3231 "team_%d.t_argv", team->t.t_id);
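// Team array management: t_threads, the dispatch buffers, per-thread
// kmp_disp_t structures and implicit task data are all sized for t_max_nproc
// and reallocated (preserving t_threads) when a team needs to grow.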
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
3293static kmp_internal_control_t __kmp_get_global_icvs(
void) {
3295 kmp_r_sched_t r_sched =
3296 __kmp_get_schedule_global();
3298 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3300 kmp_internal_control_t g_icvs = {
3302 (kmp_int8)__kmp_global.g.g_dynamic,
3304 (kmp_int8)__kmp_env_blocktime,
3306 __kmp_dflt_blocktime,
3311 __kmp_dflt_team_nth,
3317 __kmp_dflt_max_active_levels,
3321 __kmp_nested_proc_bind.bind_types[0],
3322 __kmp_default_device,
3329static kmp_internal_control_t __kmp_get_x_global_icvs(
const kmp_team_t *team) {
3331 kmp_internal_control_t gx_icvs;
3332 gx_icvs.serial_nesting_level =
3334 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3335 gx_icvs.next = NULL;
3340static void __kmp_initialize_root(kmp_root_t *root) {
3342 kmp_team_t *root_team;
3343 kmp_team_t *hot_team;
3344 int hot_team_max_nth;
3345 kmp_r_sched_t r_sched =
3346 __kmp_get_schedule_global();
3347 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3348 KMP_DEBUG_ASSERT(root);
3349 KMP_ASSERT(!root->r.r_begin);
3352 __kmp_init_lock(&root->r.r_begin_lock);
3353 root->r.r_begin = FALSE;
3354 root->r.r_active = FALSE;
3355 root->r.r_in_parallel = 0;
3356 root->r.r_blocktime = __kmp_dflt_blocktime;
3357#if KMP_AFFINITY_SUPPORTED
3358 root->r.r_affinity_assigned = FALSE;
3363 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3366 __kmp_allocate_team(root,
3372 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3374 USE_NESTED_HOT_ARG(NULL)
3379 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3382 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3384 root->r.r_root_team = root_team;
3385 root_team->t.t_control_stack_top = NULL;
3388 root_team->t.t_threads[0] = NULL;
3389 root_team->t.t_nproc = 1;
3390 root_team->t.t_serialized = 1;
3392 root_team->t.t_sched.sched = r_sched.sched;
3395 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3396 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3400 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3403 __kmp_allocate_team(root,
3405 __kmp_dflt_team_nth_ub * 2,
3409 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3411 USE_NESTED_HOT_ARG(NULL)
3413 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3415 root->r.r_hot_team = hot_team;
3416 root_team->t.t_control_stack_top = NULL;
3419 hot_team->t.t_parent = root_team;
3422 hot_team_max_nth = hot_team->t.t_max_nproc;
3423 for (f = 0; f < hot_team_max_nth; ++f) {
3424 hot_team->t.t_threads[f] = NULL;
3426 hot_team->t.t_nproc = 1;
3428 hot_team->t.t_sched.sched = r_sched.sched;
3429 hot_team->t.t_size_changed = 0;
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;
3440static void __kmp_print_structure_team_accum(
3441 kmp_team_list_t list,
3442 kmp_team_p
const *team
3452 KMP_DEBUG_ASSERT(list != NULL);
3457 __kmp_print_structure_team_accum(list, team->t.t_parent);
3458 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3462 while (l->next != NULL && l->entry != team) {
3465 if (l->next != NULL) {
3471 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3477 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3478 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3506void __kmp_print_structure(
void) {
3508 kmp_team_list_t list;
3512 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3516 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3517 "Table\n------------------------------\n");
3520 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3521 __kmp_printf(
"%2d", gtid);
3522 if (__kmp_threads != NULL) {
3523 __kmp_printf(
" %p", __kmp_threads[gtid]);
3525 if (__kmp_root != NULL) {
3526 __kmp_printf(
" %p", __kmp_root[gtid]);
3533 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3535 if (__kmp_threads != NULL) {
3537 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3538 kmp_info_t
const *thread = __kmp_threads[gtid];
3539 if (thread != NULL) {
3540 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3541 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3542 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3543 __kmp_print_structure_team(
" Serial Team: ",
3544 thread->th.th_serial_team);
3545 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3546 __kmp_print_structure_thread(
" Primary: ",
3547 thread->th.th_team_master);
3548 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3549 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3550 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3551 __kmp_print_structure_thread(
" Next in pool: ",
3552 thread->th.th_next_pool);
3554 __kmp_print_structure_team_accum(list, thread->th.th_team);
3555 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3559 __kmp_printf(
"Threads array is not allocated.\n");
3563 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3565 if (__kmp_root != NULL) {
3567 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3568 kmp_root_t
const *root = __kmp_root[gtid];
3570 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3571 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3572 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3573 __kmp_print_structure_thread(
" Uber Thread: ",
3574 root->r.r_uber_thread);
3575 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3576 __kmp_printf(
" In Parallel: %2d\n",
3577 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3579 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3580 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3584 __kmp_printf(
"Ubers array is not allocated.\n");
3587 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3589 while (list->next != NULL) {
3590 kmp_team_p
const *team = list->entry;
3592 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3593 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3594 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3595 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3596 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3597 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3598 for (i = 0; i < team->t.t_nproc; ++i) {
3599 __kmp_printf(
" Thread %2d: ", i);
3600 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3602 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3608 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3610 __kmp_print_structure_thread(
"Thread pool: ",
3611 CCAST(kmp_info_t *, __kmp_thread_pool));
3612 __kmp_print_structure_team(
"Team pool: ",
3613 CCAST(kmp_team_t *, __kmp_team_pool));
3617 while (list != NULL) {
3618 kmp_team_list_item_t *item = list;
3620 KMP_INTERNAL_FREE(item);
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
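// Per-thread linear congruential generator: each call computes
//   x_{n+1} = a * x_n + 1
// and returns the high 16 bits of x_n; the multiplier a is picked per thread
// from __kmp_primes by __kmp_init_random.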
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

// __kmp_init_random: Initialize a thread's random number generator.
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
3670static int __kmp_reclaim_dead_roots(
void) {
3673 for (i = 0; i < __kmp_threads_capacity; ++i) {
3674 if (KMP_UBER_GTID(i) &&
3675 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3678 r += __kmp_unregister_root_other_thread(i);
3703static int __kmp_expand_threads(
int nNeed) {
3705 int minimumRequiredCapacity;
3707 kmp_info_t **newThreads;
3708 kmp_root_t **newRoot;
3714#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3717 added = __kmp_reclaim_dead_roots();
3746 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3749 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3753 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3755 newCapacity = __kmp_threads_capacity;
3757 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3758 : __kmp_sys_max_nth;
3759 }
while (newCapacity < minimumRequiredCapacity);
3760 newThreads = (kmp_info_t **)__kmp_allocate(
3761 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3763 (kmp_root_t **)((
char *)newThreads +
sizeof(kmp_info_t *) * newCapacity);
3764 KMP_MEMCPY(newThreads, __kmp_threads,
3765 __kmp_threads_capacity *
sizeof(kmp_info_t *));
3766 KMP_MEMCPY(newRoot, __kmp_root,
3767 __kmp_threads_capacity *
sizeof(kmp_root_t *));
3770 kmp_old_threads_list_t *node =
3771 (kmp_old_threads_list_t *)__kmp_allocate(
sizeof(kmp_old_threads_list_t));
3772 node->threads = __kmp_threads;
3773 node->next = __kmp_old_threads_list;
3774 __kmp_old_threads_list = node;
3776 *(kmp_info_t * *
volatile *)&__kmp_threads = newThreads;
3777 *(kmp_root_t * *
volatile *)&__kmp_root = newRoot;
3778 added += newCapacity - __kmp_threads_capacity;
3779 *(
volatile int *)&__kmp_threads_capacity = newCapacity;
3781 if (newCapacity > __kmp_tp_capacity) {
3782 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3783 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3784 __kmp_threadprivate_resize_cache(newCapacity);
3786 *(
volatile int *)&__kmp_tp_capacity = newCapacity;
3788 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
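// __kmp_register_root: register the calling (or initial) thread as a new root.
// Takes __kmp_forkjoin_lock, finds a free gtid slot (expanding __kmp_threads
// if necessary), allocates the kmp_root_t and its uber thread, sets up the
// root team, hot team and serial team, initializes barrier/affinity state,
// and fires the OMPT thread-begin and initial implicit-task callbacks.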
3797int __kmp_register_root(
int initial_thread) {
3798 kmp_info_t *root_thread;
3802 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3803 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3820 capacity = __kmp_threads_capacity;
3821 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3828 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3829 capacity -= __kmp_hidden_helper_threads_num;
3833 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3834 if (__kmp_tp_cached) {
3835 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3836 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3837 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3839 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3849 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3852 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3853 gtid <= __kmp_hidden_helper_threads_num;
3856 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3857 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3858 "hidden helper thread: T#%d\n",
3864 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3867 for (gtid = __kmp_hidden_helper_threads_num + 1;
3868 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3872 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3873 KMP_ASSERT(gtid < __kmp_threads_capacity);
3878 TCW_4(__kmp_nth, __kmp_nth + 1);
3882 if (__kmp_adjust_gtid_mode) {
3883 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3884 if (TCR_4(__kmp_gtid_mode) != 2) {
3885 TCW_4(__kmp_gtid_mode, 2);
3888 if (TCR_4(__kmp_gtid_mode) != 1) {
3889 TCW_4(__kmp_gtid_mode, 1);
3894#ifdef KMP_ADJUST_BLOCKTIME
3897 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3898 if (__kmp_nth > __kmp_avail_proc) {
3899 __kmp_zero_bt = TRUE;
3905 if (!(root = __kmp_root[gtid])) {
3906 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3907 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3910#if KMP_STATS_ENABLED
3912 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3913 __kmp_stats_thread_ptr->startLife();
3914 KMP_SET_THREAD_STATE(SERIAL_REGION);
3917 __kmp_initialize_root(root);
3920 if (root->r.r_uber_thread) {
3921 root_thread = root->r.r_uber_thread;
3923 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3924 if (__kmp_storage_map) {
3925 __kmp_print_thread_storage_map(root_thread, gtid);
3927 root_thread->th.th_info.ds.ds_gtid = gtid;
3929 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3931 root_thread->th.th_root = root;
3932 if (__kmp_env_consistency_check) {
3933 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3936 __kmp_initialize_fast_memory(root_thread);
3940 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3941 __kmp_initialize_bget(root_thread);
3943 __kmp_init_random(root_thread);
3947 if (!root_thread->th.th_serial_team) {
3948 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3949 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3950 root_thread->th.th_serial_team = __kmp_allocate_team(
3955 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3957 KMP_ASSERT(root_thread->th.th_serial_team);
3958 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3959 root_thread->th.th_serial_team));
3962 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3964 root->r.r_root_team->t.t_threads[0] = root_thread;
3965 root->r.r_hot_team->t.t_threads[0] = root_thread;
3966 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3968 root_thread->th.th_serial_team->t.t_serialized = 0;
3969 root->r.r_uber_thread = root_thread;
3972 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3973 TCW_4(__kmp_init_gtid, TRUE);
3976 __kmp_gtid_set_specific(gtid);
3979 __kmp_itt_thread_name(gtid);
3982#ifdef KMP_TDATA_GTID
3985 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3986 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3988 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3990 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3991 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3992 KMP_INIT_BARRIER_STATE));
3995 for (b = 0; b < bs_last_barrier; ++b) {
3996 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3998 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
4002 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
4003 KMP_INIT_BARRIER_STATE);
4005#if KMP_AFFINITY_SUPPORTED
4006 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
4007 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
4008 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
4009 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
4011 root_thread->th.th_def_allocator = __kmp_def_allocator;
4012 root_thread->th.th_prev_level = 0;
4013 root_thread->th.th_prev_num_threads = 1;
4015 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
4016 tmp->cg_root = root_thread;
4017 tmp->cg_thread_limit = __kmp_cg_max_nth;
4018 tmp->cg_nthreads = 1;
4019 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
4020 " cg_nthreads init to 1\n",
4023 root_thread->th.th_cg_roots = tmp;
4025 __kmp_root_counter++;
4028 if (!initial_thread && ompt_enabled.enabled) {
4030 kmp_info_t *root_thread = ompt_get_thread();
4032 ompt_set_thread_state(root_thread, ompt_state_overhead);
4034 if (ompt_enabled.ompt_callback_thread_begin) {
4035 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4036 ompt_thread_initial, __ompt_get_thread_data_internal());
4038 ompt_data_t *task_data;
4039 ompt_data_t *parallel_data;
4040 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4042 if (ompt_enabled.ompt_callback_implicit_task) {
4043 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4044 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4047 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4051 if (ompd_state & OMPD_ENABLE_BP)
4052 ompd_bp_thread_begin();
4056 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4061#if KMP_NESTED_HOT_TEAMS
4062static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr,
int level,
4063 const int max_level) {
4065 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4066 if (!hot_teams || !hot_teams[level].hot_team) {
4069 KMP_DEBUG_ASSERT(level < max_level);
4070 kmp_team_t *team = hot_teams[level].hot_team;
4071 nth = hot_teams[level].hot_team_nth;
4073 if (level < max_level - 1) {
4074 for (i = 0; i < nth; ++i) {
4075 kmp_info_t *th = team->t.t_threads[i];
4076 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4077 if (i > 0 && th->th.th_hot_teams) {
4078 __kmp_free(th->th.th_hot_teams);
4079 th->th.th_hot_teams = NULL;
4083 __kmp_free_team(root, team, NULL);
4090static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
4091 kmp_team_t *root_team = root->r.r_root_team;
4092 kmp_team_t *hot_team = root->r.r_hot_team;
4093 int n = hot_team->t.t_nproc;
4096 KMP_DEBUG_ASSERT(!root->r.r_active);
4098 root->r.r_root_team = NULL;
4099 root->r.r_hot_team = NULL;
4102 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4103#if KMP_NESTED_HOT_TEAMS
4104 if (__kmp_hot_teams_max_level >
4106 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4107 kmp_info_t *th = hot_team->t.t_threads[i];
4108 if (__kmp_hot_teams_max_level > 1) {
4109 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4111 if (th->th.th_hot_teams) {
4112 __kmp_free(th->th.th_hot_teams);
4113 th->th.th_hot_teams = NULL;
4118 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4123 if (__kmp_tasking_mode != tskm_immediate_exec) {
4124 __kmp_wait_to_unref_task_teams();
4130 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4132 (LPVOID) & (root->r.r_uber_thread->th),
4133 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4134 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4138 if (ompd_state & OMPD_ENABLE_BP)
4139 ompd_bp_thread_end();
4143 ompt_data_t *task_data;
4144 ompt_data_t *parallel_data;
4145 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4147 if (ompt_enabled.ompt_callback_implicit_task) {
4148 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4149 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4151 if (ompt_enabled.ompt_callback_thread_end) {
4152 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4153 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4159 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4160 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4162 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4163 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4166 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4167 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4168 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4169 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4170 root->r.r_uber_thread->th.th_cg_roots = NULL;
4172 __kmp_reap_thread(root->r.r_uber_thread, 1);
4176 root->r.r_uber_thread = NULL;
4178 root->r.r_begin = FALSE;
4183void __kmp_unregister_root_current_thread(
int gtid) {
4184 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4188 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4189 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4190 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4193 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4196 kmp_root_t *root = __kmp_root[gtid];
4198 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4199 KMP_ASSERT(KMP_UBER_GTID(gtid));
4200 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4201 KMP_ASSERT(root->r.r_active == FALSE);
4205 kmp_info_t *thread = __kmp_threads[gtid];
4206 kmp_team_t *team = thread->th.th_team;
4207 kmp_task_team_t *task_team = thread->th.th_task_team;
4210 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4211 task_team->tt.tt_hidden_helper_task_encountered)) {
4214 thread->th.ompt_thread_info.state = ompt_state_undefined;
4216 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4219 __kmp_reset_root(gtid, root);
4223 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4225 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
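// __kmp_unregister_root_other_thread: like the routine above, but unregisters
// a root other than the calling thread; the caller is expected to hold
// __kmp_forkjoin_lock.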
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
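// __kmp_initialize_info: bind a (new or recycled) worker to slot tid of a
// team: set its team/tid fields, implicit task and ICVs, private dispatch
// buffer, contention-group root and task-state memo stack.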
4269static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4270 int tid,
int gtid) {
4274 KMP_DEBUG_ASSERT(this_thr != NULL);
4275 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4276 KMP_DEBUG_ASSERT(team);
4277 KMP_DEBUG_ASSERT(team->t.t_threads);
4278 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4279 kmp_info_t *master = team->t.t_threads[0];
4280 KMP_DEBUG_ASSERT(master);
4281 KMP_DEBUG_ASSERT(master->th.th_root);
4285 TCW_SYNC_PTR(this_thr->th.th_team, team);
4287 this_thr->th.th_info.ds.ds_tid = tid;
4288 this_thr->th.th_set_nproc = 0;
4289 if (__kmp_tasking_mode != tskm_immediate_exec)
4292 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4294 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4295 this_thr->th.th_set_proc_bind = proc_bind_default;
4296#if KMP_AFFINITY_SUPPORTED
4297 this_thr->th.th_new_place = this_thr->th.th_current_place;
4299 this_thr->th.th_root = master->th.th_root;
4302 this_thr->th.th_team_nproc = team->t.t_nproc;
4303 this_thr->th.th_team_master = master;
4304 this_thr->th.th_team_serialized = team->t.t_serialized;
4306 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4308 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4309 tid, gtid, this_thr, this_thr->th.th_current_task));
4311 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4314 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4315 tid, gtid, this_thr, this_thr->th.th_current_task));
4320 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4322 this_thr->th.th_local.this_construct = 0;
4324 if (!this_thr->th.th_pri_common) {
4325 this_thr->th.th_pri_common =
4326 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4327 if (__kmp_storage_map) {
4328 __kmp_print_storage_map_gtid(
4329 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4330 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4332 this_thr->th.th_pri_head = NULL;
4335 if (this_thr != master &&
4336 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4338 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4339 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4342 int i = tmp->cg_nthreads--;
4343 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4344 " on node %p of thread %p to %d\n",
4345 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4350 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4352 this_thr->th.th_cg_roots->cg_nthreads++;
4353 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4354 " node %p of thread %p to %d\n",
4355 this_thr, this_thr->th.th_cg_roots,
4356 this_thr->th.th_cg_roots->cg_root,
4357 this_thr->th.th_cg_roots->cg_nthreads));
4358 this_thr->th.th_current_task->td_icvs.thread_limit =
4359 this_thr->th.th_cg_roots->cg_thread_limit;
4364 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4367 sizeof(dispatch_private_info_t) *
4368 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4369 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4370 team->t.t_max_nproc));
4371 KMP_ASSERT(dispatch);
4372 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4373 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4375 dispatch->th_disp_index = 0;
4376 dispatch->th_doacross_buf_idx = 0;
4377 if (!dispatch->th_disp_buffer) {
4378 dispatch->th_disp_buffer =
4379 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4381 if (__kmp_storage_map) {
4382 __kmp_print_storage_map_gtid(
4383 gtid, &dispatch->th_disp_buffer[0],
4384 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4386 : __kmp_dispatch_num_buffers],
4388 "th_%d.th_dispatch.th_disp_buffer "
4389 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4390 gtid, team->t.t_id, gtid);
4393 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4396 dispatch->th_dispatch_pr_current = 0;
4397 dispatch->th_dispatch_sh_current = 0;
4399 dispatch->th_deo_fcn = 0;
4400 dispatch->th_dxo_fcn = 0;
4403 this_thr->th.th_next_pool = NULL;
4405 if (!this_thr->th.th_task_state_memo_stack) {
4407 this_thr->th.th_task_state_memo_stack =
4408 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4409 this_thr->th.th_task_state_top = 0;
4410 this_thr->th.th_task_state_stack_sz = 4;
4411 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4413 this_thr->th.th_task_state_memo_stack[i] = 0;
4416 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4417 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
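// __kmp_allocate_thread: supply a worker for team slot new_tid, either by
// recycling a thread from __kmp_thread_pool or by allocating a fresh
// kmp_info_t: pick a gtid, create its serial team and OS worker thread, and
// initialize its barrier, random-seed and affinity state.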
4427kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4429 kmp_team_t *serial_team;
4430 kmp_info_t *new_thr;
4433 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4434 KMP_DEBUG_ASSERT(root && team);
4435#if !KMP_NESTED_HOT_TEAMS
4436 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4441 if (__kmp_thread_pool) {
4442 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4443 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4444 if (new_thr == __kmp_thread_pool_insert_pt) {
4445 __kmp_thread_pool_insert_pt = NULL;
4447 TCW_4(new_thr->th.th_in_pool, FALSE);
4448 __kmp_suspend_initialize_thread(new_thr);
4449 __kmp_lock_suspend_mx(new_thr);
4450 if (new_thr->th.th_active_in_pool == TRUE) {
4451 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4452 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4453 new_thr->th.th_active_in_pool = FALSE;
4455 __kmp_unlock_suspend_mx(new_thr);
4457 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4458 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4459 KMP_ASSERT(!new_thr->th.th_team);
4460 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4463 __kmp_initialize_info(new_thr, team, new_tid,
4464 new_thr->th.th_info.ds.ds_gtid);
4465 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4467 TCW_4(__kmp_nth, __kmp_nth + 1);
4469 new_thr->th.th_task_state = 0;
4470 new_thr->th.th_task_state_top = 0;
4471 new_thr->th.th_task_state_stack_sz = 4;
4473 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4475 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4479#ifdef KMP_ADJUST_BLOCKTIME
4482 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4483 if (__kmp_nth > __kmp_avail_proc) {
4484 __kmp_zero_bt = TRUE;
4493 kmp_balign_t *balign = new_thr->th.th_bar;
4494 for (b = 0; b < bs_last_barrier; ++b)
4495 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4498 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4499 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4506 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4507 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4512 if (!TCR_4(__kmp_init_monitor)) {
4513 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4514 if (!TCR_4(__kmp_init_monitor)) {
4515 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4516 TCW_4(__kmp_init_monitor, 1);
4517 __kmp_create_monitor(&__kmp_monitor);
4518 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4529 while (TCR_4(__kmp_init_monitor) < 2) {
4532 KF_TRACE(10, (
"after monitor thread has started\n"));
4535 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4542 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4544 : __kmp_hidden_helper_threads_num + 1;
4546 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4548 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4551 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4552 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4557 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4559 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4561#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4564 __itt_suppress_mark_range(
4565 __itt_suppress_range, __itt_suppress_threading_errors,
4566 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4567 __itt_suppress_mark_range(
4568 __itt_suppress_range, __itt_suppress_threading_errors,
4569 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4571 __itt_suppress_mark_range(
4572 __itt_suppress_range, __itt_suppress_threading_errors,
4573 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4575 __itt_suppress_mark_range(__itt_suppress_range,
4576 __itt_suppress_threading_errors,
4577 &new_thr->th.th_suspend_init_count,
4578 sizeof(new_thr->th.th_suspend_init_count));
4581 __itt_suppress_mark_range(__itt_suppress_range,
4582 __itt_suppress_threading_errors,
4583 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4584 sizeof(new_thr->th.th_bar[0].bb.b_go));
4585 __itt_suppress_mark_range(__itt_suppress_range,
4586 __itt_suppress_threading_errors,
4587 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4588 sizeof(new_thr->th.th_bar[1].bb.b_go));
4589 __itt_suppress_mark_range(__itt_suppress_range,
4590 __itt_suppress_threading_errors,
4591 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4592 sizeof(new_thr->th.th_bar[2].bb.b_go));
4594 if (__kmp_storage_map) {
4595 __kmp_print_thread_storage_map(new_thr, new_gtid);
4600 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4601 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4602 new_thr->th.th_serial_team = serial_team =
4603 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4607 proc_bind_default, &r_icvs,
4608 0 USE_NESTED_HOT_ARG(NULL));
4610 KMP_ASSERT(serial_team);
4611 serial_team->t.t_serialized = 0;
4613 serial_team->t.t_threads[0] = new_thr;
4615 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4619 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4622 __kmp_initialize_fast_memory(new_thr);
4626 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4627 __kmp_initialize_bget(new_thr);
4630 __kmp_init_random(new_thr);
4634 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4635 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4638 kmp_balign_t *balign = new_thr->th.th_bar;
4639 for (b = 0; b < bs_last_barrier; ++b) {
4640 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4641 balign[b].bb.team = NULL;
4642 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4643 balign[b].bb.use_oncore_barrier = 0;
4646 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4647 new_thr->th.th_sleep_loc_type = flag_unset;
4649 new_thr->th.th_spin_here = FALSE;
4650 new_thr->th.th_next_waiting = 0;
4652 new_thr->th.th_blocking =
false;
4655#if KMP_AFFINITY_SUPPORTED
4656 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4657 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4658 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4659 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4661 new_thr->th.th_def_allocator = __kmp_def_allocator;
4662 new_thr->th.th_prev_level = 0;
4663 new_thr->th.th_prev_num_threads = 1;
4665 TCW_4(new_thr->th.th_in_pool, FALSE);
4666 new_thr->th.th_active_in_pool = FALSE;
4667 TCW_4(new_thr->th.th_active, TRUE);
4675 if (__kmp_adjust_gtid_mode) {
4676 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4677 if (TCR_4(__kmp_gtid_mode) != 2) {
4678 TCW_4(__kmp_gtid_mode, 2);
4681 if (TCR_4(__kmp_gtid_mode) != 1) {
4682 TCW_4(__kmp_gtid_mode, 1);
4687#ifdef KMP_ADJUST_BLOCKTIME
4690 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4691 if (__kmp_nth > __kmp_avail_proc) {
4692 __kmp_zero_bt = TRUE;
4697#if KMP_AFFINITY_SUPPORTED
4699 __kmp_affinity_set_init_mask(new_gtid, FALSE);
4704 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4705 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4707 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4709 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
// Reinitialize team for reuse (called on the hot-team path, so keep writes to
// the team structure to a minimum).
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the primary thread's implicit taskdata.
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  team->t.t_master_tid = 0;
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL);
  team->t.t_invoke = NULL;

  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE;
  team->t.t_x87_fpu_control_word = 0;
  team->t.t_mxcsr = 0;
#endif

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

  team->t.t_copypriv_data = NULL;

  team->t.t_copyin_counter = 0;

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if KMP_AFFINITY_SUPPORTED
static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
                                          int first, int last, int newp) {
  th->th.th_first_place = first;
  th->th.th_last_place = last;
  th->th.th_new_place = newp;
  if (newp != th->th.th_current_place) {
    if (__kmp_display_affinity && team->t.t_display_affinity != 1)
      team->t.t_display_affinity = 1;
    th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
    th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
  }
}
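// The place-partitioning code below repeatedly advances a "place" index around
// the ring of affinity masks.  As an illustration only (this helper does not
// exist in the runtime), the advance step used inline throughout
// __kmp_partition_places() is equivalent to:
//
//   static int __kmp_next_place(int place, int first, int last, int num_masks) {
//     if (place == last)
//       return first;          // wrap within the partition
//     if (place == num_masks - 1)
//       return 0;              // wrap around the end of the full mask ring
//     return place + 1;
//   }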
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
  // Do not partition the places of the hidden helper team
  if (KMP_HIDDEN_HELPER_TEAM(team))
    return;
  // Copy the primary thread's place partition to the team struct
  kmp_info_t *master_th = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master_th != NULL);
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  int first_place = master_th->th.th_first_place;
  int last_place = master_th->th.th_last_place;
  int masters_place = master_th->th.th_current_place;
  int num_masks = __kmp_affinity.num_masks;
  team->t.t_first_place = first_place;
  team->t.t_last_place = last_place;

  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));

  switch (proc_bind) {

  case proc_bind_default:
    // Serial teams might have the proc_bind policy set to proc_bind_default;
    // the primary thread is never rebound, so nothing to do.
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;

  case proc_bind_primary: {
    int f;
    int n_th = team->t.t_nproc;
    for (f = 1; f < n_th; f++) {
      kmp_info_t *th = team->t.t_threads[f];
      KMP_DEBUG_ASSERT(th != NULL);
      __kmp_set_thread_place(team, th, first_place, last_place, masters_place);

      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;
  case proc_bind_close: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = masters_place;
      for (f = 1; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        if (place == last_place) {
          place = first_place;
        } else if (place == (num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
        __kmp_set_thread_place(team, th, first_place, last_place, place);

        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
      }
    } else {
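      // When there are more threads than places (this branch), the
      // partitioning computes S = n_th / n_places threads per place, with
      // rem = n_th - S * n_places places receiving one extra thread every
      // "gap" places.  For example (illustrative numbers only): n_th = 10 and
      // n_places = 4 give S = 2, rem = 2, gap = 4 / 2 = 2, so the places are
      // filled 3, 2, 3, 2.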
4886 int S, rem, gap, s_count;
4887 S = n_th / n_places;
4889 rem = n_th - (S * n_places);
4890 gap = rem > 0 ? n_places / rem : n_places;
4891 int place = masters_place;
4893 for (f = 0; f < n_th; f++) {
4894 kmp_info_t *th = team->t.t_threads[f];
4895 KMP_DEBUG_ASSERT(th != NULL);
4897 __kmp_set_thread_place(team, th, first_place, last_place, place);
4900 if ((s_count == S) && rem && (gap_ct == gap)) {
4902 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4904 if (place == last_place) {
4905 place = first_place;
4906 }
else if (place == (num_masks - 1)) {
4914 }
else if (s_count == S) {
4915 if (place == last_place) {
4916 place = first_place;
4917 }
else if (place == (num_masks - 1)) {
4927 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4928 "partition = [%d,%d]\n",
4929 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4930 th->th.th_new_place, first_place, last_place));
4932 KMP_DEBUG_ASSERT(place == masters_place);
4936 case proc_bind_spread: {
4938 int n_th = team->t.t_nproc;
4941 if (first_place <= last_place) {
4942 n_places = last_place - first_place + 1;
4944 n_places = num_masks - first_place + last_place + 1;
4946 if (n_th <= n_places) {
4949 if (n_places != num_masks) {
4950 int S = n_places / n_th;
4951 int s_count, rem, gap, gap_ct;
4953 place = masters_place;
4954 rem = n_places - n_th * S;
4955 gap = rem ? n_th / rem : 1;
4958 if (update_master_only == 1)
4960 for (f = 0; f < thidx; f++) {
4961 kmp_info_t *th = team->t.t_threads[f];
4962 KMP_DEBUG_ASSERT(th != NULL);
4964 int fplace = place, nplace = place;
4966 while (s_count < S) {
4967 if (place == last_place) {
4968 place = first_place;
4969 }
else if (place == (num_masks - 1)) {
4976 if (rem && (gap_ct == gap)) {
4977 if (place == last_place) {
4978 place = first_place;
4979 }
else if (place == (num_masks - 1)) {
4987 __kmp_set_thread_place(team, th, fplace, place, nplace);
4990 if (place == last_place) {
4991 place = first_place;
4992 }
else if (place == (num_masks - 1)) {
4999 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5000 "partition = [%d,%d], num_masks: %u\n",
5001 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
5002 f, th->th.th_new_place, th->th.th_first_place,
5003 th->th.th_last_place, num_masks));
5009 double current =
static_cast<double>(masters_place);
5011 (
static_cast<double>(n_places + 1) /
static_cast<double>(n_th));
5016 if (update_master_only == 1)
5018 for (f = 0; f < thidx; f++) {
5019 first =
static_cast<int>(current);
5020 last =
static_cast<int>(current + spacing) - 1;
5021 KMP_DEBUG_ASSERT(last >= first);
5022 if (first >= n_places) {
5023 if (masters_place) {
5026 if (first == (masters_place + 1)) {
5027 KMP_DEBUG_ASSERT(f == n_th);
5030 if (last == masters_place) {
5031 KMP_DEBUG_ASSERT(f == (n_th - 1));
5035 KMP_DEBUG_ASSERT(f == n_th);
5040 if (last >= n_places) {
5041 last = (n_places - 1);
5046 KMP_DEBUG_ASSERT(0 <= first);
5047 KMP_DEBUG_ASSERT(n_places > first);
5048 KMP_DEBUG_ASSERT(0 <= last);
5049 KMP_DEBUG_ASSERT(n_places > last);
5050 KMP_DEBUG_ASSERT(last_place >= first_place);
5051 th = team->t.t_threads[f];
5052 KMP_DEBUG_ASSERT(th);
5053 __kmp_set_thread_place(team, th, first, last, place);
5055 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5056 "partition = [%d,%d], spacing = %.4f\n",
5057 __kmp_gtid_from_thread(team->t.t_threads[f]),
5058 team->t.t_id, f, th->th.th_new_place,
5059 th->th.th_first_place, th->th.th_last_place, spacing));
5063 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5065 int S, rem, gap, s_count;
5066 S = n_th / n_places;
5068 rem = n_th - (S * n_places);
5069 gap = rem > 0 ? n_places / rem : n_places;
5070 int place = masters_place;
5073 if (update_master_only == 1)
5075 for (f = 0; f < thidx; f++) {
5076 kmp_info_t *th = team->t.t_threads[f];
5077 KMP_DEBUG_ASSERT(th != NULL);
5079 __kmp_set_thread_place(team, th, place, place, place);
5082 if ((s_count == S) && rem && (gap_ct == gap)) {
5084 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5086 if (place == last_place) {
5087 place = first_place;
5088 }
else if (place == (num_masks - 1)) {
5096 }
else if (s_count == S) {
5097 if (place == last_place) {
5098 place = first_place;
5099 }
else if (place == (num_masks - 1)) {
5108 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5109 "partition = [%d,%d]\n",
5110 __kmp_gtid_from_thread(team->t.t_threads[f]),
5111 team->t.t_id, f, th->th.th_new_place,
5112 th->th.th_first_place, th->th.th_last_place));
5114 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5122 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
}
#endif // KMP_AFFINITY_SUPPORTED

kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data,
#endif
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
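  // Allocation strategy (in order): (1) if the parallel region is not nested
  // (root not active), reuse and resize the hot team; (2) otherwise try to
  // take a team with sufficient t_max_nproc from __kmp_team_pool; (3) if the
  // pool has nothing suitable, allocate and initialize a brand new team.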
5137 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5140 int use_hot_team = !root->r.r_active;
5142 int do_place_partition = 1;
5144 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
5145 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5146 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5149#if KMP_NESTED_HOT_TEAMS
5150 kmp_hot_team_ptr_t *hot_teams;
5152 team = master->th.th_team;
5153 level = team->t.t_active_level;
5154 if (master->th.th_teams_microtask) {
5155 if (master->th.th_teams_size.nteams > 1 &&
5158 (microtask_t)__kmp_teams_master ||
5159 master->th.th_teams_level <
5166 if ((master->th.th_teams_size.nteams == 1 &&
5167 master->th.th_teams_level >= team->t.t_level) ||
5168 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5169 do_place_partition = 0;
5171 hot_teams = master->th.th_hot_teams;
5172 if (level < __kmp_hot_teams_max_level && hot_teams &&
5173 hot_teams[level].hot_team) {
5181 KMP_DEBUG_ASSERT(new_nproc == 1);
5185 if (use_hot_team && new_nproc > 1) {
5186 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5187#if KMP_NESTED_HOT_TEAMS
5188 team = hot_teams[level].hot_team;
5190 team = root->r.r_hot_team;
5193 if (__kmp_tasking_mode != tskm_immediate_exec) {
5194 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5195 "task_team[1] = %p before reinit\n",
5196 team->t.t_task_team[0], team->t.t_task_team[1]));
5200 if (team->t.t_nproc != new_nproc &&
5201 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5203 int old_nthr = team->t.t_nproc;
5204 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5209 if (do_place_partition == 0)
5210 team->t.t_proc_bind = proc_bind_default;
5214 if (team->t.t_nproc == new_nproc) {
5215 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5218 if (team->t.t_size_changed == -1) {
5219 team->t.t_size_changed = 1;
5221 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5225 kmp_r_sched_t new_sched = new_icvs->sched;
5227 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5229 __kmp_reinitialize_team(team, new_icvs,
5230 root->r.r_uber_thread->th.th_ident);
5232 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5233 team->t.t_threads[0], team));
5234 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5236#if KMP_AFFINITY_SUPPORTED
5237 if ((team->t.t_size_changed == 0) &&
5238 (team->t.t_proc_bind == new_proc_bind)) {
5239 if (new_proc_bind == proc_bind_spread) {
5240 if (do_place_partition) {
5242 __kmp_partition_places(team, 1);
5245 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5246 "proc_bind = %d, partition = [%d,%d]\n",
5247 team->t.t_id, new_proc_bind, team->t.t_first_place,
5248 team->t.t_last_place));
5250 if (do_place_partition) {
5251 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5252 __kmp_partition_places(team);
5256 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5258 }
else if (team->t.t_nproc > new_nproc) {
5260 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
5263 team->t.t_size_changed = 1;
5264 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5267 __kmp_add_threads_to_team(team, new_nproc);
5269#if KMP_NESTED_HOT_TEAMS
5270 if (__kmp_hot_teams_mode == 0) {
5273 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5274 hot_teams[level].hot_team_nth = new_nproc;
5277 for (f = new_nproc; f < team->t.t_nproc; f++) {
5278 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5279 if (__kmp_tasking_mode != tskm_immediate_exec) {
5282 team->t.t_threads[f]->th.th_task_team = NULL;
5284 __kmp_free_thread(team->t.t_threads[f]);
5285 team->t.t_threads[f] = NULL;
5287#if KMP_NESTED_HOT_TEAMS
5292 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5293 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5294 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5295 for (
int b = 0; b < bs_last_barrier; ++b) {
5296 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5297 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5299 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5304 team->t.t_nproc = new_nproc;
5306 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5307 __kmp_reinitialize_team(team, new_icvs,
5308 root->r.r_uber_thread->th.th_ident);
5311 for (f = 0; f < new_nproc; ++f) {
5312 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5317 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5318 team->t.t_threads[0], team));
5320 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5323 for (f = 0; f < team->t.t_nproc; f++) {
5324 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5325 team->t.t_threads[f]->th.th_team_nproc ==
5330 if (do_place_partition) {
5331 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5332#if KMP_AFFINITY_SUPPORTED
5333 __kmp_partition_places(team);
5339 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5341 int old_nproc = team->t.t_nproc;
5342 team->t.t_size_changed = 1;
5344#if KMP_NESTED_HOT_TEAMS
5345 int avail_threads = hot_teams[level].hot_team_nth;
5346 if (new_nproc < avail_threads)
5347 avail_threads = new_nproc;
5348 kmp_info_t **other_threads = team->t.t_threads;
5349 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5353 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5354 for (b = 0; b < bs_last_barrier; ++b) {
5355 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5356 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5358 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5362 if (hot_teams[level].hot_team_nth >= new_nproc) {
5365 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5366 team->t.t_nproc = new_nproc;
5370 team->t.t_nproc = hot_teams[level].hot_team_nth;
5371 hot_teams[level].hot_team_nth = new_nproc;
5373 if (team->t.t_max_nproc < new_nproc) {
5375 __kmp_reallocate_team_arrays(team, new_nproc);
5376 __kmp_reinitialize_team(team, new_icvs, NULL);
5379#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5385 kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
5389 for (f = team->t.t_nproc; f < new_nproc; f++) {
5390 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5391 KMP_DEBUG_ASSERT(new_worker);
5392 team->t.t_threads[f] = new_worker;
5395 (
"__kmp_allocate_team: team %d init T#%d arrived: "
5396 "join=%llu, plain=%llu\n",
5397 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5398 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5399 team->t.t_bar[bs_plain_barrier].b_arrived));
5403 kmp_balign_t *balign = new_worker->th.th_bar;
5404 for (b = 0; b < bs_last_barrier; ++b) {
5405 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5406 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5407 KMP_BARRIER_PARENT_FLAG);
5409 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5415#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5417 new_temp_affinity.restore();
5419#if KMP_NESTED_HOT_TEAMS
5422 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5425 __kmp_add_threads_to_team(team, new_nproc);
5429 __kmp_initialize_team(team, new_nproc, new_icvs,
5430 root->r.r_uber_thread->th.th_ident);
5433 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5434 for (f = 0; f < team->t.t_nproc; ++f)
5435 __kmp_initialize_info(team->t.t_threads[f], team, f,
5436 __kmp_gtid_from_tid(f, team));
5439 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5440 for (f = old_nproc; f < team->t.t_nproc; ++f)
5441 team->t.t_threads[f]->th.th_task_state = old_state;
5444 for (f = 0; f < team->t.t_nproc; ++f) {
5445 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5446 team->t.t_threads[f]->th.th_team_nproc ==
5451 if (do_place_partition) {
5452 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5453#if KMP_AFFINITY_SUPPORTED
5454 __kmp_partition_places(team);
5459 kmp_info_t *master = team->t.t_threads[0];
5460 if (master->th.th_teams_microtask) {
5461 for (f = 1; f < new_nproc; ++f) {
5463 kmp_info_t *thr = team->t.t_threads[f];
5464 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5465 thr->th.th_teams_level = master->th.th_teams_level;
5466 thr->th.th_teams_size = master->th.th_teams_size;
5469#if KMP_NESTED_HOT_TEAMS
5473 for (f = 1; f < new_nproc; ++f) {
5474 kmp_info_t *thr = team->t.t_threads[f];
5476 kmp_balign_t *balign = thr->th.th_bar;
5477 for (b = 0; b < bs_last_barrier; ++b) {
5478 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5479 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5481 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5489 __kmp_alloc_argv_entries(argc, team, TRUE);
5490 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5494 KF_TRACE(10, (
" hot_team = %p\n", team));
5497 if (__kmp_tasking_mode != tskm_immediate_exec) {
5498 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5499 "task_team[1] = %p after reinit\n",
5500 team->t.t_task_team[0], team->t.t_task_team[1]));
5505 __ompt_team_assign_id(team, ompt_parallel_data);
5515 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5518 if (team->t.t_max_nproc >= max_nproc) {
5520 __kmp_team_pool = team->t.t_next_pool;
5522 if (max_nproc > 1 &&
5523 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5525 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5530 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5532 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5533 "task_team[1] %p to NULL\n",
5534 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5535 team->t.t_task_team[0] = NULL;
5536 team->t.t_task_team[1] = NULL;
5539 __kmp_alloc_argv_entries(argc, team, TRUE);
5540 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5543 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5544 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5547 for (b = 0; b < bs_last_barrier; ++b) {
5548 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5550 team->t.t_bar[b].b_master_arrived = 0;
5551 team->t.t_bar[b].b_team_arrived = 0;
5556 team->t.t_proc_bind = new_proc_bind;
5558 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5562 __ompt_team_assign_id(team, ompt_parallel_data);
5574 team = __kmp_reap_team(team);
5575 __kmp_team_pool = team;
5580 team = (kmp_team_t *)__kmp_allocate(
sizeof(kmp_team_t));
5583 team->t.t_max_nproc = max_nproc;
5584 if (max_nproc > 1 &&
5585 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5587 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5592 __kmp_allocate_team_arrays(team, max_nproc);
5594 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5595 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5597 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5599 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5600 team->t.t_task_team[0] = NULL;
5602 team->t.t_task_team[1] = NULL;
5605 if (__kmp_storage_map) {
5606 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5610 __kmp_alloc_argv_entries(argc, team, FALSE);
5611 team->t.t_argc = argc;
5614 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5615 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5618 for (b = 0; b < bs_last_barrier; ++b) {
5619 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5621 team->t.t_bar[b].b_master_arrived = 0;
5622 team->t.t_bar[b].b_team_arrived = 0;
5627 team->t.t_proc_bind = new_proc_bind;
5630 __ompt_team_assign_id(team, ompt_parallel_data);
5631 team->t.ompt_serialized_team_info = NULL;
5636 KA_TRACE(20, (
"__kmp_allocate_team: done creating a new team %d.\n",
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  int use_hot_team = team == root->r.r_hot_team;
5660#if KMP_NESTED_HOT_TEAMS
5663 level = team->t.t_active_level - 1;
5664 if (master->th.th_teams_microtask) {
5665 if (master->th.th_teams_size.nteams > 1) {
5669 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5670 master->th.th_teams_level == team->t.t_level) {
5676 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5678 if (level < __kmp_hot_teams_max_level) {
5679 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5686 TCW_SYNC_PTR(team->t.t_pkfn,
5689 team->t.t_copyin_counter = 0;
5694 if (!use_hot_team) {
5695 if (__kmp_tasking_mode != tskm_immediate_exec) {
5697 for (f = 1; f < team->t.t_nproc; ++f) {
5698 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5699 kmp_info_t *th = team->t.t_threads[f];
5700 volatile kmp_uint32 *state = &th->th.th_reap_state;
5701 while (*state != KMP_SAFE_TO_REAP) {
5705 if (!__kmp_is_thread_alive(th, &ecode)) {
5706 *state = KMP_SAFE_TO_REAP;
5711 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5712 if (fl.is_sleeping())
5713 fl.resume(__kmp_gtid_from_thread(th));
5720 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5721 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5722 if (task_team != NULL) {
5723 for (f = 0; f < team->t.t_nproc; ++f) {
5724 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5725 team->t.t_threads[f]->th.th_task_team = NULL;
5729 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5730 __kmp_get_gtid(), task_team, team->t.t_id));
5731#if KMP_NESTED_HOT_TEAMS
5732 __kmp_free_task_team(master, task_team);
5734 team->t.t_task_team[tt_idx] = NULL;
5740 team->t.t_parent = NULL;
5741 team->t.t_level = 0;
5742 team->t.t_active_level = 0;
5745 for (f = 1; f < team->t.t_nproc; ++f) {
5746 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5747 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5748 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5751 __kmp_free_thread(team->t.t_threads[f]);
5754 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5757 team->t.b->go_release();
5758 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5759 for (f = 1; f < team->t.t_nproc; ++f) {
5760 if (team->t.b->sleep[f].sleep) {
5761 __kmp_atomic_resume_64(
5762 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5763 (kmp_atomic_flag_64<> *)NULL);
5768 for (
int f = 1; f < team->t.t_nproc; ++f) {
5769 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5775 for (f = 1; f < team->t.t_nproc; ++f) {
5776 team->t.t_threads[f] = NULL;
5779 if (team->t.t_max_nproc > 1 &&
5780 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5781 distributedBarrier::deallocate(team->t.b);
5786 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5787 __kmp_team_pool = (
volatile kmp_team_t *)team;
5790 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5791 team->t.t_threads[1]->th.th_cg_roots);
5792 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5794 for (f = 1; f < team->t.t_nproc; ++f) {
5795 kmp_info_t *thr = team->t.t_threads[f];
5796 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5797 thr->th.th_cg_roots->cg_root == thr);
5799 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5800 thr->th.th_cg_roots = tmp->up;
5801 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5802 " up to node %p. cg_nthreads was %d\n",
5803 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5804 int i = tmp->cg_nthreads--;
5809 if (thr->th.th_cg_roots)
5810 thr->th.th_current_task->td_icvs.thread_limit =
5811 thr->th.th_cg_roots->cg_thread_limit;
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  /* free stuff */
  __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  KMP_MB();
  return next_pool;
}
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving a thread to the pool, switch it to wait on its own b_go flag
  // with no associated team.
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put thread back on the free pool */
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL);

  while (this_th->th.th_cg_roots) {
    this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
    kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
    if (tmp->cg_root == this_th) { // thread is a cg_root
      KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(5,
               ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
      this_th->th.th_cg_roots = tmp->up;
      __kmp_free(tmp);
    } else { // worker thread
      if (tmp->cg_nthreads == 0) { // last thread leaves contention group
        __kmp_free(tmp);
      }
      this_th->th.th_cg_roots = NULL;
      break;
    }
  }

  /* Drop the implicit task data from the thread: it may be shared with other
     threads and must not be freed twice when the root is destroyed in
     __kmp_reap_thread. */
  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If __kmp_thread_pool_insert_pt is already past the new insert point,
  // re-scan the entire list.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list for a place to insert the thread; scan is the address
  // of a link in the list, possibly the address of __kmp_thread_pool itself.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }

  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the new element on the list and remember it as the new insert
  // point.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  __kmp_suspend_initialize_thread(this_th);
  __kmp_lock_suspend_mx(this_th);
  if (this_th->th.th_active == TRUE) {
    KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
    this_th->th.th_active_in_pool = TRUE;
  } else {
    KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
  }
  __kmp_unlock_suspend_mx(this_th);

  TCW_4(__kmp_nth, __kmp_nth - 1);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to the user setting or default if necessary */
  /* Middle initialization might never have occurred */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */
}
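// The insertion above keeps __kmp_thread_pool sorted by ascending gtid (see
// the KMP_DEBUG_ASSERT on th_next_pool).  A stand-alone sketch of the same
// "insert into a sorted singly linked list via a pointer-to-link" idiom
// (illustrative only; pool_head is a hypothetical name, not runtime code):
//
//   kmp_info_t **link = &pool_head;
//   while (*link && (*link)->th.th_info.ds.ds_gtid < gtid)
//     link = &(*link)->th.th_next_pool;
//   new_thr->th.th_next_pool = *link; // splice before *link
//   *link = new_thr;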
void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
  }

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_begin();
#endif

#if OMPT_SUPPORT
  ompt_data_t *thread_data = nullptr;
  if (ompt_enabled.enabled) {
    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
    *thread_data = ompt_data_none;

    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    this_thr->th.ompt_thread_info.wait_id = 0;
    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
    this_thr->th.ompt_thread_info.parallel_flags = 0;
    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_worker, thread_data);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_idle;
  }
#endif

  /* This is the place where threads wait for work */
  while (!TCR_4(__kmp_global.g.g_done)) {
    KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);

    /* wait for work to do */
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));

    /* No tid yet since not part of a team */
    __kmp_fork_barrier(gtid, KMP_GTID_DNE);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif

    pteam = &this_thr->th.th_team;

    /* have we been allocated? */
    if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
      /* we were just woken up, so run our new task */
      if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        int rc;
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));

        updateHWFPControl(*pteam);

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
        }
#endif

        rc = (*pteam)->t.t_invoke(gtid);
        KMP_ASSERT(rc);

        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
      }
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        /* no frame set while outside task */
        __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;

        this_thr->th.ompt_thread_info.state = ompt_state_overhead;
      }
#endif
      /* join barrier after parallel region */
      __kmp_join_barrier(gtid);
    }
  }

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
  }
#endif

  this_thr->th.th_task_team = NULL;
  /* run the destructors for the threadprivate data for this thread */
  __kmp_common_destroy_gtid(gtid);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));

#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}
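// Worker lifecycle summary: a worker created by __kmp_create_worker() enters
// __kmp_launch_thread() and then loops "wait at the fork barrier -> run the
// team's microtask via t_invoke -> wait at the join barrier" until
// __kmp_global.g.g_done is set, at which point it runs its threadprivate
// destructors and returns.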
/* Destructor for the thread-specific data allocated by the runtime.  The gtid
   is stored in TLS as gtid+1 so that a valid gtid of zero can be told apart
   from "no value set". */
void __kmp_internal_end_dest(void *specific_gtid) {
  // Make sure no significant bits are lost
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

#endif

/* When the atexit handler is called, there may still be more than one thread
   alive. */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed __kmp_forkjoin_lock is held.
  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (!is_root) {
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      /* Assume the threads are at the fork barrier here */
      KA_TRACE(
          20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
               gtid));
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        while (
            !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
          KMP_CPU_PAUSE();
        __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
      } else {
        /* Need release fence here to prevent seg faults for tree forkjoin
           barrier (GEH) */
        kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                           thread);
        __kmp_release_64(&flag);
      }
    }

    // Terminate the OS thread.
    __kmp_reap_worker(thread);

    // If the thread was actively spinning in the pool, decrement the global
    // count.
    if (thread->th.th_active_in_pool) {
      thread->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
    }
  }

  __kmp_free_implicit_task(thread);

#if USE_FAST_MEMORY
  __kmp_free_fast_memory(thread);
#endif

  __kmp_suspend_uninitialize_thread(thread);

  KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
  TCW_SYNC_PTR(__kmp_threads[gtid], NULL);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to the user setting or default if necessary */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* free the memory being used */
  if (__kmp_env_consistency_check) {
    if (thread->th.th_cons) {
      __kmp_free_cons_stack(thread->th.th_cons);
      thread->th.th_cons = NULL;
    }
  }

  if (thread->th.th_pri_common != NULL) {
    __kmp_free(thread->th.th_pri_common);
    thread->th.th_pri_common = NULL;
  }

  if (thread->th.th_task_state_memo_stack != NULL) {
    __kmp_free(thread->th.th_task_state_memo_stack);
    thread->th.th_task_state_memo_stack = NULL;
  }

#if KMP_USE_BGET
  if (thread->th.th_local.bget_data != NULL) {
    __kmp_finalize_bget(thread);
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  if (thread->th.th_affin_mask != NULL) {
    KMP_CPU_FREE(thread->th.th_affin_mask);
    thread->th.th_affin_mask = NULL;
  }
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_HIER_SCHED
  if (thread->th.th_hier_bar_data != NULL) {
    __kmp_free(thread->th.th_hier_bar_data);
    thread->th.th_hier_bar_data = NULL;
  }
#endif

  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
}
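// __kmp_reap_thread() is the hard counterpart of __kmp_free_thread(): instead
// of parking the kmp_info_t on the pool, it wakes the worker out of the fork
// barrier, joins the underlying OS thread, and releases every per-thread
// resource (implicit task, fast memory, consistency stack, affinity mask,
// hierarchical barrier data, and the serialized team).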
static void __kmp_itthash_clean(kmp_info_t *th) {
#if USE_ITT_NOTIFY
  if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
  if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
#endif
}
static void __kmp_internal_end(void) {
6316 __kmp_unregister_library();
6323 __kmp_reclaim_dead_roots();
6327 for (i = 0; i < __kmp_threads_capacity; i++)
6329 if (__kmp_root[i]->r.r_active)
6332 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6334 if (i < __kmp_threads_capacity) {
6346 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6347 if (TCR_4(__kmp_init_monitor)) {
6348 __kmp_reap_monitor(&__kmp_monitor);
6349 TCW_4(__kmp_init_monitor, 0);
6351 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6352 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6358 for (i = 0; i < __kmp_threads_capacity; i++) {
6359 if (__kmp_root[i]) {
6362 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6371 while (__kmp_thread_pool != NULL) {
6373 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6374 __kmp_thread_pool = thread->th.th_next_pool;
6376 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6377 thread->th.th_next_pool = NULL;
6378 thread->th.th_in_pool = FALSE;
6379 __kmp_reap_thread(thread, 0);
6381 __kmp_thread_pool_insert_pt = NULL;
6384 while (__kmp_team_pool != NULL) {
6386 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6387 __kmp_team_pool = team->t.t_next_pool;
6389 team->t.t_next_pool = NULL;
6390 __kmp_reap_team(team);
6393 __kmp_reap_task_teams();
6400 for (i = 0; i < __kmp_threads_capacity; i++) {
6401 kmp_info_t *thr = __kmp_threads[i];
6402 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6407 for (i = 0; i < __kmp_threads_capacity; ++i) {
6414 TCW_SYNC_4(__kmp_init_common, FALSE);
6416 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6424 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6425 if (TCR_4(__kmp_init_monitor)) {
6426 __kmp_reap_monitor(&__kmp_monitor);
6427 TCW_4(__kmp_init_monitor, 0);
6429 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6430 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6433 TCW_4(__kmp_init_gtid, FALSE);
void __kmp_internal_end_library(int gtid_req) {
6449 if (__kmp_global.g.g_abort) {
6450 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6454 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6455 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6460 if (TCR_4(__kmp_init_hidden_helper) &&
6461 !TCR_4(__kmp_hidden_helper_team_done)) {
6462 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6464 __kmp_hidden_helper_main_thread_release();
6466 __kmp_hidden_helper_threads_deinitz_wait();
6472 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6474 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6475 if (gtid == KMP_GTID_SHUTDOWN) {
6476 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6477 "already shutdown\n"));
6479 }
else if (gtid == KMP_GTID_MONITOR) {
6480 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6481 "registered, or system shutdown\n"));
6483 }
else if (gtid == KMP_GTID_DNE) {
6484 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6487 }
else if (KMP_UBER_GTID(gtid)) {
6489 if (__kmp_root[gtid]->r.r_active) {
6490 __kmp_global.g.g_abort = -1;
6491 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6492 __kmp_unregister_library();
6494 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6498 __kmp_itthash_clean(__kmp_threads[gtid]);
6501 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6502 __kmp_unregister_root_current_thread(gtid);
6509#ifdef DUMP_DEBUG_ON_EXIT
6510 if (__kmp_debug_buf)
6511 __kmp_dump_debug_buffer();
6516 __kmp_unregister_library();
6521 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6524 if (__kmp_global.g.g_abort) {
6525 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6527 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6530 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6531 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6540 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6543 __kmp_internal_end();
6545 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6546 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6548 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6550#ifdef DUMP_DEBUG_ON_EXIT
6551 if (__kmp_debug_buf)
6552 __kmp_dump_debug_buffer();
6556 __kmp_close_console();
6559 __kmp_fini_allocator();
void __kmp_internal_end_thread(int gtid_req) {
6572 if (__kmp_global.g.g_abort) {
6573 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6577 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6578 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6583 if (TCR_4(__kmp_init_hidden_helper) &&
6584 !TCR_4(__kmp_hidden_helper_team_done)) {
6585 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6587 __kmp_hidden_helper_main_thread_release();
6589 __kmp_hidden_helper_threads_deinitz_wait();
6596 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6598 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6599 if (gtid == KMP_GTID_SHUTDOWN) {
6600 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6601 "already shutdown\n"));
6603 }
else if (gtid == KMP_GTID_MONITOR) {
6604 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6605 "registered, or system shutdown\n"));
6607 }
else if (gtid == KMP_GTID_DNE) {
6608 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6612 }
else if (KMP_UBER_GTID(gtid)) {
6614 if (__kmp_root[gtid]->r.r_active) {
6615 __kmp_global.g.g_abort = -1;
6616 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6618 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6622 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6624 __kmp_unregister_root_current_thread(gtid);
6628 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6631 __kmp_threads[gtid]->th.th_task_team = NULL;
6635 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6641 if (__kmp_pause_status != kmp_hard_paused)
6645 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6650 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6653 if (__kmp_global.g.g_abort) {
6654 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6656 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6659 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6660 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6671 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6673 for (i = 0; i < __kmp_threads_capacity; ++i) {
6674 if (KMP_UBER_GTID(i)) {
6677 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6678 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6679 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6686 __kmp_internal_end();
6688 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6689 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6691 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6693#ifdef DUMP_DEBUG_ON_EXIT
6694 if (__kmp_debug_buf)
6695 __kmp_dump_debug_buffer();
static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in the env var __KMP_REGISTERED_LIB_<pid>.

static inline char *__kmp_reg_status_name() {
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
}
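// Library registration uses a single value of the form
//   "<address of __kmp_registration_flag>-<flag value in hex>-<library file>"
// (see the "%p-%lx-%s" format below).  On a name collision,
// __kmp_register_library_startup() splits the stored value on '-' and treats
// the entry as a live neighbor only if the parsed address is mapped in this
// process and still holds the parsed flag value; otherwise the entry is
// treated as stale and cleared.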
6721#if defined(KMP_USE_SHM)
6722bool __kmp_shm_available =
false;
6723bool __kmp_tmp_available =
false;
6725char *temp_reg_status_file_name =
nullptr;
void __kmp_register_library_startup(void) {
6730 char *name = __kmp_reg_status_name();
6736#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6737 __kmp_initialize_system_tick();
6739 __kmp_read_system_time(&time.dtime);
6740 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6741 __kmp_registration_str =
6742 __kmp_str_format(
"%p-%lx-%s", &__kmp_registration_flag,
6743 __kmp_registration_flag, KMP_LIBRARY_FILE);
6745 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name,
6746 __kmp_registration_str));
6752#if defined(KMP_USE_SHM)
6753 char *shm_name =
nullptr;
6754 char *data1 =
nullptr;
6755 __kmp_shm_available = __kmp_detect_shm();
6756 if (__kmp_shm_available) {
6758 shm_name = __kmp_str_format(
"/%s", name);
6759 int shm_preexist = 0;
6760 fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6761 if ((fd1 == -1) && (errno == EEXIST)) {
6764 fd1 = shm_open(shm_name, O_RDWR, 0666);
6766 KMP_WARNING(FunctionError,
"Can't open SHM");
6767 __kmp_shm_available =
false;
6772 if (__kmp_shm_available && shm_preexist == 0) {
6773 if (ftruncate(fd1, SHM_SIZE) == -1) {
6774 KMP_WARNING(FunctionError,
"Can't set size of SHM");
6775 __kmp_shm_available =
false;
6778 if (__kmp_shm_available) {
6779 data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6781 if (data1 == MAP_FAILED) {
6782 KMP_WARNING(FunctionError,
"Can't map SHM");
6783 __kmp_shm_available =
false;
6786 if (__kmp_shm_available) {
6787 if (shm_preexist == 0) {
6788 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6791 value = __kmp_str_format(
"%s", data1);
6792 munmap(data1, SHM_SIZE);
6797 if (!__kmp_shm_available)
6798 __kmp_tmp_available = __kmp_detect_tmp();
6799 if (!__kmp_shm_available && __kmp_tmp_available) {
6806 temp_reg_status_file_name = __kmp_str_format(
"/tmp/%s", name);
6807 int tmp_preexist = 0;
6808 fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6809 if ((fd1 == -1) && (errno == EEXIST)) {
6812 fd1 = open(temp_reg_status_file_name, O_RDWR, 0666);
6814 KMP_WARNING(FunctionError,
"Can't open TEMP");
6815 __kmp_tmp_available =
false;
6820 if (__kmp_tmp_available && tmp_preexist == 0) {
6822 if (ftruncate(fd1, SHM_SIZE) == -1) {
6823 KMP_WARNING(FunctionError,
"Can't set size of /tmp file");
6824 __kmp_tmp_available =
false;
6827 if (__kmp_tmp_available) {
6828 data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
6830 if (data1 == MAP_FAILED) {
6831 KMP_WARNING(FunctionError,
"Can't map /tmp");
6832 __kmp_tmp_available =
false;
6835 if (__kmp_tmp_available) {
6836 if (tmp_preexist == 0) {
6837 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6840 value = __kmp_str_format(
"%s", data1);
6841 munmap(data1, SHM_SIZE);
6846 if (!__kmp_shm_available && !__kmp_tmp_available) {
6849 __kmp_env_set(name, __kmp_registration_str, 0);
6851 value = __kmp_env_get(name);
6855 __kmp_env_set(name, __kmp_registration_str, 0);
6857 value = __kmp_env_get(name);
6860 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6867 char *flag_addr_str = NULL;
6868 char *flag_val_str = NULL;
6869 char const *file_name = NULL;
6870 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6871 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6874 unsigned long *flag_addr = 0;
6875 unsigned long flag_val = 0;
6876 KMP_SSCANF(flag_addr_str,
"%p", RCAST(
void **, &flag_addr));
6877 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6878 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6882 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6896 file_name =
"unknown library";
6901 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6902 if (!__kmp_str_match_true(duplicate_ok)) {
6904 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6905 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6907 KMP_INTERNAL_FREE(duplicate_ok);
6908 __kmp_duplicate_library_ok = 1;
6913#if defined(KMP_USE_SHM)
6914 if (__kmp_shm_available) {
6915 shm_unlink(shm_name);
6916 }
else if (__kmp_tmp_available) {
6917 unlink(temp_reg_status_file_name);
6920 __kmp_env_unset(name);
6924 __kmp_env_unset(name);
6928 KMP_DEBUG_ASSERT(0);
6932 KMP_INTERNAL_FREE((
void *)value);
6933#if defined(KMP_USE_SHM)
6935 KMP_INTERNAL_FREE((
void *)shm_name);
6938 KMP_INTERNAL_FREE((
void *)name);
void __kmp_unregister_library(void) {
6944 char *name = __kmp_reg_status_name();
6947#if defined(KMP_USE_SHM)
6948 char *shm_name =
nullptr;
6950 if (__kmp_shm_available) {
6951 shm_name = __kmp_str_format(
"/%s", name);
6952 fd1 = shm_open(shm_name, O_RDONLY, 0666);
6954 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6955 if (data1 != MAP_FAILED) {
6956 value = __kmp_str_format(
"%s", data1);
6957 munmap(data1, SHM_SIZE);
6961 }
else if (__kmp_tmp_available) {
6962 fd1 = open(temp_reg_status_file_name, O_RDONLY);
6964 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6965 if (data1 != MAP_FAILED) {
6966 value = __kmp_str_format(
"%s", data1);
6967 munmap(data1, SHM_SIZE);
6972 value = __kmp_env_get(name);
6975 value = __kmp_env_get(name);
6978 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6979 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6980 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6982#if defined(KMP_USE_SHM)
6983 if (__kmp_shm_available) {
6984 shm_unlink(shm_name);
6985 }
else if (__kmp_tmp_available) {
6986 unlink(temp_reg_status_file_name);
6988 __kmp_env_unset(name);
6991 __kmp_env_unset(name);
6995#if defined(KMP_USE_SHM)
6997 KMP_INTERNAL_FREE(shm_name);
6998 if (temp_reg_status_file_name)
6999 KMP_INTERNAL_FREE(temp_reg_status_file_name);
7002 KMP_INTERNAL_FREE(__kmp_registration_str);
7003 KMP_INTERNAL_FREE(value);
7004 KMP_INTERNAL_FREE(name);
7006 __kmp_registration_flag = 0;
7007 __kmp_registration_str = NULL;
#if KMP_MIC_SUPPORTED
// Check the type of Intel MIC this is running on, if any.
static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}
#endif /* KMP_MIC_SUPPORTED */

#if KMP_HAVE_UMWAIT
static void __kmp_user_level_mwait_init() {
  struct kmp_cpuid buf;
  __kmp_x86_cpuid(7, 0, &buf);
  __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
  __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
  __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
}
#elif KMP_HAVE_MWAIT
#ifndef AT_INTELPHIUSERMWAIT
// Spurious, non-existent value that should always fail to return anything.
// Will be replaced with the correct value when we know that.
#define AT_INTELPHIUSERMWAIT 10000
#endif
// getauxval() may not be available on older systems; provide a weak fallback
// that always fails so the environment-variable path below is used instead.
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }

static void __kmp_user_level_mwait_init() {
  // When getauxval() and a correct value of AT_INTELPHIUSERMWAIT are
  // available, use them to find out whether user-level mwait is enabled;
  // otherwise honor the KMP_USER_LEVEL_MWAIT environment setting on MIC.
  if (__kmp_mic_type == mic3) {
    unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
    if ((res & 0x1) || __kmp_user_level_mwait) {
      __kmp_mwait_enabled = TRUE;
      if (__kmp_user_level_mwait) {
        KMP_INFORM(EnvMwaitWarn);
      }
    } else {
      __kmp_mwait_enabled = FALSE;
    }
  }
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
#endif /* KMP_HAVE_UMWAIT */
static void __kmp_do_serial_initialize(void) {
7080 KA_TRACE(10, (
"__kmp_do_serial_initialize: enter\n"));
7082 KMP_DEBUG_ASSERT(
sizeof(kmp_int32) == 4);
7083 KMP_DEBUG_ASSERT(
sizeof(kmp_uint32) == 4);
7084 KMP_DEBUG_ASSERT(
sizeof(kmp_int64) == 8);
7085 KMP_DEBUG_ASSERT(
sizeof(kmp_uint64) == 8);
7086 KMP_DEBUG_ASSERT(
sizeof(kmp_intptr_t) ==
sizeof(
void *));
7096 __kmp_validate_locks();
7098#if ENABLE_LIBOMPTARGET
7100 __kmp_init_omptarget();
7104 __kmp_init_allocator();
7110 if (__kmp_need_register_serial)
7111 __kmp_register_library_startup();
7114 if (TCR_4(__kmp_global.g.g_done)) {
7115 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
7118 __kmp_global.g.g_abort = 0;
7119 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7122#if KMP_USE_ADAPTIVE_LOCKS
7123#if KMP_DEBUG_ADAPTIVE_LOCKS
7124 __kmp_init_speculative_stats();
7127#if KMP_STATS_ENABLED
7130 __kmp_init_lock(&__kmp_global_lock);
7131 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7132 __kmp_init_lock(&__kmp_debug_lock);
7133 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7134 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7135 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7136 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7137 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7138 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7139 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7140 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7141 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7142 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7143 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7144 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7145 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7146 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7147 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7149 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7151 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7155 __kmp_runtime_initialize();
7157#if KMP_MIC_SUPPORTED
7158 __kmp_check_mic_type();
7165 __kmp_abort_delay = 0;
7169 __kmp_dflt_team_nth_ub = __kmp_xproc;
7170 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7171 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7173 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7174 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7176 __kmp_max_nth = __kmp_sys_max_nth;
7177 __kmp_cg_max_nth = __kmp_sys_max_nth;
7178 __kmp_teams_max_nth = __kmp_xproc;
7179 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7180 __kmp_teams_max_nth = __kmp_sys_max_nth;
7185 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7187 __kmp_monitor_wakeups =
7188 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7189 __kmp_bt_intervals =
7190 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7193 __kmp_library = library_throughput;
7195 __kmp_static = kmp_sch_static_balanced;
7202#if KMP_FAST_REDUCTION_BARRIER
7203#define kmp_reduction_barrier_gather_bb ((int)1)
7204#define kmp_reduction_barrier_release_bb ((int)1)
7205#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7206#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7208 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7209 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7210 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7211 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7212 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7213#if KMP_FAST_REDUCTION_BARRIER
7214 if (i == bs_reduction_barrier) {
7216 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7217 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7218 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7219 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7223#if KMP_FAST_REDUCTION_BARRIER
7224#undef kmp_reduction_barrier_release_pat
7225#undef kmp_reduction_barrier_gather_pat
7226#undef kmp_reduction_barrier_release_bb
7227#undef kmp_reduction_barrier_gather_bb
7229#if KMP_MIC_SUPPORTED
7230 if (__kmp_mic_type == mic2) {
7232 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7233 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7235 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7236 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7238#if KMP_FAST_REDUCTION_BARRIER
7239 if (__kmp_mic_type == mic2) {
7240 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7241 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7248 __kmp_env_checks = TRUE;
7250 __kmp_env_checks = FALSE;
7254 __kmp_foreign_tp = TRUE;
7256 __kmp_global.g.g_dynamic = FALSE;
7257 __kmp_global.g.g_dynamic_mode = dynamic_default;
7259 __kmp_init_nesting_mode();
7261 __kmp_env_initialize(NULL);
7263#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7264 __kmp_user_level_mwait_init();
7268 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
7269 if (__kmp_str_match_true(val)) {
7270 kmp_str_buf_t buffer;
7271 __kmp_str_buf_init(&buffer);
7272 __kmp_i18n_dump_catalog(&buffer);
7273 __kmp_printf(
"%s", buffer.str);
7274 __kmp_str_buf_free(&buffer);
7276 __kmp_env_free(&val);
7279 __kmp_threads_capacity =
7280 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7282 __kmp_tp_capacity = __kmp_default_tp_capacity(
7283 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7288 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7289 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7290 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7291 __kmp_thread_pool = NULL;
7292 __kmp_thread_pool_insert_pt = NULL;
7293 __kmp_team_pool = NULL;
7300 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * __kmp_threads_capacity +
7302 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
7303 __kmp_root = (kmp_root_t **)((
char *)__kmp_threads +
7304 sizeof(kmp_info_t *) * __kmp_threads_capacity);
7307 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7309 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7314 gtid = __kmp_register_root(TRUE);
7315 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7316 KMP_ASSERT(KMP_UBER_GTID(gtid));
7317 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7321 __kmp_common_initialize();
7325 __kmp_register_atfork();
7328#if !KMP_DYNAMIC_LIB || \
7329 ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
7334 int rc = atexit(__kmp_internal_end_atexit);
7336 __kmp_fatal(KMP_MSG(FunctionError,
"atexit()"), KMP_ERR(rc),
7342#if KMP_HANDLE_SIGNALS
7348 __kmp_install_signals(FALSE);
7351 __kmp_install_signals(TRUE);
7356 __kmp_init_counter++;
7358 __kmp_init_serial = TRUE;
7360 if (__kmp_version) {
7361 __kmp_print_version_1();
7364 if (__kmp_settings) {
7368 if (__kmp_display_env || __kmp_display_env_verbose) {
7369 __kmp_env_print_2();
7378 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
}

void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
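// __kmp_serial_initialize() above and the __kmp_middle_initialize() /
// __kmp_parallel_initialize() wrappers further below all follow the same
// double-checked pattern around __kmp_initz_lock; schematically
// (illustration only):
//
//   if (already_initialized)        // cheap unsynchronized check
//     return;
//   __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
//   if (already_initialized) {      // re-check under the lock
//     __kmp_release_bootstrap_lock(&__kmp_initz_lock);
//     return;
//   }
//   do_the_initialization();
//   __kmp_release_bootstrap_lock(&__kmp_initz_lock);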
static void __kmp_do_middle_initialize(void) {
7396 int prev_dflt_team_nth;
7398 if (!__kmp_init_serial) {
7399 __kmp_do_serial_initialize();
7402 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7404 if (UNLIKELY(!__kmp_need_register_serial)) {
7407 __kmp_register_library_startup();
7412 prev_dflt_team_nth = __kmp_dflt_team_nth;
7414#if KMP_AFFINITY_SUPPORTED
7417 __kmp_affinity_initialize(__kmp_affinity);
7421 KMP_ASSERT(__kmp_xproc > 0);
7422 if (__kmp_avail_proc == 0) {
7423 __kmp_avail_proc = __kmp_xproc;
7429 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7430 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7435 if (__kmp_dflt_team_nth == 0) {
7436#ifdef KMP_DFLT_NTH_CORES
7438 __kmp_dflt_team_nth = __kmp_ncores;
7439 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7440 "__kmp_ncores (%d)\n",
7441 __kmp_dflt_team_nth));
7444 __kmp_dflt_team_nth = __kmp_avail_proc;
7445 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7446 "__kmp_avail_proc(%d)\n",
7447 __kmp_dflt_team_nth));
7451 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7452 __kmp_dflt_team_nth = KMP_MIN_NTH;
7454 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7455 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7458 if (__kmp_nesting_mode > 0)
7459 __kmp_set_nesting_mode_threads();
7463 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7465 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7470 for (i = 0; i < __kmp_threads_capacity; i++) {
7471 kmp_info_t *thread = __kmp_threads[i];
7474 if (thread->th.th_current_task->td_icvs.nproc != 0)
7477 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7482 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7483 __kmp_dflt_team_nth));
7485#ifdef KMP_ADJUST_BLOCKTIME
7487 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7488 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7489 if (__kmp_nth > __kmp_avail_proc) {
7490 __kmp_zero_bt = TRUE;
7496 TCW_SYNC_4(__kmp_init_middle, TRUE);
7498 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_parallel_initialize(void) {
  int gtid = __kmp_entry_gtid(); // this might be a new root

  /* synchronize parallel initialization (for sibling) */
  if (TCR_4(__kmp_init_parallel))
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_parallel)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* reinitialization attempt after we have already shut down */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(
        10,
        ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
  }

  /* __kmp_initz_lock is already held, so calling __kmp_serial_initialize
     would cause a deadlock; call __kmp_do_middle_initialize directly. */
  if (!__kmp_init_middle) {
    __kmp_do_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  __kmp_resume_if_hard_paused();

  /* begin initialization */
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
  KMP_ASSERT(KMP_UBER_GTID(gtid));

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Save the FP control regs so that new threads are created with them intact.
  __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_store_mxcsr(&__kmp_init_mxcsr);
  __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_HANDLE_SIGNALS
  /* must be after __kmp_serial_initialize */
  __kmp_install_signals(TRUE);
#endif

  __kmp_suspend_initialize();

#if defined(USE_LOAD_BALANCE)
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
  }
#else
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
  }
#endif

  if (__kmp_version) {
    __kmp_print_version_2();
  }

  /* we have finished parallel initialization */
  TCW_SYNC_4(__kmp_init_parallel, TRUE);

  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;

  // __kmp_parallel_initialize is required before initializing hidden helpers
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // Double check. Note that this double check must not be placed before
  // __kmp_parallel_initialize, as that would cause a deadlock.
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

#if KMP_AFFINITY_SUPPORTED
  // Initialize hidden helper affinity settings.
  // __kmp_parallel_initialize() above already set up regular affinity
  // (and topology) if needed.
  if (!__kmp_hh_affinity.flags.initialized)
    __kmp_affinity_initialize(__kmp_hh_affinity);
#endif

  // Set the count of hidden helper tasks to be executed to zero
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

  // Indicate that we are initializing the hidden helper team/threads
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);

  // Platform-independent initialization
  __kmp_do_initialize_hidden_helper_threads();

  // Wait here for the hidden helper team initialization to finish
  __kmp_hidden_helper_threads_initz_wait();

  // We have finished hidden helper initialization
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
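// The hidden helper team consists of __kmp_hidden_helper_threads_num
// runtime-internal threads (see the __kmp_hidden_helper_* globals near the
// end of this file). __kmp_hidden_helper_threads_initz_wait() blocks until
// those threads have been set up; see __kmp_hidden_helper_threads_initz_routine()
// and __kmp_hidden_helper_wrapper_fn() later in this file.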
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB(); /* Flush all pending memory write invalidates. */

  /* none of the threads have encountered any constructs yet */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; /* reset doacross dispatch buffer counter */
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
7665int __kmp_invoke_task_func(
int gtid) {
7667 int tid = __kmp_tid_from_gtid(gtid);
7668 kmp_info_t *this_thr = __kmp_threads[gtid];
7669 kmp_team_t *team = this_thr->th.th_team;
7671 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7673 if (__itt_stack_caller_create_ptr) {
7675 if (team->t.t_stack_id != NULL) {
7676 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7678 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7679 __kmp_itt_stack_callee_enter(
7680 (__itt_caller)team->t.t_parent->t.t_stack_id);
7684#if INCLUDE_SSC_MARKS
7685 SSC_MARK_INVOKING();
7690 void **exit_frame_p;
7691 ompt_data_t *my_task_data;
7692 ompt_data_t *my_parallel_data;
7695 if (ompt_enabled.enabled) {
7696 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7697 .ompt_task_info.frame.exit_frame.ptr);
7699 exit_frame_p = &dummy;
7703 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7704 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7705 if (ompt_enabled.ompt_callback_implicit_task) {
7706 ompt_team_size = team->t.t_nproc;
7707 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7708 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7709 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7710 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7714#if KMP_STATS_ENABLED
7716 if (previous_state == stats_state_e::TEAMS_REGION) {
7717 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7719 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7721 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7724 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7725 tid, (
int)team->t.t_argc, (
void **)team->t.t_argv
7732 *exit_frame_p = NULL;
7733 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7736#if KMP_STATS_ENABLED
7737 if (previous_state == stats_state_e::TEAMS_REGION) {
7738 KMP_SET_THREAD_STATE(previous_state);
7740 KMP_POP_PARTITIONED_TIMER();
7744 if (__itt_stack_caller_create_ptr) {
7746 if (team->t.t_stack_id != NULL) {
7747 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7749 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7750 __kmp_itt_stack_callee_leave(
7751 (__itt_caller)team->t.t_parent->t.t_stack_id);
7755 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7760void __kmp_teams_master(
int gtid) {
7762 kmp_info_t *thr = __kmp_threads[gtid];
7763 kmp_team_t *team = thr->th.th_team;
7764 ident_t *loc = team->t.t_ident;
7765 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7766 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7767 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7768 KA_TRACE(20, (
"__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7769 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7772 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
7775 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7776 tmp->cg_nthreads = 1;
7777 KA_TRACE(100, (
"__kmp_teams_master: Thread %p created node %p and init"
7778 " cg_nthreads to 1\n",
7780 tmp->up = thr->th.th_cg_roots;
7781 thr->th.th_cg_roots = tmp;
7785#if INCLUDE_SSC_MARKS
7788 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7789 (microtask_t)thr->th.th_teams_microtask,
7790 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7791#if INCLUDE_SSC_MARKS
7795 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7796 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7799 __kmp_join_call(loc, gtid
7808int __kmp_invoke_teams_master(
int gtid) {
7809 kmp_info_t *this_thr = __kmp_threads[gtid];
7810 kmp_team_t *team = this_thr->th.th_team;
7812 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7813 KMP_DEBUG_ASSERT((
void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7814 (
void *)__kmp_teams_master);
7816 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7818 int tid = __kmp_tid_from_gtid(gtid);
7819 ompt_data_t *task_data =
7820 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7821 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7822 if (ompt_enabled.ompt_callback_implicit_task) {
7823 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7824 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7826 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7829 __kmp_teams_master(gtid);
7831 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7833 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
/* Remember the number of threads requested by the num_threads clause for the
   calling thread's next parallel region. */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
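// Example (illustrative): for user code
//   #pragma omp parallel num_threads(4)
// the compiler typically emits a call to __kmpc_push_num_threads(loc, gtid, 4)
// (kmp_csupport.cpp), which forwards here so that th_set_nproc is consumed by
// the next __kmp_fork_call() for this thread.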
7849static void __kmp_push_thread_limit(kmp_info_t *thr,
int num_teams,
7851 KMP_DEBUG_ASSERT(thr);
7853 if (!TCR_4(__kmp_init_middle))
7854 __kmp_middle_initialize();
7855 __kmp_assign_root_init_mask();
7856 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7857 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7859 if (num_threads == 0) {
7860 if (__kmp_teams_thread_limit > 0) {
7861 num_threads = __kmp_teams_thread_limit;
7863 num_threads = __kmp_avail_proc / num_teams;
7868 if (num_threads > __kmp_dflt_team_nth) {
7869 num_threads = __kmp_dflt_team_nth;
7871 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7872 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7874 if (num_teams * num_threads > __kmp_teams_max_nth) {
7875 num_threads = __kmp_teams_max_nth / num_teams;
7877 if (num_threads == 0) {
7881 if (num_threads < 0) {
7882 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
7888 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7890 if (num_threads > __kmp_dflt_team_nth) {
7891 num_threads = __kmp_dflt_team_nth;
7893 if (num_teams * num_threads > __kmp_teams_max_nth) {
7894 int new_threads = __kmp_teams_max_nth / num_teams;
7895 if (new_threads == 0) {
7898 if (new_threads != num_threads) {
7899 if (!__kmp_reserve_warn) {
7900 __kmp_reserve_warn = 1;
7901 __kmp_msg(kmp_ms_warning,
7902 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7903 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7906 num_threads = new_threads;
7909 thr->th.th_teams_size.nth = num_threads;
7914void __kmp_push_num_teams(
ident_t *
id,
int gtid,
int num_teams,
7916 kmp_info_t *thr = __kmp_threads[gtid];
7917 if (num_teams < 0) {
7920 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
7924 if (num_teams == 0) {
7925 if (__kmp_nteams > 0) {
7926 num_teams = __kmp_nteams;
7931 if (num_teams > __kmp_teams_max_nth) {
7932 if (!__kmp_reserve_warn) {
7933 __kmp_reserve_warn = 1;
7934 __kmp_msg(kmp_ms_warning,
7935 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7936 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7938 num_teams = __kmp_teams_max_nth;
7942 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7944 __kmp_push_thread_limit(thr, num_teams, num_threads);
7949void __kmp_push_num_teams_51(
ident_t *
id,
int gtid,
int num_teams_lb,
7950 int num_teams_ub,
int num_threads) {
7951 kmp_info_t *thr = __kmp_threads[gtid];
7952 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
7953 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
7954 KMP_DEBUG_ASSERT(num_threads >= 0);
7956 if (num_teams_lb > num_teams_ub) {
7957 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
7958 KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
7963 if (num_teams_lb == 0 && num_teams_ub > 0)
7964 num_teams_lb = num_teams_ub;
7966 if (num_teams_lb == 0 && num_teams_ub == 0) {
7967 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7968 if (num_teams > __kmp_teams_max_nth) {
7969 if (!__kmp_reserve_warn) {
7970 __kmp_reserve_warn = 1;
7971 __kmp_msg(kmp_ms_warning,
7972 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7973 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7975 num_teams = __kmp_teams_max_nth;
7977 }
else if (num_teams_lb == num_teams_ub) {
7978 num_teams = num_teams_ub;
7980 if (num_threads <= 0) {
7981 if (num_teams_ub > __kmp_teams_max_nth) {
7982 num_teams = num_teams_lb;
7984 num_teams = num_teams_ub;
7987 num_teams = (num_threads > __kmp_teams_max_nth)
7989 : __kmp_teams_max_nth / num_threads;
7990 if (num_teams < num_teams_lb) {
7991 num_teams = num_teams_lb;
7992 }
else if (num_teams > num_teams_ub) {
7993 num_teams = num_teams_ub;
7999 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
8001 __kmp_push_thread_limit(thr, num_teams, num_threads);
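// Example (illustrative): for
//   #pragma omp teams num_teams(4)
// with no thread_limit clause and __kmp_avail_proc == 16,
// __kmp_push_thread_limit() above computes num_threads = 16 / 4 = 4 and then
// clamps it against nthreads-var (__kmp_dflt_team_nth), thread-limit-var and
// __kmp_teams_max_nth before storing it in th_teams_size.nth.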
/* Remember the proc_bind clause value for the upcoming parallel region. */
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
8012void __kmp_internal_fork(
ident_t *
id,
int gtid, kmp_team_t *team) {
8013 kmp_info_t *this_thr = __kmp_threads[gtid];
8019 KMP_DEBUG_ASSERT(team);
8020 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8021 KMP_ASSERT(KMP_MASTER_GTID(gtid));
8024 team->t.t_construct = 0;
8025 team->t.t_ordered.dt.t_value =
8029 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
8030 if (team->t.t_max_nproc > 1) {
8032 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
8033 team->t.t_disp_buffer[i].buffer_index = i;
8034 team->t.t_disp_buffer[i].doacross_buf_idx = i;
8037 team->t.t_disp_buffer[0].buffer_index = 0;
8038 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
8042 KMP_ASSERT(this_thr->th.th_team == team);
8045 for (f = 0; f < team->t.t_nproc; f++) {
8046 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
8047 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
8052 __kmp_fork_barrier(gtid, 0);
8055void __kmp_internal_join(
ident_t *
id,
int gtid, kmp_team_t *team) {
8056 kmp_info_t *this_thr = __kmp_threads[gtid];
8058 KMP_DEBUG_ASSERT(team);
8059 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8060 KMP_ASSERT(KMP_MASTER_GTID(gtid));
8066 if (__kmp_threads[gtid] &&
8067 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
8068 __kmp_printf(
"GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
8069 __kmp_threads[gtid]);
8070 __kmp_printf(
"__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
8071 "team->t.t_nproc=%d\n",
8072 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
8074 __kmp_print_structure();
8076 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
8077 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
8080 __kmp_join_barrier(gtid);
8082 if (ompt_enabled.enabled &&
8083 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
8084 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8085 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
8086 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
8088 void *codeptr = NULL;
8089 if (KMP_MASTER_TID(ds_tid) &&
8090 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
8091 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
8092 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8094 if (ompt_enabled.ompt_callback_sync_region_wait) {
8095 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
8096 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8099 if (ompt_enabled.ompt_callback_sync_region) {
8100 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
8101 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8105 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
8106 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
8107 ompt_scope_end, NULL, task_data, 0, ds_tid,
8108 ompt_task_implicit);
8114 KMP_ASSERT(this_thr->th.th_team == team);
8119#ifdef USE_LOAD_BALANCE
8123static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8126 kmp_team_t *hot_team;
8128 if (root->r.r_active) {
8131 hot_team = root->r.r_hot_team;
8132 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
8133 return hot_team->t.t_nproc - 1;
8138 for (i = 1; i < hot_team->t.t_nproc; i++) {
8139 if (hot_team->t.t_threads[i]->th.th_active) {
8148static int __kmp_load_balance_nproc(kmp_root_t *root,
int set_nproc) {
8151 int hot_team_active;
8152 int team_curr_active;
8155 KB_TRACE(20, (
"__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
8157 KMP_DEBUG_ASSERT(root);
8158 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
8159 ->th.th_current_task->td_icvs.dynamic == TRUE);
8160 KMP_DEBUG_ASSERT(set_nproc > 1);
8162 if (set_nproc == 1) {
8163 KB_TRACE(20, (
"__kmp_load_balance_nproc: serial execution.\n"));
8172 pool_active = __kmp_thread_pool_active_nth;
8173 hot_team_active = __kmp_active_hot_team_nproc(root);
8174 team_curr_active = pool_active + hot_team_active + 1;
8177 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8178 KB_TRACE(30, (
"__kmp_load_balance_nproc: system active = %d pool active = %d "
8179 "hot team active = %d\n",
8180 system_active, pool_active, hot_team_active));
8182 if (system_active < 0) {
8186 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8187 KMP_WARNING(CantLoadBalUsing,
"KMP_DYNAMIC_MODE=thread limit");
8190 retval = __kmp_avail_proc - __kmp_nth +
8191 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8192 if (retval > set_nproc) {
8195 if (retval < KMP_MIN_NTH) {
8196 retval = KMP_MIN_NTH;
8199 KB_TRACE(20, (
"__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
8207 if (system_active < team_curr_active) {
8208 system_active = team_curr_active;
8210 retval = __kmp_avail_proc - system_active + team_curr_active;
8211 if (retval > set_nproc) {
8214 if (retval < KMP_MIN_NTH) {
8215 retval = KMP_MIN_NTH;
8218 KB_TRACE(20, (
"__kmp_load_balance_nproc: exit. retval:%d\n", retval));
8227void __kmp_cleanup(
void) {
8230 KA_TRACE(10, (
"__kmp_cleanup: enter\n"));
8232 if (TCR_4(__kmp_init_parallel)) {
8233#if KMP_HANDLE_SIGNALS
8234 __kmp_remove_signals();
8236 TCW_4(__kmp_init_parallel, FALSE);
8239 if (TCR_4(__kmp_init_middle)) {
8240#if KMP_AFFINITY_SUPPORTED
8241 __kmp_affinity_uninitialize();
8243 __kmp_cleanup_hierarchy();
8244 TCW_4(__kmp_init_middle, FALSE);
8247 KA_TRACE(10, (
"__kmp_cleanup: go serial cleanup\n"));
8249 if (__kmp_init_serial) {
8250 __kmp_runtime_destroy();
8251 __kmp_init_serial = FALSE;
8254 __kmp_cleanup_threadprivate_caches();
8256 for (f = 0; f < __kmp_threads_capacity; f++) {
8257 if (__kmp_root[f] != NULL) {
8258 __kmp_free(__kmp_root[f]);
8259 __kmp_root[f] = NULL;
8262 __kmp_free(__kmp_threads);
8265 __kmp_threads = NULL;
8267 __kmp_threads_capacity = 0;
8270 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8272 kmp_old_threads_list_t *next = ptr->next;
8273 __kmp_free(ptr->threads);
8278#if KMP_USE_DYNAMIC_LOCK
8279 __kmp_cleanup_indirect_user_locks();
8281 __kmp_cleanup_user_locks();
8285 __kmp_free(ompd_env_block);
8286 ompd_env_block = NULL;
8287 ompd_env_block_size = 0;
8291#if KMP_AFFINITY_SUPPORTED
8292 KMP_INTERNAL_FREE(CCAST(
char *, __kmp_cpuinfo_file));
8293 __kmp_cpuinfo_file = NULL;
8296#if KMP_USE_ADAPTIVE_LOCKS
8297#if KMP_DEBUG_ADAPTIVE_LOCKS
8298 __kmp_print_speculative_stats();
8301 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
8302 __kmp_nested_nth.nth = NULL;
8303 __kmp_nested_nth.size = 0;
8304 __kmp_nested_nth.used = 0;
8305 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
8306 __kmp_nested_proc_bind.bind_types = NULL;
8307 __kmp_nested_proc_bind.size = 0;
8308 __kmp_nested_proc_bind.used = 0;
8309 if (__kmp_affinity_format) {
8310 KMP_INTERNAL_FREE(__kmp_affinity_format);
8311 __kmp_affinity_format = NULL;
8314 __kmp_i18n_catclose();
8316#if KMP_USE_HIER_SCHED
8317 __kmp_hier_scheds.deallocate();
8320#if KMP_STATS_ENABLED
8324 KA_TRACE(10, (
"__kmp_cleanup: exit\n"));
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
8376void __kmp_user_set_library(
enum library_type arg) {
8383 gtid = __kmp_entry_gtid();
8384 thread = __kmp_threads[gtid];
8386 root = thread->th.th_root;
8388 KA_TRACE(20, (
"__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
8390 if (root->r.r_in_parallel) {
8392 KMP_WARNING(SetLibraryIncorrectCall);
8397 case library_serial:
8398 thread->th.th_set_nproc = 0;
8399 set__nproc(thread, 1);
8401 case library_turnaround:
8402 thread->th.th_set_nproc = 0;
8403 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8404 : __kmp_dflt_team_nth_ub);
8406 case library_throughput:
8407 thread->th.th_set_nproc = 0;
8408 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8409 : __kmp_dflt_team_nth_ub);
8412 KMP_FATAL(UnknownLibraryType, arg);
8415 __kmp_aux_set_library(arg);
8418void __kmp_aux_set_stacksize(
size_t arg) {
8419 if (!__kmp_init_serial)
8420 __kmp_serial_initialize();
8423 if (arg & (0x1000 - 1)) {
8424 arg &= ~(0x1000 - 1);
8429 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8432 if (!TCR_4(__kmp_init_parallel)) {
8435 if (value < __kmp_sys_min_stksize)
8436 value = __kmp_sys_min_stksize;
8437 else if (value > KMP_MAX_STKSIZE)
8438 value = KMP_MAX_STKSIZE;
8440 __kmp_stksize = value;
8442 __kmp_env_stksize = TRUE;
8445 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
/* Set the behaviour of the runtime library. */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
8472static kmp_team_t *__kmp_aux_get_team_info(
int &teams_serialized) {
8473 kmp_info_t *thr = __kmp_entry_thread();
8474 teams_serialized = 0;
8475 if (thr->th.th_teams_microtask) {
8476 kmp_team_t *team = thr->th.th_team;
8477 int tlevel = thr->th.th_teams_level;
8478 int ii = team->t.t_level;
8479 teams_serialized = team->t.t_serialized;
8480 int level = tlevel + 1;
8481 KMP_DEBUG_ASSERT(ii >= tlevel);
8482 while (ii > level) {
8483 for (teams_serialized = team->t.t_serialized;
8484 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8486 if (team->t.t_serialized && (!teams_serialized)) {
8487 team = team->t.t_parent;
8491 team = team->t.t_parent;
8500int __kmp_aux_get_team_num() {
8502 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8504 if (serialized > 1) {
8507 return team->t.t_master_tid;
8513int __kmp_aux_get_num_teams() {
8515 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8517 if (serialized > 1) {
8520 return team->t.t_parent->t.t_nproc;
// Affinity format fields: each entry maps a short specifier (e.g. %n) and its
// long form (e.g. %{thread_num}) to the printf conversion used to render it.
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec, e.g. 'L'
  const char *long_name; // from spec, e.g. "nesting_level"
  char field_format; // data type for snprintf ('d' or 's')
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
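/* Usage sketch (illustrative user code, not part of the runtime): the table
   above backs the OpenMP 5.0 affinity-format routines, e.g.

     #include <omp.h>
     int main(void) {
       omp_set_affinity_format("host=%H pid=%P tid=%0.3n affinity=%A");
       #pragma omp parallel
       omp_display_affinity(NULL); // one line per thread
       return 0;
     }

   %P is rendered via getpid(), %n via __kmp_tid_from_gtid(), and %A via
   __kmp_affinity_str_buf_mask() in __kmp_aux_capture_affinity_field() below;
   the routing from the standard API entry points down to these routines is
   assumed here rather than shown in this file. */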
8581static int __kmp_aux_capture_affinity_field(
int gtid,
const kmp_info_t *th,
8583 kmp_str_buf_t *field_buffer) {
8584 int rc, format_index, field_value;
8585 const char *width_left, *width_right;
8586 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8587 static const int FORMAT_SIZE = 20;
8588 char format[FORMAT_SIZE] = {0};
8589 char absolute_short_name = 0;
8591 KMP_DEBUG_ASSERT(gtid >= 0);
8592 KMP_DEBUG_ASSERT(th);
8593 KMP_DEBUG_ASSERT(**ptr ==
'%');
8594 KMP_DEBUG_ASSERT(field_buffer);
8596 __kmp_str_buf_clear(field_buffer);
8603 __kmp_str_buf_cat(field_buffer,
"%", 1);
8614 right_justify =
false;
8616 right_justify =
true;
8620 width_left = width_right = NULL;
8621 if (**ptr >=
'0' && **ptr <=
'9') {
8629 format[format_index++] =
'%';
8631 format[format_index++] =
'-';
8633 format[format_index++] =
'0';
8634 if (width_left && width_right) {
8638 while (i < 8 && width_left < width_right) {
8639 format[format_index++] = *width_left;
8647 found_valid_name =
false;
8648 parse_long_name = (**ptr ==
'{');
8649 if (parse_long_name)
8651 for (
size_t i = 0; i <
sizeof(__kmp_affinity_format_table) /
8652 sizeof(__kmp_affinity_format_table[0]);
8654 char short_name = __kmp_affinity_format_table[i].short_name;
8655 const char *long_name = __kmp_affinity_format_table[i].long_name;
8656 char field_format = __kmp_affinity_format_table[i].field_format;
8657 if (parse_long_name) {
8658 size_t length = KMP_STRLEN(long_name);
8659 if (strncmp(*ptr, long_name, length) == 0) {
8660 found_valid_name =
true;
8663 }
else if (**ptr == short_name) {
8664 found_valid_name =
true;
8667 if (found_valid_name) {
8668 format[format_index++] = field_format;
8669 format[format_index++] =
'\0';
8670 absolute_short_name = short_name;
8674 if (parse_long_name) {
8676 absolute_short_name = 0;
8684 switch (absolute_short_name) {
8686 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8689 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8692 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8695 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8698 static const int BUFFER_SIZE = 256;
8699 char buf[BUFFER_SIZE];
8700 __kmp_expand_host_name(buf, BUFFER_SIZE);
8701 rc = __kmp_str_buf_print(field_buffer, format, buf);
8704 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8707 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
8710 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8714 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8715 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8717#if KMP_AFFINITY_SUPPORTED
8720 __kmp_str_buf_init(&buf);
8721 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8722 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8723 __kmp_str_buf_free(&buf);
8729 rc = __kmp_str_buf_print(field_buffer,
"%s",
"undefined");
8731 if (parse_long_name) {
8740 KMP_ASSERT(format_index <= FORMAT_SIZE);
8750size_t __kmp_aux_capture_affinity(
int gtid,
const char *format,
8751 kmp_str_buf_t *buffer) {
8752 const char *parse_ptr;
8754 const kmp_info_t *th;
8755 kmp_str_buf_t field;
8757 KMP_DEBUG_ASSERT(buffer);
8758 KMP_DEBUG_ASSERT(gtid >= 0);
8760 __kmp_str_buf_init(&field);
8761 __kmp_str_buf_clear(buffer);
8763 th = __kmp_threads[gtid];
8769 if (parse_ptr == NULL || *parse_ptr ==
'\0') {
8770 parse_ptr = __kmp_affinity_format;
8772 KMP_DEBUG_ASSERT(parse_ptr);
8774 while (*parse_ptr !=
'\0') {
8776 if (*parse_ptr ==
'%') {
8778 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8779 __kmp_str_buf_catbuf(buffer, &field);
8783 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8788 __kmp_str_buf_free(&field);
8793void __kmp_aux_display_affinity(
int gtid,
const char *format) {
8795 __kmp_str_buf_init(&buf);
8796 __kmp_aux_capture_affinity(gtid, format, &buf);
8797 __kmp_fprintf(kmp_out,
"%s" KMP_END_OF_LINE, buf.str);
8798 __kmp_str_buf_free(&buf);
8802void __kmp_aux_set_blocktime(
int arg, kmp_info_t *thread,
int tid) {
8803 int blocktime = arg;
8809 __kmp_save_internal_controls(thread);
8812 if (blocktime < KMP_MIN_BLOCKTIME)
8813 blocktime = KMP_MIN_BLOCKTIME;
8814 else if (blocktime > KMP_MAX_BLOCKTIME)
8815 blocktime = KMP_MAX_BLOCKTIME;
8817 set__blocktime_team(thread->th.th_team, tid, blocktime);
8818 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8822 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8824 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8825 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8831 set__bt_set_team(thread->th.th_team, tid, bt_set);
8832 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8834 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8835 "bt_intervals=%d, monitor_updates=%d\n",
8836 __kmp_gtid_from_tid(tid, thread->th.th_team),
8837 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8838 __kmp_monitor_wakeups));
8840 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8841 __kmp_gtid_from_tid(tid, thread->th.th_team),
8842 thread->th.th_team->t.t_id, tid, blocktime));
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
8860PACKED_REDUCTION_METHOD_T
8861__kmp_determine_reduction_method(
8862 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
size_t reduce_size,
8863 void *reduce_data,
void (*reduce_func)(
void *lhs_data,
void *rhs_data),
8864 kmp_critical_name *lck) {
8875 PACKED_REDUCTION_METHOD_T retval;
8879 KMP_DEBUG_ASSERT(lck);
8881#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8883 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8884#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8886 retval = critical_reduce_block;
8889 team_size = __kmp_get_team_num_threads(global_tid);
8890 if (team_size == 1) {
8892 retval = empty_reduce_block;
8896 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8898#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8899 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
8900 KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM
8902#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8903 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD || \
8904 KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX
8906 int teamsize_cutoff = 4;
8908#if KMP_MIC_SUPPORTED
8909 if (__kmp_mic_type != non_mic) {
8910 teamsize_cutoff = 8;
8913 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8914 if (tree_available) {
8915 if (team_size <= teamsize_cutoff) {
8916 if (atomic_available) {
8917 retval = atomic_reduce_block;
8920 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8922 }
else if (atomic_available) {
8923 retval = atomic_reduce_block;
8926#error "Unknown or unsupported OS"
8931#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \
8932 KMP_ARCH_WASM || KMP_ARCH_PPC
8934#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8935 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HURD || KMP_OS_SOLARIS || \
8936 KMP_OS_WASI || KMP_OS_AIX
8940 if (atomic_available) {
8941 if (num_vars <= 2) {
8942 retval = atomic_reduce_block;
8948 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8949 if (atomic_available && (num_vars <= 3)) {
8950 retval = atomic_reduce_block;
8951 }
else if (tree_available) {
8952 if ((reduce_size > (9 *
sizeof(kmp_real64))) &&
8953 (reduce_size < (2000 *
sizeof(kmp_real64)))) {
8954 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8959#error "Unknown or unsupported OS"
8963#error "Unknown or unsupported architecture"
8971 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8974 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8976 int atomic_available, tree_available;
8978 switch ((forced_retval = __kmp_force_reduction_method)) {
8979 case critical_reduce_block:
8983 case atomic_reduce_block:
8984 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8985 if (!atomic_available) {
8986 KMP_WARNING(RedMethodNotSupported,
"atomic");
8987 forced_retval = critical_reduce_block;
8991 case tree_reduce_block:
8992 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8993 if (!tree_available) {
8994 KMP_WARNING(RedMethodNotSupported,
"tree");
8995 forced_retval = critical_reduce_block;
8997#if KMP_FAST_REDUCTION_BARRIER
8998 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
9007 retval = forced_retval;
9010 KA_TRACE(10, (
"reduction method selected=%08x\n", retval));
9012#undef FAST_REDUCTION_TREE_METHOD_GENERATED
9013#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}

// Soft pause sets up threads to ignore blocktime and just go to sleep.
// Spin-wait code checks __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely. Resume happens naturally when
// OpenMP is used subsequently.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}
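// __kmp_resume_if_soft_paused() below undoes a soft pause: it clears
// __kmp_pause_status and then wakes any worker that went to sleep on its
// fork/join barrier b_go flag while the runtime was paused. A hard pause has
// no such fast path; the runtime is simply re-initialized on next use.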
9034void __kmp_resume_if_soft_paused() {
9035 if (__kmp_pause_status == kmp_soft_paused) {
9036 __kmp_pause_status = kmp_not_paused;
9038 for (
int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
9039 kmp_info_t *thread = __kmp_threads[gtid];
9041 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
9043 if (fl.is_sleeping())
9045 else if (__kmp_try_suspend_mx(thread)) {
9046 __kmp_unlock_suspend_mx(thread);
9049 if (fl.is_sleeping()) {
9052 }
else if (__kmp_try_suspend_mx(thread)) {
9053 __kmp_unlock_suspend_mx(thread);
9065int __kmp_pause_resource(kmp_pause_status_t level) {
9066 if (level == kmp_not_paused) {
9067 if (__kmp_pause_status == kmp_not_paused) {
9071 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
9072 __kmp_pause_status == kmp_hard_paused);
9073 __kmp_pause_status = kmp_not_paused;
9076 }
else if (level == kmp_soft_paused) {
9077 if (__kmp_pause_status != kmp_not_paused) {
9084 }
else if (level == kmp_hard_paused) {
9085 if (__kmp_pause_status != kmp_not_paused) {
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
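// __kmp_omp_display_env() backs the omp_display_env() API entry point (the
// C/Fortran wrappers are assumed to forward here; that routing is not shown in
// this file). verbose==0 produces the short OMP_DISPLAY_ENV=TRUE listing and
// verbose!=0 the OMP_DISPLAY_ENV=VERBOSE one, hence the (!verbose, verbose)
// pair passed to __kmp_display_env_impl().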
9107void __kmp_resize_dist_barrier(kmp_team_t *team,
int old_nthreads,
9109 KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
9111 kmp_info_t **other_threads = team->t.t_threads;
9115 for (
int f = 1; f < old_nthreads; ++f) {
9116 KMP_DEBUG_ASSERT(other_threads[f] != NULL);
9118 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
9124 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9125 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9129 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
9131 team->t.t_threads[f]->th.th_used_in_team.store(2);
9132 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
9135 team->t.b->go_release();
9141 int count = old_nthreads - 1;
9143 count = old_nthreads - 1;
9144 for (
int f = 1; f < old_nthreads; ++f) {
9145 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9146 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9147 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
9148 void *, other_threads[f]->th.th_sleep_loc);
9149 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9152 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9158 team->t.b->update_num_threads(new_nthreads);
9159 team->t.b->go_reset();
9162void __kmp_add_threads_to_team(kmp_team_t *team,
int new_nthreads) {
9164 KMP_DEBUG_ASSERT(team);
9170 for (
int f = 1; f < new_nthreads; ++f) {
9171 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9172 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
9174 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9175 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9176 (kmp_flag_32<false, false> *)NULL);
9182 int count = new_nthreads - 1;
9184 count = new_nthreads - 1;
9185 for (
int f = 1; f < new_nthreads; ++f) {
9186 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
// Globals for the hidden helper team and its threads.
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

// Number of hidden helper threads that have checked in during initialization.
std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9208void __kmp_hidden_helper_wrapper_fn(
int *gtid,
int *, ...) {
9213 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9214 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9215 __kmp_hidden_helper_threads_num)
9221 TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9222 __kmp_hidden_helper_initz_release();
9223 __kmp_hidden_helper_main_thread_wait();
9225 for (
int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9226 __kmp_hidden_helper_worker_thread_signal();
9232void __kmp_hidden_helper_threads_initz_routine() {
9234 const int gtid = __kmp_register_root(TRUE);
9235 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9236 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9237 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9238 __kmp_hidden_helper_threads_num;
9240 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9245 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);
9247 __kmp_hidden_helper_threads_deinitz_release();
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}
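// __kmp_set_nesting_mode_threads() below fills __kmp_nesting_nth_level[] from
// the hardware topology (one entry per topology level, via
// __kmp_topology->get_ratio()), falls back to a two-level guess based on
// __kmp_avail_proc when no topology is available, and then copies the result
// into __kmp_nested_nth so nested parallel regions default to the per-level
// hardware counts.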
9281void __kmp_set_nesting_mode_threads() {
9282 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9284 if (__kmp_nesting_mode == 1)
9285 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9286 else if (__kmp_nesting_mode > 1)
9287 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9289 if (__kmp_topology) {
9291 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9292 loc < __kmp_nesting_mode_nlevels;
9293 loc++, hw_level++) {
9294 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9295 if (__kmp_nesting_nth_level[loc] == 1)
9299 if (__kmp_nesting_mode > 1 && loc > 1) {
9300 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9301 int num_cores = __kmp_topology->get_count(core_level);
9302 int upper_levels = 1;
9303 for (
int level = 0; level < loc - 1; ++level)
9304 upper_levels *= __kmp_nesting_nth_level[level];
9305 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9306 __kmp_nesting_nth_level[loc - 1] =
9307 num_cores / __kmp_nesting_nth_level[loc - 2];
9309 __kmp_nesting_mode_nlevels = loc;
9310 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9312 if (__kmp_avail_proc >= 4) {
9313 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9314 __kmp_nesting_nth_level[1] = 2;
9315 __kmp_nesting_mode_nlevels = 2;
9317 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9318 __kmp_nesting_mode_nlevels = 1;
9320 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9322 for (
int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9323 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9325 set__nproc(thread, __kmp_nesting_nth_level[0]);
9326 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9327 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9328 if (get__max_active_levels(thread) > 1) {
9330 __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9332 if (__kmp_nesting_mode == 1)
9333 set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
#if !KMP_STATS_ENABLED
void __kmp_reset_stats() {}
#endif

int __kmp_omp_debug_struct_info = FALSE;
int __kmp_debugging = FALSE;

#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
void __kmp_itt_fini_ittlib() {}
void __kmp_itt_init_ittlib() {}
#endif