17#include "kmp_wait_release.h"
18#include "kmp_taskdeps.h"
21#include "ompt-specific.h"
24#if ENABLE_LIBOMPTARGET
25static void (*tgt_target_nowait_query)(
void **);
27void __kmp_init_target_task() {
28 *(
void **)(&tgt_target_nowait_query) = KMP_DLSYM(
"__tgt_target_nowait_query");
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data);
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team);
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id);
int __kmp_taskloop_task(int gtid, void *ptask);
#ifdef BUILD_TIED_TASK_STACK

// __kmp_trace_task_stack: print the tied tasks from the task stack, top down.
static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // fix up stack_top if we need to pop from the previous block
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);

      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    tied_task = *stack_top;

    KMP_DEBUG_ASSERT(tied_task != NULL);
    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));

  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);

  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread,
                        stack_block); // free the block, if not the first one
    }
    stack_block = next_block;
  }

  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    return; // nothing is pushed when team or team tasks are serialized
  }

  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));
  // Store entry
  *(task_stack->ts_top) = tied_task;

  // Do bookkeeping for next push
  task_stack->ts_top++;
  task_stack->ts_entries++;

  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    // Find the beginning of this task block
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    // Check if we already have a next block
    if (stack_block->sb_next != NULL) { // reuse the existing block
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else { // allocate a new block and link it in
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    return; // nothing was pushed when team or team tasks are serialized
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // fix up ts_top if we need to pop from the previous block
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // finish bookkeeping
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task); // popped task must be the ending one

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
                                  const kmp_taskdata_t *tasknew,
                                  const kmp_taskdata_t *taskcurr) {
  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
    // Check that the candidate obeys the Task Scheduling Constraints (TSC):
    // only a descendant of all deferred tied tasks may be scheduled; checking
    // the last one is enough since it is a descendant of the others.
    kmp_taskdata_t *current = taskcurr->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // check that the task is not suspended on a barrier
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 on barrier
      kmp_int32 level = current->td_level;
      kmp_taskdata_t *parent = tasknew->td_parent;
      while (parent != current && parent->td_level > level) {
        // check generation up to the level of the current task
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current)
        return false;
    }
  }
  // Check mutexinoutset dependencies, acquire locks
  kmp_depnode_t *node = tasknew->td_depnode;
#if OMPX_TASKGRAPH
  if (!tasknew->is_taskgraph && UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
#else
  if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
#endif
    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
        continue;
      // could not acquire the lock; release the previously acquired ones
      for (int j = i - 1; j >= 0; --j)
        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
      return false;
    }
    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
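    // Note: negating mtx_num_locks marks that all mutexinoutset locks for this
    // node are currently held; the absolute value still gives the lock count
    // so they can be released when the task completes.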
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
  kmp_int32 new_size = 2 * size;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
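  // Note: after the copy the live tasks occupy slots [0, size), so the head is
  // reset to 0 and the tail to size; TASK_DEQUE_MASK keeps working because the
  // capacity remains a power of two after doubling.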
static kmp_task_pri_t *__kmp_alloc_task_pri_list() {
  kmp_task_pri_t *l = (kmp_task_pri_t *)__kmp_allocate(sizeof(kmp_task_pri_t));
  kmp_thread_data_t *thread_data = &l->td;
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  thread_data->td.td_deque_last_stolen = -1;
  KE_TRACE(20, ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
                "for thread_data %p\n",
                __kmp_get_gtid(), INITIAL_TASK_DEQUE_SIZE, thread_data));
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
static kmp_thread_data_t *
__kmp_get_priority_deque_data(kmp_task_team_t *task_team, kmp_int32 pri) {
  kmp_thread_data_t *thread_data;
  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
  if (lst->priority == pri) {
    // Found the queue of tasks with the given priority.
    thread_data = &lst->td;
  } else if (lst->priority < pri) {
    // All current queues hold lower-priority tasks; create a new one and make
    // it the new head.
    kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
    thread_data = &list->td;
    list->priority = pri;
    list->next = lst;
    task_team->tt.tt_task_pri_list = list;
  } else { // task priority is less than the head's priority
    kmp_task_pri_t *next_queue = lst->next;
    while (next_queue && next_queue->priority > pri) {
      lst = next_queue;
      next_queue = lst->next;
    }
    if (next_queue == NULL) {
      // No queue with the given priority; append a new one at the tail.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      lst->next = list;
    } else if (next_queue->priority == pri) {
      // Found the queue of tasks with the given priority.
      thread_data = &next_queue->td;
    } else { // insert a new queue between lst and next_queue
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = next_queue;
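      // Note: tt_task_pri_list is a linked list of per-priority deques kept
      // sorted by descending priority; this helper either finds the matching
      // deque or splices a freshly allocated one into the right position.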
static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread,
                                          kmp_taskdata_t *taskdata,
                                          kmp_task_team_t *task_team,
                                          kmp_int32 pri) {
  kmp_thread_data_t *thread_data = NULL;
  KA_TRACE(20,
           ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n",
            gtid, taskdata, pri));

  // Find the task queue specific to the priority value.
  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
  if (UNLIKELY(lst == NULL)) {
    __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    if (task_team->tt.tt_task_pri_list == NULL) {
      // The list of queues is still empty; allocate one.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      task_team->tt.tt_task_pri_list = list;
    } else {
      // Another thread initialized the list meanwhile; look up the deque.
      thread_data = __kmp_get_priority_deque_data(task_team, pri);
    }
    __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  } else if (lst->priority == pri) {
    // Found the queue of tasks with the given priority.
    thread_data = &lst->td;
  } else {
    __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    thread_data = __kmp_get_priority_deque_data(task_team, pri);
    __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  }
  KMP_DEBUG_ASSERT(thread_data);

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
  // Check if the deque is full.
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(20, ("__kmp_push_priority_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    }
    // expand the deque to fit the task that is not allowed to execute now
    __kmp_realloc_task_deque(thread, thread_data);
  }
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));
  // Push taskdata and wrap the tail index.
  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task);
  KMP_FSYNC_RELEASING(taskdata);
  KA_TRACE(20, ("__kmp_push_priority_task: T#%d returning "
                "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  task_team->tt.tt_num_task_pri++;
  return TASK_SUCCESSFULLY_PUSHED;
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);

  // If we encounter a hidden helper task and the current thread is not a
  // hidden helper thread, hand the task to a hidden helper thread, starting
  // from its shadow one.
  if (UNLIKELY(taskdata->td_flags.hidden_helper &&
               !KMP_HIDDEN_HELPER_THREAD(gtid))) {
    kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
    __kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
    // Signal the hidden helper threads.
    __kmp_hidden_helper_worker_thread_signal();
    return TASK_SUCCESSFULLY_PUSHED;
  }
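  // Note: a hidden-helper task created by a regular OpenMP thread is therefore
  // never queued locally; it goes to the corresponding "shadow" hidden helper
  // thread's deque and that helper team is woken up to run it.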
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(10,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to increment the counter so that the task structure is
    // not freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building the task_team for serialized teams.
  if (UNLIKELY(taskdata->td_flags.task_serial)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode.
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  if (taskdata->td_flags.priority_specified && task->data2.priority > 0 &&
      __kmp_max_task_priority > 0) {
    int pri = KMP_MIN(task->data2.priority, __kmp_max_task_priority);
    return __kmp_push_priority_task(gtid, thread, taskdata, task_team, pri);
  }

  // Find the tasking deque specific to the encountering thread.
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only the owner can allocate its deque.
  if (UNLIKELY(thread_data->td.td_deque == NULL)) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  int locked = 0;
  // Check if the deque is full.
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    }
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    locked = 1;
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      // expand the deque to push the task which is not allowed to execute
      __kmp_realloc_task_deque(thread, thread_data);
    }
  }
  // Lock the deque for the push operation if it is not locked yet.
  if (!locked) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    // Recheck: a proxy task can arrive from a thread outside of OpenMP.
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      if (__kmp_enable_task_throttling &&
          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                thread->th.th_current_task)) {
        __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
                      "returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata));
        return TASK_NOT_PUSHED;
      }
      // expand the deque to push the task which is not allowed to execute
      __kmp_realloc_task_deque(thread, thread_data);
    }
  }

  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task);
  KMP_FSYNC_RELEASING(taskdata);
  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p curtask_parent=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p curtask_parent=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // mark the currently executing task as suspended
  current_task->td_flags.executing = 0;

// Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  // mark the starting task as executing and as the current task
  thread->th.th_current_task = taskdata;

  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
  task->ompt_task_info.task_data.value = 0;
  task->ompt_task_info.frame.exit_frame = ompt_data_none;
  task->ompt_task_info.frame.enter_frame = ompt_data_none;
  task->ompt_task_info.frame.exit_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.frame.enter_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.dispatch_chunk.start = 0;
  task->ompt_task_info.dispatch_chunk.iterations = 0;
static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  ompt_task_status_t status = ompt_task_switch;
  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
    status = ompt_task_yield;
    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
  }
  // Begin the new task or resume a task with task id and frame set up.
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), status,
        &(taskdata->ompt_task_info.task_data));
  }
  taskdata->ompt_task_info.scheduling_parent = current_task;
static inline void __ompt_task_finish(kmp_task_t *task,
                                      kmp_taskdata_t *resumed_task,
                                      ompt_task_status_t status) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
    if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
        taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
      status = ompt_task_cancel;

    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(taskdata->ompt_task_info.task_data), status,
        (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
template <bool ompt>
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
                                               kmp_task_t *task,
                                               void *frame_address,
                                               void *return_address) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
                "current_task=%p\n",
                gtid, loc_ref, taskdata, current_task));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to increment the counter so that the task structure is
    // not freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
                  "incremented for task %p\n",
                  gtid, counter, taskdata));
  }

  taskdata->td_flags.task_serial =
      1; // Execute this task immediately, not deferred.
  __kmp_task_start(gtid, task, current_task);

  if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
    current_task->ompt_task_info.frame.enter_frame.ptr =
        taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
    current_task->ompt_task_info.frame.enter_frame_flags =
        taskdata->ompt_task_info.frame.exit_frame_flags =
            ompt_frame_application | ompt_frame_framepointer;
  }
  if (ompt_enabled.ompt_callback_task_create) {
    ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
    ompt_callbacks.ompt_callback(ompt_callback_task_create)(
        &(parent_info->task_data), &(parent_info->frame),
        &(taskdata->ompt_task_info.task_data),
        ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
        return_address);
  }
  __ompt_task_start(task, current_task, gtid);

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, taskdata));
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *task,
                                           void *frame_address,
                                           void *return_address) {
  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
                                           return_address);
}

__attribute__((target("backchain")))
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                               kmp_task_t *task) {
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
                                   OMPT_GET_FRAME_ADDRESS(1),
                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
    return;
  }
  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(
      10,
      ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
       gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));

  __kmp_task_start(gtid, task, current_task);

  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check to make sure all flags and counters have the correct values.
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);
  kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata);
  // Clear data to avoid accidental re-use later.
  task->data1.destructors = NULL;
  task->data2.priority = 0;

  taskdata->td_flags.freed = 1;

#if OMPX_TASKGRAPH
  // do not deallocate taskdata while recording a task graph
  if (!taskdata->is_taskgraph) {
#endif
// deallocate the taskdata and shared-variable block associated with this task
#if USE_FAST_MEMORY
    __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
    __kmp_thread_free(thread, taskdata);
#endif
#if OMPX_TASKGRAPH
  } else {
    taskdata->td_flags.complete = 0;
    taskdata->td_flags.started = 0;
    taskdata->td_flags.freed = 0;
    taskdata->td_flags.executing = 0;
    taskdata->td_flags.task_serial =
        (taskdata->td_parent->td_flags.final ||
         taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser);

    KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
    KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
    // start at one because the count covers the current task and its children
    KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
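    // Note: when the task belongs to a task graph (TDG) being recorded, its
    // taskdata is reset to a "never started" state and kept alive so the same
    // node can be re-executed on replay; only non-taskgraph tasks are actually
    // returned to the allocator above.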
  }
#endif

  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
                                          kmp_taskdata_t *taskdata,
                                          kmp_info_t *thread) {
  // Proxy tasks must always be allowed to free their parents because they can
  // run in the background even in serial mode.
  kmp_int32 team_serial =
      (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
      !taskdata->td_flags.proxy;
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);

  // Walk up the ancestor tree to see if any ancestors can now be freed.
  while (children == 0) {
    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                  "and freeing itself\n",
                  gtid, taskdata));

    // --- Deallocate my ancestor task ---
    __kmp_free_task(gtid, taskdata, thread);

    taskdata = parent_taskdata;

    if (team_serial)
      return;
    // Stop checking ancestors at the implicit task to avoid premature
    // deallocation of ancestors.
    if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
      if (taskdata->td_dephash) { // do we need to clean up the dephash?
        int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
        kmp_tasking_flags_t flags_old = taskdata->td_flags;
        if (children == 0 && flags_old.complete == 1) {
          kmp_tasking_flags_t flags_new = flags_old;
          flags_new.complete = 0;
          if (KMP_COMPARE_AND_STORE_ACQ32(
                  RCAST(kmp_int32 *, &taskdata->td_flags),
                  *RCAST(kmp_int32 *, &flags_old),
                  *RCAST(kmp_int32 *, &flags_new))) {
            KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans "
                           "dephash of implicit task %p\n",
                           gtid, taskdata));
            __kmp_dephash_free_entries(thread, taskdata->td_dephash);
          }
        }
      }
      return;
    }
    // Predecrement simulated by the "- 1" calculation
    children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
    KMP_DEBUG_ASSERT(children >= 0);
  }

  KA_TRACE(
      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
           "not freeing it yet\n",
           gtid, taskdata, children));
static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
  kmp_tasking_flags_t flags = taskdata->td_flags;
  bool ret = !(flags.team_serial || flags.tasking_ser);
  ret = ret || flags.proxy == TASK_PROXY ||
        flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
  ret = ret ||
        KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
#if OMPX_TASKGRAPH
  if (taskdata->td_taskgroup && taskdata->is_taskgraph)
    ret = ret || KMP_ATOMIC_LD_ACQ(&taskdata->td_taskgroup->count) > 0;
#endif
  return ret;
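  // Note: child-task bookkeeping (the incomplete/allocated child counters and
  // the taskgroup count) is maintained only when one of the conditions above
  // holds, i.e. when another thread or construct may still need to observe
  // this task's completion; fully serialized tasks skip the atomics.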
static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *resumed_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_team_t *task_team =
      thread->th.th_task_team;
  kmp_int32 children = 0;

  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
                "task %p\n",
                gtid, taskdata, resumed_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

#if OMPX_TASKGRAPH
  bool is_taskgraph = taskdata->is_taskgraph;
#endif

// Pop the task from the stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_pop_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to check the counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
    KA_TRACE(
        20,
        ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
         gtid, counter, taskdata));
    if (counter > 0) {
      // untied task is not done; it may be continued by another thread
      if (resumed_task == NULL) {
        KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
        resumed_task = taskdata->td_parent; // in a serialized task, the
        // resumed task is the parent
      }
      thread->th.th_current_task = resumed_task; // restore current_task
      resumed_task->td_flags.executing = 1; // resume the previous task
      KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
                    "resuming task %p\n",
                    gtid, taskdata, resumed_task));
      return;
    }
  }

  // bookkeeping for the resuming task:
  // (tasking_ser || task_serial) must always equal task_serial
  KMP_DEBUG_ASSERT(
      (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
      taskdata->td_flags.task_serial);
  if (taskdata->td_flags.task_serial) {
    if (resumed_task == NULL) {
      resumed_task = taskdata->td_parent; // in a serialized task, the resumed
      // task is the parent
    }
  } else {
    KMP_DEBUG_ASSERT(resumed_task != NULL); // verify resumed task was passed in
  }

  /* If the task's destructor thunk flag is set, invoke the destructor thunk
     generated by the compiler. */
  if (UNLIKELY(taskdata->td_flags.destructors_thunk)) {
    kmp_routine_entry_t destr_thunk = task->data1.destructors;
    KMP_ASSERT(destr_thunk);
    destr_thunk(gtid, task);
  }

  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  bool completed = true;
  if (UNLIKELY(taskdata->td_flags.detachable == TASK_DETACHABLE)) {
    if (taskdata->td_allow_completion_event.type ==
        KMP_EVENT_ALLOW_COMPLETION) {
      // the event has not been fulfilled yet; try to detach the task
      __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
      if (taskdata->td_allow_completion_event.type ==
          KMP_EVENT_ALLOW_COMPLETION) {
        // the task finished execution
        KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
        taskdata->td_flags.executing = 0; // suspend the finishing task

        __ompt_task_finish(task, resumed_task, ompt_task_detach);

        // no access to taskdata after this point:
        // __kmp_fulfill_event might free it at any time from now on
        taskdata->td_flags.proxy = TASK_PROXY; // proxify!
        completed = false;
      }
      __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
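      // Note: for a detachable task whose allow-completion event is still
      // pending, the task is converted into a proxy here and left incomplete;
      // the later fulfillment of the event performs the remaining "bottom
      // half" of the completion instead of this function.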
    }
  }

  // Tasks with a pending target async handle must be re-enqueued rather than
  // completed here.
  if (taskdata->td_target_data.async_handle != NULL) {
    __kmpc_give_task(task, __kmp_tid_from_gtid(gtid));
    if (KMP_HIDDEN_HELPER_THREAD(gtid))
      __kmp_hidden_helper_worker_thread_signal();
    completed = false;
  }

  if (completed) {
    taskdata->td_flags.complete = 1; // mark the task as completed
#if OMPX_TASKGRAPH
    taskdata->td_flags.onced = 1; // mark the task as having run once already
#endif

    __ompt_task_finish(task, resumed_task, ompt_task_complete);

    if (__kmp_track_children_task(taskdata)) {
      __kmp_release_deps(gtid, taskdata);
      // Predecrement simulated by the "- 1" calculation
      children =
          KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
      KMP_DEBUG_ASSERT(children >= 0);
#if OMPX_TASKGRAPH
      if (taskdata->td_taskgroup && !taskdata->is_taskgraph)
#else
      if (taskdata->td_taskgroup)
#endif
        KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
    } else if (task_team && (task_team->tt.tt_found_proxy_tasks ||
                             task_team->tt.tt_hidden_helper_task_encountered)) {
      // a dependency chain may originate from a proxy or hidden helper task
      __kmp_release_deps(gtid, taskdata);
    }
    // td_flags.executing must be cleared only after the decrement of
    // td_incomplete_child_tasks
    KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
    taskdata->td_flags.executing = 0; // suspend the finishing task

    // Decrement the counter of hidden helper tasks still to be executed.
    if (taskdata->td_flags.hidden_helper) {
      // Hidden helper tasks can only be executed by hidden helper threads.
      KMP_ASSERT(KMP_HIDDEN_HELPER_THREAD(gtid));
      KMP_ATOMIC_DEC(&__kmp_unexecuted_hidden_helper_tasks);
    }
  }

  KA_TRACE(
      20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
           gtid, taskdata, children));

  // Restore th_current_task before freeing, so an asynchronous inquiry into
  // the runtime never sees the freed task as the current task.
  thread->th.th_current_task = resumed_task;
  __kmp_free_task_and_ancestors(gtid, taskdata, thread);

  resumed_task->td_flags.executing = 1; // resume the previous task

#if OMPX_TASKGRAPH
  if (is_taskgraph && __kmp_track_children_task(taskdata) &&
      taskdata->td_taskgroup) {
    // For a TDG the taskgroup count is released only here, after the reusable
    // task structure has already been reset.
    KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
  }
#endif

  KA_TRACE(
      10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
           gtid, taskdata, resumed_task));
template <bool ompt>
static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
                                                  kmp_int32 gtid,
                                                  kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
  KMP_DEBUG_ASSERT(gtid >= 0);
  // this routine will provide the task to resume
  __kmp_task_finish<ompt>(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));

  ompt_frame_t *ompt_frame;
  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
  ompt_frame->enter_frame = ompt_data_none;
  ompt_frame->enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer;
}

void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                       kmp_task_t *task) {
  __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
}

void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *task) {
  if (UNLIKELY(ompt_enabled.enabled)) {
    __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
    return;
  }
  __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
}

void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
                              kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));

  __kmp_task_finish<false>(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
                              kmp_team_t *team, int tid, int set_curr_task) {
  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];

  KF_TRACE(
      10,
      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
       tid, team, task, set_curr_task ? "TRUE" : "FALSE"));

  task->td_task_id = KMP_GEN_TASK_ID();
  task->td_team = team;
  task->td_ident = loc_ref;
  task->td_taskwait_ident = NULL;
  task->td_taskwait_counter = 0;
  task->td_taskwait_thread = 0;

  task->td_flags.tiedness = TASK_TIED;
  task->td_flags.tasktype = TASK_IMPLICIT;
  task->td_flags.proxy = TASK_FULL;

  // All implicit tasks are executed immediately, not deferred.
  task->td_flags.task_serial = 1;
  task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  task->td_flags.started = 1;
  task->td_flags.executing = 1;
  task->td_flags.complete = 0;
  task->td_flags.freed = 0;
#if OMPX_TASKGRAPH
  task->td_flags.onced = 0;
#endif

  task->td_depnode = NULL;
  task->td_last_tied = task;
  task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;

  if (set_curr_task) { // only do this init the first time the thread is created
    KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
    // Not used: implicit tasks are never deallocated.
    KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
    task->td_taskgroup = NULL; // An implicit task does not have a taskgroup
    task->td_dephash = NULL;
    __kmp_push_current_task_to_thread(this_thr, team, tid);
  } else {
    KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
    KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
  }

  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(task, tid);

  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
                team, task));
void __kmp_finish_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task->td_dephash) {
    int children;
    task->td_flags.complete = 1;
#if OMPX_TASKGRAPH
    task->td_flags.onced = 1;
#endif
    children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
    kmp_tasking_flags_t flags_old = task->td_flags;
    if (children == 0 && flags_old.complete == 1) {
      kmp_tasking_flags_t flags_new = flags_old;
      flags_new.complete = 0;
      if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
                                      *RCAST(kmp_int32 *, &flags_old),
                                      *RCAST(kmp_int32 *, &flags_new))) {
        KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans "
                       "dephash of implicit task %p\n",
                       thread->th.th_info.ds.ds_gtid, task));
        __kmp_dephash_free_entries(thread, task->td_dephash);
void __kmp_free_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task && task->td_dephash) {
    __kmp_dephash_free(thread, task->td_dephash);
    task->td_dephash = NULL;
// Round size up to a multiple of val (assumed to be a power of two), guarding
// against size_t overflow.
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  if (size & (val - 1)) {
    size &= ~(val - 1);
    if (size <= KMP_SIZE_T_MAX - val) {
      size += val; // Round up if there is no overflow.
    }
  }
  return size;
}
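// For example, __kmp_task_alloc below uses this helper to round the shareds
// offset (sizeof(kmp_taskdata_t) + sizeof_kmp_task_t) up to a multiple of
// sizeof(void *), so the shared-variable block that follows it stays
// pointer-aligned.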
kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                             kmp_tasking_flags_t *flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;

  if (UNLIKELY(!TCR_4(__kmp_init_middle)))
    __kmp_middle_initialize();

  if (flags->hidden_helper) {
    if (__kmp_enable_hidden_helper) {
      if (!TCR_4(__kmp_init_hidden_helper))
        __kmp_hidden_helper_initialize();
    } else {
      // Hidden helper threads are disabled; fall back to a regular task.
      flags->hidden_helper = FALSE;
    }
  }

  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  KMP_DEBUG_ASSERT(parent_task);
  if (parent_task->td_flags.final) {
    if (flags->merged_if0) {
    }
    flags->final = 1;
  }

  if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
    // An untied task forces the TSC algorithm to check the entire deque of the
    // victim thread; without it, checking the head of the deque is enough.
    KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
  }

  // Detachable tasks are not proxy tasks yet but could become so later; do the
  // tasking setup for them as well.
  if (UNLIKELY(flags->proxy == TASK_PROXY ||
               flags->detachable == TASK_DETACHABLE || flags->hidden_helper)) {
    if (flags->proxy == TASK_PROXY) {
      flags->tiedness = TASK_UNTIED;
      flags->merged_if0 = 1;
    }
    if ((thread->th.th_task_team) == NULL) {
      KMP_DEBUG_ASSERT(team->t.t_serialized);
      KA_TRACE(30,
               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
                gtid));
      __kmp_task_team_setup(thread, team, 1);
      thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
    }
    kmp_task_team_t *task_team = thread->th.th_task_team;

    // tasking must be enabled now as we might create proxy tasks
    if (!KMP_TASKING_ENABLED(task_team)) {
      KA_TRACE(30,
               ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n",
                gtid));
      __kmp_enable_tasking(task_team, thread);
      kmp_int32 tid = thread->th.th_info.ds.ds_tid;
      kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
      // No lock needed since only the owner can allocate its deque.
      if (thread_data->td.td_deque == NULL) {
        __kmp_alloc_task_deque(thread, thread_data);
      }
    }

    if ((flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE) &&
        task_team->tt.tt_found_proxy_tasks == FALSE)
      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
    if (flags->hidden_helper &&
        task_team->tt.tt_hidden_helper_task_encountered == FALSE)
      TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE);
  }

  // Calculate the shared structure offset, including padding after the
  // kmp_task_t struct, to align pointers in the shared struct.
  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
                shareds_offset));
  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
                sizeof_shareds));

#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
                                                               sizeof_shareds);
#else /* ! USE_FAST_MEMORY */
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
                                                               sizeof_shareds);
#endif /* USE_FAST_MEMORY */

  task = KMP_TASKDATA_TO_TASK(taskdata);

// Make sure task and taskdata are aligned appropriately.
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || KMP_ARCH_S390X || !KMP_HAVE_QUAD
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
  if (sizeof_shareds > 0) {
    // Avoid a double allocation by placing shareds right after taskdata.
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Make sure the shareds struct is aligned to pointer size.
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
  task->routine = task_entry;

  taskdata->td_task_id = KMP_GEN_TASK_ID();
  taskdata->td_team = thread->th.th_team;
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
  KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
  taskdata->td_ident = loc_ref;
  taskdata->td_taskwait_ident = NULL;
  taskdata->td_taskwait_counter = 0;
  taskdata->td_taskwait_thread = 0;
  KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
  // avoid copying ICVs for proxy tasks
  if (flags->proxy == TASK_FULL)
    copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);

  taskdata->td_flags = *flags;
  taskdata->td_task_team = thread->th.th_task_team;
  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
  taskdata->td_flags.tasktype = TASK_EXPLICIT;

  // For a hidden helper task, the team and task team must come from the
  // shadow thread.
  if (flags->hidden_helper) {
    kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
    taskdata->td_team = shadow_thread->th.th_team;
    taskdata->td_task_team = shadow_thread->th.th_task_team;
  }

  taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);

  taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  // Serialize the task if the team is serialized, so implicit parallel region
  // tasks are not left until program termination to execute; it also helps
  // locality to execute immediately.
  taskdata->td_flags.task_serial =
      (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
       taskdata->td_flags.tasking_ser || flags->merged_if0);

  taskdata->td_flags.started = 0;
  taskdata->td_flags.executing = 0;
  taskdata->td_flags.complete = 0;
  taskdata->td_flags.freed = 0;
#if OMPX_TASKGRAPH
  taskdata->td_flags.onced = 0;
#endif
  KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
  // start at one because the count covers the current task and its children
  KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
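  // Note: td_allocated_child_tasks deliberately starts at 1 rather than 0: the
  // count covers the task itself in addition to its children, and the task's
  // own reference is dropped in __kmp_free_task_and_ancestors once it
  // completes.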
  taskdata->td_taskgroup =
      parent_task->td_taskgroup; // the task inherits the parent's taskgroup
  taskdata->td_dephash = NULL;
  taskdata->td_depnode = NULL;
  taskdata->td_target_data.async_handle = NULL;
  if (flags->tiedness == TASK_UNTIED)
    taskdata->td_last_tied = NULL; // will be set when the task is scheduled
  else
    taskdata->td_last_tied = taskdata;
  taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;

  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, gtid);

  if (__kmp_track_children_task(taskdata)) {
    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
    // Only track allocated child tasks for explicit tasks, since implicit ones
    // are never freed.
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
      KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
    }
    if (flags->hidden_helper) {
      taskdata->td_flags.task_serial = FALSE;
      // Increment the number of hidden helper tasks to be executed.
      KMP_ATOMIC_INC(&__kmp_unexecuted_hidden_helper_tasks);
    }
  }

#if OMPX_TASKGRAPH
  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
  if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) &&
      (task_entry != (kmp_routine_entry_t)__kmp_taskloop_task)) {
    taskdata->is_taskgraph = 1;
    taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
    taskdata->td_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
  }
#endif
  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                gtid, taskdata, taskdata->td_parent));

  return task;
kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry) {
  kmp_task_t *retval;
  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
  __kmp_assert_valid_gtid(gtid);
  input_flags->native = FALSE;
  // __kmp_task_alloc() sets up all other runtime flags
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                input_flags->proxy ? "proxy" : "",
                input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                            sizeof_shareds, task_entry);

  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));

  return retval;
kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                         kmp_int32 flags,
                                         size_t sizeof_kmp_task_t,
                                         size_t sizeof_shareds,
                                         kmp_routine_entry_t task_entry,
                                         kmp_int64 device_id) {
  auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
  // target tasks are always untied and, when available, hidden helper tasks
  input_flags.tiedness = TASK_UNTIED;

  if (__kmp_enable_hidden_helper)
    input_flags.hidden_helper = TRUE;

  return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry);
kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
                                            kmp_task_t *new_task,
                                            kmp_int32 naffins,
                                            kmp_task_affinity_info_t *affin_list) {
__attribute__((target("backchain")))
static void
__kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
                  kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread;

  KA_TRACE(
      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
           gtid, taskdata, current_task));
  KMP_DEBUG_ASSERT(task);
  if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY &&
               taskdata->td_flags.complete == 1)) {
    // This is a proxy task that was already completed but still needs to run
    // its bottom half.
    KA_TRACE(
        30,
        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
         gtid, taskdata));

    __kmp_bottom_half_finish_proxy(gtid, task);

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
                  "proxy task %p, resuming task %p\n",
                  gtid, taskdata, current_task));
    return;
  }

  ompt_thread_info_t oldInfo;
  if (UNLIKELY(ompt_enabled.enabled)) {
    // Store the thread's state and restore it after the task.
    thread = __kmp_threads[gtid];
    oldInfo = thread->th.ompt_thread_info;
    thread->th.ompt_thread_info.wait_id = 0;
    thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
                                            ? ompt_state_work_serial
                                            : ompt_state_work_parallel;
    taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }

  // Proxy tasks are not started by the runtime.
  if (taskdata->td_flags.proxy != TASK_PROXY) {
    __kmp_task_start(gtid, task, current_task);
  }

  // Discard the task if its taskgroup or parallel region has been cancelled.
  if (UNLIKELY(__kmp_omp_cancellation)) {
    thread = __kmp_threads[gtid];
    kmp_team_t *this_team = thread->th.th_team;
    kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
    if ((taskgroup && taskgroup->cancel_request) ||
        (this_team->t.t_cancel_request == cancel_parallel)) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
      ompt_data_t *task_data;
      if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
        __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
        ompt_callbacks.ompt_callback(ompt_callback_cancel)(
            task_data,
            ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
                                                      : ompt_cancel_parallel) |
                ompt_cancel_discarded_task,
            NULL);
      }
#endif
    }
  }

  // If the task is untied, remember the last tied task of the encountering
  // thread so the Task Scheduling Constraint check still works.
  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    taskdata->td_last_tied = current_task->td_last_tied;
    KMP_DEBUG_ASSERT(taskdata->td_last_tied);
  }

#if KMP_STATS_ENABLED
  switch (KMP_GET_THREAD_STATE()) {
  case FORK_JOIN_BARRIER:
    KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
    break;
  case PLAIN_BARRIER:
    KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
    break;
  case TASKYIELD:
    KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
    break;
  case TASKWAIT:
    KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
    break;
  case TASKGROUP:
    KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
    break;
  default:
    KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
    break;
  }
#endif // KMP_STATS_ENABLED

  // OMPT task begin
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_start(task, current_task, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_dispatch &&
               taskdata->ompt_task_info.dispatch_chunk.iterations > 0)) {
    ompt_data_t instance = ompt_data_none;
    instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk);
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(taskdata->ompt_task_info.task_data),
        ompt_dispatch_taskloop_chunk, instance);
    taskdata->ompt_task_info.dispatch_chunk = {0, 0};
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_task_begin();

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  kmp_uint64 cur_time;
  kmp_int32 kmp_itt_count_task =
      __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
      current_task->td_flags.tasktype == TASK_IMPLICIT;
  if (kmp_itt_count_task) {
    thread = __kmp_threads[gtid];
    // Time an outermost-level explicit task on a barrier.
    if (thread->th.th_bar_arrive_time)
      cur_time = __itt_get_timestamp();
    else
      kmp_itt_count_task = 0; // thread is not on a barrier - skip timing
  }
  KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task)
#endif

#if ENABLE_LIBOMPTARGET
  if (taskdata->td_target_data.async_handle != NULL) {
    // A valid target async handle means the task routine already ran once;
    // query the handle for completion instead of re-executing the routine.
    KMP_ASSERT(tgt_target_nowait_query);
    tgt_target_nowait_query(&taskdata->td_target_data.async_handle);
  } else
#endif
  if (task->routine != NULL) {
#ifdef KMP_GOMP_COMPAT
    if (taskdata->td_flags.native) {
      ((void (*)(void *))(*(task->routine)))(task->shareds);
    } else
#endif /* KMP_GOMP_COMPAT */
    {
      (*(task->routine))(gtid, task);
    }
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (kmp_itt_count_task) {
    // Barrier imbalance - adjust arrive time with the task duration
    thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
  }
  KMP_FSYNC_CANCEL(taskdata); // destroy self (just executed)
  KMP_FSYNC_RELEASING(taskdata->td_parent); // releasing parent
#endif

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_task_end();

  // Proxy tasks are not finished by the runtime here.
  if (taskdata->td_flags.proxy != TASK_PROXY) {
    if (UNLIKELY(ompt_enabled.enabled)) {
      thread->th.ompt_thread_info = oldInfo;
      if (taskdata->td_flags.tiedness == TASK_TIED) {
        taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
      }
      __kmp_task_finish<true>(gtid, task, current_task);
    } else
      __kmp_task_finish<false>(gtid, task, current_task);
  }

  KA_TRACE(
      30,
      ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
       gtid, taskdata, current_task));
kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
                                kmp_task_t *new_task) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));

  kmp_taskdata_t *parent;
  if (UNLIKELY(ompt_enabled.enabled)) {
    parent = new_taskdata->td_parent;
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }

  // Try to queue the new task; if the queue is full, execute it immediately.
  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) {
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

  KA_TRACE(
      10,
      ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
       "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
       gtid, loc_ref, new_taskdata));

  if (UNLIKELY(ompt_enabled.enabled)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }

  return TASK_CURRENT_NOT_QUEUED;
kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                         bool serialize_immediate) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

#if OMPX_TASKGRAPH
  if (new_taskdata->is_taskgraph &&
      __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
    kmp_tdg_info_t *tdg = new_taskdata->tdg;
    // Extend the record_map if needed.
    if (new_taskdata->td_task_id >= new_taskdata->tdg->map_size) {
      __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
      // map_size could have been updated by another thread (recursive taskloop)
      if (new_taskdata->td_task_id >= tdg->map_size) {
        kmp_uint old_size = tdg->map_size;
        kmp_uint new_size = old_size * 2;
        kmp_node_info_t *old_record = tdg->record_map;
        kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
            new_size * sizeof(kmp_node_info_t));

        KMP_MEMCPY(new_record, old_record, old_size * sizeof(kmp_node_info_t));
        tdg->record_map = new_record;

        __kmp_free(old_record);

        for (kmp_int i = old_size; i < new_size; i++) {
          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
              __kmp_successors_size * sizeof(kmp_int32));
          new_record[i].task = nullptr;
          new_record[i].successors = successorsList;
          new_record[i].nsuccessors = 0;
          new_record[i].npredecessors = 0;
          new_record[i].successors_size = __kmp_successors_size;
          KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
        }
        // Update the size last so other threads never use old_record with the
        // already-updated map_size.
        tdg->map_size = new_size;
      }
      __kmp_release_bootstrap_lock(&tdg->graph_lock);
    }
    // Record the task.
    if (tdg->record_map[new_taskdata->td_task_id].task == nullptr) {
      tdg->record_map[new_taskdata->td_task_id].task = new_task;
      tdg->record_map[new_taskdata->td_task_id].parent_task =
          new_taskdata->td_parent;
      KMP_ATOMIC_INC(&tdg->num_tasks);
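      // Note: while a TDG is being recorded, every task created in the region
      // is registered here under its td_task_id; record_map grows geometrically
      // (doubling) under graph_lock so concurrent recorders never index past
      // the allocated node array.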
    }
  }
#endif

  if (new_taskdata->td_flags.proxy == TASK_PROXY ||
      __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) {
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    if (serialize_immediate)
      new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  } else if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
             __kmp_wpolicy_passive) {
    // Wake up sleeping threads in the team so the deferred task can be stolen.
    kmp_info_t *this_thr = __kmp_threads[gtid];
    kmp_team_t *team = this_thr->th.th_team;
    kmp_int32 nthreads = this_thr->th.th_team_nproc;
    for (int i = 0; i < nthreads; ++i) {
      kmp_info_t *thread = team->t.t_threads[i];
      if (thread == this_thr)
        continue;
      if (thread->th.th_sleep_loc != NULL) {
        __kmp_null_resume_wrapper(thread);
      }
    }
  }
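  // Note: under a passive wait policy (blocktime not set to "infinite"), team
  // threads may be sleeping instead of spinning, so a freshly deferred task
  // explicitly wakes them; otherwise it could sit in the deque until the next
  // barrier.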
  return TASK_CURRENT_NOT_QUEUED;
kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
                          kmp_task_t *new_task) {
  kmp_int32 res;
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));
  __kmp_assert_valid_gtid(gtid);

  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled)) {
    if (!new_taskdata->td_flags.started) {
      OMPT_STORE_RETURN_ADDRESS(gtid);
      parent = new_taskdata->td_parent;
      if (!parent->ompt_task_info.frame.enter_frame.ptr) {
        parent->ompt_task_info.frame.enter_frame.ptr =
            OMPT_GET_FRAME_ADDRESS(0);
      }
      if (ompt_enabled.ompt_callback_task_create) {
        ompt_callbacks.ompt_callback(ompt_callback_task_create)(
            &(parent->ompt_task_info.task_data),
            &(parent->ompt_task_info.frame),
            &(new_taskdata->ompt_task_info.task_data),
            ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
            OMPT_LOAD_RETURN_ADDRESS(gtid));
      }
    } else {
      // We are scheduling the continuation of an untied task; switch back to
      // the scheduling parent.
      __ompt_task_finish(new_task,
                         new_taskdata->ompt_task_info.scheduling_parent,
                         ompt_task_switch);
      new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
    }
  }

  res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));

  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
  return res;
kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *new_task, void *codeptr_ra) {
  kmp_int32 res;
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));

  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
    parent = new_taskdata->td_parent;
    if (!parent->ompt_task_info.frame.enter_frame.ptr)
      parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
          codeptr_ra);
    }
  }

  res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));

  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
  return res;
template <bool ompt>
static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
                                              void *frame_address,
                                              void *return_address) {
  kmp_taskdata_t *taskdata = nullptr;
  kmp_info_t *thread;
  int thread_finished = FALSE;
  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);

  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    ompt_data_t *my_task_data;
    ompt_data_t *my_parallel_data;

    my_task_data = &(taskdata->ompt_task_info.task_data);
    my_parallel_data = OMPT_CUR_TEAM_DATA(thread);

    taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;

    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
          my_task_data, return_address);
    }
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
          my_task_data, return_address);
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

    void *itt_sync_obj = NULL;
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);

    bool must_wait =
        !taskdata->td_flags.team_serial && !taskdata->td_flags.final;

    must_wait = must_wait || (thread->th.th_task_team != NULL &&
                              thread->th.th_task_team->tt.tt_found_proxy_tasks);
    // If a hidden helper task was encountered, we must wait here as well.
    must_wait =
        must_wait ||
        (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);

    if (must_wait) {
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *,
                &(taskdata->td_incomplete_child_tasks)),
          0U);
      while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }

    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
    KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with children

    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
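    // Note: td_taskwait_thread holds gtid + 1 while the taskwait is active;
    // negating it (rather than zeroing) tells an attached debugger that the
    // wait has completed while still recording which thread performed it.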
2303#if OMPT_SUPPORT && OMPT_OPTIONAL
2305 if (ompt_enabled.ompt_callback_sync_region_wait) {
2306 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2307 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
2308 my_task_data, return_address);
2310 if (ompt_enabled.ompt_callback_sync_region) {
2311 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2312 ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
2313 my_task_data, return_address);
2315 taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
    KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
                  "returning TASK_CURRENT_NOT_QUEUED\n",
                  gtid, taskdata));
  }

  return TASK_CURRENT_NOT_QUEUED;
}
2327#if OMPT_SUPPORT && OMPT_OPTIONAL
static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                          void *frame_address,
                                          void *return_address) {
  return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
                                            return_address);
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
  return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
}
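// ---------------------------------------------------------------------------
// __kmpc_omp_taskyield: implementation of "#pragma omp taskyield".
// Unlike taskwait, this is only a scheduling point: the thread may execute
// other ready tasks from its team's queues, but it does not wait for its own
// child tasks to complete before returning.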
kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
  kmp_taskdata_t *taskdata = NULL;
  kmp_info_t *thread;
  int thread_finished = FALSE;

  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);

  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                gtid, loc_ref, end_part));
2361 __kmp_assert_valid_gtid(gtid);
2363 if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
2364 thread = __kmp_threads[gtid];
2365 taskdata = thread->th.th_current_task;
2372 taskdata->td_taskwait_counter += 1;
2373 taskdata->td_taskwait_ident = loc_ref;
2374 taskdata->td_taskwait_thread = gtid + 1;
2377 void *itt_sync_obj = NULL;
2379 KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
2382 if (!taskdata->td_flags.team_serial) {
2383 kmp_task_team_t *task_team = thread->th.th_task_team;
2384 if (task_team != NULL) {
2385 if (KMP_TASKING_ENABLED(task_team)) {
2387 if (UNLIKELY(ompt_enabled.enabled))
2388 thread->th.ompt_thread_info.ompt_task_yielded = 1;
2390 __kmp_execute_tasks_32(
2391 thread, gtid, (kmp_flag_32<> *)NULL, FALSE,
2392 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
2393 __kmp_task_stealing_constraint);
2395 if (UNLIKELY(ompt_enabled.enabled))
2396 thread->th.ompt_thread_info.ompt_task_yielded = 0;
2402 KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
2407 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
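// ---------------------------------------------------------------------------
// Task reduction support.
// Each reduction item gets either an array of per-thread private copies
// (eager, one cache-line-padded slot per thread) or an array of per-thread
// pointers that are filled in lazily on first use.  Roughly, user code such as
//
//   int sum = 0;
//   #pragma omp taskgroup task_reduction(+ : sum)
//   {
//     #pragma omp task in_reduction(+ : sum)
//     sum += f(i);
//   }
//
// reaches __kmp_task_reduction_init() via __kmpc_task_reduction_init() /
// __kmpc_taskred_init(), and each task looks up its private copy through
// __kmp_task_reduction_get_th_data().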
  unsigned reserved31 : 31;
template <typename T>
void *__kmp_task_reduction_init(int gtid, int num, T *data) {
2517 __kmp_assert_valid_gtid(gtid);
2518 kmp_info_t *thread = __kmp_threads[gtid];
2519 kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
2520 kmp_uint32 nth = thread->th.th_team_nproc;
  KMP_ASSERT(tg != NULL);
  KMP_ASSERT(data != NULL);
  KMP_ASSERT(num > 0);
  if (nth == 1 && !__kmp_enable_hidden_helper) {
    KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
                  gtid, tg));
    return (void *)tg;
  }
  KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
                gtid, tg, num));
  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
      thread, num * sizeof(kmp_taskred_data_t));
  for (int i = 0; i < num; ++i) {
    size_t size = data[i].reduce_size - 1;
    // round the size up to a cache line per thread-specific item
    size += CACHE_LINE - size % CACHE_LINE;
    KMP_ASSERT(data[i].reduce_comb != NULL); // combiner is mandatory
    arr[i].flags = data[i].flags;
    __kmp_assign_orig<T>(arr[i], data[i]);
    if (!arr[i].flags.lazy_priv) {
      // eager allocation: one cache-line-padded slot per thread
      arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
      if (arr[i].reduce_init != NULL) {
        // initialize all thread-specific items
        for (size_t j = 0; j < nth; ++j) {
          __kmp_call_init<T>(arr[i], j * size);
        }
      }
    } else {
      // lazy allocation: only space for per-thread pointers now; objects are
      // allocated and initialized if/when actually requested
      arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
    }
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
2586 kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
2587 if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
2588 kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
2589 this_tdg->rec_taskred_data =
2591 this_tdg->rec_num_taskred = num;
2592 KMP_MEMCPY(this_tdg->rec_taskred_data, data,
2613 kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
2614 if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
2615 kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
2616 this_tdg->rec_taskred_data =
2618 this_tdg->rec_num_taskred = num;
2619 KMP_MEMCPY(this_tdg->rec_taskred_data, data,
template <typename T>
void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data,
                                    kmp_taskgroup_t *tg, void *reduce_data) {
  KA_TRACE(20, ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
                " from data %p\n",
                thr, tg, reduce_data));
  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
      thr, num * sizeof(kmp_taskred_data_t));
  // threads share private copies, thunk routines, sizes, flags, etc.;
  // only the shared pointers are unique per copy of the descriptor array
  KMP_MEMCPY(arr, reduce_data, num * sizeof(kmp_taskred_data_t));
  for (int i = 0; i < num; ++i) {
    arr[i].reduce_shar = data[i].reduce_shar;
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
}
void *__kmp_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 nth = thread->th.th_team_nproc;
  if (nth == 1)
    return data; // nothing to do
  kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
  if (tg == NULL)
    tg = thread->th.th_current_task->td_taskgroup;
  KMP_ASSERT(tg != NULL);
  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)(tg->reduce_data);
  kmp_int32 num = tg->reduce_num_data;
  kmp_int32 tid = thread->th.th_info.ds.ds_tid;
2670 if ((thread->th.th_current_task->is_taskgraph) &&
2671 (!__kmp_tdg_is_recording(
2672 __kmp_global_tdgs[__kmp_curr_tdg_idx]->tdg_status))) {
2673 tg = thread->th.th_current_task->td_taskgroup;
2674 KMP_ASSERT(tg != NULL);
2675 KMP_ASSERT(tg->reduce_data != NULL);
2677 num = tg->reduce_num_data;
  KMP_ASSERT(data != NULL);
  while (tg != NULL) {
    for (int i = 0; i < num; ++i) {
      if (!arr[i].flags.lazy_priv) {
        if (data == arr[i].reduce_shar ||
            (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
          return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
      } else {
        // lazy allocation: reduce_priv points to an array of per-thread
        // pointers; check the shared location first, then the private ones
        void **p_priv = (void **)(arr[i].reduce_priv);
        if (data == arr[i].reduce_shar)
          goto found;
        for (int j = 0; j < nth; ++j)
          if (data == p_priv[j])
            goto found;
        continue; // not found, continue search
      found:
        if (p_priv[tid] == NULL) {
          // allocate thread-specific object lazily
          p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
          if (arr[i].reduce_init != NULL) {
            if (arr[i].reduce_orig != NULL) { // new interface
              ((void (*)(void *, void *))arr[i].reduce_init)(
                  p_priv[tid], arr[i].reduce_orig);
            } else { // old interface (single parameter)
              ((void (*)(void *))arr[i].reduce_init)(p_priv[tid]);
            }
          }
        }
        return p_priv[tid];
      }
    }
    KMP_ASSERT(tg->parent);
    tg = tg->parent;
    arr = (kmp_taskred_data_t *)(tg->reduce_data);
    num = tg->reduce_num_data;
  }
  KMP_ASSERT2(0, "Unknown task reduction item");
  return NULL; // ERROR, this line never executed
}
static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
  kmp_int32 nth = th->th.th_team_nproc;
  KMP_DEBUG_ASSERT(nth > 1 ||
                   __kmp_enable_hidden_helper); // not called if nth == 1,
                                                // unless hidden helpers are on
  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
  kmp_int32 num = tg->reduce_num_data;
  for (int i = 0; i < num; ++i) {
    void *sh_data = arr[i].reduce_shar;
    void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
    void (*f_comb)(void *, void *) =
        (void (*)(void *, void *))(arr[i].reduce_comb);
    if (!arr[i].flags.lazy_priv) {
      void *pr_data = arr[i].reduce_priv;
      size_t size = arr[i].reduce_size;
      for (int j = 0; j < nth; ++j) {
        void *priv_data = (char *)pr_data + j * size;
        f_comb(sh_data, priv_data); // combine results
        if (f_fini)
          f_fini(priv_data); // finalize if needed
      }
    } else {
      void **pr_data = (void **)(arr[i].reduce_priv);
      for (int j = 0; j < nth; ++j) {
        if (pr_data[j] != NULL) {
          f_comb(sh_data, pr_data[j]); // combine results
          if (f_fini)
            f_fini(pr_data[j]); // finalize if needed
          __kmp_free(pr_data[j]);
        }
      }
    }
    __kmp_free(arr[i].reduce_priv);
  }
  __kmp_thread_free(th, arr);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
2768static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) {
2769 __kmp_thread_free(th, tg->reduce_data);
2770 tg->reduce_data = NULL;
2771 tg->reduce_num_data = 0;
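// ---------------------------------------------------------------------------
// Reduction modifiers (reduction(task, ...) on parallel/worksharing
// constructs) reuse the taskgroup machinery: an implicit taskgroup is opened,
// and the per-item descriptor array is published once per team in
// t_tg_reduce_data[is_ws] so the other threads can clone it instead of
// re-initializing every item themselves.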
template <typename T>
void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                         int num, T *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_int32 nth = thr->th.th_team_nproc;
  __kmpc_taskgroup(loc, gtid); // form new taskgroup first
  if (nth == 1) {
    KA_TRACE(10,
             ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
              gtid, thr->th.th_current_task->td_taskgroup));
    return (void *)thr->th.th_current_task->td_taskgroup;
  }
  kmp_team_t *team = thr->th.th_team;
  void *reduce_data;
  kmp_taskgroup_t *tg;
  reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
  if (reduce_data == NULL &&
      __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
                                 (void *)1)) {
    // single thread enters this block to initialize common reduction data
    KMP_DEBUG_ASSERT(reduce_data == NULL);
    // first initialize own data, then publish a copy other threads can use
    tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data);
    reduce_data = __kmp_thread_malloc(thr, num * sizeof(kmp_taskred_data_t));
    KMP_MEMCPY(reduce_data, tg->reduce_data, num * sizeof(kmp_taskred_data_t));
    // fini counters should be 0 at this point
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0);
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0);
    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data);
  } else {
    while (
        (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])) ==
        (void *)1) { // wait for initialization by the winning thread
      KMP_CPU_PAUSE();
    }
    KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be a valid pointer here
    tg = thr->th.th_current_task->td_taskgroup;
    __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
  }
  return tg;
}
2834 int num,
void *data) {
2835 return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
2855 return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
2868 __kmpc_end_taskgroup(loc, gtid);
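// ---------------------------------------------------------------------------
// __kmpc_taskgroup / __kmpc_end_taskgroup: implementation of
// "#pragma omp taskgroup".  Begin allocates a kmp_taskgroup_t, links it to the
// enclosing group and installs it on the current task; end waits until the
// group's task counter drops to zero (executing other tasks meanwhile), runs
// any pending task reductions, and restores the parent group.  E.g.:
//
//   #pragma omp taskgroup
//   {
//     #pragma omp task
//     work();
//   }   // __kmpc_end_taskgroup() returns only after work() and all of its
//       // descendant tasks have completed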
void __kmpc_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *tg_new =
      (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
2879 KMP_ATOMIC_ST_RLX(&tg_new->count, 0);
2880 KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq);
2881 tg_new->parent = taskdata->td_taskgroup;
2882 tg_new->reduce_data = NULL;
2883 tg_new->reduce_num_data = 0;
2884 tg_new->gomp_data = NULL;
2885 taskdata->td_taskgroup = tg_new;
2887#if OMPT_SUPPORT && OMPT_OPTIONAL
2888 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
2889 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2891 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2892 kmp_team_t *team = thread->th.th_team;
2893 ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
2895 ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;
2897 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2898 ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2899 &(my_task_data), codeptr);
void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
  int thread_finished = FALSE;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_data_t my_task_data;
  ompt_data_t my_parallel_data;
  void *codeptr = nullptr;
2918 if (UNLIKELY(ompt_enabled.enabled)) {
2919 team = thread->th.th_team;
2920 my_task_data = taskdata->ompt_task_info.task_data;
2922 my_parallel_data = team->t.ompt_team_info.parallel_data;
2923 codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2925 codeptr = OMPT_GET_RETURN_ADDRESS(0);
  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
2930 KMP_DEBUG_ASSERT(taskgroup != NULL);
2931 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
2933 if (__kmp_tasking_mode != tskm_immediate_exec) {
2935 taskdata->td_taskwait_counter += 1;
2936 taskdata->td_taskwait_ident = loc;
2937 taskdata->td_taskwait_thread = gtid + 1;
2941 void *itt_sync_obj = NULL;
2943 KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
2947#if OMPT_SUPPORT && OMPT_OPTIONAL
2948 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
2949 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2950 ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
2951 &(my_task_data), codeptr);
2955 if (!taskdata->td_flags.team_serial ||
2956 (thread->th.th_task_team != NULL &&
2957 (thread->th.th_task_team->tt.tt_found_proxy_tasks ||
2958 thread->th.th_task_team->tt.tt_hidden_helper_task_encountered))) {
2959 kmp_flag_32<false, false> flag(
2960 RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
2961 while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
2962 flag.execute_tasks(thread, gtid, FALSE,
2963 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
2964 __kmp_task_stealing_constraint);
2967 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
2969#if OMPT_SUPPORT && OMPT_OPTIONAL
2970 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
2971 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2972 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
2973 &(my_task_data), codeptr);
2978 KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
2979 KMP_FSYNC_ACQUIRED(taskdata);
2982 KMP_DEBUG_ASSERT(taskgroup->count == 0);
2984 if (taskgroup->reduce_data != NULL &&
2985 !taskgroup->gomp_data) {
2988 kmp_team_t *t = thread->th.th_team;
2992 if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL &&
2995 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]);
2996 if (cnt == thread->th.th_team_nproc - 1) {
2999 __kmp_task_reduction_fini(thread, taskgroup);
3002 __kmp_thread_free(thread, reduce_data);
3003 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL);
3004 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0);
3008 __kmp_task_reduction_clean(thread, taskgroup);
    } else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) !=
                   NULL &&
               reduce_data != (void *)1) {
3014 cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]);
3015 if (cnt == thread->th.th_team_nproc - 1) {
3017 __kmp_task_reduction_fini(thread, taskgroup);
3020 __kmp_thread_free(thread, reduce_data);
3021 KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL);
3022 KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0);
3026 __kmp_task_reduction_clean(thread, taskgroup);
3030 __kmp_task_reduction_fini(thread, taskgroup);
3034 taskdata->td_taskgroup = taskgroup->parent;
3035 __kmp_thread_free(thread, taskgroup);
  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
                gtid, taskdata));
3040#if OMPT_SUPPORT && OMPT_OPTIONAL
3041 if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
3042 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
3043 ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
3044 &(my_task_data), codeptr);
3049static kmp_task_t *__kmp_get_priority_task(kmp_int32 gtid,
3050 kmp_task_team_t *task_team,
3051 kmp_int32 is_constrained) {
3052 kmp_task_t *task = NULL;
3053 kmp_taskdata_t *taskdata;
3054 kmp_taskdata_t *current;
3055 kmp_thread_data_t *thread_data;
  int ntasks = task_team->tt.tt_num_task_pri;
  if (ntasks == 0) {
    KA_TRACE(
        20, ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n", gtid));
    return NULL;
  }
  do {
    // decrement num_tasks to "reserve" one task to get for execution
    if (__kmp_atomic_compare_store(&task_team->tt.tt_num_task_pri, ntasks,
                                   ntasks - 1))
      break;
    ntasks = task_team->tt.tt_num_task_pri;
  } while (ntasks > 0);
  if (ntasks == 0) {
    KA_TRACE(20, ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n",
                  __kmp_get_gtid()));
    return NULL;
  }
3076 kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
3078 KMP_ASSERT(list != NULL);
3079 thread_data = &list->td;
3080 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3081 deque_ntasks = thread_data->td.td_deque_ntasks;
3082 if (deque_ntasks == 0) {
3083 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3084 KA_TRACE(20, (
"__kmp_get_priority_task: T#%d No tasks to get from %p\n",
3085 __kmp_get_gtid(), thread_data));
3088 }
while (deque_ntasks == 0);
3089 KMP_DEBUG_ASSERT(deque_ntasks);
3090 int target = thread_data->td.td_deque_head;
3091 current = __kmp_threads[gtid]->th.th_current_task;
3092 taskdata = thread_data->td.td_deque[target];
3093 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
3095 thread_data->td.td_deque_head =
3096 (target + 1) & TASK_DEQUE_MASK(thread_data->td);
3098 if (!task_team->tt.tt_untied_task_encountered) {
3100 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3101 KA_TRACE(20, (
"__kmp_get_priority_task(exit #3): T#%d could not get task "
3102 "from %p: task_team=%p ntasks=%d head=%u tail=%u\n",
3103 gtid, thread_data, task_team, deque_ntasks, target,
3104 thread_data->td.td_deque_tail));
3105 task_team->tt.tt_num_task_pri++;
3111 for (i = 1; i < deque_ntasks; ++i) {
3112 target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
3113 taskdata = thread_data->td.td_deque[target];
3114 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
3120 if (taskdata == NULL) {
3122 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3124 10, (
"__kmp_get_priority_task(exit #4): T#%d could not get task from "
3125 "%p: task_team=%p ntasks=%d head=%u tail=%u\n",
3126 gtid, thread_data, task_team, deque_ntasks,
3127 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3128 task_team->tt.tt_num_task_pri++;
3132 for (i = i + 1; i < deque_ntasks; ++i) {
3134 target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
3135 thread_data->td.td_deque[prev] = thread_data->td.td_deque[target];
3139 thread_data->td.td_deque_tail ==
3140 (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(thread_data->td)));
3141 thread_data->td.td_deque_tail = target;
3143 thread_data->td.td_deque_ntasks = deque_ntasks - 1;
3144 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3145 task = KMP_TASKDATA_TO_TASK(taskdata);
3150static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
3151 kmp_task_team_t *task_team,
3152 kmp_int32 is_constrained) {
3154 kmp_taskdata_t *taskdata;
3155 kmp_thread_data_t *thread_data;
3158 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3159 KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
3162 thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
3164 KA_TRACE(10, (
"__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
3165 gtid, thread_data->td.td_deque_ntasks,
3166 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3168 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
3170 (
"__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
3171 "ntasks=%d head=%u tail=%u\n",
3172 gtid, thread_data->td.td_deque_ntasks,
3173 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3177 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3179 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
3180 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3182 (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
3183 "ntasks=%d head=%u tail=%u\n",
3184 gtid, thread_data->td.td_deque_ntasks,
3185 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3189 tail = (thread_data->td.td_deque_tail - 1) &
3190 TASK_DEQUE_MASK(thread_data->td);
3191 taskdata = thread_data->td.td_deque[tail];
3193 if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
3194 thread->th.th_current_task)) {
3196 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3198 (
"__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
3199 "ntasks=%d head=%u tail=%u\n",
3200 gtid, thread_data->td.td_deque_ntasks,
3201 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3205 thread_data->td.td_deque_tail = tail;
3206 TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);
3208 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3210 KA_TRACE(10, (
"__kmp_remove_my_task(exit #4): T#%d task %p removed: "
3211 "ntasks=%d head=%u tail=%u\n",
3212 gtid, taskdata, thread_data->td.td_deque_ntasks,
3213 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
3215 task = KMP_TASKDATA_TO_TASK(taskdata);
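// ---------------------------------------------------------------------------
// __kmp_steal_task: steal a task from the head of the victim thread's deque
// (the owner pops from the tail; thieves take from the head to reduce
// contention).  If the task-scheduling constraint forbids the head task and
// untied tasks were seen, the whole deque is scanned and compacted; the
// thief's "finished" state is also cleared since it is about to run work.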
3222static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
3223 kmp_task_team_t *task_team,
3224 std::atomic<kmp_int32> *unfinished_threads,
3225 int *thread_finished,
3226 kmp_int32 is_constrained) {
3228 kmp_taskdata_t *taskdata;
3229 kmp_taskdata_t *current;
3230 kmp_thread_data_t *victim_td, *threads_data;
3232 kmp_int32 victim_tid;
3234 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3236 threads_data = task_team->tt.tt_threads_data;
3237 KMP_DEBUG_ASSERT(threads_data != NULL);
3239 victim_tid = victim_thr->th.th_info.ds.ds_tid;
3240 victim_td = &threads_data[victim_tid];
3242 KA_TRACE(10, (
"__kmp_steal_task(enter): T#%d try to steal from T#%d: "
3243 "task_team=%p ntasks=%d head=%u tail=%u\n",
3244 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
3245 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
3246 victim_td->td.td_deque_tail));
3248 if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
3249 KA_TRACE(10, (
"__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
3250 "task_team=%p ntasks=%d head=%u tail=%u\n",
3251 gtid, __kmp_gtid_from_thread(victim_thr), task_team,
3252 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
3253 victim_td->td.td_deque_tail));
3257 __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
3259 int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
3262 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3263 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
3264 "task_team=%p ntasks=%d head=%u tail=%u\n",
3265 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
3266 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
3270 KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
3271 current = __kmp_threads[gtid]->th.th_current_task;
3272 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
3273 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
3275 victim_td->td.td_deque_head =
3276 (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
3278 if (!task_team->tt.tt_untied_task_encountered) {
3280 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3281 KA_TRACE(10, (
"__kmp_steal_task(exit #3): T#%d could not steal from "
3282 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
3283 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
3284 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
3289 target = victim_td->td.td_deque_head;
3291 for (i = 1; i < ntasks; ++i) {
3292 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
3293 taskdata = victim_td->td.td_deque[target];
3294 if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
3300 if (taskdata == NULL) {
3302 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3303 KA_TRACE(10, (
"__kmp_steal_task(exit #4): T#%d could not steal from "
3304 "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
3305 gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
3306 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
3310 for (i = i + 1; i < ntasks; ++i) {
3312 target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
3313 victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
3317 victim_td->td.td_deque_tail ==
3318 (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
3319 victim_td->td.td_deque_tail = target;
3321 if (*thread_finished) {
3328 KMP_ATOMIC_INC(unfinished_threads);
3331 (
"__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
3332 gtid, count + 1, task_team));
3333 *thread_finished = FALSE;
3335 TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
3337 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
3341 (
"__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
3342 "task_team=%p ntasks=%d head=%u tail=%u\n",
3343 gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
3344 ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
3346 task = KMP_TASKDATA_TO_TASK(taskdata);
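// ---------------------------------------------------------------------------
// __kmp_execute_tasks_template: the central task-scheduling loop used by
// taskwait, taskgroup, barriers and idle spinning.  Each iteration tries, in
// order, the priority-task lists, the thread's own deque, and then stealing
// from a (remembered or random) victim; between attempts it re-checks the
// caller-supplied flag so it can return as soon as the wait condition is
// satisfied.  final_spin marks a barrier's terminal spin, where the thread
// must also decrement tt_unfinished_threads once it runs out of work.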
template <class C>
static inline int __kmp_execute_tasks_template(
    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
3364 kmp_task_team_t *task_team = thread->th.th_task_team;
3365 kmp_thread_data_t *threads_data;
3367 kmp_info_t *other_thread;
3368 kmp_taskdata_t *current_task = thread->th.th_current_task;
3369 std::atomic<kmp_int32> *unfinished_threads;
3370 kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
3371 tid = thread->th.th_info.ds.ds_tid;
3373 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3374 KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);
3376 if (task_team == NULL || current_task == NULL)
3379 KA_TRACE(15, (
"__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
3380 "*thread_finished=%d\n",
3381 gtid, final_spin, *thread_finished));
3383 thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
3384 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
3386 KMP_DEBUG_ASSERT(threads_data != NULL);
3388 nthreads = task_team->tt.tt_nproc;
3389 unfinished_threads = &(task_team->tt.tt_unfinished_threads);
3390 KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks ||
3391 task_team->tt.tt_hidden_helper_task_encountered);
3392 KMP_DEBUG_ASSERT(*unfinished_threads >= 0);
3398 if (task_team->tt.tt_num_task_pri) {
3399 task = __kmp_get_priority_task(gtid, task_team, is_constrained);
3401 if (task == NULL && use_own_tasks) {
3402 task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
3404 if ((task == NULL) && (nthreads > 1)) {
3408 if (victim_tid == -2) {
3409 victim_tid = threads_data[tid].td.td_deque_last_stolen;
3412 other_thread = threads_data[victim_tid].td.td_thr;
3414 if (victim_tid != -1) {
3416 }
else if (!new_victim) {
3422 victim_tid = __kmp_get_random(thread) % (nthreads - 1);
3423 if (victim_tid >= tid) {
3427 other_thread = threads_data[victim_tid].td.td_thr;
3437 if ((__kmp_tasking_mode == tskm_task_teams) &&
3438 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
3439 (TCR_PTR(CCAST(
void *, other_thread->th.th_sleep_loc)) !=
3442 __kmp_null_resume_wrapper(other_thread);
3455 task = __kmp_steal_task(other_thread, gtid, task_team,
3456 unfinished_threads, thread_finished,
3460 if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
3461 threads_data[tid].td.td_deque_last_stolen = victim_tid;
3468 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
3477#if USE_ITT_BUILD && USE_ITT_NOTIFY
3478 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
3479 if (itt_sync_obj == NULL) {
3481 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
3483 __kmp_itt_task_starting(itt_sync_obj);
3486 __kmp_invoke_task(gtid, task, current_task);
3488 if (itt_sync_obj != NULL)
3489 __kmp_itt_task_finished(itt_sync_obj);
3496 if (flag == NULL || (!final_spin && flag->done_check())) {
3499 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3503 if (thread->th.th_task_team == NULL) {
3506 KMP_YIELD(__kmp_library == library_throughput);
3509 if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
3510 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d stolen task spawned "
3511 "other tasks, restart\n",
3522 KMP_ATOMIC_LD_ACQ(¤t_task->td_incomplete_child_tasks) == 0) {
3526 if (!*thread_finished) {
3528 kmp_int32 count = -1 +
3530 KMP_ATOMIC_DEC(unfinished_threads);
3531 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d dec "
3532 "unfinished_threads to %d task_team=%p\n",
3533 gtid, count, task_team));
3534 *thread_finished = TRUE;
3542 if (flag != NULL && flag->done_check()) {
3545 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3553 if (thread->th.th_task_team == NULL) {
3555 (
"__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
3564 if (flag == NULL || (!final_spin && flag->done_check())) {
3566 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3573 if (nthreads == 1 &&
3574 KMP_ATOMIC_LD_ACQ(¤t_task->td_incomplete_child_tasks))
3578 (
"__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
template <bool C, bool S>
int __kmp_execute_tasks_32(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
template <bool C, bool S>
int __kmp_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
template <bool C, bool S>
int __kmp_atomic_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
    int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
int __kmp_execute_tasks_oncore(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
template int
__kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
                                     kmp_flag_32<false, false> *, int,
                                     int *USE_ITT_BUILD_ARG(void *), kmp_int32);

template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
                                                 kmp_flag_64<false, true> *,
                                                 int,
                                                 int *USE_ITT_BUILD_ARG(void *),
                                                 kmp_int32);

template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
                                                 kmp_flag_64<true, false> *,
                                                 int,
                                                 int *USE_ITT_BUILD_ARG(void *),
                                                 kmp_int32);

template int __kmp_atomic_execute_tasks_64<false, true>(
    kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
    int *USE_ITT_BUILD_ARG(void *), kmp_int32);

template int __kmp_atomic_execute_tasks_64<true, false>(
    kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
    int *USE_ITT_BUILD_ARG(void *), kmp_int32);
3651static void __kmp_enable_tasking(kmp_task_team_t *task_team,
3652 kmp_info_t *this_thr) {
3653 kmp_thread_data_t *threads_data;
3654 int nthreads, i, is_init_thread;
3656 KA_TRACE(10, (
"__kmp_enable_tasking(enter): T#%d\n",
3657 __kmp_gtid_from_thread(this_thr)));
3659 KMP_DEBUG_ASSERT(task_team != NULL);
3660 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
3662 nthreads = task_team->tt.tt_nproc;
3663 KMP_DEBUG_ASSERT(nthreads > 0);
3664 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
3667 is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
3669 if (!is_init_thread) {
3673 (
"__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
3674 __kmp_gtid_from_thread(this_thr)));
3677 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
3678 KMP_DEBUG_ASSERT(threads_data != NULL);
3680 if (__kmp_tasking_mode == tskm_task_teams &&
3681 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
3685 for (i = 0; i < nthreads; i++) {
3687 kmp_info_t *thread = threads_data[i].td.td_thr;
3689 if (i == this_thr->th.th_info.ds.ds_tid) {
      if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
          NULL) {
3700 KF_TRACE(50, (
"__kmp_enable_tasking: T#%d waking up thread T#%d\n",
3701 __kmp_gtid_from_thread(this_thr),
3702 __kmp_gtid_from_thread(thread)));
3703 __kmp_null_resume_wrapper(thread);
3705 KF_TRACE(50, (
"__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
3706 __kmp_gtid_from_thread(this_thr),
3707 __kmp_gtid_from_thread(thread)));
3712 KA_TRACE(10, (
"__kmp_enable_tasking(exit): T#%d\n",
3713 __kmp_gtid_from_thread(this_thr)));
static kmp_task_team_t *__kmp_free_task_teams =
    NULL; // Free list for task_team data structures
kmp_bootstrap_lock_t __kmp_task_team_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
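// ---------------------------------------------------------------------------
// Per-thread task deques.  Each deque is a power-of-two ring buffer of
// kmp_taskdata_t pointers; head/tail indices are wrapped with
// TASK_DEQUE_MASK(td), so e.g. the owner pushes with
//   td_deque[td_deque_tail]; td_deque_tail = (td_deque_tail + 1) & mask;
// while thieves consume from td_deque_head.  Deques start at
// INITIAL_TASK_DEQUE_SIZE entries and are doubled on demand by
// __kmp_realloc_task_deque().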
3757static void __kmp_alloc_task_deque(kmp_info_t *thread,
3758 kmp_thread_data_t *thread_data) {
3759 __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
3760 KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
3763 thread_data->td.td_deque_last_stolen = -1;
3765 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
3766 KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
3767 KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
3771 (
"__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
3772 __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
3778 thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
3784static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
3785 if (thread_data->td.td_deque != NULL) {
3786 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
3787 TCW_4(thread_data->td.td_deque_ntasks, 0);
3788 __kmp_free(thread_data->td.td_deque);
3789 thread_data->td.td_deque = NULL;
3790 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
3793#ifdef BUILD_TIED_TASK_STACK
3795 if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
3796 __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
3808static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
3809 kmp_task_team_t *task_team) {
3810 kmp_thread_data_t **threads_data_p;
3811 kmp_int32 nthreads, maxthreads;
3812 int is_init_thread = FALSE;
3814 if (TCR_4(task_team->tt.tt_found_tasks)) {
3819 threads_data_p = &task_team->tt.tt_threads_data;
3820 nthreads = task_team->tt.tt_nproc;
3821 maxthreads = task_team->tt.tt_max_threads;
3826 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3828 if (!TCR_4(task_team->tt.tt_found_tasks)) {
3830 kmp_team_t *team = thread->th.th_team;
3833 is_init_thread = TRUE;
3834 if (maxthreads < nthreads) {
3836 if (*threads_data_p != NULL) {
3837 kmp_thread_data_t *old_data = *threads_data_p;
3838 kmp_thread_data_t *new_data = NULL;
3842 (
"__kmp_realloc_task_threads_data: T#%d reallocating "
3843 "threads data for task_team %p, new_size = %d, old_size = %d\n",
3844 __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
      new_data = (kmp_thread_data_t *)__kmp_allocate(
          nthreads * sizeof(kmp_thread_data_t));
      // copy old data to new data
      KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
                   (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));
3855#ifdef BUILD_TIED_TASK_STACK
3857 for (i = maxthreads; i < nthreads; i++) {
3858 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3859 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3863 (*threads_data_p) = new_data;
3864 __kmp_free(old_data);
3866 KE_TRACE(10, (
"__kmp_realloc_task_threads_data: T#%d allocating "
3867 "threads data for task_team %p, size = %d\n",
3868 __kmp_gtid_from_thread(thread), task_team, nthreads));
      *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
          nthreads * sizeof(kmp_thread_data_t));
3874#ifdef BUILD_TIED_TASK_STACK
3876 for (i = 0; i < nthreads; i++) {
3877 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3878 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3882 task_team->tt.tt_max_threads = nthreads;
3885 KMP_DEBUG_ASSERT(*threads_data_p != NULL);
3889 for (i = 0; i < nthreads; i++) {
3890 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3891 thread_data->td.td_thr = team->t.t_threads[i];
3893 if (thread_data->td.td_deque_last_stolen >= nthreads) {
3897 thread_data->td.td_deque_last_stolen = -1;
3902 TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
3905 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3906 return is_init_thread;
3912static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
3913 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3914 if (task_team->tt.tt_threads_data != NULL) {
3916 for (i = 0; i < task_team->tt.tt_max_threads; i++) {
3917 __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
3919 __kmp_free(task_team->tt.tt_threads_data);
3920 task_team->tt.tt_threads_data = NULL;
3922 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3928static void __kmp_free_task_pri_list(kmp_task_team_t *task_team) {
3929 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3930 if (task_team->tt.tt_task_pri_list != NULL) {
3931 kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
3932 while (list != NULL) {
3933 kmp_task_pri_t *next = list->next;
3934 __kmp_free_task_deque(&list->td);
3938 task_team->tt.tt_task_pri_list = NULL;
3940 __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3947static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
3949 kmp_task_team_t *task_team = NULL;
3952 KA_TRACE(20, (
"__kmp_allocate_task_team: T#%d entering; team = %p\n",
3953 (thread ? __kmp_gtid_from_thread(thread) : -1), team));
3955 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3957 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3958 if (__kmp_free_task_teams != NULL) {
3959 task_team = __kmp_free_task_teams;
3960 TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
3961 task_team->tt.tt_next = NULL;
3963 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3966 if (task_team == NULL) {
3967 KE_TRACE(10, (
"__kmp_allocate_task_team: T#%d allocating "
3968 "task team for team %p\n",
3969 __kmp_gtid_from_thread(thread), team));
    task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
3973 __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
3974 __kmp_init_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3975#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
3978 __itt_suppress_mark_range(
3979 __itt_suppress_range, __itt_suppress_threading_errors,
3980 &task_team->tt.tt_found_tasks,
sizeof(task_team->tt.tt_found_tasks));
3981 __itt_suppress_mark_range(__itt_suppress_range,
3982 __itt_suppress_threading_errors,
3983 CCAST(kmp_uint32 *, &task_team->tt.tt_active),
3984 sizeof(task_team->tt.tt_active));
3992 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3993 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3994 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3995 task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
3997 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
3998 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3999 TCW_4(task_team->tt.tt_active, TRUE);
4001 KA_TRACE(20, (
"__kmp_allocate_task_team: T#%d exiting; task_team = %p "
4002 "unfinished_threads init'd to %d\n",
4003 (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
4004 KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
4011void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
4012 KA_TRACE(20, (
"__kmp_free_task_team: T#%d task_team = %p\n",
4013 thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
4016 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
4018 KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
4019 task_team->tt.tt_next = __kmp_free_task_teams;
4020 TCW_PTR(__kmp_free_task_teams, task_team);
4022 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
void __kmp_reap_task_teams(void) {
4031 kmp_task_team_t *task_team;
4033 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
4035 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
4036 while ((task_team = __kmp_free_task_teams) != NULL) {
4037 __kmp_free_task_teams = task_team->tt.tt_next;
4038 task_team->tt.tt_next = NULL;
4041 if (task_team->tt.tt_threads_data != NULL) {
4042 __kmp_free_task_threads_data(task_team);
4044 if (task_team->tt.tt_task_pri_list != NULL) {
4045 __kmp_free_task_pri_list(task_team);
4047 __kmp_free(task_team);
4049 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
void __kmp_wait_to_unref_task_teams(void) {
  kmp_info_t *thread;
  kmp_uint32 spins;
  kmp_uint64 time;
4062 KMP_INIT_YIELD(spins);
4063 KMP_INIT_BACKOFF(time);
4071 for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
4072 thread = thread->th.th_next_pool) {
4076 if (TCR_PTR(thread->th.th_task_team) == NULL) {
4077 KA_TRACE(10, (
"__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
4078 __kmp_gtid_from_thread(thread)));
4083 if (!__kmp_is_thread_alive(thread, &exit_val)) {
4084 thread->th.th_task_team = NULL;
4091 KA_TRACE(10, (
"__kmp_wait_to_unref_task_team: Waiting for T#%d to "
4092 "unreference task_team\n",
4093 __kmp_gtid_from_thread(thread)));
4095 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
          NULL) {
4102 (
"__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
4103 __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
4104 __kmp_null_resume_wrapper(thread);
4113 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
4117void __kmp_shift_task_state_stack(kmp_info_t *this_thr, kmp_uint8 value) {
4119 if (this_thr->th.th_task_state_top + 1 >=
4120 this_thr->th.th_task_state_stack_sz) {
4121 kmp_uint32 new_size = 2 * this_thr->th.th_task_state_stack_sz;
4122 kmp_uint8 *old_stack, *new_stack;
4124 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
4125 for (i = 0; i <= this_thr->th.th_task_state_top; ++i) {
4126 new_stack[i] = this_thr->th.th_task_state_memo_stack[i];
4129 for (; i < this_thr->th.th_task_state_stack_sz; ++i) {
4130 new_stack[i + 1] = this_thr->th.th_task_state_memo_stack[i];
4132 for (i = this_thr->th.th_task_state_stack_sz; i < new_size;
4136 old_stack = this_thr->th.th_task_state_memo_stack;
4137 this_thr->th.th_task_state_memo_stack = new_stack;
4138 this_thr->th.th_task_state_stack_sz = new_size;
4139 __kmp_free(old_stack);
4145 .th_task_state_memo_stack[this_thr->th.th_task_state_stack_sz];
4147 for (i = this_thr->th.th_task_state_stack_sz - 1;
4148 i > this_thr->th.th_task_state_top; i--, end--)
4151 this_thr->th.th_task_state_memo_stack[this_thr->th.th_task_state_top + 1] =
void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
4158 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
4164 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
4165 (always || team->t.t_nproc > 1)) {
4166 team->t.t_task_team[this_thr->th.th_task_state] =
4167 __kmp_allocate_task_team(this_thr, team);
4168 KA_TRACE(20, (
"__kmp_task_team_setup: Primary T#%d created new task_team %p"
4169 " for team %d at parity=%d\n",
4170 __kmp_gtid_from_thread(this_thr),
4171 team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id,
4172 this_thr->th.th_task_state));
4174 if (this_thr->th.th_task_state == 1 && always && team->t.t_nproc == 1) {
4176 KA_TRACE(20, (
"__kmp_task_team_setup: Primary T#%d needs to shift stack"
4177 " for team %d at parity=%d\n",
4178 __kmp_gtid_from_thread(this_thr), team->t.t_id,
4179 this_thr->th.th_task_state));
4180 __kmp_shift_task_state_stack(this_thr, this_thr->th.th_task_state);
4190 if (team->t.t_nproc > 1) {
4191 int other_team = 1 - this_thr->th.th_task_state;
4192 KMP_DEBUG_ASSERT(other_team >= 0 && other_team < 2);
4193 if (team->t.t_task_team[other_team] == NULL) {
4194 team->t.t_task_team[other_team] =
4195 __kmp_allocate_task_team(this_thr, team);
4196 KA_TRACE(20, (
"__kmp_task_team_setup: Primary T#%d created second new "
4197 "task_team %p for team %d at parity=%d\n",
4198 __kmp_gtid_from_thread(this_thr),
4199 team->t.t_task_team[other_team], team->t.t_id, other_team));
4202 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
4203 if (!task_team->tt.tt_active ||
4204 team->t.t_nproc != task_team->tt.tt_nproc) {
4205 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
4206 TCW_4(task_team->tt.tt_found_tasks, FALSE);
4207 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
4208 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
4209 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,
4211 TCW_4(task_team->tt.tt_active, TRUE);
4215 KA_TRACE(20, (
"__kmp_task_team_setup: Primary T#%d reset next task_team "
4216 "%p for team %d at parity=%d\n",
4217 __kmp_gtid_from_thread(this_thr),
4218 team->t.t_task_team[other_team], team->t.t_id, other_team));
4226 if (this_thr == __kmp_hidden_helper_main_thread) {
    for (int i = 0; i < 2; ++i) {
4228 kmp_task_team_t *task_team = team->t.t_task_team[i];
4229 if (KMP_TASKING_ENABLED(task_team)) {
4232 __kmp_enable_tasking(task_team, this_thr);
        for (int j = 0; j < task_team->tt.tt_nproc; ++j) {
4234 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
4235 if (thread_data->td.td_deque == NULL) {
4236 __kmp_alloc_task_deque(__kmp_hidden_helper_threads[j], thread_data);
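// ---------------------------------------------------------------------------
// __kmp_task_team_sync: called at barriers.  Each thread flips its
// th_task_state parity bit and re-points th_task_team at
// team->t.t_task_team[parity], so the team can already be populating the
// "next" task team while threads are still draining the current one.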
4246void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
4247 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
4251 this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state);
4255 TCW_PTR(this_thr->th.th_task_team,
4256 team->t.t_task_team[this_thr->th.th_task_state]);
4258 (
"__kmp_task_team_sync: Thread T#%d task team switched to task_team "
4259 "%p from Team #%d (parity=%d)\n",
4260 __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
4261 team->t.t_id, this_thr->th.th_task_state));
void __kmp_task_team_wait(
    kmp_info_t *this_thr,
    kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
4274 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
4276 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
4277 KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
4279 if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
4281 KA_TRACE(20, (
"__kmp_task_team_wait: Primary T#%d waiting for all tasks "
4282 "(for unfinished_threads to reach 0) on task_team = %p\n",
4283 __kmp_gtid_from_thread(this_thr), task_team));
4287 kmp_flag_32<false, false> flag(
4288 RCAST(std::atomic<kmp_uint32> *,
4289 &task_team->tt.tt_unfinished_threads),
4291 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
4297 (
"__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
4298 "setting active to false, setting local and team's pointer to NULL\n",
4299 __kmp_gtid_from_thread(this_thr), task_team));
4300 KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
4301 task_team->tt.tt_found_proxy_tasks == TRUE ||
4302 task_team->tt.tt_hidden_helper_task_encountered == TRUE);
4303 TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
4304 TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
4305 KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
4306 TCW_SYNC_4(task_team->tt.tt_active, FALSE);
4309 TCW_PTR(this_thr->th.th_task_team, NULL);
void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
4319 std::atomic<kmp_uint32> *spin = RCAST(
4320 std::atomic<kmp_uint32> *,
4321 &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
4323 KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);
4326 KMP_FSYNC_SPIN_INIT(spin, NULL);
4328 kmp_flag_32<false, false> spin_flag(spin, 0U);
4329 while (!spin_flag.execute_tasks(thread, gtid, TRUE,
4330 &flag USE_ITT_BUILD_ARG(NULL), 0)) {
    KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin));
4336 if (TCR_4(__kmp_global.g.g_done)) {
4337 if (__kmp_global.g.g_abort)
4338 __kmp_abort_thread();
  KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
}
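// ---------------------------------------------------------------------------
// __kmp_give_task: try to enqueue a (typically proxy) task on thread "tid"'s
// deque.  The deque lock is only taken once the fast checks pass; "pass"
// limits how far the deque may grow before the caller moves on to another
// thread, so repeated failures spread the tasks instead of endlessly resizing
// a single deque.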
static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
                            kmp_int32 pass) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4356 kmp_task_team_t *task_team = taskdata->td_task_team;
  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
                taskdata, tid));

  KMP_DEBUG_ASSERT(task_team != NULL);
  bool result = false;
4365 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
4367 if (thread_data->td.td_deque == NULL) {
4371 (
"__kmp_give_task: thread %d has no queue while giving task %p.\n",
4376 if (TCR_4(thread_data->td.td_deque_ntasks) >=
4377 TASK_DEQUE_SIZE(thread_data->td)) {
4380 (
"__kmp_give_task: queue is full while giving task %p to thread %d.\n",
4385 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
4388 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
4389 if (TCR_4(thread_data->td.td_deque_ntasks) >=
4390 TASK_DEQUE_SIZE(thread_data->td)) {
4392 __kmp_realloc_task_deque(thread, thread_data);
4397 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
4399 if (TCR_4(thread_data->td.td_deque_ntasks) >=
4400 TASK_DEQUE_SIZE(thread_data->td)) {
4401 KA_TRACE(30, (
"__kmp_give_task: queue is full while giving task %p to "
4407 if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
4408 goto release_and_exit;
4410 __kmp_realloc_task_deque(thread, thread_data);
4416 thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
4418 thread_data->td.td_deque_tail =
4419 (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
4420 TCW_4(thread_data->td.td_deque_ntasks,
4421 TCR_4(thread_data->td.td_deque_ntasks) + 1);
4424 KA_TRACE(30, (
"__kmp_give_task: successfully gave task %p to thread %d.\n",
4428 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
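// ---------------------------------------------------------------------------
// Proxy-task completion is split into halves so it can be driven from outside
// an OpenMP thread (e.g. when an offloaded "target nowait" region finishes):
// the "first top half" marks the task complete and releases its taskgroup,
// the "second top half" decrements the parent's incomplete-children counter,
// and the "bottom half" (which must run on an OpenMP thread) releases
// dependences and frees the task.  PROXY_TASK_FLAG in
// td_incomplete_child_tasks keeps the bottom half from finishing while a top
// half is still in flight.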
4433#define PROXY_TASK_FLAG 0x40000000
4450static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
4451 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
4452 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4453 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
4454 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
4456 taskdata->td_flags.complete = 1;
4458 taskdata->td_flags.onced = 1;
4461 if (taskdata->td_taskgroup)
4462 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
4466 KMP_ATOMIC_OR(&taskdata->td_incomplete_child_tasks, PROXY_TASK_FLAG);
4469static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
4471 kmp_int32 children = 0;
4475 KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
4476 KMP_DEBUG_ASSERT(children >= 0);
4479 KMP_ATOMIC_AND(&taskdata->td_incomplete_child_tasks, ~PROXY_TASK_FLAG);
4482static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
4483 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4484 kmp_info_t *thread = __kmp_threads[gtid];
4486 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4487 KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
4492 while ((KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) &
4493 PROXY_TASK_FLAG) > 0)
4496 __kmp_release_deps(gtid, taskdata);
4497 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
4509 KMP_DEBUG_ASSERT(ptask != NULL);
4510 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4512 10, (
"__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
4514 __kmp_assert_valid_gtid(gtid);
4515 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4517 __kmp_first_top_half_finish_proxy(taskdata);
4518 __kmp_second_top_half_finish_proxy(taskdata);
4519 __kmp_bottom_half_finish_proxy(gtid, ptask);
4522 (
"__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
4526void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
4527 KMP_DEBUG_ASSERT(ptask != NULL);
4528 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4532 kmp_team_t *team = taskdata->td_team;
4533 kmp_int32 nthreads = team->t.t_nproc;
4538 kmp_int32 start_k = start % nthreads;
4540 kmp_int32 k = start_k;
4544 thread = team->t.t_threads[k];
4545 k = (k + 1) % nthreads;
  } while (!__kmp_give_task(thread, k, ptask, pass));
4553 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && __kmp_wpolicy_passive) {
    for (int i = 0; i < nthreads; ++i) {
4556 thread = team->t.t_threads[i];
4557 if (thread->th.th_sleep_loc != NULL) {
4558 __kmp_null_resume_wrapper(thread);
4573 KMP_DEBUG_ASSERT(ptask != NULL);
4574 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4578 (
"__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
4581 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4583 __kmp_first_top_half_finish_proxy(taskdata);
4585 __kmpc_give_task(ptask);
4587 __kmp_second_top_half_finish_proxy(taskdata);
4591 (
"__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid,
                                                kmp_task_t *task) {
4597 kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
4598 if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
4599 td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
4600 td->td_allow_completion_event.ed.task = task;
4601 __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
4603 return &td->td_allow_completion_event;
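// ---------------------------------------------------------------------------
// __kmp_fulfill_event: complete a task that was created with the detach
// clause.  If the task body has already returned (the task is a proxy by
// now), the proxy-completion path above finishes it; otherwise the event is
// simply marked fulfilled and the task completes normally when its body
// returns.  Typical user-level shape:
//
//   omp_event_handle_t ev;
//   #pragma omp task detach(ev)
//   { start_async_work(); }        // task body returns immediately
//   ...                            // later, from the async callback:
//   omp_fulfill_event(ev);         // ends up in __kmp_fulfill_event()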
4606void __kmp_fulfill_event(kmp_event_t *event) {
4607 if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
4608 kmp_task_t *ptask = event->ed.task;
4609 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
    bool detached = false;
4611 int gtid = __kmp_get_gtid();
4616 __kmp_acquire_tas_lock(&event->lock, gtid);
4617 if (taskdata->td_flags.proxy == TASK_PROXY) {
4623 if (UNLIKELY(ompt_enabled.enabled))
4624 __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
4627 event->type = KMP_EVENT_UNINITIALIZED;
4628 __kmp_release_tas_lock(&event->lock, gtid);
4634 if (UNLIKELY(ompt_enabled.enabled))
4635 __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
4639 kmp_team_t *team = taskdata->td_team;
4640 kmp_info_t *thread = __kmp_get_thread();
4641 if (thread->th.th_team == team) {
kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src
#if OMPX_TASKGRAPH
                                 , int taskloop_recur
#endif
) {
4667 kmp_taskdata_t *taskdata;
4668 kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
4669 kmp_taskdata_t *parent_task = taskdata_src->td_parent;
4670 size_t shareds_offset;
  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
                task_src));
4675 KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
4677 KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
4678 task_size = taskdata_src->td_size_alloc;
4681 KA_TRACE(30, (
"__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
4684 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
4686 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
4688 KMP_MEMCPY(taskdata, taskdata_src, task_size);
4690 task = KMP_TASKDATA_TO_TASK(taskdata);
4694 if (!taskdata->is_taskgraph || taskloop_recur)
4695 taskdata->td_task_id = KMP_GEN_TASK_ID();
4696 else if (taskdata->is_taskgraph &&
4697 __kmp_tdg_is_recording(taskdata_src->tdg->tdg_status))
4698 taskdata->td_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
4700 taskdata->td_task_id = KMP_GEN_TASK_ID();
4702 if (task->shareds != NULL) {
    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
    task->shareds = &((char *)taskdata)[shareds_offset];
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  }
4708 taskdata->td_alloc_thread = thread;
4709 taskdata->td_parent = parent_task;
4711 taskdata->td_taskgroup = parent_task->td_taskgroup;
4714 if (taskdata->td_flags.tiedness == TASK_TIED)
4715 taskdata->td_last_tied = taskdata;
4719 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
4720 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
4721 if (parent_task->td_taskgroup)
4722 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
4725 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
4726 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
4730 (
"__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
4731 thread, taskdata, taskdata->td_parent));
4733 if (UNLIKELY(ompt_enabled.enabled))
4734 __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
4743typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
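// ---------------------------------------------------------------------------
// kmp_taskloop_bounds_t hides where a taskloop's lower/upper bounds live in a
// duplicated task: for native (compiler-generated) tasks they sit at a fixed
// byte offset from the task pointer, while for GOMP-compatible tasks they are
// the first entries of task->shareds, stored as 32- or 64-bit values
// depending on td_size_loop_bounds.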
4750class kmp_taskloop_bounds_t {
4752 const kmp_taskdata_t *taskdata;
4753 size_t lower_offset;
4754 size_t upper_offset;
4757 kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
4758 : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
4759 lower_offset((char *)lb - (char *)task),
4760 upper_offset((char *)ub - (char *)task) {
    KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
    KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
  }
4764 kmp_taskloop_bounds_t(kmp_task_t *_task,
const kmp_taskloop_bounds_t &bounds)
4765 : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
4766 lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
  size_t get_lower_offset() const { return lower_offset; }
  size_t get_upper_offset() const { return upper_offset; }
  kmp_uint64 get_lb() const {
    kmp_int64 retval;
4771#if defined(KMP_GOMP_COMPAT)
4773 if (!taskdata->td_flags.native) {
4774 retval = *(kmp_int64 *)((
char *)task + lower_offset);
4777 if (taskdata->td_size_loop_bounds == 4) {
4778 kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
4779 retval = (kmp_int64)*lb;
4781 kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
4782 retval = (kmp_int64)*lb;
4787 retval = *(kmp_int64 *)((
char *)task + lower_offset);
4791 kmp_uint64 get_ub()
const {
4793#if defined(KMP_GOMP_COMPAT)
4795 if (!taskdata->td_flags.native) {
4796 retval = *(kmp_int64 *)((
char *)task + upper_offset);
4799 if (taskdata->td_size_loop_bounds == 4) {
4800 kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
4801 retval = (kmp_int64)*ub;
4803 kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
4804 retval = (kmp_int64)*ub;
4808 retval = *(kmp_int64 *)((
char *)task + upper_offset);
4812 void set_lb(kmp_uint64 lb) {
4813#if defined(KMP_GOMP_COMPAT)
4815 if (!taskdata->td_flags.native) {
4816 *(kmp_uint64 *)((
char *)task + lower_offset) = lb;
4819 if (taskdata->td_size_loop_bounds == 4) {
4820 kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
4821 *lower = (kmp_uint32)lb;
4823 kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
4824 *lower = (kmp_uint64)lb;
4828 *(kmp_uint64 *)((
char *)task + lower_offset) = lb;
4831 void set_ub(kmp_uint64 ub) {
4832#if defined(KMP_GOMP_COMPAT)
4834 if (!taskdata->td_flags.native) {
4835 *(kmp_uint64 *)((
char *)task + upper_offset) = ub;
4838 if (taskdata->td_size_loop_bounds == 4) {
4839 kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
4840 *upper = (kmp_uint32)ub;
4842 kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
4843 *upper = (kmp_uint64)ub;
4847 *(kmp_uint64 *)((
char *)task + upper_offset) = ub;
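// __kmp_taskloop_linear: execute the taskloop by creating its tasks one by
// one. The pattern task 'task' is duplicated num_tasks times; each copy gets a
// [lower, upper] chunk of roughly 'grainsize' iterations ('extras' chunks get
// one extra iteration, or the last chunk is shortened by 'last_chunk' in
// strict grainsize mode). The pattern task itself is never executed; it is
// started and finished only for internal bookkeeping.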
void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                           kmp_uint64 grainsize, kmp_uint64 extras,
                           kmp_int64 last_chunk, kmp_uint64 tc,
#if OMPT_SUPPORT
                           void *codeptr_ra,
#endif
                           void *task_dup) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  // compiler provides global bounds here
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 lower = task_bounds.get_lb();
  kmp_uint64 upper = task_bounds.get_ub();
  kmp_uint64 i;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  kmp_task_t *next_task;
  kmp_int32 lastpriv = 0;

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                             (last_chunk < 0 ? last_chunk : extras));
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);
  KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
                "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
                gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
                ub_glob, st, task_dup));

  // Launch num_tasks tasks, assign grainsize iterations to each task
  for (i = 0; i < num_tasks; ++i) {
    kmp_uint64 chunk_minus_1;
    if (extras == 0) {
      chunk_minus_1 = grainsize - 1;
    } else {
      chunk_minus_1 = grainsize; // extras tasks get one extra iteration
      --extras;
    }
    upper = lower + st * chunk_minus_1; // upper bound for this chunk
    if (i == num_tasks - 1) {
      // schedule the last task, set lastprivate flag if needed
      if (st == 1) { // most common case
        KMP_DEBUG_ASSERT(upper == *ub);
        if (upper == ub_glob)
          lastpriv = 1;
      } else if (st > 0) { // positive loop stride
        KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
        if ((kmp_uint64)st > ub_glob - upper)
          lastpriv = 1;
      } else { // negative loop stride
        KMP_DEBUG_ASSERT(upper + st < *ub);
        if (upper - ub_glob < (kmp_uint64)(-st))
          lastpriv = 1;
      }
    }

#if OMPX_TASKGRAPH
    next_task = __kmp_task_dup_alloc(thread, task, /* taskloop_recur */ 0);
#else
    next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
#endif

    kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
    kmp_taskloop_bounds_t next_task_bounds =
        kmp_taskloop_bounds_t(next_task, task_bounds);

    // adjust task-specific bounds
    next_task_bounds.set_lb(lower);
    if (next_taskdata->td_flags.native) {
      next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
    } else {
      next_task_bounds.set_ub(upper);
    }
    if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates, etc.
      ptask_dup(next_task, task, lastpriv);
    KA_TRACE(40,
             ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
              "upper %lld stride %lld, (offsets %p %p)\n",
              gtid, i, next_task, lower, upper, st,
              next_task_bounds.get_lower_offset(),
              next_task_bounds.get_upper_offset()));
#if OMPT_SUPPORT
    __kmp_omp_taskloop_task(NULL, gtid, next_task,
                            codeptr_ra); // schedule new task
#if OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_dispatch) {
      OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk,
                              lower, upper, st);
    }
#endif // OMPT_OPTIONAL
#else
    __kmp_omp_task(gtid, next_task, true); // schedule new task
#endif
    lower = upper + st; // adjust lower bound for the next iteration
  }
  // the pattern task is never executed; start and finish it only for
  // internal bookkeeping
  __kmp_task_start(gtid, task, current_task);
  __kmp_task_finish<false>(gtid, task, current_task);
}

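// __taskloop_params: parameter block passed through the shareds of the
// auxiliary task used by the recursive taskloop splitting scheme.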
typedef struct __taskloop_params {
  kmp_task_t *task;
  kmp_uint64 *lb;
  kmp_uint64 *ub;
  void *task_dup;
  kmp_int64 st;
  kmp_uint64 ub_glob;
  kmp_uint64 num_tasks;
  kmp_uint64 grainsize;
  kmp_uint64 extras;
  kmp_int64 last_chunk;
  kmp_uint64 tc;
  kmp_uint64 num_t_min;
#if OMPT_SUPPORT
  void *codeptr_ra;
#endif
} __taskloop_params_t;

void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
                          kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
                          kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
#if OMPT_SUPPORT
                          void *,
#endif
                          void *);

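// __kmp_taskloop_task: entry point of the auxiliary task. Unpacks the
// parameter block and continues either recursively (more than num_t_min tasks
// remaining) or linearly (at most num_t_min tasks remaining).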
int __kmp_taskloop_task(int gtid, void *ptask) {
  __taskloop_params_t *p =
      (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
  kmp_task_t *task = p->task;
  kmp_uint64 *lb = p->lb;
  kmp_uint64 *ub = p->ub;
  void *task_dup = p->task_dup;
  kmp_int64 st = p->st;
  kmp_uint64 ub_glob = p->ub_glob;
  kmp_uint64 num_tasks = p->num_tasks;
  kmp_uint64 grainsize = p->grainsize;
  kmp_uint64 extras = p->extras;
  kmp_int64 last_chunk = p->last_chunk;
  kmp_uint64 tc = p->tc;
  kmp_uint64 num_t_min = p->num_t_min;
#if OMPT_SUPPORT
  void *codeptr_ra = p->codeptr_ra;
#endif
#if KMP_DEBUG
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  KA_TRACE(20,
           ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
            st, task_dup));
#endif
  KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
  if (num_tasks > num_t_min)
    __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, last_chunk, tc, num_t_min,
#if OMPT_SUPPORT
                         codeptr_ra,
#endif
                         task_dup);
  else
    __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          codeptr_ra,
#endif
                          task_dup);

  KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
  return 0;
}

void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
                          kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                          kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                          kmp_uint64 grainsize, kmp_uint64 extras,
                          kmp_int64 last_chunk, kmp_uint64 tc,
                          kmp_uint64 num_t_min,
#if OMPT_SUPPORT
                          void *codeptr_ra,
#endif
                          void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  KMP_DEBUG_ASSERT(num_tasks > num_t_min);
  KA_TRACE(20,
           ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
            st, task_dup));
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  kmp_uint64 lower = *lb;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_t *next_task;
  size_t lower_offset =
      (char *)lb - (char *)task; // remember offset of lb in the task structure
  size_t upper_offset =
      (char *)ub - (char *)task; // remember offset of ub in the task structure

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                             (last_chunk < 0 ? last_chunk : extras));
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);

  // split the loop in two halves
  kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
  kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
  kmp_uint64 gr_size0 = grainsize;
  kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
  kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
  if (last_chunk < 0) {
    ext0 = ext1 = 0;
    last_chunk1 = last_chunk;
    tc0 = grainsize * n_tsk0;
    tc1 = tc - tc0;
  } else if (n_tsk0 <= extras) {
    gr_size0++; // integrate extras into grainsize
    ext0 = 0; // no extra iterations in 1st half
    ext1 = extras - n_tsk0; // remaining extras
    tc0 = gr_size0 * n_tsk0; // iterations for 1st half
    tc1 = tc - tc0; // iterations for 2nd half
  } else { // n_tsk0 > extras
    ext1 = 0; // no extra iterations in 2nd half
    ext0 = extras; // all extra iterations in 1st half
    tc1 = grainsize * n_tsk1;
    tc0 = tc - tc1;
  }
  ub0 = lower + st * (tc0 - 1);
  lb1 = ub0 + st;

  // create pattern task for 2nd half of the loop
#if OMPX_TASKGRAPH
  next_task = __kmp_task_dup_alloc(thread, task,
                                   /* taskloop_recur */ 1);
#else
  next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task
#endif
  // adjust lower bound (upper bound is not changed) for the 2nd half
  *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
  if (ptask_dup != NULL) // construct firstprivates, etc.
    ptask_dup(next_task, task, 0);
  *ub = ub0; // adjust upper bound for the 1st half

  // create auxiliary task for the 2nd half of the loop;
  // make sure the new task has the same parent as the pattern task
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  thread->th.th_current_task = taskdata->td_parent;
  kmp_task_t *new_task =
      __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
                            sizeof(__taskloop_params_t), &__kmp_taskloop_task);
  // restore current task
  thread->th.th_current_task = current_task;
  __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
  p->task = next_task;
  p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
  p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
  p->task_dup = task_dup;
  p->st = st;
  p->ub_glob = ub_glob;
  p->num_tasks = n_tsk1;
  p->grainsize = grainsize;
  p->extras = ext1;
  p->last_chunk = last_chunk1;
  p->tc = tc1;
  p->num_t_min = num_t_min;
#if OMPT_SUPPORT
  p->codeptr_ra = codeptr_ra;
#endif

#if OMPX_TASKGRAPH
  kmp_taskdata_t *new_task_data = KMP_TASK_TO_TASKDATA(new_task);
  new_task_data->tdg = taskdata->tdg;
  new_task_data->is_taskgraph = 0;
#endif

#if OMPT_SUPPORT
  // schedule new task with correct return address for OMPT events
  __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
#else
  __kmp_omp_task(gtid, new_task, true); // schedule new task
#endif

  // execute the 1st half of the current subrange
  if (n_tsk0 > num_t_min)
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
                         ext0, last_chunk0, tc0, num_t_min,
#if OMPT_SUPPORT
                         codeptr_ra,
#endif
                         task_dup);
  else
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
                          gr_size0, ext0, last_chunk0, tc0,
#if OMPT_SUPPORT
                          codeptr_ra,
#endif
                          task_dup);

  KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid));
}

static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task,
                           int if_val, kmp_uint64 *lb, kmp_uint64 *ub,
                           kmp_int64 st, int nogroup, int sched,
                           kmp_uint64 grainsize, int modifier,
                           void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_taskgroup(loc, gtid);
  }

#if OMPX_TASKGRAPH
  KMP_ATOMIC_DEC(&__kmp_tdg_task_id);
#endif
  // =========================================================================
  // calculate loop parameters
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 tc;
  // compiler provides global bounds here
  kmp_uint64 lower = task_bounds.get_lb();
  kmp_uint64 upper = task_bounds.get_ub();
  kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag
  kmp_uint64 num_tasks = 0, extras = 0;
  kmp_int64 last_chunk =
      0; // reduce grainsize of last task by last_chunk in strict mode
  kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

  KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
                "grain %llu(%d, %d), dup %p\n",
                gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
                task_dup));

  // compute trip count
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // free the pattern task and exit
    __kmp_task_start(gtid, task, current_task);
    // do not execute anything for zero-trip loop
    __kmp_task_finish<false>(gtid, task, current_task);
    return;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (num_tasks_min == 0)
    num_tasks_min =
        KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);

  // compute num_tasks/grainsize based on the input provided
  switch (sched) {
  case 0: // no schedule clause specified, we can choose the default
    // let's try to schedule (team_size * 10) tasks
    grainsize = thread->th.th_team_nproc * 10;
    KMP_FALLTHROUGH();
  case 2: // num_tasks provided
    if (grainsize > tc) {
      num_tasks = tc; // too big num_tasks requested, adjust values
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1;
      grainsize = tc; // too big grainsize requested, adjust values
      extras = 0;
    } else {
      if (modifier) {
        num_tasks = (tc + grainsize - 1) / grainsize;
        last_chunk = tc - (num_tasks * grainsize);
        extras = 0;
      } else {
        num_tasks = tc / grainsize;
        // adjust grainsize for balanced distribution of iterations
        grainsize = tc / num_tasks;
        extras = tc % num_tasks;
      }
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                             (last_chunk < 0 ? last_chunk : extras));
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);
  // =========================================================================

  // check if clause value first
  if (if_val == 0) { // if(0) specified, mark task as serial
    taskdata->td_flags.task_serial = 1;
    taskdata->td_flags.tiedness = TASK_TIED; // serial taskloop cannot be untied
    // always start serial tasks linearly
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
    // !taskdata->td_flags.native => currently force linear spawning of tasks
    // for GOMP_taskloop
  } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, last_chunk, tc, num_tasks_min,
#if OMPT_SUPPORT
                         OMPT_GET_RETURN_ADDRESS(0),
#endif
                         task_dup);
  } else {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_end_taskgroup(loc, gtid);
  }
  KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid));
}

void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
                     int sched, kmp_uint64 grainsize, void *task_dup) {
  __kmp_assert_valid_gtid(gtid);
  KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 0, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}

void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                       kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                       int nogroup, int sched, kmp_uint64 grainsize,
                       int modifier, void *task_dup) {
  __kmp_assert_valid_gtid(gtid);
  KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 modifier, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));
}

void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid) {
  if (gtid == KMP_GTID_DNE)
    return NULL;

  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *taskdata = thread->th.th_current_task;

  if (!taskdata)
    return NULL;

  return &taskdata->td_target_data.async_handle;
}

// __kmpc_omp_has_task_team: report whether the current task of this thread
// has a task team associated with it.
bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
  if (gtid == KMP_GTID_DNE)
    return FALSE;

  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *taskdata = thread->th.th_current_task;

  if (!taskdata)
    return FALSE;

  return taskdata->td_task_team != NULL;
}

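// __kmp_find_tdg: look up a task dependency graph (TDG) by ID, allocating the
// global TDG table on first use. Returns nullptr if TDGs are disabled or the
// slot is still in its initial (KMP_TDG_NONE) state.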
static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) {
  kmp_tdg_info_t *res = nullptr;
  if (__kmp_max_tdgs == 0)
    return res;

  if (__kmp_global_tdgs == NULL)
    __kmp_global_tdgs = (kmp_tdg_info_t **)__kmp_allocate(
        sizeof(kmp_tdg_info_t *) * __kmp_max_tdgs);

  if ((__kmp_global_tdgs[tdg_id]) &&
      (__kmp_global_tdgs[tdg_id]->tdg_status != KMP_TDG_NONE))
    res = __kmp_global_tdgs[tdg_id];
  return res;
}

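// __kmp_print_tdg_dot: dump a recorded TDG as a Graphviz dot file named
// tdg_<id>.dot, with one node per recorded task and one edge per successor
// link.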
void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
  kmp_int32 tdg_id = tdg->tdg_id;
  KA_TRACE(10, ("__kmp_print_tdg_dot(enter): T#%d tdg_id=%d \n", gtid, tdg_id));

  char file_name[20];
  sprintf(file_name, "tdg_%d.dot", tdg_id);
  kmp_safe_raii_file_t tdg_file(file_name, "w");

  kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
  fprintf(tdg_file,
          "digraph TDG {\n"
          "   compound=true\n"
          "   subgraph cluster {\n"
          "      label=TDG_%d\n",
          tdg_id);
  for (kmp_int32 i = 0; i < num_tasks; i++) {
    fprintf(tdg_file, "      %d[style=bold]\n", i);
  }
  fprintf(tdg_file, "   }\n");
  for (kmp_int32 i = 0; i < num_tasks; i++) {
    kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
    kmp_int32 *successors = tdg->record_map[i].successors;
    if (nsuccessors > 0) {
      for (kmp_int32 j = 0; j < nsuccessors; j++)
        fprintf(tdg_file, "      %d -> %d \n", i, successors[j]);
    }
  }
  fprintf(tdg_file, "}");
  KA_TRACE(10, ("__kmp_print_tdg_dot(exit): T#%d tdg_id=%d \n", gtid, tdg_id));
}

void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
  KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_READY);
  KA_TRACE(10, ("__kmp_exec_tdg(enter): T#%d tdg_id=%d num_roots=%d\n", gtid,
                tdg->tdg_id, tdg->num_roots));
  kmp_node_info_t *this_record_map = tdg->record_map;
  kmp_int32 *this_root_tasks = tdg->root_tasks;
  kmp_int32 this_num_roots = tdg->num_roots;
  kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *parent_task = thread->th.th_current_task;

  if (tdg->rec_taskred_data) {
    __kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
  }

  for (kmp_int32 j = 0; j < this_num_tasks; j++) {
    kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(this_record_map[j].task);

    td->td_parent = parent_task;
    this_record_map[j].parent_task = parent_task;

    kmp_taskgroup_t *parent_taskgroup =
        this_record_map[j].parent_task->td_taskgroup;

    KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter,
                      this_record_map[j].npredecessors);
    KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_incomplete_child_tasks);

    if (parent_taskgroup) {
      KMP_ATOMIC_INC(&parent_taskgroup->count);
      // The taskgroup is different so we must update it
      td->td_taskgroup = parent_taskgroup;
    } else if (td->td_taskgroup != nullptr) {
      // If the parent doesn't have a taskgroup, remove it from the task
      td->td_taskgroup = nullptr;
    }
    if (this_record_map[j].parent_task->td_flags.tasktype == TASK_EXPLICIT)
      KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_allocated_child_tasks);
  }

  for (kmp_int32 j = 0; j < this_num_roots; ++j) {
    __kmp_omp_task(gtid, this_record_map[this_root_tasks[j]].task, true);
  }
  KA_TRACE(10, ("__kmp_exec_tdg(exit): T#%d tdg_id=%d num_roots=%d\n", gtid,
                tdg->tdg_id, tdg->num_roots));
}

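// __kmp_start_record: allocate and initialize a TDG structure in RECORDING
// state, including its initial record_map of INIT_MAPSIZE nodes.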
static inline void __kmp_start_record(kmp_int32 gtid,
                                      kmp_taskgraph_flags_t *flags,
                                      kmp_int32 tdg_id) {
  kmp_tdg_info_t *tdg =
      (kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
  __kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;
  // Initializing the TDG structure
  tdg->tdg_id = tdg_id;
  tdg->map_size = INIT_MAPSIZE;
  tdg->num_roots = -1;
  tdg->root_tasks = nullptr;
  tdg->tdg_status = KMP_TDG_RECORDING;
  tdg->rec_num_taskred = 0;
  tdg->rec_taskred_data = nullptr;
  KMP_ATOMIC_ST_RLX(&tdg->num_tasks, 0);

  // Initializing the list of nodes in this TDG
  kmp_node_info_t *this_record_map =
      (kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
  for (kmp_int32 i = 0; i < INIT_MAPSIZE; i++) {
    kmp_int32 *successorsList =
        (kmp_int32 *)__kmp_allocate(__kmp_successors_size * sizeof(kmp_int32));
    this_record_map[i].task = nullptr;
    this_record_map[i].successors = successorsList;
    this_record_map[i].nsuccessors = 0;
    this_record_map[i].npredecessors = 0;
    this_record_map[i].successors_size = __kmp_successors_size;
    KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
  }

  __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
}

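// __kmpc_start_record_task: entry point of a record-and-replay region.
// Opens a taskgroup; if a TDG with this ID already exists it is replayed and
// 0 is returned, otherwise recording starts and 1 is returned. Callers
// typically guard the region body on a nonzero return value, so the body is
// skipped when the graph is replayed.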
kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid,
                                   kmp_int32 input_flags, kmp_int32 tdg_id) {

  kmp_int32 res;
  kmp_taskgraph_flags_t *flags = (kmp_taskgraph_flags_t *)&input_flags;
  KA_TRACE(10,
           ("__kmpc_start_record_task(enter): T#%d loc=%p flags=%d tdg_id=%d\n",
            gtid, loc_ref, input_flags, tdg_id));

  if (__kmp_max_tdgs == 0) {
    KA_TRACE(
        10,
        ("__kmpc_start_record_task(abandon): T#%d loc=%p flags=%d tdg_id = %d, "
         "__kmp_max_tdgs = 0\n",
         gtid, loc_ref, input_flags, tdg_id));
    return 1;
  }

  __kmpc_taskgroup(loc_ref, gtid);
  if (kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id)) {
    // TDG already recorded: replay it instead of re-recording
    __kmp_exec_tdg(gtid, tdg);
    res = 0;
  } else {
    __kmp_curr_tdg_idx = tdg_id;
    KMP_DEBUG_ASSERT(__kmp_curr_tdg_idx < __kmp_max_tdgs);
    __kmp_start_record(gtid, flags, tdg_id);
    __kmp_num_tdg++;
    res = 1;
  }
  KA_TRACE(10, ("__kmpc_start_record_task(exit): T#%d TDG %d starts to %s\n",
                gtid, tdg_id, res ? "record" : "execute"));
  return res;
}

void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
  // Store roots
  kmp_node_info_t *this_record_map = tdg->record_map;
  kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
  kmp_int32 *this_root_tasks =
      (kmp_int32 *)__kmp_allocate(this_num_tasks * sizeof(kmp_int32));
  kmp_int32 this_map_size = tdg->map_size;
  kmp_int32 this_num_roots = 0;
  kmp_info_t *thread = __kmp_threads[gtid];

  for (kmp_int32 i = 0; i < this_num_tasks; i++) {
    if (this_record_map[i].npredecessors == 0) {
      this_root_tasks[this_num_roots++] = i;
    }
  }

  // Update with roots info and mapsize
  tdg->map_size = this_map_size;
  tdg->num_roots = this_num_roots;
  tdg->root_tasks = this_root_tasks;
  KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_RECORDING);
  tdg->tdg_status = KMP_TDG_READY;

  if (thread->th.th_current_task->td_dephash) {
    __kmp_dephash_free(thread, thread->th.th_current_task->td_dephash);
    thread->th.th_current_task->td_dephash = NULL;
  }

  // Reset predecessor counters so the graph can be replayed
  for (kmp_int32 i = 0; i < this_num_tasks; i++) {
    KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter,
                      this_record_map[i].npredecessors);
  }
  KMP_ATOMIC_ST_RLX(&__kmp_tdg_task_id, 0);

  if (__kmp_tdg_dot)
    __kmp_print_tdg_dot(tdg, gtid);
}

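// __kmpc_end_record_task: closes the implicit taskgroup opened by
// __kmpc_start_record_task and, if the TDG was being recorded, finalizes it
// via __kmp_end_record.
//
// Illustrative only, not emitted by the runtime itself: a record-and-replay
// region is typically bracketed as
//   if (__kmpc_start_record_task(loc, gtid, flags, tdg_id)) {
//     ... generate the tasks of the region ...  // recorded into the TDG
//   }                                           // else: the TDG was replayed
//   __kmpc_end_record_task(loc, gtid, flags, tdg_id);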
void __kmpc_end_record_task(ident_t *loc_ref, kmp_int32 gtid,
                            kmp_int32 input_flags, kmp_int32 tdg_id) {
  kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id);

  KA_TRACE(10, ("__kmpc_end_record_task(enter): T#%d loc=%p finishes recording"
                " tdg=%d with flags=%d\n",
                gtid, loc_ref, tdg_id, input_flags));
  if (__kmp_max_tdgs) {
    __kmpc_end_taskgroup(loc_ref, gtid);
    if (__kmp_tdg_is_recording(tdg->tdg_status))
      __kmp_end_record(gtid, tdg);
  }
  KA_TRACE(10, ("__kmpc_end_record_task(exit): T#%d loc=%p finished recording"
                " tdg=%d, its status is now READY\n",
                gtid, loc_ref, tdg_id));
}