#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_dispatch_hier.h"

#include "ompt-specific.h"
#include "ompd-specific.h"

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif

#define KMP_USE_PRCTL 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
/* Forward declarations */
void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
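
/* Determine the global thread id of the calling thread. Depending on
   __kmp_gtid_mode this uses thread-local data, keyed TLS, or an internal
   search that matches the current stack address against the stacks of the
   registered threads. */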
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    // A thread owns the address range [stack_base - stack_size, stack_base].
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;
      if (stack_diff <= stack_size) {
        // The only way we can be this close to the allocated stack is if we
        // are running on this thread.
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
                         __kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  // The internal algorithm failed; fall back to keyed TLS.
  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: internal alg. failed to find "
       "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (i < 0)
    return i;

  // The slot may already have been reclaimed (e.g. during library shutdown).
  if (!TCR_SYNC_PTR(other_threads[i]))
    return i;

  // Dynamically refine the stack window for uber threads to avoid the
  // get_specific call on subsequent lookups.
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  // Reprint the stack bounds since they have been refined.
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
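
/* Like __kmp_get_global_thread_id(), but registers the calling thread as a
   new root if it does not have a gtid yet. */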
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
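
/* Optionally print this thread's stack bounds and, when extensive checking is
   enabled, abort with a fatal error if the stack overlaps another registered
   thread's stack. */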
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
/* Spin forever; used when the runtime must not return (e.g. after an abort
   has been signalled). */
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
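
/* Print one line of the storage map for the address range [p1, p2] to stderr,
   optionally followed by per-page NUMA placement information. */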
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);
      if (node < 0) /* doesn't work, so don't try this next time */
        __kmp_storage_map_verbose = FALSE;
      else {
        char *last;
        int lastNode;
        int localProc = __kmp_get_cpu_from_gtid(gtid);

        const int page_size = KMP_GET_PAGE_SIZE();

        p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
        p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
        if (localProc >= 0)
          __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
                               localProc >> 1);
        else
          __kmp_printf_no_lock(" GTID %d\n", gtid);
#if KMP_USE_PRCTL
        /* The more elaborate format collates adjacent pages that live on the
           same host node. */
        do {
          last = p1;
          lastNode = node;
          do {
            (char *)p1 += page_size;
          } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
          __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
                               lastNode);
        } while (p1 <= p2);
#else
        __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
                             (char *)p1 + (page_size - 1),
                             __kmp_get_host_node(p1));
        if (p1 < p2) {
          __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
                               (char *)p2 + (page_size - 1),
                               __kmp_get_host_node(p2));
        }
#endif
      }
    }
  } else
    __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
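
/* Emit a runtime warning to stderr unless warning generation is disabled. */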
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
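
/* Abort the whole process, dumping the debug buffer first if it is enabled. */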
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of the abnormal termination and prevent deadlock
    // if the abort happened during library initialization or shutdown.
    __kmp_global.g.g_abort = SIGABRT;
    raise(SIGABRT);
    _exit(3); // In case the signal is ignored, exit anyway.
  } else {
    __kmp_unregister_library();
    abort();
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // In case of abort just call abort(); it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread
/* Print out the storage map for the major per-thread data structures. */
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major team data structures. */
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() { __kmp_init_target_task(); }
#endif
/* Windows DLL entry point: end the library or the current thread on the
   corresponding notifications. */
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
    // lpReserved == NULL when FreeLibrary() was called,
    // lpReserved != NULL when the process is being terminated.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
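
/* Support for the OpenMP 'ordered' construct: __kmp_parallel_deo waits until
   it is this thread's turn, __kmp_parallel_dxo passes the turn to the next
   thread in the team. */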
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
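
/* Returns nonzero if this thread won the 'single' region, zero otherwise. */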
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by the primary thread of an active team at
      // level 1.
      __kmp_itt_metadata_single(id_ref);
    }
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
  if (status) {
    __kmp_itt_single_start(gtid);
  }
  return status;
}

void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
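
/* Decide how many threads the forking thread may actually reserve for the new
   team, honoring dynamic adjustment, KMP_DEVICE_THREAD_LIMIT, OMP_THREAD_LIMIT
   and the capacity of the __kmp_threads array. Returns the reserved count, or
   1 if the region must be serialized. */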
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ; // use set_nthreads as is
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT (__kmp_max_nth).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (the contention-group limit).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If it is not for initializing the hidden helper team, take
  // __kmp_hidden_helper_threads_num out of the capacity, because it is
  // included in __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
    return 1;
  }

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                " %d threads\n",
                __kmp_get_gtid(), new_nthreads, set_nthreads));
  return new_nthreads;
}
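
/* Populate the new team: install the primary thread, allocate or reuse worker
   threads, and set up their barrier state and affinity places. */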
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  /* first, let's setup the primary thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

  /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the primary thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in the team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize the threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition the places list for teams construct workers that
    // haven't actually been forked to do real work yet; that partitioning
    // happens in the parallel region nested within the teams construct.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
/* Propagate any changes to the floating point control registers out to the
   team. Writes to the team structure are avoided unless the values actually
   changed, to keep the cache line out of a written state. */
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get the primary thread's values of the FPU control flags (both X87 and
    // vector).
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Other code in the runtime wants to know whether it should restore these
    // values, so t_fp_control_saved must be kept correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here: don't write to this cache line unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

/* Do the opposite: set the hardware registers to the updated values from the
   team. */
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
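
/* Serialize a parallel region: reuse or allocate the thread's serial team,
   push a nested level, and notify OMPT if it is enabled. */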
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value of
    // proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

  // Reset num_threads for next parallel region
  this_thr->th.th_set_nproc = 0;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;
  } else {
    /* this serialized team is already being used; just add another nested
       level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display-affinity functionality for serialized parallel
  // regions
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);

  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking; its content was swapped

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
}
// Test if this fork is for a team closely nested in a teams construct
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

// Test if this fork is for the teams construct, i.e. to form the outer league
// of teams
static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}
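
/* Fork a parallel region that is closely nested inside a teams construct; the
   parent (hot) team is reused and its workers are already waiting at the fork
   barrier. */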
static int
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
                    ompt_data_t ompt_parallel_data, void *return_address,
                    kmp_va_list ap) {
  void **argv;
  int i;

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);
  }
  // Increment our nested depth levels, but not increase the serialization
  if (parent_team == master_th->th.th_serial_team) {
    // AC: we are in serialized parallel
    __kmpc_serialized_parallel(loc, gtid);
    KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

    if (call_context == fork_context_gnu) {
      // AC: need to decrement t_serialized for enquiry functions to work
      // correctly; will restore at join time
      parent_team->t.t_serialized--;
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;

    void *dummy;
    void **exit_frame_p;
    ompt_data_t *implicit_task_data;
    ompt_lw_taskteam_t lw_taskteam;

    if (ompt_enabled.enabled) {
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
      // don't use lw_taskteam after linking; its content was swapped

      /* OMPT implicit task begin */
      implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
      if (ompt_enabled.ompt_callback_implicit_task) {
        OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
            1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }

      /* OMPT state */
      master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
    } else {
      exit_frame_p = &dummy;
    }

    // AC: need to decrement t_serialized for enquiry functions to work
    // correctly; will restore at join time
    parent_team->t.t_serialized--;

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv,
                             exit_frame_p);
    }

    if (ompt_enabled.enabled) {
      *exit_frame_p = NULL;
      OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, implicit_task_data, 1,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
      if (ompt_enabled.ompt_callback_parallel_end) {
        ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
            &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
            OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
      }
      master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
    return TRUE;
  }

  parent_team->t.t_pkfn = microtask;
  parent_team->t.t_invoke = invoker;
  KMP_ATOMIC_INC(&root->r.r_in_parallel);
  parent_team->t.t_active_level++;
  parent_team->t.t_level++;
  parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

  // If the threads allocated to the team are less than the thread limit,
  // update the thread limit here so the subsequent uses get the correct value.
  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;

  if (ompt_enabled.enabled) {
    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
                            return_address);
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
  }

  /* Change the number of threads in the team if requested */
  if (master_set_numthreads) { // The parallel has a num_threads clause
    if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
      // AC: only can reduce the number of threads dynamically, can't increase
      kmp_info_t **other_threads = parent_team->t.t_threads;
      // NOTE: if using a distributed barrier, we need to run this code block
      // even when the team size appears not to have changed from the max.
      int old_proc = master_th->th.th_teams_size.nth;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
        __kmp_add_threads_to_team(parent_team, master_set_numthreads);
      }
      parent_team->t.t_nproc = master_set_numthreads;
      for (i = 0; i < master_set_numthreads; ++i) {
        other_threads[i]->th.th_team_nproc = master_set_numthreads;
      }
    }
    // Keep extra threads hot in the team for possible next parallels
    master_th->th.th_set_nproc = 0;
  }

  if (__kmp_debugging) { // Let the debugger override the number of threads.
    int nth = __kmp_omp_num_threads(loc);
    if (nth > 0) { // 0 means the debugger doesn't want to change it
      master_set_numthreads = nth;
    }
  }

  // Figure out the proc_bind policy for the nested parallel within teams
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use the current proc-bind-var
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }
  KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
  // Need to change the bind-var ICV to the correct value for each implicit
  // task
  if (proc_bind_icv != proc_bind_default &&
      master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
    kmp_info_t **other_threads = parent_team->t.t_threads;
    for (i = 0; i < master_th->th.th_team_nproc; ++i) {
      other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
    }
  }
  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
       KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1 // only report frames at level 1
      && master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;
  }
  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    // create a new stack stitching id before entering the fork barrier
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(parent_team);
#endif

  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)
    return TRUE;

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));

  return TRUE;
}
/* Execute a parallel region serially on the calling thread (team size 1),
   handling the Intel, GNU and teams-master entry paths. */
static int
__kmp_serial_fork_call(ident_t *loc, int gtid, enum fork_context_e call_context,
                       kmp_int32 argc, microtask_t microtask, launch_t invoker,
                       kmp_info_t *master_th, kmp_team_t *parent_team,
                       ompt_data_t *ompt_parallel_data, void **return_address,
                       ompt_data_t **parent_task_data,
                       kmp_va_list ap) {
  kmp_team_t *team;
  int i;
  void **argv;

#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  void *args[argc];
#else
  void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

  KA_TRACE(
      20, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));

  __kmpc_serialized_parallel(loc, gtid);

  master_th->th.th_serial_team->t.t_pkfn = microtask;

  if (call_context == fork_context_intel) {
    /* TODO this sucks, use the compiler itself to pass args! :) */
    master_th->th.th_serial_team->t.t_ident = loc;
    if (!ap) {
      // revert the change made in __kmpc_serialized_parallel()
      master_th->th.th_serial_team->t.t_level--;

      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; its content was swapped
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv,
                               exit_frame_p);
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }
        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
      team = master_th->th.th_team;
      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      if (ap) {
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);
      } else {
        for (i = 0; i < argc; ++i)
          // Get args from parent team for teams construct
          argv[i] = parent_team->t.t_argv[i];
      }
      // AC: revert the change made in __kmpc_serialized_parallel(), because
      //     the initial code in teams should have level = 0
      team->t.t_level--;
      // AC: call the special invoker for the outer "parallel" of the teams
      //     construct
      invoker(gtid);
      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
        }
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,
              *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    } else {
      argv = args;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
      KMP_MB();

      void *dummy;
      void **exit_frame_p;
      ompt_task_info_t *task_info;
      ompt_lw_taskteam_t lw_taskteam;
      ompt_data_t *implicit_task_data;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                ompt_parallel_data, *return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; its content was swapped
        task_info = OMPT_CUR_TASK_INFO(master_th);
        exit_frame_p = &(task_info->frame.exit_frame.ptr);

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
              ompt_task_implicit);
          OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
      } else {
        exit_frame_p = &dummy;
      }

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args, exit_frame_p);
      }

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        }

        *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              ompt_parallel_data, *parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
        }
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
      }
    }
  } else if (call_context == fork_context_gnu) {
    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
                              *return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
    }
    // don't use lw_taskteam after linking; its content was swapped

    // we were called from GNU native code
    KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } else {
    KMP_ASSERT2(call_context < fork_context_last,
                "__kmp_serial_fork_call: unknown fork_context parameter");
  }

  KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));
  KMP_MB();
  return FALSE;
}
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int task_thread_limit = 0;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with some
       gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These 2 lines below are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  /* setup current data */
  master_th = __kmp_threads[gtid];

  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;
  task_thread_limit =
      master_th->th.th_current_task->td_icvs.task_thread_limit;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }

  // Assign affinity to the root thread if it hasn't happened yet
  __kmp_assign_root_init_mask();

  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);
    }
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  master_th->th.th_ident = loc;

  // Parallel closely nested in a teams construct:
  if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
    return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
                               call_context, microtask, invoker,
                               master_set_numthreads, level,
                               ompt_parallel_data, return_address, ap);
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }

  // Determine the number of threads
  int enter_teams =
      __kmp_is_entering_teams(active_level, level, teams_level, ap);
  if ((!enter_teams &&
       (parent_team->t.t_active_level >=
        master_th->th.th_current_task->td_icvs.max_active_levels)) ||
      (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));
    nthreads = 1;
  } else {
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);
    // Use the thread_limit set for the current target task if it exists,
    // otherwise go with the deduced nthreads
    nthreads = task_thread_limit > 0 && task_thread_limit < nthreads
                   ? task_thread_limit
                   : nthreads;
    // No need for the forkjoin lock for serialized parallel out of a teams
    // construct.
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        // Free the lock for single-thread execution here; for multi-thread
        // execution it will be freed later, after the team of threads has
        // been created and initialized.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads then restore it now
  master_th->th.th_set_nproc = 0;

  if (nthreads == 1) {
    return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
                                  invoker, master_th, parent_team,
                                  &ompt_parallel_data, &return_address,
                                  &parent_task_data, ap);
  }

  // GEH: only modify the executing flag in the case when not serialized;
  //      the serialized case is handled in kmpc_serialized_parallel
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

  // Figure out the proc_bind policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause was specified, so use the current value of
    // proc-bind-var for this parallel region.
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Have the teams construct take the proc_bind value from
    // KMP_TEAMS_PROC_BIND
    if (master_th->th.th_teams_microtask &&
        microtask == (microtask_t)__kmp_teams_master) {
      proc_bind = __kmp_teams_proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      // Do not modify the proc bind ICV for the two teams construct forks;
      // they just let the proc bind ICV pass through.
      if (!master_th->th.th_teams_microtask ||
          !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // AC: Do not increase the parallel level at the start of the teams
    //     construct
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set the primary thread's schedule as the new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set the primary thread's task team to the team's task team. Unless this
    // is the hot team, it should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of the primary thread's task_state
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        kmp_uint32 i;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero-init the rest of the stack
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store the primary thread's task_state on the stack
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore the primary thread's nested state if nested hot team
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report the frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
               __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      // Mark the start of the "parallel" region for Intel(R) VTune(TM).
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
    }
  }

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

  if (__itt_stack_caller_create_ptr) {
    // create a new stack stitching id before entering the fork barrier
    if (!enter_teams) {
      KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
      team->t.t_stack_id = __kmp_itt_stack_caller_create();
    } else if (parent_team->t.t_serialized) {
      // keep the stack stitching id in the serialized parent_team; the current
      // team will be used for the parallel inside the teams
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
    }
  }

  // AC: skip __kmp_internal_fork at the teams construct; let only primary
  //     threads execute
  if (ap) {
    __kmp_internal_fork(loc, gtid, team);
    KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));
  }

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED
  // If beginning a teams construct, then change the thread state
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (!ap) {
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  }
#endif

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }

#if KMP_STATS_ENABLED
  // If this was the beginning of a teams construct, reset the thread state
  if (!ap) {
    KMP_SET_THREAD_STATE(previous_state);
  }
#endif

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }

  return TRUE;
}
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore the state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team,
                                   ompt_data_t *parallel_data, int flags,
                                   void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);
  }

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
}
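// __kmp_join_call: the join counterpart of __kmp_fork_call.  The primary
// thread waits for the workers at the join barrier, releases the team,
// restores its ICVs, task state and dispatch pointer from the parent team,
// and (when OMPT is enabled) reports implicit-task-end / parallel-end
// through __kmp_join_ompt() above.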
void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
                     ,
                     int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

#if OMPT_SUPPORT
  void *team_microtask = (void *)team->t.t_pkfn;
  if (ompt_enabled.enabled &&
      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif
  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }
  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // we are in a teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
      } else if (level == tlevel + 1) {
        // bump the serialization count so it can be restored by the next
        // __kmpc_end_serialized_parallel
        team->t.t_serialized++;
      }
    }

    if (ompt_enabled.enabled) {
      if (fork_context == fork_context_gnu) {
        __ompt_lw_taskteam_unlink(master_th);
      }
      __kmp_join_restore_state(master_th, parent_team);
    }
2493 master_active = team->t.t_master_active;
2498 __kmp_internal_join(loc, gtid, team);
2500 if (__itt_stack_caller_create_ptr) {
2501 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2503 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2504 team->t.t_stack_id = NULL;
2508 master_th->th.th_task_state =
2511 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2512 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2516 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2517 parent_team->t.t_stack_id = NULL;
2525 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2526 void *codeptr = team->t.ompt_team_info.master_return_address;
2531 if (team->t.t_active_level == 1 &&
2532 (!master_th->th.th_teams_microtask ||
2533 master_th->th.th_teams_size.nteams == 1)) {
2534 master_th->th.th_ident = loc;
2537 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2538 __kmp_forkjoin_frames_mode == 3)
2539 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2540 master_th->th.th_frame_time, 0, loc,
2541 master_th->th.th_team_nproc, 1);
2542 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2543 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2544 __kmp_itt_region_joined(gtid);
2548#if KMP_AFFINITY_SUPPORTED
2551 master_th->th.th_first_place = team->t.t_first_place;
2552 master_th->th.th_last_place = team->t.t_last_place;
2556 if (master_th->th.th_teams_microtask && !exit_teams &&
2557 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2558 team->t.t_level == master_th->th.th_teams_level + 1) {
2563 ompt_data_t ompt_parallel_data = ompt_data_none;
2564 if (ompt_enabled.enabled) {
2565 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2566 if (ompt_enabled.ompt_callback_implicit_task) {
2567 int ompt_team_size = team->t.t_nproc;
2568 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2569 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2570 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2572 task_info->frame.exit_frame = ompt_data_none;
2573 task_info->task_data = ompt_data_none;
2574 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2575 __ompt_lw_taskteam_unlink(master_th);
2580 team->t.t_active_level--;
2581 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2587 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2588 int old_num = master_th->th.th_team_nproc;
2589 int new_num = master_th->th.th_teams_size.nth;
2590 kmp_info_t **other_threads = team->t.t_threads;
2591 team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // adjust the states of the newly added threads of the team
      for (int i = old_num; i < new_num; ++i) {
        KMP_DEBUG_ASSERT(other_threads[i]);
        // re-initialize the thread's barrier data
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // synchronize the thread's task state with the primary thread's
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
2615 if (ompt_enabled.enabled) {
2616 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2617 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2625 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2626 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2628 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2633 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2635 if (!master_th->th.th_teams_microtask ||
2636 team->t.t_level > master_th->th.th_teams_level) {
2638 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2640 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2643 if (ompt_enabled.enabled) {
2644 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2645 if (ompt_enabled.ompt_callback_implicit_task) {
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
2649 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2650 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2651 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2652 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2654 task_info->frame.exit_frame = ompt_data_none;
2655 task_info->task_data = ompt_data_none;
  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);
2663 master_th->th.th_def_allocator = team->t.t_def_allocator;
2666 if (ompd_state & OMPD_ENABLE_BP)
2667 ompd_bp_parallel_end();
2669 updateHWFPControl(team);
2671 if (root->r.r_active != master_active)
2672 root->r.r_active = master_active;
  __kmp_free_team(root, team USE_NESTED_HOT_ARG(master_th)); // free the team
2682 master_th->th.th_team = parent_team;
2683 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2684 master_th->th.th_team_master = parent_team->t.t_threads[0];
2685 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2688 if (parent_team->t.t_serialized &&
2689 parent_team != master_th->th.th_serial_team &&
2690 parent_team != root->r.r_root_team) {
2691 __kmp_free_team(root,
2692 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2693 master_th->th.th_serial_team = parent_team;
2696 if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // restore task state from the memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // remember the primary thread's state if this team is re-used
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // now restore the state at this level
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    } else if (team != root->r.r_hot_team) {
      // not the hot team: the workers' task state will be reset, so reset the
      // primary thread's task state as well
      master_th->th.th_task_state = 0;
    }
2716 master_th->th.th_task_team =
2717 parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
2727 master_th->th.th_current_task->td_flags.executing = 1;
2729 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2731#if KMP_AFFINITY_SUPPORTED
2732 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2733 __kmp_reset_root_init_mask(gtid);
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }

  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
2753void __kmp_save_internal_controls(kmp_info_t *thread) {
2755 if (thread->th.th_team != thread->th.th_serial_team) {
2758 if (thread->th.th_team->t.t_serialized > 1) {
2761 if (thread->th.th_team->t.t_control_stack_top == NULL) {
    if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
        thread->th.th_team->t.t_serialized) {
    kmp_internal_control_t *control =
        (kmp_internal_control_t *)__kmp_allocate(
            sizeof(kmp_internal_control_t));

    copy_icvs(control, &thread->th.th_current_task->td_icvs);

    control->serial_nesting_level = thread->th.th_team->t.t_serialized;

    control->next = thread->th.th_team->t.t_control_stack_top;
    thread->th.th_team->t.t_control_stack_top = control;
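    // The saved ICVs form a singly linked stack hanging off the team
    // (t_control_stack_top), one record per serialized nesting level.
    // Sketch of the push performed above (illustrative, names abbreviated):
    //   node = __kmp_allocate(sizeof(kmp_internal_control_t));
    //   copy_icvs(node, &current_task->td_icvs); // snapshot the current ICVs
    //   node->serial_nesting_level = t_serialized;
    //   node->next = t_control_stack_top;        // push onto the team stack
    //   t_control_stack_top = node;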
/* Change the number of threads used for subsequent parallel regions */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;
2798 thread = __kmp_threads[gtid];
2799 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2802 __kmp_save_internal_controls(thread);
2804 set__nproc(thread, new_nth);
2809 root = thread->th.th_root;
  // If this call will cause the hot team size to be reduced, reduce it now
  // rather than waiting for the next parallel region.
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;
2819 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2821 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2822 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2825 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2826 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2827 if (__kmp_tasking_mode != tskm_immediate_exec) {
2830 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2832 __kmp_free_thread(hot_team->t.t_threads[f]);
2833 hot_team->t.t_threads[f] = NULL;
2835 hot_team->t.t_nproc = new_nth;
2836#if KMP_NESTED_HOT_TEAMS
2837 if (thread->th.th_hot_teams) {
2838 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2839 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2843 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2844 hot_team->t.b->update_num_threads(new_nth);
2845 __kmp_add_threads_to_team(hot_team, new_nth);
2848 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2851 for (f = 0; f < new_nth; f++) {
2852 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2853 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2856 hot_team->t.t_size_changed = -1;
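    // Shrinking the hot team above releases the surplus workers back to the
    // thread pool, resizes the distributed barrier when that pattern is in
    // use, and refreshes each remaining thread's cached th_team_nproc;
    // t_size_changed = -1 marks the resize so the next fork re-checks the
    // team size.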
/* Change max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10,
           ("__kmp_set_max_active_levels: new max_active_levels for thread "
            "%d = (%d)\n",
            gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // A negative value is ignored: keep the last valid setting and warn.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // the value is within the valid range: [0; KMP_MAX_ACTIVE_LEVELS_LIMIT]
  } else {
    // values above the limit are clamped and a warning is issued
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));
2898 thread = __kmp_threads[gtid];
2900 __kmp_save_internal_controls(thread);
2902 set__max_active_levels(thread, max_active_levels);
/* Get max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2910 KMP_DEBUG_ASSERT(__kmp_init_serial);
2912 thread = __kmp_threads[gtid];
2913 KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }

void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
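// The two build asserts above hold because both the public kmp_sched_t enum
// and the internal sched_type enum are passed through plain 'int' parameters
// in the entry points that set and get the schedule, so their storage size
// must match int.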
/* Change the run-sched-var ICV for the calling thread */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);
  // check and then change the schedule
  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
2964 thread = __kmp_threads[gtid];
2966 __kmp_save_internal_controls(thread);
  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // an invalid chunk indicates the unchunked (default) static schedule
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore the chunk parameter for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
/* Get the run-sched-var ICV values for the calling thread */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
3000 KMP_DEBUG_ASSERT(__kmp_init_serial);
3002 thread = __kmp_threads[gtid];
3004 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
3005 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
3007 case kmp_sch_static_greedy:
3008 case kmp_sch_static_balanced:
3009 *kind = kmp_sched_static;
3010 __kmp_sched_apply_mods_stdkind(kind, th_type);
3013 case kmp_sch_static_chunked:
3014 *kind = kmp_sched_static;
3016 case kmp_sch_dynamic_chunked:
3017 *kind = kmp_sched_dynamic;
3020 case kmp_sch_guided_iterative_chunked:
3021 case kmp_sch_guided_analytical_chunked:
3022 *kind = kmp_sched_guided;
3025 *kind = kmp_sched_auto;
3027 case kmp_sch_trapezoidal:
3028 *kind = kmp_sched_trapezoidal;
3030#if KMP_STATIC_STEAL_ENABLED
3031 case kmp_sch_static_steal:
3032 *kind = kmp_sched_static_steal;
3036 KMP_FATAL(UnknownSchedulingType, th_type);
3039 __kmp_sched_apply_mods_stdkind(kind, th_type);
3040 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
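// Mapping back from the internal sched_type to the public kmp_sched_t: the
// static variants collapse to kmp_sched_static, the guided variants to
// kmp_sched_guided, and __kmp_sched_apply_mods_stdkind re-applies the
// monotonic/nonmonotonic modifier bits before the chunk size is returned.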
int __kmp_get_ancestor_thread_num(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3050 KMP_DEBUG_ASSERT(__kmp_init_serial);
3057 thr = __kmp_threads[gtid];
3058 team = thr->th.th_team;
3059 ii = team->t.t_level;
3063 if (thr->th.th_teams_microtask) {
3065 int tlevel = thr->th.th_teams_level;
3068 KMP_DEBUG_ASSERT(ii >= tlevel);
3080 return __kmp_tid_from_gtid(gtid);
3082 dd = team->t.t_serialized;
3084 while (ii > level) {
3085 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3087 if ((team->t.t_serialized) && (!dd)) {
3088 team = team->t.t_parent;
3092 team = team->t.t_parent;
3093 dd = team->t.t_serialized;
3098 return (dd > 1) ? (0) : (team->t.t_master_tid);
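// Walking up the team tree: each t_parent hop lowers the nesting level by
// one, and runs of serialized levels (t_serialized) are consumed without a
// separate team object.  At the requested level the ancestor's thread number
// is t_master_tid, or 0 if that level was serialized.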
int __kmp_get_team_size(int gtid, int level) {
  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);
3115 thr = __kmp_threads[gtid];
3116 team = thr->th.th_team;
3117 ii = team->t.t_level;
3121 if (thr->th.th_teams_microtask) {
3123 int tlevel = thr->th.th_teams_level;
3126 KMP_DEBUG_ASSERT(ii >= tlevel);
3137 while (ii > level) {
3138 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3140 if (team->t.t_serialized && (!dd)) {
3141 team = team->t.t_parent;
3145 team = team->t.t_parent;
3150 return team->t.t_nproc;
3153kmp_r_sched_t __kmp_get_schedule_global() {
3158 kmp_r_sched_t r_sched;
3164 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3165 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3168 r_sched.r_sched_type = __kmp_static;
3171 r_sched.r_sched_type = __kmp_guided;
3173 r_sched.r_sched_type = __kmp_sched;
3175 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3177 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3179 r_sched.chunk = KMP_DEFAULT_CHUNK;
3181 r_sched.chunk = __kmp_chunk;
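// The run-sched ICV assembled here comes from the OMP_SCHEDULE settings: the
// generic "static"/"guided" placeholders are replaced by the concrete
// default variants (__kmp_static / __kmp_guided), the monotonic/nonmonotonic
// modifiers are re-attached, and an unset chunk falls back to
// KMP_DEFAULT_CHUNK.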
/* Allocate the argv array for the team, reusing the inline storage when the
   argument count is small enough. */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if argv was previously allocated, free it */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use the inline storage embedded in the team structure */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* use a heap-allocated array, sized with some headroom */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
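      // Argument storage strategy: argument lists of up to
      // KMP_INLINE_ARGV_ENTRIES pointers live inside the team structure
      // itself (t_inline_argv), avoiding a heap allocation on the common
      // path; larger lists get a page-allocated array of at least
      // KMP_MIN_MALLOC_ARGV_ENTRIES entries so small growth does not force
      // an immediate reallocation.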
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
3255static void __kmp_free_team_arrays(kmp_team_t *team) {
3258 for (i = 0; i < team->t.t_max_nproc; ++i) {
3259 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3260 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3261 team->t.t_dispatch[i].th_disp_buffer = NULL;
3264#if KMP_USE_HIER_SCHED
3265 __kmp_dispatch_free_hierarchies(team);
3267 __kmp_free(team->t.t_threads);
3268 __kmp_free(team->t.t_disp_buffer);
3269 __kmp_free(team->t.t_dispatch);
3270 __kmp_free(team->t.t_implicit_task_taskdata);
3271 team->t.t_threads = NULL;
3272 team->t.t_disp_buffer = NULL;
3273 team->t.t_dispatch = NULL;
3274 team->t.t_implicit_task_taskdata = 0;
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
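// Reallocation preserves the existing kmp_info_t pointers: only the per-team
// arrays are re-created at the larger size, after which the previous
// t_threads contents are copied over and the old array is freed.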
3291static kmp_internal_control_t __kmp_get_global_icvs(
void) {
3293 kmp_r_sched_t r_sched =
3294 __kmp_get_schedule_global();
3296 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3298 kmp_internal_control_t g_icvs = {
3300 (kmp_int8)__kmp_global.g.g_dynamic,
3302 (kmp_int8)__kmp_env_blocktime,
3304 __kmp_dflt_blocktime,
3309 __kmp_dflt_team_nth,
3315 __kmp_dflt_max_active_levels,
3319 __kmp_nested_proc_bind.bind_types[0],
3320 __kmp_default_device,
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level = 0;
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
3338static void __kmp_initialize_root(kmp_root_t *root) {
3340 kmp_team_t *root_team;
3341 kmp_team_t *hot_team;
3342 int hot_team_max_nth;
3343 kmp_r_sched_t r_sched =
3344 __kmp_get_schedule_global();
3345 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3346 KMP_DEBUG_ASSERT(root);
3347 KMP_ASSERT(!root->r.r_begin);
3350 __kmp_init_lock(&root->r.r_begin_lock);
3351 root->r.r_begin = FALSE;
3352 root->r.r_active = FALSE;
3353 root->r.r_in_parallel = 0;
3354 root->r.r_blocktime = __kmp_dflt_blocktime;
3355#if KMP_AFFINITY_SUPPORTED
3356 root->r.r_affinity_assigned = FALSE;
3361 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3364 __kmp_allocate_team(root,
3370 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3372 USE_NESTED_HOT_ARG(NULL)
3377 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3380 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3382 root->r.r_root_team = root_team;
3383 root_team->t.t_control_stack_top = NULL;
3386 root_team->t.t_threads[0] = NULL;
3387 root_team->t.t_nproc = 1;
3388 root_team->t.t_serialized = 1;
3390 root_team->t.t_sched.sched = r_sched.sched;
3393 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3394 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3398 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3401 __kmp_allocate_team(root,
3403 __kmp_dflt_team_nth_ub * 2,
3407 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3409 USE_NESTED_HOT_ARG(NULL)
3411 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3413 root->r.r_hot_team = hot_team;
3414 root_team->t.t_control_stack_top = NULL;
3417 hot_team->t.t_parent = root_team;
3420 hot_team_max_nth = hot_team->t.t_max_nproc;
3421 for (f = 0; f < hot_team_max_nth; ++f) {
3422 hot_team->t.t_threads[f] = NULL;
3424 hot_team->t.t_nproc = 1;
3426 hot_team->t.t_sched.sched = r_sched.sched;
3427 hot_team->t.t_size_changed = 0;
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;

static void __kmp_print_structure_team_accum( // Add team to the list of teams.
    kmp_team_list_t list, // List of teams.
    kmp_team_p const *team // Team to add.
) {

  kmp_team_list_t l;

  KMP_DEBUG_ASSERT(list != NULL);
3455 __kmp_print_structure_team_accum(list, team->t.t_parent);
3456 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3460 while (l->next != NULL && l->entry != team) {
3463 if (l->next != NULL) {
3469 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3475 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3476 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3504void __kmp_print_structure(
void) {
3506 kmp_team_list_t list;
3510 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3514 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3515 "Table\n------------------------------\n");
3518 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3519 __kmp_printf(
"%2d", gtid);
3520 if (__kmp_threads != NULL) {
3521 __kmp_printf(
" %p", __kmp_threads[gtid]);
3523 if (__kmp_root != NULL) {
3524 __kmp_printf(
" %p", __kmp_root[gtid]);
3531 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3533 if (__kmp_threads != NULL) {
3535 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3536 kmp_info_t
const *thread = __kmp_threads[gtid];
3537 if (thread != NULL) {
3538 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3539 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3540 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3541 __kmp_print_structure_team(
" Serial Team: ",
3542 thread->th.th_serial_team);
3543 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3544 __kmp_print_structure_thread(
" Primary: ",
3545 thread->th.th_team_master);
3546 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3547 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3548 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3549 __kmp_print_structure_thread(
" Next in pool: ",
3550 thread->th.th_next_pool);
3552 __kmp_print_structure_team_accum(list, thread->th.th_team);
3553 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3557 __kmp_printf(
"Threads array is not allocated.\n");
3561 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3563 if (__kmp_root != NULL) {
3565 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3566 kmp_root_t
const *root = __kmp_root[gtid];
3568 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3569 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3570 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3571 __kmp_print_structure_thread(
" Uber Thread: ",
3572 root->r.r_uber_thread);
3573 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3574 __kmp_printf(
" In Parallel: %2d\n",
3575 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3577 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3578 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3582 __kmp_printf(
"Ubers array is not allocated.\n");
3585 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3587 while (list->next != NULL) {
3588 kmp_team_p
const *team = list->entry;
3590 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3591 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3592 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3593 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3594 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3595 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3596 for (i = 0; i < team->t.t_nproc; ++i) {
3597 __kmp_printf(
" Thread %2d: ", i);
3598 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3600 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3606 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3608 __kmp_print_structure_thread(
"Thread pool: ",
3609 CCAST(kmp_info_t *, __kmp_thread_pool));
3610 __kmp_print_structure_team(
"Team pool: ",
3611 CCAST(kmp_team_t *, __kmp_team_pool));
3615 while (list != NULL) {
3616 kmp_team_list_item_t *item = list;
3618 KMP_INTERNAL_FREE(item);
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
/* __kmp_get_random: Get a random number using a linear congruential method */
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}
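// Per-thread linear congruential generator: each thread's multiplier th_a is
// a prime picked from __kmp_primes by its tid (see __kmp_init_random below),
// and the state advances as x = a * x + 1, returning the high 16 bits since
// the low bits of an LCG are the least random.  Self-contained sketch of the
// same recurrence (illustrative only, not runtime code):
//   unsigned short next_random(unsigned *x, unsigned a) {
//     unsigned short r = (unsigned short)(*x >> 16); // take the high 16 bits
//     *x = *x * a + 1;                               // advance the state
//     return r;
//   }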
/* __kmp_init_random: Initialize a thread's random number generator */
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
/* Reclaim array entries for root threads that are already dead; returns the
   number of entries reclaimed. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // reclaim only non-active dead roots
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;
3712#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3715 added = __kmp_reclaim_dead_roots();
3744 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3747 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3751 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
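  // Capacity grows geometrically: starting from the current capacity, keep
  // doubling until the requested minimum fits, clamping at __kmp_sys_max_nth.
  // Minimal sketch of the loop above (illustrative only):
  //   newCapacity = capacity;
  //   do {
  //     newCapacity = (newCapacity <= max / 2) ? 2 * newCapacity : max;
  //   } while (newCapacity < minimumRequiredCapacity);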
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  // Put the old __kmp_threads array on a list so that ongoing references to
  // it remain valid; the list is cleaned up at library shutdown.
  kmp_old_threads_list_t *node =
      (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
  node->threads = __kmp_threads;
  node->next = __kmp_old_threads_list;
  __kmp_old_threads_list = node;

  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;
3779 if (newCapacity > __kmp_tp_capacity) {
3780 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3781 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3782 __kmp_threadprivate_resize_cache(newCapacity);
3784 *(
volatile int *)&__kmp_tp_capacity = newCapacity;
3786 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
/* Register the current thread as a root thread and obtain its gtid. */
int __kmp_register_root(int initial_thread) {
  kmp_info_t *root_thread;
  kmp_root_t *root;
  int gtid;
  int capacity;
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  KA_TRACE(20, ("__kmp_register_root: entered\n"));
3818 capacity = __kmp_threads_capacity;
3819 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3826 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3827 capacity -= __kmp_hidden_helper_threads_num;
3831 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3832 if (__kmp_tp_cached) {
3833 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3834 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3835 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3837 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3847 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3850 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3851 gtid <= __kmp_hidden_helper_threads_num;
3854 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3855 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3856 "hidden helper thread: T#%d\n",
3862 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3865 for (gtid = __kmp_hidden_helper_threads_num + 1;
3866 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3870 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3871 KMP_ASSERT(gtid < __kmp_threads_capacity);
3876 TCW_4(__kmp_nth, __kmp_nth + 1);
3880 if (__kmp_adjust_gtid_mode) {
3881 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3882 if (TCR_4(__kmp_gtid_mode) != 2) {
3883 TCW_4(__kmp_gtid_mode, 2);
3886 if (TCR_4(__kmp_gtid_mode) != 1) {
3887 TCW_4(__kmp_gtid_mode, 1);
3892#ifdef KMP_ADJUST_BLOCKTIME
3895 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3896 if (__kmp_nth > __kmp_avail_proc) {
3897 __kmp_zero_bt = TRUE;
3903 if (!(root = __kmp_root[gtid])) {
3904 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3905 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3908#if KMP_STATS_ENABLED
3910 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3911 __kmp_stats_thread_ptr->startLife();
3912 KMP_SET_THREAD_STATE(SERIAL_REGION);
3915 __kmp_initialize_root(root);
3918 if (root->r.r_uber_thread) {
3919 root_thread = root->r.r_uber_thread;
    root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3922 if (__kmp_storage_map) {
3923 __kmp_print_thread_storage_map(root_thread, gtid);
3925 root_thread->th.th_info.ds.ds_gtid = gtid;
3927 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3929 root_thread->th.th_root = root;
3930 if (__kmp_env_consistency_check) {
3931 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3934 __kmp_initialize_fast_memory(root_thread);
3938 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3939 __kmp_initialize_bget(root_thread);
3941 __kmp_init_random(root_thread);
3945 if (!root_thread->th.th_serial_team) {
3946 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
    KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
    root_thread->th.th_serial_team = __kmp_allocate_team(
        root, 1, 1,
#if OMPT_SUPPORT
        ompt_data_none, // root parallel id
#endif
        proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
    KMP_ASSERT(root_thread->th.th_serial_team);
    KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
                  root_thread->th.th_serial_team));
3960 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3962 root->r.r_root_team->t.t_threads[0] = root_thread;
3963 root->r.r_hot_team->t.t_threads[0] = root_thread;
3964 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3966 root_thread->th.th_serial_team->t.t_serialized = 0;
3967 root->r.r_uber_thread = root_thread;
3970 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3971 TCW_4(__kmp_init_gtid, TRUE);
3974 __kmp_gtid_set_specific(gtid);
3977 __kmp_itt_thread_name(gtid);
3980#ifdef KMP_TDATA_GTID
3983 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3984 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
  KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
                "plain=%u\n",
                gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
                root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
                KMP_INIT_BARRIER_STATE));
3993 for (b = 0; b < bs_last_barrier; ++b) {
3994 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3996 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
4000 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
4001 KMP_INIT_BARRIER_STATE);
4003#if KMP_AFFINITY_SUPPORTED
4004 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
4005 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
4006 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
4007 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
4009 root_thread->th.th_def_allocator = __kmp_def_allocator;
4010 root_thread->th.th_prev_level = 0;
4011 root_thread->th.th_prev_num_threads = 1;
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = root_thread; // the root thread anchors its own CG
  tmp->cg_thread_limit = __kmp_cg_max_nth;
  tmp->cg_nthreads = 1;
  KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
                 " cg_nthreads init to 1\n",
                 root_thread, tmp));
  tmp->up = NULL;
  root_thread->th.th_cg_roots = tmp;
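  // Every root thread anchors its own contention group: the kmp_cg_root_t
  // node records the group's thread limit (__kmp_cg_max_nth) and a running
  // count of threads in the group; worker threads later share this node
  // through th_cg_roots (see __kmp_initialize_info).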
4023 __kmp_root_counter++;
4026 if (!initial_thread && ompt_enabled.enabled) {
4028 kmp_info_t *root_thread = ompt_get_thread();
4030 ompt_set_thread_state(root_thread, ompt_state_overhead);
4032 if (ompt_enabled.ompt_callback_thread_begin) {
4033 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4034 ompt_thread_initial, __ompt_get_thread_data_internal());
4036 ompt_data_t *task_data;
4037 ompt_data_t *parallel_data;
4038 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4040 if (ompt_enabled.ompt_callback_implicit_task) {
4041 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4042 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4045 ompt_set_thread_state(root_thread, ompt_state_work_serial);
4049 if (ompd_state & OMPD_ENABLE_BP)
4050 ompd_bp_thread_begin();
4054 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if KMP_NESTED_HOT_TEAMS
// Free the hot teams of the given thread at the given nesting level and all
// deeper levels; returns the number of threads released.
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
4063 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
4064 if (!hot_teams || !hot_teams[level].hot_team) {
4067 KMP_DEBUG_ASSERT(level < max_level);
4068 kmp_team_t *team = hot_teams[level].hot_team;
4069 nth = hot_teams[level].hot_team_nth;
4071 if (level < max_level - 1) {
4072 for (i = 0; i < nth; ++i) {
4073 kmp_info_t *th = team->t.t_threads[i];
4074 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
4075 if (i > 0 && th->th.th_hot_teams) {
4076 __kmp_free(th->th.th_hot_teams);
4077 th->th.th_hot_teams = NULL;
4081 __kmp_free_team(root, team, NULL);
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4089 kmp_team_t *root_team = root->r.r_root_team;
4090 kmp_team_t *hot_team = root->r.r_hot_team;
4091 int n = hot_team->t.t_nproc;
4094 KMP_DEBUG_ASSERT(!root->r.r_active);
4096 root->r.r_root_team = NULL;
4097 root->r.r_hot_team = NULL;
4100 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4101#if KMP_NESTED_HOT_TEAMS
4102 if (__kmp_hot_teams_max_level >
4104 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4105 kmp_info_t *th = hot_team->t.t_threads[i];
4106 if (__kmp_hot_teams_max_level > 1) {
4107 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4109 if (th->th.th_hot_teams) {
4110 __kmp_free(th->th.th_hot_teams);
4111 th->th.th_hot_teams = NULL;
4116 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4121 if (__kmp_tasking_mode != tskm_immediate_exec) {
4122 __kmp_wait_to_unref_task_teams();
4128 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4130 (LPVOID) & (root->r.r_uber_thread->th),
4131 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4132 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4136 if (ompd_state & OMPD_ENABLE_BP)
4137 ompd_bp_thread_end();
4141 ompt_data_t *task_data;
4142 ompt_data_t *parallel_data;
4143 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4145 if (ompt_enabled.ompt_callback_implicit_task) {
4146 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4147 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4149 if (ompt_enabled.ompt_callback_thread_end) {
4150 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4151 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4157 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4158 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4160 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4161 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4164 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4165 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4166 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4167 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4168 root->r.r_uber_thread->th.th_cg_roots = NULL;
4170 __kmp_reap_thread(root->r.r_uber_thread, 1);
4174 root->r.r_uber_thread = NULL;
4176 root->r.r_begin = FALSE;
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4186 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4187 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
4194 kmp_root_t *root = __kmp_root[gtid];
4196 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4197 KMP_ASSERT(KMP_UBER_GTID(gtid));
4198 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4199 KMP_ASSERT(root->r.r_active == FALSE);
4203 kmp_info_t *thread = __kmp_threads[gtid];
4204 kmp_team_t *team = thread->th.th_team;
4205 kmp_task_team_t *task_team = thread->th.th_task_team;
4208 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4209 task_team->tt.tt_hidden_helper_task_encountered)) {
4212 thread->th.ompt_thread_info.state = ompt_state_undefined;
4214 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4217 __kmp_reset_root(gtid, root);
  KA_TRACE(1,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4235 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4236 KMP_ASSERT(KMP_UBER_GTID(gtid));
4237 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4238 KMP_ASSERT(root->r.r_active == FALSE);
4240 r = __kmp_reset_root(gtid, root);
  KA_TRACE(1,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
4248void __kmp_task_info() {
4250 kmp_int32 gtid = __kmp_entry_gtid();
4251 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4252 kmp_info_t *this_thr = __kmp_threads[gtid];
4253 kmp_team_t *steam = this_thr->th.th_serial_team;
4254 kmp_team_t *team = this_thr->th.th_team;
4257 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4259 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4260 team->t.t_implicit_task_taskdata[tid].td_parent);
static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
                                  int tid, int gtid) {
4272 KMP_DEBUG_ASSERT(this_thr != NULL);
4273 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4274 KMP_DEBUG_ASSERT(team);
4275 KMP_DEBUG_ASSERT(team->t.t_threads);
4276 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4277 kmp_info_t *master = team->t.t_threads[0];
4278 KMP_DEBUG_ASSERT(master);
4279 KMP_DEBUG_ASSERT(master->th.th_root);
4283 TCW_SYNC_PTR(this_thr->th.th_team, team);
4285 this_thr->th.th_info.ds.ds_tid = tid;
4286 this_thr->th.th_set_nproc = 0;
4287 if (__kmp_tasking_mode != tskm_immediate_exec)
4290 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4292 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4293 this_thr->th.th_set_proc_bind = proc_bind_default;
4294#if KMP_AFFINITY_SUPPORTED
4295 this_thr->th.th_new_place = this_thr->th.th_current_place;
4297 this_thr->th.th_root = master->th.th_root;
4300 this_thr->th.th_team_nproc = team->t.t_nproc;
4301 this_thr->th.th_team_master = master;
4302 this_thr->th.th_team_serialized = team->t.t_serialized;
4304 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
  KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));

  __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
                           team, tid, TRUE);

  KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
                tid, gtid, this_thr, this_thr->th.th_current_task));
4318 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4320 this_thr->th.th_local.this_construct = 0;
4322 if (!this_thr->th.th_pri_common) {
4323 this_thr->th.th_pri_common =
4324 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4325 if (__kmp_storage_map) {
4326 __kmp_print_storage_map_gtid(
4327 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4328 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4330 this_thr->th.th_pri_head = NULL;
4333 if (this_thr != master &&
4334 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4336 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4337 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4340 int i = tmp->cg_nthreads--;
4341 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4342 " on node %p of thread %p to %d\n",
4343 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4348 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4350 this_thr->th.th_cg_roots->cg_nthreads++;
4351 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4352 " node %p of thread %p to %d\n",
4353 this_thr, this_thr->th.th_cg_roots,
4354 this_thr->th.th_cg_roots->cg_root,
4355 this_thr->th.th_cg_roots->cg_nthreads));
4356 this_thr->th.th_current_task->td_icvs.thread_limit =
4357 this_thr->th.th_cg_roots->cg_thread_limit;
  volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
  size_t disp_size =
      sizeof(dispatch_private_info_t) *
      (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
  KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
                team->t.t_max_nproc));
4369 KMP_ASSERT(dispatch);
4370 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4371 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4373 dispatch->th_disp_index = 0;
4374 dispatch->th_doacross_buf_idx = 0;
4375 if (!dispatch->th_disp_buffer) {
4376 dispatch->th_disp_buffer =
4377 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4379 if (__kmp_storage_map) {
4380 __kmp_print_storage_map_gtid(
4381 gtid, &dispatch->th_disp_buffer[0],
4382 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4384 : __kmp_dispatch_num_buffers],
4386 "th_%d.th_dispatch.th_disp_buffer "
4387 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4388 gtid, team->t.t_id, gtid);
4391 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4394 dispatch->th_dispatch_pr_current = 0;
4395 dispatch->th_dispatch_sh_current = 0;
4397 dispatch->th_deo_fcn = 0;
4398 dispatch->th_dxo_fcn = 0;
4401 this_thr->th.th_next_pool = NULL;
4403 if (!this_thr->th.th_task_state_memo_stack) {
    this_thr->th.th_task_state_memo_stack =
        (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
    this_thr->th.th_task_state_top = 0;
    this_thr->th.th_task_state_stack_sz = 4;
    for (i = 0; i < this_thr->th.th_task_state_stack_sz;
         ++i) // zero-init the stack
      this_thr->th.th_task_state_memo_stack[i] = 0;
4414 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4415 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
/* Allocate a new kmp_info_t (thread descriptor), either by re-using one from
   the thread pool or by creating a brand-new worker. */
kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
                                  int new_tid) {
  kmp_team_t *serial_team;
  kmp_info_t *new_thr;
  int new_gtid;

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
  KMP_DEBUG_ASSERT(root && team);
#if !KMP_NESTED_HOT_TEAMS
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
#endif

  /* first, try to reuse a thread from the thread pool */
  if (__kmp_thread_pool) {
    new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4442 if (new_thr == __kmp_thread_pool_insert_pt) {
4443 __kmp_thread_pool_insert_pt = NULL;
4445 TCW_4(new_thr->th.th_in_pool, FALSE);
4446 __kmp_suspend_initialize_thread(new_thr);
4447 __kmp_lock_suspend_mx(new_thr);
4448 if (new_thr->th.th_active_in_pool == TRUE) {
4449 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4450 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4451 new_thr->th.th_active_in_pool = FALSE;
4453 __kmp_unlock_suspend_mx(new_thr);
    KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
                  __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4457 KMP_ASSERT(!new_thr->th.th_team);
4458 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4461 __kmp_initialize_info(new_thr, team, new_tid,
4462 new_thr->th.th_info.ds.ds_gtid);
4463 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4465 TCW_4(__kmp_nth, __kmp_nth + 1);
4467 new_thr->th.th_task_state = 0;
4468 new_thr->th.th_task_state_top = 0;
4469 new_thr->th.th_task_state_stack_sz = 4;
4471 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4473 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4477#ifdef KMP_ADJUST_BLOCKTIME
4480 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4481 if (__kmp_nth > __kmp_avail_proc) {
4482 __kmp_zero_bt = TRUE;
4491 kmp_balign_t *balign = new_thr->th.th_bar;
4492 for (b = 0; b < bs_last_barrier; ++b)
4493 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
    KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
                  __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4504 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4505 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4510 if (!TCR_4(__kmp_init_monitor)) {
4511 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4512 if (!TCR_4(__kmp_init_monitor)) {
4513 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4514 TCW_4(__kmp_init_monitor, 1);
4515 __kmp_create_monitor(&__kmp_monitor);
4516 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4527 while (TCR_4(__kmp_init_monitor) < 2) {
4530 KF_TRACE(10, (
"after monitor thread has started\n"));
4533 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4540 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4542 : __kmp_hidden_helper_threads_num + 1;
4544 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4546 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4549 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4550 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4557 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4559#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4562 __itt_suppress_mark_range(
4563 __itt_suppress_range, __itt_suppress_threading_errors,
4564 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4565 __itt_suppress_mark_range(
4566 __itt_suppress_range, __itt_suppress_threading_errors,
4567 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4569 __itt_suppress_mark_range(
4570 __itt_suppress_range, __itt_suppress_threading_errors,
4571 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4573 __itt_suppress_mark_range(__itt_suppress_range,
4574 __itt_suppress_threading_errors,
4575 &new_thr->th.th_suspend_init_count,
4576 sizeof(new_thr->th.th_suspend_init_count));
4579 __itt_suppress_mark_range(__itt_suppress_range,
4580 __itt_suppress_threading_errors,
4581 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4582 sizeof(new_thr->th.th_bar[0].bb.b_go));
4583 __itt_suppress_mark_range(__itt_suppress_range,
4584 __itt_suppress_threading_errors,
4585 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4586 sizeof(new_thr->th.th_bar[1].bb.b_go));
4587 __itt_suppress_mark_range(__itt_suppress_range,
4588 __itt_suppress_threading_errors,
4589 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4590 sizeof(new_thr->th.th_bar[2].bb.b_go));
4592 if (__kmp_storage_map) {
4593 __kmp_print_thread_storage_map(new_thr, new_gtid);
  kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
  KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
  new_thr->th.th_serial_team = serial_team =
      (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
#if OMPT_SUPPORT
                                        ompt_data_none, // root parallel id
#endif
                                        proc_bind_default, &r_icvs,
                                        0 USE_NESTED_HOT_ARG(NULL));
4608 KMP_ASSERT(serial_team);
4609 serial_team->t.t_serialized = 0;
4611 serial_team->t.t_threads[0] = new_thr;
  KF_TRACE(10,
           ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
            new_thr));

  /* setup the thread structures */
  __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4620 __kmp_initialize_fast_memory(new_thr);
4624 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4625 __kmp_initialize_bget(new_thr);
4628 __kmp_init_random(new_thr);
4632 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4633 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4636 kmp_balign_t *balign = new_thr->th.th_bar;
4637 for (b = 0; b < bs_last_barrier; ++b) {
4638 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4639 balign[b].bb.team = NULL;
4640 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4641 balign[b].bb.use_oncore_barrier = 0;
4644 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4645 new_thr->th.th_sleep_loc_type = flag_unset;
4647 new_thr->th.th_spin_here = FALSE;
4648 new_thr->th.th_next_waiting = 0;
4650 new_thr->th.th_blocking =
false;
4653#if KMP_AFFINITY_SUPPORTED
4654 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4655 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4656 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4657 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4659 new_thr->th.th_def_allocator = __kmp_def_allocator;
4660 new_thr->th.th_prev_level = 0;
4661 new_thr->th.th_prev_num_threads = 1;
4663 TCW_4(new_thr->th.th_in_pool, FALSE);
4664 new_thr->th.th_active_in_pool = FALSE;
4665 TCW_4(new_thr->th.th_active, TRUE);
4673 if (__kmp_adjust_gtid_mode) {
4674 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4675 if (TCR_4(__kmp_gtid_mode) != 2) {
4676 TCW_4(__kmp_gtid_mode, 2);
4679 if (TCR_4(__kmp_gtid_mode) != 1) {
4680 TCW_4(__kmp_gtid_mode, 1);
4685#ifdef KMP_ADJUST_BLOCKTIME
4688 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4689 if (__kmp_nth > __kmp_avail_proc) {
4690 __kmp_zero_bt = TRUE;
4695#if KMP_AFFINITY_SUPPORTED
4697 __kmp_affinity_set_init_mask(new_gtid, FALSE);
  KF_TRACE(
      10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
  __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
  KF_TRACE(10,
           ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));

  KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
                new_gtid));

  return new_thr;
}
/* Reinitialize a team for reuse. */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
4723 KMP_DEBUG_ASSERT(team && new_icvs);
4724 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4725 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4727 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4729 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4730 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
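/* Initialize the team data structure.  This assumes the t_threads and
   t_max_nproc arrays are already allocated; the per-team bookkeeping (master
   tid, serialization, dispatch, copyin/copypriv state) is reset here and ICV
   propagation is delegated to __kmp_reinitialize_team above. */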
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4745 KMP_DEBUG_ASSERT(team);
4746 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4747 KMP_DEBUG_ASSERT(team->t.t_threads);
4750 team->t.t_master_tid = 0;
4752 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4753 team->t.t_nproc = new_nproc;
4756 team->t.t_next_pool = NULL;
4760 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4761 team->t.t_invoke = NULL;
4764 team->t.t_sched.sched = new_icvs->sched.sched;
4766#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4767 team->t.t_fp_control_saved = FALSE;
4768 team->t.t_x87_fpu_control_word = 0;
4769 team->t.t_mxcsr = 0;
4772 team->t.t_construct = 0;
4774 team->t.t_ordered.dt.t_value = 0;
4775 team->t.t_master_active = FALSE;
4778 team->t.t_copypriv_data = NULL;
4781 team->t.t_copyin_counter = 0;
4784 team->t.t_control_stack_top = NULL;
4786 __kmp_reinitialize_team(team, new_icvs, loc);
4789 KF_TRACE(10, (
"__kmp_initialize_team: exit: team=%p\n", team));
#if KMP_AFFINITY_SUPPORTED
static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
                                          int first, int last, int newp) {
  th->th.th_first_place = first;
  th->th.th_last_place = last;
  th->th.th_new_place = newp;
  if (newp != th->th.th_current_place) {
    if (__kmp_display_affinity && team->t.t_display_affinity != 1)
      team->t.t_display_affinity = 1;
    th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
    th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
  }
}
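
// __kmp_partition_places: distribute the threads of a team over the places of
// the primary thread's partition according to the team's proc_bind policy
// (primary, close, or spread). update_master_only restricts the update to the
// primary thread.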
static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
  // Do not partition places for the hidden helper team
  if (KMP_HIDDEN_HELPER_TEAM(team))
    return;
  // Copy the primary thread's place partition to the team struct
  kmp_info_t *master_th = team->t.t_threads[0];
  KMP_DEBUG_ASSERT(master_th != NULL);
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  int first_place = master_th->th.th_first_place;
  int last_place = master_th->th.th_last_place;
  int masters_place = master_th->th.th_current_place;
  int num_masks = __kmp_affinity.num_masks;
  team->t.t_first_place = first_place;
  team->t.t_last_place = last_place;

  KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
                "bound to place %d partition = [%d,%d]\n",
                proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
                team->t.t_id, masters_place, first_place, last_place));

  switch (proc_bind) {

  case proc_bind_default:
    // Serial teams might have the proc_bind policy set to proc_bind_default.
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;

  case proc_bind_primary: {
    int f;
    int n_th = team->t.t_nproc;
    for (f = 1; f < n_th; f++) {
      kmp_info_t *th = team->t.t_threads[f];
      KMP_DEBUG_ASSERT(th != NULL);
      __kmp_set_thread_place(team, th, first_place, last_place, masters_place);

      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;

  case proc_bind_close: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = masters_place;
      for (f = 1; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        if (place == last_place) {
          place = first_place;
        } else if (place == (num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
        __kmp_set_thread_place(team, th, first_place, last_place, place);

        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
      }
    } else {
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      for (f = 0; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        __kmp_set_thread_place(team, th, first_place, last_place, place);
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move to next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place full; don't add extra
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
      }
      KMP_DEBUG_ASSERT(place == masters_place);
    }
  } break;

  case proc_bind_spread: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    int thidx;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = -1;

      if (n_places != num_masks) {
        int S = n_places / n_th;
        int s_count, rem, gap, gap_ct;

        place = masters_place;
        rem = n_places - n_th * S;
        gap = rem ? n_th / rem : 1;
        gap_ct = gap;
        thidx = n_th;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          kmp_info_t *th = team->t.t_threads[f];
          KMP_DEBUG_ASSERT(th != NULL);

          int fplace = place, nplace = place;
          s_count = 1;
          while (s_count < S) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            s_count++;
          }
          if (rem && (gap_ct == gap)) {
            if (place == last_place) {
              place = first_place;
            } else if (place == (num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            rem--;
            gap_ct = 0;
          }
          __kmp_set_thread_place(team, th, fplace, place, nplace);
          gap_ct++;

          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }

          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], num_masks: %u\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                    f, th->th.th_new_place, th->th.th_first_place,
                    th->th.th_last_place, num_masks));
        }
      } else {
        // Uniform space of places: create n_th partitions of roughly equal
        // size and put each thread into the first place of its partition.
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
        int first, last;
        kmp_info_t *th;

        thidx = n_th + 1;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
          KMP_DEBUG_ASSERT(last >= first);
          if (first >= n_places) {
            if (masters_place) {
              first -= n_places;
              last -= n_places;
              if (first == (masters_place + 1)) {
                KMP_DEBUG_ASSERT(f == n_th);
                first--;
              }
              if (last == masters_place) {
                KMP_DEBUG_ASSERT(f == (n_th - 1));
                last--;
              }
            } else {
              KMP_DEBUG_ASSERT(f == n_th);
              first = 0;
              last = 0;
            }
          }
          if (last >= n_places) {
            last = (n_places - 1);
          }
          place = first;
          current += spacing;
          if (f < n_th) {
            KMP_DEBUG_ASSERT(0 <= first);
            KMP_DEBUG_ASSERT(n_places > first);
            KMP_DEBUG_ASSERT(0 <= last);
            KMP_DEBUG_ASSERT(n_places > last);
            KMP_DEBUG_ASSERT(last_place >= first_place);
            th = team->t.t_threads[f];
            KMP_DEBUG_ASSERT(th);
            __kmp_set_thread_place(team, th, first, last, place);
            KA_TRACE(100,
                     ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                      "partition = [%d,%d], spacing = %.4f\n",
                      __kmp_gtid_from_thread(team->t.t_threads[f]),
                      team->t.t_id, f, th->th.th_new_place,
                      th->th.th_first_place, th->th.th_last_place, spacing));
          }
        }
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    } else {
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      thidx = n_th;
      if (update_master_only == 1)
        thidx = 1;
      for (f = 0; f < thidx; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        __kmp_set_thread_place(team, th, place, place, place);
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing, add an extra thread to place on next iteration
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // we added an extra thread to this place; move on to next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; don't add extra thread
          if (place == last_place) {
            place = first_place;
          } else if (place == (num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, th->th.th_new_place,
                       th->th.th_first_place, th->th.th_last_place));
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    }
  } break;

  default:
    break;
  }

  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}

#endif // KMP_AFFINITY_SUPPORTED
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
                    ompt_data_t ompt_parallel_data,
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
  int f;
  int b;
  kmp_team_t *team;
  int use_hot_team = !root->r.r_active;
  int level = 0;
  int do_place_partition = 1;

  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
  KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
  KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
  KMP_MB();

#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams;
  if (master) {
    team = master->th.th_team;
    level = team->t.t_active_level;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1 &&
          (team->t.t_pkfn ==
               (microtask_t)__kmp_teams_master || // inner fork of the teams
           master->th.th_teams_level <
               team->t.t_level)) { // or nested parallel inside the teams
        ++level; // not increment if #teams==1, or for outer fork of the teams
      }
      // Do not perform the place partition if inner fork of the teams
      if ((master->th.th_teams_size.nteams == 1 &&
           master->th.th_teams_level >= team->t.t_level) ||
          (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
        do_place_partition = 0;
    }
    hot_teams = master->th.th_hot_teams;
    if (level < __kmp_hot_teams_max_level && hot_teams &&
        hot_teams[level].hot_team) {
      // hot team has already been allocated for given level
      use_hot_team = 1;
    } else {
      use_hot_team = 0;
    }
  } else {
    // check we won't access uninitialized hot_teams, just in case
    KMP_DEBUG_ASSERT(new_nproc == 1);
  }
#endif

  // Optimization to use a "hot" team
  if (use_hot_team && new_nproc > 1) {
    KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
#if KMP_NESTED_HOT_TEAMS
    team = hot_teams[level].hot_team;
#else
    team = root->r.r_hot_team;
#endif
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p before reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }

    if (team->t.t_nproc != new_nproc &&
        __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      // Distributed barrier may need a resize
      int old_nthr = team->t.t_nproc;
      __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
    }

    // If not doing the place partition, then reset the team's proc bind
    // to indicate that partitioning of all threads still needs to be done
    if (do_place_partition == 0)
      team->t.t_proc_bind = proc_bind_default;

    // Has the number of threads changed?
    if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
      KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
      // This case can mean that omp_set_num_threads() was called and the hot
      // team size was already reduced, so we check the special flag
      if (team->t.t_size_changed == -1) {
        team->t.t_size_changed = 1;
      } else {
        KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
      }

      // set primary thread's schedule as new run-time schedule
      kmp_r_sched_t new_sched = new_icvs->sched;
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);

      KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
      __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);

#if KMP_AFFINITY_SUPPORTED
      if ((team->t.t_size_changed == 0) &&
          (team->t.t_proc_bind == new_proc_bind)) {
        if (new_proc_bind == proc_bind_spread) {
          if (do_place_partition) {
            // add flag to update only master for spread
            __kmp_partition_places(team, 1);
          }
        }
        KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
                       "proc_bind = %d, partition = [%d,%d]\n",
                       team->t.t_id, new_proc_bind, team->t.t_first_place,
                       team->t.t_last_place));
      } else {
        if (do_place_partition) {
          KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
          __kmp_partition_places(team);
        }
      }
#else
      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#endif /* KMP_AFFINITY_SUPPORTED */
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Barrier size already reduced earlier in this function
        // Activate team threads via th_used_in_team
        __kmp_add_threads_to_team(team, new_nproc);
      }
#if KMP_NESTED_HOT_TEAMS
      if (__kmp_hot_teams_mode == 0) {
        // Saved number of threads should correspond to team's value in this
        // mode; it can be bigger
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
        hot_teams[level].hot_team_nth = new_nproc;
#endif // KMP_NESTED_HOT_TEAMS
        /* release the extra threads we don't need any more */
        for (f = new_nproc; f < team->t.t_nproc; f++) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          if (__kmp_tasking_mode != tskm_immediate_exec) {
            // When decreasing team size, threads no longer in the team should
            // unref task team.
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          __kmp_free_thread(team->t.t_threads[f]);
          team->t.t_threads[f] = NULL;
        }
#if KMP_NESTED_HOT_TEAMS
      } // (__kmp_hot_teams_mode == 0)
      else {
        // When keeping extra threads in team, switch threads to wait on own
        // b_go flag
        for (f = new_nproc; f < team->t.t_nproc; ++f) {
          KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
          for (int b = 0; b < bs_last_barrier; ++b) {
            if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
              balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
            }
            KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
          }
        }
      }
#endif // KMP_NESTED_HOT_TEAMS
      team->t.t_nproc = new_nproc;
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);

      /* update the remaining threads */
      for (f = 0; f < new_nproc; ++f) {
        team->t.t_threads[f]->th.th_team_nproc = new_nproc;
      }
      // restore the current task state of the primary thread: should be the
      // implicit task
      KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));

      __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);

      for (f = 0; f < team->t.t_nproc; f++) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }

      if (do_place_partition) {
        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
        __kmp_partition_places(team);
#endif
      }
    } else { // team->t.t_nproc < new_nproc
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      int old_nproc = team->t.t_nproc; // save old value and use to update only
      team->t.t_size_changed = 1;

#if KMP_NESTED_HOT_TEAMS
      int avail_threads = hot_teams[level].hot_team_nth;
      if (new_nproc < avail_threads)
        avail_threads = new_nproc;
      kmp_info_t **other_threads = team->t.t_threads;
      for (f = team->t.t_nproc; f < avail_threads; ++f) {
        // Adjust barrier data of reserved threads (if any) of the team.
        // Other data will be set in __kmp_initialize_info() below.
        kmp_balign_t *balign = other_threads[f]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
      }
      if (hot_teams[level].hot_team_nth >= new_nproc) {
        // we have all needed threads in reserve, no need to allocate any;
        // this is only possible in mode 1
        KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
        team->t.t_nproc = new_nproc; // just get reserved threads involved
      } else {
        // We may have some threads in reserve, but not enough;
        // get reserved threads involved if any.
        team->t.t_nproc = hot_teams[level].hot_team_nth;
        hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
#endif // KMP_NESTED_HOT_TEAMS
        if (team->t.t_max_nproc < new_nproc) {
          /* reallocate larger arrays */
          __kmp_reallocate_team_arrays(team, new_nproc);
          __kmp_reinitialize_team(team, new_icvs, NULL);
        }

#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
        /* Temporarily set full mask for primary thread before creation of
           workers, so that workers do not all inherit a narrow mask. */
        kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};
#endif

        /* allocate new threads for the hot team */
        for (f = team->t.t_nproc; f < new_nproc; f++) {
          kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
          KMP_DEBUG_ASSERT(new_worker);
          team->t.t_threads[f] = new_worker;

          KA_TRACE(20,
                   ("__kmp_allocate_team: team %d init T#%d arrived: "
                    "join=%llu, plain=%llu\n",
                    team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));

          { // Initialize barrier data for new threads.
            kmp_balign_t *balign = new_worker->th.th_bar;
            for (b = 0; b < bs_last_barrier; ++b) {
              balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
              KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
                               KMP_BARRIER_PARENT_FLAG);
              balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
            }
          }
        }

#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
        /* Restore initial primary thread's affinity mask */
        new_temp_affinity.restore();
#endif
#if KMP_NESTED_HOT_TEAMS
      } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
#endif // KMP_NESTED_HOT_TEAMS
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        // Barrier size already increased earlier in this function
        // Activate team threads via th_used_in_team
        __kmp_add_threads_to_team(team, new_nproc);
      }
      /* make sure everyone is synchronized */
      __kmp_initialize_team(team, new_nproc, new_icvs,
                            root->r.r_uber_thread->th.th_ident);

      /* reinitialize the threads */
      KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
      for (f = 0; f < team->t.t_nproc; ++f)
        __kmp_initialize_info(team->t.t_threads[f], team, f,
                              __kmp_gtid_from_tid(f, team));

      // set th_task_state for new threads in hot team from older thread's state
      kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
      for (f = old_nproc; f < team->t.t_nproc; ++f)
        team->t.t_threads[f]->th.th_task_state = old_state;

      for (f = 0; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }

      if (do_place_partition) {
        KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
        __kmp_partition_places(team);
#endif
      }
    } // Check changes in number of threads

    kmp_info_t *master = team->t.t_threads[0];
    if (master->th.th_teams_microtask) {
      for (f = 1; f < new_nproc; ++f) {
        // propagate teams construct specific info to workers
        kmp_info_t *thr = team->t.t_threads[f];
        thr->th.th_teams_microtask = master->th.th_teams_microtask;
        thr->th.th_teams_level = master->th.th_teams_level;
        thr->th.th_teams_size = master->th.th_teams_size;
      }
    }
#if KMP_NESTED_HOT_TEAMS
    if (level) {
      // Sync barrier state for nested hot teams, not needed for outermost hot
      // team.
      for (f = 1; f < new_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        kmp_balign_t *balign = thr->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
      }
    }
#endif // KMP_NESTED_HOT_TEAMS

    /* reallocate space for arguments if necessary */
    __kmp_alloc_argv_entries(argc, team, TRUE);
    KMP_CHECK_UPDATE(team->t.t_argc, argc);
    // The hot team re-uses the previous task team,
    // if untouched during the previous release->gather phase.

    KF_TRACE(10, (" hot_team = %p\n", team));

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p after reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }

    __ompt_team_assign_id(team, ompt_parallel_data);

    KMP_MB();
    return team;
  }

  /* next, let's try to take one from the team pool */
  KMP_MB();
  for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
    /* TODO: consider resizing undersized teams instead of reaping them, now
       that we have a resizing mechanism */
    if (team->t.t_max_nproc >= max_nproc) {
      /* take this team from the team pool */
      __kmp_team_pool = team->t.t_next_pool;

      if (max_nproc > 1 &&
          __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        if (!team->t.b) // Allocate barrier structure
          team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
      }

      /* setup the team for fresh use */
      __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

      KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
                    "task_team[1] %p to NULL\n",
                    &team->t.t_task_team[0], &team->t.t_task_team[1]));
      team->t.t_task_team[0] = NULL;
      team->t.t_task_team[1] = NULL;

      /* reallocate space for arguments if necessary */
      __kmp_alloc_argv_entries(argc, team, TRUE);
      KMP_CHECK_UPDATE(team->t.t_argc, argc);

      KA_TRACE(
          20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
               team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
      { // Initialize barrier data.
        for (b = 0; b < bs_last_barrier; ++b) {
          team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
          team->t.t_bar[b].b_master_arrived = 0;
          team->t.t_bar[b].b_team_arrived = 0;
        }
      }

      team->t.t_proc_bind = new_proc_bind;

      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));

      __ompt_team_assign_id(team, ompt_parallel_data);

      KMP_MB();
      return team;
    }

    /* reap team if it is too small, then loop back and check the next one */
    /* TODO: Use technique to find the right size hot-team, don't reap them */
    team = __kmp_reap_team(team);
    __kmp_team_pool = team;
  }

  /* nothing available in the pool, no matter, make a new team! */
  KMP_MB();
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));

  /* and set it up */
  team->t.t_max_nproc = max_nproc;
  if (max_nproc > 1 &&
      __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
    // Allocate barrier structure
    team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
  }

  /* NOTE well, for some reason allocating one big buffer and dividing it up
     seems to really hurt performance a lot on the P4, so, let's not use this */
  __kmp_allocate_team_arrays(team, max_nproc);

  KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
  __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
  team->t.t_task_team[0] = NULL;
  team->t.t_task_team[1] = NULL;

  if (__kmp_storage_map) {
    __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
  }

  /* allocate space for arguments */
  __kmp_alloc_argv_entries(argc, team, FALSE);
  team->t.t_argc = argc;

  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
  { // Initialize barrier data.
    for (b = 0; b < bs_last_barrier; ++b) {
      team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
      team->t.t_bar[b].b_master_arrived = 0;
      team->t.t_bar[b].b_team_arrived = 0;
    }
  }

  team->t.t_proc_bind = new_proc_bind;

  __ompt_team_assign_id(team, ompt_parallel_data);
  team->t.ompt_serialized_team_info = NULL;

  KMP_MB();

  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  return team;
}
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  int use_hot_team = team == root->r.r_hot_team;
#if KMP_NESTED_HOT_TEAMS
  int level;
  if (master) {
    level = team->t.t_active_level - 1;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1) {
        // level was not increased in teams construct for team of masters
        ++level;
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master->th.th_teams_level == team->t.t_level) {
        // level was not increased in teams construct for team of workers
        // before the parallel
        ++level;
      }
    }
    kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
    if (level < __kmp_hot_teams_max_level) {
      KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
      use_hot_team = 1;
    }
  }
#endif // KMP_NESTED_HOT_TEAMS

  /* team is done working */
  TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library
  team->t.t_copyin_counter = 0; // init counter for possible reuse
  // Do not reset pointer to parent team to NULL for hot teams.

  /* if we are non-hot team, release our threads */
  if (!use_hot_team) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Wait for threads to reach reapable state
      for (f = 1; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]);
        kmp_info_t *th = team->t.t_threads[f];
        volatile kmp_uint32 *state = &th->th.th_reap_state;
        while (*state != KMP_SAFE_TO_REAP) {
          if (!__kmp_is_thread_alive(th, &ecode)) {
            *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread
            break;
          }
          // first check if thread is sleeping
          kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
          if (fl.is_sleeping())
            fl.resume(__kmp_gtid_from_thread(th));
          KMP_CPU_PAUSE();
        }
      }

      // Delete task teams
      int tt_idx;
      for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
        kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
        if (task_team != NULL) {
          for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
            KMP_DEBUG_ASSERT(team->t.t_threads[f]);
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          KA_TRACE(
              20,
              ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
               __kmp_get_gtid(), task_team, team->t.t_id));
#if KMP_NESTED_HOT_TEAMS
          __kmp_free_task_team(master, task_team);
#endif
          team->t.t_task_team[tt_idx] = NULL;
        }
      }
    }

    // Reset pointer to parent team only for non-hot teams.
    team->t.t_parent = NULL;
    team->t.t_level = 0;
    team->t.t_active_level = 0;

    /* free the worker threads */
    for (f = 1; f < team->t.t_nproc; ++f) {
      KMP_DEBUG_ASSERT(team->t.t_threads[f]);
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
                                    1, 2);
      }
      __kmp_free_thread(team->t.t_threads[f]);
    }

    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      if (team->t.b) {
        // wake up threads at their old location
        team->t.b->go_release();
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
          for (f = 1; f < team->t.t_nproc; ++f) {
            if (team->t.b->sleep[f].sleep) {
              __kmp_atomic_resume_64(
                  team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                  (kmp_atomic_flag_64<> *)NULL);
            }
          }
        }
        // Wait for threads to be removed from team
        for (int f = 1; f < team->t.t_nproc; ++f) {
          while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
            KMP_CPU_PAUSE();
        }
      }
    }

    for (f = 1; f < team->t.t_nproc; ++f) {
      team->t.t_threads[f] = NULL;
    }

    if (team->t.t_max_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      distributedBarrier::deallocate(team->t.b);
      team->t.b = NULL;
    }
    /* put the team back in the team pool */
    /* TODO limit size of team pool, call reap_team if pool too large */
    team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
  } else { // Check if team was created for primary threads in teams construct
    // See if the first worker is a CG root
    KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
                     team->t.t_threads[1]->th.th_cg_roots);
    if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
      // Clean up the CG root nodes on workers so that this team can be re-used
      for (f = 1; f < team->t.t_nproc; ++f) {
        kmp_info_t *thr = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
                         thr->th.th_cg_roots->cg_root == thr);
        // Pop current CG root off list
        kmp_cg_root_t *tmp = thr->th.th_cg_roots;
        thr->th.th_cg_roots = tmp->up;
        KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
                       " up to node %p. cg_nthreads was %d\n",
                       thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
        int i = tmp->cg_nthreads--;
        if (i == 1) {
          __kmp_free(tmp); // free CG if we are the last thread in it
        }
        // Restore current task's thread_limit from CG root
        if (thr->th.th_cg_roots)
          thr->th.th_current_task->td_icvs.thread_limit =
              thr->th.th_cg_roots->cg_thread_limit;
      }
    }
  }

  KMP_MB();
}
/* reap the team: destroy it, reclaim all its resources, and free its memory */
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  KMP_MB();
  return next_pool;
}
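
// __kmp_free_thread: place a thread back on the pool of available threads,
// keeping the pool sorted by gtid so that gtid assignment stays predictable.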
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving thread to pool, switch thread to wait on own b_go flag, and
  // uninitialized (NULL team).
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put thread back on the free pool */
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */

  while (this_th->th.th_cg_roots) {
    this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
    kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
    if (tmp->cg_root == this_th) { // Thread is a cg_root
      KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
      this_th->th.th_cg_roots = tmp->up;
      __kmp_free(tmp);
    } else { // Worker thread
      if (tmp->cg_nthreads == 0) { // last thread leaves contention group
        __kmp_free(tmp);
      }
      this_th->th.th_cg_roots = NULL;
      break;
    }
  }

  /* If the implicit task assigned to this thread can be used by other threads,
     reset th_current_task so the task is not freed twice at reap time. */
  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If the __kmp_thread_pool_insert_pt is already past the new insert point,
  // then we need to reset it.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list to find the place to insert the thread, keeping the
  // pool sorted by gtid.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }

  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
  // to it.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  __kmp_suspend_initialize_thread(this_th);
  __kmp_lock_suspend_mx(this_th);
  if (this_th->th.th_active == TRUE) {
    KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
    this_th->th.th_active_in_pool = TRUE;
  } else {
    KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
  }
  __kmp_unlock_suspend_mx(this_th);

  TCW_4(__kmp_nth, __kmp_nth - 1);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  KMP_MB();
}
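
// __kmp_launch_thread: main loop of a worker thread -- wait at the fork
// barrier for work, invoke the microtask of the team it was assigned to, and
// pass through the join barrier, until the library shuts down.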
void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KMP_MB();
  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
  }

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_begin();

  ompt_data_t *thread_data = nullptr;
  if (ompt_enabled.enabled) {
    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
    *thread_data = ompt_data_none;

    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    this_thr->th.ompt_thread_info.wait_id = 0;
    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
    this_thr->th.ompt_thread_info.parallel_flags = 0;
    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_worker, thread_data);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_idle;
  }

  /* This is the place where threads wait for work */
  while (!TCR_4(__kmp_global.g.g_done)) {
    KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
    KMP_MB();

    /* wait for work to do */
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));

    /* No tid yet since not part of a team */
    __kmp_fork_barrier(gtid, KMP_GTID_DNE);

    if (ompt_enabled.enabled) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }

    pteam = &this_thr->th.th_team;

    /* have we been allocated? */
    if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
      /* we were just woken up, so run our new task */
      if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        int rc;
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));

        updateHWFPControl(*pteam);

        if (ompt_enabled.enabled) {
          this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
        }

        rc = (*pteam)->t.t_invoke(gtid);
        KMP_ASSERT(rc);

        KMP_MB();
        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
      }
      if (ompt_enabled.enabled) {
        /* no frame set while outside task */
        __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;

        this_thr->th.ompt_thread_info.state = ompt_state_overhead;
      }
      /* join barrier after parallel region */
      __kmp_join_barrier(gtid);
    }
  }

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_thread_end();

  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
  }

  this_thr->th.th_task_team = NULL;
  /* run the destructors for the threadprivate data for this thread */
  __kmp_common_destroy_gtid(gtid);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
  KMP_MB();

#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}
void __kmp_internal_end_dest(void *specific_gtid) {
  // Make sure no significant bits are lost
  int gtid;
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gtid+1 in the thread-local-storage;
     0 is reserved for the nothing-stored case */

  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

#endif

/* [Windows] When the atexit handler is called, there may still be more than
   one thread alive. */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif
}
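
// __kmp_reap_thread: release a (non-root) worker from the fork barrier, join
// the underlying OS thread, and free all per-thread resources. Assumes
// __kmp_forkjoin_lock is held by the caller.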
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed __kmp_forkjoin_lock is acquired.

  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    /* Assume the threads are at the fork barrier here */
    KA_TRACE(
        20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
             gtid));
    if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      while (
          !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
        KMP_CPU_PAUSE();
      __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
    } else {
      /* Need release fence here to prevent seg faults for tree forkjoin
         barrier (GEH) */
      kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
      __kmp_release_64(&flag);
    }
  }

  // Terminate OS thread.
  __kmp_reap_worker(thread);

  // The thread was killed asynchronously.  If it was actively spinning in the
  // thread pool, decrement the global count.
  if (thread->th.th_active_in_pool) {
    thread->th.th_active_in_pool = FALSE;
    KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
    KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
  }

  __kmp_free_implicit_task(thread);

  __kmp_free_fast_memory(thread);

  __kmp_suspend_uninitialize_thread(thread);

  KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
  TCW_SYNC_PTR(__kmp_threads[gtid], NULL);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* free the memory being used */
  if (__kmp_env_consistency_check) {
    if (thread->th.th_cons) {
      __kmp_free_cons_stack(thread->th.th_cons);
      thread->th.th_cons = NULL;
    }
  }

  if (thread->th.th_pri_common != NULL) {
    __kmp_free(thread->th.th_pri_common);
    thread->th.th_pri_common = NULL;
  }

  if (thread->th.th_task_state_memo_stack != NULL) {
    __kmp_free(thread->th.th_task_state_memo_stack);
    thread->th.th_task_state_memo_stack = NULL;
  }

  if (thread->th.th_local.bget_data != NULL) {
    __kmp_finalize_bget(thread);
  }

#if KMP_AFFINITY_SUPPORTED
  if (thread->th.th_affin_mask != NULL) {
    KMP_CPU_FREE(thread->th.th_affin_mask);
    thread->th.th_affin_mask = NULL;
  }
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_HIER_SCHED
  if (thread->th.th_hier_bar_data != NULL) {
    __kmp_free(thread->th.th_hier_bar_data);
    thread->th.th_hier_bar_data = NULL;
  }
#endif

  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
  __kmp_free(thread);

  KMP_MB();
}
static void __kmp_itthash_clean(kmp_info_t *th) {
  if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
  if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
}
static void __kmp_internal_end(void) {
  int i;

  /* First, unregister the library */
  __kmp_unregister_library();

  /* Only the current root should be cleaned up here; dead roots are reclaimed
     first. */
  __kmp_reclaim_dead_roots();

  for (i = 0; i < __kmp_threads_capacity; i++)
    if (__kmp_root[i])
      if (__kmp_root[i]->r.r_active)
        break;
  KMP_MB(); /* Flush all pending memory write invalidates.  */
  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);

  if (i < __kmp_threads_capacity) {
    // Other alive roots found; only reap the monitor thread. Check that the
    // monitor was initialized before reaping it: after __kmp_atfork_child,
    // __kmp_monitor is only valid in the parent process.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
  } else {
    /* make sure that everything has properly ended */
    for (i = 0; i < __kmp_threads_capacity; i++) {
      if (__kmp_root[i]) {
        KMP_ASSERT(!__kmp_root[i]->r.r_active);
      }
    }

    KMP_MB();

    // Reap the worker threads.
    while (__kmp_thread_pool != NULL) { // Loop through all threads in the pool.
      kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
      __kmp_thread_pool = thread->th.th_next_pool;
      KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
      thread->th.th_next_pool = NULL;
      thread->th.th_in_pool = FALSE;
      __kmp_reap_thread(thread, 0);
    }
    __kmp_thread_pool_insert_pt = NULL;

    // Reap the teams.
    while (__kmp_team_pool != NULL) { // Loop through all teams in the pool.
      kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
      __kmp_team_pool = team->t.t_next_pool;
      team->t.t_next_pool = NULL;
      __kmp_reap_team(team);
    }

    __kmp_reap_task_teams();

    // Threads that are not reaped should not access any resources since they
    // are going to be deallocated soon; wait until they leave the spin loop.
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thr = __kmp_threads[i];
      while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
        KMP_CPU_PAUSE();
    }

    for (i = 0; i < __kmp_threads_capacity; ++i) {
      // TBD: Add some checking...
    }

    KMP_MB();

    TCW_SYNC_4(__kmp_init_common, FALSE);

    KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
    KMP_MB();

    // Reap the monitor thread.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
  }

  TCW_4(__kmp_init_gtid, FALSE);
  KMP_MB(); /* Flush all pending memory write invalidates.  */

  __kmp_cleanup();
}
void __kmp_internal_end_library(int gtid_req) {
  /* If we have already finished cleaning up, don't try again; it wouldn't be
     pretty. This should not be a race condition because __kmp_internal_end()
     is the only place to clear __kmp_serial_init. */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }

  // If the hidden helper team has been initialized, deinit it first.
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */
  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(
        10, ("__kmp_internal_end_library: enter T#%d  (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                    "shutdown\n"));
      /* we don't know who we are, but we may still shutdown the library */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread.  gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        __kmp_unregister_library();
        KA_TRACE(10,
                 ("__kmp_internal_end_library: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        __kmp_itthash_clean(__kmp_threads[gtid]);
        KA_TRACE(10,
                 ("__kmp_internal_end_library: unregistering sibling T#%d\n",
                  gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* worker thread (e.g. via the atexit handler after exit()): skip the
         usual termination and just dump the debug buffer */
#ifdef DUMP_DEBUG_ON_EXIT
      if (__kmp_debug_buf)
        __kmp_dump_debug_buffer();
#endif
      // Unregister here so we don't leave registration files in /dev/shm.
      __kmp_unregister_library();
      return;
    }
  }
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutual exclusion between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif

#if KMP_OS_WINDOWS
  __kmp_close_console();
#endif

  __kmp_fini_allocator();
}
void __kmp_internal_end_thread(int gtid_req) {
  int i;

  /* If we have already finished cleaning up, don't try again; it wouldn't be
     pretty. */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }

  // If the hidden helper team has been initialized, deinit it first.
  if (TCR_4(__kmp_init_hidden_helper) &&
      !TCR_4(__kmp_hidden_helper_team_done)) {
    TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
    // First release the main thread to let it continue its work
    __kmp_hidden_helper_main_thread_release();
    // Wait until the hidden helper team has been destroyed
    __kmp_hidden_helper_threads_deinitz_wait();
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_thread: enter T#%d  (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                    "shutdown\n"));
      return;
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread.  gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                      gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* just a worker thread, let's leave */
      KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));

      __kmp_threads[gtid]->th.th_task_team = NULL;

      KA_TRACE(10,
               ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
                gtid));
      return;
    }
  }

#if KMP_DYNAMIC_LIB
  if (__kmp_pause_status != kmp_hard_paused) {
    // Do not shut down the dynamic library at the exit of an uber thread;
    // shutdown happens later in the library destructor.
    KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
    return;
  }
#endif
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* should we finish the run-time?  are all siblings done? */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i)) {
      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      __kmp_release_bootstrap_lock(&__kmp_initz_lock);
      return;
    }
  }

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
}
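
// Library registration. Exactly one copy of the runtime should be registered
// per process; the registration record is kept in a shared-memory segment, a
// /tmp file, or an environment variable, depending on what is available.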
static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

static inline char *__kmp_reg_status_name() {
  /* On RHEL 3u5 if linked statically, getpid() returns different values in
     each thread. If registration and unregistration go in different threads
     (omp_misc_other_root_exit.cpp test case), the name of the registered_lib
     env var can not be found, because the name will contain a different pid. */
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
} // __kmp_reg_status_name
#if defined(KMP_USE_SHM)
bool __kmp_shm_available = false;
bool __kmp_tmp_available = false;
// If /dev/shm is not accessible, we will create a temporary file under /tmp.
char *temp_reg_status_file_name = nullptr;
#endif

void __kmp_register_library_startup(void) {

  char *name = __kmp_reg_status_name(); // Name of the environment variable.
  int done = 0;
  union {
    double dtime;
    long ltime;
  } time;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  __kmp_initialize_system_tick();
#endif
  __kmp_read_system_time(&time.dtime);
  __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));

  while (!done) {

    char *value = NULL; // Actual value of the environment variable.

#if defined(KMP_USE_SHM)
    char *shm_name = nullptr;
    char *data1 = nullptr;
    __kmp_shm_available = __kmp_detect_shm();
    if (__kmp_shm_available) {
      int fd1 = -1;
      shm_name = __kmp_str_format("/%s", name);
      int shm_preexist = 0;
      fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
      if ((fd1 == -1) && (errno == EEXIST)) {
        // file already exists; try opening the existing file
        fd1 = shm_open(shm_name, O_RDWR, 0666);
        if (fd1 == -1) { // file didn't get opened
          KMP_WARNING(FunctionError, "Can't open SHM");
          __kmp_shm_available = false;
        } else { // able to open existing file
          shm_preexist = 1;
        }
      }
      if (__kmp_shm_available && shm_preexist == 0) { // SHM created, set size
        if (ftruncate(fd1, SHM_SIZE) == -1) {
          KMP_WARNING(FunctionError, "Can't set size of SHM");
          __kmp_shm_available = false;
        }
      }
      if (__kmp_shm_available) { // SHM exists, now map it
        data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                             fd1, 0);
        if (data1 == MAP_FAILED) { // failed to map shared memory
          KMP_WARNING(FunctionError, "Can't map SHM");
          __kmp_shm_available = false;
        }
      }
      if (__kmp_shm_available) { // SHM mapped
        if (shm_preexist == 0) { // set data to SHM, set value
          KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
        }
        // Read value from either what we just wrote or the existing file.
        value = __kmp_str_format("%s", data1);
        munmap(data1, SHM_SIZE);
      }
      if (fd1 != -1)
        close(fd1);
    }
    if (!__kmp_shm_available)
      __kmp_tmp_available = __kmp_detect_tmp();
    if (!__kmp_shm_available && __kmp_tmp_available) {
      // SHM failed to work; try to create a temp file under /tmp.
      // If /tmp isn't accessible, fall back to using an environment variable.
      int fd1 = -1;
      temp_reg_status_file_name = __kmp_str_format("/tmp/%s", name);
      int tmp_preexist = 0;
      fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0666);
      if ((fd1 == -1) && (errno == EEXIST)) {
        // file already exists; try opening the existing file
        fd1 = open(temp_reg_status_file_name, O_RDWR, 0666);
        if (fd1 == -1) { // file didn't get opened
          KMP_WARNING(FunctionError, "Can't open TEMP");
          __kmp_tmp_available = false;
        } else {
          tmp_preexist = 1;
        }
      }
      if (__kmp_tmp_available && tmp_preexist == 0) {
        // we created the /tmp file, now set its size
        if (ftruncate(fd1, SHM_SIZE) == -1) {
          KMP_WARNING(FunctionError, "Can't set size of /tmp file");
          __kmp_tmp_available = false;
        }
      }
      if (__kmp_tmp_available) {
        data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                             fd1, 0);
        if (data1 == MAP_FAILED) { // failed to map /tmp
          KMP_WARNING(FunctionError, "Can't map /tmp");
          __kmp_tmp_available = false;
        }
      }
      if (__kmp_tmp_available) {
        if (tmp_preexist == 0) { // set data to TMP, set value
          KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
        }
        // Read value from either what we just wrote or the existing file.
        value = __kmp_str_format("%s", data1);
        munmap(data1, SHM_SIZE);
      }
      if (fd1 != -1)
        close(fd1);
    }
    if (!__kmp_shm_available && !__kmp_tmp_available) {
      // no /dev/shm and no /tmp -- fall back to environment variable.
      // Set the variable, but do not overwrite if it exists.
      __kmp_env_set(name, __kmp_registration_str, 0);
      // read value to see if it got set
      value = __kmp_env_get(name);
    }
#else // Windows and unix with static library
    // Set environment variable, but do not overwrite if it exists.
    __kmp_env_set(name, __kmp_registration_str, 0);
    // read value to see if it got set
    value = __kmp_env_get(name);
#endif

    if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
      done = 1; // Ok, environment variable set successfully, exit the loop.
    } else {
      // Oops. Write failed. Another copy of the OpenMP RTL is in memory.
      // Check whether it is alive or dead.
      int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
      char *tail = value;
      char *flag_addr_str = NULL;
      char *flag_val_str = NULL;
      char const *file_name = NULL;
      __kmp_str_split(tail, '-', &flag_addr_str, &tail);
      __kmp_str_split(tail, '-', &flag_val_str, &tail);
      file_name = tail;
      if (tail != NULL) {
        unsigned long *flag_addr = 0;
        unsigned long flag_val = 0;
        KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
        KMP_SSCANF(flag_val_str, "%lx", &flag_val);
        if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
          // Check whether the environment-encoded address is mapped into the
          // address space; if so, dereference it to see if it still has the
          // right value.
          if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
            neighbor = 1;
          } else {
            // The other copy of the library is no longer running.
            neighbor = 2;
          }
        }
      }
      switch (neighbor) {
      case 0: // Cannot parse environment variable -- neighbor status unknown.
        // Assume an incompatible format of a future version of the library
        // and that the other library is alive.
        file_name = "unknown library";
        KMP_FALLTHROUGH();
      case 1: { // Neighbor is alive.
        // Check whether duplicates are allowed.
        char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
        if (!__kmp_str_match_true(duplicate_ok)) {
          // That's not allowed. Issue fatal error.
          __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
                      KMP_HNT(DuplicateLibrary), __kmp_msg_null);
        }
        KMP_INTERNAL_FREE(duplicate_ok);
        __kmp_duplicate_library_ok = 1;
        done = 1; // Exit the loop.
      } break;
      case 2: { // Neighbor is dead.
#if defined(KMP_USE_SHM)
        if (__kmp_shm_available) { // close shared memory.
          shm_unlink(shm_name); // this removes the file in /dev/shm
        } else if (__kmp_tmp_available) {
          unlink(temp_reg_status_file_name); // this removes the temp file
        } else {
          // Clear the variable and try to register the library again.
          __kmp_env_unset(name);
        }
#else
        // Clear the variable and try to register the library again.
        __kmp_env_unset(name);
#endif
      } break;
      default: {
        KMP_DEBUG_ASSERT(0);
      } break;
      }
    }
    KMP_INTERNAL_FREE((void *)value);
#if defined(KMP_USE_SHM)
    if (shm_name)
      KMP_INTERNAL_FREE((void *)shm_name);
#endif
  } // while
  KMP_INTERNAL_FREE((void *)name);

} // func __kmp_register_library_startup
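
// __kmp_unregister_library: remove this copy's registration record, but only
// if the record still matches the string written by
// __kmp_register_library_startup().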
void __kmp_unregister_library(void) {

  char *name = __kmp_reg_status_name();
  char *value = NULL;

#if defined(KMP_USE_SHM)
  char *shm_name = nullptr;
  int fd1;
  if (__kmp_shm_available) {
    shm_name = __kmp_str_format("/%s", name);
    fd1 = shm_open(shm_name, O_RDONLY, 0666);
    if (fd1 != -1) { // file opened successfully
      char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
      if (data1 != MAP_FAILED) {
        value = __kmp_str_format("%s", data1); // read value from SHM
        munmap(data1, SHM_SIZE);
      }
      close(fd1);
    }
  } else if (__kmp_tmp_available) { // try /tmp
    fd1 = open(temp_reg_status_file_name, O_RDONLY);
    if (fd1 != -1) { // file opened successfully
      char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
      if (data1 != MAP_FAILED) {
        value = __kmp_str_format("%s", data1); // read value from /tmp
        munmap(data1, SHM_SIZE);
      }
      close(fd1);
    }
  } else { // fall back to the environment variable
    value = __kmp_env_get(name);
  }
#else
  value = __kmp_env_get(name);
#endif

  KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
  KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
  if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
    // Ok, this is our variable/file. Delete it.
#if defined(KMP_USE_SHM)
    if (__kmp_shm_available) {
      shm_unlink(shm_name); // this removes the file in /dev/shm
    } else if (__kmp_tmp_available) {
      unlink(temp_reg_status_file_name); // this removes the temp file
    } else {
      __kmp_env_unset(name);
    }
#else
    __kmp_env_unset(name);
#endif
  }

#if defined(KMP_USE_SHM)
  if (shm_name)
    KMP_INTERNAL_FREE(shm_name);
  if (temp_reg_status_file_name)
    KMP_INTERNAL_FREE(temp_reg_status_file_name);
#endif

  KMP_INTERNAL_FREE(__kmp_registration_str);
  KMP_INTERNAL_FREE(value);
  KMP_INTERNAL_FREE(name);

  __kmp_registration_flag = 0;
  __kmp_registration_str = NULL;

} // __kmp_unregister_library
#if KMP_MIC_SUPPORTED

static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}

#endif /* KMP_MIC_SUPPORTED */

#if KMP_HAVE_UMWAIT
static void __kmp_user_level_mwait_init() {
  struct kmp_cpuid buf;
  __kmp_x86_cpuid(7, 0, &buf);
  __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
  __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
  __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
}
#elif KMP_HAVE_MWAIT
#ifndef AT_INTELPHIUSERMWAIT
// Spurious, non-existent value that should always fail to return anything.
// Will be replaced with the correct value when we know that.
#define AT_INTELPHIUSERMWAIT 10000
#endif
// getauxval() is available in RHEL7 and SLES12. If the RTL is built on an
// earlier OS, use the following internal fallback when the entry is missing.
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }

static void __kmp_user_level_mwait_init() {
  // Use getauxval() with AT_INTELPHIUSERMWAIT to find out whether user-level
  // mwait is enabled; otherwise honor KMP_USER_LEVEL_MWAIT on Intel MIC.
  if (__kmp_mic_type == mic3) {
    unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
    if ((res & 0x1) || __kmp_user_level_mwait) {
      __kmp_mwait_enabled = TRUE;
      if (__kmp_user_level_mwait) {
        KMP_INFORM(EnvMwaitWarn);
      }
    } else {
      __kmp_mwait_enabled = FALSE;
    }
  }
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
#endif /* KMP_HAVE_UMWAIT */
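
// Serial initialization: one-time setup of locks, default ICV values, barrier
// patterns, the thread/root arrays, and the initial (uber) root thread.
// Called with __kmp_initz_lock held by __kmp_serial_initialize().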
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));

  __kmp_validate_locks();

#if ENABLE_LIBOMPTARGET
  /* Initialize functions from libomptarget */
  __kmp_init_omptarget();
#endif

  /* Initialize internal memory allocator for all client processes */
  __kmp_init_allocator();

  /* Register the library startup via an environment variable or via a mapped
     shared-memory file and check whether another copy of the library is
     already registered. Since a forked child process is often terminated, we
     postpone the registration till middle initialization in the child. */
  if (__kmp_need_register_serial)
    __kmp_register_library_startup();

  /* TODO reinitialization of library */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
  }

  __kmp_global.g.g_abort = 0;
  TCW_SYNC_4(__kmp_global.g.g_done, FALSE);

/* initialize the locks */
#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_init_speculative_stats();
#endif
#endif
#if KMP_STATS_ENABLED
  __kmp_stats_init();
#endif
  __kmp_init_lock(&__kmp_global_lock);
  __kmp_init_queuing_lock(&__kmp_dispatch_lock);
  __kmp_init_lock(&__kmp_debug_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
  __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_init_bootstrap_lock(&__kmp_exit_lock);
  __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
  __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);

  /* conduct initialization and initial setup of configuration */
  __kmp_runtime_initialize();

#if KMP_MIC_SUPPORTED
  __kmp_check_mic_type();
#endif

  // Some global variable initialization moved here from kmp_env_initialize()
  __kmp_abort_delay = 0;

  // From __kmp_init_dflt_team_nth(): assume the entire machine will be used
  __kmp_dflt_team_nth_ub = __kmp_xproc;
  if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
    __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
  }
  __kmp_max_nth = __kmp_sys_max_nth;
  __kmp_cg_max_nth = __kmp_sys_max_nth;
  __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
  if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
    __kmp_teams_max_nth = __kmp_sys_max_nth;
  }

  // From the "KMP_BLOCKTIME" part of __kmp_env_initialize()
  __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
  __kmp_monitor_wakeups =
      KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
  __kmp_bt_intervals =
      KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
  // From the "KMP_LIBRARY" part of __kmp_env_initialize()
  __kmp_library = library_throughput;
  // From KMP_SCHEDULE initialization
  __kmp_static = kmp_sch_static_balanced;

// Barrier branch bit and pattern defaults.
#if KMP_FAST_REDUCTION_BARRIER
#define kmp_reduction_barrier_gather_bb ((int)1)
#define kmp_reduction_barrier_release_bb ((int)1)
#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
#endif // KMP_FAST_REDUCTION_BARRIER
  for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
    __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
    __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
    __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
    __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
#if KMP_FAST_REDUCTION_BARRIER
    if (i == bs_reduction_barrier) {
      __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
      __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
      __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
      __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
    }
#endif // KMP_FAST_REDUCTION_BARRIER
  }
#if KMP_FAST_REDUCTION_BARRIER
#undef kmp_reduction_barrier_release_pat
#undef kmp_reduction_barrier_gather_pat
#undef kmp_reduction_barrier_release_bb
#undef kmp_reduction_barrier_gather_bb
#endif // KMP_FAST_REDUCTION_BARRIER
#if KMP_MIC_SUPPORTED
  if (__kmp_mic_type == mic2) { // KNC
    // plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
    __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
    __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
        1; // forkjoin release
    __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
  }
#if KMP_FAST_REDUCTION_BARRIER
  if (__kmp_mic_type == mic2) { // KNC
    __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
  }
#endif // KMP_FAST_REDUCTION_BARRIER
#endif // KMP_MIC_SUPPORTED

// From KMP_CHECKS initialization
#ifdef KMP_DEBUG
  __kmp_env_checks = TRUE; /* development versions have the extra checks */
#else
  __kmp_env_checks = FALSE; /* port versions do not have the extra checks */
#endif

  // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
  __kmp_foreign_tp = TRUE;

  __kmp_global.g.g_dynamic = FALSE;
  __kmp_global.g.g_dynamic_mode = dynamic_default;

  __kmp_init_nesting_mode();

  __kmp_env_initialize(NULL);

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  __kmp_user_level_mwait_init();
#endif

  // Print all messages in the message catalog for testing purposes
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
  if (__kmp_str_match_true(val)) {
    kmp_str_buf_t buffer;
    __kmp_str_buf_init(&buffer);
    __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
    __kmp_str_buf_free(&buffer);
  }
  __kmp_env_free(&val);

  __kmp_threads_capacity =
      __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
  // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
  __kmp_tp_capacity = __kmp_default_tp_capacity(
      __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);

  // If the library is shut down properly, both pools must be NULL. Just in
  // case, set them to NULL -- some memory may leak, but subsequent code will
  // work even if pools are not freed.
  KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
  KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
  KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
  __kmp_thread_pool = NULL;
  __kmp_thread_pool_insert_pt = NULL;
  __kmp_team_pool = NULL;

  /* Allocate all of the variable-sized records. The arrays are expandable;
     since allocation is cache-aligned, just add extra padding for the gap. */
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);

  /* init thread counts */
  KMP_DEBUG_ASSERT(__kmp_all_nth ==
                   0); // Asserts fail if the library is reinitializing and
  KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.

  /* setup the uber master thread and hierarchy */
  gtid = __kmp_register_root(TRUE);
  KA_TRACE(10, ("__kmp_do_serial_initialize  T#%d\n", gtid));
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(KMP_INITIAL_GTID(gtid));

  /* Initialize the common data */
  __kmp_common_initialize();

  __kmp_register_atfork();

#if !KMP_DYNAMIC_LIB ||                                                        \
    ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
  {
    /* Invoke the exit handler when the program finishes, only for the static
       library and macOS* dynamic. For other dynamic libraries, we already
       have _fini and DllMain. */
    int rc = atexit(__kmp_internal_end_atexit);
    if (rc != 0) {
      __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                  __kmp_msg_null);
    }
  }
#endif

#if KMP_HANDLE_SIGNALS
#if KMP_OS_UNIX
  /* NOTE: make sure that this is called before the user installs their own
     signal handlers so that the user handlers are called first. */
  __kmp_install_signals(FALSE);
#endif /* KMP_OS_UNIX */
#if KMP_OS_WINDOWS
  __kmp_install_signals(TRUE);
#endif /* KMP_OS_WINDOWS */
#endif

  /* we have finished the serial initialization */
  __kmp_init_counter++;

  __kmp_init_serial = TRUE;

  if (__kmp_version) {
    __kmp_print_version_1();
  }

  if (__kmp_settings) {
    __kmp_env_print();
  }

  if (__kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print_2();
  }

  KMP_MB();

  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
}
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
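
// Middle initialization: performed lazily before the first parallel region;
// initializes affinity/topology and derives the default team size from the
// number of available processors.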
static void __kmp_do_middle_initialize(void) {
  int i, j;
  int prev_dflt_team_nth;

  if (!__kmp_init_serial) {
    __kmp_do_serial_initialize();
  }

  KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));

  if (UNLIKELY(!__kmp_need_register_serial)) {
    // We are in a forked child process. The registration was skipped during
    // serial initialization in the __kmp_atfork_child handler, so do it here.
    __kmp_register_library_startup();
  }

  // Save the previous value for __kmp_dflt_team_nth so that we can avoid some
  // reinitialization if it hasn't changed.
  prev_dflt_team_nth = __kmp_dflt_team_nth;

#if KMP_AFFINITY_SUPPORTED
  // __kmp_affinity_initialize() will try to set __kmp_ncores to the number of
  // cores on the machine.
  __kmp_affinity_initialize(__kmp_affinity);
#endif /* KMP_AFFINITY_SUPPORTED */

  KMP_ASSERT(__kmp_xproc > 0);
  if (__kmp_avail_proc == 0) {
    __kmp_avail_proc = __kmp_xproc;
  }

  // If there were empty places in the num_threads list (OMP_NUM_THREADS=,,2,3),
  // correct them now.
  j = 0;
  while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
    __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
        __kmp_avail_proc;
    j++;
  }

  if (__kmp_dflt_team_nth == 0) {
#ifdef KMP_DFLT_NTH_CORES
    // Default #threads = #cores
    __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
#else
    // Default #threads = #available OS procs
    __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
#endif /* KMP_DFLT_NTH_CORES */
  }

  if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
    __kmp_dflt_team_nth = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth = __kmp_sys_max_nth;
  }

  if (__kmp_nesting_mode > 0)
    __kmp_set_nesting_mode_threads();

  /* There's no harm in continuing if the following check fails,
     but it indicates an error in the previous logic. */
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);

  if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
    // Run through the __kmp_threads array and set the nthreads-var for each
    // root thread that has not explicitly set it with omp_set_num_threads().
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thread = __kmp_threads[i];
      if (thread == NULL)
        continue;
      if (thread->th.th_current_task->td_icvs.nproc != 0)
        continue;

      set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
    }
  }
  KA_TRACE(
      20,
      ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
       __kmp_dflt_team_nth));

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* we have finished middle initialization */
  TCW_SYNC_4(__kmp_init_middle, TRUE);

  KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
}
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_parallel_initialize(void) {
  int gtid = __kmp_entry_gtid(); // this might be a new root

  /* synchronize parallel initialization (for siblings) */
  if (TCR_4(__kmp_init_parallel))
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_parallel)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* reinitialization after we have already shut down */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(
        10,
        ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
  }

  if (!__kmp_init_middle) {
    __kmp_do_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  __kmp_resume_if_hard_paused();

  /* begin initialization */
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
  KMP_ASSERT(KMP_UBER_GTID(gtid));

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Save the FP control regs.
  __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_store_mxcsr(&__kmp_init_mxcsr);
  __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_HANDLE_SIGNALS
  /* must be after __kmp_serial_initialize */
  __kmp_install_signals(TRUE);
#endif

  __kmp_suspend_initialize();

#if defined(USE_LOAD_BALANCE)
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
  }
#else
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
  }
#endif

  if (__kmp_version) {
    __kmp_print_version_2();
  }

  /* we have finished parallel initialization */
  TCW_SYNC_4(__kmp_init_parallel, TRUE);

  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_hidden_helper_initialize() {
  if (TCR_4(__kmp_init_hidden_helper))
    return;

  // __kmp_parallel_initialize is required before initializing hidden helpers
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  // Double check; must not be done before __kmp_parallel_initialize or it
  // would deadlock on __kmp_initz_lock.
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

#if KMP_AFFINITY_SUPPORTED
  // Initialize hidden helper affinity settings, if not already done.
  if (!__kmp_hh_affinity.flags.initialized)
    __kmp_affinity_initialize(__kmp_hh_affinity);
#endif

  // Set the count of hidden helper tasks to be executed to zero
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

  // Indicate that we are initializing the hidden helper team/threads
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);

  // Platform independent initialization
  __kmp_do_initialize_hidden_helper_threads();

  // Wait for the hidden helper teams to finish initializing
  __kmp_hidden_helper_threads_initz_wait();

  // We have finished hidden helper initialization
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
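/* Editorial note: the "hidden helper" team is a set of runtime-internal
   threads (8 by default on Linux; see the globals near the end of this file)
   that execute hidden helper tasks, e.g. the tasks generated for asynchronous
   offloading constructs such as "#pragma omp target nowait".  The team is
   created on demand the first time such a task is encountered, which is why
   this initializer piggybacks on __kmp_parallel_initialize() and reuses the
   same __kmp_initz_lock double-checked pattern. */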
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB();

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; /* reset doacross buffer counter */
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about entering user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // inform ittnotify about leaving user's code
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
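/* Usage sketch (illustrative, not part of the runtime): __kmp_invoke_task_func
   is the launch routine handed to __kmp_fork_call(); the microtask it invokes
   is the compiler-outlined body of a parallel region.  For user code such as

     #include <omp.h>
     #include <stdio.h>
     int main(void) {
       int x = 42;
     #pragma omp parallel
       printf("hello from %d, x=%d\n", omp_get_thread_num(), x);
       return 0;
     }

   an OpenMP compiler outlines the region body into a function and emits,
   roughly, __kmpc_fork_call(&loc, 1, outlined_fn, &x); that entry point
   reaches __kmp_fork_call() and, on every team member,
   __kmp_invoke_task_func() -> __kmp_invoke_microtask(). */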
void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in a teams construct
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root.  Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // make thr the CG root
  // Init to thread limit stored when teams masters were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

// Launch league of teams now, but not let workers execute
// (they hang on fork barrier until next parallel)
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // The last argument eliminates the join barrier, which won't work because
  // worker threads are in a fork barrier waiting for more parallel regions
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
#if KMP_DEBUG
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
#endif
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
/* Set the requested number of threads for the next parallel region
   encountered by this team. */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
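/* Usage sketch (illustrative): a num_threads clause reaches this routine
   through the compiler-emitted entry point __kmpc_push_num_threads(), which
   records the request in th_set_nproc for the next parallel region only:

     #pragma omp parallel num_threads(3)
     { ... }                 // compiler emits, roughly:
                             //   __kmpc_push_num_threads(&loc, gtid, 3);
                             //   __kmpc_fork_call(&loc, 0, outlined_fn);
*/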
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // Adjust num_threads without warning, as it is not a user setting; no
    // thread_limit clause was specified, so do not change thread-limit-var.
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary thread of the league primary threads.
    // Store the new thread limit; the old one is kept in the th_cg_roots list.
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1;
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}
/* Set the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered. */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // Requested values must be positive, but check anyway
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads <= 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams construct)
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
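/* Usage sketch (illustrative): the teams construct's clauses arrive here via
   the compiler-emitted entry points __kmpc_push_num_teams() (num_teams and
   thread_limit) and, for the OpenMP 5.1 lower:upper form,
   __kmpc_push_num_teams_51().  For example:

     #include <omp.h>
     #include <stdio.h>
     int main(void) {
     #pragma omp teams num_teams(4) thread_limit(2)
       printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams());
       return 0;
     }

   The per-team thread count is then clamped against the nthreads-var ICV,
   KMP_TEAMS_THREAD_LIMIT (__kmp_teams_thread_limit) and __kmp_teams_max_nth
   by __kmp_push_thread_limit() above. */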
// Set proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
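/* Usage sketch (illustrative): a proc_bind clause, e.g.

     #pragma omp parallel proc_bind(close) num_threads(4)
     { ... }

   is lowered to a call to the entry point __kmpc_push_proc_bind(&loc, gtid,
   <kmp_proc_bind_t value>) before the fork, which forwards the value here;
   the fork path then consumes th_set_proc_bind when partitioning places. */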
/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);

#ifdef KMP_DEBUG
  for (int f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }
#endif /* KMP_DEBUG */

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));
  KMP_MB(); /* Flush all pending memory write invalidates. */

/* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
    }
  }
#endif

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KMP_ASSERT(this_thr->th.th_team == team);
}
#ifdef USE_LOAD_BALANCE

// If we are at the outermost level of parallelism, return the number of
// worker threads that are currently active in the hot team; otherwise 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // don't count the primary thread
  }

  // Skip the primary thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of threads used by the next
// parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads active in the pool, active in this root's hot team, and the
  // current thread (to become the primary) can be added to the new team, but
  // they currently contribute to the system load and must be accounted for.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // Error reading the necessary info from /proc; fall back permanently to
    // the thread limit algorithm.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // The load balance algorithm is slightly delayed in detecting newly running
  // procs: the real system load should be at least as large as the number of
  // active OpenMP threads available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}

#endif /* USE_LOAD_BALANCE */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block, so
  // there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

  // Free old __kmp_threads arrays if they exist.
  kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
  while (ptr) {
    kmp_old_threads_list_t *next = ptr->next;
    __kmp_free(ptr->threads);
    __kmp_free(ptr);
    ptr = next;
  }

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_env_block) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}
int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
                                  thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
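/* Editorial note: the execution-mode knob set here corresponds to the
   KMP_LIBRARY environment setting (serial, turnaround, throughput) and, it is
   assumed here, to the kmp_set_library* extension entry points exported by
   libomp.  Illustrative use:

     // KMP_LIBRARY=throughput ./a.out
     // or, assuming the extension API declared in libomp's headers:
     //   kmp_set_library_throughput();
     #pragma omp parallel
     { ... }
*/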
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  /* align the requested size down to a 4K boundary */
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
  }

  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
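/* Usage sketch (illustrative): this is the backend for the worker stack-size
   knob, and it only takes effect before the first parallel region.  The
   user-visible controls are the OMP_STACKSIZE / KMP_STACKSIZE environment
   variables and (assumed here) the kmp_set_stacksize_s() extension:

     kmp_set_stacksize_s(16 * 1024 * 1024); // request 16 MB worker stacks
     #pragma omp parallel
     { ... }
*/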
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
/* Getting team information common for all team API;
   returns NULL if not in a teams construct. */
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}
int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (!team)
    return 0; // not in a teams construct
  if (serialized > 1)
    return 0; // teams region is serialized (1 team of 1 thread)
  return team->t.t_master_tid;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (!team)
    return 1; // not in a teams construct
  if (serialized > 1)
    return 1;
  return team->t.t_parent->t.t_nproc;
}
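/* Usage sketch (illustrative): these helpers back the user-level queries
   omp_get_team_num() and omp_get_num_teams():

     #include <omp.h>
     #include <stdio.h>
     int main(void) {
     #pragma omp teams num_teams(2)
       printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams());
       return 0;
     }

   Outside of a teams construct the queries report team 0 of 1, matching the
   defaults returned above when no teams microtask is active. */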
// Affinity format fields and table
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L -> thread level
  const char *long_name; // from spec e.g., thread_level -> nesting level
  char field_format; // data type for snprintf ('d' or 's')
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
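/* Usage sketch (illustrative): the table above defines the fields accepted by
   the OpenMP 5.0 affinity format (OMP_AFFINITY_FORMAT, OMP_DISPLAY_AFFINITY,
   omp_set_affinity_format).  Both short and long spellings are accepted:

     #include <omp.h>
     int main(void) {
       omp_set_affinity_format("host=%H tid=%0.4{thread_num} aff=%A");
     #pragma omp parallel
       omp_display_affinity(NULL); // NULL -> use affinity-format-var
       return 0;
     }
*/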
// Return the number of characters needed to hold the field
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8 digit number widths; this also prevents overflowing format
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // If the implementation has no info for a field type, print "undefined"
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
// Return the number of characters needed to hold the affinity string
// (not including the null byte); the resulting string is printed to *buffer.
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put the field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put the literal character in the buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
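/* Usage sketch (illustrative): omp_capture_affinity() is the user-level
   wrapper over __kmp_aux_capture_affinity(); it returns the number of
   characters needed, so it can be used to check the buffer size:

     #include <omp.h>
     #include <stdio.h>
     int main(void) {
     #pragma omp parallel
       {
         char buf[256];
         size_t n = omp_capture_affinity(buf, sizeof(buf), "%{thread_num} %A");
         if (n < sizeof(buf))
           printf("%s\n", buf);
       }
       return 0;
     }
*/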
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg;
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime was set explicitly */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
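/* Usage sketch (illustrative): this is the backend for the blocktime knob --
   how long a worker busy-waits after a parallel region before going to sleep.
   User-level controls are the KMP_BLOCKTIME environment variable and the
   kmp_set_blocktime() extension (assumed here):

     kmp_set_blocktime(0); // workers yield/sleep right after each region
     #pragma omp parallel
     { ... }
*/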
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: the critical-section method (lck != NULL).  If reduce_data and
  // reduce_func are both non-NULL, the tree-reduction method is available; if
  // loc->flags has KMP_IDENT_ATOMIC_REDUCE set, the atomic method is
  // available.  The RTL chooses among the generated methods below.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic deference) is slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 ||             \
    KMP_ARCH_VE || KMP_ARCH_S390X

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD ||        \
    KMP_OS_SOLARIS

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HURD || KMP_OS_SOLARIS

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION: if the team is serialized (team_size == 1), ignore
  // the forced reduction method and stay with the unsynchronized method
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_DEBUG_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // unsupported method specified
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
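/* Usage sketch (illustrative): this selection runs under the compiler-emitted
   entry points __kmpc_reduce()/__kmpc_reduce_nowait().  For user code such as

     double sum = 0.0;
     #pragma omp parallel for reduction(+ : sum)
     for (int i = 0; i < n; ++i)
       sum += a[i];

   the compiler passes the packed reduction data, a combiner function
   (reduce_func) and, when it can, the KMP_IDENT_ATOMIC_REDUCE flag in
   loc->flags; those inputs are exactly what the two
   FAST_REDUCTION_*_METHOD_GENERATED macros above test when choosing between
   the critical-section, atomic and tree methods. */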
// this function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
// Soft pause sets up threads to ignore blocktime and just go to sleep.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely; resume happens on next use.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}
// Soft resume sets __kmp_pause_status and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // wake it if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// This function is called via __kmpc_pause_resource. Returns 0 if successful.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      return 1; // runtime is not paused, so cannot be resumed
    }
    KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                     __kmp_pause_status == kmp_hard_paused);
    __kmp_pause_status = kmp_not_paused;
    return 0;
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused) {
      return 1; // already paused
    }
    __kmp_soft_pause();
    return 0;
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused) {
      return 1; // already paused
    }
    __kmp_hard_pause();
    return 0;
  }
  return 1; // invalid level
}
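/* Usage sketch (illustrative): __kmp_pause_resource() backs the OpenMP 5.0
   API omp_pause_resource()/omp_pause_resource_all():

     #include <omp.h>
     int main(void) {
     #pragma omp parallel
       { }
       // release runtime resources between phases; returns 0 on success
       if (omp_pause_resource_all(omp_pause_soft) != 0)
         return 1;
     #pragma omp parallel // the runtime resumes automatically on next use
       { }
       return 0;
     }
*/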
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
// The team size is changing, so the distributed barrier must be modified
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // Make all workers stop waiting on the barrier while the team is resized.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      continue;
    }
    // If the thread is still transitioning to the in-use state, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in use now
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    // Transition to the unused state
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  // Workers should see transition status 2 and move to 0; they may need to be
  // woken up first
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}
// Called when a distributed barrier is used and the team size is increased.
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  KMP_DEBUG_ASSERT(team);
  // Threads added to the team need a chance to be woken up so that they can
  // move through the fork barrier and into the team.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
                                3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }
  // Wait until all threads have moved into the team and set th_used_in_team
  // to 1.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}
// Globals and functions for hidden helper tasks
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // Explicit synchronization on all hidden helper threads, in case a regular
  // thread pushes a hidden helper task to a helper thread that has not been
  // awakened yet.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // The main thread of the hidden helper team releases the initializing
  // thread, waits for work, and then wakes up the worker threads.
  if (__kmpc_master(nullptr, *gtid)) {
    // First, unset the initial state and release the initial thread
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}
void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag to FALSE
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}
// Set #threads for top levels of nesting; must be called after topology is set
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use the topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--;
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) {
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable estimate
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max levels was set, set nesting mode levels to the same value
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}
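/* Editorial note: the nesting-mode support above (enabled through the
   KMP_NESTING_MODE setting) derives per-level thread counts from the machine
   topology so that nested parallel regions line up with hardware levels, e.g.
   one thread per core at the outer level and SMT siblings at the inner level.
   A nested pair such as

     #pragma omp parallel          // outer level, e.g. one thread per core
     #pragma omp parallel          // inner level, e.g. hardware threads
     { ... }

   would then pick up __kmp_nesting_nth_level[0] and [1] as its defaults. */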
// Empty symbols to export when the corresponding feature is disabled
#if !KMP_STATS_ENABLED
void __kmp_reset_stats() {}
#endif
#if !USE_DEBUGGER
int __kmp_omp_debug_struct_info = FALSE;
int __kmp_debugging = FALSE;
#endif
#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
void __kmp_itt_fini_ittlib() {}
void __kmp_itt_init_ittlib() {}
#endif