#include "ompt-specific.h"

#define MAX_MESSAGE 512
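// The next fragment is from __kmpc_begin(): when the KMP_INITIAL_THREAD_BIND
// environment variable evaluates to true, middle initialization runs eagerly
// so the initial thread's affinity mask is assigned before user code runs;
// otherwise the usual lazy initialization path is taken.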
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));
    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
  kmp_int32 gtid = __kmp_entry_gtid();
  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
  return TCR_4(__kmp_all_nth);

  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());

  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  semi2 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
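// The filtering above implements KMP_PAR_RANGE: loc->psource is scanned as
// ";file;routine;line;..." and the region is allowed to fork only when the
// file, routine, and line number fall inside the configured range (a negative
// __kmp_par_range inverts the decision).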
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
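// __kmpc_push_num_threads and __kmpc_push_proc_bind record the requested
// thread count and affinity policy for the *next* parallel region started by
// this thread; the following __kmp_fork_call consumes those values.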
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
#endif

  va_start(ap, microtask);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    kmp_info_t *master_th = __kmp_threads[gtid];
    ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) microtask,
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                  kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
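// Illustrative lowering sketch (assumed, not emitted by this file): for
//   #pragma omp parallel shared(a)
//   { use(a); }
// a compiler typically outlines the body and calls the entry point above:
//   void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *a) { use(*a); }
//   __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined, &a);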
                          kmp_int32 cond, void *args) {
  int gtid = __kmp_entry_gtid();

  void *exit_frame_ptr;

  __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,

  __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
                          kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 thread_limit) {
  __kmp_assert_valid_gtid(global_tid);
  kmp_info_t *thread = __kmp_threads[global_tid];
  if (thread_limit > 0)
    thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
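// A positive thread_limit above updates the current task's thread-limit ICV,
// so later nested regions created by this task respect the clause value; zero
// or negative requests leave the ICV untouched.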
                             kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_start(ap, microtask);

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }

  // remember the teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level;

  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);

  // if __kmpc_push_num_teams was not called, set the defaults now
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master,
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop the current CG root off the list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;

  // Restore the current task's thread_limit from the CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
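// Illustrative lowering sketch (assumed): for
//   #pragma omp teams num_teams(4) thread_limit(8)
// a compiler typically emits
//   __kmpc_push_num_teams(&loc, gtid, 4, 8);
//   __kmpc_fork_teams(&loc, argc, (kmpc_micro)outlined_teams_fn, ...);
// where outlined_teams_fn wraps the teams region body.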
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
  __kmp_assert_valid_gtid(global_tid);
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  __kmp_serialized_parallel(loc, global_tid);

  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
590 __kmp_assert_valid_gtid(global_tid);
591 if (!TCR_4(__kmp_init_parallel))
592 __kmp_parallel_initialize();
594 __kmp_resume_if_soft_paused();
596 this_thr = __kmp_threads[global_tid];
597 serial_team = this_thr->th.th_serial_team;
599 kmp_task_team_t *task_team = this_thr->th.th_task_team;
601 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
602 task_team->tt.tt_hidden_helper_task_encountered))
603 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
606 KMP_DEBUG_ASSERT(serial_team);
607 KMP_ASSERT(serial_team->t.t_serialized);
608 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
609 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
610 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
611 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
614 if (ompt_enabled.enabled &&
615 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
616 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
617 if (ompt_enabled.ompt_callback_implicit_task) {
618 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
619 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
620 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
624 ompt_data_t *parent_task_data;
625 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
627 if (ompt_enabled.ompt_callback_parallel_end) {
628 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
629 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
630 ompt_parallel_invoker_program | ompt_parallel_team,
631 OMPT_LOAD_RETURN_ADDRESS(global_tid));
633 __ompt_lw_taskteam_unlink(this_thr);
634 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
640 top = serial_team->t.t_control_stack_top;
641 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
642 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
643 serial_team->t.t_control_stack_top = top->next;
648 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
650 dispatch_private_info_t *disp_buffer =
651 serial_team->t.t_dispatch->th_disp_buffer;
652 serial_team->t.t_dispatch->th_disp_buffer =
653 serial_team->t.t_dispatch->th_disp_buffer->next;
654 __kmp_free(disp_buffer);
656 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;
658 --serial_team->t.t_serialized;
659 if (serial_team->t.t_serialized == 0) {
663#if KMP_ARCH_X86 || KMP_ARCH_X86_64
664 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
665 __kmp_clear_x87_fpu_status_word();
666 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
667 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
671 __kmp_pop_current_task_from_thread(this_thr);
673 if (ompd_state & OMPD_ENABLE_BP)
674 ompd_bp_parallel_end();
677 this_thr->th.th_team = serial_team->t.t_parent;
678 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
681 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
682 this_thr->th.th_team_master =
683 serial_team->t.t_parent->t.t_threads[0];
684 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
687 this_thr->th.th_dispatch =
688 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
690 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
691 this_thr->th.th_current_task->td_flags.executing = 1;
693 if (__kmp_tasking_mode != tskm_immediate_exec) {
695 this_thr->th.th_task_team =
696 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
    KA_TRACE(20,
             ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
              "team %p\n",
              global_tid, this_thr->th.th_task_team, this_thr->th.th_team));

#if KMP_AFFINITY_SUPPORTED
  if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(global_tid);
  }
#endif

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                  "depth of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_serialized));
  }

  serial_team->t.t_level--;
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);

  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
  KC_TRACE(10, ("__kmpc_flush: called\n"));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
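// Lowering sketch (assumed): a standalone `#pragma omp barrier` is typically
// emitted as a single call, e.g.
//   __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));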
  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_masked) {
    kmp_info_t *this_thr = __kmp_threads[global_tid];
    kmp_team_t *team = this_thr->th.th_team;
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
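// Pairing sketch (assumed lowering): `#pragma omp master` becomes
//   if (__kmpc_master(&loc, gtid)) { body; __kmpc_end_master(&loc, gtid); }
// i.e. only the thread for which __kmpc_master returns 1 executes the body.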
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
898 __kmp_assert_valid_gtid(global_tid);
900 if (!TCR_4(__kmp_init_parallel))
901 __kmp_parallel_initialize();
903 __kmp_resume_if_soft_paused();
905 tid = __kmp_tid_from_gtid(global_tid);
908 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
912#if OMPT_SUPPORT && OMPT_OPTIONAL
914 if (ompt_enabled.ompt_callback_masked) {
915 kmp_info_t *this_thr = __kmp_threads[global_tid];
916 kmp_team_t *team = this_thr->th.th_team;
917 ompt_callbacks.ompt_callback(ompt_callback_masked)(
918 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
919 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
920 OMPT_GET_RETURN_ADDRESS(0));
  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL);
#endif
  }
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
952 __kmp_assert_valid_gtid(global_tid);
953 KMP_POP_PARTITIONED_TIMER();
955#if OMPT_SUPPORT && OMPT_OPTIONAL
956 kmp_info_t *this_thr = __kmp_threads[global_tid];
957 kmp_team_t *team = this_thr->th.th_team;
958 if (ompt_enabled.ompt_callback_masked) {
959 int tid = __kmp_tid_from_gtid(global_tid);
960 ompt_callbacks.ompt_callback(ompt_callback_masked)(
961 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
962 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
963 OMPT_GET_RETURN_ADDRESS(0));
967 if (__kmp_env_consistency_check) {
968 __kmp_pop_sync(global_tid, ct_masked, loc);
982 KMP_DEBUG_ASSERT(__kmp_init_serial);
  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
985 __kmp_assert_valid_gtid(gtid);
987 if (!TCR_4(__kmp_init_parallel))
988 __kmp_parallel_initialize();
990 __kmp_resume_if_soft_paused();
993 __kmp_itt_ordered_prep(gtid);
997 th = __kmp_threads[gtid];
999#if OMPT_SUPPORT && OMPT_OPTIONAL
1003 OMPT_STORE_RETURN_ADDRESS(gtid);
1004 if (ompt_enabled.enabled) {
1005 team = __kmp_team_from_gtid(gtid);
1006 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
1008 th->th.ompt_thread_info.wait_id = lck;
1009 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
1012 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1013 if (ompt_enabled.ompt_callback_mutex_acquire) {
1014 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1015 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);
1026#if OMPT_SUPPORT && OMPT_OPTIONAL
1027 if (ompt_enabled.enabled) {
1029 th->th.ompt_thread_info.state = ompt_state_work_parallel;
1030 th->th.ompt_thread_info.wait_id = 0;
1033 if (ompt_enabled.ompt_callback_mutex_acquired) {
1034 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1035 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1041 __kmp_itt_ordered_start(gtid);
  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  __kmp_itt_ordered_end(gtid);

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
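// Pairing sketch (assumed lowering): an `ordered` region inside an ordered
// loop is bracketed as
//   __kmpc_ordered(&loc, gtid);  body;  __kmpc_end_ordered(&loc, gtid);
// so chunks execute the body in sequential iteration order.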
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
  __kmp_itt_critical_creating(ilk->lock, loc);
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
    __kmp_itt_critical_destroyed(ilk->lock);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (                                                                \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
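// These macros are the inlined fast path for test-and-set locks used by
// critical sections and the explicit lock API: the poll word holds
// KMP_LOCK_FREE(tas) when the lock is free and gtid+1 when owned, and
// acquisition spins with yield/backoff until an acquire-ordered
// compare-and-swap succeeds.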
1157#include <sys/syscall.h>
1167#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1169 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1170 kmp_int32 gtid_code = (gtid + 1) << 1; \
1172 KMP_FSYNC_PREPARE(ftx); \
1173 kmp_int32 poll_val; \
1174 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1175 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1176 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1177 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1179 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1181 KMP_LOCK_BUSY(1, futex))) { \
1184 poll_val |= KMP_LOCK_BUSY(1, futex); \
1187 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1188 NULL, NULL, 0)) != 0) { \
1193 KMP_FSYNC_ACQUIRED(ftx); \
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {  \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }
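// In the futex variant the poll word packs (gtid + 1) << 1 for the owner and
// uses bit 0 as the "there are waiters" flag; release stores
// KMP_LOCK_FREE(futex) and issues FUTEX_WAKE only when that bit was set, so
// the uncontended path never enters the kernel.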
1210#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1212 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1214 KMP_FSYNC_RELEASING(ftx); \
1215 kmp_int32 poll_val = \
1216 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1217 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1218 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1219 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1222 KMP_YIELD_OVERSUB(); \
1229static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1232 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1235 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1242 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1243 __kmp_init_user_lock_with_checks(lck);
1244 __kmp_set_user_lock_location(lck, loc);
1246 __kmp_itt_critical_creating(lck);
1257 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1262 __kmp_itt_critical_destroyed(lck);
1266 __kmp_destroy_user_lock_with_checks(lck);
1267 __kmp_user_lock_free(&idx, gtid, lck);
1268 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1269 KMP_DEBUG_ASSERT(lck != NULL);
1288 kmp_critical_name *crit) {
1289#if KMP_USE_DYNAMIC_LOCK
1290#if OMPT_SUPPORT && OMPT_OPTIONAL
1291 OMPT_STORE_RETURN_ADDRESS(global_tid);
1293 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1296#if OMPT_SUPPORT && OMPT_OPTIONAL
1297 ompt_state_t prev_state = ompt_state_undefined;
1298 ompt_thread_info_t ti;
1300 kmp_user_lock_p lck;
  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else { // ticket, queuing or drdpa: lock object is allocated behind *crit
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);
1333 __kmp_itt_critical_acquiring(lck);
1335#if OMPT_SUPPORT && OMPT_OPTIONAL
1336 OMPT_STORE_RETURN_ADDRESS(gtid);
1337 void *codeptr_ra = NULL;
1338 if (ompt_enabled.enabled) {
1339 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1341 prev_state = ti.state;
1342 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1343 ti.state = ompt_state_wait_critical;
1346 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1347 if (ompt_enabled.ompt_callback_mutex_acquire) {
1348 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1349 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1350 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1356 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1359 __kmp_itt_critical_acquired(lck);
1361#if OMPT_SUPPORT && OMPT_OPTIONAL
1362 if (ompt_enabled.enabled) {
1364 ti.state = prev_state;
1368 if (ompt_enabled.ompt_callback_mutex_acquired) {
1369 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1370 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1374 KMP_POP_PARTITIONED_TIMER();
1376 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1381#if KMP_USE_DYNAMIC_LOCK
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1386#define KMP_TSX_LOCK(seq) lockseq_##seq
1388#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1391#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1392#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1394#define KMP_CPUINFO_RTM 0
1398 if (hint & kmp_lock_hint_hle)
1399 return KMP_TSX_LOCK(hle);
1400 if (hint & kmp_lock_hint_rtm)
1401 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1402 if (hint & kmp_lock_hint_adaptive)
1403 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1406 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1407 return __kmp_user_lock_seq;
1408 if ((hint & omp_lock_hint_speculative) &&
1409 (hint & omp_lock_hint_nonspeculative))
1410 return __kmp_user_lock_seq;
1413 if (hint & omp_lock_hint_contended)
1414 return lockseq_queuing;
1417 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1421 if (hint & omp_lock_hint_speculative)
1422 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1424 return __kmp_user_lock_seq;
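// Usage sketch (assumed): omp_init_lock_with_hint(&l, omp_lock_hint_speculative)
// reaches __kmp_map_hint_to_lock(), which picks a TSX-based lock sequence only
// when the CPU reports RTM support; contradictory hint combinations and
// unsupported hardware fall back to the default __kmp_user_lock_seq.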
1427#if OMPT_SUPPORT && OMPT_OPTIONAL
1428#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1432 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1437 return kmp_mutex_impl_queuing;
1440 return kmp_mutex_impl_spin;
1443 case locktag_rtm_spin:
1444 return kmp_mutex_impl_speculative;
1447 return kmp_mutex_impl_none;
1449 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1452 switch (ilock->type) {
1454 case locktag_adaptive:
1455 case locktag_rtm_queuing:
1456 return kmp_mutex_impl_speculative;
1458 case locktag_nested_tas:
1459 return kmp_mutex_impl_spin;
1461 case locktag_nested_futex:
1463 case locktag_ticket:
1464 case locktag_queuing:
1466 case locktag_nested_ticket:
1467 case locktag_nested_queuing:
1468 case locktag_nested_drdpa:
1469 return kmp_mutex_impl_queuing;
1471 return kmp_mutex_impl_none;
1476static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1477 switch (__kmp_user_lock_kind) {
1479 return kmp_mutex_impl_spin;
1486 return kmp_mutex_impl_queuing;
1489 case lk_rtm_queuing:
1492 return kmp_mutex_impl_speculative;
1495 return kmp_mutex_impl_none;
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
1517 kmp_user_lock_p lck;
1518#if OMPT_SUPPORT && OMPT_OPTIONAL
1519 ompt_state_t prev_state = ompt_state_undefined;
1520 ompt_thread_info_t ti;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (codeptr == NULL)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);

  if (KMP_IS_D_LOCK(lockseq)) {
    KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                KMP_GET_D_TAG(lockseq));
  } else {
    __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
  }
1545 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1546 lck = (kmp_user_lock_p)lk;
1547 if (__kmp_env_consistency_check) {
1548 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1549 __kmp_map_hint_to_lock(hint));
1552 __kmp_itt_critical_acquiring(lck);
1554#if OMPT_SUPPORT && OMPT_OPTIONAL
1555 if (ompt_enabled.enabled) {
1556 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1558 prev_state = ti.state;
1559 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1560 ti.state = ompt_state_wait_critical;
1563 if (ompt_enabled.ompt_callback_mutex_acquire) {
1564 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, (unsigned int)hint,
          __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
          codeptr);
1571#if KMP_USE_INLINED_TAS
1572 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1573 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1575#elif KMP_USE_INLINED_FUTEX
1576 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1577 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1581 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1584 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1586 if (__kmp_env_consistency_check) {
1587 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1588 __kmp_map_hint_to_lock(hint));
1591 __kmp_itt_critical_acquiring(lck);
1593#if OMPT_SUPPORT && OMPT_OPTIONAL
1594 if (ompt_enabled.enabled) {
1595 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1597 prev_state = ti.state;
1598 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1599 ti.state = ompt_state_wait_critical;
1602 if (ompt_enabled.ompt_callback_mutex_acquire) {
1603 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, (unsigned int)hint,
          __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
          codeptr);
1610 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1612 KMP_POP_PARTITIONED_TIMER();
1615 __kmp_itt_critical_acquired(lck);
1617#if OMPT_SUPPORT && OMPT_OPTIONAL
1618 if (ompt_enabled.enabled) {
1620 ti.state = prev_state;
1624 if (ompt_enabled.ompt_callback_mutex_acquired) {
1625 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1626 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1631 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
                          kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1652#if KMP_USE_DYNAMIC_LOCK
1653 int locktag = KMP_EXTRACT_D_TAG(crit);
1655 lck = (kmp_user_lock_p)crit;
1656 KMP_ASSERT(lck != NULL);
1657 if (__kmp_env_consistency_check) {
1658 __kmp_pop_sync(global_tid, ct_critical, loc);
1661 __kmp_itt_critical_releasing(lck);
1663#if KMP_USE_INLINED_TAS
1664 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1665 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1667#elif KMP_USE_INLINED_FUTEX
1668 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1669 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1673 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1676 kmp_indirect_lock_t *ilk =
1677 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1678 KMP_ASSERT(ilk != NULL);
1680 if (__kmp_env_consistency_check) {
1681 __kmp_pop_sync(global_tid, ct_critical, loc);
1684 __kmp_itt_critical_releasing(lck);
1686 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }
1705 KMP_ASSERT(lck != NULL);
1707 if (__kmp_env_consistency_check)
1708 __kmp_pop_sync(global_tid, ct_critical, loc);
1711 __kmp_itt_critical_releasing(lck);
1715 __kmp_release_user_lock_with_checks(lck, global_tid);
1719#if OMPT_SUPPORT && OMPT_OPTIONAL
1722 OMPT_STORE_RETURN_ADDRESS(global_tid);
1723 if (ompt_enabled.ompt_callback_mutex_released) {
1724 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1725 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1726 OMPT_LOAD_RETURN_ADDRESS(0));
1730 KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1746 __kmp_assert_valid_gtid(global_tid);
1748 if (!TCR_4(__kmp_init_parallel))
1749 __kmp_parallel_initialize();
1751 __kmp_resume_if_soft_paused();
1753 if (__kmp_env_consistency_check)
1754 __kmp_check_barrier(global_tid, ct_barrier, loc);
1757 ompt_frame_t *ompt_frame;
1758 if (ompt_enabled.enabled) {
1759 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1760 if (ompt_frame->enter_frame.ptr == NULL)
1761 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1763 OMPT_STORE_RETURN_ADDRESS(global_tid);
1766 __kmp_threads[global_tid]->th.th_ident = loc;
1768 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1769#if OMPT_SUPPORT && OMPT_OPTIONAL
1770 if (ompt_enabled.enabled) {
1771 ompt_frame->enter_frame = ompt_data_none;
1775 return (status != 0) ? 0 : 1;
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1789 __kmp_assert_valid_gtid(global_tid);
1790 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1806 __kmp_assert_valid_gtid(global_tid);
1808 if (!TCR_4(__kmp_init_parallel))
1809 __kmp_parallel_initialize();
1811 __kmp_resume_if_soft_paused();
1813 if (__kmp_env_consistency_check) {
1815 KMP_WARNING(ConstructIdentInvalid);
1817 __kmp_check_barrier(global_tid, ct_barrier, loc);
1821 ompt_frame_t *ompt_frame;
1822 if (ompt_enabled.enabled) {
1823 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1824 if (ompt_frame->enter_frame.ptr == NULL)
1825 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1827 OMPT_STORE_RETURN_ADDRESS(global_tid);
1830 __kmp_threads[global_tid]->th.th_ident = loc;
1832 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1833#if OMPT_SUPPORT && OMPT_OPTIONAL
1834 if (ompt_enabled.enabled) {
1835 ompt_frame->enter_frame = ompt_data_none;
1841 if (__kmp_env_consistency_check) {
1847 __kmp_pop_sync(global_tid, ct_master, loc);
1867 __kmp_assert_valid_gtid(global_tid);
1868 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1873 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1876#if OMPT_SUPPORT && OMPT_OPTIONAL
1877 kmp_info_t *this_thr = __kmp_threads[global_tid];
1878 kmp_team_t *team = this_thr->th.th_team;
1879 int tid = __kmp_tid_from_gtid(global_tid);
1881 if (ompt_enabled.enabled) {
1883 if (ompt_enabled.ompt_callback_work) {
1884 ompt_callbacks.ompt_callback(ompt_callback_work)(
1885 ompt_work_single_executor, ompt_scope_begin,
1886 &(team->t.ompt_team_info.parallel_data),
1887 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1888 1, OMPT_GET_RETURN_ADDRESS(0));
1891 if (ompt_enabled.ompt_callback_work) {
1892 ompt_callbacks.ompt_callback(ompt_callback_work)(
1893 ompt_work_single_other, ompt_scope_begin,
1894 &(team->t.ompt_team_info.parallel_data),
1895 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1896 1, OMPT_GET_RETURN_ADDRESS(0));
1897 ompt_callbacks.ompt_callback(ompt_callback_work)(
1898 ompt_work_single_other, ompt_scope_end,
1899 &(team->t.ompt_team_info.parallel_data),
1900 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1901 1, OMPT_GET_RETURN_ADDRESS(0));
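// Pairing sketch (assumed lowering): `#pragma omp single` becomes
//   if (__kmpc_single(&loc, gtid)) { body; __kmpc_end_single(&loc, gtid); }
//   __kmpc_barrier(&loc, gtid); // unless nowait was specified
// so exactly one thread of the team executes the body.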
1920 __kmp_assert_valid_gtid(global_tid);
1921 __kmp_exit_single(global_tid);
1922 KMP_POP_PARTITIONED_TIMER();
1924#if OMPT_SUPPORT && OMPT_OPTIONAL
1925 kmp_info_t *this_thr = __kmp_threads[global_tid];
1926 kmp_team_t *team = this_thr->th.th_team;
1927 int tid = __kmp_tid_from_gtid(global_tid);
1929 if (ompt_enabled.ompt_callback_work) {
1930 ompt_callbacks.ompt_callback(ompt_callback_work)(
1931 ompt_work_single_executor, ompt_scope_end,
1932 &(team->t.ompt_team_info.parallel_data),
1933 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1934 OMPT_GET_RETURN_ADDRESS(0));
1947 KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1950#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine the workshare type from the ident flags
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
1969 ompt_callbacks.ompt_callback(ompt_callback_work)(
1970 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1971 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1974 if (__kmp_env_consistency_check)
1975 __kmp_pop_workshare(global_tid, ct_pdo, loc);
void ompc_set_num_threads(int arg) {
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? true : false);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
}

void ompc_set_max_active_levels(int max_active_levels) {
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}
size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}
void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
      __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);
  }
#endif
  __kmp_aux_display_affinity(gtid, format);
}
size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
                                              char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
      __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);
  }
#endif
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
void kmpc_set_stacksize(int arg) {
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid, bt = arg;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_convert_blocktime(&bt);
  __kmp_aux_set_blocktime(bt, thread, tid);
}

void kmpc_set_library(int arg) {
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization: dispatch buffers are already allocated
  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
      arg <= KMP_MAX_DISP_NUM_BUFF) {
    __kmp_dispatch_num_buffers = arg;
  }
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;
  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  if (didit)
    *data_ptr = cpy_data;
2238 ompt_frame_t *ompt_frame;
2239 if (ompt_enabled.enabled) {
2240 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2241 if (ompt_frame->enter_frame.ptr == NULL)
2242 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2244 OMPT_STORE_RETURN_ADDRESS(gtid);
2248 __kmp_threads[gtid]->th.th_ident = loc;
2250 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2253 (*cpy_func)(cpy_data, *data_ptr);
2259 OMPT_STORE_RETURN_ADDRESS(gtid);
2262 __kmp_threads[gtid]->th.th_ident = loc;
2265 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2266#if OMPT_SUPPORT && OMPT_OPTIONAL
2267 if (ompt_enabled.enabled) {
2268 ompt_frame->enter_frame = ompt_data_none;
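// Lowering sketch (assumed): for `#pragma omp single copyprivate(x)` the
// compiler outlines a copy helper and emits roughly
//   kmp_int32 didit = __kmpc_single(&loc, gtid);
//   if (didit) { body_writing_x(); __kmpc_end_single(&loc, gtid); }
//   __kmpc_copyprivate(&loc, gtid, sizeof(x), &x, copy_x_fn, didit);
// The executing thread publishes &x, and the two barriers inside
// __kmpc_copyprivate keep it alive while every other thread runs copy_x_fn.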
  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  if (cpy_data)
    *data_ptr = cpy_data;
2312 ompt_frame_t *ompt_frame;
2313 if (ompt_enabled.enabled) {
2314 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2315 if (ompt_frame->enter_frame.ptr == NULL)
2316 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2317 OMPT_STORE_RETURN_ADDRESS(gtid);
2322 __kmp_threads[gtid]->th.th_ident = loc;
2324 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2331#define INIT_LOCK __kmp_init_user_lock_with_checks
2332#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2333#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2334#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2335#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2336#define ACQUIRE_NESTED_LOCK_TIMED \
2337 __kmp_acquire_nested_user_lock_with_checks_timed
2338#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2339#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2340#define TEST_LOCK __kmp_test_user_lock_with_checks
2341#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2342#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2343#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2348#if KMP_USE_DYNAMIC_LOCK
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
  } else {
    KMP_INIT_I_LOCK(lock, seq);
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
  }
}
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
  // there is no nested-lock implementation for the speculative lock kinds
  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
      seq == lockseq_rtm_spin || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
}
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (codeptr == NULL)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (codeptr == NULL)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2465#if OMPT_SUPPORT && OMPT_OPTIONAL
2467 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2469 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2470 if (ompt_enabled.ompt_callback_lock_init) {
2471 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2472 ompt_mutex_lock, omp_lock_hint_none,
2473 __ompt_get_mutex_impl_type(user_lock),
2474 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  static char const *const func = "omp_init_lock";
2481 kmp_user_lock_p lck;
2482 KMP_DEBUG_ASSERT(__kmp_init_serial);
2484 if (__kmp_env_consistency_check) {
2485 if (user_lock == NULL) {
2486 KMP_FATAL(LockIsUninitialized, func);
2490 KMP_CHECK_USER_LOCK_INIT();
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
2506 __kmp_set_user_lock_location(lck, loc);
2508#if OMPT_SUPPORT && OMPT_OPTIONAL
2510 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2512 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2513 if (ompt_enabled.ompt_callback_lock_init) {
2514 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2515 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2516 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2521 __kmp_itt_lock_creating(lck);
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2537#if OMPT_SUPPORT && OMPT_OPTIONAL
2539 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2541 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2542 if (ompt_enabled.ompt_callback_lock_init) {
2543 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2544 ompt_mutex_nest_lock, omp_lock_hint_none,
2545 __ompt_get_mutex_impl_type(user_lock),
2546 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  static char const *const func = "omp_init_nest_lock";
2553 kmp_user_lock_p lck;
2554 KMP_DEBUG_ASSERT(__kmp_init_serial);
2556 if (__kmp_env_consistency_check) {
2557 if (user_lock == NULL) {
2558 KMP_FATAL(LockIsUninitialized, func);
2562 KMP_CHECK_USER_LOCK_INIT();
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
2580 INIT_NESTED_LOCK(lck);
2581 __kmp_set_user_lock_location(lck, loc);
2583#if OMPT_SUPPORT && OMPT_OPTIONAL
2585 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2587 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2588 if (ompt_enabled.ompt_callback_lock_init) {
2589 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2590 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2591 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2596 __kmp_itt_lock_creating(lck);
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
2614#if OMPT_SUPPORT && OMPT_OPTIONAL
2616 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2618 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2619 if (ompt_enabled.ompt_callback_lock_destroy) {
2620 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2621 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2624 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }
2642#if OMPT_SUPPORT && OMPT_OPTIONAL
2644 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2646 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2647 if (ompt_enabled.ompt_callback_lock_destroy) {
2648 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2649 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2654 __kmp_itt_lock_destroyed(lck);
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ; // the lock is embedded in the user object, nothing to free
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ; // nothing to free
  } else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
2682#if OMPT_SUPPORT && OMPT_OPTIONAL
2684 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2686 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2687 if (ompt_enabled.ompt_callback_lock_destroy) {
2688 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2689 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2692 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }
2714#if OMPT_SUPPORT && OMPT_OPTIONAL
2716 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2718 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2719 if (ompt_enabled.ompt_callback_lock_destroy) {
2720 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2721 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2726 __kmp_itt_lock_destroyed(lck);
2729 DESTROY_NESTED_LOCK(lck);
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ; // the lock is embedded in the user object, nothing to free
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    ; // nothing to free
  } else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
2760 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2762 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2763 if (ompt_enabled.ompt_callback_mutex_acquire) {
2764 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2765 ompt_mutex_lock, omp_lock_hint_none,
2766 __ompt_get_mutex_impl_type(user_lock),
2767 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2770#if KMP_USE_INLINED_TAS
2771 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2772 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2774#elif KMP_USE_INLINED_FUTEX
2775 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2776 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2780 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2783 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2785#if OMPT_SUPPORT && OMPT_OPTIONAL
2786 if (ompt_enabled.ompt_callback_mutex_acquired) {
2787 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2788 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }
2811 __kmp_itt_lock_acquiring(lck);
2813#if OMPT_SUPPORT && OMPT_OPTIONAL
2815 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2817 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2818 if (ompt_enabled.ompt_callback_mutex_acquire) {
2819 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2820 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2821 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2825 ACQUIRE_LOCK(lck, gtid);
2828 __kmp_itt_lock_acquired(lck);
2831#if OMPT_SUPPORT && OMPT_OPTIONAL
2832 if (ompt_enabled.ompt_callback_mutex_acquired) {
2833 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2834 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
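// These entry points implement the OpenMP lock operations for
// compiler-generated code (assumption about the callers): the inlined TAS and
// futex fast paths above are taken only when consistency checking is off, and
// each path raises the matching OMPT mutex-acquire/acquired callbacks.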
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2847#if OMPT_SUPPORT && OMPT_OPTIONAL
2849 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2851 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2852 if (ompt_enabled.enabled) {
2853 if (ompt_enabled.ompt_callback_mutex_acquire) {
2854 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2855 ompt_mutex_nest_lock, omp_lock_hint_none,
2856 __ompt_get_mutex_impl_type(user_lock),
2857 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2861 int acquire_status =
2862 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2863 (void)acquire_status;
2865 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2868#if OMPT_SUPPORT && OMPT_OPTIONAL
2869 if (ompt_enabled.enabled) {
2870 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2871 if (ompt_enabled.ompt_callback_mutex_acquired) {
2873 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2874 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2878 if (ompt_enabled.ompt_callback_nest_lock) {
2880 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2881 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2889 kmp_user_lock_p lck;
2891 if ((__kmp_user_lock_kind == lk_tas) &&
2892 (
sizeof(lck->tas.lk.poll) +
sizeof(lck->tas.lk.depth_locked) <=
2893 OMP_NEST_LOCK_T_SIZE)) {
2894 lck = (kmp_user_lock_p)user_lock;
2897 else if ((__kmp_user_lock_kind == lk_futex) &&
2898 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= OMP_NEST_LOCK_T_SIZE)) {
2900 lck = (kmp_user_lock_p)user_lock;
2904 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2908 __kmp_itt_lock_acquiring(lck);
2910#if OMPT_SUPPORT && OMPT_OPTIONAL
2912 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2914 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2915 if (ompt_enabled.enabled) {
2916 if (ompt_enabled.ompt_callback_mutex_acquire) {
2917 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2918 ompt_mutex_nest_lock, omp_lock_hint_none,
2919 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2925 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2928 __kmp_itt_lock_acquired(lck);
2931#if OMPT_SUPPORT && OMPT_OPTIONAL
2932 if (ompt_enabled.enabled) {
2933 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2934 if (ompt_enabled.ompt_callback_mutex_acquired) {
2936 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2937 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2940 if (ompt_enabled.ompt_callback_nest_lock) {
2942 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2943 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
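/* Illustrative sketch of the nestable-lock API behind the nest-lock entry
   point above: the owning thread may re-acquire the lock, and the runtime
   tracks the nesting depth. The recursive walker below is hypothetical. */
#include <omp.h>

static omp_nest_lock_t tree_lock;

void visit(int depth) {
  omp_set_nest_lock(&tree_lock); /* re-acquisition by the owner just bumps the depth */
  if (depth > 0)
    visit(depth - 1);
  omp_unset_nest_lock(&tree_lock); /* fully released only when the depth returns to 0 */
}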
2952 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2953#if KMP_USE_DYNAMIC_LOCK
2955 int tag = KMP_EXTRACT_D_TAG(user_lock);
2957 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2959#if KMP_USE_INLINED_TAS
2960 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2961 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2963#elif KMP_USE_INLINED_FUTEX
2964 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2965 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2969 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2972#if OMPT_SUPPORT && OMPT_OPTIONAL
2974 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2976 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2977 if (ompt_enabled.ompt_callback_mutex_released) {
2978 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2979 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2985 kmp_user_lock_p lck;
2990 if ((__kmp_user_lock_kind == lk_tas) &&
2991 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2992#if KMP_OS_LINUX && \
2993 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2996 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2998 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
3001#if OMPT_SUPPORT && OMPT_OPTIONAL
3003 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3005 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3006 if (ompt_enabled.ompt_callback_mutex_released) {
3007 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3008 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3014 lck = (kmp_user_lock_p)user_lock;
3018 else if ((__kmp_user_lock_kind == lk_futex) &&
3019 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3020 lck = (kmp_user_lock_p)user_lock;
3024 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
3028 __kmp_itt_lock_releasing(lck);
3031 RELEASE_LOCK(lck, gtid);
3033#if OMPT_SUPPORT && OMPT_OPTIONAL
3035 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3037 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3038 if (ompt_enabled.ompt_callback_mutex_released) {
3039 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3040 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3048 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3049#if KMP_USE_DYNAMIC_LOCK
3052 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3054 int release_status =
3055 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3056 (void)release_status;
3058#if OMPT_SUPPORT && OMPT_OPTIONAL
3060 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3062 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3063 if (ompt_enabled.enabled) {
3064 if (release_status == KMP_LOCK_RELEASED) {
3065 if (ompt_enabled.ompt_callback_mutex_released) {
3067 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3068 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3071 } else if (ompt_enabled.ompt_callback_nest_lock) {
3073 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3074 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3081 kmp_user_lock_p lck;
3085 if ((__kmp_user_lock_kind == lk_tas) &&
3086 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= OMP_NEST_LOCK_T_SIZE)) {
3088#if KMP_OS_LINUX && \
3089 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3091 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3093 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3096#if OMPT_SUPPORT && OMPT_OPTIONAL
3097 int release_status = KMP_LOCK_STILL_HELD;
3100 if (--(tl->lk.depth_locked) == 0) {
3101 TCW_4(tl->lk.poll, 0);
3102#if OMPT_SUPPORT && OMPT_OPTIONAL
3103 release_status = KMP_LOCK_RELEASED;
3108#if OMPT_SUPPORT && OMPT_OPTIONAL
3110 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3112 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3113 if (ompt_enabled.enabled) {
3114 if (release_status == KMP_LOCK_RELEASED) {
3115 if (ompt_enabled.ompt_callback_mutex_released) {
3117 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3118 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3120 } else if (ompt_enabled.ompt_callback_nest_lock) {
3122 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3123 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3130 lck = (kmp_user_lock_p)user_lock;
3134 else if ((__kmp_user_lock_kind == lk_futex) &&
3135 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= OMP_NEST_LOCK_T_SIZE)) {
3137 lck = (kmp_user_lock_p)user_lock;
3141 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3145 __kmp_itt_lock_releasing(lck);
3149 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3150#if OMPT_SUPPORT && OMPT_OPTIONAL
3152 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3154 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3155 if (ompt_enabled.enabled) {
3156 if (release_status == KMP_LOCK_RELEASED) {
3157 if (ompt_enabled.ompt_callback_mutex_released) {
3159 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3160 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3162 } else if (ompt_enabled.ompt_callback_nest_lock) {
3164 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3165 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3174 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3177#if KMP_USE_DYNAMIC_LOCK
3179 int tag = KMP_EXTRACT_D_TAG(user_lock);
3181 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3183#if OMPT_SUPPORT && OMPT_OPTIONAL
3185 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3187 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3188 if (ompt_enabled.ompt_callback_mutex_acquire) {
3189 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3190 ompt_mutex_test_lock, omp_lock_hint_none,
3191 __ompt_get_mutex_impl_type(user_lock),
3192 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3195#if KMP_USE_INLINED_TAS
3196 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3197 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3199#elif KMP_USE_INLINED_FUTEX
3200 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3201 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3205 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3209 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3211#if OMPT_SUPPORT && OMPT_OPTIONAL
3212 if (ompt_enabled.ompt_callback_mutex_acquired) {
3213 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3214 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3220 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3227 kmp_user_lock_p lck;
3230 if ((__kmp_user_lock_kind == lk_tas) &&
3231 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3232 lck = (kmp_user_lock_p)user_lock;
3235 else if ((__kmp_user_lock_kind == lk_futex) &&
3236 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3237 lck = (kmp_user_lock_p)user_lock;
3241 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3245 __kmp_itt_lock_acquiring(lck);
3247#if OMPT_SUPPORT && OMPT_OPTIONAL
3249 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3251 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3252 if (ompt_enabled.ompt_callback_mutex_acquire) {
3253 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3254 ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3255 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3259 rc = TEST_LOCK(lck, gtid);
3262 __kmp_itt_lock_acquired(lck);
3264 __kmp_itt_lock_cancelled(lck);
3267#if OMPT_SUPPORT && OMPT_OPTIONAL
3268 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3269 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3270 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3274 return (rc ? FTN_TRUE : FTN_FALSE);
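/* Illustrative sketch of omp_test_lock(), the non-blocking probe this entry
   point implements: it returns nonzero only when the lock was acquired, so
   the caller can fall back to other work instead of spinning. */
#include <omp.h>

int try_update(omp_lock_t *lck, int *value) {
  if (omp_test_lock(lck)) {
    ++*value;            /* we own the lock: safe to update */
    omp_unset_lock(lck);
    return 1;
  }
  return 0;              /* lock was busy: caller may retry later */
}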
3282 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3283#if KMP_USE_DYNAMIC_LOCK
3286 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3288#if OMPT_SUPPORT && OMPT_OPTIONAL
3290 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3292 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3293 if (ompt_enabled.ompt_callback_mutex_acquire) {
3294 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3295 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3296 __ompt_get_mutex_impl_type(user_lock),
3297 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3300 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3303 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3305 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3308#if OMPT_SUPPORT && OMPT_OPTIONAL
3309 if (ompt_enabled.enabled && rc) {
3311 if (ompt_enabled.ompt_callback_mutex_acquired) {
3313 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3314 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3318 if (ompt_enabled.ompt_callback_nest_lock) {
3320 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3321 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3330 kmp_user_lock_p lck;
3333 if ((__kmp_user_lock_kind == lk_tas) &&
3334 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= OMP_NEST_LOCK_T_SIZE)) {
3336 lck = (kmp_user_lock_p)user_lock;
3339 else if ((__kmp_user_lock_kind == lk_futex) &&
3340 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= OMP_NEST_LOCK_T_SIZE)) {
3342 lck = (kmp_user_lock_p)user_lock;
3346 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3350 __kmp_itt_lock_acquiring(lck);
3353#if OMPT_SUPPORT && OMPT_OPTIONAL
3355 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3357 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3358 if (ompt_enabled.enabled &&
3359 ompt_enabled.ompt_callback_mutex_acquire) {
3360 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3361 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3362 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3367 rc = TEST_NESTED_LOCK(lck, gtid);
3370 __kmp_itt_lock_acquired(lck);
3372 __kmp_itt_lock_cancelled(lck);
3375#if OMPT_SUPPORT && OMPT_OPTIONAL
3376 if (ompt_enabled.enabled && rc) {
3378 if (ompt_enabled.ompt_callback_mutex_acquired) {
3380 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3381 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3384 if (ompt_enabled.ompt_callback_nest_lock) {
3386 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3387 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
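/* Hedged sketch of a minimal OMPT tool that observes the mutex-acquired
   events dispatched above; it follows the omp-tools.h interface, and the
   function names are ours, not part of this runtime. */
#include <omp-tools.h>
#include <stdio.h>

static void on_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id,
                              const void *codeptr_ra) {
  printf("mutex %llu acquired (kind=%d) at %p\n",
         (unsigned long long)wait_id, (int)kind, codeptr_ra);
}

static int tool_initialize(ompt_function_lookup_t lookup,
                           int initial_device_num, ompt_data_t *tool_data) {
  ompt_set_callback_t set_cb = (ompt_set_callback_t)lookup("ompt_set_callback");
  set_cb(ompt_callback_mutex_acquired, (ompt_callback_t)on_mutex_acquired);
  return 1; /* nonzero keeps the tool active */
}

static void tool_finalize(ompt_data_t *tool_data) {}

ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                          const char *runtime_version) {
  static ompt_start_tool_result_t result = {tool_initialize, tool_finalize, {0}};
  return &result;
}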
3406#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3407 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3409#define __KMP_GET_REDUCTION_METHOD(gtid) \
3410 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3416 static __forceinline void
3417 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3418 kmp_critical_name *crit) {
3424 kmp_user_lock_p lck;
3426#if KMP_USE_DYNAMIC_LOCK
3428 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3431 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3432 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3433 KMP_GET_D_TAG(__kmp_user_lock_seq));
3435 __kmp_init_indirect_csptr(crit, loc, global_tid,
3436 KMP_GET_I_TAG(__kmp_user_lock_seq));
3442 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3443 lck = (kmp_user_lock_p)lk;
3444 KMP_DEBUG_ASSERT(lck != NULL);
3445 if (__kmp_env_consistency_check) {
3446 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3448 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3450 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3452 KMP_DEBUG_ASSERT(lck != NULL);
3453 if (__kmp_env_consistency_check) {
3454 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3456 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3464 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3465 lck = (kmp_user_lock_p)crit;
3467 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3469 KMP_DEBUG_ASSERT(lck != NULL);
3471 if (__kmp_env_consistency_check)
3472 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3474 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3480 static __forceinline void
3481 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3482 kmp_critical_name *crit) {
3484 kmp_user_lock_p lck;
3486#if KMP_USE_DYNAMIC_LOCK
3488 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3489 lck = (kmp_user_lock_p)crit;
3490 if (__kmp_env_consistency_check)
3491 __kmp_pop_sync(global_tid, ct_critical, loc);
3492 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3494 kmp_indirect_lock_t *ilk =
3495 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3496 if (__kmp_env_consistency_check)
3497 __kmp_pop_sync(global_tid, ct_critical, loc);
3498 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3506 if (__kmp_base_user_lock_size > 32) {
3507 lck = *((kmp_user_lock_p *)crit);
3508 KMP_ASSERT(lck != NULL);
3510 lck = (kmp_user_lock_p)crit;
3513 if (__kmp_env_consistency_check)
3514 __kmp_pop_sync(global_tid, ct_critical, loc);
3516 __kmp_release_user_lock_with_checks(lck, global_tid);
3521 static __forceinline int
3522 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
int *task_state) {
3527 if (th->th.th_teams_microtask) {
3528 *team_p = team = th->th.th_team;
3529 if (team->t.t_level == th->th.th_teams_level) {
3531 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid);
3533 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3534 th->th.th_team = team->t.t_parent;
3535 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3536 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3537 *task_state = th->th.th_task_state;
3538 th->th.th_task_state = 0;
3546 static __forceinline void
3547 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team,
int task_state) {
3549 th->th.th_info.ds.ds_tid = 0;
3550 th->th.th_team = team;
3551 th->th.th_team_nproc = team->t.t_nproc;
3552 th->th.th_task_team = team->t.t_task_team[task_state];
3553 __kmp_type_convert(task_state, &(th->th.th_task_state));
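/* Illustrative user-level code that exercises the teams-swap helpers above:
   a reduction at teams scope, where the runtime temporarily rebinds the
   primary thread of each team to the parent team so the reduction spans the
   whole league. The dot() wrapper is ours. */
double dot(const double *x, const double *y, int n) {
  double s = 0.0;
#pragma omp teams distribute parallel for reduction(+ : s)
  for (int i = 0; i < n; ++i)
    s += x[i] * y[i];
  return s;
}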
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3574 size_t reduce_size, void *reduce_data,
3575 void (*reduce_func)(void *lhs_data, void *rhs_data),
3576 kmp_critical_name *lck) {
3580 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3583 int teams_swapped = 0, task_state;
3584 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3585 __kmp_assert_valid_gtid(global_tid);
3593 if (!TCR_4(__kmp_init_parallel))
3594 __kmp_parallel_initialize();
3596 __kmp_resume_if_soft_paused();
3599#if KMP_USE_DYNAMIC_LOCK
3600 if (__kmp_env_consistency_check)
3601 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3603 if (__kmp_env_consistency_check)
3604 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3607 th = __kmp_thread_from_gtid(global_tid);
3608 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3626 packed_reduction_method = __kmp_determine_reduction_method(
3627 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3628 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3630 OMPT_REDUCTION_DECL(th, global_tid);
3631 if (packed_reduction_method == critical_reduce_block) {
3633 OMPT_REDUCTION_BEGIN;
3635 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3638 } else if (packed_reduction_method == empty_reduce_block) {
3640 OMPT_REDUCTION_BEGIN;
3646 } else if (packed_reduction_method == atomic_reduce_block) {
3656 if (__kmp_env_consistency_check)
3657 __kmp_pop_sync(global_tid, ct_reduce, loc);
3659 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, tree_reduce_block)) {
3680 ompt_frame_t *ompt_frame;
3681 if (ompt_enabled.enabled) {
3682 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3683 if (ompt_frame->enter_frame.ptr == NULL)
3684 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3686 OMPT_STORE_RETURN_ADDRESS(global_tid);
3689 __kmp_threads[global_tid]->th.th_ident = loc;
3692 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3693 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3694 retval = (retval != 0) ? (0) : (1);
3695#if OMPT_SUPPORT && OMPT_OPTIONAL
3696 if (ompt_enabled.enabled) {
3697 ompt_frame->enter_frame = ompt_data_none;
3703 if (__kmp_env_consistency_check) {
3705 __kmp_pop_sync(global_tid, ct_reduce, loc);
3714 if (teams_swapped) {
3715 __kmp_restore_swapped_teams(th, team, task_state);
3719 (
"__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3720 global_tid, packed_reduction_method, retval));
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3734 kmp_critical_name *lck) {
3736 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3738 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3739 __kmp_assert_valid_gtid(global_tid);
3741 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3743 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3745 if (packed_reduction_method == critical_reduce_block) {
3747 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3750 } else if (packed_reduction_method == empty_reduce_block) {
3757 } else if (packed_reduction_method == atomic_reduce_block) {
3764 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, tree_reduce_block)) {
3776 if (__kmp_env_consistency_check)
3777 __kmp_pop_sync(global_tid, ct_reduce, loc);
3779 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3780 global_tid, packed_reduction_method));
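/* Hedged sketch (not literal compiler output) of how a front end typically
   lowers "#pragma omp for reduction(+:sum) nowait" onto the two entry points
   above; red_data_t, red_combine and reduce_tail are our names, and the
   runtime types (ident_t, kmp_int32, kmp_critical_name) come from kmp.h. */
typedef struct {
  double sum;
} red_data_t;

static void red_combine(void *lhs, void *rhs) { /* passed as reduce_func */
  ((red_data_t *)lhs)->sum += ((red_data_t *)rhs)->sum;
}

static kmp_critical_name red_crit;

/* called by each thread after its loop chunk; priv holds its partial sum */
void reduce_tail(ident_t *loc, kmp_int32 gtid, double *sum, red_data_t *priv) {
  kmp_int32 rc = __kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(*priv),
                                      priv, red_combine, &red_crit);
  if (rc == 1) {
    *sum += priv->sum; /* this thread combines, then signals completion */
    __kmpc_end_reduce_nowait(loc, gtid, &red_crit);
  } else if (rc == 2) {
#pragma omp atomic
    *sum += priv->sum; /* runtime chose the atomic reduction method */
  } /* rc == 0: nothing more for this thread to do */
}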
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3803 size_t reduce_size, void *reduce_data,
3804 void (*reduce_func)(void *lhs_data, void *rhs_data),
3805 kmp_critical_name *lck) {
3808 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3811 int teams_swapped = 0, task_state;
3813 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3814 __kmp_assert_valid_gtid(global_tid);
3822 if (!TCR_4(__kmp_init_parallel))
3823 __kmp_parallel_initialize();
3825 __kmp_resume_if_soft_paused();
3828#if KMP_USE_DYNAMIC_LOCK
3829 if (__kmp_env_consistency_check)
3830 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3832 if (__kmp_env_consistency_check)
3833 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3836 th = __kmp_thread_from_gtid(global_tid);
3837 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3839 packed_reduction_method = __kmp_determine_reduction_method(
3840 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3841 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3843 OMPT_REDUCTION_DECL(th, global_tid);
3845 if (packed_reduction_method == critical_reduce_block) {
3847 OMPT_REDUCTION_BEGIN;
3848 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3851 } else if (packed_reduction_method == empty_reduce_block) {
3853 OMPT_REDUCTION_BEGIN;
3858 } else if (packed_reduction_method == atomic_reduce_block) {
3862 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, tree_reduce_block)) {
3869 ompt_frame_t *ompt_frame;
3870 if (ompt_enabled.enabled) {
3871 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3872 if (ompt_frame->enter_frame.ptr == NULL)
3873 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3875 OMPT_STORE_RETURN_ADDRESS(global_tid);
3878 __kmp_threads[global_tid]->th.th_ident = loc;
3882 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3883 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3884 retval = (retval != 0) ? (0) : (1);
3885#if OMPT_SUPPORT && OMPT_OPTIONAL
3886 if (ompt_enabled.enabled) {
3887 ompt_frame->enter_frame = ompt_data_none;
3893 if (__kmp_env_consistency_check) {
3895 __kmp_pop_sync(global_tid, ct_reduce, loc);
3904 if (teams_swapped) {
3905 __kmp_restore_swapped_teams(th, team, task_state);
KA_TRACE(10, ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3910 global_tid, packed_reduction_method, retval));
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3925 kmp_critical_name *lck) {
3927 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3930 int teams_swapped = 0, task_state;
3932 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3933 __kmp_assert_valid_gtid(global_tid);
3935 th = __kmp_thread_from_gtid(global_tid);
3936 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3938 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3942 OMPT_REDUCTION_DECL(th, global_tid);
3944 if (packed_reduction_method == critical_reduce_block) {
3945 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3951 ompt_frame_t *ompt_frame;
3952 if (ompt_enabled.enabled) {
3953 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3954 if (ompt_frame->enter_frame.ptr == NULL)
3955 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3957 OMPT_STORE_RETURN_ADDRESS(global_tid);
3960 __kmp_threads[global_tid]->th.th_ident = loc;
3962 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3963#if OMPT_SUPPORT && OMPT_OPTIONAL
3964 if (ompt_enabled.enabled) {
3965 ompt_frame->enter_frame = ompt_data_none;
3969 } else if (packed_reduction_method == empty_reduce_block) {
3977 ompt_frame_t *ompt_frame;
3978 if (ompt_enabled.enabled) {
3979 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3980 if (ompt_frame->enter_frame.ptr == NULL)
3981 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3983 OMPT_STORE_RETURN_ADDRESS(global_tid);
3986 __kmp_threads[global_tid]->th.th_ident = loc;
3988 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3989#if OMPT_SUPPORT && OMPT_OPTIONAL
3990 if (ompt_enabled.enabled) {
3991 ompt_frame->enter_frame = ompt_data_none;
3995 } else if (packed_reduction_method == atomic_reduce_block) {
3998 ompt_frame_t *ompt_frame;
3999 if (ompt_enabled.enabled) {
4000 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4001 if (ompt_frame->enter_frame.ptr == NULL)
4002 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4004 OMPT_STORE_RETURN_ADDRESS(global_tid);
4008 __kmp_threads[global_tid]->th.th_ident = loc;
4010 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4011#if OMPT_SUPPORT && OMPT_OPTIONAL
4012 if (ompt_enabled.enabled) {
4013 ompt_frame->enter_frame = ompt_data_none;
4017 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, tree_reduce_block)) {
4021 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
4029 if (teams_swapped) {
4030 __kmp_restore_swapped_teams(th, team, task_state);
4033 if (__kmp_env_consistency_check)
4034 __kmp_pop_sync(global_tid, ct_reduce, loc);
4036 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4037 global_tid, packed_reduction_method));
4042#undef __KMP_GET_REDUCTION_METHOD
4043#undef __KMP_SET_REDUCTION_METHOD
4047kmp_uint64 __kmpc_get_taskid() {
4052 gtid = __kmp_get_gtid();
4056 thread = __kmp_thread_from_gtid(gtid);
4057 return thread->th.th_current_task->td_task_id;
4061kmp_uint64 __kmpc_get_parent_taskid() {
4065 kmp_taskdata_t *parent_task;
4067 gtid = __kmp_get_gtid();
4071 thread = __kmp_thread_from_gtid(gtid);
4072 parent_task = thread->th.th_current_task->td_parent;
4073 return (parent_task == NULL ? 0 : parent_task->td_task_id);
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4089 const struct kmp_dim *dims) {
4090 __kmp_assert_valid_gtid(gtid);
4092 kmp_int64 last, trace_count;
4093 kmp_info_t *th = __kmp_threads[gtid];
4094 kmp_team_t *team = th->th.th_team;
4096 kmp_disp_t *pr_buf = th->th.th_dispatch;
4097 dispatch_shared_info_t *sh_buf;
4101 (
"__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4102 gtid, num_dims, !team->t.t_serialized));
4103 KMP_DEBUG_ASSERT(dims != NULL);
4104 KMP_DEBUG_ASSERT(num_dims > 0);
4106 if (team->t.t_serialized) {
4107 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4110 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4111 idx = pr_buf->th_doacross_buf_idx++;
4113 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4116 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4117 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4118 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4119 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4120 pr_buf->th_doacross_info[0] =
4121 (kmp_int64)num_dims;
4124 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4125 pr_buf->th_doacross_info[2] = dims[0].lo;
4126 pr_buf->th_doacross_info[3] = dims[0].up;
4127 pr_buf->th_doacross_info[4] = dims[0].st;
4129 for (j = 1; j < num_dims; ++j) {
4132 if (dims[j].st == 1) {
4134 range_length = dims[j].up - dims[j].lo + 1;
4136 if (dims[j].st > 0) {
4137 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4138 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4140 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4142 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4145 pr_buf->th_doacross_info[last++] = range_length;
4146 pr_buf->th_doacross_info[last++] = dims[j].lo;
4147 pr_buf->th_doacross_info[last++] = dims[j].up;
4148 pr_buf->th_doacross_info[last++] = dims[j].st;
4153 if (dims[0].st == 1) {
4154 trace_count = dims[0].up - dims[0].lo + 1;
4155 } else if (dims[0].st > 0) {
4156 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4157 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4159 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4160 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4162 for (j = 1; j < num_dims; ++j) {
4163 trace_count *= pr_buf->th_doacross_info[4 * j + 1];
4165 KMP_DEBUG_ASSERT(trace_count > 0);
4169 if (idx != sh_buf->doacross_buf_idx) {
4171 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4178 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4179 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4181 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4182 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4184 if (flags == NULL) {
4187 (size_t)trace_count / 8 + 8;
4188 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4190 sh_buf->doacross_flags = flags;
4191 } else if (flags == (kmp_uint32 *)1) {
4194 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4196 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4203 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1);
4204 pr_buf->th_doacross_flags =
4205 sh_buf->doacross_flags;
4207 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4210 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4211 __kmp_assert_valid_gtid(gtid);
4215 kmp_int64 iter_number;
4216 kmp_info_t *th = __kmp_threads[gtid];
4217 kmp_team_t *team = th->th.th_team;
4219 kmp_int64 lo, up, st;
4221 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4222 if (team->t.t_serialized) {
4223 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4228 pr_buf = th->th.th_dispatch;
4229 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4230 num_dims = (size_t)pr_buf->th_doacross_info[0];
4231 lo = pr_buf->th_doacross_info[2];
4232 up = pr_buf->th_doacross_info[3];
4233 st = pr_buf->th_doacross_info[4];
4234#if OMPT_SUPPORT && OMPT_OPTIONAL
4235 ompt_dependence_t deps[num_dims];
4238 if (vec[0] < lo || vec[0] > up) {
4239 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4240 "bounds [%lld,%lld]\n",
4241 gtid, vec[0], lo, up));
4244 iter_number = vec[0] - lo;
4245 } else if (st > 0) {
4246 if (vec[0] < lo || vec[0] > up) {
4247 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4248 "bounds [%lld,%lld]\n",
4249 gtid, vec[0], lo, up));
4252 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4254 if (vec[0] > lo || vec[0] < up) {
4255 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4256 "bounds [%lld,%lld]\n",
4257 gtid, vec[0], lo, up));
4260 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4262#if OMPT_SUPPORT && OMPT_OPTIONAL
4263 deps[0].variable.value = iter_number;
4264 deps[0].dependence_type = ompt_dependence_type_sink;
4266 for (i = 1; i < num_dims; ++i) {
4269 ln = pr_buf->th_doacross_info[j + 1];
4270 lo = pr_buf->th_doacross_info[j + 2];
4271 up = pr_buf->th_doacross_info[j + 3];
4272 st = pr_buf->th_doacross_info[j + 4];
4274 if (vec[i] < lo || vec[i] > up) {
4275 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4276 "bounds [%lld,%lld]\n",
4277 gtid, vec[i], lo, up));
4281 } else if (st > 0) {
4282 if (vec[i] < lo || vec[i] > up) {
4283 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4284 "bounds [%lld,%lld]\n",
4285 gtid, vec[i], lo, up));
4288 iter = (kmp_uint64)(vec[i] - lo) / st;
4290 if (vec[i] > lo || vec[i] < up) {
4291 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4292 "bounds [%lld,%lld]\n",
4293 gtid, vec[i], lo, up));
4296 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4298 iter_number = iter + ln * iter_number;
4299#if OMPT_SUPPORT && OMPT_OPTIONAL
4300 deps[i].variable.value = iter;
4301 deps[i].dependence_type = ompt_dependence_type_sink;
4304 shft = iter_number % 32;
4307 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4311#if OMPT_SUPPORT && OMPT_OPTIONAL
4312 if (ompt_enabled.ompt_callback_dependences) {
4313 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4314 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4318 (
"__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4319 gtid, (iter_number << 5) + shft));
4322 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4323 __kmp_assert_valid_gtid(gtid);
4327 kmp_int64 iter_number;
4328 kmp_info_t *th = __kmp_threads[gtid];
4329 kmp_team_t *team = th->th.th_team;
4333 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4334 if (team->t.t_serialized) {
4335 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4341 pr_buf = th->th.th_dispatch;
4342 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4343 num_dims = (size_t)pr_buf->th_doacross_info[0];
4344 lo = pr_buf->th_doacross_info[2];
4345 st = pr_buf->th_doacross_info[4];
4346#if OMPT_SUPPORT && OMPT_OPTIONAL
4347 ompt_dependence_t deps[num_dims];
4350 iter_number = vec[0] - lo;
4351 } else if (st > 0) {
4352 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4354 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4356#if OMPT_SUPPORT && OMPT_OPTIONAL
4357 deps[0].variable.value = iter_number;
4358 deps[0].dependence_type = ompt_dependence_type_source;
4360 for (i = 1; i < num_dims; ++i) {
4363 ln = pr_buf->th_doacross_info[j + 1];
4364 lo = pr_buf->th_doacross_info[j + 2];
4365 st = pr_buf->th_doacross_info[j + 4];
4368 } else if (st > 0) {
4369 iter = (kmp_uint64)(vec[i] - lo) / st;
4371 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4373 iter_number = iter + ln * iter_number;
4374#if OMPT_SUPPORT && OMPT_OPTIONAL
4375 deps[i].variable.value = iter;
4376 deps[i].dependence_type = ompt_dependence_type_source;
4379#if OMPT_SUPPORT && OMPT_OPTIONAL
4380 if (ompt_enabled.ompt_callback_dependences) {
4381 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4382 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4385 shft = iter_number % 32;
4389 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4390 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4391 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4392 (iter_number << 5) + shft));
4395 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4396 __kmp_assert_valid_gtid(gtid);
4398 kmp_info_t *th = __kmp_threads[gtid];
4399 kmp_team_t *team = th->th.th_team;
4400 kmp_disp_t *pr_buf = th->th.th_dispatch;
4402 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4403 if (team->t.t_serialized) {
4404 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4408 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4409 if (num_done == th->th.th_team_nproc) {
4411 int idx = pr_buf->th_doacross_buf_idx - 1;
4412 dispatch_shared_info_t *sh_buf =
4413 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4414 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4415 (kmp_int64)&sh_buf->doacross_num_done);
4416 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4417 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4418 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4419 sh_buf->doacross_flags = NULL;
4420 sh_buf->doacross_num_done = 0;
4421 sh_buf->doacross_buf_idx +=
4422 __kmp_dispatch_num_buffers;
4425 pr_buf->th_doacross_flags = NULL;
4426 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4427 pr_buf->th_doacross_info = NULL;
4428 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4432 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4433 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4436 void *omp_aligned_alloc(size_t align, size_t size,
4437 omp_allocator_handle_t allocator) {
4438 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4441 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4442 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4445 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4446 omp_allocator_handle_t allocator) {
4447 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4450 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4451 omp_allocator_handle_t free_allocator) {
4452 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator, free_allocator);
4456 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4457 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
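/* Illustrative use of the allocation entry points above through the standard
   OpenMP memory-management API; buffer names are ours. */
#include <omp.h>
#include <stddef.h>

double *make_buffer(size_t n) {
  /* plain allocation from the default memory space */
  double *a = (double *)omp_alloc(n * sizeof(double), omp_default_mem_alloc);
  /* 64-byte-aligned variant from the same allocator */
  double *b = (double *)omp_aligned_alloc(64, n * sizeof(double),
                                          omp_default_mem_alloc);
  omp_free(b, omp_default_mem_alloc);
  return a; /* caller releases it with omp_free(a, omp_default_mem_alloc) */
}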
4461 int __kmpc_get_target_offload(void) {
4462 if (!__kmp_init_serial) {
4463 __kmp_serial_initialize();
4465 return __kmp_target_offload;
4468int __kmpc_pause_resource(kmp_pause_status_t level) {
4469 if (!__kmp_init_serial) {
4472 return __kmp_pause_resource(level);
4475 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4476 if (!__kmp_init_serial)
4477 __kmp_serial_initialize();
4479 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4482 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4483 ompt_callbacks.ompt_callback(ompt_callback_error)(
4484 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4485 OMPT_GET_RETURN_ADDRESS(0));
4491 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4493 src_loc = __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4494 __kmp_str_loc_free(&str_loc);
4496 src_loc = __kmp_str_format("unknown");
4499 if (severity == severity_warning)
4500 KMP_WARNING(UserDirectedWarning, src_loc, message);
4502 KMP_FATAL(UserDirectedError, src_loc, message);
4504 __kmp_str_free(&src_loc);
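/* Illustrative OpenMP 5.1 source that is typically lowered to a runtime call
   like __kmpc_error: at(execution) defers the diagnostic to run time, and the
   runtime prints the location recovered from loc->psource as shown above. */
void check_input(int n) {
  if (n <= 0) {
#pragma omp error at(execution) severity(warning) message("n must be positive")
  }
}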
4508 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4510#if OMPT_SUPPORT && OMPT_OPTIONAL
4511 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4512 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4513 int tid = __kmp_tid_from_gtid(gtid);
4514 ompt_callbacks.ompt_callback(ompt_callback_work)(
4515 ompt_work_scope, ompt_scope_begin,
4516 &(team->t.ompt_team_info.parallel_data),
4517 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4518 OMPT_GET_RETURN_ADDRESS(0));
4524 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4526#if OMPT_SUPPORT && OMPT_OPTIONAL
4527 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4528 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4529 int tid = __kmp_tid_from_gtid(gtid);
4530 ompt_callbacks.ompt_callback(ompt_callback_work)(
4531 ompt_work_scope, ompt_scope_end,
4532 &(team->t.ompt_team_info.parallel_data),
4533 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4534 OMPT_GET_RETURN_ADDRESS(0));
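/* Illustrative OpenMP 5.1 scope construct; when OMPT is enabled the runtime
   reports it through the ompt_work_scope begin/end callbacks dispatched
   above. The counting example is ours. */
#include <omp.h>
#include <stdio.h>

void count_participants(void) {
  int count = 0;
#pragma omp parallel shared(count)
  {
#pragma omp scope reduction(+ : count)
    {
      count += 1; /* private copies are combined at the end of the scope */
    }
  }
  printf("%d threads participated\n", count);
}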
4539#ifdef KMP_USE_VERSION_SYMBOLS
4548#ifdef omp_set_affinity_format
4549#undef omp_set_affinity_format
4551#ifdef omp_get_affinity_format
4552#undef omp_get_affinity_format
4554#ifdef omp_display_affinity
4555#undef omp_display_affinity
4557#ifdef omp_capture_affinity
4558#undef omp_capture_affinity
4560KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4562KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4564KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4566KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
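/* Illustrative use of the affinity-format API whose versioned ompc_* aliases
   are exported above (OpenMP 5.0); the format string is an example only. */
#include <omp.h>
#include <stdio.h>

void show_bindings(void) {
  omp_set_affinity_format("thread %n of %N bound to %A");
#pragma omp parallel
  {
    char buf[256];
    omp_capture_affinity(buf, sizeof(buf), NULL); /* NULL: use the stored format */
    printf("%s\n", buf);
  }
}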