LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#define __KMP_IMP
14#include "omp.h" /* extern "C" declarations of user-visible routines */
15#include "kmp.h"
16#include "kmp_error.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_lock.h"
20#include "kmp_stats.h"
21#include "ompt-specific.h"
22
23#define MAX_MESSAGE 512
24
25// flags will be used in future, e.g. to implement openmp_strict library
26// restrictions
27
36void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37 // By default __kmpc_begin() is a no-op.
38 char *env;
39 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40 __kmp_str_match_true(env)) {
41 __kmp_middle_initialize();
42 __kmp_assign_root_init_mask();
43 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44 } else if (__kmp_ignore_mppbeg() == FALSE) {
45 // By default __kmp_ignore_mppbeg() returns TRUE.
46 __kmp_internal_begin();
47 KC_TRACE(10, ("__kmpc_begin: called\n"));
48 }
49}
50
59void __kmpc_end(ident_t *loc) {
60 // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end()
61 // call a no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
62 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63 // returns FALSE and __kmpc_end() will unregister this root (which can cause
64 // library shutdown).
65 if (__kmp_ignore_mppend() == FALSE) {
66 KC_TRACE(10, ("__kmpc_end: called\n"));
67 KA_TRACE(30, ("__kmpc_end\n"));
68
69 __kmp_internal_end_thread(-1);
70 }
71#if KMP_OS_WINDOWS && OMPT_SUPPORT
72 // Normal exit process on Windows does not allow worker threads of the final
73 // parallel region to finish reporting their events, so shutting down the
74 // library here fixes the issue at least for the cases where __kmpc_end() is
75 // placed properly.
76 if (ompt_enabled.enabled)
77 __kmp_internal_end_library(__kmp_gtid_get_specific());
78#endif
79}
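// Illustrative sketch (not runtime code): a compiler targeting this interface
// may bracket the program with these two entry points. The loc descriptor
// shown is only a plausible example of the ident_t layout and of the
// ";file;routine;line;line;;" psource string parsed later in this file.
//
//   static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;main;1;1;;"};
//   int main(void) {
//     __kmpc_begin(&loc, 0); // no-op unless __kmp_ignore_mppbeg() returns FALSE
//                            // or KMP_INITIAL_THREAD_BIND is set
//     /* ... OpenMP constructs ... */
//     __kmpc_end(&loc);      // unregisters this root only if KMP_IGNORE_MPPEND=0
//     return 0;
//   }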
80
99kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
100 kmp_int32 gtid = __kmp_entry_gtid();
101
102 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103
104 return gtid;
105}
106
121kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
122 KC_TRACE(10,
123 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124
125 return TCR_4(__kmp_all_nth);
126}
127
134kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
135 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136 return __kmp_tid_from_gtid(__kmp_entry_gtid());
137}
138
144kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
145 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146
147 return __kmp_entry_thread()->th.th_team->t.t_nproc;
148}
149
156kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157#ifndef KMP_DEBUG
158
159 return TRUE;
160
161#else
162
163 const char *semi2;
164 const char *semi3;
165 int line_no;
166
167 if (__kmp_par_range == 0) {
168 return TRUE;
169 }
170 semi2 = loc->psource;
171 if (semi2 == NULL) {
172 return TRUE;
173 }
174 semi2 = strchr(semi2, ';');
175 if (semi2 == NULL) {
176 return TRUE;
177 }
178 semi2 = strchr(semi2 + 1, ';');
179 if (semi2 == NULL) {
180 return TRUE;
181 }
182 if (__kmp_par_range_filename[0]) {
183 const char *name = semi2 - 1;
184 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185 name--;
186 }
187 if ((*name == '/') || (*name == ';')) {
188 name++;
189 }
190 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191 return __kmp_par_range < 0;
192 }
193 }
194 semi3 = strchr(semi2 + 1, ';');
195 if (__kmp_par_range_routine[0]) {
196 if ((semi3 != NULL) && (semi3 > semi2) &&
197 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198 return __kmp_par_range < 0;
199 }
200 }
201 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203 return __kmp_par_range > 0;
204 }
205 return __kmp_par_range < 0;
206 }
207 return TRUE;
208
209#endif /* KMP_DEBUG */
210}
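// Note: the filtering above assumes loc->psource uses the semicolon-separated
// layout ";file;routine;first_line;...", for example:
//
//   ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";foo.c;bar;12;20;;"};
//
// When the parallel-range debug filter is active (__kmp_par_range != 0),
// "foo.c" is compared against __kmp_par_range_filename, "bar" against
// __kmp_par_range_routine, and 12 against [__kmp_par_range_lb,
// __kmp_par_range_ub]. A positive __kmp_par_range allows forking only for
// matching regions; a negative value suppresses forking only for them.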
211
218kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219 return __kmp_entry_thread()->th.th_root->r.r_active;
220}
221
231void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232 kmp_int32 num_threads) {
233 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234 global_tid, num_threads));
235 __kmp_assert_valid_gtid(global_tid);
236 __kmp_push_num_threads(loc, global_tid, num_threads);
237}
238
239void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241 /* the num_threads are automatically popped */
242}
243
244void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245 kmp_int32 proc_bind) {
246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247 proc_bind));
248 __kmp_assert_valid_gtid(global_tid);
249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250}
251
262void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263 int gtid = __kmp_entry_gtid();
264
265#if (KMP_STATS_ENABLED)
266 // If we were in a serial region, then stop the serial timer, record
267 // the event, and start parallel region timer
268 stats_state_e previous_state = KMP_GET_THREAD_STATE();
269 if (previous_state == stats_state_e::SERIAL_REGION) {
270 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271 } else {
272 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273 }
274 int inParallel = __kmpc_in_parallel(loc);
275 if (inParallel) {
276 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277 } else {
278 KMP_COUNT_BLOCK(OMP_PARALLEL);
279 }
280#endif
281
282 // Maybe saving thr_state is enough here
283 {
284 va_list ap;
285 va_start(ap, microtask);
286
287#if OMPT_SUPPORT
288 ompt_frame_t *ompt_frame;
289 if (ompt_enabled.enabled) {
290 kmp_info_t *master_th = __kmp_threads[gtid];
291 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293 }
294 OMPT_STORE_RETURN_ADDRESS(gtid);
295#endif
296
297#if INCLUDE_SSC_MARKS
298 SSC_MARK_FORKING();
299#endif
300 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303 kmp_va_addr_of(ap));
304#if INCLUDE_SSC_MARKS
305 SSC_MARK_JOINING();
306#endif
307 __kmp_join_call(loc, gtid
308#if OMPT_SUPPORT
309 ,
310 fork_context_intel
311#endif
312 );
313
314 va_end(ap);
315
316#if OMPT_SUPPORT
317 if (ompt_enabled.enabled) {
318 ompt_frame->enter_frame = ompt_data_none;
319 }
320#endif
321 }
322
323#if KMP_STATS_ENABLED
324 if (previous_state == stats_state_e::SERIAL_REGION) {
325 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326 KMP_SET_THREAD_STATE(previous_state);
327 } else {
328 KMP_POP_PARTITIONED_TIMER();
329 }
330#endif // KMP_STATS_ENABLED
331}
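// Illustrative sketch (not runtime code) of how a compiler typically lowers
//
//   #pragma omp parallel shared(x)
//   { x++; }
//
// onto this entry point; the outlined routine and loc descriptor names are
// hypothetical:
//
//   static void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *x) { (*x)++; }
//   ...
//   int x = 0;
//   __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined, &x);
//
// The argc trailing arguments are forwarded to the outlined routine after the
// global and bound thread id pointers.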
332
343void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
344 kmp_int32 cond, void *args) {
345 int gtid = __kmp_entry_gtid();
346 if (cond) {
347 if (args)
348 __kmpc_fork_call(loc, argc, microtask, args);
349 else
350 __kmpc_fork_call(loc, argc, microtask);
351 } else {
352 __kmpc_serialized_parallel(loc, gtid);
353
354#if OMPT_SUPPORT
355 void *exit_frame_ptr;
356#endif
357
358 if (args)
359 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
360 /*npr=*/0,
361 /*argc=*/1, &args
362#if OMPT_SUPPORT
363 ,
364 &exit_frame_ptr
365#endif
366 );
367 else
368 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
369 /*npr=*/0,
370 /*argc=*/0,
371 /*args=*/nullptr
372#if OMPT_SUPPORT
373 ,
374 &exit_frame_ptr
375#endif
376 );
377
378 __kmpc_end_serialized_parallel(loc, gtid);
379 }
380}
381
393void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
394 kmp_int32 num_teams, kmp_int32 num_threads) {
395 KA_TRACE(20,
396 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
397 global_tid, num_teams, num_threads));
398 __kmp_assert_valid_gtid(global_tid);
399 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
400}
401
412void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
413 kmp_int32 thread_limit) {
414 __kmp_assert_valid_gtid(global_tid);
415 kmp_info_t *thread = __kmp_threads[global_tid];
416 if (thread_limit > 0)
417 thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
418}
419
436void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
437 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
438 kmp_int32 num_threads) {
439 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
440 " num_teams_ub=%d num_threads=%d\n",
441 global_tid, num_teams_lb, num_teams_ub, num_threads));
442 __kmp_assert_valid_gtid(global_tid);
443 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
444 num_threads);
445}
446
457void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
458 ...) {
459 int gtid = __kmp_entry_gtid();
460 kmp_info_t *this_thr = __kmp_threads[gtid];
461 va_list ap;
462 va_start(ap, microtask);
463
464#if KMP_STATS_ENABLED
465 KMP_COUNT_BLOCK(OMP_TEAMS);
466 stats_state_e previous_state = KMP_GET_THREAD_STATE();
467 if (previous_state == stats_state_e::SERIAL_REGION) {
468 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
469 } else {
470 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
471 }
472#endif
473
474 // remember teams entry point and nesting level
475 this_thr->th.th_teams_microtask = microtask;
476 this_thr->th.th_teams_level =
477 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
478
479#if OMPT_SUPPORT
480 kmp_team_t *parent_team = this_thr->th.th_team;
481 int tid = __kmp_tid_from_gtid(gtid);
482 if (ompt_enabled.enabled) {
483 parent_team->t.t_implicit_task_taskdata[tid]
484 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
485 }
486 OMPT_STORE_RETURN_ADDRESS(gtid);
487#endif
488
489 // check if __kmpc_push_num_teams was called; set the default number of teams
490 // otherwise
491 if (this_thr->th.th_teams_size.nteams == 0) {
492 __kmp_push_num_teams(loc, gtid, 0, 0);
493 }
494 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
495 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
496 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
497
498 __kmp_fork_call(
499 loc, gtid, fork_context_intel, argc,
500 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
501 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
502 __kmp_join_call(loc, gtid
503#if OMPT_SUPPORT
504 ,
505 fork_context_intel
506#endif
507 );
508
509 // Pop current CG root off list
510 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
511 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
512 this_thr->th.th_cg_roots = tmp->up;
513 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
514 " to node %p. cg_nthreads was %d\n",
515 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
516 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
517 int i = tmp->cg_nthreads--;
518 if (i == 1) { // check if we are the last thread in CG (not always the case)
519 __kmp_free(tmp);
520 }
521 // Restore current task's thread_limit from CG root
522 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
523 this_thr->th.th_current_task->td_icvs.thread_limit =
524 this_thr->th.th_cg_roots->cg_thread_limit;
525
526 this_thr->th.th_teams_microtask = NULL;
527 this_thr->th.th_teams_level = 0;
528 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
529 va_end(ap);
530#if KMP_STATS_ENABLED
531 if (previous_state == stats_state_e::SERIAL_REGION) {
532 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
533 KMP_SET_THREAD_STATE(previous_state);
534 } else {
535 KMP_POP_PARTITIONED_TIMER();
536 }
537#endif // KMP_STATS_ENABLED
538}
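// Illustrative sketch (not runtime code): a construct such as
// "#pragma omp teams num_teams(4) thread_limit(8)" is typically lowered as a
// push of the teams sizes followed by the teams fork; the outlined routine
// name is hypothetical:
//
//   __kmpc_push_num_teams(&loc, gtid, /*num_teams=*/4, /*num_threads=*/8);
//   __kmpc_fork_teams(&loc, /*argc=*/1, (kmpc_micro)teams_outlined, &x);
//
// If no sizes were pushed, the defaulting call above
// (__kmp_push_num_teams(loc, gtid, 0, 0)) chooses them.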
539
540// I don't think this function should ever have been exported.
541// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
542// openmp code ever called it, but it's been exported from the RTL for so
543// long that I'm afraid to remove the definition.
544int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
545
558void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
559 // The implementation is now in kmp_runtime.cpp so that it can share static
560 // functions with kmp_fork_call since the tasks to be done are similar in
561 // each case.
562 __kmp_assert_valid_gtid(global_tid);
563#if OMPT_SUPPORT
564 OMPT_STORE_RETURN_ADDRESS(global_tid);
565#endif
566 __kmp_serialized_parallel(loc, global_tid);
567}
568
576void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
577 kmp_internal_control_t *top;
578 kmp_info_t *this_thr;
579 kmp_team_t *serial_team;
580
581 KC_TRACE(10,
582 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
583
584 /* skip all this code for autopar serialized loops since it results in
585 unacceptable overhead */
586 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
587 return;
588
589 // Not autopar code
590 __kmp_assert_valid_gtid(global_tid);
591 if (!TCR_4(__kmp_init_parallel))
592 __kmp_parallel_initialize();
593
594 __kmp_resume_if_soft_paused();
595
596 this_thr = __kmp_threads[global_tid];
597 serial_team = this_thr->th.th_serial_team;
598
599 kmp_task_team_t *task_team = this_thr->th.th_task_team;
600 // we need to wait for the proxy tasks before finishing the thread
601 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
602 task_team->tt.tt_hidden_helper_task_encountered))
603 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
604
605 KMP_MB();
606 KMP_DEBUG_ASSERT(serial_team);
607 KMP_ASSERT(serial_team->t.t_serialized);
608 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
609 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
610 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
611 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
612
613#if OMPT_SUPPORT
614 if (ompt_enabled.enabled &&
615 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
616 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
617 if (ompt_enabled.ompt_callback_implicit_task) {
618 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
619 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
620 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
621 }
622
623 // clear the task id only after unlinking the task
624 ompt_data_t *parent_task_data;
625 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
626
627 if (ompt_enabled.ompt_callback_parallel_end) {
628 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
629 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
630 ompt_parallel_invoker_program | ompt_parallel_team,
631 OMPT_LOAD_RETURN_ADDRESS(global_tid));
632 }
633 __ompt_lw_taskteam_unlink(this_thr);
634 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
635 }
636#endif
637
638 /* If necessary, pop the internal control stack values and replace the team
639 * values */
640 top = serial_team->t.t_control_stack_top;
641 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
642 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
643 serial_team->t.t_control_stack_top = top->next;
644 __kmp_free(top);
645 }
646
647 /* pop dispatch buffers stack */
648 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
649 {
650 dispatch_private_info_t *disp_buffer =
651 serial_team->t.t_dispatch->th_disp_buffer;
652 serial_team->t.t_dispatch->th_disp_buffer =
653 serial_team->t.t_dispatch->th_disp_buffer->next;
654 __kmp_free(disp_buffer);
655 }
656 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
657
658 --serial_team->t.t_serialized;
659 if (serial_team->t.t_serialized == 0) {
660
661 /* return to the parallel section */
662
663#if KMP_ARCH_X86 || KMP_ARCH_X86_64
664 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
665 __kmp_clear_x87_fpu_status_word();
666 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
667 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
668 }
669#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
670
671 __kmp_pop_current_task_from_thread(this_thr);
672#if OMPD_SUPPORT
673 if (ompd_state & OMPD_ENABLE_BP)
674 ompd_bp_parallel_end();
675#endif
676
677 this_thr->th.th_team = serial_team->t.t_parent;
678 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
679
680 /* restore values cached in the thread */
681 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
682 this_thr->th.th_team_master =
683 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
684 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
685
686 /* TODO the below shouldn't need to be adjusted for serialized teams */
687 this_thr->th.th_dispatch =
688 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
689
690 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
691 this_thr->th.th_current_task->td_flags.executing = 1;
692
693 if (__kmp_tasking_mode != tskm_immediate_exec) {
694 // Copy the task team from the new child / old parent team to the thread.
695 this_thr->th.th_task_team =
696 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
697 KA_TRACE(20,
698 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
699 "team %p\n",
700 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
701 }
702#if KMP_AFFINITY_SUPPORTED
703 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
704 __kmp_reset_root_init_mask(global_tid);
705 }
706#endif
707 } else {
708 if (__kmp_tasking_mode != tskm_immediate_exec) {
709 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
710 "depth of serial team %p to %d\n",
711 global_tid, serial_team, serial_team->t.t_serialized));
712 }
713 }
714
715 serial_team->t.t_level--;
716 if (__kmp_env_consistency_check)
717 __kmp_pop_parallel(global_tid, NULL);
718#if OMPT_SUPPORT
719 if (ompt_enabled.enabled)
720 this_thr->th.ompt_thread_info.state =
721 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
722 : ompt_state_work_parallel);
723#endif
724}
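// Illustrative sketch (not runtime code): a parallel region that is known to
// be serialized (e.g. "#pragma omp parallel if(0)") is typically lowered
// without __kmpc_fork_call, mirroring the else branch of __kmpc_fork_call_if
// above:
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc), zero = 0;
//   __kmpc_serialized_parallel(&loc, gtid);
//   outlined(&gtid, &zero, &x); // outlined body invoked directly
//   __kmpc_end_serialized_parallel(&loc, gtid);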
725
734void __kmpc_flush(ident_t *loc) {
735 KC_TRACE(10, ("__kmpc_flush: called\n"));
736
737 /* need an explicit __mf() here since the library uses volatile instead */
738 KMP_MFENCE(); /* Flush all pending memory write invalidates. */
739
740#if OMPT_SUPPORT && OMPT_OPTIONAL
741 if (ompt_enabled.ompt_callback_flush) {
742 ompt_callbacks.ompt_callback(ompt_callback_flush)(
743 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
744 }
745#endif
746}
747
748/* -------------------------------------------------------------------------- */
756void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
757 KMP_COUNT_BLOCK(OMP_BARRIER);
758 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
759 __kmp_assert_valid_gtid(global_tid);
760
761 if (!TCR_4(__kmp_init_parallel))
762 __kmp_parallel_initialize();
763
764 __kmp_resume_if_soft_paused();
765
766 if (__kmp_env_consistency_check) {
767 if (loc == 0) {
768 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
769 }
770 __kmp_check_barrier(global_tid, ct_barrier, loc);
771 }
772
773#if OMPT_SUPPORT
774 ompt_frame_t *ompt_frame;
775 if (ompt_enabled.enabled) {
776 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
777 if (ompt_frame->enter_frame.ptr == NULL)
778 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
779 }
780 OMPT_STORE_RETURN_ADDRESS(global_tid);
781#endif
782 __kmp_threads[global_tid]->th.th_ident = loc;
783 // TODO: explicit barrier_wait_id:
784 // this function is called when 'barrier' directive is present or
785 // implicit barrier at the end of a worksharing construct.
786 // 1) better to add a per-thread barrier counter to a thread data structure
787 // 2) set to 0 when a new team is created
788 // 3) no sync is required
789
790 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
791#if OMPT_SUPPORT && OMPT_OPTIONAL
792 if (ompt_enabled.enabled) {
793 ompt_frame->enter_frame = ompt_data_none;
794 }
795#endif
796}
797
798/* The BARRIER for a MASTER section is always explicit */
805kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
806 int status = 0;
807
808 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
809 __kmp_assert_valid_gtid(global_tid);
810
811 if (!TCR_4(__kmp_init_parallel))
812 __kmp_parallel_initialize();
813
814 __kmp_resume_if_soft_paused();
815
816 if (KMP_MASTER_GTID(global_tid)) {
817 KMP_COUNT_BLOCK(OMP_MASTER);
818 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
819 status = 1;
820 }
821
822#if OMPT_SUPPORT && OMPT_OPTIONAL
823 if (status) {
824 if (ompt_enabled.ompt_callback_masked) {
825 kmp_info_t *this_thr = __kmp_threads[global_tid];
826 kmp_team_t *team = this_thr->th.th_team;
827
828 int tid = __kmp_tid_from_gtid(global_tid);
829 ompt_callbacks.ompt_callback(ompt_callback_masked)(
830 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
831 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
832 OMPT_GET_RETURN_ADDRESS(0));
833 }
834 }
835#endif
836
837 if (__kmp_env_consistency_check) {
838#if KMP_USE_DYNAMIC_LOCK
839 if (status)
840 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
841 else
842 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
843#else
844 if (status)
845 __kmp_push_sync(global_tid, ct_master, loc, NULL);
846 else
847 __kmp_check_sync(global_tid, ct_master, loc, NULL);
848#endif
849 }
850
851 return status;
852}
853
862void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
863 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
864 __kmp_assert_valid_gtid(global_tid);
865 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
866 KMP_POP_PARTITIONED_TIMER();
867
868#if OMPT_SUPPORT && OMPT_OPTIONAL
869 kmp_info_t *this_thr = __kmp_threads[global_tid];
870 kmp_team_t *team = this_thr->th.th_team;
871 if (ompt_enabled.ompt_callback_masked) {
872 int tid = __kmp_tid_from_gtid(global_tid);
873 ompt_callbacks.ompt_callback(ompt_callback_masked)(
874 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
875 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
876 OMPT_GET_RETURN_ADDRESS(0));
877 }
878#endif
879
880 if (__kmp_env_consistency_check) {
881 if (KMP_MASTER_GTID(global_tid))
882 __kmp_pop_sync(global_tid, ct_master, loc);
883 }
884}
885
894kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
895 int status = 0;
896 int tid;
897 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
898 __kmp_assert_valid_gtid(global_tid);
899
900 if (!TCR_4(__kmp_init_parallel))
901 __kmp_parallel_initialize();
902
903 __kmp_resume_if_soft_paused();
904
905 tid = __kmp_tid_from_gtid(global_tid);
906 if (tid == filter) {
907 KMP_COUNT_BLOCK(OMP_MASKED);
908 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
909 status = 1;
910 }
911
912#if OMPT_SUPPORT && OMPT_OPTIONAL
913 if (status) {
914 if (ompt_enabled.ompt_callback_masked) {
915 kmp_info_t *this_thr = __kmp_threads[global_tid];
916 kmp_team_t *team = this_thr->th.th_team;
917 ompt_callbacks.ompt_callback(ompt_callback_masked)(
918 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
919 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
920 OMPT_GET_RETURN_ADDRESS(0));
921 }
922 }
923#endif
924
925 if (__kmp_env_consistency_check) {
926#if KMP_USE_DYNAMIC_LOCK
927 if (status)
928 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
929 else
930 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
931#else
932 if (status)
933 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
934 else
935 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
936#endif
937 }
938
939 return status;
940}
941
950void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
951 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
952 __kmp_assert_valid_gtid(global_tid);
953 KMP_POP_PARTITIONED_TIMER();
954
955#if OMPT_SUPPORT && OMPT_OPTIONAL
956 kmp_info_t *this_thr = __kmp_threads[global_tid];
957 kmp_team_t *team = this_thr->th.th_team;
958 if (ompt_enabled.ompt_callback_masked) {
959 int tid = __kmp_tid_from_gtid(global_tid);
960 ompt_callbacks.ompt_callback(ompt_callback_masked)(
961 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
962 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
963 OMPT_GET_RETURN_ADDRESS(0));
964 }
965#endif
966
967 if (__kmp_env_consistency_check) {
968 __kmp_pop_sync(global_tid, ct_masked, loc);
969 }
970}
971
979void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
980 int cid = 0;
981 kmp_info_t *th;
982 KMP_DEBUG_ASSERT(__kmp_init_serial);
983
984 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
985 __kmp_assert_valid_gtid(gtid);
986
987 if (!TCR_4(__kmp_init_parallel))
988 __kmp_parallel_initialize();
989
990 __kmp_resume_if_soft_paused();
991
992#if USE_ITT_BUILD
993 __kmp_itt_ordered_prep(gtid);
994// TODO: ordered_wait_id
995#endif /* USE_ITT_BUILD */
996
997 th = __kmp_threads[gtid];
998
999#if OMPT_SUPPORT && OMPT_OPTIONAL
1000 kmp_team_t *team;
1001 ompt_wait_id_t lck;
1002 void *codeptr_ra;
1003 OMPT_STORE_RETURN_ADDRESS(gtid);
1004 if (ompt_enabled.enabled) {
1005 team = __kmp_team_from_gtid(gtid);
1006 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
1007 /* OMPT state update */
1008 th->th.ompt_thread_info.wait_id = lck;
1009 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
1010
1011 /* OMPT event callback */
1012 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1013 if (ompt_enabled.ompt_callback_mutex_acquire) {
1014 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1015 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
1016 codeptr_ra);
1017 }
1018 }
1019#endif
1020
1021 if (th->th.th_dispatch->th_deo_fcn != 0)
1022 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
1023 else
1024 __kmp_parallel_deo(&gtid, &cid, loc);
1025
1026#if OMPT_SUPPORT && OMPT_OPTIONAL
1027 if (ompt_enabled.enabled) {
1028 /* OMPT state update */
1029 th->th.ompt_thread_info.state = ompt_state_work_parallel;
1030 th->th.ompt_thread_info.wait_id = 0;
1031
1032 /* OMPT event callback */
1033 if (ompt_enabled.ompt_callback_mutex_acquired) {
1034 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1035 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1036 }
1037 }
1038#endif
1039
1040#if USE_ITT_BUILD
1041 __kmp_itt_ordered_start(gtid);
1042#endif /* USE_ITT_BUILD */
1043}
1044
1052void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1053 int cid = 0;
1054 kmp_info_t *th;
1055
1056 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1057 __kmp_assert_valid_gtid(gtid);
1058
1059#if USE_ITT_BUILD
1060 __kmp_itt_ordered_end(gtid);
1061// TODO: ordered_wait_id
1062#endif /* USE_ITT_BUILD */
1063
1064 th = __kmp_threads[gtid];
1065
1066 if (th->th.th_dispatch->th_dxo_fcn != 0)
1067 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1068 else
1069 __kmp_parallel_dxo(&gtid, &cid, loc);
1070
1071#if OMPT_SUPPORT && OMPT_OPTIONAL
1072 OMPT_STORE_RETURN_ADDRESS(gtid);
1073 if (ompt_enabled.ompt_callback_mutex_released) {
1074 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1075 ompt_mutex_ordered,
1076 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1077 ->t.t_ordered.dt.t_value,
1078 OMPT_LOAD_RETURN_ADDRESS(gtid));
1079 }
1080#endif
1081}
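// Illustrative sketch (not runtime code): the body of "#pragma omp ordered"
// inside an ordered loop chunk is bracketed by these two calls; the loop must
// have been set up by an ordered dispatch so that th_deo_fcn/th_dxo_fcn are
// set, otherwise the __kmp_parallel_deo/__kmp_parallel_dxo fallbacks are used.
//
//   for (i = lower; i <= upper; i += incr) {
//     __kmpc_ordered(&loc, gtid);
//     /* ordered body, executed in iteration order */
//     __kmpc_end_ordered(&loc, gtid);
//   }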
1082
1083#if KMP_USE_DYNAMIC_LOCK
1084
1085static __forceinline void
1086__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1087 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1088 // Pointer to the allocated indirect lock is written to crit, while indexing
1089 // is ignored.
1090 void *idx;
1091 kmp_indirect_lock_t **lck;
1092 lck = (kmp_indirect_lock_t **)crit;
1093 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1094 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1095 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1096 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1097 KA_TRACE(20,
1098 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1099#if USE_ITT_BUILD
1100 __kmp_itt_critical_creating(ilk->lock, loc);
1101#endif
1102 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1103 if (status == 0) {
1104#if USE_ITT_BUILD
1105 __kmp_itt_critical_destroyed(ilk->lock);
1106#endif
1107 // We don't really need to destroy the unclaimed lock here since it will be
1108 // cleaned up at program exit.
1109 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1110 }
1111 KMP_DEBUG_ASSERT(*lck != NULL);
1112}
1113
1114// Fast-path acquire tas lock
1115#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1116 { \
1117 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1118 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1119 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1120 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1121 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1122 kmp_uint32 spins; \
1123 KMP_FSYNC_PREPARE(l); \
1124 KMP_INIT_YIELD(spins); \
1125 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1126 do { \
1127 if (TCR_4(__kmp_nth) > \
1128 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1129 KMP_YIELD(TRUE); \
1130 } else { \
1131 KMP_YIELD_SPIN(spins); \
1132 } \
1133 __kmp_spin_backoff(&backoff); \
1134 } while ( \
1135 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1136 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1137 } \
1138 KMP_FSYNC_ACQUIRED(l); \
1139 }
1140
1141// Fast-path test tas lock
1142#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1143 { \
1144 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1145 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1146 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1147 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1148 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1149 }
1150
1151// Fast-path release tas lock
1152#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1153 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
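// Protocol implemented by the three macros above: lk.poll holds
// KMP_LOCK_FREE(tas) when the lock is free and KMP_LOCK_BUSY(gtid + 1, tas)
// when owned. Acquisition first does a relaxed load to avoid hammering the
// cache line, then an acquire compare-and-swap; on failure the thread yields
// (unconditionally when oversubscribed) and applies exponential backoff
// before retrying. Release is a single store-release of the free value.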
1154
1155#if KMP_USE_FUTEX
1156
1157#include <sys/syscall.h>
1158#include <unistd.h>
1159#ifndef FUTEX_WAIT
1160#define FUTEX_WAIT 0
1161#endif
1162#ifndef FUTEX_WAKE
1163#define FUTEX_WAKE 1
1164#endif
1165
1166// Fast-path acquire futex lock
1167#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1168 { \
1169 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1170 kmp_int32 gtid_code = (gtid + 1) << 1; \
1171 KMP_MB(); \
1172 KMP_FSYNC_PREPARE(ftx); \
1173 kmp_int32 poll_val; \
1174 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1175 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1176 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1177 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1178 if (!cond) { \
1179 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1180 poll_val | \
1181 KMP_LOCK_BUSY(1, futex))) { \
1182 continue; \
1183 } \
1184 poll_val |= KMP_LOCK_BUSY(1, futex); \
1185 } \
1186 kmp_int32 rc; \
1187 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1188 NULL, NULL, 0)) != 0) { \
1189 continue; \
1190 } \
1191 gtid_code |= 1; \
1192 } \
1193 KMP_FSYNC_ACQUIRED(ftx); \
1194 }
1195
1196// Fast-path test futex lock
1197#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1198 { \
1199 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1200 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1201 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1202 KMP_FSYNC_ACQUIRED(ftx); \
1203 rc = TRUE; \
1204 } else { \
1205 rc = FALSE; \
1206 } \
1207 }
1208
1209// Fast-path release futex lock
1210#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1211 { \
1212 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1213 KMP_MB(); \
1214 KMP_FSYNC_RELEASING(ftx); \
1215 kmp_int32 poll_val = \
1216 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1217 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1218 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1219 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1220 } \
1221 KMP_MB(); \
1222 KMP_YIELD_OVERSUB(); \
1223 }
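// Protocol implemented by the futex macros above: lk.poll holds
// KMP_LOCK_FREE(futex) when free; an owner stores (gtid + 1) << 1, and the
// low bit is set when at least one thread has gone to sleep in FUTEX_WAIT.
// Release exchanges the word back to the free value and issues FUTEX_WAKE for
// one waiter only if that bit was set, so an uncontended acquire/release pair
// never enters the kernel.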
1224
1225#endif // KMP_USE_FUTEX
1226
1227#else // KMP_USE_DYNAMIC_LOCK
1228
1229static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1230 ident_t const *loc,
1231 kmp_int32 gtid) {
1232 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1233
1234 // Because of the double-check, the following load doesn't need to be volatile
1235 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1236
1237 if (lck == NULL) {
1238 void *idx;
1239
1240 // Allocate & initialize the lock.
1241 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1242 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1243 __kmp_init_user_lock_with_checks(lck);
1244 __kmp_set_user_lock_location(lck, loc);
1245#if USE_ITT_BUILD
1246 __kmp_itt_critical_creating(lck);
1247// __kmp_itt_critical_creating() should be called *before* the first usage
1248// of the underlying lock. It is the only place where we can guarantee it.
1249// There is a chance the lock will be destroyed without ever being used, but
1250// that is not a problem, because this is not a real event seen by the user
1251// but rather sets a name for the object (lock). See more details in kmp_itt.h.
1252#endif /* USE_ITT_BUILD */
1253
1254 // Use a cmpxchg instruction to slam the start of the critical section with
1255 // the lock pointer. If another thread beat us to it, deallocate the lock,
1256 // and use the lock that the other thread allocated.
1257 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1258
1259 if (status == 0) {
1260// Deallocate the lock and reload the value.
1261#if USE_ITT_BUILD
1262 __kmp_itt_critical_destroyed(lck);
1263// Let ITT know the lock is destroyed and the same memory location may be reused
1264// for another purpose.
1265#endif /* USE_ITT_BUILD */
1266 __kmp_destroy_user_lock_with_checks(lck);
1267 __kmp_user_lock_free(&idx, gtid, lck);
1268 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1269 KMP_DEBUG_ASSERT(lck != NULL);
1270 }
1271 }
1272 return lck;
1273}
1274
1275#endif // KMP_USE_DYNAMIC_LOCK
1276
1287void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1288 kmp_critical_name *crit) {
1289#if KMP_USE_DYNAMIC_LOCK
1290#if OMPT_SUPPORT && OMPT_OPTIONAL
1291 OMPT_STORE_RETURN_ADDRESS(global_tid);
1292#endif // OMPT_SUPPORT
1293 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1294#else
1295 KMP_COUNT_BLOCK(OMP_CRITICAL);
1296#if OMPT_SUPPORT && OMPT_OPTIONAL
1297 ompt_state_t prev_state = ompt_state_undefined;
1298 ompt_thread_info_t ti;
1299#endif
1300 kmp_user_lock_p lck;
1301
1302 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1303 __kmp_assert_valid_gtid(global_tid);
1304
1305 // TODO: add THR_OVHD_STATE
1306
1307 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1308 KMP_CHECK_USER_LOCK_INIT();
1309
1310 if ((__kmp_user_lock_kind == lk_tas) &&
1311 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1312 lck = (kmp_user_lock_p)crit;
1313 }
1314#if KMP_USE_FUTEX
1315 else if ((__kmp_user_lock_kind == lk_futex) &&
1316 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1317 lck = (kmp_user_lock_p)crit;
1318 }
1319#endif
1320 else { // ticket, queuing or drdpa
1321 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1322 }
1323
1324 if (__kmp_env_consistency_check)
1325 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1326
1327 // Since the critical directive binds to all threads, not just the current
1328 // team, we have to check this even if we are in a serialized team.
1329 // Also, even if we are the uber thread, we still have to acquire the lock,
1330 // as we have to contend with sibling threads.
1331
1332#if USE_ITT_BUILD
1333 __kmp_itt_critical_acquiring(lck);
1334#endif /* USE_ITT_BUILD */
1335#if OMPT_SUPPORT && OMPT_OPTIONAL
1336 OMPT_STORE_RETURN_ADDRESS(global_tid);
1337 void *codeptr_ra = NULL;
1338 if (ompt_enabled.enabled) {
1339 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1340 /* OMPT state update */
1341 prev_state = ti.state;
1342 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1343 ti.state = ompt_state_wait_critical;
1344
1345 /* OMPT event callback */
1346 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1347 if (ompt_enabled.ompt_callback_mutex_acquire) {
1348 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1349 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1350 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1351 }
1352 }
1353#endif
1354 // The value of 'crit' should be usable as the critical_id of the critical
1355 // section directive.
1356 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1357
1358#if USE_ITT_BUILD
1359 __kmp_itt_critical_acquired(lck);
1360#endif /* USE_ITT_BUILD */
1361#if OMPT_SUPPORT && OMPT_OPTIONAL
1362 if (ompt_enabled.enabled) {
1363 /* OMPT state update */
1364 ti.state = prev_state;
1365 ti.wait_id = 0;
1366
1367 /* OMPT event callback */
1368 if (ompt_enabled.ompt_callback_mutex_acquired) {
1369 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1370 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1371 }
1372 }
1373#endif
1374 KMP_POP_PARTITIONED_TIMER();
1375
1376 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1377 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1378#endif // KMP_USE_DYNAMIC_LOCK
1379}
1380
1381#if KMP_USE_DYNAMIC_LOCK
1382
1383// Converts the given hint to an internal lock implementation
1384static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1385#if KMP_USE_TSX
1386#define KMP_TSX_LOCK(seq) lockseq_##seq
1387#else
1388#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1389#endif
1390
1391#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1392#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1393#else
1394#define KMP_CPUINFO_RTM 0
1395#endif
1396
1397 // Hints that do not require further logic
1398 if (hint & kmp_lock_hint_hle)
1399 return KMP_TSX_LOCK(hle);
1400 if (hint & kmp_lock_hint_rtm)
1401 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1402 if (hint & kmp_lock_hint_adaptive)
1403 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1404
1405 // Rule out conflicting hints first by returning the default lock
1406 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1407 return __kmp_user_lock_seq;
1408 if ((hint & omp_lock_hint_speculative) &&
1409 (hint & omp_lock_hint_nonspeculative))
1410 return __kmp_user_lock_seq;
1411
1412 // Do not even consider speculation when it appears to be contended
1413 if (hint & omp_lock_hint_contended)
1414 return lockseq_queuing;
1415
1416 // Uncontended lock without speculation
1417 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1418 return lockseq_tas;
1419
1420 // Use RTM lock for speculation
1421 if (hint & omp_lock_hint_speculative)
1422 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1423
1424 return __kmp_user_lock_seq;
1425}
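// Examples of the mapping above (assuming TSX support is compiled in and
// detected at run time):
//   omp_lock_hint_contended                -> lockseq_queuing
//   omp_lock_hint_uncontended              -> lockseq_tas
//   omp_lock_hint_speculative              -> lockseq_rtm_spin
//   kmp_lock_hint_hle                      -> lockseq_hle
//   conflicting or unrecognized hints      -> __kmp_user_lock_seq (the default)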
1426
1427#if OMPT_SUPPORT && OMPT_OPTIONAL
1428#if KMP_USE_DYNAMIC_LOCK
1429static kmp_mutex_impl_t
1430__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1431 if (user_lock) {
1432 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1433 case 0:
1434 break;
1435#if KMP_USE_FUTEX
1436 case locktag_futex:
1437 return kmp_mutex_impl_queuing;
1438#endif
1439 case locktag_tas:
1440 return kmp_mutex_impl_spin;
1441#if KMP_USE_TSX
1442 case locktag_hle:
1443 case locktag_rtm_spin:
1444 return kmp_mutex_impl_speculative;
1445#endif
1446 default:
1447 return kmp_mutex_impl_none;
1448 }
1449 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1450 }
1451 KMP_ASSERT(ilock);
1452 switch (ilock->type) {
1453#if KMP_USE_TSX
1454 case locktag_adaptive:
1455 case locktag_rtm_queuing:
1456 return kmp_mutex_impl_speculative;
1457#endif
1458 case locktag_nested_tas:
1459 return kmp_mutex_impl_spin;
1460#if KMP_USE_FUTEX
1461 case locktag_nested_futex:
1462#endif
1463 case locktag_ticket:
1464 case locktag_queuing:
1465 case locktag_drdpa:
1466 case locktag_nested_ticket:
1467 case locktag_nested_queuing:
1468 case locktag_nested_drdpa:
1469 return kmp_mutex_impl_queuing;
1470 default:
1471 return kmp_mutex_impl_none;
1472 }
1473}
1474#else
1475// For locks without dynamic binding
1476static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1477 switch (__kmp_user_lock_kind) {
1478 case lk_tas:
1479 return kmp_mutex_impl_spin;
1480#if KMP_USE_FUTEX
1481 case lk_futex:
1482#endif
1483 case lk_ticket:
1484 case lk_queuing:
1485 case lk_drdpa:
1486 return kmp_mutex_impl_queuing;
1487#if KMP_USE_TSX
1488 case lk_hle:
1489 case lk_rtm_queuing:
1490 case lk_rtm_spin:
1491 case lk_adaptive:
1492 return kmp_mutex_impl_speculative;
1493#endif
1494 default:
1495 return kmp_mutex_impl_none;
1496 }
1497}
1498#endif // KMP_USE_DYNAMIC_LOCK
1499#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1500
1514void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1515 kmp_critical_name *crit, uint32_t hint) {
1516 KMP_COUNT_BLOCK(OMP_CRITICAL);
1517 kmp_user_lock_p lck;
1518#if OMPT_SUPPORT && OMPT_OPTIONAL
1519 ompt_state_t prev_state = ompt_state_undefined;
1520 ompt_thread_info_t ti;
1521 // This is the case if called from __kmpc_critical:
1522 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1523 if (!codeptr)
1524 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1525#endif
1526
1527 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1528 __kmp_assert_valid_gtid(global_tid);
1529
1530 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1531 // Check if it is initialized.
1532 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1533 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1534 if (*lk == 0) {
1535 if (KMP_IS_D_LOCK(lockseq)) {
1536 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1537 KMP_GET_D_TAG(lockseq));
1538 } else {
1539 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1540 }
1541 }
1542 // Branch for accessing the actual lock object and set operation. This
1543 // branching is inevitable since this lock initialization does not follow the
1544 // normal dispatch path (lock table is not used).
1545 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1546 lck = (kmp_user_lock_p)lk;
1547 if (__kmp_env_consistency_check) {
1548 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1549 __kmp_map_hint_to_lock(hint));
1550 }
1551#if USE_ITT_BUILD
1552 __kmp_itt_critical_acquiring(lck);
1553#endif
1554#if OMPT_SUPPORT && OMPT_OPTIONAL
1555 if (ompt_enabled.enabled) {
1556 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1557 /* OMPT state update */
1558 prev_state = ti.state;
1559 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1560 ti.state = ompt_state_wait_critical;
1561
1562 /* OMPT event callback */
1563 if (ompt_enabled.ompt_callback_mutex_acquire) {
1564 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1565 ompt_mutex_critical, (unsigned int)hint,
1566 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1567 codeptr);
1568 }
1569 }
1570#endif
1571#if KMP_USE_INLINED_TAS
1572 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1573 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1574 } else
1575#elif KMP_USE_INLINED_FUTEX
1576 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1577 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1578 } else
1579#endif
1580 {
1581 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1582 }
1583 } else {
1584 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1585 lck = ilk->lock;
1586 if (__kmp_env_consistency_check) {
1587 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1588 __kmp_map_hint_to_lock(hint));
1589 }
1590#if USE_ITT_BUILD
1591 __kmp_itt_critical_acquiring(lck);
1592#endif
1593#if OMPT_SUPPORT && OMPT_OPTIONAL
1594 if (ompt_enabled.enabled) {
1595 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1596 /* OMPT state update */
1597 prev_state = ti.state;
1598 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1599 ti.state = ompt_state_wait_critical;
1600
1601 /* OMPT event callback */
1602 if (ompt_enabled.ompt_callback_mutex_acquire) {
1603 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1604 ompt_mutex_critical, (unsigned int)hint,
1605 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1606 codeptr);
1607 }
1608 }
1609#endif
1610 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1611 }
1612 KMP_POP_PARTITIONED_TIMER();
1613
1614#if USE_ITT_BUILD
1615 __kmp_itt_critical_acquired(lck);
1616#endif /* USE_ITT_BUILD */
1617#if OMPT_SUPPORT && OMPT_OPTIONAL
1618 if (ompt_enabled.enabled) {
1619 /* OMPT state update */
1620 ti.state = prev_state;
1621 ti.wait_id = 0;
1622
1623 /* OMPT event callback */
1624 if (ompt_enabled.ompt_callback_mutex_acquired) {
1625 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1626 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1627 }
1628 }
1629#endif
1630
1631 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1632 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1633} // __kmpc_critical_with_hint
1634
1635#endif // KMP_USE_DYNAMIC_LOCK
1636
1646void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1647 kmp_critical_name *crit) {
1648 kmp_user_lock_p lck;
1649
1650 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1651
1652#if KMP_USE_DYNAMIC_LOCK
1653 int locktag = KMP_EXTRACT_D_TAG(crit);
1654 if (locktag) {
1655 lck = (kmp_user_lock_p)crit;
1656 KMP_ASSERT(lck != NULL);
1657 if (__kmp_env_consistency_check) {
1658 __kmp_pop_sync(global_tid, ct_critical, loc);
1659 }
1660#if USE_ITT_BUILD
1661 __kmp_itt_critical_releasing(lck);
1662#endif
1663#if KMP_USE_INLINED_TAS
1664 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1665 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1666 } else
1667#elif KMP_USE_INLINED_FUTEX
1668 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1669 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1670 } else
1671#endif
1672 {
1673 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1674 }
1675 } else {
1676 kmp_indirect_lock_t *ilk =
1677 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1678 KMP_ASSERT(ilk != NULL);
1679 lck = ilk->lock;
1680 if (__kmp_env_consistency_check) {
1681 __kmp_pop_sync(global_tid, ct_critical, loc);
1682 }
1683#if USE_ITT_BUILD
1684 __kmp_itt_critical_releasing(lck);
1685#endif
1686 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1687 }
1688
1689#else // KMP_USE_DYNAMIC_LOCK
1690
1691 if ((__kmp_user_lock_kind == lk_tas) &&
1692 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1693 lck = (kmp_user_lock_p)crit;
1694 }
1695#if KMP_USE_FUTEX
1696 else if ((__kmp_user_lock_kind == lk_futex) &&
1697 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1698 lck = (kmp_user_lock_p)crit;
1699 }
1700#endif
1701 else { // ticket, queuing or drdpa
1702 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1703 }
1704
1705 KMP_ASSERT(lck != NULL);
1706
1707 if (__kmp_env_consistency_check)
1708 __kmp_pop_sync(global_tid, ct_critical, loc);
1709
1710#if USE_ITT_BUILD
1711 __kmp_itt_critical_releasing(lck);
1712#endif /* USE_ITT_BUILD */
1713 // The value of 'crit' should be usable as the critical_id of the critical
1714 // section directive.
1715 __kmp_release_user_lock_with_checks(lck, global_tid);
1716
1717#endif // KMP_USE_DYNAMIC_LOCK
1718
1719#if OMPT_SUPPORT && OMPT_OPTIONAL
1720 /* OMPT release event triggers after lock is released; place here to trigger
1721 * for all #if branches */
1722 OMPT_STORE_RETURN_ADDRESS(global_tid);
1723 if (ompt_enabled.ompt_callback_mutex_released) {
1724 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1725 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1726 OMPT_LOAD_RETURN_ADDRESS(0));
1727 }
1728#endif
1729
1730 KMP_POP_PARTITIONED_TIMER();
1731 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1732}
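// Illustrative sketch (not runtime code): for "#pragma omp critical (name)"
// the compiler emits one zero-initialized kmp_critical_name per critical name
// and brackets the body with the two entry points; the variable name below is
// hypothetical:
//
//   static kmp_critical_name crit_name_lck = {0};
//   ...
//   __kmpc_critical(&loc, gtid, &crit_name_lck);
//   /* body */
//   __kmpc_end_critical(&loc, gtid, &crit_name_lck);
//
// With a hint clause, __kmpc_critical_with_hint() is called instead and the
// first thread to arrive initializes the lock word according to the hint.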
1733
1743kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1744 int status;
1745 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1746 __kmp_assert_valid_gtid(global_tid);
1747
1748 if (!TCR_4(__kmp_init_parallel))
1749 __kmp_parallel_initialize();
1750
1751 __kmp_resume_if_soft_paused();
1752
1753 if (__kmp_env_consistency_check)
1754 __kmp_check_barrier(global_tid, ct_barrier, loc);
1755
1756#if OMPT_SUPPORT
1757 ompt_frame_t *ompt_frame;
1758 if (ompt_enabled.enabled) {
1759 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1760 if (ompt_frame->enter_frame.ptr == NULL)
1761 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1762 }
1763 OMPT_STORE_RETURN_ADDRESS(global_tid);
1764#endif
1765#if USE_ITT_NOTIFY
1766 __kmp_threads[global_tid]->th.th_ident = loc;
1767#endif
1768 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1769#if OMPT_SUPPORT && OMPT_OPTIONAL
1770 if (ompt_enabled.enabled) {
1771 ompt_frame->enter_frame = ompt_data_none;
1772 }
1773#endif
1774
1775 return (status != 0) ? 0 : 1;
1776}
1777
1787void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1788 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1789 __kmp_assert_valid_gtid(global_tid);
1790 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1791}
1792
1803kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1804 kmp_int32 ret;
1805 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1806 __kmp_assert_valid_gtid(global_tid);
1807
1808 if (!TCR_4(__kmp_init_parallel))
1809 __kmp_parallel_initialize();
1810
1811 __kmp_resume_if_soft_paused();
1812
1813 if (__kmp_env_consistency_check) {
1814 if (loc == 0) {
1815 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1816 }
1817 __kmp_check_barrier(global_tid, ct_barrier, loc);
1818 }
1819
1820#if OMPT_SUPPORT
1821 ompt_frame_t *ompt_frame;
1822 if (ompt_enabled.enabled) {
1823 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1824 if (ompt_frame->enter_frame.ptr == NULL)
1825 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1826 }
1827 OMPT_STORE_RETURN_ADDRESS(global_tid);
1828#endif
1829#if USE_ITT_NOTIFY
1830 __kmp_threads[global_tid]->th.th_ident = loc;
1831#endif
1832 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1833#if OMPT_SUPPORT && OMPT_OPTIONAL
1834 if (ompt_enabled.enabled) {
1835 ompt_frame->enter_frame = ompt_data_none;
1836 }
1837#endif
1838
1839 ret = __kmpc_master(loc, global_tid);
1840
1841 if (__kmp_env_consistency_check) {
1842 /* there's no __kmpc_end_master called; so the (stats) */
1843 /* actions of __kmpc_end_master are done here */
1844 if (ret) {
1845 /* only one thread should do the pop since only */
1846 /* one did the push (see __kmpc_master()) */
1847 __kmp_pop_sync(global_tid, ct_master, loc);
1848 }
1849 }
1850
1851 return (ret);
1852}
1853
1854/* The BARRIER for a SINGLE process section is always explicit */
1866kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1867 __kmp_assert_valid_gtid(global_tid);
1868 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1869
1870 if (rc) {
1871 // We are going to execute the single statement, so we should count it.
1872 KMP_COUNT_BLOCK(OMP_SINGLE);
1873 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1874 }
1875
1876#if OMPT_SUPPORT && OMPT_OPTIONAL
1877 kmp_info_t *this_thr = __kmp_threads[global_tid];
1878 kmp_team_t *team = this_thr->th.th_team;
1879 int tid = __kmp_tid_from_gtid(global_tid);
1880
1881 if (ompt_enabled.enabled) {
1882 if (rc) {
1883 if (ompt_enabled.ompt_callback_work) {
1884 ompt_callbacks.ompt_callback(ompt_callback_work)(
1885 ompt_work_single_executor, ompt_scope_begin,
1886 &(team->t.ompt_team_info.parallel_data),
1887 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1888 1, OMPT_GET_RETURN_ADDRESS(0));
1889 }
1890 } else {
1891 if (ompt_enabled.ompt_callback_work) {
1892 ompt_callbacks.ompt_callback(ompt_callback_work)(
1893 ompt_work_single_other, ompt_scope_begin,
1894 &(team->t.ompt_team_info.parallel_data),
1895 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1896 1, OMPT_GET_RETURN_ADDRESS(0));
1897 ompt_callbacks.ompt_callback(ompt_callback_work)(
1898 ompt_work_single_other, ompt_scope_end,
1899 &(team->t.ompt_team_info.parallel_data),
1900 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1901 1, OMPT_GET_RETURN_ADDRESS(0));
1902 }
1903 }
1904 }
1905#endif
1906
1907 return rc;
1908}
1909
1919void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1920 __kmp_assert_valid_gtid(global_tid);
1921 __kmp_exit_single(global_tid);
1922 KMP_POP_PARTITIONED_TIMER();
1923
1924#if OMPT_SUPPORT && OMPT_OPTIONAL
1925 kmp_info_t *this_thr = __kmp_threads[global_tid];
1926 kmp_team_t *team = this_thr->th.th_team;
1927 int tid = __kmp_tid_from_gtid(global_tid);
1928
1929 if (ompt_enabled.ompt_callback_work) {
1930 ompt_callbacks.ompt_callback(ompt_callback_work)(
1931 ompt_work_single_executor, ompt_scope_end,
1932 &(team->t.ompt_team_info.parallel_data),
1933 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1934 OMPT_GET_RETURN_ADDRESS(0));
1935 }
1936#endif
1937}
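// Illustrative sketch (not runtime code): "#pragma omp single" lowers to a
// guarded body followed by the implied barrier (omitted for nowait):
//
//   if (__kmpc_single(&loc, gtid)) {
//     /* body, executed by exactly one thread */
//     __kmpc_end_single(&loc, gtid);
//   }
//   __kmpc_barrier(&loc, gtid);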
1938
1946void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1947 KMP_POP_PARTITIONED_TIMER();
1948 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1949
1950#if OMPT_SUPPORT && OMPT_OPTIONAL
1951 if (ompt_enabled.ompt_callback_work) {
1952 ompt_work_t ompt_work_type = ompt_work_loop;
1953 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1954 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1955 // Determine workshare type
1956 if (loc != NULL) {
1957 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1958 ompt_work_type = ompt_work_loop;
1959 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1960 ompt_work_type = ompt_work_sections;
1961 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1962 ompt_work_type = ompt_work_distribute;
1963 } else {
1964 // use default set above.
1965 // a warning about this case is provided in __kmpc_for_static_init
1966 }
1967 KMP_DEBUG_ASSERT(ompt_work_type);
1968 }
1969 ompt_callbacks.ompt_callback(ompt_callback_work)(
1970 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1971 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1972 }
1973#endif
1974 if (__kmp_env_consistency_check)
1975 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1976}
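// Illustrative sketch (not runtime code, assuming the __kmpc_for_static_init_4
// entry point defined elsewhere in the runtime): __kmpc_for_static_fini()
// closes a statically scheduled worksharing loop, e.g.:
//
//   kmp_int32 lower = 0, upper = N - 1, stride = 1, last = 0;
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
//                            &upper, &stride, /*incr=*/1, /*chunk=*/0);
//   for (kmp_int32 i = lower; i <= upper; ++i) { /* body */ }
//   __kmpc_for_static_fini(&loc, gtid);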
1977
1978// User routines which take C-style arguments (call by value)
1979// different from the Fortran equivalent routines
1980
1981void ompc_set_num_threads(int arg) {
1982 // !!!!! TODO: check the per-task binding
1983 __kmp_set_num_threads(arg, __kmp_entry_gtid());
1984}
1985
1986void ompc_set_dynamic(int flag) {
1987 kmp_info_t *thread;
1988
1989 /* For the thread-private implementation of the internal controls */
1990 thread = __kmp_entry_thread();
1991
1992 __kmp_save_internal_controls(thread);
1993
1994 set__dynamic(thread, flag ? true : false);
1995}
1996
1997void ompc_set_nested(int flag) {
1998 kmp_info_t *thread;
1999
2000 /* For the thread-private internal controls implementation */
2001 thread = __kmp_entry_thread();
2002
2003 __kmp_save_internal_controls(thread);
2004
2005 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
2006}
2007
2008void ompc_set_max_active_levels(int max_active_levels) {
2009 /* TO DO */
2010 /* we want per-task implementation of this internal control */
2011
2012 /* For the per-thread internal controls implementation */
2013 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
2014}
2015
2016void ompc_set_schedule(omp_sched_t kind, int modifier) {
2017 // !!!!! TODO: check the per-task binding
2018 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
2019}
2020
2021int ompc_get_ancestor_thread_num(int level) {
2022 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
2023}
2024
2025int ompc_get_team_size(int level) {
2026 return __kmp_get_team_size(__kmp_entry_gtid(), level);
2027}
2028
2029/* OpenMP 5.0 Affinity Format API */
2030void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2031 if (!__kmp_init_serial) {
2032 __kmp_serial_initialize();
2033 }
2034 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2035 format, KMP_STRLEN(format) + 1);
2036}
2037
2038size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2039 size_t format_size;
2040 if (!__kmp_init_serial) {
2041 __kmp_serial_initialize();
2042 }
2043 format_size = KMP_STRLEN(__kmp_affinity_format);
2044 if (buffer && size) {
2045 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2046 format_size + 1);
2047 }
2048 return format_size;
2049}
2050
2051void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2052 int gtid;
2053 if (!TCR_4(__kmp_init_middle)) {
2054 __kmp_middle_initialize();
2055 }
2056 __kmp_assign_root_init_mask();
2057 gtid = __kmp_get_gtid();
2058#if KMP_AFFINITY_SUPPORTED
2059 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2060 __kmp_affinity.flags.reset) {
2061 __kmp_reset_root_init_mask(gtid);
2062 }
2063#endif
2064 __kmp_aux_display_affinity(gtid, format);
2065}
2066
2067size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2068 char const *format) {
2069 int gtid;
2070 size_t num_required;
2071 kmp_str_buf_t capture_buf;
2072 if (!TCR_4(__kmp_init_middle)) {
2073 __kmp_middle_initialize();
2074 }
2075 __kmp_assign_root_init_mask();
2076 gtid = __kmp_get_gtid();
2077#if KMP_AFFINITY_SUPPORTED
2078 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2079 __kmp_affinity.flags.reset) {
2080 __kmp_reset_root_init_mask(gtid);
2081 }
2082#endif
2083 __kmp_str_buf_init(&capture_buf);
2084 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2085 if (buffer && buf_size) {
2086 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2087 capture_buf.used + 1);
2088 }
2089 __kmp_str_buf_free(&capture_buf);
2090 return num_required;
2091}
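/* Usage sketch (illustrative): ompc_capture_affinity returns the required
   number of characters whether or not a buffer is supplied, so the usual
   pattern for the corresponding omp_capture_affinity API is to query first,
   then allocate and capture:

   #include <omp.h>
   #include <stdio.h>
   #include <stdlib.h>
   void print_binding(void) {
     const char *fmt = "thread %{thread_num} bound to %{thread_affinity}";
     size_t need = omp_capture_affinity(NULL, 0, fmt); // size query only
     char *buf = (char *)malloc(need + 1);
     if (buf) {
       omp_capture_affinity(buf, need + 1, fmt);       // truncating copy
       printf("%s\n", buf);
       free(buf);
     }
   }
*/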
2092
2093void kmpc_set_stacksize(int arg) {
2094 // __kmp_aux_set_stacksize initializes the library if needed
2095 __kmp_aux_set_stacksize(arg);
2096}
2097
2098void kmpc_set_stacksize_s(size_t arg) {
2099 // __kmp_aux_set_stacksize initializes the library if needed
2100 __kmp_aux_set_stacksize(arg);
2101}
2102
2103void kmpc_set_blocktime(int arg) {
2104 int gtid, tid, bt = arg;
2105 kmp_info_t *thread;
2106
2107 gtid = __kmp_entry_gtid();
2108 tid = __kmp_tid_from_gtid(gtid);
2109 thread = __kmp_thread_from_gtid(gtid);
2110
2111 __kmp_aux_convert_blocktime(&bt);
2112 __kmp_aux_set_blocktime(bt, thread, tid);
2113}
2114
2115void kmpc_set_library(int arg) {
2116 // __kmp_user_set_library initializes the library if needed
2117 __kmp_user_set_library((enum library_type)arg);
2118}
2119
2120void kmpc_set_defaults(char const *str) {
2121 // __kmp_aux_set_defaults initializes the library if needed
2122 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2123}
2124
2125void kmpc_set_disp_num_buffers(int arg) {
2126 // ignore after initialization because some teams have already
2127 // allocated dispatch buffers
2128 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2129 arg <= KMP_MAX_DISP_NUM_BUFF) {
2130 __kmp_dispatch_num_buffers = arg;
2131 }
2132}
2133
2134int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2135#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2136 return -1;
2137#else
2138 if (!TCR_4(__kmp_init_middle)) {
2139 __kmp_middle_initialize();
2140 }
2141 __kmp_assign_root_init_mask();
2142 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2143#endif
2144}
2145
2146int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2147#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2148 return -1;
2149#else
2150 if (!TCR_4(__kmp_init_middle)) {
2151 __kmp_middle_initialize();
2152 }
2153 __kmp_assign_root_init_mask();
2154 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2155#endif
2156}
2157
2158int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2159#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2160 return -1;
2161#else
2162 if (!TCR_4(__kmp_init_middle)) {
2163 __kmp_middle_initialize();
2164 }
2165 __kmp_assign_root_init_mask();
2166 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2167#endif
2168}
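/* Usage sketch (illustrative): the kmpc_set_* entries above sit behind the
   kmp_* extension API; the kmp_set_* names below are assumed from the omp.h
   header shipped with this runtime.

   #include <omp.h>
   void tune_runtime(void) {
     kmp_set_stacksize_s(4 * 1024 * 1024); // 4 MiB stacks for worker threads
     kmp_set_blocktime(0);                 // idle workers sleep immediately
     kmp_set_defaults("KMP_AFFINITY=compact");
   }
*/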
2169
2170/* -------------------------------------------------------------------------- */
2215void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2216 void *cpy_data, void (*cpy_func)(void *, void *),
2217 kmp_int32 didit) {
2218 void **data_ptr;
2219 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2220 __kmp_assert_valid_gtid(gtid);
2221
2222 KMP_MB();
2223
2224 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2225
2226 if (__kmp_env_consistency_check) {
2227 if (loc == 0) {
2228 KMP_WARNING(ConstructIdentInvalid);
2229 }
2230 }
2231
2232 // ToDo: Optimize the following two barriers into some kind of split barrier
2233
2234 if (didit)
2235 *data_ptr = cpy_data;
2236
2237#if OMPT_SUPPORT
2238 ompt_frame_t *ompt_frame;
2239 if (ompt_enabled.enabled) {
2240 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2241 if (ompt_frame->enter_frame.ptr == NULL)
2242 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2243 }
2244 OMPT_STORE_RETURN_ADDRESS(gtid);
2245#endif
2246/* This barrier is not a barrier region boundary */
2247#if USE_ITT_NOTIFY
2248 __kmp_threads[gtid]->th.th_ident = loc;
2249#endif
2250 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2251
2252 if (!didit)
2253 (*cpy_func)(cpy_data, *data_ptr);
2254
2255 // Consider next barrier a user-visible barrier for barrier region boundaries
2256 // Nesting checks are already handled by the single construct checks
2257 {
2258#if OMPT_SUPPORT
2259 OMPT_STORE_RETURN_ADDRESS(gtid);
2260#endif
2261#if USE_ITT_NOTIFY
2262 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2263// tasks can overwrite the location)
2264#endif
2265 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2266#if OMPT_SUPPORT && OMPT_OPTIONAL
2267 if (ompt_enabled.enabled) {
2268 ompt_frame->enter_frame = ompt_data_none;
2269 }
2270#endif
2271 }
2272}
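/* Usage sketch (illustrative): __kmpc_copyprivate is the entry the copyprivate
   clause typically compiles to; the thread that executed the single region
   passes didit = 1 together with its data, and the others pick it up via
   cpy_func after the first barrier above. At the source level:

   #include <omp.h>
   void broadcast_seed(void) {
     int seed;
   #pragma omp parallel private(seed)
     {
   #pragma omp single copyprivate(seed)
       seed = 42;   // one thread assigns, every thread receives the value
       // all threads continue here with seed == 42
     }
   }
*/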
2273
2274/* --------------------------------------------------------------------------*/
2291void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2292 void **data_ptr;
2293
2294 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2295
2296 KMP_MB();
2297
2298 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2299
2300 if (__kmp_env_consistency_check) {
2301 if (loc == 0) {
2302 KMP_WARNING(ConstructIdentInvalid);
2303 }
2304 }
2305
2306 // ToDo: Optimize the following barrier
2307
2308 if (cpy_data)
2309 *data_ptr = cpy_data;
2310
2311#if OMPT_SUPPORT
2312 ompt_frame_t *ompt_frame;
2313 if (ompt_enabled.enabled) {
2314 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2315 if (ompt_frame->enter_frame.ptr == NULL)
2316 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2317 OMPT_STORE_RETURN_ADDRESS(gtid);
2318 }
2319#endif
2320/* This barrier is not a barrier region boundary */
2321#if USE_ITT_NOTIFY
2322 __kmp_threads[gtid]->th.th_ident = loc;
2323#endif
2324 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2325
2326 return *data_ptr;
2327}
2328
2329/* -------------------------------------------------------------------------- */
2330
2331#define INIT_LOCK __kmp_init_user_lock_with_checks
2332#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2333#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2334#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2335#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2336#define ACQUIRE_NESTED_LOCK_TIMED \
2337 __kmp_acquire_nested_user_lock_with_checks_timed
2338#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2339#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2340#define TEST_LOCK __kmp_test_user_lock_with_checks
2341#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2342#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2343#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2344
2345// TODO: Make check abort messages use location info & pass it into
2346// with_checks routines
2347
2348#if KMP_USE_DYNAMIC_LOCK
2349
2350// internal lock initializer
2351static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2352 kmp_dyna_lockseq_t seq) {
2353 if (KMP_IS_D_LOCK(seq)) {
2354 KMP_INIT_D_LOCK(lock, seq);
2355#if USE_ITT_BUILD
2356 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2357#endif
2358 } else {
2359 KMP_INIT_I_LOCK(lock, seq);
2360#if USE_ITT_BUILD
2361 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2362 __kmp_itt_lock_creating(ilk->lock, loc);
2363#endif
2364 }
2365}
2366
2367// internal nest lock initializer
2368static __forceinline void
2369__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2370 kmp_dyna_lockseq_t seq) {
2371#if KMP_USE_TSX
2372 // Don't have nested lock implementation for speculative locks
2373 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2374 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2375 seq = __kmp_user_lock_seq;
2376#endif
2377 switch (seq) {
2378 case lockseq_tas:
2379 seq = lockseq_nested_tas;
2380 break;
2381#if KMP_USE_FUTEX
2382 case lockseq_futex:
2383 seq = lockseq_nested_futex;
2384 break;
2385#endif
2386 case lockseq_ticket:
2387 seq = lockseq_nested_ticket;
2388 break;
2389 case lockseq_queuing:
2390 seq = lockseq_nested_queuing;
2391 break;
2392 case lockseq_drdpa:
2393 seq = lockseq_nested_drdpa;
2394 break;
2395 default:
2396 seq = lockseq_nested_queuing;
2397 }
2398 KMP_INIT_I_LOCK(lock, seq);
2399#if USE_ITT_BUILD
2400 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2401 __kmp_itt_lock_creating(ilk->lock, loc);
2402#endif
2403}
2404
2405/* initialize the lock with a hint */
2406void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2407 uintptr_t hint) {
2408 KMP_DEBUG_ASSERT(__kmp_init_serial);
2409 if (__kmp_env_consistency_check && user_lock == NULL) {
2410 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2411 }
2412
2413 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2414
2415#if OMPT_SUPPORT && OMPT_OPTIONAL
2416 // This is the case, if called from omp_init_lock_with_hint:
2417 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2418 if (!codeptr)
2419 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2420 if (ompt_enabled.ompt_callback_lock_init) {
2421 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2422 ompt_mutex_lock, (omp_lock_hint_t)hint,
2423 __ompt_get_mutex_impl_type(user_lock),
2424 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2425 }
2426#endif
2427}
2428
2429/* initialize the lock with a hint */
2430void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2431 void **user_lock, uintptr_t hint) {
2432 KMP_DEBUG_ASSERT(__kmp_init_serial);
2433 if (__kmp_env_consistency_check && user_lock == NULL) {
2434 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2435 }
2436
2437 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2438
2439#if OMPT_SUPPORT && OMPT_OPTIONAL
2440 // This is the case, if called from omp_init_lock_with_hint:
2441 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2442 if (!codeptr)
2443 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2444 if (ompt_enabled.ompt_callback_lock_init) {
2445 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2446 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2447 __ompt_get_mutex_impl_type(user_lock),
2448 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2449 }
2450#endif
2451}
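/* Usage sketch (illustrative): these hinted initializers sit behind
   omp_init_lock_with_hint / omp_init_nest_lock_with_hint; the hint only
   selects a lock implementation, while the locking semantics stay unchanged.

   #include <omp.h>
   static omp_lock_t cheap_lock;
   void setup_lock(void) {
     // ask for a speculative (e.g. TSX-based) lock where available
     omp_init_lock_with_hint(&cheap_lock, omp_lock_hint_speculative);
   }
*/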
2452
2453#endif // KMP_USE_DYNAMIC_LOCK
2454
2455/* initialize the lock */
2456void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2457#if KMP_USE_DYNAMIC_LOCK
2458
2459 KMP_DEBUG_ASSERT(__kmp_init_serial);
2460 if (__kmp_env_consistency_check && user_lock == NULL) {
2461 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2462 }
2463 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2464
2465#if OMPT_SUPPORT && OMPT_OPTIONAL
2466 // This is the case, if called from omp_init_lock_with_hint:
2467 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2468 if (!codeptr)
2469 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2470 if (ompt_enabled.ompt_callback_lock_init) {
2471 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2472 ompt_mutex_lock, omp_lock_hint_none,
2473 __ompt_get_mutex_impl_type(user_lock),
2474 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2475 }
2476#endif
2477
2478#else // KMP_USE_DYNAMIC_LOCK
2479
2480 static char const *const func = "omp_init_lock";
2481 kmp_user_lock_p lck;
2482 KMP_DEBUG_ASSERT(__kmp_init_serial);
2483
2484 if (__kmp_env_consistency_check) {
2485 if (user_lock == NULL) {
2486 KMP_FATAL(LockIsUninitialized, func);
2487 }
2488 }
2489
2490 KMP_CHECK_USER_LOCK_INIT();
2491
2492 if ((__kmp_user_lock_kind == lk_tas) &&
2493 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2494 lck = (kmp_user_lock_p)user_lock;
2495 }
2496#if KMP_USE_FUTEX
2497 else if ((__kmp_user_lock_kind == lk_futex) &&
2498 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2499 lck = (kmp_user_lock_p)user_lock;
2500 }
2501#endif
2502 else {
2503 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2504 }
2505 INIT_LOCK(lck);
2506 __kmp_set_user_lock_location(lck, loc);
2507
2508#if OMPT_SUPPORT && OMPT_OPTIONAL
2509 // This is the case, if called from omp_init_lock_with_hint:
2510 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2511 if (!codeptr)
2512 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2513 if (ompt_enabled.ompt_callback_lock_init) {
2514 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2515 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2516 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2517 }
2518#endif
2519
2520#if USE_ITT_BUILD
2521 __kmp_itt_lock_creating(lck);
2522#endif /* USE_ITT_BUILD */
2523
2524#endif // KMP_USE_DYNAMIC_LOCK
2525} // __kmpc_init_lock
2526
2527/* initialize the lock */
2528void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2529#if KMP_USE_DYNAMIC_LOCK
2530
2531 KMP_DEBUG_ASSERT(__kmp_init_serial);
2532 if (__kmp_env_consistency_check && user_lock == NULL) {
2533 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2534 }
2535 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2536
2537#if OMPT_SUPPORT && OMPT_OPTIONAL
2538 // This is the case, if called from omp_init_lock_with_hint:
2539 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2540 if (!codeptr)
2541 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2542 if (ompt_enabled.ompt_callback_lock_init) {
2543 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2544 ompt_mutex_nest_lock, omp_lock_hint_none,
2545 __ompt_get_mutex_impl_type(user_lock),
2546 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2547 }
2548#endif
2549
2550#else // KMP_USE_DYNAMIC_LOCK
2551
2552 static char const *const func = "omp_init_nest_lock";
2553 kmp_user_lock_p lck;
2554 KMP_DEBUG_ASSERT(__kmp_init_serial);
2555
2556 if (__kmp_env_consistency_check) {
2557 if (user_lock == NULL) {
2558 KMP_FATAL(LockIsUninitialized, func);
2559 }
2560 }
2561
2562 KMP_CHECK_USER_LOCK_INIT();
2563
2564 if ((__kmp_user_lock_kind == lk_tas) &&
2565 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2566 OMP_NEST_LOCK_T_SIZE)) {
2567 lck = (kmp_user_lock_p)user_lock;
2568 }
2569#if KMP_USE_FUTEX
2570 else if ((__kmp_user_lock_kind == lk_futex) &&
2571 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2572 OMP_NEST_LOCK_T_SIZE)) {
2573 lck = (kmp_user_lock_p)user_lock;
2574 }
2575#endif
2576 else {
2577 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2578 }
2579
2580 INIT_NESTED_LOCK(lck);
2581 __kmp_set_user_lock_location(lck, loc);
2582
2583#if OMPT_SUPPORT && OMPT_OPTIONAL
2584 // This is the case, if called from omp_init_lock_with_hint:
2585 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2586 if (!codeptr)
2587 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2588 if (ompt_enabled.ompt_callback_lock_init) {
2589 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2590 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2591 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2592 }
2593#endif
2594
2595#if USE_ITT_BUILD
2596 __kmp_itt_lock_creating(lck);
2597#endif /* USE_ITT_BUILD */
2598
2599#endif // KMP_USE_DYNAMIC_LOCK
2600} // __kmpc_init_nest_lock
2601
2602void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2603#if KMP_USE_DYNAMIC_LOCK
2604
2605#if USE_ITT_BUILD
2606 kmp_user_lock_p lck;
2607 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2608 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2609 } else {
2610 lck = (kmp_user_lock_p)user_lock;
2611 }
2612 __kmp_itt_lock_destroyed(lck);
2613#endif
2614#if OMPT_SUPPORT && OMPT_OPTIONAL
2615 // This is the case, if called from omp_init_lock_with_hint:
2616 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2617 if (!codeptr)
2618 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2619 if (ompt_enabled.ompt_callback_lock_destroy) {
2620 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2621 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2622 }
2623#endif
2624 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2625#else
2626 kmp_user_lock_p lck;
2627
2628 if ((__kmp_user_lock_kind == lk_tas) &&
2629 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2630 lck = (kmp_user_lock_p)user_lock;
2631 }
2632#if KMP_USE_FUTEX
2633 else if ((__kmp_user_lock_kind == lk_futex) &&
2634 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2635 lck = (kmp_user_lock_p)user_lock;
2636 }
2637#endif
2638 else {
2639 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2640 }
2641
2642#if OMPT_SUPPORT && OMPT_OPTIONAL
2643 // This is the case, if called from omp_init_lock_with_hint:
2644 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2645 if (!codeptr)
2646 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2647 if (ompt_enabled.ompt_callback_lock_destroy) {
2648 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2649 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2650 }
2651#endif
2652
2653#if USE_ITT_BUILD
2654 __kmp_itt_lock_destroyed(lck);
2655#endif /* USE_ITT_BUILD */
2656 DESTROY_LOCK(lck);
2657
2658 if ((__kmp_user_lock_kind == lk_tas) &&
2659 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2660 ;
2661 }
2662#if KMP_USE_FUTEX
2663 else if ((__kmp_user_lock_kind == lk_futex) &&
2664 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2665 ;
2666 }
2667#endif
2668 else {
2669 __kmp_user_lock_free(user_lock, gtid, lck);
2670 }
2671#endif // KMP_USE_DYNAMIC_LOCK
2672} // __kmpc_destroy_lock
2673
2674/* destroy the lock */
2675void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2676#if KMP_USE_DYNAMIC_LOCK
2677
2678#if USE_ITT_BUILD
2679 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2680 __kmp_itt_lock_destroyed(ilk->lock);
2681#endif
2682#if OMPT_SUPPORT && OMPT_OPTIONAL
2683 // This is the case, if called from omp_init_lock_with_hint:
2684 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2685 if (!codeptr)
2686 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2687 if (ompt_enabled.ompt_callback_lock_destroy) {
2688 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2689 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2690 }
2691#endif
2692 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2693
2694#else // KMP_USE_DYNAMIC_LOCK
2695
2696 kmp_user_lock_p lck;
2697
2698 if ((__kmp_user_lock_kind == lk_tas) &&
2699 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2700 OMP_NEST_LOCK_T_SIZE)) {
2701 lck = (kmp_user_lock_p)user_lock;
2702 }
2703#if KMP_USE_FUTEX
2704 else if ((__kmp_user_lock_kind == lk_futex) &&
2705 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2706 OMP_NEST_LOCK_T_SIZE)) {
2707 lck = (kmp_user_lock_p)user_lock;
2708 }
2709#endif
2710 else {
2711 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2712 }
2713
2714#if OMPT_SUPPORT && OMPT_OPTIONAL
2715 // This is the case, if called from omp_init_lock_with_hint:
2716 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2717 if (!codeptr)
2718 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2719 if (ompt_enabled.ompt_callback_lock_destroy) {
2720 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2721 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2722 }
2723#endif
2724
2725#if USE_ITT_BUILD
2726 __kmp_itt_lock_destroyed(lck);
2727#endif /* USE_ITT_BUILD */
2728
2729 DESTROY_NESTED_LOCK(lck);
2730
2731 if ((__kmp_user_lock_kind == lk_tas) &&
2732 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2733 OMP_NEST_LOCK_T_SIZE)) {
2734 ;
2735 }
2736#if KMP_USE_FUTEX
2737 else if ((__kmp_user_lock_kind == lk_futex) &&
2738 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2739 OMP_NEST_LOCK_T_SIZE)) {
2740 ;
2741 }
2742#endif
2743 else {
2744 __kmp_user_lock_free(user_lock, gtid, lck);
2745 }
2746#endif // KMP_USE_DYNAMIC_LOCK
2747} // __kmpc_destroy_nest_lock
2748
2749void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2750 KMP_COUNT_BLOCK(OMP_set_lock);
2751#if KMP_USE_DYNAMIC_LOCK
2752 int tag = KMP_EXTRACT_D_TAG(user_lock);
2753#if USE_ITT_BUILD
2754 __kmp_itt_lock_acquiring(
2755 (kmp_user_lock_p)
2756 user_lock); // itt function will get to the right lock object.
2757#endif
2758#if OMPT_SUPPORT && OMPT_OPTIONAL
2759 // This is the case, if called from omp_init_lock_with_hint:
2760 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2761 if (!codeptr)
2762 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2763 if (ompt_enabled.ompt_callback_mutex_acquire) {
2764 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2765 ompt_mutex_lock, omp_lock_hint_none,
2766 __ompt_get_mutex_impl_type(user_lock),
2767 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2768 }
2769#endif
2770#if KMP_USE_INLINED_TAS
2771 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2772 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2773 } else
2774#elif KMP_USE_INLINED_FUTEX
2775 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2776 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2777 } else
2778#endif
2779 {
2780 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2781 }
2782#if USE_ITT_BUILD
2783 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2784#endif
2785#if OMPT_SUPPORT && OMPT_OPTIONAL
2786 if (ompt_enabled.ompt_callback_mutex_acquired) {
2787 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2788 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2789 }
2790#endif
2791
2792#else // KMP_USE_DYNAMIC_LOCK
2793
2794 kmp_user_lock_p lck;
2795
2796 if ((__kmp_user_lock_kind == lk_tas) &&
2797 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2798 lck = (kmp_user_lock_p)user_lock;
2799 }
2800#if KMP_USE_FUTEX
2801 else if ((__kmp_user_lock_kind == lk_futex) &&
2802 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2803 lck = (kmp_user_lock_p)user_lock;
2804 }
2805#endif
2806 else {
2807 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2808 }
2809
2810#if USE_ITT_BUILD
2811 __kmp_itt_lock_acquiring(lck);
2812#endif /* USE_ITT_BUILD */
2813#if OMPT_SUPPORT && OMPT_OPTIONAL
2814 // This is the case, if called from omp_init_lock_with_hint:
2815 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2816 if (!codeptr)
2817 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2818 if (ompt_enabled.ompt_callback_mutex_acquire) {
2819 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2820 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2821 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2822 }
2823#endif
2824
2825 ACQUIRE_LOCK(lck, gtid);
2826
2827#if USE_ITT_BUILD
2828 __kmp_itt_lock_acquired(lck);
2829#endif /* USE_ITT_BUILD */
2830
2831#if OMPT_SUPPORT && OMPT_OPTIONAL
2832 if (ompt_enabled.ompt_callback_mutex_acquired) {
2833 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2834 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2835 }
2836#endif
2837
2838#endif // KMP_USE_DYNAMIC_LOCK
2839}
2840
2841void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2842#if KMP_USE_DYNAMIC_LOCK
2843
2844#if USE_ITT_BUILD
2845 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2846#endif
2847#if OMPT_SUPPORT && OMPT_OPTIONAL
2848 // This is the case, if called from omp_init_lock_with_hint:
2849 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2850 if (!codeptr)
2851 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2852 if (ompt_enabled.enabled) {
2853 if (ompt_enabled.ompt_callback_mutex_acquire) {
2854 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2855 ompt_mutex_nest_lock, omp_lock_hint_none,
2856 __ompt_get_mutex_impl_type(user_lock),
2857 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2858 }
2859 }
2860#endif
2861 int acquire_status =
2862 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2863 (void)acquire_status;
2864#if USE_ITT_BUILD
2865 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2866#endif
2867
2868#if OMPT_SUPPORT && OMPT_OPTIONAL
2869 if (ompt_enabled.enabled) {
2870 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2871 if (ompt_enabled.ompt_callback_mutex_acquired) {
2872 // lock_first
2873 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2874 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2875 codeptr);
2876 }
2877 } else {
2878 if (ompt_enabled.ompt_callback_nest_lock) {
2879 // lock_next
2880 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2881 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2882 }
2883 }
2884 }
2885#endif
2886
2887#else // KMP_USE_DYNAMIC_LOCK
2888 int acquire_status;
2889 kmp_user_lock_p lck;
2890
2891 if ((__kmp_user_lock_kind == lk_tas) &&
2892 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2893 OMP_NEST_LOCK_T_SIZE)) {
2894 lck = (kmp_user_lock_p)user_lock;
2895 }
2896#if KMP_USE_FUTEX
2897 else if ((__kmp_user_lock_kind == lk_futex) &&
2898 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2899 OMP_NEST_LOCK_T_SIZE)) {
2900 lck = (kmp_user_lock_p)user_lock;
2901 }
2902#endif
2903 else {
2904 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2905 }
2906
2907#if USE_ITT_BUILD
2908 __kmp_itt_lock_acquiring(lck);
2909#endif /* USE_ITT_BUILD */
2910#if OMPT_SUPPORT && OMPT_OPTIONAL
2911 // This is the case, if called from omp_init_lock_with_hint:
2912 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2913 if (!codeptr)
2914 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2915 if (ompt_enabled.enabled) {
2916 if (ompt_enabled.ompt_callback_mutex_acquire) {
2917 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2918 ompt_mutex_nest_lock, omp_lock_hint_none,
2919 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2920 codeptr);
2921 }
2922 }
2923#endif
2924
2925 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2926
2927#if USE_ITT_BUILD
2928 __kmp_itt_lock_acquired(lck);
2929#endif /* USE_ITT_BUILD */
2930
2931#if OMPT_SUPPORT && OMPT_OPTIONAL
2932 if (ompt_enabled.enabled) {
2933 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2934 if (ompt_enabled.ompt_callback_mutex_acquired) {
2935 // lock_first
2936 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2937 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2938 }
2939 } else {
2940 if (ompt_enabled.ompt_callback_nest_lock) {
2941 // lock_next
2942 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2943 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2944 }
2945 }
2946 }
2947#endif
2948
2949#endif // KMP_USE_DYNAMIC_LOCK
2950}
2951
2952void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2953#if KMP_USE_DYNAMIC_LOCK
2954
2955 int tag = KMP_EXTRACT_D_TAG(user_lock);
2956#if USE_ITT_BUILD
2957 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2958#endif
2959#if KMP_USE_INLINED_TAS
2960 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2961 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2962 } else
2963#elif KMP_USE_INLINED_FUTEX
2964 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2965 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2966 } else
2967#endif
2968 {
2969 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2970 }
2971
2972#if OMPT_SUPPORT && OMPT_OPTIONAL
2973 // This is the case, if called from omp_init_lock_with_hint:
2974 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2975 if (!codeptr)
2976 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2977 if (ompt_enabled.ompt_callback_mutex_released) {
2978 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2979 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2980 }
2981#endif
2982
2983#else // KMP_USE_DYNAMIC_LOCK
2984
2985 kmp_user_lock_p lck;
2986
2987 /* Can't use serial interval since not block structured */
2988 /* release the lock */
2989
2990 if ((__kmp_user_lock_kind == lk_tas) &&
2991 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2992#if KMP_OS_LINUX && \
2993 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2994// "fast" path implemented to fix customer performance issue
2995#if USE_ITT_BUILD
2996 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2997#endif /* USE_ITT_BUILD */
2998 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2999 KMP_MB();
3000
3001#if OMPT_SUPPORT && OMPT_OPTIONAL
3002 // This is the case, if called from omp_init_lock_with_hint:
3003 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3004 if (!codeptr)
3005 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3006 if (ompt_enabled.ompt_callback_mutex_released) {
3007 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3008 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3009 }
3010#endif
3011
3012 return;
3013#else
3014 lck = (kmp_user_lock_p)user_lock;
3015#endif
3016 }
3017#if KMP_USE_FUTEX
3018 else if ((__kmp_user_lock_kind == lk_futex) &&
3019 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3020 lck = (kmp_user_lock_p)user_lock;
3021 }
3022#endif
3023 else {
3024 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
3025 }
3026
3027#if USE_ITT_BUILD
3028 __kmp_itt_lock_releasing(lck);
3029#endif /* USE_ITT_BUILD */
3030
3031 RELEASE_LOCK(lck, gtid);
3032
3033#if OMPT_SUPPORT && OMPT_OPTIONAL
3034 // This is the case, if called from omp_init_lock_with_hint:
3035 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3036 if (!codeptr)
3037 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3038 if (ompt_enabled.ompt_callback_mutex_released) {
3039 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3040 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3041 }
3042#endif
3043
3044#endif // KMP_USE_DYNAMIC_LOCK
3045}
3046
3047/* release the lock */
3048void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3049#if KMP_USE_DYNAMIC_LOCK
3050
3051#if USE_ITT_BUILD
3052 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3053#endif
3054 int release_status =
3055 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3056 (void)release_status;
3057
3058#if OMPT_SUPPORT && OMPT_OPTIONAL
3059 // This is the case, if called from omp_init_lock_with_hint:
3060 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3061 if (!codeptr)
3062 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3063 if (ompt_enabled.enabled) {
3064 if (release_status == KMP_LOCK_RELEASED) {
3065 if (ompt_enabled.ompt_callback_mutex_released) {
3066 // release_lock_last
3067 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3068 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3069 codeptr);
3070 }
3071 } else if (ompt_enabled.ompt_callback_nest_lock) {
3072 // release_lock_prev
3073 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3074 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3075 }
3076 }
3077#endif
3078
3079#else // KMP_USE_DYNAMIC_LOCK
3080
3081 kmp_user_lock_p lck;
3082
3083 /* Can't use serial interval since not block structured */
3084
3085 if ((__kmp_user_lock_kind == lk_tas) &&
3086 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3087 OMP_NEST_LOCK_T_SIZE)) {
3088#if KMP_OS_LINUX && \
3089 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3090 // "fast" path implemented to fix customer performance issue
3091 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3092#if USE_ITT_BUILD
3093 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3094#endif /* USE_ITT_BUILD */
3095
3096#if OMPT_SUPPORT && OMPT_OPTIONAL
3097 int release_status = KMP_LOCK_STILL_HELD;
3098#endif
3099
3100 if (--(tl->lk.depth_locked) == 0) {
3101 TCW_4(tl->lk.poll, 0);
3102#if OMPT_SUPPORT && OMPT_OPTIONAL
3103 release_status = KMP_LOCK_RELEASED;
3104#endif
3105 }
3106 KMP_MB();
3107
3108#if OMPT_SUPPORT && OMPT_OPTIONAL
3109 // This is the case, if called from omp_init_lock_with_hint:
3110 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3111 if (!codeptr)
3112 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3113 if (ompt_enabled.enabled) {
3114 if (release_status == KMP_LOCK_RELEASED) {
3115 if (ompt_enabled.ompt_callback_mutex_released) {
3116 // release_lock_last
3117 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3118 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3119 }
3120 } else if (ompt_enabled.ompt_callback_nest_lock) {
3121 // release_lock_previous
3122 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3123 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3124 }
3125 }
3126#endif
3127
3128 return;
3129#else
3130 lck = (kmp_user_lock_p)user_lock;
3131#endif
3132 }
3133#if KMP_USE_FUTEX
3134 else if ((__kmp_user_lock_kind == lk_futex) &&
3135 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3136 OMP_NEST_LOCK_T_SIZE)) {
3137 lck = (kmp_user_lock_p)user_lock;
3138 }
3139#endif
3140 else {
3141 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3142 }
3143
3144#if USE_ITT_BUILD
3145 __kmp_itt_lock_releasing(lck);
3146#endif /* USE_ITT_BUILD */
3147
3148 int release_status;
3149 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3150#if OMPT_SUPPORT && OMPT_OPTIONAL
3151 // This is the case, if called from omp_init_lock_with_hint:
3152 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3153 if (!codeptr)
3154 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3155 if (ompt_enabled.enabled) {
3156 if (release_status == KMP_LOCK_RELEASED) {
3157 if (ompt_enabled.ompt_callback_mutex_released) {
3158 // release_lock_last
3159 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3160 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3161 }
3162 } else if (ompt_enabled.ompt_callback_nest_lock) {
3163 // release_lock_previous
3164 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3165 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3166 }
3167 }
3168#endif
3169
3170#endif // KMP_USE_DYNAMIC_LOCK
3171}
3172
3173/* try to acquire the lock */
3174int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3175 KMP_COUNT_BLOCK(OMP_test_lock);
3176
3177#if KMP_USE_DYNAMIC_LOCK
3178 int rc;
3179 int tag = KMP_EXTRACT_D_TAG(user_lock);
3180#if USE_ITT_BUILD
3181 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3182#endif
3183#if OMPT_SUPPORT && OMPT_OPTIONAL
3184 // This is the case, if called from omp_init_lock_with_hint:
3185 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3186 if (!codeptr)
3187 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3188 if (ompt_enabled.ompt_callback_mutex_acquire) {
3189 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3190 ompt_mutex_test_lock, omp_lock_hint_none,
3191 __ompt_get_mutex_impl_type(user_lock),
3192 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3193 }
3194#endif
3195#if KMP_USE_INLINED_TAS
3196 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3197 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3198 } else
3199#elif KMP_USE_INLINED_FUTEX
3200 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3201 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3202 } else
3203#endif
3204 {
3205 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3206 }
3207 if (rc) {
3208#if USE_ITT_BUILD
3209 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3210#endif
3211#if OMPT_SUPPORT && OMPT_OPTIONAL
3212 if (ompt_enabled.ompt_callback_mutex_acquired) {
3213 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3214 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3215 }
3216#endif
3217 return FTN_TRUE;
3218 } else {
3219#if USE_ITT_BUILD
3220 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3221#endif
3222 return FTN_FALSE;
3223 }
3224
3225#else // KMP_USE_DYNAMIC_LOCK
3226
3227 kmp_user_lock_p lck;
3228 int rc;
3229
3230 if ((__kmp_user_lock_kind == lk_tas) &&
3231 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3232 lck = (kmp_user_lock_p)user_lock;
3233 }
3234#if KMP_USE_FUTEX
3235 else if ((__kmp_user_lock_kind == lk_futex) &&
3236 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3237 lck = (kmp_user_lock_p)user_lock;
3238 }
3239#endif
3240 else {
3241 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3242 }
3243
3244#if USE_ITT_BUILD
3245 __kmp_itt_lock_acquiring(lck);
3246#endif /* USE_ITT_BUILD */
3247#if OMPT_SUPPORT && OMPT_OPTIONAL
3248 // This is the case, if called from omp_init_lock_with_hint:
3249 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3250 if (!codeptr)
3251 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3252 if (ompt_enabled.ompt_callback_mutex_acquire) {
3253 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3254 ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3255 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3256 }
3257#endif
3258
3259 rc = TEST_LOCK(lck, gtid);
3260#if USE_ITT_BUILD
3261 if (rc) {
3262 __kmp_itt_lock_acquired(lck);
3263 } else {
3264 __kmp_itt_lock_cancelled(lck);
3265 }
3266#endif /* USE_ITT_BUILD */
3267#if OMPT_SUPPORT && OMPT_OPTIONAL
3268 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3269 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3270 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3271 }
3272#endif
3273
3274 return (rc ? FTN_TRUE : FTN_FALSE);
3275
3276 /* Can't use serial interval since not block structured */
3277
3278#endif // KMP_USE_DYNAMIC_LOCK
3279}
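/* Usage sketch (illustrative): a user-level lifecycle that reaches the
   __kmpc_init_lock / __kmpc_set_lock / __kmpc_unset_lock / __kmpc_destroy_lock
   entries above.

   #include <omp.h>
   static omp_lock_t lk;
   void guarded_increment(long *counter) {
     omp_init_lock(&lk);
   #pragma omp parallel
     {
       omp_set_lock(&lk);   // blocking acquire
       ++*counter;          // protected update
       omp_unset_lock(&lk);
     }
     omp_destroy_lock(&lk);
   }
   // omp_test_lock(&lk) is the non-blocking variant; it returns nonzero when
   // the lock was acquired, matching the FTN_TRUE/FTN_FALSE results above.
*/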
3280
3281/* try to acquire the lock */
3282int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3283#if KMP_USE_DYNAMIC_LOCK
3284 int rc;
3285#if USE_ITT_BUILD
3286 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3287#endif
3288#if OMPT_SUPPORT && OMPT_OPTIONAL
3289 // This is the case, if called from omp_init_lock_with_hint:
3290 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3291 if (!codeptr)
3292 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3293 if (ompt_enabled.ompt_callback_mutex_acquire) {
3294 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3295 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3296 __ompt_get_mutex_impl_type(user_lock),
3297 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3298 }
3299#endif
3300 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3301#if USE_ITT_BUILD
3302 if (rc) {
3303 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3304 } else {
3305 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3306 }
3307#endif
3308#if OMPT_SUPPORT && OMPT_OPTIONAL
3309 if (ompt_enabled.enabled && rc) {
3310 if (rc == 1) {
3311 if (ompt_enabled.ompt_callback_mutex_acquired) {
3312 // lock_first
3313 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3314 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3315 codeptr);
3316 }
3317 } else {
3318 if (ompt_enabled.ompt_callback_nest_lock) {
3319 // lock_next
3320 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3321 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3322 }
3323 }
3324 }
3325#endif
3326 return rc;
3327
3328#else // KMP_USE_DYNAMIC_LOCK
3329
3330 kmp_user_lock_p lck;
3331 int rc;
3332
3333 if ((__kmp_user_lock_kind == lk_tas) &&
3334 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3335 OMP_NEST_LOCK_T_SIZE)) {
3336 lck = (kmp_user_lock_p)user_lock;
3337 }
3338#if KMP_USE_FUTEX
3339 else if ((__kmp_user_lock_kind == lk_futex) &&
3340 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3341 OMP_NEST_LOCK_T_SIZE)) {
3342 lck = (kmp_user_lock_p)user_lock;
3343 }
3344#endif
3345 else {
3346 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3347 }
3348
3349#if USE_ITT_BUILD
3350 __kmp_itt_lock_acquiring(lck);
3351#endif /* USE_ITT_BUILD */
3352
3353#if OMPT_SUPPORT && OMPT_OPTIONAL
3354 // This is the case, if called from omp_init_lock_with_hint:
3355 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3356 if (!codeptr)
3357 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3358 if (ompt_enabled.enabled &&
3359 ompt_enabled.ompt_callback_mutex_acquire) {
3360 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3361 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3362 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3363 codeptr);
3364 }
3365#endif
3366
3367 rc = TEST_NESTED_LOCK(lck, gtid);
3368#if USE_ITT_BUILD
3369 if (rc) {
3370 __kmp_itt_lock_acquired(lck);
3371 } else {
3372 __kmp_itt_lock_cancelled(lck);
3373 }
3374#endif /* USE_ITT_BUILD */
3375#if OMPT_SUPPORT && OMPT_OPTIONAL
3376 if (ompt_enabled.enabled && rc) {
3377 if (rc == 1) {
3378 if (ompt_enabled.ompt_callback_mutex_acquired) {
3379 // lock_first
3380 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3381 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3382 }
3383 } else {
3384 if (ompt_enabled.ompt_callback_nest_lock) {
3385 // lock_next
3386 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3387 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3388 }
3389 }
3390 }
3391#endif
3392 return rc;
3393
3394 /* Can't use serial interval since not block structured */
3395
3396#endif // KMP_USE_DYNAMIC_LOCK
3397}
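/* Usage sketch (illustrative): a nestable lock may be re-acquired by its
   owner; omp_test_nest_lock returns the new nesting depth (0 on failure),
   which is why rc == 1 is reported as lock_first above.

   #include <omp.h>
   static omp_nest_lock_t nlk;   // assume omp_init_nest_lock(&nlk) ran earlier
   void visit(int depth) {
     omp_set_nest_lock(&nlk);    // the owner re-acquires without deadlocking
     if (depth > 0)
       visit(depth - 1);
     omp_unset_nest_lock(&nlk);  // fully released when the count reaches zero
   }
*/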
3398
3399// Interface to fast scalable reduce methods routines
3400
3401// keep the selected method in a thread local structure for cross-function
3402// usage: will be used in __kmpc_end_reduce* functions;
3403// another solution: to re-determine the method one more time in
3404// __kmpc_end_reduce* functions (new prototype required then)
3405// AT: which solution is better?
3406#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3407 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3408
3409#define __KMP_GET_REDUCTION_METHOD(gtid) \
3410 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3411
3412// description of the packed_reduction_method variable: look at the macros in
3413// kmp.h
3414
3415// used in a critical section reduce block
3416static __forceinline void
3417__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3418 kmp_critical_name *crit) {
3419
3420 // this lock was visible to a customer and to the threading profile tool as a
3421 // serial overhead span (although it's used for an internal purpose only)
3422 // why was it visible in previous implementation?
3423 // should we keep it visible in new reduce block?
3424 kmp_user_lock_p lck;
3425
3426#if KMP_USE_DYNAMIC_LOCK
3427
3428 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3429 // Check if it is initialized.
3430 if (*lk == 0) {
3431 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3432 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3433 KMP_GET_D_TAG(__kmp_user_lock_seq));
3434 } else {
3435 __kmp_init_indirect_csptr(crit, loc, global_tid,
3436 KMP_GET_I_TAG(__kmp_user_lock_seq));
3437 }
3438 }
3439 // Branch for accessing the actual lock object and set operation. This
3440 // branching is inevitable since this lock initialization does not follow the
3441 // normal dispatch path (lock table is not used).
3442 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3443 lck = (kmp_user_lock_p)lk;
3444 KMP_DEBUG_ASSERT(lck != NULL);
3445 if (__kmp_env_consistency_check) {
3446 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3447 }
3448 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3449 } else {
3450 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3451 lck = ilk->lock;
3452 KMP_DEBUG_ASSERT(lck != NULL);
3453 if (__kmp_env_consistency_check) {
3454 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3455 }
3456 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3457 }
3458
3459#else // KMP_USE_DYNAMIC_LOCK
3460
3461 // We know that the fast reduction code is only emitted by Intel compilers
3462 // with 32 byte critical sections. If there isn't enough space, then we
3463 // have to use a pointer.
3464 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3465 lck = (kmp_user_lock_p)crit;
3466 } else {
3467 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3468 }
3469 KMP_DEBUG_ASSERT(lck != NULL);
3470
3471 if (__kmp_env_consistency_check)
3472 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3473
3474 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3475
3476#endif // KMP_USE_DYNAMIC_LOCK
3477}
3478
3479// used in a critical section reduce block
3480static __forceinline void
3481__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3482 kmp_critical_name *crit) {
3483
3484 kmp_user_lock_p lck;
3485
3486#if KMP_USE_DYNAMIC_LOCK
3487
3488 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3489 lck = (kmp_user_lock_p)crit;
3490 if (__kmp_env_consistency_check)
3491 __kmp_pop_sync(global_tid, ct_critical, loc);
3492 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3493 } else {
3494 kmp_indirect_lock_t *ilk =
3495 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3496 if (__kmp_env_consistency_check)
3497 __kmp_pop_sync(global_tid, ct_critical, loc);
3498 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3499 }
3500
3501#else // KMP_USE_DYNAMIC_LOCK
3502
3503 // We know that the fast reduction code is only emitted by Intel compilers
3504 // with 32 byte critical sections. If there isn't enough space, then we have
3505 // to use a pointer.
3506 if (__kmp_base_user_lock_size > 32) {
3507 lck = *((kmp_user_lock_p *)crit);
3508 KMP_ASSERT(lck != NULL);
3509 } else {
3510 lck = (kmp_user_lock_p)crit;
3511 }
3512
3513 if (__kmp_env_consistency_check)
3514 __kmp_pop_sync(global_tid, ct_critical, loc);
3515
3516 __kmp_release_user_lock_with_checks(lck, global_tid);
3517
3518#endif // KMP_USE_DYNAMIC_LOCK
3519} // __kmp_end_critical_section_reduce_block
3520
3521static __forceinline int
3522__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3523 int *task_state) {
3524 kmp_team_t *team;
3525
3526 // Check whether we are inside a teams construct.
3527 if (th->th.th_teams_microtask) {
3528 *team_p = team = th->th.th_team;
3529 if (team->t.t_level == th->th.th_teams_level) {
3530 // This is reduction at teams construct.
3531 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3532 // Let's swap teams temporarily for the reduction.
3533 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3534 th->th.th_team = team->t.t_parent;
3535 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3536 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3537 *task_state = th->th.th_task_state;
3538 th->th.th_task_state = 0;
3539
3540 return 1;
3541 }
3542 }
3543 return 0;
3544}
3545
3546static __forceinline void
3547__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3548 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3549 th->th.th_info.ds.ds_tid = 0;
3550 th->th.th_team = team;
3551 th->th.th_team_nproc = team->t.t_nproc;
3552 th->th.th_task_team = team->t.t_task_team[task_state];
3553 __kmp_type_convert(task_state, &(th->th.th_task_state));
3554}
3555
3556/* 2.a.i. Reduce Block without a terminating barrier */
3572kmp_int32
3573__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3574 size_t reduce_size, void *reduce_data,
3575 void (*reduce_func)(void *lhs_data, void *rhs_data),
3576 kmp_critical_name *lck) {
3577
3578 KMP_COUNT_BLOCK(REDUCE_nowait);
3579 int retval = 0;
3580 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3581 kmp_info_t *th;
3582 kmp_team_t *team;
3583 int teams_swapped = 0, task_state;
3584 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3585 __kmp_assert_valid_gtid(global_tid);
3586
3587 // why do we need this initialization here at all?
3588 // A reduction clause cannot be used as a stand-alone directive.
3589
3590 // do not call __kmp_serial_initialize(), it will be called by
3591 // __kmp_parallel_initialize() if needed
3592 // possible detection of false-positive race by the threadchecker ???
3593 if (!TCR_4(__kmp_init_parallel))
3594 __kmp_parallel_initialize();
3595
3596 __kmp_resume_if_soft_paused();
3597
3598// check correctness of reduce block nesting
3599#if KMP_USE_DYNAMIC_LOCK
3600 if (__kmp_env_consistency_check)
3601 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3602#else
3603 if (__kmp_env_consistency_check)
3604 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3605#endif
3606
3607 th = __kmp_thread_from_gtid(global_tid);
3608 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3609
3610 // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3611 // the value should be kept in a variable
3612 // the variable should be either a construct-specific or thread-specific
3613 // property, not a team specific property
3614 // (a thread can reach the next reduce block on the next construct, reduce
3615 // method may differ on the next construct)
3616 // an ident_t "loc" parameter could be used as a construct-specific property
3617 // (what if loc == 0?)
3618 // (if both construct-specific and team-specific variables were shared,
3619 // then unnecessary extra syncs would be needed)
3620 // a thread-specific variable is better regarding two issues above (next
3621 // construct and extra syncs)
3622 // a thread-specific "th_local.reduction_method" variable is used currently
3623 // each thread executes 'determine' and 'set' lines (no need to execute by one
3624 // thread, to avoid unnecessary extra syncs)
3625
3626 packed_reduction_method = __kmp_determine_reduction_method(
3627 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3628 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3629
3630 OMPT_REDUCTION_DECL(th, global_tid);
3631 if (packed_reduction_method == critical_reduce_block) {
3632
3633 OMPT_REDUCTION_BEGIN;
3634
3635 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3636 retval = 1;
3637
3638 } else if (packed_reduction_method == empty_reduce_block) {
3639
3640 OMPT_REDUCTION_BEGIN;
3641
3642 // usage: if team size == 1, no synchronization is required ( Intel
3643 // platforms only )
3644 retval = 1;
3645
3646 } else if (packed_reduction_method == atomic_reduce_block) {
3647
3648 retval = 2;
3649
3650 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3651 // won't be called by the code gen)
3652 // (it's not quite good, because the checking block has been closed by
3653 // this 'pop',
3654 // but atomic operation has not been executed yet, will be executed
3655 // slightly later, literally on next instruction)
3656 if (__kmp_env_consistency_check)
3657 __kmp_pop_sync(global_tid, ct_reduce, loc);
3658
3659 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3660 tree_reduce_block)) {
3661
3662// AT: performance issue: a real barrier here
3663// AT: (if primary thread is slow, other threads are blocked here waiting for
3664// the primary thread to come and release them)
3665// AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3666// AT: (specifying NOWAIT won't improve performance; it'll only
3667// be confusing to a customer)
3668// AT: another implementation of *barrier_gather*nowait() (or some other design)
3669// might go faster and be more in line with sense of NOWAIT
3670// AT: TO DO: do epcc test and compare times
3671
3672// this barrier should be invisible to a customer and to the threading profile
3673// tool (it's neither a terminating barrier nor customer's code, it's
3674// used for an internal purpose)
3675#if OMPT_SUPPORT
3676 // JP: can this barrier potentially lead to task scheduling?
3677 // JP: as long as there is a barrier in the implementation, OMPT should and
3678 // will provide the barrier events
3679 // so we set-up the necessary frame/return addresses.
3680 ompt_frame_t *ompt_frame;
3681 if (ompt_enabled.enabled) {
3682 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3683 if (ompt_frame->enter_frame.ptr == NULL)
3684 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3685 }
3686 OMPT_STORE_RETURN_ADDRESS(global_tid);
3687#endif
3688#if USE_ITT_NOTIFY
3689 __kmp_threads[global_tid]->th.th_ident = loc;
3690#endif
3691 retval =
3692 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3693 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3694 retval = (retval != 0) ? (0) : (1);
3695#if OMPT_SUPPORT && OMPT_OPTIONAL
3696 if (ompt_enabled.enabled) {
3697 ompt_frame->enter_frame = ompt_data_none;
3698 }
3699#endif
3700
3701 // all other workers except primary thread should do this pop here
3702 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3703 if (__kmp_env_consistency_check) {
3704 if (retval == 0) {
3705 __kmp_pop_sync(global_tid, ct_reduce, loc);
3706 }
3707 }
3708
3709 } else {
3710
3711 // should never reach this block
3712 KMP_ASSERT(0); // "unexpected method"
3713 }
3714 if (teams_swapped) {
3715 __kmp_restore_swapped_teams(th, team, task_state);
3716 }
3717 KA_TRACE(
3718 10,
3719 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3720 global_tid, packed_reduction_method, retval));
3721
3722 return retval;
3723}
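/* Usage sketch (illustrative): a reduction clause on a worksharing construct
   is what drives this entry (the nowait form here, __kmpc_reduce below when a
   terminating barrier is required). Roughly, the generated code combines the
   private copies and calls __kmpc_end_reduce_nowait when 1 is returned, uses
   atomic updates when 2 is returned, and does nothing when 0 is returned.

   #include <omp.h>
   double dot(const double *x, const double *y, int n) {
     double s = 0.0;
   #pragma omp parallel
     {
   #pragma omp for nowait reduction(+ : s)
       for (int i = 0; i < n; ++i)
         s += x[i] * y[i];   // each thread reduces into its private copy
     }
     return s;   // safe: the parallel region's closing barrier has completed
   }
*/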
3724
3733void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3734 kmp_critical_name *lck) {
3735
3736 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3737
3738 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3739 __kmp_assert_valid_gtid(global_tid);
3740
3741 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3742
3743 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3744
3745 if (packed_reduction_method == critical_reduce_block) {
3746
3747 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3748 OMPT_REDUCTION_END;
3749
3750 } else if (packed_reduction_method == empty_reduce_block) {
3751
3752 // usage: if team size == 1, no synchronization is required ( on Intel
3753 // platforms only )
3754
3755 OMPT_REDUCTION_END;
3756
3757 } else if (packed_reduction_method == atomic_reduce_block) {
3758
3759 // neither primary thread nor other workers should get here
3760 // (code gen does not generate this call in case 2: atomic reduce block)
3761 // actually it's better to remove this else-if branch altogether;
3762 // after removal this value will be checked by the 'else' and will assert
3763
3764 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3765 tree_reduce_block)) {
3766
3767 // only primary thread gets here
3768 // OMPT: tree reduction is annotated in the barrier code
3769
3770 } else {
3771
3772 // should never reach this block
3773 KMP_ASSERT(0); // "unexpected method"
3774 }
3775
3776 if (__kmp_env_consistency_check)
3777 __kmp_pop_sync(global_tid, ct_reduce, loc);
3778
3779 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3780 global_tid, packed_reduction_method));
3781
3782 return;
3783}
3784
3785/* 2.a.ii. Reduce Block with a terminating barrier */
3786
3802kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3803 size_t reduce_size, void *reduce_data,
3804 void (*reduce_func)(void *lhs_data, void *rhs_data),
3805 kmp_critical_name *lck) {
3806 KMP_COUNT_BLOCK(REDUCE_wait);
3807 int retval = 0;
3808 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3809 kmp_info_t *th;
3810 kmp_team_t *team;
3811 int teams_swapped = 0, task_state;
3812
3813 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3814 __kmp_assert_valid_gtid(global_tid);
3815
3816 // why do we need this initialization here at all?
3817 // A reduction clause cannot be a stand-alone directive.
3818
3819 // do not call __kmp_serial_initialize(), it will be called by
3820 // __kmp_parallel_initialize() if needed
3821 // possible detection of false-positive race by the threadchecker ???
3822 if (!TCR_4(__kmp_init_parallel))
3823 __kmp_parallel_initialize();
3824
3825 __kmp_resume_if_soft_paused();
3826
3827// check correctness of reduce block nesting
3828#if KMP_USE_DYNAMIC_LOCK
3829 if (__kmp_env_consistency_check)
3830 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3831#else
3832 if (__kmp_env_consistency_check)
3833 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3834#endif
3835
3836 th = __kmp_thread_from_gtid(global_tid);
3837 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3838
3839 packed_reduction_method = __kmp_determine_reduction_method(
3840 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3841 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3842
3843 OMPT_REDUCTION_DECL(th, global_tid);
3844
3845 if (packed_reduction_method == critical_reduce_block) {
3846
3847 OMPT_REDUCTION_BEGIN;
3848 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3849 retval = 1;
3850
3851 } else if (packed_reduction_method == empty_reduce_block) {
3852
3853 OMPT_REDUCTION_BEGIN;
3854 // usage: if team size == 1, no synchronization is required ( Intel
3855 // platforms only )
3856 retval = 1;
3857
3858 } else if (packed_reduction_method == atomic_reduce_block) {
3859
3860 retval = 2;
3861
3862 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3863 tree_reduce_block)) {
3864
3865// case tree_reduce_block:
3866// this barrier should be visible to a customer and to the threading profile
3867// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3868#if OMPT_SUPPORT
3869 ompt_frame_t *ompt_frame;
3870 if (ompt_enabled.enabled) {
3871 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3872 if (ompt_frame->enter_frame.ptr == NULL)
3873 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3874 }
3875 OMPT_STORE_RETURN_ADDRESS(global_tid);
3876#endif
3877#if USE_ITT_NOTIFY
3878 __kmp_threads[global_tid]->th.th_ident =
3879 loc; // needed for correct notification of frames
3880#endif
3881 retval =
3882 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3883 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3884 retval = (retval != 0) ? (0) : (1);
3885#if OMPT_SUPPORT && OMPT_OPTIONAL
3886 if (ompt_enabled.enabled) {
3887 ompt_frame->enter_frame = ompt_data_none;
3888 }
3889#endif
3890
3891    // all workers except the primary thread should do this pop here
3892    // (no worker other than the primary thread will enter __kmpc_end_reduce())
3893 if (__kmp_env_consistency_check) {
3894 if (retval == 0) { // 0: all other workers; 1: primary thread
3895 __kmp_pop_sync(global_tid, ct_reduce, loc);
3896 }
3897 }
3898
3899 } else {
3900
3901 // should never reach this block
3902 KMP_ASSERT(0); // "unexpected method"
3903 }
3904 if (teams_swapped) {
3905 __kmp_restore_swapped_teams(th, team, task_state);
3906 }
3907
3908 KA_TRACE(10,
3909 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3910 global_tid, packed_reduction_method, retval));
3911 return retval;
3912}
3913
3924void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3925 kmp_critical_name *lck) {
3926
3927 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3928 kmp_info_t *th;
3929 kmp_team_t *team;
3930 int teams_swapped = 0, task_state;
3931
3932 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3933 __kmp_assert_valid_gtid(global_tid);
3934
3935 th = __kmp_thread_from_gtid(global_tid);
3936 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3937
3938 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3939
3940 // this barrier should be visible to a customer and to the threading profile
3941 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3942 OMPT_REDUCTION_DECL(th, global_tid);
3943
3944 if (packed_reduction_method == critical_reduce_block) {
3945 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3946
3947 OMPT_REDUCTION_END;
3948
3949// TODO: implicit barrier: should be exposed
3950#if OMPT_SUPPORT
3951 ompt_frame_t *ompt_frame;
3952 if (ompt_enabled.enabled) {
3953 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3954 if (ompt_frame->enter_frame.ptr == NULL)
3955 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3956 }
3957 OMPT_STORE_RETURN_ADDRESS(global_tid);
3958#endif
3959#if USE_ITT_NOTIFY
3960 __kmp_threads[global_tid]->th.th_ident = loc;
3961#endif
3962 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3963#if OMPT_SUPPORT && OMPT_OPTIONAL
3964 if (ompt_enabled.enabled) {
3965 ompt_frame->enter_frame = ompt_data_none;
3966 }
3967#endif
3968
3969 } else if (packed_reduction_method == empty_reduce_block) {
3970
3971 OMPT_REDUCTION_END;
3972
3973// usage: if team size==1, no synchronization is required (Intel platforms only)
3974
3975// TODO: implicit barrier: should be exposed
3976#if OMPT_SUPPORT
3977 ompt_frame_t *ompt_frame;
3978 if (ompt_enabled.enabled) {
3979 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3980 if (ompt_frame->enter_frame.ptr == NULL)
3981 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3982 }
3983 OMPT_STORE_RETURN_ADDRESS(global_tid);
3984#endif
3985#if USE_ITT_NOTIFY
3986 __kmp_threads[global_tid]->th.th_ident = loc;
3987#endif
3988 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3989#if OMPT_SUPPORT && OMPT_OPTIONAL
3990 if (ompt_enabled.enabled) {
3991 ompt_frame->enter_frame = ompt_data_none;
3992 }
3993#endif
3994
3995 } else if (packed_reduction_method == atomic_reduce_block) {
3996
3997#if OMPT_SUPPORT
3998 ompt_frame_t *ompt_frame;
3999 if (ompt_enabled.enabled) {
4000 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4001 if (ompt_frame->enter_frame.ptr == NULL)
4002 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4003 }
4004 OMPT_STORE_RETURN_ADDRESS(global_tid);
4005#endif
4006// TODO: implicit barrier: should be exposed
4007#if USE_ITT_NOTIFY
4008 __kmp_threads[global_tid]->th.th_ident = loc;
4009#endif
4010 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4011#if OMPT_SUPPORT && OMPT_OPTIONAL
4012 if (ompt_enabled.enabled) {
4013 ompt_frame->enter_frame = ompt_data_none;
4014 }
4015#endif
4016
4017 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
4018 tree_reduce_block)) {
4019
4020 // only primary thread executes here (primary releases all other workers)
4021 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
4022 global_tid);
4023
4024 } else {
4025
4026 // should never reach this block
4027 KMP_ASSERT(0); // "unexpected method"
4028 }
4029 if (teams_swapped) {
4030 __kmp_restore_swapped_teams(th, team, task_state);
4031 }
4032
4033 if (__kmp_env_consistency_check)
4034 __kmp_pop_sync(global_tid, ct_reduce, loc);
4035
4036 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4037 global_tid, packed_reduction_method));
4038
4039 return;
4040}
4041
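/* Illustrative sketch (assumption): the blocking form differs from the nowait
   pair only in that the region ends with a barrier, which happens either
   inside __kmpc_reduce (tree method) or inside __kmpc_end_reduce (critical,
   empty and atomic methods). A lowering of "#pragma omp for reduction(+:sum)"
   would therefore resemble the nowait sketch above, but use this pair instead;
   names are again hypothetical:

     int ret = __kmpc_reduce(loc, gtid, 1, sizeof(int), &sum_local,
                             my_reduce_func, &crit);
     if (ret == 1) {                      // critical or tree path
       sum += sum_local;
       __kmpc_end_reduce(loc, gtid, &crit);
     } else if (ret == 2) {               // atomic path; end call still needed
       __kmpc_atomic_fixed4_add(loc, gtid, &sum, sum_local);
       __kmpc_end_reduce(loc, gtid, &crit);
     }                                    // ret == 0: barrier already done
*/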
4042#undef __KMP_GET_REDUCTION_METHOD
4043#undef __KMP_SET_REDUCTION_METHOD
4044
4045/* end of interface to fast scalable reduce routines */
4046
4047kmp_uint64 __kmpc_get_taskid() {
4048
4049 kmp_int32 gtid;
4050 kmp_info_t *thread;
4051
4052 gtid = __kmp_get_gtid();
4053 if (gtid < 0) {
4054 return 0;
4055 }
4056 thread = __kmp_thread_from_gtid(gtid);
4057 return thread->th.th_current_task->td_task_id;
4058
4059} // __kmpc_get_taskid
4060
4061kmp_uint64 __kmpc_get_parent_taskid() {
4062
4063 kmp_int32 gtid;
4064 kmp_info_t *thread;
4065 kmp_taskdata_t *parent_task;
4066
4067 gtid = __kmp_get_gtid();
4068 if (gtid < 0) {
4069 return 0;
4070 }
4071 thread = __kmp_thread_from_gtid(gtid);
4072 parent_task = thread->th.th_current_task->td_parent;
4073 return (parent_task == NULL ? 0 : parent_task->td_task_id);
4074
4075} // __kmpc_get_parent_taskid
4076
4088void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4089 const struct kmp_dim *dims) {
4090 __kmp_assert_valid_gtid(gtid);
4091 int j, idx;
4092 kmp_int64 last, trace_count;
4093 kmp_info_t *th = __kmp_threads[gtid];
4094 kmp_team_t *team = th->th.th_team;
4095 kmp_uint32 *flags;
4096 kmp_disp_t *pr_buf = th->th.th_dispatch;
4097 dispatch_shared_info_t *sh_buf;
4098
4099 KA_TRACE(
4100 20,
4101 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4102 gtid, num_dims, !team->t.t_serialized));
4103 KMP_DEBUG_ASSERT(dims != NULL);
4104 KMP_DEBUG_ASSERT(num_dims > 0);
4105
4106 if (team->t.t_serialized) {
4107 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4108 return; // no dependencies if team is serialized
4109 }
4110 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4111 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4112 // the next loop
4113 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4114
4115 // Save bounds info into allocated private buffer
4116 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4117 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4118 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4119 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4120 pr_buf->th_doacross_info[0] =
4121 (kmp_int64)num_dims; // first element is number of dimensions
4122  // Also save the address of num_done so it can be accessed later without
4123  // knowing the buffer index
4124 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4125 pr_buf->th_doacross_info[2] = dims[0].lo;
4126 pr_buf->th_doacross_info[3] = dims[0].up;
4127 pr_buf->th_doacross_info[4] = dims[0].st;
4128 last = 5;
4129 for (j = 1; j < num_dims; ++j) {
4130 kmp_int64
4131 range_length; // To keep ranges of all dimensions but the first dims[0]
4132 if (dims[j].st == 1) { // most common case
4133 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4134 range_length = dims[j].up - dims[j].lo + 1;
4135 } else {
4136 if (dims[j].st > 0) {
4137 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4138 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4139 } else { // negative increment
4140 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4141 range_length =
4142 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4143 }
4144 }
4145 pr_buf->th_doacross_info[last++] = range_length;
4146 pr_buf->th_doacross_info[last++] = dims[j].lo;
4147 pr_buf->th_doacross_info[last++] = dims[j].up;
4148 pr_buf->th_doacross_info[last++] = dims[j].st;
4149 }
4150
4151 // Compute total trip count.
4152 // Start with range of dims[0] which we don't need to keep in the buffer.
4153 if (dims[0].st == 1) { // most common case
4154 trace_count = dims[0].up - dims[0].lo + 1;
4155 } else if (dims[0].st > 0) {
4156 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4157 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4158 } else { // negative increment
4159 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4160 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4161 }
4162 for (j = 1; j < num_dims; ++j) {
4163 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4164 }
4165 KMP_DEBUG_ASSERT(trace_count > 0);
4166
4167  // Check whether the shared buffer is still occupied by another loop
4168  // (the one that used buffer index idx - __kmp_dispatch_num_buffers)
4169 if (idx != sh_buf->doacross_buf_idx) {
4170 // Shared buffer is occupied, wait for it to be free
4171 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4172 __kmp_eq_4, NULL);
4173 }
4174#if KMP_32_BIT_ARCH
4175 // Check if we are the first thread. After the CAS the first thread gets 0,
4176 // others get 1 if initialization is in progress, allocated pointer otherwise.
4177 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4178 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4179 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4180#else
4181 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4182 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4183#endif
4184 if (flags == NULL) {
4185 // we are the first thread, allocate the array of flags
4186 size_t size =
4187 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4188 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4189 KMP_MB();
4190 sh_buf->doacross_flags = flags;
4191 } else if (flags == (kmp_uint32 *)1) {
4192#if KMP_32_BIT_ARCH
4193 // initialization is still in progress, need to wait
4194 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4195#else
4196 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4197#endif
4198 KMP_YIELD(TRUE);
4199 KMP_MB();
4200 } else {
4201 KMP_MB();
4202 }
4203 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4204 pr_buf->th_doacross_flags =
4205 sh_buf->doacross_flags; // save private copy in order to not
4206 // touch shared buffer on each iteration
4207 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4208}
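/* Illustrative sketch (assumption): for a doacross loop such as
   "#pragma omp for ordered(2)" over i in [0,N) and j in [0,M), a compiler
   would fill one kmp_dim per loop dimension (the lo/up/st fields used above,
   with up being the inclusive upper bound) and call __kmpc_doacross_init on
   each thread before the loop body runs:

     struct kmp_dim dims[2];
     dims[0].lo = 0; dims[0].up = N - 1; dims[0].st = 1;
     dims[1].lo = 0; dims[1].up = M - 1; dims[1].st = 1;
     __kmpc_doacross_init(loc, gtid, 2, dims);
*/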
4209
4210void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4211 __kmp_assert_valid_gtid(gtid);
4212 kmp_int64 shft;
4213 size_t num_dims, i;
4214 kmp_uint32 flag;
4215 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4216 kmp_info_t *th = __kmp_threads[gtid];
4217 kmp_team_t *team = th->th.th_team;
4218 kmp_disp_t *pr_buf;
4219 kmp_int64 lo, up, st;
4220
4221 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4222 if (team->t.t_serialized) {
4223 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4224 return; // no dependencies if team is serialized
4225 }
4226
4227 // calculate sequential iteration number and check out-of-bounds condition
4228 pr_buf = th->th.th_dispatch;
4229 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4230 num_dims = (size_t)pr_buf->th_doacross_info[0];
4231 lo = pr_buf->th_doacross_info[2];
4232 up = pr_buf->th_doacross_info[3];
4233 st = pr_buf->th_doacross_info[4];
4234#if OMPT_SUPPORT && OMPT_OPTIONAL
4235 ompt_dependence_t deps[num_dims];
4236#endif
4237 if (st == 1) { // most common case
4238 if (vec[0] < lo || vec[0] > up) {
4239 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4240 "bounds [%lld,%lld]\n",
4241 gtid, vec[0], lo, up));
4242 return;
4243 }
4244 iter_number = vec[0] - lo;
4245 } else if (st > 0) {
4246 if (vec[0] < lo || vec[0] > up) {
4247 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4248 "bounds [%lld,%lld]\n",
4249 gtid, vec[0], lo, up));
4250 return;
4251 }
4252 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4253 } else { // negative increment
4254 if (vec[0] > lo || vec[0] < up) {
4255 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4256 "bounds [%lld,%lld]\n",
4257 gtid, vec[0], lo, up));
4258 return;
4259 }
4260 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4261 }
4262#if OMPT_SUPPORT && OMPT_OPTIONAL
4263 deps[0].variable.value = iter_number;
4264 deps[0].dependence_type = ompt_dependence_type_sink;
4265#endif
4266 for (i = 1; i < num_dims; ++i) {
4267 kmp_int64 iter, ln;
4268 size_t j = i * 4;
4269 ln = pr_buf->th_doacross_info[j + 1];
4270 lo = pr_buf->th_doacross_info[j + 2];
4271 up = pr_buf->th_doacross_info[j + 3];
4272 st = pr_buf->th_doacross_info[j + 4];
4273 if (st == 1) {
4274 if (vec[i] < lo || vec[i] > up) {
4275 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4276 "bounds [%lld,%lld]\n",
4277 gtid, vec[i], lo, up));
4278 return;
4279 }
4280 iter = vec[i] - lo;
4281 } else if (st > 0) {
4282 if (vec[i] < lo || vec[i] > up) {
4283 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4284 "bounds [%lld,%lld]\n",
4285 gtid, vec[i], lo, up));
4286 return;
4287 }
4288 iter = (kmp_uint64)(vec[i] - lo) / st;
4289 } else { // st < 0
4290 if (vec[i] > lo || vec[i] < up) {
4291 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4292 "bounds [%lld,%lld]\n",
4293 gtid, vec[i], lo, up));
4294 return;
4295 }
4296 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4297 }
4298 iter_number = iter + ln * iter_number;
4299#if OMPT_SUPPORT && OMPT_OPTIONAL
4300 deps[i].variable.value = iter;
4301 deps[i].dependence_type = ompt_dependence_type_sink;
4302#endif
4303 }
4304 shft = iter_number % 32; // use 32-bit granularity
4305 iter_number >>= 5; // divided by 32
4306 flag = 1 << shft;
4307 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4308 KMP_YIELD(TRUE);
4309 }
4310 KMP_MB();
4311#if OMPT_SUPPORT && OMPT_OPTIONAL
4312 if (ompt_enabled.ompt_callback_dependences) {
4313 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4314 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4315 }
4316#endif
4317 KA_TRACE(20,
4318 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4319 gtid, (iter_number << 5) + shft));
4320}
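/* Worked example (for illustration): with two dimensions, stride 1, and a
   kept range length of M for dimension 1, the sink vector (i, j) is
   linearized above as
       iter_number = (i - lo0) * M + (j - lo1),
   i.e. the row-major flattening a collapsed loop nest would use. The low
   5 bits of iter_number then select a bit inside one 32-bit flag word and
   the remaining bits select the word within doacross_flags.
*/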
4321
4322void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4323 __kmp_assert_valid_gtid(gtid);
4324 kmp_int64 shft;
4325 size_t num_dims, i;
4326 kmp_uint32 flag;
4327 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4328 kmp_info_t *th = __kmp_threads[gtid];
4329 kmp_team_t *team = th->th.th_team;
4330 kmp_disp_t *pr_buf;
4331 kmp_int64 lo, st;
4332
4333 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4334 if (team->t.t_serialized) {
4335 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4336 return; // no dependencies if team is serialized
4337 }
4338
4339 // calculate sequential iteration number (same as in "wait" but no
4340 // out-of-bounds checks)
4341 pr_buf = th->th.th_dispatch;
4342 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4343 num_dims = (size_t)pr_buf->th_doacross_info[0];
4344 lo = pr_buf->th_doacross_info[2];
4345 st = pr_buf->th_doacross_info[4];
4346#if OMPT_SUPPORT && OMPT_OPTIONAL
4347 ompt_dependence_t deps[num_dims];
4348#endif
4349 if (st == 1) { // most common case
4350 iter_number = vec[0] - lo;
4351 } else if (st > 0) {
4352 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4353 } else { // negative increment
4354 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4355 }
4356#if OMPT_SUPPORT && OMPT_OPTIONAL
4357 deps[0].variable.value = iter_number;
4358 deps[0].dependence_type = ompt_dependence_type_source;
4359#endif
4360 for (i = 1; i < num_dims; ++i) {
4361 kmp_int64 iter, ln;
4362 size_t j = i * 4;
4363 ln = pr_buf->th_doacross_info[j + 1];
4364 lo = pr_buf->th_doacross_info[j + 2];
4365 st = pr_buf->th_doacross_info[j + 4];
4366 if (st == 1) {
4367 iter = vec[i] - lo;
4368 } else if (st > 0) {
4369 iter = (kmp_uint64)(vec[i] - lo) / st;
4370 } else { // st < 0
4371 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4372 }
4373 iter_number = iter + ln * iter_number;
4374#if OMPT_SUPPORT && OMPT_OPTIONAL
4375 deps[i].variable.value = iter;
4376 deps[i].dependence_type = ompt_dependence_type_source;
4377#endif
4378 }
4379#if OMPT_SUPPORT && OMPT_OPTIONAL
4380 if (ompt_enabled.ompt_callback_dependences) {
4381 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4382 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4383 }
4384#endif
4385 shft = iter_number % 32; // use 32-bit granularity
4386 iter_number >>= 5; // divided by 32
4387 flag = 1 << shft;
4388 KMP_MB();
4389 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4390 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4391 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4392 (iter_number << 5) + shft));
4393}
4394
4395void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4396 __kmp_assert_valid_gtid(gtid);
4397 kmp_int32 num_done;
4398 kmp_info_t *th = __kmp_threads[gtid];
4399 kmp_team_t *team = th->th.th_team;
4400 kmp_disp_t *pr_buf = th->th.th_dispatch;
4401
4402 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4403 if (team->t.t_serialized) {
4404 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4405 return; // nothing to do
4406 }
4407 num_done =
4408 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4409 if (num_done == th->th.th_team_nproc) {
4410 // we are the last thread, need to free shared resources
4411 int idx = pr_buf->th_doacross_buf_idx - 1;
4412 dispatch_shared_info_t *sh_buf =
4413 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4414 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4415 (kmp_int64)&sh_buf->doacross_num_done);
4416 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4417 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4418 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4419 sh_buf->doacross_flags = NULL;
4420 sh_buf->doacross_num_done = 0;
4421 sh_buf->doacross_buf_idx +=
4422 __kmp_dispatch_num_buffers; // free buffer for future re-use
4423 }
4424 // free private resources (need to keep buffer index forever)
4425 pr_buf->th_doacross_flags = NULL;
4426 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4427 pr_buf->th_doacross_info = NULL;
4428 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4429}
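/* Illustrative sketch (assumption): inside the lowered loop body, each
   "#pragma omp ordered depend(sink: ...)" becomes a __kmpc_doacross_wait on
   the sink iteration vector, "depend(source)" becomes a __kmpc_doacross_post
   on the current iteration, and __kmpc_doacross_fini runs once per thread
   after the loop. lb/ub stand for this thread's chunk bounds:

     for (kmp_int64 i = lb; i <= ub; ++i) {
       for (kmp_int64 j = 0; j < M; ++j) {
         kmp_int64 sink[2] = {i - 1, j};
         __kmpc_doacross_wait(loc, gtid, sink); // depend(sink: i-1, j)
         // ... loop body ...
         kmp_int64 src[2] = {i, j};
         __kmpc_doacross_post(loc, gtid, src);  // depend(source)
       }
     }
     __kmpc_doacross_fini(loc, gtid);
*/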
4430
4431/* OpenMP 5.1 Memory Management routines */
4432void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4433 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4434}
4435
4436void *omp_aligned_alloc(size_t align, size_t size,
4437 omp_allocator_handle_t allocator) {
4438 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4439}
4440
4441void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4442 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4443}
4444
4445void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4446 omp_allocator_handle_t allocator) {
4447 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4448}
4449
4450void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4451 omp_allocator_handle_t free_allocator) {
4452 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4453 free_allocator);
4454}
4455
4456void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4457 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4458}
4459/* end of OpenMP 5.1 Memory Management routines */
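/* Illustrative usage: these are the standard OpenMP allocation routines, so
   user code can, for example, request 64-byte aligned storage from the
   predefined default allocator and release it again:

     double *buf = (double *)omp_aligned_alloc(64, n * sizeof(double),
                                               omp_default_mem_alloc);
     if (buf) {
       // ... use buf ...
       omp_free(buf, omp_default_mem_alloc);
     }
*/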
4460
4461int __kmpc_get_target_offload(void) {
4462 if (!__kmp_init_serial) {
4463 __kmp_serial_initialize();
4464 }
4465 return __kmp_target_offload;
4466}
4467
4468int __kmpc_pause_resource(kmp_pause_status_t level) {
4469 if (!__kmp_init_serial) {
4470 return 1; // Can't pause if runtime is not initialized
4471 }
4472 return __kmp_pause_resource(level);
4473}
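/* Illustrative sketch (assumption): the user-visible OpenMP 5.0 routines
   omp_pause_resource()/omp_pause_resource_all() are expected to reach the
   runtime through entry points like this one; a soft pause releases reusable
   runtime resources while keeping the library initialized, e.g.:

     if (omp_pause_resource_all(omp_pause_soft) != 0) {
       // pause request was not honored
     }
*/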
4474
4475void __kmpc_error(ident_t *loc, int severity, const char *message) {
4476 if (!__kmp_init_serial)
4477 __kmp_serial_initialize();
4478
4479 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4480
4481#if OMPT_SUPPORT
4482 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4483 ompt_callbacks.ompt_callback(ompt_callback_error)(
4484 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4485 OMPT_GET_RETURN_ADDRESS(0));
4486 }
4487#endif // OMPT_SUPPORT
4488
4489 char *src_loc;
4490 if (loc && loc->psource) {
4491 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4492 src_loc =
4493 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4494 __kmp_str_loc_free(&str_loc);
4495 } else {
4496 src_loc = __kmp_str_format("unknown");
4497 }
4498
4499 if (severity == severity_warning)
4500 KMP_WARNING(UserDirectedWarning, src_loc, message);
4501 else
4502 KMP_FATAL(UserDirectedError, src_loc, message);
4503
4504 __kmp_str_free(&src_loc);
4505}
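/* Illustrative sketch (assumption): a compiler would lower an OpenMP error
   directive with execution-time semantics, such as
   #pragma omp error at(execution) severity(warning) message("check input"),
   into a call of roughly this form:

     __kmpc_error(loc, severity_warning, "check input");
*/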
4506
4507// Mark begin of scope directive.
4508void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4509// reserved is for extension of scope directive and not used.
4510#if OMPT_SUPPORT && OMPT_OPTIONAL
4511 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4512 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4513 int tid = __kmp_tid_from_gtid(gtid);
4514 ompt_callbacks.ompt_callback(ompt_callback_work)(
4515 ompt_work_scope, ompt_scope_begin,
4516 &(team->t.ompt_team_info.parallel_data),
4517 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4518 OMPT_GET_RETURN_ADDRESS(0));
4519 }
4520#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4521}
4522
4523// Mark end of scope directive
4524void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4525// reserved is for extension of scope directive and not used.
4526#if OMPT_SUPPORT && OMPT_OPTIONAL
4527 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4528 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4529 int tid = __kmp_tid_from_gtid(gtid);
4530 ompt_callbacks.ompt_callback(ompt_callback_work)(
4531 ompt_work_scope, ompt_scope_end,
4532 &(team->t.ompt_team_info.parallel_data),
4533 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4534 OMPT_GET_RETURN_ADDRESS(0));
4535 }
4536#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4537}
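/* Illustrative sketch (assumption): a structured block under
   "#pragma omp scope" would be bracketed by these two calls; the reserved
   argument is unused, so NULL is passed here for illustration:

     __kmpc_scope(loc, gtid, NULL);
     // ... structured block ...
     __kmpc_end_scope(loc, gtid, NULL);
*/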
4538
4539#ifdef KMP_USE_VERSION_SYMBOLS
4540// For GOMP compatibility there are two versions of each omp_* API.
4541// One is the plain C symbol and one is the Fortran symbol with an appended
4542// underscore. When we implement a specific ompc_* version of an omp_*
4543// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4544// instead of the Fortran versions in kmp_ftn_entry.h
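/* Illustrative sketch (assumption): on ELF targets a versioned alias of this
   kind is typically produced with a .symver directive, conceptually similar to

     __asm__(".symver ompc_set_affinity_format, omp_set_affinity_format@@OMP_5.0");

   the KMP_VERSION_OMPC_SYMBOL macro used below encapsulates the actual
   mechanism. */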
4545extern "C" {
4546// Have to undef these from omp.h so they aren't translated into
4547// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4548#ifdef omp_set_affinity_format
4549#undef omp_set_affinity_format
4550#endif
4551#ifdef omp_get_affinity_format
4552#undef omp_get_affinity_format
4553#endif
4554#ifdef omp_display_affinity
4555#undef omp_display_affinity
4556#endif
4557#ifdef omp_capture_affinity
4558#undef omp_capture_affinity
4559#endif
4560KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4561 "OMP_5.0");
4562KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4563 "OMP_5.0");
4564KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4565 "OMP_5.0");
4566KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4567 "OMP_5.0");
4568} // extern "C"
4569#endif