LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #define __KMP_IMP
15 #include "omp.h" /* extern "C" declarations of user-visible routines */
16 #include "kmp.h"
17 #include "kmp_error.h"
18 #include "kmp_i18n.h"
19 #include "kmp_itt.h"
20 #include "kmp_lock.h"
21 #include "kmp_stats.h"
22 
23 #if OMPT_SUPPORT
24 #include "ompt-specific.h"
25 #endif
26 
27 #define MAX_MESSAGE 512
28 
29 // flags will be used in the future, e.g. to implement openmp_strict library
30 // restrictions
31 
40 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
41  // By default __kmpc_begin() is a no-op.
42  char *env;
43  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
44  __kmp_str_match_true(env)) {
45  __kmp_middle_initialize();
46  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
47  } else if (__kmp_ignore_mppbeg() == FALSE) {
48  // By default __kmp_ignore_mppbeg() returns TRUE.
49  __kmp_internal_begin();
50  KC_TRACE(10, ("__kmpc_begin: called\n"));
51  }
52 }
53 
62 void __kmpc_end(ident_t *loc) {
63  // By default, __kmp_ignore_mppend() returns TRUE, which makes the __kmpc_end()
64  // call a no-op. However, this can be overridden with the KMP_IGNORE_MPPEND
65  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
66  // returns FALSE and __kmpc_end() will unregister this root (which can cause
67  // library shutdown).
68  if (__kmp_ignore_mppend() == FALSE) {
69  KC_TRACE(10, ("__kmpc_end: called\n"));
70  KA_TRACE(30, ("__kmpc_end\n"));
71 
72  __kmp_internal_end_thread(-1);
73  }
74 #if KMP_OS_WINDOWS && OMPT_SUPPORT
75  // Normal exit process on Windows does not allow worker threads of the final
76  // parallel region to finish reporting their events, so shutting down the
77  // library here fixes the issue at least for the cases where __kmpc_end() is
78  // placed properly.
79  if (ompt_enabled.enabled)
80  __kmp_internal_end_library(__kmp_gtid_get_specific());
81 #endif
82 }
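// Illustrative sketch (editorial, not part of the RTL; names are hypothetical):
// a compiler may bracket the main program with these calls, which are no-ops
// unless the environment overrides them:
//
//   static ident_t loc_main = {0, KMP_IDENT_KMPC, 0, 0, ";file;main;1;1;;"};
//   int main() {
//     __kmpc_begin(&loc_main, 0); // honors KMP_INITIAL_THREAD_BIND /
//                                 // KMP_IGNORE_MPPBEG (see above)
//     /* ...program... */
//     __kmpc_end(&loc_main);      // honors KMP_IGNORE_MPPEND (see above)
//   }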
83 
102 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
103  kmp_int32 gtid = __kmp_entry_gtid();
104 
105  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
106 
107  return gtid;
108 }
109 
124 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
125  KC_TRACE(10,
126  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
127 
128  return TCR_4(__kmp_all_nth);
129 }
130 
137 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
138  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
139  return __kmp_tid_from_gtid(__kmp_entry_gtid());
140 }
141 
147 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
148  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
149 
150  return __kmp_entry_thread()->th.th_team->t.t_nproc;
151 }
152 
159 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
160 #ifndef KMP_DEBUG
161 
162  return TRUE;
163 
164 #else
165 
166  const char *semi2;
167  const char *semi3;
168  int line_no;
169 
170  if (__kmp_par_range == 0) {
171  return TRUE;
172  }
173  semi2 = loc->psource;
174  if (semi2 == NULL) {
175  return TRUE;
176  }
177  semi2 = strchr(semi2, ';');
178  if (semi2 == NULL) {
179  return TRUE;
180  }
181  semi2 = strchr(semi2 + 1, ';');
182  if (semi2 == NULL) {
183  return TRUE;
184  }
185  if (__kmp_par_range_filename[0]) {
186  const char *name = semi2 - 1;
187  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
188  name--;
189  }
190  if ((*name == '/') || (*name == ';')) {
191  name++;
192  }
193  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
194  return __kmp_par_range < 0;
195  }
196  }
197  semi3 = strchr(semi2 + 1, ';');
198  if (__kmp_par_range_routine[0]) {
199  if ((semi3 != NULL) && (semi3 > semi2) &&
200  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
201  return __kmp_par_range < 0;
202  }
203  }
204  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
205  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
206  return __kmp_par_range > 0;
207  }
208  return __kmp_par_range < 0;
209  }
210  return TRUE;
211 
212 #endif /* KMP_DEBUG */
213 }
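// Illustrative note (editorial, not in the upstream sources): the debug-only
// filter above assumes loc->psource is a semicolon-separated string of the
// form ";file;routine;first_line;...".  For a hypothetical location
//
//   psource = ";foo.c;bar;42;57;;"
//
// it extracts "foo.c" (between the 1st and 2nd ';'), "bar" (between the 2nd
// and 3rd ';'), scans 42 as the line number, and compares them against
// __kmp_par_range_filename, __kmp_par_range_routine and the range
// [__kmp_par_range_lb, __kmp_par_range_ub] (the KMP_PAR_RANGE debug setting).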
214 
221 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
222  return __kmp_entry_thread()->th.th_root->r.r_active;
223 }
224 
234 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
235  kmp_int32 num_threads) {
236  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
237  global_tid, num_threads));
238 
239  __kmp_push_num_threads(loc, global_tid, num_threads);
240 }
241 
242 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
243  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
244 
245  /* the num_threads are automatically popped */
246 }
247 
248 #if OMP_40_ENABLED
249 
250 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
251  kmp_int32 proc_bind) {
252  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
253  proc_bind));
254 
255  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
256 }
257 
258 #endif /* OMP_40_ENABLED */
259 
270 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
271  int gtid = __kmp_entry_gtid();
272 
273 #if (KMP_STATS_ENABLED)
274  // If we were in a serial region, then stop the serial timer, record
275  // the event, and start the parallel region timer.
276  stats_state_e previous_state = KMP_GET_THREAD_STATE();
277  if (previous_state == stats_state_e::SERIAL_REGION) {
278  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
279  } else {
280  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
281  }
282  int inParallel = __kmpc_in_parallel(loc);
283  if (inParallel) {
284  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
285  } else {
286  KMP_COUNT_BLOCK(OMP_PARALLEL);
287  }
288 #endif
289 
290  // maybe saving thr_state would be enough here
291  {
292  va_list ap;
293  va_start(ap, microtask);
294 
295 #if OMPT_SUPPORT
296  ompt_frame_t *ompt_frame;
297  if (ompt_enabled.enabled) {
298  kmp_info_t *master_th = __kmp_threads[gtid];
299  kmp_team_t *parent_team = master_th->th.th_team;
300  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
301  if (lwt)
302  ompt_frame = &(lwt->ompt_task_info.frame);
303  else {
304  int tid = __kmp_tid_from_gtid(gtid);
305  ompt_frame = &(
306  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
307  }
308  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
309  OMPT_STORE_RETURN_ADDRESS(gtid);
310  }
311 #endif
312 
313 #if INCLUDE_SSC_MARKS
314  SSC_MARK_FORKING();
315 #endif
316  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
317  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
318  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
319 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
320 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
321  &ap
322 #else
323  ap
324 #endif
325  );
326 #if INCLUDE_SSC_MARKS
327  SSC_MARK_JOINING();
328 #endif
329  __kmp_join_call(loc, gtid
330 #if OMPT_SUPPORT
331  ,
332  fork_context_intel
333 #endif
334  );
335 
336  va_end(ap);
337  }
338 
339 #if KMP_STATS_ENABLED
340  if (previous_state == stats_state_e::SERIAL_REGION) {
341  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
342  } else {
343  KMP_POP_PARTITIONED_TIMER();
344  }
345 #endif // KMP_STATS_ENABLED
346 }
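// Illustrative sketch (editorial; the outlined function name and shared
// variable are hypothetical): a compiler lowers "#pragma omp parallel" into
// an outlined microtask plus a single call to __kmpc_fork_call, which packs
// the trailing arguments into the va_list consumed by __kmp_fork_call above:
//
//   void outlined(kmp_int32 *global_tid, kmp_int32 *bound_tid, int *x) {
//     /* ...parallel region body operating on *x... */
//   }
//   ...
//   int x = 0;
//   __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined, &x);
//   // every thread of the new team invokes outlined(&gtid, &tid, &x)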
347 
348 #if OMP_40_ENABLED
349 
360 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
361  kmp_int32 num_teams, kmp_int32 num_threads) {
362  KA_TRACE(20,
363  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
364  global_tid, num_teams, num_threads));
365 
366  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
367 }
368 
379 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
380  ...) {
381  int gtid = __kmp_entry_gtid();
382  kmp_info_t *this_thr = __kmp_threads[gtid];
383  va_list ap;
384  va_start(ap, microtask);
385 
386  KMP_COUNT_BLOCK(OMP_TEAMS);
387 
388  // remember teams entry point and nesting level
389  this_thr->th.th_teams_microtask = microtask;
390  this_thr->th.th_teams_level =
391  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
392 
393 #if OMPT_SUPPORT
394  kmp_team_t *parent_team = this_thr->th.th_team;
395  int tid = __kmp_tid_from_gtid(gtid);
396  if (ompt_enabled.enabled) {
397  parent_team->t.t_implicit_task_taskdata[tid]
398  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
399  }
400  OMPT_STORE_RETURN_ADDRESS(gtid);
401 #endif
402 
403  // check if __kmpc_push_num_teams was called; set the default number of
404  // teams otherwise
405  if (this_thr->th.th_teams_size.nteams == 0) {
406  __kmp_push_num_teams(loc, gtid, 0, 0);
407  }
408  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
409  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
410  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
411 
412  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
413  VOLATILE_CAST(microtask_t)
414  __kmp_teams_master, // "wrapped" task
415  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
416 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
417  &ap
418 #else
419  ap
420 #endif
421  );
422  __kmp_join_call(loc, gtid
423 #if OMPT_SUPPORT
424  ,
425  fork_context_intel
426 #endif
427  );
428 
429  this_thr->th.th_teams_microtask = NULL;
430  this_thr->th.th_teams_level = 0;
431  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
432  va_end(ap);
433 }
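// Illustrative sketch (editorial, names hypothetical): a teams construct such
// as "#pragma omp teams num_teams(4) thread_limit(8)" is typically lowered to
//
//   __kmpc_push_num_teams(&loc, gtid, 4, 8);
//   __kmpc_fork_teams(&loc, /*argc=*/1, (kmpc_micro)outlined_teams, &x);
//
// where outlined_teams has the same (gtid, tid, shared args...) shape as a
// parallel microtask; see __kmpc_fork_call above.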
434 #endif /* OMP_40_ENABLED */
435 
436 // I don't think this function should ever have been exported.
437 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
438 // OpenMP code ever called it, but it's been exported from the RTL for so
439 // long that I'm afraid to remove the definition.
440 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
441 
454 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
455 // The implementation is now in kmp_runtime.cpp so that it can share static
456 // functions with kmp_fork_call since the tasks to be done are similar in
457 // each case.
458 #if OMPT_SUPPORT
459  OMPT_STORE_RETURN_ADDRESS(global_tid);
460 #endif
461  __kmp_serialized_parallel(loc, global_tid);
462 }
463 
471 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
472  kmp_internal_control_t *top;
473  kmp_info_t *this_thr;
474  kmp_team_t *serial_team;
475 
476  KC_TRACE(10,
477  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
478 
479  /* skip all this code for autopar serialized loops since it results in
480  unacceptable overhead */
481  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
482  return;
483 
484  // Not autopar code
485  if (!TCR_4(__kmp_init_parallel))
486  __kmp_parallel_initialize();
487 
488  this_thr = __kmp_threads[global_tid];
489  serial_team = this_thr->th.th_serial_team;
490 
491 #if OMP_45_ENABLED
492  kmp_task_team_t *task_team = this_thr->th.th_task_team;
493 
494  // we need to wait for the proxy tasks before finishing the thread
495  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
496  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
497 #endif
498 
499  KMP_MB();
500  KMP_DEBUG_ASSERT(serial_team);
501  KMP_ASSERT(serial_team->t.t_serialized);
502  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
503  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
504  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
505  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
506 
507 #if OMPT_SUPPORT
508  if (ompt_enabled.enabled &&
509  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
510  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
511  if (ompt_enabled.ompt_callback_implicit_task) {
512  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
513  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
514  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
515  }
516 
517  // reset/clear the task id only after unlinking the task
518  ompt_data_t *parent_task_data;
519  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
520 
521  if (ompt_enabled.ompt_callback_parallel_end) {
522  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
523  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
524  ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
525  }
526  __ompt_lw_taskteam_unlink(this_thr);
527  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
528  }
529 #endif
530 
531  /* If necessary, pop the internal control stack values and replace the team
532  * values */
533  top = serial_team->t.t_control_stack_top;
534  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
535  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
536  serial_team->t.t_control_stack_top = top->next;
537  __kmp_free(top);
538  }
539 
540  // if( serial_team -> t.t_serialized > 1 )
541  serial_team->t.t_level--;
542 
543  /* pop dispatch buffers stack */
544  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
545  {
546  dispatch_private_info_t *disp_buffer =
547  serial_team->t.t_dispatch->th_disp_buffer;
548  serial_team->t.t_dispatch->th_disp_buffer =
549  serial_team->t.t_dispatch->th_disp_buffer->next;
550  __kmp_free(disp_buffer);
551  }
552 #if OMP_50_ENABLED
553  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
554 #endif
555 
556  --serial_team->t.t_serialized;
557  if (serial_team->t.t_serialized == 0) {
558 
559 /* return to the parallel section */
560 
561 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
562  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
563  __kmp_clear_x87_fpu_status_word();
564  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
565  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
566  }
567 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
568 
569  this_thr->th.th_team = serial_team->t.t_parent;
570  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
571 
572  /* restore values cached in the thread */
573  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
574  this_thr->th.th_team_master =
575  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
576  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
577 
578  /* TODO the below shouldn't need to be adjusted for serialized teams */
579  this_thr->th.th_dispatch =
580  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
581 
582  __kmp_pop_current_task_from_thread(this_thr);
583 
584  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
585  this_thr->th.th_current_task->td_flags.executing = 1;
586 
587  if (__kmp_tasking_mode != tskm_immediate_exec) {
588  // Copy the task team from the new child / old parent team to the thread.
589  this_thr->th.th_task_team =
590  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
591  KA_TRACE(20,
592  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
593  "team %p\n",
594  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
595  }
596  } else {
597  if (__kmp_tasking_mode != tskm_immediate_exec) {
598  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
599  "depth of serial team %p to %d\n",
600  global_tid, serial_team, serial_team->t.t_serialized));
601  }
602  }
603 
604  if (__kmp_env_consistency_check)
605  __kmp_pop_parallel(global_tid, NULL);
606 #if OMPT_SUPPORT
607  if (ompt_enabled.enabled)
608  this_thr->th.ompt_thread_info.state =
609  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
610  : ompt_state_work_parallel);
611 #endif
612 }
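// Illustrative sketch (editorial): when a parallel region is known to be
// serialized, e.g. "#pragma omp parallel if(0)", the compiler can skip the
// fork machinery and call the outlined body directly between the two entry
// points above (outlined, x and zero are hypothetical names):
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   kmp_int32 zero = 0;
//   __kmpc_serialized_parallel(&loc, gtid);
//   outlined(&gtid, &zero, &x);
//   __kmpc_end_serialized_parallel(&loc, gtid);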
613 
622 void __kmpc_flush(ident_t *loc) {
623  KC_TRACE(10, ("__kmpc_flush: called\n"));
624 
625  /* need an explicit __mf() here since the library uses volatile instead */
626  KMP_MB(); /* Flush all pending memory write invalidates. */
627 
628 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
629 #if KMP_MIC
630 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
631 // We shouldn't need it, though, since the ABI rules require that
632 // * If the compiler generates NGO stores it also generates the fence
633 // * If users hand-code NGO stores they should insert the fence
634 // therefore no incomplete unordered stores should be visible.
635 #else
636  // C74404
637  // This is to address non-temporal store instructions (sfence needed).
638  // The clflush instruction is also addressed (mfence needed).
639  // Probably the non-temporal load movntdqa instruction should also be
640  // addressed.
641  // mfence is an SSE2 instruction. Do not execute it if the CPU lacks SSE2.
642  if (!__kmp_cpuinfo.initialized) {
643  __kmp_query_cpuid(&__kmp_cpuinfo);
644  }
645  if (!__kmp_cpuinfo.sse2) {
646  // CPU cannot execute SSE2 instructions.
647  } else {
648 #if KMP_COMPILER_ICC
649  _mm_mfence();
650 #elif KMP_COMPILER_MSVC
651  MemoryBarrier();
652 #else
653  __sync_synchronize();
654 #endif // KMP_COMPILER_ICC
655  }
656 #endif // KMP_MIC
657 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
658 // Nothing to see here; move along.
659 #elif KMP_ARCH_PPC64
660 // Nothing needed here (we have a real MB above).
661 #if KMP_OS_CNK
662  // The flushing thread needs to yield here; this prevents a
663  // busy-waiting thread from saturating the pipeline. flush is
664  // often used in loops like this:
665  // while (!flag) {
666  // #pragma omp flush(flag)
667  // }
668  // and adding the yield here is good for at least a 10x speedup
669  // when running >2 threads per core (on the NAS LU benchmark).
670  __kmp_yield(TRUE);
671 #endif
672 #else
673 #error Unknown or unsupported architecture
674 #endif
675 
676 #if OMPT_SUPPORT && OMPT_OPTIONAL
677  if (ompt_enabled.ompt_callback_flush) {
678  ompt_callbacks.ompt_callback(ompt_callback_flush)(
679  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
680  }
681 #endif
682 }
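// Illustrative sketch (editorial): "#pragma omp flush" (with or without a
// list) is lowered to a single call,
//
//   __kmpc_flush(&loc);
//
// The flushed-variable list is not passed down; the runtime issues a full
// memory fence chosen per architecture as shown above.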
683 
684 /* -------------------------------------------------------------------------- */
692 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
693  KMP_COUNT_BLOCK(OMP_BARRIER);
694  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
695 
696  if (!TCR_4(__kmp_init_parallel))
697  __kmp_parallel_initialize();
698 
699  if (__kmp_env_consistency_check) {
700  if (loc == 0) {
701  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
702  }
703 
704  __kmp_check_barrier(global_tid, ct_barrier, loc);
705  }
706 
707 #if OMPT_SUPPORT
708  ompt_frame_t *ompt_frame;
709  if (ompt_enabled.enabled) {
710  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
711  if (ompt_frame->enter_frame.ptr == NULL)
712  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
713  OMPT_STORE_RETURN_ADDRESS(global_tid);
714  }
715 #endif
716  __kmp_threads[global_tid]->th.th_ident = loc;
717  // TODO: explicit barrier_wait_id:
718  // this function is called when the 'barrier' directive is present or at the
719  // implicit barrier at the end of a worksharing construct.
720  // 1) better to add a per-thread barrier counter to a thread data structure
721  // 2) set to 0 when a new team is created
722  // 3) no sync is required
723 
724  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
725 #if OMPT_SUPPORT && OMPT_OPTIONAL
726  if (ompt_enabled.enabled) {
727  ompt_frame->enter_frame = ompt_data_none;
728  }
729 #endif
730 }
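// Illustrative sketch (editorial): an explicit "#pragma omp barrier", and the
// implicit barrier at the end of a worksharing construct without "nowait",
// are both lowered to
//
//   __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));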
731 
732 /* The BARRIER for a MASTER section is always explicit */
739 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
740  int status = 0;
741 
742  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
743 
744  if (!TCR_4(__kmp_init_parallel))
745  __kmp_parallel_initialize();
746 
747  if (KMP_MASTER_GTID(global_tid)) {
748  KMP_COUNT_BLOCK(OMP_MASTER);
749  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
750  status = 1;
751  }
752 
753 #if OMPT_SUPPORT && OMPT_OPTIONAL
754  if (status) {
755  if (ompt_enabled.ompt_callback_master) {
756  kmp_info_t *this_thr = __kmp_threads[global_tid];
757  kmp_team_t *team = this_thr->th.th_team;
758 
759  int tid = __kmp_tid_from_gtid(global_tid);
760  ompt_callbacks.ompt_callback(ompt_callback_master)(
761  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
762  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
763  OMPT_GET_RETURN_ADDRESS(0));
764  }
765  }
766 #endif
767 
768  if (__kmp_env_consistency_check) {
769 #if KMP_USE_DYNAMIC_LOCK
770  if (status)
771  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
772  else
773  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
774 #else
775  if (status)
776  __kmp_push_sync(global_tid, ct_master, loc, NULL);
777  else
778  __kmp_check_sync(global_tid, ct_master, loc, NULL);
779 #endif
780  }
781 
782  return status;
783 }
784 
793 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
794  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
795 
796  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
797  KMP_POP_PARTITIONED_TIMER();
798 
799 #if OMPT_SUPPORT && OMPT_OPTIONAL
800  kmp_info_t *this_thr = __kmp_threads[global_tid];
801  kmp_team_t *team = this_thr->th.th_team;
802  if (ompt_enabled.ompt_callback_master) {
803  int tid = __kmp_tid_from_gtid(global_tid);
804  ompt_callbacks.ompt_callback(ompt_callback_master)(
805  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
806  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
807  OMPT_GET_RETURN_ADDRESS(0));
808  }
809 #endif
810 
811  if (__kmp_env_consistency_check) {
812  if (global_tid < 0)
813  KMP_WARNING(ThreadIdentInvalid);
814 
815  if (KMP_MASTER_GTID(global_tid))
816  __kmp_pop_sync(global_tid, ct_master, loc);
817  }
818 }
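// Illustrative sketch (editorial): "#pragma omp master" has no implied
// barrier; the compiler simply guards the block with the return value:
//
//   if (__kmpc_master(&loc, gtid)) {
//     /* ...master-only code... */
//     __kmpc_end_master(&loc, gtid);
//   }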
819 
827 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
828  int cid = 0;
829  kmp_info_t *th;
830  KMP_DEBUG_ASSERT(__kmp_init_serial);
831 
832  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
833 
834  if (!TCR_4(__kmp_init_parallel))
835  __kmp_parallel_initialize();
836 
837 #if USE_ITT_BUILD
838  __kmp_itt_ordered_prep(gtid);
839 // TODO: ordered_wait_id
840 #endif /* USE_ITT_BUILD */
841 
842  th = __kmp_threads[gtid];
843 
844 #if OMPT_SUPPORT && OMPT_OPTIONAL
845  kmp_team_t *team;
846  ompt_wait_id_t lck;
847  void *codeptr_ra;
848  if (ompt_enabled.enabled) {
849  OMPT_STORE_RETURN_ADDRESS(gtid);
850  team = __kmp_team_from_gtid(gtid);
851  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
852  /* OMPT state update */
853  th->th.ompt_thread_info.wait_id = lck;
854  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
855 
856  /* OMPT event callback */
857  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
858  if (ompt_enabled.ompt_callback_mutex_acquire) {
859  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
860  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
861  codeptr_ra);
862  }
863  }
864 #endif
865 
866  if (th->th.th_dispatch->th_deo_fcn != 0)
867  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
868  else
869  __kmp_parallel_deo(&gtid, &cid, loc);
870 
871 #if OMPT_SUPPORT && OMPT_OPTIONAL
872  if (ompt_enabled.enabled) {
873  /* OMPT state update */
874  th->th.ompt_thread_info.state = ompt_state_work_parallel;
875  th->th.ompt_thread_info.wait_id = 0;
876 
877  /* OMPT event callback */
878  if (ompt_enabled.ompt_callback_mutex_acquired) {
879  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
880  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
881  }
882  }
883 #endif
884 
885 #if USE_ITT_BUILD
886  __kmp_itt_ordered_start(gtid);
887 #endif /* USE_ITT_BUILD */
888 }
889 
897 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
898  int cid = 0;
899  kmp_info_t *th;
900 
901  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
902 
903 #if USE_ITT_BUILD
904  __kmp_itt_ordered_end(gtid);
905 // TODO: ordered_wait_id
906 #endif /* USE_ITT_BUILD */
907 
908  th = __kmp_threads[gtid];
909 
910  if (th->th.th_dispatch->th_dxo_fcn != 0)
911  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
912  else
913  __kmp_parallel_dxo(&gtid, &cid, loc);
914 
915 #if OMPT_SUPPORT && OMPT_OPTIONAL
916  OMPT_STORE_RETURN_ADDRESS(gtid);
917  if (ompt_enabled.ompt_callback_mutex_released) {
918  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
919  ompt_mutex_ordered,
920  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
921  ->t.t_ordered.dt.t_value,
922  OMPT_LOAD_RETURN_ADDRESS(gtid));
923  }
924 #endif
925 }
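// Illustrative sketch (editorial): within an ordered loop, the body of
// "#pragma omp ordered" is bracketed by the two entry points above so that
// iterations are released in sequence:
//
//   __kmpc_ordered(&loc, gtid);
//   /* ...code that must execute in iteration order... */
//   __kmpc_end_ordered(&loc, gtid);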
926 
927 #if KMP_USE_DYNAMIC_LOCK
928 
929 static __forceinline void
930 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
931  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
932  // Pointer to the allocated indirect lock is written to crit, while indexing
933  // is ignored.
934  void *idx;
935  kmp_indirect_lock_t **lck;
936  lck = (kmp_indirect_lock_t **)crit;
937  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
938  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
939  KMP_SET_I_LOCK_LOCATION(ilk, loc);
940  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
941  KA_TRACE(20,
942  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
943 #if USE_ITT_BUILD
944  __kmp_itt_critical_creating(ilk->lock, loc);
945 #endif
946  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
947  if (status == 0) {
948 #if USE_ITT_BUILD
949  __kmp_itt_critical_destroyed(ilk->lock);
950 #endif
951  // We don't really need to destroy the unclaimed lock here since it will be
952  // cleaned up at program exit.
953  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
954  }
955  KMP_DEBUG_ASSERT(*lck != NULL);
956 }
957 
958 // Fast-path acquire tas lock
959 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
960  { \
961  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
962  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
963  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
964  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
965  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
966  kmp_uint32 spins; \
967  KMP_FSYNC_PREPARE(l); \
968  KMP_INIT_YIELD(spins); \
969  if (TCR_4(__kmp_nth) > \
970  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
971  KMP_YIELD(TRUE); \
972  } else { \
973  KMP_YIELD_SPIN(spins); \
974  } \
975  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
976  while ( \
977  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
978  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
979  __kmp_spin_backoff(&backoff); \
980  if (TCR_4(__kmp_nth) > \
981  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
982  KMP_YIELD(TRUE); \
983  } else { \
984  KMP_YIELD_SPIN(spins); \
985  } \
986  } \
987  } \
988  KMP_FSYNC_ACQUIRED(l); \
989  }
990 
991 // Fast-path test tas lock
992 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
993  { \
994  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
995  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
996  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
997  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
998  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
999  }
1000 
1001 // Fast-path release tas lock
1002 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1003  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
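// Illustrative usage of the TAS fast paths above (editorial sketch): the lock
// word holds KMP_LOCK_FREE(tas) when free and KMP_LOCK_BUSY(gtid + 1, tas)
// when owned, so a dynamically initialized TAS critical section reduces to
//
//   int acquired;
//   KMP_TEST_TAS_LOCK(lck, gtid, acquired); // single non-blocking attempt
//   if (!acquired)
//     KMP_ACQUIRE_TAS_LOCK(lck, gtid);      // spin with backoff and yields
//   /* ...critical section... */
//   KMP_RELEASE_TAS_LOCK(lck, gtid);        // store KMP_LOCK_FREE(tas) back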
1004 
1005 #if KMP_USE_FUTEX
1006 
1007 #include <sys/syscall.h>
1008 #include <unistd.h>
1009 #ifndef FUTEX_WAIT
1010 #define FUTEX_WAIT 0
1011 #endif
1012 #ifndef FUTEX_WAKE
1013 #define FUTEX_WAKE 1
1014 #endif
1015 
1016 // Fast-path acquire futex lock
1017 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1018  { \
1019  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1020  kmp_int32 gtid_code = (gtid + 1) << 1; \
1021  KMP_MB(); \
1022  KMP_FSYNC_PREPARE(ftx); \
1023  kmp_int32 poll_val; \
1024  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1025  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1026  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1027  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1028  if (!cond) { \
1029  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1030  poll_val | \
1031  KMP_LOCK_BUSY(1, futex))) { \
1032  continue; \
1033  } \
1034  poll_val |= KMP_LOCK_BUSY(1, futex); \
1035  } \
1036  kmp_int32 rc; \
1037  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1038  NULL, NULL, 0)) != 0) { \
1039  continue; \
1040  } \
1041  gtid_code |= 1; \
1042  } \
1043  KMP_FSYNC_ACQUIRED(ftx); \
1044  }
1045 
1046 // Fast-path test futex lock
1047 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1048  { \
1049  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1050  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1051  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
1052  KMP_FSYNC_ACQUIRED(ftx); \
1053  rc = TRUE; \
1054  } else { \
1055  rc = FALSE; \
1056  } \
1057  }
1058 
1059 // Fast-path release futex lock
1060 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1061  { \
1062  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1063  KMP_MB(); \
1064  KMP_FSYNC_RELEASING(ftx); \
1065  kmp_int32 poll_val = \
1066  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1067  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1068  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1069  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1070  } \
1071  KMP_MB(); \
1072  KMP_YIELD(TCR_4(__kmp_nth) > \
1073  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1074  }
1075 
1076 #endif // KMP_USE_FUTEX
1077 
1078 #else // KMP_USE_DYNAMIC_LOCK
1079 
1080 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1081  ident_t const *loc,
1082  kmp_int32 gtid) {
1083  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1084 
1085  // Because of the double-check, the following load doesn't need to be volatile
1086  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1087 
1088  if (lck == NULL) {
1089  void *idx;
1090 
1091  // Allocate & initialize the lock.
1092  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1093  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1094  __kmp_init_user_lock_with_checks(lck);
1095  __kmp_set_user_lock_location(lck, loc);
1096 #if USE_ITT_BUILD
1097  __kmp_itt_critical_creating(lck);
1098 // __kmp_itt_critical_creating() should be called *before* the first usage
1099 // of the underlying lock. It is the only place where we can guarantee it.
1100 // There is a chance the lock will be destroyed with no usage, but that is not
1101 // a problem, because this is not a real event seen by the user but rather
1102 // setting a name for the object (lock). See more details in kmp_itt.h.
1103 #endif /* USE_ITT_BUILD */
1104 
1105  // Use a cmpxchg instruction to slam the start of the critical section with
1106  // the lock pointer. If another thread beat us to it, deallocate the lock,
1107  // and use the lock that the other thread allocated.
1108  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1109 
1110  if (status == 0) {
1111 // Deallocate the lock and reload the value.
1112 #if USE_ITT_BUILD
1113  __kmp_itt_critical_destroyed(lck);
1114 // Let ITT know the lock is destroyed and the same memory location may be reused
1115 // for another purpose.
1116 #endif /* USE_ITT_BUILD */
1117  __kmp_destroy_user_lock_with_checks(lck);
1118  __kmp_user_lock_free(&idx, gtid, lck);
1119  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1120  KMP_DEBUG_ASSERT(lck != NULL);
1121  }
1122  }
1123  return lck;
1124 }
1125 
1126 #endif // KMP_USE_DYNAMIC_LOCK
1127 
1138 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1139  kmp_critical_name *crit) {
1140 #if KMP_USE_DYNAMIC_LOCK
1141 #if OMPT_SUPPORT && OMPT_OPTIONAL
1142  OMPT_STORE_RETURN_ADDRESS(global_tid);
1143 #endif // OMPT_SUPPORT
1144  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1145 #else
1146  KMP_COUNT_BLOCK(OMP_CRITICAL);
1147 #if OMPT_SUPPORT && OMPT_OPTIONAL
1148  ompt_state_t prev_state = ompt_state_undefined;
1149  ompt_thread_info_t ti;
1150 #endif
1151  kmp_user_lock_p lck;
1152 
1153  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1154 
1155  // TODO: add THR_OVHD_STATE
1156 
1157  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1158  KMP_CHECK_USER_LOCK_INIT();
1159 
1160  if ((__kmp_user_lock_kind == lk_tas) &&
1161  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1162  lck = (kmp_user_lock_p)crit;
1163  }
1164 #if KMP_USE_FUTEX
1165  else if ((__kmp_user_lock_kind == lk_futex) &&
1166  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1167  lck = (kmp_user_lock_p)crit;
1168  }
1169 #endif
1170  else { // ticket, queuing or drdpa
1171  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1172  }
1173 
1174  if (__kmp_env_consistency_check)
1175  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1176 
1177 // Since the critical directive binds to all threads, not just the current
1178 // team, we have to check this even if we are in a serialized team.
1179 // Also, even if we are the uber thread, we still have to acquire the lock,
1180 // as we have to contend with sibling threads.
1181 
1182 #if USE_ITT_BUILD
1183  __kmp_itt_critical_acquiring(lck);
1184 #endif /* USE_ITT_BUILD */
1185 #if OMPT_SUPPORT && OMPT_OPTIONAL
1186  OMPT_STORE_RETURN_ADDRESS(global_tid);
1187  void *codeptr_ra = NULL;
1188  if (ompt_enabled.enabled) {
1189  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1190  /* OMPT state update */
1191  prev_state = ti.state;
1192  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1193  ti.state = ompt_state_wait_critical;
1194 
1195  /* OMPT event callback */
1196  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1197  if (ompt_enabled.ompt_callback_mutex_acquire) {
1198  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1199  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1200  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1201  }
1202  }
1203 #endif
1204  // Value of 'crit' should be good for using as a critical_id of the critical
1205  // section directive.
1206  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1207 
1208 #if USE_ITT_BUILD
1209  __kmp_itt_critical_acquired(lck);
1210 #endif /* USE_ITT_BUILD */
1211 #if OMPT_SUPPORT && OMPT_OPTIONAL
1212  if (ompt_enabled.enabled) {
1213  /* OMPT state update */
1214  ti.state = prev_state;
1215  ti.wait_id = 0;
1216 
1217  /* OMPT event callback */
1218  if (ompt_enabled.ompt_callback_mutex_acquired) {
1219  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1220  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1221  }
1222  }
1223 #endif
1224  KMP_POP_PARTITIONED_TIMER();
1225 
1226  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1227  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1228 #endif // KMP_USE_DYNAMIC_LOCK
1229 }
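// Illustrative sketch (editorial; crit_name is a hypothetical identifier): a
// named critical construct is lowered around a statically allocated
// kmp_critical_name that backs the lazily created lock:
//
//   static kmp_critical_name crit_name;     // zero-initialized storage
//   ...
//   __kmpc_critical(&loc, gtid, &crit_name);
//   /* ...critical section... */
//   __kmpc_end_critical(&loc, gtid, &crit_name);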
1230 
1231 #if KMP_USE_DYNAMIC_LOCK
1232 
1233 // Converts the given hint to an internal lock implementation
1234 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1235 #if KMP_USE_TSX
1236 #define KMP_TSX_LOCK(seq) lockseq_##seq
1237 #else
1238 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1239 #endif
1240 
1241 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1242 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1243 #else
1244 #define KMP_CPUINFO_RTM 0
1245 #endif
1246 
1247  // Hints that do not require further logic
1248  if (hint & kmp_lock_hint_hle)
1249  return KMP_TSX_LOCK(hle);
1250  if (hint & kmp_lock_hint_rtm)
1251  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1252  if (hint & kmp_lock_hint_adaptive)
1253  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1254 
1255  // Rule out conflicting hints first by returning the default lock
1256  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1257  return __kmp_user_lock_seq;
1258  if ((hint & omp_lock_hint_speculative) &&
1259  (hint & omp_lock_hint_nonspeculative))
1260  return __kmp_user_lock_seq;
1261 
1262  // Do not even consider speculation when it appears to be contended
1263  if (hint & omp_lock_hint_contended)
1264  return lockseq_queuing;
1265 
1266  // Uncontended lock without speculation
1267  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1268  return lockseq_tas;
1269 
1270  // HLE lock for speculation
1271  if (hint & omp_lock_hint_speculative)
1272  return KMP_TSX_LOCK(hle);
1273 
1274  return __kmp_user_lock_seq;
1275 }
1276 
1277 #if OMPT_SUPPORT && OMPT_OPTIONAL
1278 #if KMP_USE_DYNAMIC_LOCK
1279 static kmp_mutex_impl_t
1280 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1281  if (user_lock) {
1282  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1283  case 0:
1284  break;
1285 #if KMP_USE_FUTEX
1286  case locktag_futex:
1287  return kmp_mutex_impl_queuing;
1288 #endif
1289  case locktag_tas:
1290  return kmp_mutex_impl_spin;
1291 #if KMP_USE_TSX
1292  case locktag_hle:
1293  return kmp_mutex_impl_speculative;
1294 #endif
1295  default:
1296  return kmp_mutex_impl_none;
1297  }
1298  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1299  }
1300  KMP_ASSERT(ilock);
1301  switch (ilock->type) {
1302 #if KMP_USE_TSX
1303  case locktag_adaptive:
1304  case locktag_rtm:
1305  return kmp_mutex_impl_speculative;
1306 #endif
1307  case locktag_nested_tas:
1308  return kmp_mutex_impl_spin;
1309 #if KMP_USE_FUTEX
1310  case locktag_nested_futex:
1311 #endif
1312  case locktag_ticket:
1313  case locktag_queuing:
1314  case locktag_drdpa:
1315  case locktag_nested_ticket:
1316  case locktag_nested_queuing:
1317  case locktag_nested_drdpa:
1318  return kmp_mutex_impl_queuing;
1319  default:
1320  return kmp_mutex_impl_none;
1321  }
1322 }
1323 #else
1324 // For locks without dynamic binding
1325 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1326  switch (__kmp_user_lock_kind) {
1327  case lk_tas:
1328  return kmp_mutex_impl_spin;
1329 #if KMP_USE_FUTEX
1330  case lk_futex:
1331 #endif
1332  case lk_ticket:
1333  case lk_queuing:
1334  case lk_drdpa:
1335  return kmp_mutex_impl_queuing;
1336 #if KMP_USE_TSX
1337  case lk_hle:
1338  case lk_rtm:
1339  case lk_adaptive:
1340  return kmp_mutex_impl_speculative;
1341 #endif
1342  default:
1343  return kmp_mutex_impl_none;
1344  }
1345 }
1346 #endif // KMP_USE_DYNAMIC_LOCK
1347 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1348 
1362 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1363  kmp_critical_name *crit, uint32_t hint) {
1364  KMP_COUNT_BLOCK(OMP_CRITICAL);
1365  kmp_user_lock_p lck;
1366 #if OMPT_SUPPORT && OMPT_OPTIONAL
1367  ompt_state_t prev_state = ompt_state_undefined;
1368  ompt_thread_info_t ti;
1369  // This is the case if called from __kmpc_critical:
1370  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1371  if (!codeptr)
1372  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1373 #endif
1374 
1375  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1376 
1377  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1378  // Check if it is initialized.
1379  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1380  if (*lk == 0) {
1381  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1382  if (KMP_IS_D_LOCK(lckseq)) {
1383  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1384  KMP_GET_D_TAG(lckseq));
1385  } else {
1386  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1387  }
1388  }
1389  // Branch for accessing the actual lock object and set operation. This
1390  // branching is inevitable since this lock initialization does not follow the
1391  // normal dispatch path (lock table is not used).
1392  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1393  lck = (kmp_user_lock_p)lk;
1394  if (__kmp_env_consistency_check) {
1395  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1396  __kmp_map_hint_to_lock(hint));
1397  }
1398 #if USE_ITT_BUILD
1399  __kmp_itt_critical_acquiring(lck);
1400 #endif
1401 #if OMPT_SUPPORT && OMPT_OPTIONAL
1402  if (ompt_enabled.enabled) {
1403  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1404  /* OMPT state update */
1405  prev_state = ti.state;
1406  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1407  ti.state = ompt_state_wait_critical;
1408 
1409  /* OMPT event callback */
1410  if (ompt_enabled.ompt_callback_mutex_acquire) {
1411  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1412  ompt_mutex_critical, (unsigned int)hint,
1413  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1414  codeptr);
1415  }
1416  }
1417 #endif
1418 #if KMP_USE_INLINED_TAS
1419  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1420  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1421  } else
1422 #elif KMP_USE_INLINED_FUTEX
1423  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1424  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1425  } else
1426 #endif
1427  {
1428  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1429  }
1430  } else {
1431  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1432  lck = ilk->lock;
1433  if (__kmp_env_consistency_check) {
1434  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1435  __kmp_map_hint_to_lock(hint));
1436  }
1437 #if USE_ITT_BUILD
1438  __kmp_itt_critical_acquiring(lck);
1439 #endif
1440 #if OMPT_SUPPORT && OMPT_OPTIONAL
1441  if (ompt_enabled.enabled) {
1442  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1443  /* OMPT state update */
1444  prev_state = ti.state;
1445  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1446  ti.state = ompt_state_wait_critical;
1447 
1448  /* OMPT event callback */
1449  if (ompt_enabled.ompt_callback_mutex_acquire) {
1450  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1451  ompt_mutex_critical, (unsigned int)hint,
1452  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1453  codeptr);
1454  }
1455  }
1456 #endif
1457  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1458  }
1459  KMP_POP_PARTITIONED_TIMER();
1460 
1461 #if USE_ITT_BUILD
1462  __kmp_itt_critical_acquired(lck);
1463 #endif /* USE_ITT_BUILD */
1464 #if OMPT_SUPPORT && OMPT_OPTIONAL
1465  if (ompt_enabled.enabled) {
1466  /* OMPT state update */
1467  ti.state = prev_state;
1468  ti.wait_id = 0;
1469 
1470  /* OMPT event callback */
1471  if (ompt_enabled.ompt_callback_mutex_acquired) {
1472  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1473  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1474  }
1475  }
1476 #endif
1477 
1478  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1479  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1480 } // __kmpc_critical_with_hint
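// Illustrative sketch (editorial): with OpenMP 4.5 lock hints, a construct
// such as "#pragma omp critical (name) hint(omp_lock_hint_speculative)" is
// lowered to
//
//   __kmpc_critical_with_hint(&loc, gtid, &crit_name, omp_lock_hint_speculative);
//
// and __kmp_map_hint_to_lock above selects the backing implementation (e.g.
// HLE for speculative hints on TSX hardware, a queuing lock when contention
// is hinted, TAS for the uncontended hint).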
1481 
1482 #endif // KMP_USE_DYNAMIC_LOCK
1483 
1493 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1494  kmp_critical_name *crit) {
1495  kmp_user_lock_p lck;
1496 
1497  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1498 
1499 #if KMP_USE_DYNAMIC_LOCK
1500  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1501  lck = (kmp_user_lock_p)crit;
1502  KMP_ASSERT(lck != NULL);
1503  if (__kmp_env_consistency_check) {
1504  __kmp_pop_sync(global_tid, ct_critical, loc);
1505  }
1506 #if USE_ITT_BUILD
1507  __kmp_itt_critical_releasing(lck);
1508 #endif
1509 #if KMP_USE_INLINED_TAS
1510  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1511  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1512  } else
1513 #elif KMP_USE_INLINED_FUTEX
1514  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1515  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1516  } else
1517 #endif
1518  {
1519  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1520  }
1521  } else {
1522  kmp_indirect_lock_t *ilk =
1523  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1524  KMP_ASSERT(ilk != NULL);
1525  lck = ilk->lock;
1526  if (__kmp_env_consistency_check) {
1527  __kmp_pop_sync(global_tid, ct_critical, loc);
1528  }
1529 #if USE_ITT_BUILD
1530  __kmp_itt_critical_releasing(lck);
1531 #endif
1532  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1533  }
1534 
1535 #else // KMP_USE_DYNAMIC_LOCK
1536 
1537  if ((__kmp_user_lock_kind == lk_tas) &&
1538  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1539  lck = (kmp_user_lock_p)crit;
1540  }
1541 #if KMP_USE_FUTEX
1542  else if ((__kmp_user_lock_kind == lk_futex) &&
1543  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1544  lck = (kmp_user_lock_p)crit;
1545  }
1546 #endif
1547  else { // ticket, queuing or drdpa
1548  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1549  }
1550 
1551  KMP_ASSERT(lck != NULL);
1552 
1553  if (__kmp_env_consistency_check)
1554  __kmp_pop_sync(global_tid, ct_critical, loc);
1555 
1556 #if USE_ITT_BUILD
1557  __kmp_itt_critical_releasing(lck);
1558 #endif /* USE_ITT_BUILD */
1559  // Value of 'crit' should be good for using as a critical_id of the critical
1560  // section directive.
1561  __kmp_release_user_lock_with_checks(lck, global_tid);
1562 
1563 #endif // KMP_USE_DYNAMIC_LOCK
1564 
1565 #if OMPT_SUPPORT && OMPT_OPTIONAL
1566  /* OMPT release event triggers after lock is released; place here to trigger
1567  * for all #if branches */
1568  OMPT_STORE_RETURN_ADDRESS(global_tid);
1569  if (ompt_enabled.ompt_callback_mutex_released) {
1570  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1571  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1572  OMPT_LOAD_RETURN_ADDRESS(0));
1573  }
1574 #endif
1575 
1576  KMP_POP_PARTITIONED_TIMER();
1577  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1578 }
1579 
1589 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1590  int status;
1591 
1592  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1593 
1594  if (!TCR_4(__kmp_init_parallel))
1595  __kmp_parallel_initialize();
1596 
1597  if (__kmp_env_consistency_check)
1598  __kmp_check_barrier(global_tid, ct_barrier, loc);
1599 
1600 #if OMPT_SUPPORT
1601  ompt_frame_t *ompt_frame;
1602  if (ompt_enabled.enabled) {
1603  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1604  if (ompt_frame->enter_frame.ptr == NULL)
1605  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1606  OMPT_STORE_RETURN_ADDRESS(global_tid);
1607  }
1608 #endif
1609 #if USE_ITT_NOTIFY
1610  __kmp_threads[global_tid]->th.th_ident = loc;
1611 #endif
1612  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1613 #if OMPT_SUPPORT && OMPT_OPTIONAL
1614  if (ompt_enabled.enabled) {
1615  ompt_frame->enter_frame = ompt_data_none;
1616  }
1617 #endif
1618 
1619  return (status != 0) ? 0 : 1;
1620 }
1621 
1631 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1632  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1633 
1634  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1635 }
1636 
1647 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1648  kmp_int32 ret;
1649 
1650  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1651 
1652  if (!TCR_4(__kmp_init_parallel))
1653  __kmp_parallel_initialize();
1654 
1655  if (__kmp_env_consistency_check) {
1656  if (loc == 0) {
1657  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1658  }
1659  __kmp_check_barrier(global_tid, ct_barrier, loc);
1660  }
1661 
1662 #if OMPT_SUPPORT
1663  ompt_frame_t *ompt_frame;
1664  if (ompt_enabled.enabled) {
1665  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1666  if (ompt_frame->enter_frame.ptr == NULL)
1667  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1668  OMPT_STORE_RETURN_ADDRESS(global_tid);
1669  }
1670 #endif
1671 #if USE_ITT_NOTIFY
1672  __kmp_threads[global_tid]->th.th_ident = loc;
1673 #endif
1674  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1675 #if OMPT_SUPPORT && OMPT_OPTIONAL
1676  if (ompt_enabled.enabled) {
1677  ompt_frame->enter_frame = ompt_data_none;
1678  }
1679 #endif
1680 
1681  ret = __kmpc_master(loc, global_tid);
1682 
1683  if (__kmp_env_consistency_check) {
1684  /* there's no __kmpc_end_master called; so the (stats) */
1685  /* actions of __kmpc_end_master are done here */
1686 
1687  if (global_tid < 0) {
1688  KMP_WARNING(ThreadIdentInvalid);
1689  }
1690  if (ret) {
1691  /* only one thread should do the pop since only */
1692  /* one did the push (see __kmpc_master()) */
1693 
1694  __kmp_pop_sync(global_tid, ct_master, loc);
1695  }
1696  }
1697 
1698  return (ret);
1699 }
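// Illustrative sketch (editorial): __kmpc_barrier_master gates a master-only
// block behind a split barrier, and the master releases the waiting threads
// with __kmpc_end_barrier_master:
//
//   if (__kmpc_barrier_master(&loc, gtid)) {
//     /* ...master-only work; the other threads wait in the barrier... */
//     __kmpc_end_barrier_master(&loc, gtid);
//   }
//
// The _nowait variant above performs a full barrier first and then behaves
// like __kmpc_master, with no matching "end" call expected (hence the
// consistency-check pop done inside the function).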
1700 
1701 /* The BARRIER for a SINGLE process section is always explicit */
1713 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1714  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1715 
1716  if (rc) {
1717  // We are going to execute the single statement, so we should count it.
1718  KMP_COUNT_BLOCK(OMP_SINGLE);
1719  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1720  }
1721 
1722 #if OMPT_SUPPORT && OMPT_OPTIONAL
1723  kmp_info_t *this_thr = __kmp_threads[global_tid];
1724  kmp_team_t *team = this_thr->th.th_team;
1725  int tid = __kmp_tid_from_gtid(global_tid);
1726 
1727  if (ompt_enabled.enabled) {
1728  if (rc) {
1729  if (ompt_enabled.ompt_callback_work) {
1730  ompt_callbacks.ompt_callback(ompt_callback_work)(
1731  ompt_work_single_executor, ompt_scope_begin,
1732  &(team->t.ompt_team_info.parallel_data),
1733  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1734  1, OMPT_GET_RETURN_ADDRESS(0));
1735  }
1736  } else {
1737  if (ompt_enabled.ompt_callback_work) {
1738  ompt_callbacks.ompt_callback(ompt_callback_work)(
1739  ompt_work_single_other, ompt_scope_begin,
1740  &(team->t.ompt_team_info.parallel_data),
1741  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1742  1, OMPT_GET_RETURN_ADDRESS(0));
1743  ompt_callbacks.ompt_callback(ompt_callback_work)(
1744  ompt_work_single_other, ompt_scope_end,
1745  &(team->t.ompt_team_info.parallel_data),
1746  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1747  1, OMPT_GET_RETURN_ADDRESS(0));
1748  }
1749  }
1750  }
1751 #endif
1752 
1753  return rc;
1754 }
1755 
1765 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1766  __kmp_exit_single(global_tid);
1767  KMP_POP_PARTITIONED_TIMER();
1768 
1769 #if OMPT_SUPPORT && OMPT_OPTIONAL
1770  kmp_info_t *this_thr = __kmp_threads[global_tid];
1771  kmp_team_t *team = this_thr->th.th_team;
1772  int tid = __kmp_tid_from_gtid(global_tid);
1773 
1774  if (ompt_enabled.ompt_callback_work) {
1775  ompt_callbacks.ompt_callback(ompt_callback_work)(
1776  ompt_work_single_executor, ompt_scope_end,
1777  &(team->t.ompt_team_info.parallel_data),
1778  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1779  OMPT_GET_RETURN_ADDRESS(0));
1780  }
1781 #endif
1782 }
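// Illustrative sketch (editorial): "#pragma omp single" is lowered to a guard
// on the return value, typically followed by a barrier unless "nowait" was
// specified:
//
//   if (__kmpc_single(&loc, gtid)) {
//     /* ...single region body... */
//     __kmpc_end_single(&loc, gtid);
//   }
//   __kmpc_barrier(&loc, gtid); // omitted for "nowait"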
1783 
1791 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1792  KMP_POP_PARTITIONED_TIMER();
1793  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1794 
1795 #if OMPT_SUPPORT && OMPT_OPTIONAL
1796  if (ompt_enabled.ompt_callback_work) {
1797  ompt_work_t ompt_work_type = ompt_work_loop;
1798  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1799  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1800  // Determine workshare type
1801  if (loc != NULL) {
1802  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1803  ompt_work_type = ompt_work_loop;
1804  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1805  ompt_work_type = ompt_work_sections;
1806  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1807  ompt_work_type = ompt_work_distribute;
1808  } else {
1809  // use default set above.
1810  // a warning about this case is provided in __kmpc_for_static_init
1811  }
1812  KMP_DEBUG_ASSERT(ompt_work_type);
1813  }
1814  ompt_callbacks.ompt_callback(ompt_callback_work)(
1815  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1816  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1817  }
1818 #endif
1819  if (__kmp_env_consistency_check)
1820  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1821 }
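// Illustrative sketch (editorial; the matching __kmpc_for_static_init_4 lives
// in kmp_sched.cpp, not in this file): a statically scheduled loop is
// bracketed by an init/fini pair:
//
//   kmp_int32 last = 0, lower = 0, upper = n - 1, stride = 1;
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
//                            &upper, &stride, /*incr=*/1, /*chunk=*/1);
//   for (kmp_int32 i = lower; i <= upper; ++i) {
//     /* ...loop body for this thread's chunk... */
//   }
//   __kmpc_for_static_fini(&loc, gtid);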
1822 
1823 // User routines which take C-style arguments (call by value),
1824 // unlike the Fortran equivalent routines
1825 
1826 void ompc_set_num_threads(int arg) {
1827  // !!!!! TODO: check the per-task binding
1828  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1829 }
1830 
1831 void ompc_set_dynamic(int flag) {
1832  kmp_info_t *thread;
1833 
1834  /* For the thread-private implementation of the internal controls */
1835  thread = __kmp_entry_thread();
1836 
1837  __kmp_save_internal_controls(thread);
1838 
1839  set__dynamic(thread, flag ? TRUE : FALSE);
1840 }
1841 
1842 void ompc_set_nested(int flag) {
1843  kmp_info_t *thread;
1844 
1845  /* For the thread-private internal controls implementation */
1846  thread = __kmp_entry_thread();
1847 
1848  __kmp_save_internal_controls(thread);
1849 
1850  set__nested(thread, flag ? TRUE : FALSE);
1851 }
1852 
1853 void ompc_set_max_active_levels(int max_active_levels) {
1854  /* TO DO */
1855  /* we want per-task implementation of this internal control */
1856 
1857  /* For the per-thread internal controls implementation */
1858  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1859 }
1860 
1861 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1862  // !!!!! TODO: check the per-task binding
1863  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1864 }
1865 
1866 int ompc_get_ancestor_thread_num(int level) {
1867  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1868 }
1869 
1870 int ompc_get_team_size(int level) {
1871  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1872 }
1873 
1874 #if OMP_50_ENABLED
1875 /* OpenMP 5.0 Affinity Format API */
1876 
1877 void ompc_set_affinity_format(char const *format) {
1878  if (!__kmp_init_serial) {
1879  __kmp_serial_initialize();
1880  }
1881  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1882  format, KMP_STRLEN(format) + 1);
1883 }
1884 
1885 size_t ompc_get_affinity_format(char *buffer, size_t size) {
1886  size_t format_size;
1887  if (!__kmp_init_serial) {
1888  __kmp_serial_initialize();
1889  }
1890  format_size = KMP_STRLEN(__kmp_affinity_format);
1891  if (buffer && size) {
1892  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1893  format_size + 1);
1894  }
1895  return format_size;
1896 }
1897 
1898 void ompc_display_affinity(char const *format) {
1899  int gtid;
1900  if (!TCR_4(__kmp_init_middle)) {
1901  __kmp_middle_initialize();
1902  }
1903  gtid = __kmp_get_gtid();
1904  __kmp_aux_display_affinity(gtid, format);
1905 }
1906 
1907 size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1908  char const *format) {
1909  int gtid;
1910  size_t num_required;
1911  kmp_str_buf_t capture_buf;
1912  if (!TCR_4(__kmp_init_middle)) {
1913  __kmp_middle_initialize();
1914  }
1915  gtid = __kmp_get_gtid();
1916  __kmp_str_buf_init(&capture_buf);
1917  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1918  if (buffer && buf_size) {
1919  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1920  capture_buf.used + 1);
1921  }
1922  __kmp_str_buf_free(&capture_buf);
1923  return num_required;
1924 }
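// Illustrative usage sketch (editorial; the format strings are hypothetical
// examples) for the OpenMP 5.0 affinity-format entry points above, which back
// omp_set_affinity_format() and friends:
//
//   ompc_set_affinity_format("host %H tid %n bound to %A");
//   char buf[256];
//   size_t needed = ompc_capture_affinity(buf, sizeof(buf), "host %H tid %n");
//   // 'needed' is the number of characters required; a value >= sizeof(buf)
//   // indicates the captured string was truncated.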
1925 #endif /* OMP_50_ENABLED */
1926 
1927 void kmpc_set_stacksize(int arg) {
1928  // __kmp_aux_set_stacksize initializes the library if needed
1929  __kmp_aux_set_stacksize(arg);
1930 }
1931 
1932 void kmpc_set_stacksize_s(size_t arg) {
1933  // __kmp_aux_set_stacksize initializes the library if needed
1934  __kmp_aux_set_stacksize(arg);
1935 }
1936 
1937 void kmpc_set_blocktime(int arg) {
1938  int gtid, tid;
1939  kmp_info_t *thread;
1940 
1941  gtid = __kmp_entry_gtid();
1942  tid = __kmp_tid_from_gtid(gtid);
1943  thread = __kmp_thread_from_gtid(gtid);
1944 
1945  __kmp_aux_set_blocktime(arg, thread, tid);
1946 }
1947 
1948 void kmpc_set_library(int arg) {
1949  // __kmp_user_set_library initializes the library if needed
1950  __kmp_user_set_library((enum library_type)arg);
1951 }
1952 
1953 void kmpc_set_defaults(char const *str) {
1954  // __kmp_aux_set_defaults initializes the library if needed
1955  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1956 }
1957 
1958 void kmpc_set_disp_num_buffers(int arg) {
1959  // ignore after initialization because some teams have already
1960  // allocated dispatch buffers
1961  if (__kmp_init_serial == 0 && arg > 0)
1962  __kmp_dispatch_num_buffers = arg;
1963 }
1964 
1965 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1966 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1967  return -1;
1968 #else
1969  if (!TCR_4(__kmp_init_middle)) {
1970  __kmp_middle_initialize();
1971  }
1972  return __kmp_aux_set_affinity_mask_proc(proc, mask);
1973 #endif
1974 }
1975 
1976 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1977 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1978  return -1;
1979 #else
1980  if (!TCR_4(__kmp_init_middle)) {
1981  __kmp_middle_initialize();
1982  }
1983  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
1984 #endif
1985 }
1986 
1987 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
1988 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1989  return -1;
1990 #else
1991  if (!TCR_4(__kmp_init_middle)) {
1992  __kmp_middle_initialize();
1993  }
1994  return __kmp_aux_get_affinity_mask_proc(proc, mask);
1995 #endif
1996 }
1997 
1998 /* -------------------------------------------------------------------------- */
2043 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2044  void *cpy_data, void (*cpy_func)(void *, void *),
2045  kmp_int32 didit) {
2046  void **data_ptr;
2047 
2048  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2049 
2050  KMP_MB();
2051 
2052  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2053 
2054  if (__kmp_env_consistency_check) {
2055  if (loc == 0) {
2056  KMP_WARNING(ConstructIdentInvalid);
2057  }
2058  }
2059 
2060  // ToDo: Optimize the following two barriers into some kind of split barrier
2061 
2062  if (didit)
2063  *data_ptr = cpy_data;
2064 
2065 #if OMPT_SUPPORT
2066  ompt_frame_t *ompt_frame;
2067  if (ompt_enabled.enabled) {
2068  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2069  if (ompt_frame->enter_frame.ptr == NULL)
2070  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2071  OMPT_STORE_RETURN_ADDRESS(gtid);
2072  }
2073 #endif
2074 /* This barrier is not a barrier region boundary */
2075 #if USE_ITT_NOTIFY
2076  __kmp_threads[gtid]->th.th_ident = loc;
2077 #endif
2078  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2079 
2080  if (!didit)
2081  (*cpy_func)(cpy_data, *data_ptr);
2082 
2083 // Consider next barrier a user-visible barrier for barrier region boundaries
2084 // Nesting checks are already handled by the single construct checks
2085 
2086 #if OMPT_SUPPORT
2087  if (ompt_enabled.enabled) {
2088  OMPT_STORE_RETURN_ADDRESS(gtid);
2089  }
2090 #endif
2091 #if USE_ITT_NOTIFY
2092  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2093 // tasks can overwrite the location)
2094 #endif
2095  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2096 #if OMPT_SUPPORT && OMPT_OPTIONAL
2097  if (ompt_enabled.enabled) {
2098  ompt_frame->enter_frame = ompt_data_none;
2099  }
2100 #endif
2101 }
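/* Lowering sketch (illustrative).  For a construct such as
     #pragma omp single copyprivate(x)
   a compiler typically emits code along the lines of the hypothetical
   fragment below: "didit" is nonzero only on the thread that executed the
   single region, that thread publishes the address of its copy through the
   shared t_copypriv_data slot, and every other thread runs the callback with
   its own private copy as the first argument and the executor's as the
   second.  compute_x() and copy_x() are placeholders.

     static void copy_x(void *dst, void *src) {
       *(int *)dst = *(int *)src;   // replicate the privatized variable
     }
     ...
     kmp_int32 didit = __kmpc_single(loc, gtid);
     if (didit) {
       x = compute_x();             // body of the single region
       __kmpc_end_single(loc, gtid);
     }
     __kmpc_copyprivate(loc, gtid, sizeof(x), &x, copy_x, didit);
*/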
2102 
2103 /* -------------------------------------------------------------------------- */
2104 
2105 #define INIT_LOCK __kmp_init_user_lock_with_checks
2106 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2107 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2108 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2109 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2110 #define ACQUIRE_NESTED_LOCK_TIMED \
2111  __kmp_acquire_nested_user_lock_with_checks_timed
2112 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2113 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2114 #define TEST_LOCK __kmp_test_user_lock_with_checks
2115 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2116 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2117 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2118 
2119 // TODO: Make check abort messages use location info & pass it into
2120 // with_checks routines
2121 
2122 #if KMP_USE_DYNAMIC_LOCK
2123 
2124 // internal lock initializer
2125 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2126  kmp_dyna_lockseq_t seq) {
2127  if (KMP_IS_D_LOCK(seq)) {
2128  KMP_INIT_D_LOCK(lock, seq);
2129 #if USE_ITT_BUILD
2130  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2131 #endif
2132  } else {
2133  KMP_INIT_I_LOCK(lock, seq);
2134 #if USE_ITT_BUILD
2135  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2136  __kmp_itt_lock_creating(ilk->lock, loc);
2137 #endif
2138  }
2139 }
2140 
2141 // internal nest lock initializer
2142 static __forceinline void
2143 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2144  kmp_dyna_lockseq_t seq) {
2145 #if KMP_USE_TSX
2146  // Don't have nested lock implementation for speculative locks
2147  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2148  seq = __kmp_user_lock_seq;
2149 #endif
2150  switch (seq) {
2151  case lockseq_tas:
2152  seq = lockseq_nested_tas;
2153  break;
2154 #if KMP_USE_FUTEX
2155  case lockseq_futex:
2156  seq = lockseq_nested_futex;
2157  break;
2158 #endif
2159  case lockseq_ticket:
2160  seq = lockseq_nested_ticket;
2161  break;
2162  case lockseq_queuing:
2163  seq = lockseq_nested_queuing;
2164  break;
2165  case lockseq_drdpa:
2166  seq = lockseq_nested_drdpa;
2167  break;
2168  default:
2169  seq = lockseq_nested_queuing;
2170  }
2171  KMP_INIT_I_LOCK(lock, seq);
2172 #if USE_ITT_BUILD
2173  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2174  __kmp_itt_lock_creating(ilk->lock, loc);
2175 #endif
2176 }
2177 
2178 /* initialize the lock with a hint */
2179 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2180  uintptr_t hint) {
2181  KMP_DEBUG_ASSERT(__kmp_init_serial);
2182  if (__kmp_env_consistency_check && user_lock == NULL) {
2183  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2184  }
2185 
2186  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2187 
2188 #if OMPT_SUPPORT && OMPT_OPTIONAL
2189  // This is the case, if called from omp_init_lock_with_hint:
2190  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2191  if (!codeptr)
2192  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2193  if (ompt_enabled.ompt_callback_lock_init) {
2194  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2195  ompt_mutex_lock, (omp_lock_hint_t)hint,
2196  __ompt_get_mutex_impl_type(user_lock),
2197  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2198  }
2199 #endif
2200 }
2201 
2202 /* initialize the lock with a hint */
2203 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2204  void **user_lock, uintptr_t hint) {
2205  KMP_DEBUG_ASSERT(__kmp_init_serial);
2206  if (__kmp_env_consistency_check && user_lock == NULL) {
2207  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2208  }
2209 
2210  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2211 
2212 #if OMPT_SUPPORT && OMPT_OPTIONAL
2213  // This is the case, if called from omp_init_lock_with_hint:
2214  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2215  if (!codeptr)
2216  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2217  if (ompt_enabled.ompt_callback_lock_init) {
2218  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2219  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2220  __ompt_get_mutex_impl_type(user_lock),
2221  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2222  }
2223 #endif
2224 }
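/* Usage sketch (illustrative).  The two entry points above back the
   user-level omp_init_lock_with_hint()/omp_init_nest_lock_with_hint() API;
   the hint constant is translated into a lock sequence by
   __kmp_map_hint_to_lock() before the internal initializer runs.
   Hypothetical user code:

     omp_lock_t l;
     omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
     omp_set_lock(&l);
     // ... short critical section suited to speculation ...
     omp_unset_lock(&l);
     omp_destroy_lock(&l);
*/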
2225 
2226 #endif // KMP_USE_DYNAMIC_LOCK
2227 
2228 /* initialize the lock */
2229 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2230 #if KMP_USE_DYNAMIC_LOCK
2231 
2232  KMP_DEBUG_ASSERT(__kmp_init_serial);
2233  if (__kmp_env_consistency_check && user_lock == NULL) {
2234  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2235  }
2236  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2237 
2238 #if OMPT_SUPPORT && OMPT_OPTIONAL
2239  // This is the case, if called from omp_init_lock_with_hint:
2240  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2241  if (!codeptr)
2242  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2243  if (ompt_enabled.ompt_callback_lock_init) {
2244  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2245  ompt_mutex_lock, omp_lock_hint_none,
2246  __ompt_get_mutex_impl_type(user_lock),
2247  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2248  }
2249 #endif
2250 
2251 #else // KMP_USE_DYNAMIC_LOCK
2252 
2253  static char const *const func = "omp_init_lock";
2254  kmp_user_lock_p lck;
2255  KMP_DEBUG_ASSERT(__kmp_init_serial);
2256 
2257  if (__kmp_env_consistency_check) {
2258  if (user_lock == NULL) {
2259  KMP_FATAL(LockIsUninitialized, func);
2260  }
2261  }
2262 
2263  KMP_CHECK_USER_LOCK_INIT();
2264 
2265  if ((__kmp_user_lock_kind == lk_tas) &&
2266  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2267  lck = (kmp_user_lock_p)user_lock;
2268  }
2269 #if KMP_USE_FUTEX
2270  else if ((__kmp_user_lock_kind == lk_futex) &&
2271  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2272  lck = (kmp_user_lock_p)user_lock;
2273  }
2274 #endif
2275  else {
2276  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2277  }
2278  INIT_LOCK(lck);
2279  __kmp_set_user_lock_location(lck, loc);
2280 
2281 #if OMPT_SUPPORT && OMPT_OPTIONAL
2282  // This is the case, if called from omp_init_lock_with_hint:
2283  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2284  if (!codeptr)
2285  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2286  if (ompt_enabled.ompt_callback_lock_init) {
2287  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2288  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2289  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2290  }
2291 #endif
2292 
2293 #if USE_ITT_BUILD
2294  __kmp_itt_lock_creating(lck);
2295 #endif /* USE_ITT_BUILD */
2296 
2297 #endif // KMP_USE_DYNAMIC_LOCK
2298 } // __kmpc_init_lock
2299 
2300 /* initialize the lock */
2301 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2302 #if KMP_USE_DYNAMIC_LOCK
2303 
2304  KMP_DEBUG_ASSERT(__kmp_init_serial);
2305  if (__kmp_env_consistency_check && user_lock == NULL) {
2306  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2307  }
2308  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2309 
2310 #if OMPT_SUPPORT && OMPT_OPTIONAL
2311  // This is the case, if called from omp_init_lock_with_hint:
2312  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2313  if (!codeptr)
2314  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2315  if (ompt_enabled.ompt_callback_lock_init) {
2316  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2317  ompt_mutex_nest_lock, omp_lock_hint_none,
2318  __ompt_get_mutex_impl_type(user_lock),
2319  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2320  }
2321 #endif
2322 
2323 #else // KMP_USE_DYNAMIC_LOCK
2324 
2325  static char const *const func = "omp_init_nest_lock";
2326  kmp_user_lock_p lck;
2327  KMP_DEBUG_ASSERT(__kmp_init_serial);
2328 
2329  if (__kmp_env_consistency_check) {
2330  if (user_lock == NULL) {
2331  KMP_FATAL(LockIsUninitialized, func);
2332  }
2333  }
2334 
2335  KMP_CHECK_USER_LOCK_INIT();
2336 
2337  if ((__kmp_user_lock_kind == lk_tas) &&
2338  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2339  OMP_NEST_LOCK_T_SIZE)) {
2340  lck = (kmp_user_lock_p)user_lock;
2341  }
2342 #if KMP_USE_FUTEX
2343  else if ((__kmp_user_lock_kind == lk_futex) &&
2344  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2345  OMP_NEST_LOCK_T_SIZE)) {
2346  lck = (kmp_user_lock_p)user_lock;
2347  }
2348 #endif
2349  else {
2350  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2351  }
2352 
2353  INIT_NESTED_LOCK(lck);
2354  __kmp_set_user_lock_location(lck, loc);
2355 
2356 #if OMPT_SUPPORT && OMPT_OPTIONAL
2357  // This is the case, if called from omp_init_lock_with_hint:
2358  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2359  if (!codeptr)
2360  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2361  if (ompt_enabled.ompt_callback_lock_init) {
2362  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2363  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2364  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2365  }
2366 #endif
2367 
2368 #if USE_ITT_BUILD
2369  __kmp_itt_lock_creating(lck);
2370 #endif /* USE_ITT_BUILD */
2371 
2372 #endif // KMP_USE_DYNAMIC_LOCK
2373 } // __kmpc_init_nest_lock
2374 
2375 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2376 #if KMP_USE_DYNAMIC_LOCK
2377 
2378 #if USE_ITT_BUILD
2379  kmp_user_lock_p lck;
2380  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2381  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2382  } else {
2383  lck = (kmp_user_lock_p)user_lock;
2384  }
2385  __kmp_itt_lock_destroyed(lck);
2386 #endif
2387 #if OMPT_SUPPORT && OMPT_OPTIONAL
2388  // This is the case, if called from omp_init_lock_with_hint:
2389  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2390  if (!codeptr)
2391  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2392  if (ompt_enabled.ompt_callback_lock_destroy) {
2393  kmp_user_lock_p lck;
2394  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2395  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2396  } else {
2397  lck = (kmp_user_lock_p)user_lock;
2398  }
2399  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2400  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2401  }
2402 #endif
2403  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2404 #else
2405  kmp_user_lock_p lck;
2406 
2407  if ((__kmp_user_lock_kind == lk_tas) &&
2408  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2409  lck = (kmp_user_lock_p)user_lock;
2410  }
2411 #if KMP_USE_FUTEX
2412  else if ((__kmp_user_lock_kind == lk_futex) &&
2413  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2414  lck = (kmp_user_lock_p)user_lock;
2415  }
2416 #endif
2417  else {
2418  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2419  }
2420 
2421 #if OMPT_SUPPORT && OMPT_OPTIONAL
2422  // This is the case, if called from omp_init_lock_with_hint:
2423  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2424  if (!codeptr)
2425  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2426  if (ompt_enabled.ompt_callback_lock_destroy) {
2427  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2428  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2429  }
2430 #endif
2431 
2432 #if USE_ITT_BUILD
2433  __kmp_itt_lock_destroyed(lck);
2434 #endif /* USE_ITT_BUILD */
2435  DESTROY_LOCK(lck);
2436 
2437  if ((__kmp_user_lock_kind == lk_tas) &&
2438  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2439  ;
2440  }
2441 #if KMP_USE_FUTEX
2442  else if ((__kmp_user_lock_kind == lk_futex) &&
2443  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2444  ;
2445  }
2446 #endif
2447  else {
2448  __kmp_user_lock_free(user_lock, gtid, lck);
2449  }
2450 #endif // KMP_USE_DYNAMIC_LOCK
2451 } // __kmpc_destroy_lock
2452 
2453 /* destroy the lock */
2454 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2455 #if KMP_USE_DYNAMIC_LOCK
2456 
2457 #if USE_ITT_BUILD
2458  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2459  __kmp_itt_lock_destroyed(ilk->lock);
2460 #endif
2461 #if OMPT_SUPPORT && OMPT_OPTIONAL
2462  // This is the case, if called from omp_init_lock_with_hint:
2463  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2464  if (!codeptr)
2465  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2466  if (ompt_enabled.ompt_callback_lock_destroy) {
2467  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2468  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2469  }
2470 #endif
2471  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2472 
2473 #else // KMP_USE_DYNAMIC_LOCK
2474 
2475  kmp_user_lock_p lck;
2476 
2477  if ((__kmp_user_lock_kind == lk_tas) &&
2478  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2479  OMP_NEST_LOCK_T_SIZE)) {
2480  lck = (kmp_user_lock_p)user_lock;
2481  }
2482 #if KMP_USE_FUTEX
2483  else if ((__kmp_user_lock_kind == lk_futex) &&
2484  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2485  OMP_NEST_LOCK_T_SIZE)) {
2486  lck = (kmp_user_lock_p)user_lock;
2487  }
2488 #endif
2489  else {
2490  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2491  }
2492 
2493 #if OMPT_SUPPORT && OMPT_OPTIONAL
2494  // This is the case, if called from omp_init_lock_with_hint:
2495  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2496  if (!codeptr)
2497  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2498  if (ompt_enabled.ompt_callback_lock_destroy) {
2499  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2500  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2501  }
2502 #endif
2503 
2504 #if USE_ITT_BUILD
2505  __kmp_itt_lock_destroyed(lck);
2506 #endif /* USE_ITT_BUILD */
2507 
2508  DESTROY_NESTED_LOCK(lck);
2509 
2510  if ((__kmp_user_lock_kind == lk_tas) &&
2511  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2512  OMP_NEST_LOCK_T_SIZE)) {
2513  ;
2514  }
2515 #if KMP_USE_FUTEX
2516  else if ((__kmp_user_lock_kind == lk_futex) &&
2517  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2518  OMP_NEST_LOCK_T_SIZE)) {
2519  ;
2520  }
2521 #endif
2522  else {
2523  __kmp_user_lock_free(user_lock, gtid, lck);
2524  }
2525 #endif // KMP_USE_DYNAMIC_LOCK
2526 } // __kmpc_destroy_nest_lock
2527 
2528 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2529  KMP_COUNT_BLOCK(OMP_set_lock);
2530 #if KMP_USE_DYNAMIC_LOCK
2531  int tag = KMP_EXTRACT_D_TAG(user_lock);
2532 #if USE_ITT_BUILD
2533  __kmp_itt_lock_acquiring(
2534  (kmp_user_lock_p)
2535  user_lock); // itt function will get to the right lock object.
2536 #endif
2537 #if OMPT_SUPPORT && OMPT_OPTIONAL
2538  // This is the case, if called from omp_init_lock_with_hint:
2539  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2540  if (!codeptr)
2541  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2542  if (ompt_enabled.ompt_callback_mutex_acquire) {
2543  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2544  ompt_mutex_lock, omp_lock_hint_none,
2545  __ompt_get_mutex_impl_type(user_lock),
2546  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2547  }
2548 #endif
2549 #if KMP_USE_INLINED_TAS
2550  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2551  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2552  } else
2553 #elif KMP_USE_INLINED_FUTEX
2554  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2555  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2556  } else
2557 #endif
2558  {
2559  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2560  }
2561 #if USE_ITT_BUILD
2562  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2563 #endif
2564 #if OMPT_SUPPORT && OMPT_OPTIONAL
2565  if (ompt_enabled.ompt_callback_mutex_acquired) {
2566  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2567  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2568  }
2569 #endif
2570 
2571 #else // KMP_USE_DYNAMIC_LOCK
2572 
2573  kmp_user_lock_p lck;
2574 
2575  if ((__kmp_user_lock_kind == lk_tas) &&
2576  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2577  lck = (kmp_user_lock_p)user_lock;
2578  }
2579 #if KMP_USE_FUTEX
2580  else if ((__kmp_user_lock_kind == lk_futex) &&
2581  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2582  lck = (kmp_user_lock_p)user_lock;
2583  }
2584 #endif
2585  else {
2586  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2587  }
2588 
2589 #if USE_ITT_BUILD
2590  __kmp_itt_lock_acquiring(lck);
2591 #endif /* USE_ITT_BUILD */
2592 #if OMPT_SUPPORT && OMPT_OPTIONAL
2593  // This is the case, if called from omp_init_lock_with_hint:
2594  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2595  if (!codeptr)
2596  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2597  if (ompt_enabled.ompt_callback_mutex_acquire) {
2598  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2599  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2600  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2601  }
2602 #endif
2603 
2604  ACQUIRE_LOCK(lck, gtid);
2605 
2606 #if USE_ITT_BUILD
2607  __kmp_itt_lock_acquired(lck);
2608 #endif /* USE_ITT_BUILD */
2609 
2610 #if OMPT_SUPPORT && OMPT_OPTIONAL
2611  if (ompt_enabled.ompt_callback_mutex_acquired) {
2612  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2613  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2614  }
2615 #endif
2616 
2617 #endif // KMP_USE_DYNAMIC_LOCK
2618 }
2619 
2620 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2621 #if KMP_USE_DYNAMIC_LOCK
2622 
2623 #if USE_ITT_BUILD
2624  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2625 #endif
2626 #if OMPT_SUPPORT && OMPT_OPTIONAL
2627  // This is the case, if called from omp_init_lock_with_hint:
2628  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2629  if (!codeptr)
2630  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2631  if (ompt_enabled.enabled) {
2632  if (ompt_enabled.ompt_callback_mutex_acquire) {
2633  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2634  ompt_mutex_nest_lock, omp_lock_hint_none,
2635  __ompt_get_mutex_impl_type(user_lock),
2636  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2637  }
2638  }
2639 #endif
2640  int acquire_status =
2641  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2642  (void) acquire_status;
2643 #if USE_ITT_BUILD
2644  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2645 #endif
2646 
2647 #if OMPT_SUPPORT && OMPT_OPTIONAL
2648  if (ompt_enabled.enabled) {
2649  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2650  if (ompt_enabled.ompt_callback_mutex_acquired) {
2651  // lock_first
2652  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2653  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2654  codeptr);
2655  }
2656  } else {
2657  if (ompt_enabled.ompt_callback_nest_lock) {
2658  // lock_next
2659  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2660  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2661  }
2662  }
2663  }
2664 #endif
2665 
2666 #else // KMP_USE_DYNAMIC_LOCK
2667  int acquire_status;
2668  kmp_user_lock_p lck;
2669 
2670  if ((__kmp_user_lock_kind == lk_tas) &&
2671  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2672  OMP_NEST_LOCK_T_SIZE)) {
2673  lck = (kmp_user_lock_p)user_lock;
2674  }
2675 #if KMP_USE_FUTEX
2676  else if ((__kmp_user_lock_kind == lk_futex) &&
2677  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2678  OMP_NEST_LOCK_T_SIZE)) {
2679  lck = (kmp_user_lock_p)user_lock;
2680  }
2681 #endif
2682  else {
2683  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2684  }
2685 
2686 #if USE_ITT_BUILD
2687  __kmp_itt_lock_acquiring(lck);
2688 #endif /* USE_ITT_BUILD */
2689 #if OMPT_SUPPORT && OMPT_OPTIONAL
2690  // This is the case, if called from omp_init_lock_with_hint:
2691  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2692  if (!codeptr)
2693  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2694  if (ompt_enabled.enabled) {
2695  if (ompt_enabled.ompt_callback_mutex_acquire) {
2696  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2697  ompt_mutex_nest_lock, omp_lock_hint_none,
2698  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2699  codeptr);
2700  }
2701  }
2702 #endif
2703 
2704  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2705 
2706 #if USE_ITT_BUILD
2707  __kmp_itt_lock_acquired(lck);
2708 #endif /* USE_ITT_BUILD */
2709 
2710 #if OMPT_SUPPORT && OMPT_OPTIONAL
2711  if (ompt_enabled.enabled) {
2712  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2713  if (ompt_enabled.ompt_callback_mutex_acquired) {
2714  // lock_first
2715  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2716  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2717  }
2718  } else {
2719  if (ompt_enabled.ompt_callback_nest_lock) {
2720  // lock_next
2721  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2722  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2723  }
2724  }
2725  }
2726 #endif
2727 
2728 #endif // KMP_USE_DYNAMIC_LOCK
2729 }
2730 
2731 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2732 #if KMP_USE_DYNAMIC_LOCK
2733 
2734  int tag = KMP_EXTRACT_D_TAG(user_lock);
2735 #if USE_ITT_BUILD
2736  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2737 #endif
2738 #if KMP_USE_INLINED_TAS
2739  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2740  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2741  } else
2742 #elif KMP_USE_INLINED_FUTEX
2743  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2744  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2745  } else
2746 #endif
2747  {
2748  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2749  }
2750 
2751 #if OMPT_SUPPORT && OMPT_OPTIONAL
2752  // This is the case, if called from omp_init_lock_with_hint:
2753  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2754  if (!codeptr)
2755  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2756  if (ompt_enabled.ompt_callback_mutex_released) {
2757  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2758  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2759  }
2760 #endif
2761 
2762 #else // KMP_USE_DYNAMIC_LOCK
2763 
2764  kmp_user_lock_p lck;
2765 
2766  /* Can't use serial interval since not block structured */
2767  /* release the lock */
2768 
2769  if ((__kmp_user_lock_kind == lk_tas) &&
2770  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2771 #if KMP_OS_LINUX && \
2772  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2773 // "fast" path implemented to fix customer performance issue
2774 #if USE_ITT_BUILD
2775  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2776 #endif /* USE_ITT_BUILD */
2777  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2778  KMP_MB();
2779 
2780 #if OMPT_SUPPORT && OMPT_OPTIONAL
2781  // This is the case, if called from omp_init_lock_with_hint:
2782  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2783  if (!codeptr)
2784  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2785  if (ompt_enabled.ompt_callback_mutex_released) {
2786  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2787  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2788  }
2789 #endif
2790 
2791  return;
2792 #else
2793  lck = (kmp_user_lock_p)user_lock;
2794 #endif
2795  }
2796 #if KMP_USE_FUTEX
2797  else if ((__kmp_user_lock_kind == lk_futex) &&
2798  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2799  lck = (kmp_user_lock_p)user_lock;
2800  }
2801 #endif
2802  else {
2803  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2804  }
2805 
2806 #if USE_ITT_BUILD
2807  __kmp_itt_lock_releasing(lck);
2808 #endif /* USE_ITT_BUILD */
2809 
2810  RELEASE_LOCK(lck, gtid);
2811 
2812 #if OMPT_SUPPORT && OMPT_OPTIONAL
2813  // This is the case, if called from omp_init_lock_with_hint:
2814  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2815  if (!codeptr)
2816  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2817  if (ompt_enabled.ompt_callback_mutex_released) {
2818  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2819  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2820  }
2821 #endif
2822 
2823 #endif // KMP_USE_DYNAMIC_LOCK
2824 }
2825 
2826 /* release the lock */
2827 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2828 #if KMP_USE_DYNAMIC_LOCK
2829 
2830 #if USE_ITT_BUILD
2831  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2832 #endif
2833  int release_status =
2834  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2835  (void) release_status;
2836 
2837 #if OMPT_SUPPORT && OMPT_OPTIONAL
2838  // This is the case, if called from omp_init_lock_with_hint:
2839  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2840  if (!codeptr)
2841  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2842  if (ompt_enabled.enabled) {
2843  if (release_status == KMP_LOCK_RELEASED) {
2844  if (ompt_enabled.ompt_callback_mutex_released) {
2845  // release_lock_last
2846  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2847  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2848  codeptr);
2849  }
2850  } else if (ompt_enabled.ompt_callback_nest_lock) {
2851  // release_lock_prev
2852  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2853  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2854  }
2855  }
2856 #endif
2857 
2858 #else // KMP_USE_DYNAMIC_LOCK
2859 
2860  kmp_user_lock_p lck;
2861 
2862  /* Can't use serial interval since not block structured */
2863 
2864  if ((__kmp_user_lock_kind == lk_tas) &&
2865  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2866  OMP_NEST_LOCK_T_SIZE)) {
2867 #if KMP_OS_LINUX && \
2868  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2869  // "fast" path implemented to fix customer performance issue
2870  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2871 #if USE_ITT_BUILD
2872  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2873 #endif /* USE_ITT_BUILD */
2874 
2875 #if OMPT_SUPPORT && OMPT_OPTIONAL
2876  int release_status = KMP_LOCK_STILL_HELD;
2877 #endif
2878 
2879  if (--(tl->lk.depth_locked) == 0) {
2880  TCW_4(tl->lk.poll, 0);
2881 #if OMPT_SUPPORT && OMPT_OPTIONAL
2882  release_status = KMP_LOCK_RELEASED;
2883 #endif
2884  }
2885  KMP_MB();
2886 
2887 #if OMPT_SUPPORT && OMPT_OPTIONAL
2888  // This is the case, if called from omp_init_lock_with_hint:
2889  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2890  if (!codeptr)
2891  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2892  if (ompt_enabled.enabled) {
2893  if (release_status == KMP_LOCK_RELEASED) {
2894  if (ompt_enabled.ompt_callback_mutex_released) {
2895  // release_lock_last
2896  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2897  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2898  }
2899  } else if (ompt_enabled.ompt_callback_nest_lock) {
2900  // release_lock_previous
2901  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2902  ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2903  }
2904  }
2905 #endif
2906 
2907  return;
2908 #else
2909  lck = (kmp_user_lock_p)user_lock;
2910 #endif
2911  }
2912 #if KMP_USE_FUTEX
2913  else if ((__kmp_user_lock_kind == lk_futex) &&
2914  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2915  OMP_NEST_LOCK_T_SIZE)) {
2916  lck = (kmp_user_lock_p)user_lock;
2917  }
2918 #endif
2919  else {
2920  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2921  }
2922 
2923 #if USE_ITT_BUILD
2924  __kmp_itt_lock_releasing(lck);
2925 #endif /* USE_ITT_BUILD */
2926 
2927  int release_status;
2928  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2929 #if OMPT_SUPPORT && OMPT_OPTIONAL
2930  // This is the case, if called from omp_init_lock_with_hint:
2931  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2932  if (!codeptr)
2933  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2934  if (ompt_enabled.enabled) {
2935  if (release_status == KMP_LOCK_RELEASED) {
2936  if (ompt_enabled.ompt_callback_mutex_released) {
2937  // release_lock_last
2938  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2939  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2940  }
2941  } else if (ompt_enabled.ompt_callback_nest_lock) {
2942  // release_lock_previous
2943  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2944  ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2945  }
2946  }
2947 #endif
2948 
2949 #endif // KMP_USE_DYNAMIC_LOCK
2950 }
2951 
2952 /* try to acquire the lock */
2953 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2954  KMP_COUNT_BLOCK(OMP_test_lock);
2955 
2956 #if KMP_USE_DYNAMIC_LOCK
2957  int rc;
2958  int tag = KMP_EXTRACT_D_TAG(user_lock);
2959 #if USE_ITT_BUILD
2960  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2961 #endif
2962 #if OMPT_SUPPORT && OMPT_OPTIONAL
2963  // This is the case, if called from omp_init_lock_with_hint:
2964  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2965  if (!codeptr)
2966  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2967  if (ompt_enabled.ompt_callback_mutex_acquire) {
2968  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2969  ompt_mutex_lock, omp_lock_hint_none,
2970  __ompt_get_mutex_impl_type(user_lock),
2971  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2972  }
2973 #endif
2974 #if KMP_USE_INLINED_TAS
2975  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2976  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2977  } else
2978 #elif KMP_USE_INLINED_FUTEX
2979  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2980  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2981  } else
2982 #endif
2983  {
2984  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2985  }
2986  if (rc) {
2987 #if USE_ITT_BUILD
2988  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2989 #endif
2990 #if OMPT_SUPPORT && OMPT_OPTIONAL
2991  if (ompt_enabled.ompt_callback_mutex_acquired) {
2992  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2993  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2994  }
2995 #endif
2996  return FTN_TRUE;
2997  } else {
2998 #if USE_ITT_BUILD
2999  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3000 #endif
3001  return FTN_FALSE;
3002  }
3003 
3004 #else // KMP_USE_DYNAMIC_LOCK
3005 
3006  kmp_user_lock_p lck;
3007  int rc;
3008 
3009  if ((__kmp_user_lock_kind == lk_tas) &&
3010  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3011  lck = (kmp_user_lock_p)user_lock;
3012  }
3013 #if KMP_USE_FUTEX
3014  else if ((__kmp_user_lock_kind == lk_futex) &&
3015  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3016  lck = (kmp_user_lock_p)user_lock;
3017  }
3018 #endif
3019  else {
3020  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3021  }
3022 
3023 #if USE_ITT_BUILD
3024  __kmp_itt_lock_acquiring(lck);
3025 #endif /* USE_ITT_BUILD */
3026 #if OMPT_SUPPORT && OMPT_OPTIONAL
3027  // This is the case, if called from omp_init_lock_with_hint:
3028  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3029  if (!codeptr)
3030  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3031  if (ompt_enabled.ompt_callback_mutex_acquire) {
3032  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3033  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3034  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3035  }
3036 #endif
3037 
3038  rc = TEST_LOCK(lck, gtid);
3039 #if USE_ITT_BUILD
3040  if (rc) {
3041  __kmp_itt_lock_acquired(lck);
3042  } else {
3043  __kmp_itt_lock_cancelled(lck);
3044  }
3045 #endif /* USE_ITT_BUILD */
3046 #if OMPT_SUPPORT && OMPT_OPTIONAL
3047  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3048  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3049  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3050  }
3051 #endif
3052 
3053  return (rc ? FTN_TRUE : FTN_FALSE);
3054 
3055 /* Can't use serial interval since not block structured */
3056 
3057 #endif // KMP_USE_DYNAMIC_LOCK
3058 }
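/* Usage sketch (illustrative).  omp_test_lock() reaches this entry point;
   FTN_TRUE/FTN_FALSE is what user code observes as a nonzero/zero result, so
   the idiomatic non-blocking pattern is simply the following (the helper
   names are hypothetical):

     if (omp_test_lock(&l)) {
       // lock acquired without blocking
       update_shared_state();
       omp_unset_lock(&l);
     } else {
       do_other_work();             // fall back, retry later
     }
*/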
3059 
3060 /* try to acquire the lock */
3061 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3062 #if KMP_USE_DYNAMIC_LOCK
3063  int rc;
3064 #if USE_ITT_BUILD
3065  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3066 #endif
3067 #if OMPT_SUPPORT && OMPT_OPTIONAL
3068  // This is the case, if called from omp_init_lock_with_hint:
3069  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3070  if (!codeptr)
3071  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3072  if (ompt_enabled.ompt_callback_mutex_acquire) {
3073  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3074  ompt_mutex_nest_lock, omp_lock_hint_none,
3075  __ompt_get_mutex_impl_type(user_lock),
3076  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3077  }
3078 #endif
3079  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3080 #if USE_ITT_BUILD
3081  if (rc) {
3082  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3083  } else {
3084  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3085  }
3086 #endif
3087 #if OMPT_SUPPORT && OMPT_OPTIONAL
3088  if (ompt_enabled.enabled && rc) {
3089  if (rc == 1) {
3090  if (ompt_enabled.ompt_callback_mutex_acquired) {
3091  // lock_first
3092  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3093  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3094  codeptr);
3095  }
3096  } else {
3097  if (ompt_enabled.ompt_callback_nest_lock) {
3098  // lock_next
3099  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3100  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3101  }
3102  }
3103  }
3104 #endif
3105  return rc;
3106 
3107 #else // KMP_USE_DYNAMIC_LOCK
3108 
3109  kmp_user_lock_p lck;
3110  int rc;
3111 
3112  if ((__kmp_user_lock_kind == lk_tas) &&
3113  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3114  OMP_NEST_LOCK_T_SIZE)) {
3115  lck = (kmp_user_lock_p)user_lock;
3116  }
3117 #if KMP_USE_FUTEX
3118  else if ((__kmp_user_lock_kind == lk_futex) &&
3119  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3120  OMP_NEST_LOCK_T_SIZE)) {
3121  lck = (kmp_user_lock_p)user_lock;
3122  }
3123 #endif
3124  else {
3125  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3126  }
3127 
3128 #if USE_ITT_BUILD
3129  __kmp_itt_lock_acquiring(lck);
3130 #endif /* USE_ITT_BUILD */
3131 
3132 #if OMPT_SUPPORT && OMPT_OPTIONAL
3133  // This is the case, if called from omp_init_lock_with_hint:
3134  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3135  if (!codeptr)
3136  codeptr = OMPT_GET_RETURN_ADDRESS(0);
 3137  if (ompt_enabled.enabled &&
 3138      ompt_enabled.ompt_callback_mutex_acquire) {
3139  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3140  ompt_mutex_nest_lock, omp_lock_hint_none,
3141  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3142  codeptr);
3143  }
3144 #endif
3145 
3146  rc = TEST_NESTED_LOCK(lck, gtid);
3147 #if USE_ITT_BUILD
3148  if (rc) {
3149  __kmp_itt_lock_acquired(lck);
3150  } else {
3151  __kmp_itt_lock_cancelled(lck);
3152  }
3153 #endif /* USE_ITT_BUILD */
3154 #if OMPT_SUPPORT && OMPT_OPTIONAL
3155  if (ompt_enabled.enabled && rc) {
3156  if (rc == 1) {
3157  if (ompt_enabled.ompt_callback_mutex_acquired) {
3158  // lock_first
3159  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3160  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3161  }
3162  } else {
3163  if (ompt_enabled.ompt_callback_nest_lock) {
3164  // lock_next
3165  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3166  ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3167  }
3168  }
3169  }
3170 #endif
3171  return rc;
3172 
3173 /* Can't use serial interval since not block structured */
3174 
3175 #endif // KMP_USE_DYNAMIC_LOCK
3176 }
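/* Usage sketch (illustrative).  omp_test_nest_lock() lands here; per the
   OpenMP API the user-visible return value is the new nesting count on
   success and zero on failure, which is why the OMPT code above reports
   rc == 1 as the first acquisition and any larger value as a re-entry.

     omp_nest_lock_t nl;
     omp_init_nest_lock(&nl);
     if (omp_test_nest_lock(&nl)) {     // returns 1: first acquisition
       if (omp_test_nest_lock(&nl))     // returns 2: nested re-entry
         omp_unset_nest_lock(&nl);
       omp_unset_nest_lock(&nl);
     }
     omp_destroy_nest_lock(&nl);
*/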
3177 
3178 // Interface to fast scalable reduce methods routines
3179 
3180 // keep the selected method in a thread local structure for cross-function
3181 // usage: will be used in __kmpc_end_reduce* functions;
3182 // another solution: to re-determine the method one more time in
3183 // __kmpc_end_reduce* functions (new prototype required then)
3184 // AT: which solution is better?
3185 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3186  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3187 
3188 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3189  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3190 
3191 // description of the packed_reduction_method variable: look at the macros in
3192 // kmp.h
3193 
3194 // used in a critical section reduce block
3195 static __forceinline void
3196 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3197  kmp_critical_name *crit) {
3198 
3199  // this lock was visible to a customer and to the threading profile tool as a
3200  // serial overhead span (although it's used for an internal purpose only)
3201  // why was it visible in previous implementation?
3202  // should we keep it visible in new reduce block?
3203  kmp_user_lock_p lck;
3204 
3205 #if KMP_USE_DYNAMIC_LOCK
3206 
3207  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3208  // Check if it is initialized.
3209  if (*lk == 0) {
3210  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3211  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3212  KMP_GET_D_TAG(__kmp_user_lock_seq));
3213  } else {
3214  __kmp_init_indirect_csptr(crit, loc, global_tid,
3215  KMP_GET_I_TAG(__kmp_user_lock_seq));
3216  }
3217  }
3218  // Branch for accessing the actual lock object and set operation. This
3219  // branching is inevitable since this lock initialization does not follow the
3220  // normal dispatch path (lock table is not used).
3221  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3222  lck = (kmp_user_lock_p)lk;
3223  KMP_DEBUG_ASSERT(lck != NULL);
3224  if (__kmp_env_consistency_check) {
3225  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3226  }
3227  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3228  } else {
3229  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3230  lck = ilk->lock;
3231  KMP_DEBUG_ASSERT(lck != NULL);
3232  if (__kmp_env_consistency_check) {
3233  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3234  }
3235  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3236  }
3237 
3238 #else // KMP_USE_DYNAMIC_LOCK
3239 
3240  // We know that the fast reduction code is only emitted by Intel compilers
3241  // with 32 byte critical sections. If there isn't enough space, then we
3242  // have to use a pointer.
3243  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3244  lck = (kmp_user_lock_p)crit;
3245  } else {
3246  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3247  }
3248  KMP_DEBUG_ASSERT(lck != NULL);
3249 
3250  if (__kmp_env_consistency_check)
3251  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3252 
3253  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3254 
3255 #endif // KMP_USE_DYNAMIC_LOCK
3256 }
3257 
3258 // used in a critical section reduce block
3259 static __forceinline void
3260 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3261  kmp_critical_name *crit) {
3262 
3263  kmp_user_lock_p lck;
3264 
3265 #if KMP_USE_DYNAMIC_LOCK
3266 
3267  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3268  lck = (kmp_user_lock_p)crit;
3269  if (__kmp_env_consistency_check)
3270  __kmp_pop_sync(global_tid, ct_critical, loc);
3271  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3272  } else {
3273  kmp_indirect_lock_t *ilk =
3274  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3275  if (__kmp_env_consistency_check)
3276  __kmp_pop_sync(global_tid, ct_critical, loc);
3277  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3278  }
3279 
3280 #else // KMP_USE_DYNAMIC_LOCK
3281 
3282  // We know that the fast reduction code is only emitted by Intel compilers
3283  // with 32 byte critical sections. If there isn't enough space, then we have
3284  // to use a pointer.
3285  if (__kmp_base_user_lock_size > 32) {
3286  lck = *((kmp_user_lock_p *)crit);
3287  KMP_ASSERT(lck != NULL);
3288  } else {
3289  lck = (kmp_user_lock_p)crit;
3290  }
3291 
3292  if (__kmp_env_consistency_check)
3293  __kmp_pop_sync(global_tid, ct_critical, loc);
3294 
3295  __kmp_release_user_lock_with_checks(lck, global_tid);
3296 
3297 #endif // KMP_USE_DYNAMIC_LOCK
3298 } // __kmp_end_critical_section_reduce_block
3299 
3300 #if OMP_40_ENABLED
3301 static __forceinline int
3302 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3303  int *task_state) {
3304  kmp_team_t *team;
3305 
3306  // Check if we are inside the teams construct?
3307  if (th->th.th_teams_microtask) {
3308  *team_p = team = th->th.th_team;
3309  if (team->t.t_level == th->th.th_teams_level) {
3310  // This is reduction at teams construct.
3311  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3312  // Let's swap teams temporarily for the reduction.
3313  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3314  th->th.th_team = team->t.t_parent;
3315  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3316  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3317  *task_state = th->th.th_task_state;
3318  th->th.th_task_state = 0;
3319 
3320  return 1;
3321  }
3322  }
3323  return 0;
3324 }
3325 
3326 static __forceinline void
3327 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3328  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3329  th->th.th_info.ds.ds_tid = 0;
3330  th->th.th_team = team;
3331  th->th.th_team_nproc = team->t.t_nproc;
3332  th->th.th_task_team = team->t.t_task_team[task_state];
3333  th->th.th_task_state = task_state;
3334 }
3335 #endif
3336 
3337 /* 2.a.i. Reduce Block without a terminating barrier */
3353 kmp_int32
3354 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3355  size_t reduce_size, void *reduce_data,
3356  void (*reduce_func)(void *lhs_data, void *rhs_data),
3357  kmp_critical_name *lck) {
3358 
3359  KMP_COUNT_BLOCK(REDUCE_nowait);
3360  int retval = 0;
3361  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3362 #if OMP_40_ENABLED
3363  kmp_info_t *th;
3364  kmp_team_t *team;
3365  int teams_swapped = 0, task_state;
3366 #endif
3367  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3368 
3369  // why do we need this initialization here at all?
 3370  // Reduction clause cannot be used as a stand-alone directive.
3371 
3372  // do not call __kmp_serial_initialize(), it will be called by
3373  // __kmp_parallel_initialize() if needed
3374  // possible detection of false-positive race by the threadchecker ???
3375  if (!TCR_4(__kmp_init_parallel))
3376  __kmp_parallel_initialize();
3377 
3378 // check correctness of reduce block nesting
3379 #if KMP_USE_DYNAMIC_LOCK
3380  if (__kmp_env_consistency_check)
3381  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3382 #else
3383  if (__kmp_env_consistency_check)
3384  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3385 #endif
3386 
3387 #if OMP_40_ENABLED
3388  th = __kmp_thread_from_gtid(global_tid);
3389  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3390 #endif // OMP_40_ENABLED
3391 
3392  // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3393  // the value should be kept in a variable
3394  // the variable should be either a construct-specific or thread-specific
3395  // property, not a team specific property
3396  // (a thread can reach the next reduce block on the next construct, reduce
3397  // method may differ on the next construct)
3398  // an ident_t "loc" parameter could be used as a construct-specific property
3399  // (what if loc == 0?)
 3400  // (if both construct-specific and team-specific variables were shared,
 3401  // then unnecessary extra syncs would be needed)
3402  // a thread-specific variable is better regarding two issues above (next
3403  // construct and extra syncs)
3404  // a thread-specific "th_local.reduction_method" variable is used currently
 3405  // each thread executes the 'determine' and 'set' lines (no need to restrict
 3406  // this to a single thread, which would only add unnecessary extra syncs)
3407 
3408  packed_reduction_method = __kmp_determine_reduction_method(
3409  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3410  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3411 
3412  if (packed_reduction_method == critical_reduce_block) {
3413 
3414  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3415  retval = 1;
3416 
3417  } else if (packed_reduction_method == empty_reduce_block) {
3418 
3419  // usage: if team size == 1, no synchronization is required ( Intel
3420  // platforms only )
3421  retval = 1;
3422 
3423  } else if (packed_reduction_method == atomic_reduce_block) {
3424 
3425  retval = 2;
3426 
3427  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3428  // won't be called by the code gen)
3429  // (it's not quite good, because the checking block has been closed by
3430  // this 'pop',
3431  // but atomic operation has not been executed yet, will be executed
3432  // slightly later, literally on next instruction)
3433  if (__kmp_env_consistency_check)
3434  __kmp_pop_sync(global_tid, ct_reduce, loc);
3435 
3436  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3437  tree_reduce_block)) {
3438 
3439 // AT: performance issue: a real barrier here
3440 // AT: (if master goes slow, other threads are blocked here waiting for the
3441 // master to come and release them)
3442 // AT: (it's not what a customer might expect specifying NOWAIT clause)
3443 // AT: (specifying NOWAIT won't result in improvement of performance, it'll
3444 // be confusing to a customer)
3445 // AT: another implementation of *barrier_gather*nowait() (or some other design)
 3446 // might go faster and be more in line with the sense of NOWAIT
3447 // AT: TO DO: do epcc test and compare times
3448 
3449 // this barrier should be invisible to a customer and to the threading profile
3450 // tool (it's neither a terminating barrier nor customer's code, it's
3451 // used for an internal purpose)
3452 #if OMPT_SUPPORT
 3453  // JP: can this barrier potentially lead to task scheduling?
3454  // JP: as long as there is a barrier in the implementation, OMPT should and
3455  // will provide the barrier events
3456  // so we set-up the necessary frame/return addresses.
3457  ompt_frame_t *ompt_frame;
3458  if (ompt_enabled.enabled) {
3459  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3460  if (ompt_frame->enter_frame.ptr == NULL)
3461  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3462  OMPT_STORE_RETURN_ADDRESS(global_tid);
3463  }
3464 #endif
3465 #if USE_ITT_NOTIFY
3466  __kmp_threads[global_tid]->th.th_ident = loc;
3467 #endif
3468  retval =
3469  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3470  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3471  retval = (retval != 0) ? (0) : (1);
3472 #if OMPT_SUPPORT && OMPT_OPTIONAL
3473  if (ompt_enabled.enabled) {
3474  ompt_frame->enter_frame = ompt_data_none;
3475  }
3476 #endif
3477 
3478  // all other workers except master should do this pop here
3479  // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3480  if (__kmp_env_consistency_check) {
3481  if (retval == 0) {
3482  __kmp_pop_sync(global_tid, ct_reduce, loc);
3483  }
3484  }
3485 
3486  } else {
3487 
3488  // should never reach this block
3489  KMP_ASSERT(0); // "unexpected method"
3490  }
3491 #if OMP_40_ENABLED
3492  if (teams_swapped) {
3493  __kmp_restore_swapped_teams(th, team, task_state);
3494  }
3495 #endif
3496  KA_TRACE(
3497  10,
3498  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3499  global_tid, packed_reduction_method, retval));
3500 
3501  return retval;
3502 }
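/* Lowering sketch (illustrative).  Compiler-generated code dispatches on the
   return value: 1 means the runtime chose the critical or tree path and the
   caller must fold its partial result and then call
   __kmpc_end_reduce_nowait(); 2 means each thread should combine its partial
   result atomically and no end call is made; 0 means this thread contributes
   nothing further.  Except for the runtime entry points, the names below are
   hypothetical (reduction(+:sum) over an int, priv_sum being the
   thread-private partial result).

     switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(priv_sum), &priv_sum,
                                  reduce_sum, &crit_name)) {
     case 1:
       shared_sum += priv_sum;
       __kmpc_end_reduce_nowait(loc, gtid, &crit_name);
       break;
     case 2:
       __kmpc_atomic_fixed4_add(loc, gtid, &shared_sum, priv_sum);
       break;
     default:
       break;
     }

   where reduce_sum() has the reduce_func signature:

     static void reduce_sum(void *lhs, void *rhs) {
       *(int *)lhs += *(int *)rhs;
     }
*/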
3503 
3512 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3513  kmp_critical_name *lck) {
3514 
3515  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3516 
3517  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3518 
3519  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3520 
3521  if (packed_reduction_method == critical_reduce_block) {
3522 
3523  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3524 
3525  } else if (packed_reduction_method == empty_reduce_block) {
3526 
3527  // usage: if team size == 1, no synchronization is required ( on Intel
3528  // platforms only )
3529 
3530  } else if (packed_reduction_method == atomic_reduce_block) {
3531 
3532  // neither master nor other workers should get here
3533  // (code gen does not generate this call in case 2: atomic reduce block)
 3534  // actually it would be better to remove this elseif altogether;
 3535  // after removal this value will be checked by the 'else' and will assert
3536 
3537  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3538  tree_reduce_block)) {
3539 
3540  // only master gets here
3541 
3542  } else {
3543 
3544  // should never reach this block
3545  KMP_ASSERT(0); // "unexpected method"
3546  }
3547 
3548  if (__kmp_env_consistency_check)
3549  __kmp_pop_sync(global_tid, ct_reduce, loc);
3550 
3551  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3552  global_tid, packed_reduction_method));
3553 
3554  return;
3555 }
3556 
3557 /* 2.a.ii. Reduce Block with a terminating barrier */
3558 
3574 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3575  size_t reduce_size, void *reduce_data,
3576  void (*reduce_func)(void *lhs_data, void *rhs_data),
3577  kmp_critical_name *lck) {
3578  KMP_COUNT_BLOCK(REDUCE_wait);
3579  int retval = 0;
3580  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3581 #if OMP_40_ENABLED
3582  kmp_info_t *th;
3583  kmp_team_t *team;
3584  int teams_swapped = 0, task_state;
3585 #endif
3586 
3587  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3588 
3589  // why do we need this initialization here at all?
 3590  // Reduction clause cannot be a stand-alone directive.
3591 
3592  // do not call __kmp_serial_initialize(), it will be called by
3593  // __kmp_parallel_initialize() if needed
3594  // possible detection of false-positive race by the threadchecker ???
3595  if (!TCR_4(__kmp_init_parallel))
3596  __kmp_parallel_initialize();
3597 
3598 // check correctness of reduce block nesting
3599 #if KMP_USE_DYNAMIC_LOCK
3600  if (__kmp_env_consistency_check)
3601  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3602 #else
3603  if (__kmp_env_consistency_check)
3604  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3605 #endif
3606 
3607 #if OMP_40_ENABLED
3608  th = __kmp_thread_from_gtid(global_tid);
3609  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3610 #endif // OMP_40_ENABLED
3611 
3612  packed_reduction_method = __kmp_determine_reduction_method(
3613  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3614  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3615 
3616  if (packed_reduction_method == critical_reduce_block) {
3617 
3618  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3619  retval = 1;
3620 
3621  } else if (packed_reduction_method == empty_reduce_block) {
3622 
3623  // usage: if team size == 1, no synchronization is required ( Intel
3624  // platforms only )
3625  retval = 1;
3626 
3627  } else if (packed_reduction_method == atomic_reduce_block) {
3628 
3629  retval = 2;
3630 
3631  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3632  tree_reduce_block)) {
3633 
3634 // case tree_reduce_block:
3635 // this barrier should be visible to a customer and to the threading profile
3636 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3637 #if OMPT_SUPPORT
3638  ompt_frame_t *ompt_frame;
3639  if (ompt_enabled.enabled) {
3640  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3641  if (ompt_frame->enter_frame.ptr == NULL)
3642  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3643  OMPT_STORE_RETURN_ADDRESS(global_tid);
3644  }
3645 #endif
3646 #if USE_ITT_NOTIFY
3647  __kmp_threads[global_tid]->th.th_ident =
3648  loc; // needed for correct notification of frames
3649 #endif
3650  retval =
3651  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3652  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3653  retval = (retval != 0) ? (0) : (1);
3654 #if OMPT_SUPPORT && OMPT_OPTIONAL
3655  if (ompt_enabled.enabled) {
3656  ompt_frame->enter_frame = ompt_data_none;
3657  }
3658 #endif
3659 
3660  // all other workers except master should do this pop here
3661  // ( none of other workers except master will enter __kmpc_end_reduce() )
3662  if (__kmp_env_consistency_check) {
3663  if (retval == 0) { // 0: all other workers; 1: master
3664  __kmp_pop_sync(global_tid, ct_reduce, loc);
3665  }
3666  }
3667 
3668  } else {
3669 
3670  // should never reach this block
3671  KMP_ASSERT(0); // "unexpected method"
3672  }
3673 #if OMP_40_ENABLED
3674  if (teams_swapped) {
3675  __kmp_restore_swapped_teams(th, team, task_state);
3676  }
3677 #endif
3678 
3679  KA_TRACE(10,
3680  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3681  global_tid, packed_reduction_method, retval));
3682 
3683  return retval;
3684 }
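
The return value of __kmpc_reduce() selects the path the compiler-generated region must take: 1 means this thread performs the reduction and then calls __kmpc_end_reduce(), 2 means each thread updates the shared variable atomically, and 0 means nothing more is required. Below is a hedged sketch of the kind of lowering a compiler might emit for a blocking sum reduction; only the __kmpc_reduce()/__kmpc_end_reduce() entry points come from this file, while the reducer callback, the lock object, and the atomic fallback are illustrative assumptions.

/* Hedged sketch of compiler-emitted code for a blocking reduction(+:sum).
   Only __kmpc_reduce()/__kmpc_end_reduce() come from this file. */
static kmp_critical_name reduce_crit; /* lock/name storage for the critical path */

static void sum_reducer(void *lhs_data, void *rhs_data) {
  /* combine two partial results: lhs += rhs */
  *(kmp_int64 *)lhs_data += *(kmp_int64 *)rhs_data;
}

static void reduce_into(ident_t *loc, kmp_int32 gtid, kmp_int64 *shared_sum,
                        kmp_int64 local_sum) {
  switch (__kmpc_reduce(loc, gtid, /*num_vars=*/1, sizeof(kmp_int64),
                        &local_sum, sum_reducer, &reduce_crit)) {
  case 1: /* critical or tree path: this thread folds its value into the sum */
    *shared_sum += local_sum;
    __kmpc_end_reduce(loc, gtid, &reduce_crit);
    break;
  case 2: /* atomic path: every thread updates the shared variable atomically */
    __sync_fetch_and_add(shared_sum, local_sum); /* stand-in for the compiler's
                                                    atomic lowering */
    break;
  default: /* 0: tree path, non-master worker; the barrier already combined it */
    break;
  }
}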
3685 
3696 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3697  kmp_critical_name *lck) {
3698 
3699  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3700 #if OMP_40_ENABLED
3701  kmp_info_t *th;
3702  kmp_team_t *team;
3703  int teams_swapped = 0, task_state;
3704 #endif
3705 
3706  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3707 
3708 #if OMP_40_ENABLED
3709  th = __kmp_thread_from_gtid(global_tid);
3710  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3711 #endif // OMP_40_ENABLED
3712 
3713  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3714 
3715  // this barrier should be visible to the user and to the threading profile
3716  // tool (it is a terminating barrier on constructs if NOWAIT is not specified)
3717 
3718  if (packed_reduction_method == critical_reduce_block) {
3719 
3720  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3721 
3722 // TODO: implicit barrier: should be exposed
3723 #if OMPT_SUPPORT
3724  ompt_frame_t *ompt_frame;
3725  if (ompt_enabled.enabled) {
3726  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3727  if (ompt_frame->enter_frame.ptr == NULL)
3728  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3729  OMPT_STORE_RETURN_ADDRESS(global_tid);
3730  }
3731 #endif
3732 #if USE_ITT_NOTIFY
3733  __kmp_threads[global_tid]->th.th_ident = loc;
3734 #endif
3735  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3736 #if OMPT_SUPPORT && OMPT_OPTIONAL
3737  if (ompt_enabled.enabled) {
3738  ompt_frame->enter_frame = ompt_data_none;
3739  }
3740 #endif
3741 
3742  } else if (packed_reduction_method == empty_reduce_block) {
3743 
3744 // usage: if team size == 1, no synchronization is required (Intel platforms only)
3745 
3746 // TODO: implicit barrier: should be exposed
3747 #if OMPT_SUPPORT
3748  ompt_frame_t *ompt_frame;
3749  if (ompt_enabled.enabled) {
3750  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3751  if (ompt_frame->enter_frame.ptr == NULL)
3752  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3753  OMPT_STORE_RETURN_ADDRESS(global_tid);
3754  }
3755 #endif
3756 #if USE_ITT_NOTIFY
3757  __kmp_threads[global_tid]->th.th_ident = loc;
3758 #endif
3759  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3760 #if OMPT_SUPPORT && OMPT_OPTIONAL
3761  if (ompt_enabled.enabled) {
3762  ompt_frame->enter_frame = ompt_data_none;
3763  }
3764 #endif
3765 
3766  } else if (packed_reduction_method == atomic_reduce_block) {
3767 
3768 #if OMPT_SUPPORT
3769  ompt_frame_t *ompt_frame;
3770  if (ompt_enabled.enabled) {
3771  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3772  if (ompt_frame->enter_frame.ptr == NULL)
3773  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3774  OMPT_STORE_RETURN_ADDRESS(global_tid);
3775  }
3776 #endif
3777 // TODO: implicit barrier: should be exposed
3778 #if USE_ITT_NOTIFY
3779  __kmp_threads[global_tid]->th.th_ident = loc;
3780 #endif
3781  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3782 #if OMPT_SUPPORT && OMPT_OPTIONAL
3783  if (ompt_enabled.enabled) {
3784  ompt_frame->enter_frame = ompt_data_none;
3785  }
3786 #endif
3787 
3788  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3789  tree_reduce_block)) {
3790 
3791  // only master executes here (master releases all other workers)
3792  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3793  global_tid);
3794 
3795  } else {
3796 
3797  // should never reach this block
3798  KMP_ASSERT(0); // "unexpected method"
3799  }
3800 #if OMP_40_ENABLED
3801  if (teams_swapped) {
3802  __kmp_restore_swapped_teams(th, team, task_state);
3803  }
3804 #endif
3805 
3806  if (__kmp_env_consistency_check)
3807  __kmp_pop_sync(global_tid, ct_reduce, loc);
3808 
3809  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3810  global_tid, packed_reduction_method));
3811 
3812  return;
3813 }
3814 
3815 #undef __KMP_GET_REDUCTION_METHOD
3816 #undef __KMP_SET_REDUCTION_METHOD
3817 
3818 /* end of interface to fast scalable reduce routines */
3819 
3820 kmp_uint64 __kmpc_get_taskid() {
3821 
3822  kmp_int32 gtid;
3823  kmp_info_t *thread;
3824 
3825  gtid = __kmp_get_gtid();
3826  if (gtid < 0) {
3827  return 0;
3828  }
3829  thread = __kmp_thread_from_gtid(gtid);
3830  return thread->th.th_current_task->td_task_id;
3831 
3832 } // __kmpc_get_taskid
3833 
3834 kmp_uint64 __kmpc_get_parent_taskid() {
3835 
3836  kmp_int32 gtid;
3837  kmp_info_t *thread;
3838  kmp_taskdata_t *parent_task;
3839 
3840  gtid = __kmp_get_gtid();
3841  if (gtid < 0) {
3842  return 0;
3843  }
3844  thread = __kmp_thread_from_gtid(gtid);
3845  parent_task = thread->th.th_current_task->td_parent;
3846  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3847 
3848 } // __kmpc_get_parent_taskid
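
A minimal usage sketch for the two task-id queries above, e.g. from a logging or tool shim; the wrapper function and the printf formatting are assumptions made for illustration. Both routines return 0 when the calling thread is not registered with the runtime, so the sketch needs no extra guard.

#include <stdio.h>

/* Illustrative helper: print the current and parent OpenMP task ids of the
   calling thread (0 if the thread is unknown to the runtime or has no parent). */
static void log_task_ids(const char *where) {
  unsigned long long task = (unsigned long long)__kmpc_get_taskid();
  unsigned long long parent = (unsigned long long)__kmpc_get_parent_taskid();
  printf("%s: task id %llu, parent task id %llu\n", where, task, parent);
}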
3849 
3850 #if OMP_45_ENABLED
3851 
3862 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3863  const struct kmp_dim *dims) {
3864  int j, idx;
3865  kmp_int64 last, trace_count;
3866  kmp_info_t *th = __kmp_threads[gtid];
3867  kmp_team_t *team = th->th.th_team;
3868  kmp_uint32 *flags;
3869  kmp_disp_t *pr_buf = th->th.th_dispatch;
3870  dispatch_shared_info_t *sh_buf;
3871 
3872  KA_TRACE(
3873  20,
3874  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3875  gtid, num_dims, !team->t.t_serialized));
3876  KMP_DEBUG_ASSERT(dims != NULL);
3877  KMP_DEBUG_ASSERT(num_dims > 0);
3878 
3879  if (team->t.t_serialized) {
3880  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3881  return; // no dependencies if team is serialized
3882  }
3883  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3884  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3885  // the next loop
3886  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3887 
3888  // Save bounds info into allocated private buffer
3889  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3890  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3891  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3892  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3893  pr_buf->th_doacross_info[0] =
3894  (kmp_int64)num_dims; // first element is number of dimensions
3895  // Also save the address of num_done so it can be accessed later without
3896  // knowing the buffer index
3897  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3898  pr_buf->th_doacross_info[2] = dims[0].lo;
3899  pr_buf->th_doacross_info[3] = dims[0].up;
3900  pr_buf->th_doacross_info[4] = dims[0].st;
3901  last = 5;
3902  for (j = 1; j < num_dims; ++j) {
3903  kmp_int64
3904  range_length; // keeps the range of each dimension except the first, dims[0]
3905  if (dims[j].st == 1) { // most common case
3906  // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3907  range_length = dims[j].up - dims[j].lo + 1;
3908  } else {
3909  if (dims[j].st > 0) {
3910  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3911  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3912  } else { // negative increment
3913  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3914  range_length =
3915  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3916  }
3917  }
3918  pr_buf->th_doacross_info[last++] = range_length;
3919  pr_buf->th_doacross_info[last++] = dims[j].lo;
3920  pr_buf->th_doacross_info[last++] = dims[j].up;
3921  pr_buf->th_doacross_info[last++] = dims[j].st;
3922  }
3923 
3924  // Compute total trip count.
3925  // Start with range of dims[0] which we don't need to keep in the buffer.
3926  if (dims[0].st == 1) { // most common case
3927  trace_count = dims[0].up - dims[0].lo + 1;
3928  } else if (dims[0].st > 0) {
3929  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3930  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3931  } else { // negative increment
3932  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3933  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3934  }
3935  for (j = 1; j < num_dims; ++j) {
3936  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3937  }
3938  KMP_DEBUG_ASSERT(trace_count > 0);
3939 
3940  // Check if the shared buffer is still occupied by another loop
3941  // (buffers are reused modulo __kmp_dispatch_num_buffers)
3942  if (idx != sh_buf->doacross_buf_idx) {
3943  // Shared buffer is occupied, wait for it to be free
3944  __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3945  __kmp_eq_4, NULL);
3946  }
3947 #if KMP_32_BIT_ARCH
3948  // Check if we are the first thread. After the CAS the first thread gets 0,
3949  // others get 1 if initialization is in progress, allocated pointer otherwise.
3950  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3951  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3952  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3953 #else
3954  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3955  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3956 #endif
3957  if (flags == NULL) {
3958  // we are the first thread, allocate the array of flags
3959  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3960  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3961  KMP_MB();
3962  sh_buf->doacross_flags = flags;
3963  } else if (flags == (kmp_uint32 *)1) {
3964 #if KMP_32_BIT_ARCH
3965  // initialization is still in progress, need to wait
3966  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3967 #else
3968  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3969 #endif
3970  KMP_YIELD(TRUE);
3971  KMP_MB();
3972  } else {
3973  KMP_MB();
3974  }
3975  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3976  pr_buf->th_doacross_flags =
3977  sh_buf->doacross_flags; // save a private copy so we do not touch the
3978  // shared buffer on each iteration
3979  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
3980 }
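
For reference, a hedged sketch of how a compiler might fill the kmp_dim descriptor and call __kmpc_doacross_init() for a one-dimensional ordered(1) loop running from 0 to N-1 with stride 1; the wrapper name and parameters are illustrative, only the kmp_dim fields (lo/up/st) and the entry point come from the code above.

/* Hedged sketch: per-thread setup emitted before an "ordered(1)" worksharing
   loop over i = 0 .. N-1. Only kmp_dim and __kmpc_doacross_init() are real. */
static void doacross_setup(ident_t *loc, int gtid, kmp_int64 N) {
  struct kmp_dim dim;
  dim.lo = 0;     /* lower bound of the loop */
  dim.up = N - 1; /* upper bound, inclusive */
  dim.st = 1;     /* loop stride */
  __kmpc_doacross_init(loc, gtid, /*num_dims=*/1, &dim);
}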
3981 
3982 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
3983  kmp_int32 shft, num_dims, i;
3984  kmp_uint32 flag;
3985  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3986  kmp_info_t *th = __kmp_threads[gtid];
3987  kmp_team_t *team = th->th.th_team;
3988  kmp_disp_t *pr_buf;
3989  kmp_int64 lo, up, st;
3990 
3991  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3992  if (team->t.t_serialized) {
3993  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
3994  return; // no dependencies if team is serialized
3995  }
3996 
3997  // calculate sequential iteration number and check out-of-bounds condition
3998  pr_buf = th->th.th_dispatch;
3999  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4000  num_dims = pr_buf->th_doacross_info[0];
4001  lo = pr_buf->th_doacross_info[2];
4002  up = pr_buf->th_doacross_info[3];
4003  st = pr_buf->th_doacross_info[4];
4004  if (st == 1) { // most common case
4005  if (vec[0] < lo || vec[0] > up) {
4006  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4007  "bounds [%lld,%lld]\n",
4008  gtid, vec[0], lo, up));
4009  return;
4010  }
4011  iter_number = vec[0] - lo;
4012  } else if (st > 0) {
4013  if (vec[0] < lo || vec[0] > up) {
4014  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4015  "bounds [%lld,%lld]\n",
4016  gtid, vec[0], lo, up));
4017  return;
4018  }
4019  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4020  } else { // negative increment
4021  if (vec[0] > lo || vec[0] < up) {
4022  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4023  "bounds [%lld,%lld]\n",
4024  gtid, vec[0], lo, up));
4025  return;
4026  }
4027  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4028  }
4029  for (i = 1; i < num_dims; ++i) {
4030  kmp_int64 iter, ln;
4031  kmp_int32 j = i * 4;
4032  ln = pr_buf->th_doacross_info[j + 1];
4033  lo = pr_buf->th_doacross_info[j + 2];
4034  up = pr_buf->th_doacross_info[j + 3];
4035  st = pr_buf->th_doacross_info[j + 4];
4036  if (st == 1) {
4037  if (vec[i] < lo || vec[i] > up) {
4038  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4039  "bounds [%lld,%lld]\n",
4040  gtid, vec[i], lo, up));
4041  return;
4042  }
4043  iter = vec[i] - lo;
4044  } else if (st > 0) {
4045  if (vec[i] < lo || vec[i] > up) {
4046  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4047  "bounds [%lld,%lld]\n",
4048  gtid, vec[i], lo, up));
4049  return;
4050  }
4051  iter = (kmp_uint64)(vec[i] - lo) / st;
4052  } else { // st < 0
4053  if (vec[i] > lo || vec[i] < up) {
4054  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4055  "bounds [%lld,%lld]\n",
4056  gtid, vec[i], lo, up));
4057  return;
4058  }
4059  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4060  }
4061  iter_number = iter + ln * iter_number;
4062  }
4063  shft = iter_number % 32; // use 32-bit granularity
4064  iter_number >>= 5; // divided by 32
4065  flag = 1 << shft;
4066  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4067  KMP_YIELD(TRUE);
4068  }
4069  KMP_MB();
4070  KA_TRACE(20,
4071  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4072  gtid, (iter_number << 5) + shft));
4073 }
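
The wait loop above spins on one bit per iteration, packed 32 to a kmp_uint32 word. A small helper that mirrors this addressing, plus a worked value, may make the shift/mask arithmetic easier to follow; it is illustrative only and not part of the runtime.

/* Illustrative mirror of the flag addressing used by wait/post:
   iteration 70 -> word 70 >> 5 = 2, bit 70 % 32 = 6, mask 1 << 6 = 0x40. */
static void doacross_bit_position(kmp_int64 iter_number, kmp_int64 *word,
                                  kmp_uint32 *mask) {
  *word = iter_number >> 5;          /* 32 iterations per kmp_uint32 word */
  *mask = 1u << (iter_number % 32);  /* bit for this iteration within the word */
}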
4074 
4075 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4076  kmp_int32 shft, num_dims, i;
4077  kmp_uint32 flag;
4078  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4079  kmp_info_t *th = __kmp_threads[gtid];
4080  kmp_team_t *team = th->th.th_team;
4081  kmp_disp_t *pr_buf;
4082  kmp_int64 lo, st;
4083 
4084  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4085  if (team->t.t_serialized) {
4086  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4087  return; // no dependencies if team is serialized
4088  }
4089 
4090  // calculate sequential iteration number (same as in "wait" but no
4091  // out-of-bounds checks)
4092  pr_buf = th->th.th_dispatch;
4093  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4094  num_dims = pr_buf->th_doacross_info[0];
4095  lo = pr_buf->th_doacross_info[2];
4096  st = pr_buf->th_doacross_info[4];
4097  if (st == 1) { // most common case
4098  iter_number = vec[0] - lo;
4099  } else if (st > 0) {
4100  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4101  } else { // negative increment
4102  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4103  }
4104  for (i = 1; i < num_dims; ++i) {
4105  kmp_int64 iter, ln;
4106  kmp_int32 j = i * 4;
4107  ln = pr_buf->th_doacross_info[j + 1];
4108  lo = pr_buf->th_doacross_info[j + 2];
4109  st = pr_buf->th_doacross_info[j + 4];
4110  if (st == 1) {
4111  iter = vec[i] - lo;
4112  } else if (st > 0) {
4113  iter = (kmp_uint64)(vec[i] - lo) / st;
4114  } else { // st < 0
4115  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4116  }
4117  iter_number = iter + ln * iter_number;
4118  }
4119  shft = iter_number % 32; // use 32-bit granularity
4120  iter_number >>= 5; // divided by 32
4121  flag = 1 << shft;
4122  KMP_MB();
4123  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4124  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4125  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4126  (iter_number << 5) + shft));
4127 }
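
Together, wait and post implement the depend(sink)/depend(source) clauses of an ordered construct inside a doacross loop. Below is a hedged sketch of the per-iteration calls a compiler might emit for "ordered depend(sink: i-1)" followed by "ordered depend(source)" in a one-dimensional loop; the body function and variable names are assumptions, only the __kmpc_doacross_* entry points come from this file.

/* Hedged sketch of a lowered loop body for:
     #pragma omp ordered depend(sink: i - 1)
     ... work(i) ...
     #pragma omp ordered depend(source)                                     */
static void doacross_iteration(ident_t *loc, int gtid, kmp_int64 i) {
  kmp_int64 vec[1];

  vec[0] = i - 1;                      /* wait until iteration i-1 has posted;
                                          an out-of-range sink (e.g. i == 0)
                                          returns immediately, as shown above */
  __kmpc_doacross_wait(loc, gtid, vec);

  /* ... compiler-emitted loop body for iteration i ... */

  vec[0] = i;                          /* publish completion of iteration i */
  __kmpc_doacross_post(loc, gtid, vec);
}
/* After the loop, each thread calls __kmpc_doacross_fini(loc, gtid). */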
4128 
4129 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4130  kmp_int32 num_done;
4131  kmp_info_t *th = __kmp_threads[gtid];
4132  kmp_team_t *team = th->th.th_team;
4133  kmp_disp_t *pr_buf = th->th.th_dispatch;
4134 
4135  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4136  if (team->t.t_serialized) {
4137  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4138  return; // nothing to do
4139  }
4140  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4141  if (num_done == th->th.th_team_nproc) {
4142  // we are the last thread, need to free shared resources
4143  int idx = pr_buf->th_doacross_buf_idx - 1;
4144  dispatch_shared_info_t *sh_buf =
4145  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4146  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4147  (kmp_int64)&sh_buf->doacross_num_done);
4148  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4149  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4150  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4151  sh_buf->doacross_flags = NULL;
4152  sh_buf->doacross_num_done = 0;
4153  sh_buf->doacross_buf_idx +=
4154  __kmp_dispatch_num_buffers; // free buffer for future re-use
4155  }
4156  // free private resources (need to keep buffer index forever)
4157  pr_buf->th_doacross_flags = NULL;
4158  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4159  pr_buf->th_doacross_info = NULL;
4160  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4161 }
4162 #endif
4163 
4164 #if OMP_50_ENABLED
4165 int __kmpc_get_target_offload(void) {
4166  if (!__kmp_init_serial) {
4167  __kmp_serial_initialize();
4168  }
4169  return __kmp_target_offload;
4170 }
4171 #endif // OMP_50_ENABLED
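
__kmpc_get_target_offload() exposes the OMP_TARGET_OFFLOAD policy (__kmp_target_offload) to the compiler's offloading glue, initializing the runtime first if necessary. A minimal sketch of a caller that checks whether offload is mandatory; the numeric values mirror the kmp_target_offload_kind enum in kmp.h but are repeated here only as an assumption for illustration.

/* Illustrative caller; the constants are assumptions mirroring kmp.h
   (0 = disabled, 1 = default, 2 = mandatory). */
enum { OFFLOAD_DISABLED = 0, OFFLOAD_DEFAULT = 1, OFFLOAD_MANDATORY = 2 };

static int offload_is_mandatory(void) {
  return __kmpc_get_target_offload() == OFFLOAD_MANDATORY;
}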
4172 
4173 // end of file //