LLVM OpenMP* Runtime Library
kmp_lock.h
1/*
2 * kmp_lock.h -- lock header file
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef KMP_LOCK_H
14#define KMP_LOCK_H
15
16#include <limits.h> // CHAR_BIT
17#include <stddef.h> // offsetof
18
19#include "kmp_debug.h"
20#include "kmp_os.h"
21
22#ifdef __cplusplus
23#include <atomic>
24
25extern "C" {
26#endif // __cplusplus
27
28// ----------------------------------------------------------------------------
29// Have to copy these definitions from kmp.h because kmp.h cannot be included
30// due to circular dependencies. Will undef these at end of file.
31
32#define KMP_PAD(type, sz) \
33 (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
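// KMP_PAD rounds sizeof(type) up to the next multiple of sz. For example,
// with sizeof(type) == 40 and sz == 64 (a typical CACHE_LINE), it yields
// 40 + (64 - ((40 - 1) % 64) - 1) == 64; with sizeof(type) == 65 it yields
// 128. It is used below to size the lk_pad[] members so that each padded
// lock union occupies whole cache lines.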
34#define KMP_GTID_DNE (-2)
35
36// Forward declaration of ident and ident_t
37
38struct ident;
39typedef struct ident ident_t;
40
41// End of copied code.
42// ----------------------------------------------------------------------------
43
44// We need to know the size of the area that we can assume the compiler(s)
45// allocated for objects of type omp_lock_t and omp_nest_lock_t. The Intel
46// compiler always allocates a pointer-sized area, as does Visual Studio.
47//
48// gcc, however, only allocates 4 bytes for regular locks, even on 64-bit
49// Intel archs. It allocates at least 8 bytes for nested locks (more on
50// recent versions), but we are bounded by the pointer-sized chunks that
51// the Intel compiler allocates.
52
53#if (KMP_OS_LINUX || KMP_OS_AIX) && defined(KMP_GOMP_COMPAT)
54#define OMP_LOCK_T_SIZE sizeof(int)
55#define OMP_NEST_LOCK_T_SIZE sizeof(void *)
56#else
57#define OMP_LOCK_T_SIZE sizeof(void *)
58#define OMP_NEST_LOCK_T_SIZE sizeof(void *)
59#endif
60
61// The Intel compiler allocates a 32-byte chunk for a critical section.
62// Both gcc and Visual Studio only allocate enough space for a pointer.
63// Sometimes we know that the space was allocated by the Intel compiler.
64#define OMP_CRITICAL_SIZE sizeof(void *)
65#define INTEL_CRITICAL_SIZE 32
66
67// lock flags
68typedef kmp_uint32 kmp_lock_flags_t;
69
70#define kmp_lf_critical_section 1
71
72// When a lock table is used, the indices are of kmp_lock_index_t
73typedef kmp_uint32 kmp_lock_index_t;
74
75// When memory allocated for locks is on the lock pool (free list),
76// it is treated as a struct of this type.
77struct kmp_lock_pool {
78 union kmp_user_lock *next;
79 kmp_lock_index_t index;
80};
81
82typedef struct kmp_lock_pool kmp_lock_pool_t;
83
84extern void __kmp_validate_locks(void);
85
86// ----------------------------------------------------------------------------
87// There are 5 lock implementations:
88// 1. Test and set locks.
89// 2. futex locks (Linux* OS on x86 and
90// Intel(R) Many Integrated Core Architecture)
91// 3. Ticket (Lamport bakery) locks.
92// 4. Queuing locks (with separate spin fields).
93// 5. DRPA (Dynamically Reconfigurable Distributed Polling Area) locks
94//
95// and 3 lock purposes:
96// 1. Bootstrap locks -- Used for a few locks available at library
97// startup/shutdown time.
98// These do not require non-negative global thread IDs.
99// 2. Internal RTL locks -- Used everywhere else in the RTL
100// 3. User locks (includes critical sections)
101// ----------------------------------------------------------------------------
102
103// ============================================================================
104// Lock implementations.
105//
106// Test and set locks.
107//
108// Non-nested test and set locks differ from the other lock kinds (except
109// futex) in that we use the memory allocated by the compiler for the lock,
110// rather than a pointer to it.
111//
112// On lin32, lin_32e, and win_32, the space allocated may be as small as 4
113// bytes, so we have to use a lock table for nested locks, and avoid accessing
114// the depth_locked field for non-nested locks.
115//
116// Information normally available to the tools, such as lock location, lock
117// usage (normal lock vs. critical section), etc., is not available with test
118// and set locks.
119// ----------------------------------------------------------------------------
120
121struct kmp_base_tas_lock {
122 // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread
123#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __LP64__
124 // Flip the ordering of the high and low 32-bit members to be consistent
125 // with the memory layout of the address in 64-bit big-endian.
126 kmp_int32 depth_locked; // depth locked, for nested locks only
127 std::atomic<kmp_int32> poll;
128#else
129 std::atomic<kmp_int32> poll;
130 kmp_int32 depth_locked; // depth locked, for nested locks only
131#endif
132};
133
134typedef struct kmp_base_tas_lock kmp_base_tas_lock_t;
135
136union kmp_tas_lock {
137 kmp_base_tas_lock_t lk;
138 kmp_lock_pool_t pool; // make certain struct is large enough
139 double lk_align; // use worst case alignment; no cache line padding
140};
141
142typedef union kmp_tas_lock kmp_tas_lock_t;
143
144// Static initializer for test and set lock variables. Usage:
145// kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock );
146#define KMP_TAS_LOCK_INITIALIZER(lock) \
147 { \
148 { KMP_LOCK_FREE(tas), 0 } \
149 }
150
151extern int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
152extern int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
153extern int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
154extern void __kmp_init_tas_lock(kmp_tas_lock_t *lck);
155extern void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck);
156
157extern int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
158extern int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
159extern int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid);
160extern void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck);
161extern void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck);
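// A minimal usage sketch of the non-nested TAS entry points (illustrative
// only: the helper name is hypothetical, and a real caller obtains the gtid
// from the runtime rather than passing an arbitrary value).
static inline void __kmp_example_tas_usage(kmp_tas_lock_t *lck,
                                           kmp_int32 gtid) {
  __kmp_init_tas_lock(lck); // put the lock into its unlocked state
  __kmp_acquire_tas_lock(lck, gtid); // spins until this thread owns the lock
  // ... work protected by lck ...
  __kmp_release_tas_lock(lck, gtid); // marks the lock free again
  __kmp_destroy_tas_lock(lck);
}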
162
163#define KMP_LOCK_RELEASED 1
164#define KMP_LOCK_STILL_HELD 0
165#define KMP_LOCK_ACQUIRED_FIRST 1
166#define KMP_LOCK_ACQUIRED_NEXT 0
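// The release functions return KMP_LOCK_RELEASED once a lock is actually
// released; the nested variants return KMP_LOCK_STILL_HELD while the nesting
// depth is still nonzero. Likewise, the nested acquire functions report
// KMP_LOCK_ACQUIRED_FIRST on the outermost acquisition and
// KMP_LOCK_ACQUIRED_NEXT when the owner merely re-enters the lock.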
167#ifndef KMP_USE_FUTEX
168#define KMP_USE_FUTEX \
169 (KMP_OS_LINUX && \
170 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64))
171#endif
172#if KMP_USE_FUTEX
173
174// ----------------------------------------------------------------------------
175// futex locks. futex locks are only available on Linux* OS.
176//
177// Like non-nested test and set lock, non-nested futex locks use the memory
178// allocated by the compiler for the lock, rather than a pointer to it.
179//
180// Information normally available to the tools, such as lock location, lock
181// usage (normal lock vs. critical section), etc., is likewise not available
182// with futex locks. With non-nested futex locks, the lock owner is not even available.
183// ----------------------------------------------------------------------------
184
185struct kmp_base_futex_lock {
186 volatile kmp_int32 poll; // KMP_LOCK_FREE(futex) => unlocked
187 // locked: 2*(gtid+1) of the owning thread;
188 // the low bit is set while other threads are waiting
189 kmp_int32 depth_locked; // depth locked, for nested locks only
190};
191
192typedef struct kmp_base_futex_lock kmp_base_futex_lock_t;
193
194union kmp_futex_lock {
195 kmp_base_futex_lock_t lk;
196 kmp_lock_pool_t pool; // make certain struct is large enough
197 double lk_align; // use worst case alignment
198 // no cache line padding
199};
200
201typedef union kmp_futex_lock kmp_futex_lock_t;
202
203// Static initializer for futex lock variables. Usage:
204// kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock );
205#define KMP_FUTEX_LOCK_INITIALIZER(lock) \
206 { \
207 { KMP_LOCK_FREE(futex), 0 } \
208 }
209
210extern int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
211extern int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
212extern int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
213extern void __kmp_init_futex_lock(kmp_futex_lock_t *lck);
214extern void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck);
215
216extern int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck,
217 kmp_int32 gtid);
218extern int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid);
219extern int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck,
220 kmp_int32 gtid);
221extern void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck);
222extern void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck);
223
224#endif // KMP_USE_FUTEX
225
226// ----------------------------------------------------------------------------
227// Ticket locks.
228
229#ifdef __cplusplus
230
231#ifdef _MSC_VER
232// MSVC won't allow use of std::atomic<> in a union since it has non-trivial
233// copy constructor.
234
235struct kmp_base_ticket_lock {
236 // `initialized' must be the first entry in the lock data structure!
237 std::atomic_bool initialized;
238 volatile union kmp_ticket_lock *self; // points to the lock union
239 ident_t const *location; // Source code location of omp_init_lock().
240 std::atomic_uint
241 next_ticket; // ticket number to give to next thread which acquires
242 std::atomic_uint now_serving; // ticket number for thread which holds the lock
243 std::atomic_int owner_id; // (gtid+1) of owning thread, 0 if unlocked
244 std::atomic_int depth_locked; // depth locked, for nested locks only
245 kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
246};
247#else
248struct kmp_base_ticket_lock {
249 // `initialized' must be the first entry in the lock data structure!
250 std::atomic<bool> initialized;
251 volatile union kmp_ticket_lock *self; // points to the lock union
252 ident_t const *location; // Source code location of omp_init_lock().
253 std::atomic<unsigned>
254 next_ticket; // ticket number to give to next thread which acquires
255 std::atomic<unsigned>
256 now_serving; // ticket number for thread which holds the lock
257 std::atomic<int> owner_id; // (gtid+1) of owning thread, 0 if unlocked
258 std::atomic<int> depth_locked; // depth locked, for nested locks only
259 kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
260};
261#endif
262
263#else // __cplusplus
264
265struct kmp_base_ticket_lock;
266
267#endif // !__cplusplus
268
269typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t;
270
271union KMP_ALIGN_CACHE kmp_ticket_lock {
272 kmp_base_ticket_lock_t
273 lk; // This field must be first to allow static initializing.
274 kmp_lock_pool_t pool;
275 double lk_align; // use worst case alignment
276 char lk_pad[KMP_PAD(kmp_base_ticket_lock_t, CACHE_LINE)];
277};
278
279typedef union kmp_ticket_lock kmp_ticket_lock_t;
280
281// Static initializer for simple ticket lock variables. Usage:
282// kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock );
283// Note the macro argument: it is needed to initialize the lock's `self' field.
284#define KMP_TICKET_LOCK_INITIALIZER(lock) \
285 { \
286 { true, &(lock), NULL, 0U, 0U, 0, -1 } \
287 }
288
289extern int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
290extern int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
291extern int __kmp_test_ticket_lock_with_cheks(kmp_ticket_lock_t *lck,
292 kmp_int32 gtid);
293extern int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid);
294extern void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck);
295extern void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck);
296
297extern int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck,
298 kmp_int32 gtid);
299extern int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck,
300 kmp_int32 gtid);
301extern int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck,
302 kmp_int32 gtid);
303extern void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck);
304extern void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck);
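// In outline (the implementations live in kmp_lock.cpp): a thread acquires a
// ticket lock by atomically fetching and incrementing next_ticket, then
// spinning until now_serving equals its ticket; releasing the lock advances
// now_serving by one, handing ownership to the next waiter in FIFO order.
// owner_id records (gtid+1) of the holder for consistency checks, and
// depth_locked is used only by the nested variants.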
305
306// ----------------------------------------------------------------------------
307// Queuing locks.
308
309#if KMP_USE_ADAPTIVE_LOCKS
310
311struct kmp_adaptive_lock_info;
312
313typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t;
314
315#if KMP_DEBUG_ADAPTIVE_LOCKS
316
317struct kmp_adaptive_lock_statistics {
318 /* So we can get stats from locks that haven't been destroyed. */
319 kmp_adaptive_lock_info_t *next;
320 kmp_adaptive_lock_info_t *prev;
321
322 /* Other statistics */
323 kmp_uint32 successfulSpeculations;
324 kmp_uint32 hardFailedSpeculations;
325 kmp_uint32 softFailedSpeculations;
326 kmp_uint32 nonSpeculativeAcquires;
327 kmp_uint32 nonSpeculativeAcquireAttempts;
328 kmp_uint32 lemmingYields;
329};
330
331typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t;
332
333extern void __kmp_print_speculative_stats();
334extern void __kmp_init_speculative_stats();
335
336#endif // KMP_DEBUG_ADAPTIVE_LOCKS
337
338struct kmp_adaptive_lock_info {
339 /* Values used for adaptivity.
340 Although these are accessed from multiple threads we don't access them
341 atomically, because if we miss updates it probably doesn't matter much. (It
342 just affects our decision about whether to try speculation on the lock). */
343 kmp_uint32 volatile badness;
344 kmp_uint32 volatile acquire_attempts;
345 /* Parameters of the lock. */
346 kmp_uint32 max_badness;
347 kmp_uint32 max_soft_retries;
348
349#if KMP_DEBUG_ADAPTIVE_LOCKS
350 kmp_adaptive_lock_statistics_t volatile stats;
351#endif
352};
353
354#endif // KMP_USE_ADAPTIVE_LOCKS
355
356struct kmp_base_queuing_lock {
357
358 // `initialized' must be the first entry in the lock data structure!
359 volatile union kmp_queuing_lock
360 *initialized; // Points to the lock union if in initialized state.
361
362 ident_t const *location; // Source code location of omp_init_lock().
363
364 KMP_ALIGN(8) // tail_id must be 8-byte aligned!
365
366 volatile kmp_int32
367 tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty
368 // Must be no padding here since head/tail used in 8-byte CAS
369 volatile kmp_int32
370 head_id; // (gtid+1) of thread at head of wait queue, 0 if empty
371 // Decl order assumes little endian
372 // bakery-style lock
373 volatile kmp_uint32
374 next_ticket; // ticket number to give to next thread which acquires
375 volatile kmp_uint32
376 now_serving; // ticket number for thread which holds the lock
377 volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
378 kmp_int32 depth_locked; // depth locked, for nested locks only
379
380 kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
381};
382
383typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t;
384
385KMP_BUILD_ASSERT(offsetof(kmp_base_queuing_lock_t, tail_id) % 8 == 0);
386
387union KMP_ALIGN_CACHE kmp_queuing_lock {
388 kmp_base_queuing_lock_t
389 lk; // This field must be first to allow static initializing.
390 kmp_lock_pool_t pool;
391 double lk_align; // use worst case alignment
392 char lk_pad[KMP_PAD(kmp_base_queuing_lock_t, CACHE_LINE)];
393};
394
395typedef union kmp_queuing_lock kmp_queuing_lock_t;
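// In outline: an acquiring thread appends itself to the wait queue formed by
// head_id/tail_id (the two fields are manipulated together, hence the 8-byte
// CAS and alignment requirements noted above), and each waiter then spins on
// its own separate spin field rather than on a shared word, keeping the
// spinning cache-local. Release hands the lock to the thread at the head of
// the queue.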
396
397extern int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
398extern int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
399extern int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid);
400extern void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck);
401extern void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck);
402
403extern int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck,
404 kmp_int32 gtid);
405extern int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck,
406 kmp_int32 gtid);
407extern int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck,
408 kmp_int32 gtid);
409extern void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck);
410extern void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck);
411
412#if KMP_USE_ADAPTIVE_LOCKS
413
414// ----------------------------------------------------------------------------
415// Adaptive locks.
416struct kmp_base_adaptive_lock {
417 kmp_base_queuing_lock qlk;
418 KMP_ALIGN(CACHE_LINE)
419 kmp_adaptive_lock_info_t
420 adaptive; // Information for the speculative adaptive lock
421};
422
423typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t;
424
425union KMP_ALIGN_CACHE kmp_adaptive_lock {
426 kmp_base_adaptive_lock_t lk;
427 kmp_lock_pool_t pool;
428 double lk_align;
429 char lk_pad[KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE)];
430};
431typedef union kmp_adaptive_lock kmp_adaptive_lock_t;
432
433#define GET_QLK_PTR(l) ((kmp_queuing_lock_t *)&(l)->lk.qlk)
434
435#endif // KMP_USE_ADAPTIVE_LOCKS
436
437// ----------------------------------------------------------------------------
438// DRDPA ticket locks.
439struct kmp_base_drdpa_lock {
440 // All of the fields on the first cache line are only written when
441 // initializing or reconfiguring the lock. These are relatively rare
442 // operations, so data from the first cache line will usually stay resident in
443 // the cache of each thread trying to acquire the lock.
444 //
445 // initialized must be the first entry in the lock data structure!
446 KMP_ALIGN_CACHE
447
448 volatile union kmp_drdpa_lock
449 *initialized; // points to the lock union if in initialized state
450 ident_t const *location; // Source code location of omp_init_lock().
451 std::atomic<std::atomic<kmp_uint64> *> polls;
452 std::atomic<kmp_uint64> mask; // is 2**num_polls-1 for mod op
453 kmp_uint64 cleanup_ticket; // thread with cleanup ticket
454 std::atomic<kmp_uint64> *old_polls; // will deallocate old_polls
455 kmp_uint32 num_polls; // must be power of 2
456
457 // next_ticket needs to exist in a separate cache line, as it is
458 // invalidated every time a thread takes a new ticket.
459 KMP_ALIGN_CACHE
460
461 std::atomic<kmp_uint64> next_ticket;
462
463 // now_serving is used to store our ticket value while we hold the lock. It
464 // has a slightly different meaning in the DRDPA ticket locks (where it is
465 // written by the acquiring thread) than it does in the simple ticket locks
466 // (where it is written by the releasing thread).
467 //
468 // Since now_serving is only read and written in the critical section,
469 // it is non-volatile, but it needs to exist on a separate cache line,
470 // as it is invalidated at every lock acquire.
471 //
472 // Likewise, the vars used for nested locks (owner_id and depth_locked) are
473 // only written by the thread owning the lock, so they are put in this cache
474 // line. owner_id is read by other threads, so it must be declared volatile.
475 KMP_ALIGN_CACHE
476 kmp_uint64 now_serving; // doesn't have to be volatile
477 volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
478 kmp_int32 depth_locked; // depth locked
479 kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
480};
481
482typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t;
483
484union KMP_ALIGN_CACHE kmp_drdpa_lock {
485 kmp_base_drdpa_lock_t
486 lk; // This field must be first to allow static initializing.
487 kmp_lock_pool_t pool;
488 double lk_align; // use worst case alignment
489 char lk_pad[KMP_PAD(kmp_base_drdpa_lock_t, CACHE_LINE)];
490};
491
492typedef union kmp_drdpa_lock kmp_drdpa_lock_t;
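// In outline (implemented in kmp_lock.cpp): an acquiring thread atomically
// takes a ticket from next_ticket and then polls only its own slot,
// polls[ticket & mask], so different waiters spin in different cache lines of
// the distributed polling area; the releasing thread writes to the successor's
// poll slot. The polling area can be reconfigured (num_polls, mask, old_polls)
// as the number of waiting threads changes, which is the "dynamically
// reconfigurable" part of the name.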
493
494extern int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
495extern int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
496extern int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
497extern void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck);
498extern void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck);
499
500extern int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck,
501 kmp_int32 gtid);
502extern int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
503extern int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck,
504 kmp_int32 gtid);
505extern void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck);
506extern void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck);
507
508// ============================================================================
509// Lock purposes.
510// ============================================================================
511
512// Bootstrap locks.
513//
514// Bootstrap locks -- very few locks used at library initialization time.
515// Bootstrap locks are currently implemented as ticket locks.
516// They could also be implemented as test and set lock, but cannot be
517// implemented with other lock kinds as they require gtids which are not
518// available at initialization time.
519
520typedef kmp_ticket_lock_t kmp_bootstrap_lock_t;
521
522#define KMP_BOOTSTRAP_LOCK_INITIALIZER(lock) KMP_TICKET_LOCK_INITIALIZER((lock))
523#define KMP_BOOTSTRAP_LOCK_INIT(lock) \
524 kmp_bootstrap_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock)
525
526static inline int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
527 return __kmp_acquire_ticket_lock(lck, KMP_GTID_DNE);
528}
529
530static inline int __kmp_test_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
531 return __kmp_test_ticket_lock(lck, KMP_GTID_DNE);
532}
533
534static inline void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
535 __kmp_release_ticket_lock(lck, KMP_GTID_DNE);
536}
537
538static inline void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
539 __kmp_init_ticket_lock(lck);
540}
541
542static inline void __kmp_destroy_bootstrap_lock(kmp_bootstrap_lock_t *lck) {
543 __kmp_destroy_ticket_lock(lck);
544}
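// A minimal sketch of the typical bootstrap-lock pattern: guarding one-time
// setup before gtids are available. The helper name and the `done' flag are
// hypothetical.
static inline void __kmp_example_bootstrap_once(kmp_bootstrap_lock_t *lck,
                                                volatile int *done) {
  __kmp_acquire_bootstrap_lock(lck); // uses KMP_GTID_DNE internally
  if (!*done) {
    // ... perform the one-time initialization here ...
    *done = 1;
  }
  __kmp_release_bootstrap_lock(lck);
}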
545
546// Internal RTL locks.
547//
548// Internal RTL locks are also implemented as ticket locks, for now.
549//
550// FIXME - We should go through and figure out which lock kind works best for
551// each internal lock, and use the type declaration and function calls for
552// that explicit lock kind (and get rid of this section).
553
554typedef kmp_ticket_lock_t kmp_lock_t;
555
556#define KMP_LOCK_INIT(lock) kmp_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock)
557
558static inline int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid) {
559 return __kmp_acquire_ticket_lock(lck, gtid);
560}
561
562static inline int __kmp_test_lock(kmp_lock_t *lck, kmp_int32 gtid) {
563 return __kmp_test_ticket_lock(lck, gtid);
564}
565
566static inline void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid) {
567 __kmp_release_ticket_lock(lck, gtid);
568}
569
570static inline void __kmp_init_lock(kmp_lock_t *lck) {
571 __kmp_init_ticket_lock(lck);
572}
573
574static inline void __kmp_destroy_lock(kmp_lock_t *lck) {
575 __kmp_destroy_ticket_lock(lck);
576}
577
578// User locks.
579//
580// Do not allocate objects of type union kmp_user_lock!!! This will waste space
581// unless __kmp_user_lock_kind == lk_drdpa. Instead, check the value of
582// __kmp_user_lock_kind and allocate objects of the type of the appropriate
583// union member, and cast their addresses to kmp_user_lock_p.
584
585enum kmp_lock_kind {
586 lk_default = 0,
587 lk_tas,
588#if KMP_USE_FUTEX
589 lk_futex,
590#endif
591#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
592 lk_hle,
593 lk_rtm_queuing,
594 lk_rtm_spin,
595#endif
596 lk_ticket,
597 lk_queuing,
598 lk_drdpa,
599#if KMP_USE_ADAPTIVE_LOCKS
600 lk_adaptive
601#endif // KMP_USE_ADAPTIVE_LOCKS
602};
603
604typedef enum kmp_lock_kind kmp_lock_kind_t;
605
606extern kmp_lock_kind_t __kmp_user_lock_kind;
607
608union kmp_user_lock {
609 kmp_tas_lock_t tas;
610#if KMP_USE_FUTEX
611 kmp_futex_lock_t futex;
612#endif
613 kmp_ticket_lock_t ticket;
614 kmp_queuing_lock_t queuing;
615 kmp_drdpa_lock_t drdpa;
616#if KMP_USE_ADAPTIVE_LOCKS
617 kmp_adaptive_lock_t adaptive;
618#endif // KMP_USE_ADAPTIVE_LOCKS
619 kmp_lock_pool_t pool;
620};
621
622typedef union kmp_user_lock *kmp_user_lock_p;
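// A sketch of the allocation rule described in the comment above (the helper
// is hypothetical; when dynamic locks are disabled the runtime records the
// selected size in __kmp_user_lock_size below).
static inline size_t __kmp_example_user_lock_alloc_size(void) {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return sizeof(kmp_tas_lock_t);
  case lk_ticket:
    return sizeof(kmp_ticket_lock_t);
  case lk_queuing:
    return sizeof(kmp_queuing_lock_t);
  case lk_drdpa:
    return sizeof(kmp_drdpa_lock_t);
  default: // remaining kinds depend on the build configuration
    return sizeof(union kmp_user_lock); // worst case
  }
}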
623
624#if !KMP_USE_DYNAMIC_LOCK
625
626extern size_t __kmp_base_user_lock_size;
627extern size_t __kmp_user_lock_size;
628
629extern kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck);
630
631static inline kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck) {
632 KMP_DEBUG_ASSERT(__kmp_get_user_lock_owner_ != NULL);
633 return (*__kmp_get_user_lock_owner_)(lck);
634}
635
636extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
637 kmp_int32 gtid);
638
639#if KMP_OS_LINUX && \
640 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
641
642#define __kmp_acquire_user_lock_with_checks(lck, gtid) \
643 if (__kmp_user_lock_kind == lk_tas) { \
644 if (__kmp_env_consistency_check) { \
645 char const *const func = "omp_set_lock"; \
646 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && \
647 lck->tas.lk.depth_locked != -1) { \
648 KMP_FATAL(LockNestableUsedAsSimple, func); \
649 } \
650 if ((gtid >= 0) && (lck->tas.lk.poll - 1 == gtid)) { \
651 KMP_FATAL(LockIsAlreadyOwned, func); \
652 } \
653 } \
654 if (lck->tas.lk.poll != 0 || \
655 !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
656 kmp_uint32 spins; \
657 kmp_uint64 time; \
658 KMP_FSYNC_PREPARE(lck); \
659 KMP_INIT_YIELD(spins); \
660 KMP_INIT_BACKOFF(time); \
661 do { \
662 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); \
663 } while ( \
664 lck->tas.lk.poll != 0 || \
665 !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
666 } \
667 KMP_FSYNC_ACQUIRED(lck); \
668 } else { \
669 KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL); \
670 (*__kmp_acquire_user_lock_with_checks_)(lck, gtid); \
671 }
672
673#else
674static inline int __kmp_acquire_user_lock_with_checks(kmp_user_lock_p lck,
675 kmp_int32 gtid) {
676 KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL);
677 return (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);
678}
679#endif
680
681extern int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
682 kmp_int32 gtid);
683
684#if KMP_OS_LINUX && \
685 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
686
687#include "kmp_i18n.h" /* AC: KMP_FATAL definition */
688extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */
689static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck,
690 kmp_int32 gtid) {
691 if (__kmp_user_lock_kind == lk_tas) {
692 if (__kmp_env_consistency_check) {
693 char const *const func = "omp_test_lock";
694 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
695 lck->tas.lk.depth_locked != -1) {
696 KMP_FATAL(LockNestableUsedAsSimple, func);
697 }
698 }
699 return ((lck->tas.lk.poll == 0) &&
700 __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
701 } else {
702 KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL);
703 return (*__kmp_test_user_lock_with_checks_)(lck, gtid);
704 }
705}
706#else
707static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck,
708 kmp_int32 gtid) {
709 KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL);
710 return (*__kmp_test_user_lock_with_checks_)(lck, gtid);
711}
712#endif
713
714extern int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
715 kmp_int32 gtid);
716
717static inline void __kmp_release_user_lock_with_checks(kmp_user_lock_p lck,
718 kmp_int32 gtid) {
719 KMP_DEBUG_ASSERT(__kmp_release_user_lock_with_checks_ != NULL);
720 (*__kmp_release_user_lock_with_checks_)(lck, gtid);
721}
722
723extern void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck);
724
725static inline void __kmp_init_user_lock_with_checks(kmp_user_lock_p lck) {
726 KMP_DEBUG_ASSERT(__kmp_init_user_lock_with_checks_ != NULL);
727 (*__kmp_init_user_lock_with_checks_)(lck);
728}
729
730// We need a non-checking version of destroy lock for when the RTL is
731// doing the cleanup as it can't always tell if the lock is nested or not.
732extern void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck);
733
734static inline void __kmp_destroy_user_lock(kmp_user_lock_p lck) {
735 KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_ != NULL);
736 (*__kmp_destroy_user_lock_)(lck);
737}
738
739extern void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck);
740
741static inline void __kmp_destroy_user_lock_with_checks(kmp_user_lock_p lck) {
742 KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_with_checks_ != NULL);
743 (*__kmp_destroy_user_lock_with_checks_)(lck);
744}
745
746extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
747 kmp_int32 gtid);
748
749#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
750
751#define __kmp_acquire_nested_user_lock_with_checks(lck, gtid, depth) \
752 if (__kmp_user_lock_kind == lk_tas) { \
753 if (__kmp_env_consistency_check) { \
754 char const *const func = "omp_set_nest_lock"; \
755 if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZE) && \
756 lck->tas.lk.depth_locked == -1) { \
757 KMP_FATAL(LockSimpleUsedAsNestable, func); \
758 } \
759 } \
760 if (lck->tas.lk.poll - 1 == gtid) { \
761 lck->tas.lk.depth_locked += 1; \
762 *depth = KMP_LOCK_ACQUIRED_NEXT; \
763 } else { \
764 if ((lck->tas.lk.poll != 0) || \
765 !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
766 kmp_uint32 spins; \
767 kmp_uint64 time; \
768 KMP_FSYNC_PREPARE(lck); \
769 KMP_INIT_YIELD(spins); \
770 KMP_INIT_BACKOFF(time); \
771 do { \
772 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time); \
773 } while ( \
774 (lck->tas.lk.poll != 0) || \
775 !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
776 } \
777 lck->tas.lk.depth_locked = 1; \
778 *depth = KMP_LOCK_ACQUIRED_FIRST; \
779 } \
780 KMP_FSYNC_ACQUIRED(lck); \
781 } else { \
782 KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL); \
783 *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid); \
784 }
785
786#else
787static inline void
788__kmp_acquire_nested_user_lock_with_checks(kmp_user_lock_p lck, kmp_int32 gtid,
789 int *depth) {
790 KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL);
791 *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid);
792}
793#endif
794
795extern int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
796 kmp_int32 gtid);
797
798#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
799static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck,
800 kmp_int32 gtid) {
801 if (__kmp_user_lock_kind == lk_tas) {
802 int retval;
803 if (__kmp_env_consistency_check) {
804 char const *const func = "omp_test_nest_lock";
805 if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZE) &&
806 lck->tas.lk.depth_locked == -1) {
807 KMP_FATAL(LockSimpleUsedAsNestable, func);
808 }
809 }
810 KMP_DEBUG_ASSERT(gtid >= 0);
811 if (lck->tas.lk.poll - 1 ==
812 gtid) { /* __kmp_get_tas_lock_owner( lck ) == gtid */
813 return ++lck->tas.lk.depth_locked; /* same owner, depth increased */
814 }
815 retval = ((lck->tas.lk.poll == 0) &&
816 __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
817 if (retval) {
818 KMP_MB();
819 lck->tas.lk.depth_locked = 1;
820 }
821 return retval;
822 } else {
823 KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL);
824 return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid);
825 }
826}
827#else
828static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck,
829 kmp_int32 gtid) {
830 KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL);
831 return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid);
832}
833#endif
834
835extern int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
836 kmp_int32 gtid);
837
838static inline int
839__kmp_release_nested_user_lock_with_checks(kmp_user_lock_p lck,
840 kmp_int32 gtid) {
841 KMP_DEBUG_ASSERT(__kmp_release_nested_user_lock_with_checks_ != NULL);
842 return (*__kmp_release_nested_user_lock_with_checks_)(lck, gtid);
843}
844
845extern void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck);
846
847static inline void
848__kmp_init_nested_user_lock_with_checks(kmp_user_lock_p lck) {
849 KMP_DEBUG_ASSERT(__kmp_init_nested_user_lock_with_checks_ != NULL);
850 (*__kmp_init_nested_user_lock_with_checks_)(lck);
851}
852
853extern void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck);
854
855static inline void
856__kmp_destroy_nested_user_lock_with_checks(kmp_user_lock_p lck) {
857 KMP_DEBUG_ASSERT(__kmp_destroy_nested_user_lock_with_checks_ != NULL);
858 (*__kmp_destroy_nested_user_lock_with_checks_)(lck);
859}
860
861// User lock functions that do not necessarily exist for all lock kinds.
862//
863// The "set" functions usually have wrapper routines that check for a NULL set
864// function pointer and call it if non-NULL.
865//
866// In some cases, it makes sense to have a "get" wrapper function check for a
867// NULL get function pointer and return NULL / invalid value / error code if
868// the function pointer is NULL.
869//
870// In other cases, the calling code really should differentiate between an
871// unimplemented function and one that is implemented but returning NULL /
872// invalid value. If this is the case, no get function wrapper exists.
873
874extern int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck);
875
876// no set function; fields set during local allocation
877
878extern const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck);
879
880static inline const ident_t *__kmp_get_user_lock_location(kmp_user_lock_p lck) {
881 if (__kmp_get_user_lock_location_ != NULL) {
882 return (*__kmp_get_user_lock_location_)(lck);
883 } else {
884 return NULL;
885 }
886}
887
888extern void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
889 const ident_t *loc);
890
891static inline void __kmp_set_user_lock_location(kmp_user_lock_p lck,
892 const ident_t *loc) {
893 if (__kmp_set_user_lock_location_ != NULL) {
894 (*__kmp_set_user_lock_location_)(lck, loc);
895 }
896}
897
898extern kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck);
899
900extern void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
901 kmp_lock_flags_t flags);
902
903static inline void __kmp_set_user_lock_flags(kmp_user_lock_p lck,
904 kmp_lock_flags_t flags) {
905 if (__kmp_set_user_lock_flags_ != NULL) {
906 (*__kmp_set_user_lock_flags_)(lck, flags);
907 }
908}
909
910// The function which sets up all of the vtbl pointers for kmp_user_lock_t.
911extern void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind);
912
913// Macros for binding user lock functions.
914#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) \
915 { \
916 __kmp_acquire##nest##user_lock_with_checks_ = (int (*)( \
917 kmp_user_lock_p, kmp_int32))__kmp_acquire##nest##kind##_##suffix; \
918 __kmp_release##nest##user_lock_with_checks_ = (int (*)( \
919 kmp_user_lock_p, kmp_int32))__kmp_release##nest##kind##_##suffix; \
920 __kmp_test##nest##user_lock_with_checks_ = (int (*)( \
921 kmp_user_lock_p, kmp_int32))__kmp_test##nest##kind##_##suffix; \
922 __kmp_init##nest##user_lock_with_checks_ = \
923 (void (*)(kmp_user_lock_p))__kmp_init##nest##kind##_##suffix; \
924 __kmp_destroy##nest##user_lock_with_checks_ = \
925 (void (*)(kmp_user_lock_p))__kmp_destroy##nest##kind##_##suffix; \
926 }
927
928#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
929#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) \
930 KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
931#define KMP_BIND_NESTED_USER_LOCK(kind) \
932 KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
933#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) \
934 KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)
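// For reference, KMP_BIND_USER_LOCK(ticket) expands (in part) to
//   __kmp_acquire_user_lock_with_checks_ =
//       (int (*)(kmp_user_lock_p, kmp_int32))__kmp_acquire_ticket_lock;
// i.e. the generic "with checks" function pointers above are pointed at the
// entry points of one specific lock kind, with the _with_checks suffix
// selected by the KMP_BIND_*_WITH_CHECKS variants.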
935
936// User lock table & lock allocation
937/* On 64-bit Linux* OS (and OS X*) the GNU compiler allocates only 4 bytes of
938 memory for a lock variable, which is not enough to store a pointer, so we
939 have to use lock indexes instead of pointers and maintain a lock table to
940 map indexes to pointers.
941
942
943 Note: The first element of the table is not a pointer to a lock! It is a
944 pointer to the previously allocated table (or NULL if it is the first table).
945
946 Usage:
947
948 if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
949 Lock table is fully utilized. User locks are indexes, so table is used on
950 user lock operation.
951 Note: it may be the case (lin_32) that we don't need to use a lock
952 table for regular locks, but do need the table for nested locks.
953 }
954 else {
955 Lock table initialized but not actually used.
956 }
957*/
958
959struct kmp_lock_table {
960 kmp_lock_index_t used; // Number of used elements
961 kmp_lock_index_t allocated; // Number of allocated elements
962 kmp_user_lock_p *table; // Lock table.
963};
964
965typedef struct kmp_lock_table kmp_lock_table_t;
966
967extern kmp_lock_table_t __kmp_user_lock_table;
968extern kmp_user_lock_p __kmp_lock_pool;
969
970struct kmp_block_of_locks {
971 struct kmp_block_of_locks *next_block;
972 void *locks;
973};
974
975typedef struct kmp_block_of_locks kmp_block_of_locks_t;
976
977extern kmp_block_of_locks_t *__kmp_lock_blocks;
978extern int __kmp_num_locks_in_block;
979
980extern kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock,
981 kmp_int32 gtid,
982 kmp_lock_flags_t flags);
983extern void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
984 kmp_user_lock_p lck);
985extern kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock,
986 char const *func);
987extern void __kmp_cleanup_user_locks();
988
989#define KMP_CHECK_USER_LOCK_INIT() \
990 { \
991 if (!TCR_4(__kmp_init_user_locks)) { \
992 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); \
993 if (!TCR_4(__kmp_init_user_locks)) { \
994 TCW_4(__kmp_init_user_locks, TRUE); \
995 } \
996 __kmp_release_bootstrap_lock(&__kmp_initz_lock); \
997 } \
998 }
999
1000#endif // KMP_USE_DYNAMIC_LOCK
1001
1002#undef KMP_PAD
1003#undef KMP_GTID_DNE
1004
1005#if KMP_USE_DYNAMIC_LOCK
1006// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without
1007// breaking the current compatibility. The essential functionality of this code
1008// is dynamic dispatch, but it also implements (or enables implementation of)
1009// hinted user locks and critical sections, which are part of OpenMP 4.5.
1010//
1011// The lock type is decided at creation time (i.e., lock initialization), and
1012// each subsequent lock function call on the created lock object requires type
1013// extraction and a call through a jump table using the extracted type. This
1014// type information is stored in one of two ways depending on the size of the
1015// lock object, and we differentiate lock kinds by this size requirement:
1016// direct and indirect locks.
1017//
1018// Direct locks:
1019// A direct lock object fits into the space created by the compiler for an
1020// omp_lock_t object; TAS and Futex locks fall into this category. We use the
1021// low byte of the lock object as storage for the lock type, so appropriate
1022// bit operations are required to access the data meaningful to the lock
1023// algorithms. Also, to differentiate a direct lock from an indirect lock, 1 is
1024// written to the LSB of the lock object. The newly introduced "hle" lock is
1025// also a direct lock.
1026//
1027// Indirect locks:
1028// An indirect lock object requires more space than the compiler-generated
1029// space, so it must be allocated from the heap. Depending on the size of the
1030// compiler-generated space for the lock (i.e., the size of omp_lock_t), the
1031// omp_lock_t object stores either the address of the heap-allocated indirect
1032// lock (void * fits in the object) or an index of the indirect lock table
1033// entry that holds the address. Ticket/Queuing/DRDPA/Adaptive locks fall into
1034// this category, and the newly introduced "rtm" lock is also an indirect lock,
1035// implemented on top of the Queuing lock. When the omp_lock_t object holds
1036// an index (not a lock address), 0 is written to the LSB to differentiate the
1037// lock from a direct lock, and the remaining bits are the actual index into
1038// the indirect lock table.
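// As a concrete example of this encoding (using the enumerations defined
// below): lockseq_tas is the first direct lock sequence, so locktag_tas ==
// KMP_GET_D_TAG(lockseq_tas) == (1 << 1 | 1) == 3. A free dynamic TAS lock
// therefore holds the odd value 3 (KMP_LOCK_FREE(tas)), and a held one stores
// the owner information shifted left by KMP_LOCK_SHIFT with the same tag in
// the low byte (KMP_LOCK_BUSY). An indirect lock stored as an index keeps the
// index in the upper bits with a 0 LSB, so KMP_EXTRACT_D_TAG yields 0 (the
// indirect slot of the direct jump tables) and KMP_EXTRACT_I_INDEX recovers
// the index by shifting right by one.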
1039
1040#include <stdint.h> // for uintptr_t
1041
1042// Shortcuts
1043#define KMP_USE_INLINED_TAS \
1044 (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1
1045#define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0
1046
1047// List of lock definitions. The hle lock is an xchg lock prefixed with
1048// XACQUIRE/XRELEASE.
1049// All nested locks are indirect lock types.
1050#if KMP_USE_TSX
1051#if KMP_USE_FUTEX
1052#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a) m(rtm_spin, a)
1053#define KMP_FOREACH_I_LOCK(m, a) \
1054 m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing, a) \
1055 m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
1056 m(nested_queuing, a) m(nested_drdpa, a)
1057#else
1058#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a) m(rtm_spin, a)
1059#define KMP_FOREACH_I_LOCK(m, a) \
1060 m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm_queuing, a) \
1061 m(nested_tas, a) m(nested_ticket, a) m(nested_queuing, a) \
1062 m(nested_drdpa, a)
1063#endif // KMP_USE_FUTEX
1064#define KMP_LAST_D_LOCK lockseq_rtm_spin
1065#else
1066#if KMP_USE_FUTEX
1067#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a)
1068#define KMP_FOREACH_I_LOCK(m, a) \
1069 m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_futex, a) \
1070 m(nested_ticket, a) m(nested_queuing, a) m(nested_drdpa, a)
1071#define KMP_LAST_D_LOCK lockseq_futex
1072#else
1073#define KMP_FOREACH_D_LOCK(m, a) m(tas, a)
1074#define KMP_FOREACH_I_LOCK(m, a) \
1075 m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_ticket, a) \
1076 m(nested_queuing, a) m(nested_drdpa, a)
1077#define KMP_LAST_D_LOCK lockseq_tas
1078#endif // KMP_USE_FUTEX
1079#endif // KMP_USE_TSX
1080
1081// Information used in dynamic dispatch
1082#define KMP_LOCK_SHIFT \
1083 8 // number of low bits to be used as tag for direct locks
1084#define KMP_FIRST_D_LOCK lockseq_tas
1085#define KMP_FIRST_I_LOCK lockseq_ticket
1086#define KMP_LAST_I_LOCK lockseq_nested_drdpa
1087#define KMP_NUM_I_LOCKS \
1088 (locktag_nested_drdpa + 1) // number of indirect lock types
1089
1090// Base type for dynamic locks.
1091typedef kmp_uint32 kmp_dyna_lock_t;
1092
1093// Lock sequence that enumerates all lock kinds. Always make this enumeration
1094// consistent with kmp_lockseq_t in the include directory.
1095typedef enum {
1096 lockseq_indirect = 0,
1097#define expand_seq(l, a) lockseq_##l,
1098 KMP_FOREACH_D_LOCK(expand_seq, 0) KMP_FOREACH_I_LOCK(expand_seq, 0)
1099#undef expand_seq
1100} kmp_dyna_lockseq_t;
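// For example, in a configuration with KMP_USE_FUTEX and without KMP_USE_TSX,
// the expansion above yields lockseq_indirect == 0, lockseq_tas == 1,
// lockseq_futex == 2 (== KMP_LAST_D_LOCK), lockseq_ticket == 3, and so on
// through lockseq_nested_drdpa, the last indirect sequence.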
1101
1102// Enumerates indirect lock tags.
1103typedef enum {
1104#define expand_tag(l, a) locktag_##l,
1105 KMP_FOREACH_I_LOCK(expand_tag, 0)
1106#undef expand_tag
1107} kmp_indirect_locktag_t;
1108
1109// Utility macros that extract information from lock sequences.
1110#define KMP_IS_D_LOCK(seq) \
1111 ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK)
1112#define KMP_IS_I_LOCK(seq) \
1113 ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK)
1114#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq)-KMP_FIRST_I_LOCK)
1115#define KMP_GET_D_TAG(seq) ((seq) << 1 | 1)
1116
1117// Enumerates direct lock tags starting from indirect tag.
1118typedef enum {
1119#define expand_tag(l, a) locktag_##l = KMP_GET_D_TAG(lockseq_##l),
1120 KMP_FOREACH_D_LOCK(expand_tag, 0)
1121#undef expand_tag
1122} kmp_direct_locktag_t;
1123
1124// Indirect lock type
1125typedef struct {
1126 kmp_user_lock_p lock;
1127 kmp_indirect_locktag_t type;
1128} kmp_indirect_lock_t;
1129
1130// Function tables for direct locks. Set/unset/test differentiate functions
1131// with/without consistency checking.
1132extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
1133extern void (**__kmp_direct_destroy)(kmp_dyna_lock_t *);
1134extern int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32);
1135extern int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32);
1136extern int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32);
1137
1138// Function tables for indirect locks. Set/unset/test differentiate functions
1139// with/without consistency checking.
1140extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
1141extern void (**__kmp_indirect_destroy)(kmp_user_lock_p);
1142extern int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32);
1143extern int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32);
1144extern int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32);
1145
1146// Extracts direct lock tag from a user lock pointer
1147#define KMP_EXTRACT_D_TAG(l) \
1148 ((kmp_dyna_lock_t)((kmp_base_tas_lock_t *)(l))->poll & \
1149 ((1 << KMP_LOCK_SHIFT) - 1) & \
1150 -((kmp_dyna_lock_t)((kmp_tas_lock_t *)(l))->lk.poll & 1))
1151
1152// Extracts indirect lock index from a user lock pointer
1153#define KMP_EXTRACT_I_INDEX(l) \
1154 ((kmp_lock_index_t)((kmp_base_tas_lock_t *)(l))->poll >> 1)
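// Note on KMP_EXTRACT_D_TAG: (poll & 1) is 1 for a direct lock (every direct
// tag has its LSB set) and 0 otherwise, so -(poll & 1) is either an all-ones
// mask that passes the low-byte tag through or zero, which maps every
// indirect lock to entry 0 of the direct function tables.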
1155
1156// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t
1157// *) and op (operation type).
1158#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]
1159
1160// Returns function pointer to the indirect lock function with l
1161// (kmp_indirect_lock_t *) and op (operation type).
1162#define KMP_I_LOCK_FUNC(l, op) \
1163 __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]
1164
1165// Initializes a direct lock with the given lock pointer and lock sequence.
1166#define KMP_INIT_D_LOCK(l, seq) \
1167 __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)
1168
1169// Initializes an indirect lock with the given lock pointer and lock sequence.
1170#define KMP_INIT_I_LOCK(l, seq) \
1171 __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)
1172
1173// Returns "free" lock value for the given lock type.
1174#define KMP_LOCK_FREE(type) (locktag_##type)
1175
1176// Returns "busy" lock value for the given lock teyp.
1177#define KMP_LOCK_BUSY(v, type) ((v) << KMP_LOCK_SHIFT | locktag_##type)
1178
1179// Returns lock value after removing (shifting) lock tag.
1180#define KMP_LOCK_STRIP(v) ((v) >> KMP_LOCK_SHIFT)
1181
1182// Initializes global states and data structures for managing dynamic user
1183// locks.
1184extern void __kmp_init_dynamic_user_locks();
1185
1186// Allocates and returns an indirect lock with the given indirect lock tag.
1187extern kmp_indirect_lock_t *
1188__kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);
1189
1190// Cleans up global states and data structures for managing dynamic user locks.
1191extern void __kmp_cleanup_indirect_user_locks();
1192
1193// Default user lock sequence when not using hinted locks.
1194extern kmp_dyna_lockseq_t __kmp_user_lock_seq;
1195
1196// Jump table for "set lock location", available only for indirect locks.
1197extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
1198 const ident_t *);
1199#define KMP_SET_I_LOCK_LOCATION(lck, loc) \
1200 { \
1201 if (__kmp_indirect_set_location[(lck)->type] != NULL) \
1202 __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \
1203 }
1204
1205// Jump table for "set lock flags", available only for indirect locks.
1206extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
1207 kmp_lock_flags_t);
1208#define KMP_SET_I_LOCK_FLAGS(lck, flag) \
1209 { \
1210 if (__kmp_indirect_set_flags[(lck)->type] != NULL) \
1211 __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \
1212 }
1213
1214// Jump table for "get lock location", available only for indirect locks.
1215extern const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
1216 kmp_user_lock_p);
1217#define KMP_GET_I_LOCK_LOCATION(lck) \
1218 (__kmp_indirect_get_location[(lck)->type] != NULL \
1219 ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \
1220 : NULL)
1221
1222// Jump table for "get lock flags", available only for indirect locks.
1223extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
1224 kmp_user_lock_p);
1225#define KMP_GET_I_LOCK_FLAGS(lck) \
1226 (__kmp_indirect_get_flags[(lck)->type] != NULL \
1227 ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \
1228 : NULL)
1229
1230// number of kmp_indirect_lock_t objects to be allocated together
1231#define KMP_I_LOCK_CHUNK 1024
1232// Keep this a power of 2 since it is used in multiplication & division
1233KMP_BUILD_ASSERT((KMP_I_LOCK_CHUNK & (KMP_I_LOCK_CHUNK - 1)) == 0);
1234// number of row entries in the initial lock table
1235#define KMP_I_LOCK_TABLE_INIT_NROW_PTRS 8
1236
1237// Lock table for indirect locks.
1238typedef struct kmp_indirect_lock_table {
1239 kmp_indirect_lock_t **table; // blocks of indirect locks allocated
1240 kmp_uint32 nrow_ptrs; // number of *table pointer entries in table
1241 kmp_lock_index_t next; // index to the next lock to be allocated
1242 struct kmp_indirect_lock_table *next_table;
1243} kmp_indirect_lock_table_t;
1244
1245extern kmp_indirect_lock_table_t __kmp_i_lock_table;
1246
1247// Returns the indirect lock associated with the given index.
1248// Returns nullptr if no lock at given index
1249static inline kmp_indirect_lock_t *__kmp_get_i_lock(kmp_lock_index_t idx) {
1250 kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
1251 while (lock_table) {
1252 kmp_lock_index_t max_locks = lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK;
1253 if (idx < max_locks) {
1254 kmp_lock_index_t row = idx / KMP_I_LOCK_CHUNK;
1255 kmp_lock_index_t col = idx % KMP_I_LOCK_CHUNK;
1256 if (!lock_table->table[row] || idx >= lock_table->next)
1257 break;
1258 return &lock_table->table[row][col];
1259 }
1260 idx -= max_locks;
1261 lock_table = lock_table->next_table;
1262 }
1263 return nullptr;
1264}
1265
1266// Number of locks in a lock block, which is fixed to "1" now.
1267// TODO: No lock block implementation for now. If we add support, we need to
1268// manage a lock block data structure for each indirect lock type.
1269extern int __kmp_num_locks_in_block;
1270
1271// Fast lock table lookup without consistency checking
1272#define KMP_LOOKUP_I_LOCK(l) \
1273 ((OMP_LOCK_T_SIZE < sizeof(void *)) \
1274 ? __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(l)) \
1275 : *((kmp_indirect_lock_t **)(l)))
1276
1277// Used once in kmp_error.cpp
1278extern kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);
1279
1280#else // KMP_USE_DYNAMIC_LOCK
1281
1282#define KMP_LOCK_BUSY(v, type) (v)
1283#define KMP_LOCK_FREE(type) 0
1284#define KMP_LOCK_STRIP(v) (v)
1285
1286#endif // KMP_USE_DYNAMIC_LOCK
1287
1288// Data structure for using backoff within spin locks.
1289typedef struct {
1290 kmp_uint32 step; // current step
1291 kmp_uint32 max_backoff; // upper bound of outer delay loop
1292 kmp_uint32 min_tick; // size of inner delay loop in ticks (machine-dependent)
1293} kmp_backoff_t;
1294
1295// Runtime's default backoff parameters
1296extern kmp_backoff_t __kmp_spin_backoff_params;
1297
1298// Backoff function
1299extern void __kmp_spin_backoff(kmp_backoff_t *);
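// A minimal sketch of how the backoff helpers can be used inside a spin loop
// (the helper and its try_fn callback are hypothetical).
static inline void __kmp_example_spin_with_backoff(int (*try_fn)(void *),
                                                   void *arg) {
  kmp_backoff_t boff = __kmp_spin_backoff_params; // private working copy
  while (!try_fn(arg)) {
    // Pause for a growing delay (bounded by max_backoff) between attempts.
    __kmp_spin_backoff(&boff);
  }
}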
1300
1301#ifdef __cplusplus
1302} // extern "C"
1303#endif // __cplusplus
1304
1305#endif /* KMP_LOCK_H */