LLVM OpenMP* Runtime Library
kmp_affinity.h
1/*
2 * kmp_affinity.h -- header for affinity management
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef KMP_AFFINITY_H
14#define KMP_AFFINITY_H
15
16#include "kmp.h"
17#include "kmp_os.h"
18#include <limits>
19
20#if KMP_AFFINITY_SUPPORTED
21#if KMP_USE_HWLOC
22class KMPHwlocAffinity : public KMPAffinity {
23public:
24 class Mask : public KMPAffinity::Mask {
25 hwloc_cpuset_t mask;
26
27 public:
28 Mask() {
29 mask = hwloc_bitmap_alloc();
30 this->zero();
31 }
32 ~Mask() { hwloc_bitmap_free(mask); }
33 void set(int i) override { hwloc_bitmap_set(mask, i); }
34 bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
35 void clear(int i) override { hwloc_bitmap_clr(mask, i); }
36 void zero() override { hwloc_bitmap_zero(mask); }
37 bool empty() const override { return hwloc_bitmap_iszero(mask); }
38 void copy(const KMPAffinity::Mask *src) override {
39 const Mask *convert = static_cast<const Mask *>(src);
40 hwloc_bitmap_copy(mask, convert->mask);
41 }
42 void bitwise_and(const KMPAffinity::Mask *rhs) override {
43 const Mask *convert = static_cast<const Mask *>(rhs);
44 hwloc_bitmap_and(mask, mask, convert->mask);
45 }
46 void bitwise_or(const KMPAffinity::Mask *rhs) override {
47 const Mask *convert = static_cast<const Mask *>(rhs);
48 hwloc_bitmap_or(mask, mask, convert->mask);
49 }
50 void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
51 bool is_equal(const KMPAffinity::Mask *rhs) const override {
52 const Mask *convert = static_cast<const Mask *>(rhs);
53 return hwloc_bitmap_isequal(mask, convert->mask);
54 }
55 int begin() const override { return hwloc_bitmap_first(mask); }
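// Note: hwloc_bitmap_first()/hwloc_bitmap_next() return -1 once no further
// bit is set, so -1 doubles as the end() sentinel for the begin()/next()
// iteration idiom used on masks.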
56 int end() const override { return -1; }
57 int next(int previous) const override {
58 return hwloc_bitmap_next(mask, previous);
59 }
60 int get_system_affinity(bool abort_on_error) override {
61 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
62 "Illegal get affinity operation when not capable");
63 long retval =
64 hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
65 if (retval >= 0) {
66 return 0;
67 }
68 int error = errno;
69 if (abort_on_error) {
70 __kmp_fatal(KMP_MSG(FunctionError, "hwloc_get_cpubind()"),
71 KMP_ERR(error), __kmp_msg_null);
72 }
73 return error;
74 }
75 int set_system_affinity(bool abort_on_error) const override {
76 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
77 "Illegal set affinity operation when not capable");
78 long retval =
79 hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
80 if (retval >= 0) {
81 return 0;
82 }
83 int error = errno;
84 if (abort_on_error) {
85 __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
86 KMP_ERR(error), __kmp_msg_null);
87 }
88 return error;
89 }
90#if KMP_OS_WINDOWS
91 int set_process_affinity(bool abort_on_error) const override {
92 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
93 "Illegal set process affinity operation when not capable");
94 int error = 0;
95 const hwloc_topology_support *support =
96 hwloc_topology_get_support(__kmp_hwloc_topology);
97 if (support->cpubind->set_proc_cpubind) {
98 int retval;
99 retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
100 HWLOC_CPUBIND_PROCESS);
101 if (retval >= 0)
102 return 0;
103 error = errno;
104 if (abort_on_error)
105 __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
106 KMP_ERR(error), __kmp_msg_null);
107 }
108 return error;
109 }
110#endif
111 int get_proc_group() const override {
112 int group = -1;
113#if KMP_OS_WINDOWS
114 if (__kmp_num_proc_groups == 1) {
115 return 1;
116 }
117 for (int i = 0; i < __kmp_num_proc_groups; i++) {
118 // On Windows, the long type is always 32 bits
119 unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
120 unsigned long second_32_bits =
121 hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
122 if (first_32_bits == 0 && second_32_bits == 0) {
123 continue;
124 }
125 if (group >= 0) {
126 return -1;
127 }
128 group = i;
129 }
130#endif /* KMP_OS_WINDOWS */
131 return group;
132 }
133 };
134 void determine_capable(const char *var) override {
135 const hwloc_topology_support *topology_support;
136 if (__kmp_hwloc_topology == NULL) {
137 if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
138 __kmp_hwloc_error = TRUE;
139 if (__kmp_affinity.flags.verbose) {
140 KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
141 }
142 }
143 if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
144 __kmp_hwloc_error = TRUE;
145 if (__kmp_affinity.flags.verbose) {
146 KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
147 }
148 }
149 }
150 topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
151 // Is the system capable of setting/getting this thread's affinity?
152 // Also, is topology discovery possible? (pu indicates ability to discover
153 // processing units). And finally, were there no errors when calling any
154 // hwloc_* API functions?
155 if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
156 topology_support->cpubind->get_thisthread_cpubind &&
157 topology_support->discovery->pu && !__kmp_hwloc_error) {
158 // enables affinity according to KMP_AFFINITY_CAPABLE() macro
159 KMP_AFFINITY_ENABLE(TRUE);
160 } else {
161 // indicate that hwloc didn't work and disable affinity
162 __kmp_hwloc_error = TRUE;
163 KMP_AFFINITY_DISABLE();
164 }
165 }
166 void bind_thread(int which) override {
167 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
168 "Illegal set affinity operation when not capable");
169 KMPAffinity::Mask *mask;
170 KMP_CPU_ALLOC_ON_STACK(mask);
171 KMP_CPU_ZERO(mask);
172 KMP_CPU_SET(which, mask);
173 __kmp_set_system_affinity(mask, TRUE);
174 KMP_CPU_FREE_FROM_STACK(mask);
175 }
176 KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
177 void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
178 KMPAffinity::Mask *allocate_mask_array(int num) override {
179 return new Mask[num];
180 }
181 void deallocate_mask_array(KMPAffinity::Mask *array) override {
182 Mask *hwloc_array = static_cast<Mask *>(array);
183 delete[] hwloc_array;
184 }
185 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
186 int index) override {
187 Mask *hwloc_array = static_cast<Mask *>(array);
188 return &(hwloc_array[index]);
189 }
190 api_type get_api_type() const override { return HWLOC; }
191};
192#endif /* KMP_USE_HWLOC */
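// A minimal usage sketch of the begin()/next()/end() iteration idiom that
// every Mask implementation in this header supports; the helper name
// print_mask_procs is illustrative only and is not part of the runtime:
//
//   static void print_mask_procs(const KMPAffinity::Mask *m) {
//     for (int i = m->begin(); i != m->end(); i = m->next(i))
//       printf("%d ", i); // each OS proc id that is set in the mask
//   }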
193
194#if KMP_OS_LINUX || KMP_OS_FREEBSD
195#if KMP_OS_LINUX
196/* On some of the older OSes that we build on, these constants aren't present
197   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
198   all systems of the same arch where they are defined, and they cannot
199   change; they are set in stone forever. */
200#include <sys/syscall.h>
201#if KMP_ARCH_X86 || KMP_ARCH_ARM
202#ifndef __NR_sched_setaffinity
203#define __NR_sched_setaffinity 241
204#elif __NR_sched_setaffinity != 241
205#error Wrong code for setaffinity system call.
206#endif /* __NR_sched_setaffinity */
207#ifndef __NR_sched_getaffinity
208#define __NR_sched_getaffinity 242
209#elif __NR_sched_getaffinity != 242
210#error Wrong code for getaffinity system call.
211#endif /* __NR_sched_getaffinity */
212#elif KMP_ARCH_AARCH64
213#ifndef __NR_sched_setaffinity
214#define __NR_sched_setaffinity 122
215#elif __NR_sched_setaffinity != 122
216#error Wrong code for setaffinity system call.
217#endif /* __NR_sched_setaffinity */
218#ifndef __NR_sched_getaffinity
219#define __NR_sched_getaffinity 123
220#elif __NR_sched_getaffinity != 123
221#error Wrong code for getaffinity system call.
222#endif /* __NR_sched_getaffinity */
223#elif KMP_ARCH_RISCV64
224#ifndef __NR_sched_setaffinity
225#define __NR_sched_setaffinity 122
226#elif __NR_sched_setaffinity != 122
227#error Wrong code for setaffinity system call.
228#endif /* __NR_sched_setaffinity */
229#ifndef __NR_sched_getaffinity
230#define __NR_sched_getaffinity 123
231#elif __NR_sched_getaffinity != 123
232#error Wrong code for getaffinity system call.
233#endif /* __NR_sched_getaffinity */
234#elif KMP_ARCH_X86_64
235#ifndef __NR_sched_setaffinity
236#define __NR_sched_setaffinity 203
237#elif __NR_sched_setaffinity != 203
238#error Wrong code for setaffinity system call.
239#endif /* __NR_sched_setaffinity */
240#ifndef __NR_sched_getaffinity
241#define __NR_sched_getaffinity 204
242#elif __NR_sched_getaffinity != 204
243#error Wrong code for getaffinity system call.
244#endif /* __NR_sched_getaffinity */
245#elif KMP_ARCH_PPC64
246#ifndef __NR_sched_setaffinity
247#define __NR_sched_setaffinity 222
248#elif __NR_sched_setaffinity != 222
249#error Wrong code for setaffinity system call.
250#endif /* __NR_sched_setaffinity */
251#ifndef __NR_sched_getaffinity
252#define __NR_sched_getaffinity 223
253#elif __NR_sched_getaffinity != 223
254#error Wrong code for getaffinity system call.
255#endif /* __NR_sched_getaffinity */
256#elif KMP_ARCH_MIPS
257#ifndef __NR_sched_setaffinity
258#define __NR_sched_setaffinity 4239
259#elif __NR_sched_setaffinity != 4239
260#error Wrong code for setaffinity system call.
261#endif /* __NR_sched_setaffinity */
262#ifndef __NR_sched_getaffinity
263#define __NR_sched_getaffinity 4240
264#elif __NR_sched_getaffinity != 4240
265#error Wrong code for getaffinity system call.
266#endif /* __NR_sched_getaffinity */
267#elif KMP_ARCH_MIPS64
268#ifndef __NR_sched_setaffinity
269#define __NR_sched_setaffinity 5195
270#elif __NR_sched_setaffinity != 5195
271#error Wrong code for setaffinity system call.
272#endif /* __NR_sched_setaffinity */
273#ifndef __NR_sched_getaffinity
274#define __NR_sched_getaffinity 5196
275#elif __NR_sched_getaffinity != 5196
276#error Wrong code for getaffinity system call.
277#endif /* __NR_sched_getaffinity */
278#elif KMP_ARCH_LOONGARCH64
279#ifndef __NR_sched_setaffinity
280#define __NR_sched_setaffinity 122
281#elif __NR_sched_setaffinity != 122
282#error Wrong code for setaffinity system call.
283#endif /* __NR_sched_setaffinity */
284#ifndef __NR_sched_getaffinity
285#define __NR_sched_getaffinity 123
286#elif __NR_sched_getaffinity != 123
287#error Wrong code for getaffinity system call.
288#endif /* __NR_sched_getaffinity */
300#elif KMP_ARCH_VE
301#ifndef __NR_sched_setaffinity
302#define __NR_sched_setaffinity 203
303#elif __NR_sched_setaffinity != 203
304#error Wrong code for setaffinity system call.
305#endif /* __NR_sched_setaffinity */
306#ifndef __NR_sched_getaffinity
307#define __NR_sched_getaffinity 204
308#elif __NR_sched_getaffinity != 204
309#error Wrong code for getaffinity system call.
310#endif /* __NR_sched_getaffinity */
311#elif KMP_ARCH_S390X
312#ifndef __NR_sched_setaffinity
313#define __NR_sched_setaffinity 239
314#elif __NR_sched_setaffinity != 239
315#error Wrong code for setaffinity system call.
316#endif /* __NR_sched_setaffinity */
317#ifndef __NR_sched_getaffinity
318#define __NR_sched_getaffinity 240
319#elif __NR_sched_getaffinity != 240
320#error Wrong code for getaffinity system call.
321#endif /* __NR_sched_getaffinity */
322#else
323#error Unknown or unsupported architecture
324#endif /* KMP_ARCH_* */
325#elif KMP_OS_FREEBSD
326#include <pthread.h>
327#include <pthread_np.h>
328#endif
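// Hedged sketch of what the syscall numbers above are used for on Linux: the
// Mask below invokes the affinity syscalls directly. Roughly, with a buffer of
// __kmp_affin_mask_size bytes (the size is probed during capability
// detection):
//
//   long rc = syscall(__NR_sched_getaffinity, 0 /* current thread */,
//                     __kmp_affin_mask_size, mask);
//   // rc >= 0 indicates success; otherwise errno holds the error code.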
329class KMPNativeAffinity : public KMPAffinity {
330 class Mask : public KMPAffinity::Mask {
331 typedef unsigned long mask_t;
332 typedef decltype(__kmp_affin_mask_size) mask_size_type;
333 static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
334 static const mask_t ONE = 1;
335 mask_size_type get_num_mask_types() const {
336 return __kmp_affin_mask_size / sizeof(mask_t);
337 }
338
339 public:
340 mask_t *mask;
341 Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
342 ~Mask() {
343 if (mask)
344 __kmp_free(mask);
345 }
346 void set(int i) override {
347 mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
348 }
349 bool is_set(int i) const override {
350 return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
351 }
352 void clear(int i) override {
353 mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
354 }
355 void zero() override {
356 mask_size_type e = get_num_mask_types();
357 for (mask_size_type i = 0; i < e; ++i)
358 mask[i] = (mask_t)0;
359 }
360 bool empty() const override {
361 mask_size_type e = get_num_mask_types();
362 for (mask_size_type i = 0; i < e; ++i)
363 if (mask[i] != (mask_t)0)
364 return false;
365 return true;
366 }
367 void copy(const KMPAffinity::Mask *src) override {
368 const Mask *convert = static_cast<const Mask *>(src);
369 mask_size_type e = get_num_mask_types();
370 for (mask_size_type i = 0; i < e; ++i)
371 mask[i] = convert->mask[i];
372 }
373 void bitwise_and(const KMPAffinity::Mask *rhs) override {
374 const Mask *convert = static_cast<const Mask *>(rhs);
375 mask_size_type e = get_num_mask_types();
376 for (mask_size_type i = 0; i < e; ++i)
377 mask[i] &= convert->mask[i];
378 }
379 void bitwise_or(const KMPAffinity::Mask *rhs) override {
380 const Mask *convert = static_cast<const Mask *>(rhs);
381 mask_size_type e = get_num_mask_types();
382 for (mask_size_type i = 0; i < e; ++i)
383 mask[i] |= convert->mask[i];
384 }
385 void bitwise_not() override {
386 mask_size_type e = get_num_mask_types();
387 for (mask_size_type i = 0; i < e; ++i)
388 mask[i] = ~(mask[i]);
389 }
390 bool is_equal(const KMPAffinity::Mask *rhs) const override {
391 const Mask *convert = static_cast<const Mask *>(rhs);
392 mask_size_type e = get_num_mask_types();
393 for (mask_size_type i = 0; i < e; ++i)
394 if (mask[i] != convert->mask[i])
395 return false;
396 return true;
397 }
398 int begin() const override {
399 int retval = 0;
400 while (retval < end() && !is_set(retval))
401 ++retval;
402 return retval;
403 }
404 int end() const override {
405 int e;
406 __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
407 return e;
408 }
409 int next(int previous) const override {
410 int retval = previous + 1;
411 while (retval < end() && !is_set(retval))
412 ++retval;
413 return retval;
414 }
415 int get_system_affinity(bool abort_on_error) override {
416 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
417 "Illegal get affinity operation when not capable");
418#if KMP_OS_LINUX
419 long retval =
420 syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
421#elif KMP_OS_FREEBSD
422 int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
423 reinterpret_cast<cpuset_t *>(mask));
424 int retval = (r == 0 ? 0 : -1);
425#endif
426 if (retval >= 0) {
427 return 0;
428 }
429 int error = errno;
430 if (abort_on_error) {
431 __kmp_fatal(KMP_MSG(FunctionError, "pthread_getaffinity_np()"),
432 KMP_ERR(error), __kmp_msg_null);
433 }
434 return error;
435 }
436 int set_system_affinity(bool abort_on_error) const override {
437 KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
438 "Illegal set affinity operation when not capable");
439#if KMP_OS_LINUX
440 long retval =
441 syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
442#elif KMP_OS_FREEBSD
443 int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
444 reinterpret_cast<cpuset_t *>(mask));
445 int retval = (r == 0 ? 0 : -1);
446#endif
447 if (retval >= 0) {
448 return 0;
449 }
450 int error = errno;
451 if (abort_on_error) {
452 __kmp_fatal(KMP_MSG(FunctionError, "pthread_setaffinity_np()"),
453 KMP_ERR(error), __kmp_msg_null);
454 }
455 return error;
456 }
457 };
458 void determine_capable(const char *env_var) override {
459 __kmp_affinity_determine_capable(env_var);
460 }
461 void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
462 KMPAffinity::Mask *allocate_mask() override {
463 KMPNativeAffinity::Mask *retval = new Mask();
464 return retval;
465 }
466 void deallocate_mask(KMPAffinity::Mask *m) override {
467 KMPNativeAffinity::Mask *native_mask =
468 static_cast<KMPNativeAffinity::Mask *>(m);
469 delete native_mask;
470 }
471 KMPAffinity::Mask *allocate_mask_array(int num) override {
472 return new Mask[num];
473 }
474 void deallocate_mask_array(KMPAffinity::Mask *array) override {
475 Mask *linux_array = static_cast<Mask *>(array);
476 delete[] linux_array;
477 }
478 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
479 int index) override {
480 Mask *linux_array = static_cast<Mask *>(array);
481 return &(linux_array[index]);
482 }
483 api_type get_api_type() const override { return NATIVE_OS; }
484};
485#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
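// Worked example of the word/bit indexing used by the native Mask above,
// assuming a 64-bit mask_t: set(70) touches word 70 / 64 = 1 and bit
// 70 % 64 = 6, i.e. mask[1] |= ((mask_t)1 << 6).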
486
487#if KMP_OS_WINDOWS
488class KMPNativeAffinity : public KMPAffinity {
489 class Mask : public KMPAffinity::Mask {
490 typedef ULONG_PTR mask_t;
491 static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
492 mask_t *mask;
493
494 public:
495 Mask() {
496 mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
497 }
498 ~Mask() {
499 if (mask)
500 __kmp_free(mask);
501 }
502 void set(int i) override {
503 mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
504 }
505 bool is_set(int i) const override {
506 return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
507 }
508 void clear(int i) override {
509 mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
510 }
511 void zero() override {
512 for (int i = 0; i < __kmp_num_proc_groups; ++i)
513 mask[i] = 0;
514 }
515 bool empty() const override {
516 for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
517 if (mask[i])
518 return false;
519 return true;
520 }
521 void copy(const KMPAffinity::Mask *src) override {
522 const Mask *convert = static_cast<const Mask *>(src);
523 for (int i = 0; i < __kmp_num_proc_groups; ++i)
524 mask[i] = convert->mask[i];
525 }
526 void bitwise_and(const KMPAffinity::Mask *rhs) override {
527 const Mask *convert = static_cast<const Mask *>(rhs);
528 for (int i = 0; i < __kmp_num_proc_groups; ++i)
529 mask[i] &= convert->mask[i];
530 }
531 void bitwise_or(const KMPAffinity::Mask *rhs) override {
532 const Mask *convert = static_cast<const Mask *>(rhs);
533 for (int i = 0; i < __kmp_num_proc_groups; ++i)
534 mask[i] |= convert->mask[i];
535 }
536 void bitwise_not() override {
537 for (int i = 0; i < __kmp_num_proc_groups; ++i)
538 mask[i] = ~(mask[i]);
539 }
540 bool is_equal(const KMPAffinity::Mask *rhs) const override {
541 const Mask *convert = static_cast<const Mask *>(rhs);
542 for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
543 if (mask[i] != convert->mask[i])
544 return false;
545 return true;
546 }
547 int begin() const override {
548 int retval = 0;
549 while (retval < end() && !is_set(retval))
550 ++retval;
551 return retval;
552 }
553 int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
554 int next(int previous) const override {
555 int retval = previous + 1;
556 while (retval < end() && !is_set(retval))
557 ++retval;
558 return retval;
559 }
560 int set_process_affinity(bool abort_on_error) const override {
561 if (__kmp_num_proc_groups <= 1) {
562 if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
563 DWORD error = GetLastError();
564 if (abort_on_error) {
565 __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
566 __kmp_msg_null);
567 }
568 return error;
569 }
570 }
571 return 0;
572 }
573 int set_system_affinity(bool abort_on_error) const override {
574 if (__kmp_num_proc_groups > 1) {
575 // Check for a valid mask.
576 GROUP_AFFINITY ga;
577 int group = get_proc_group();
578 if (group < 0) {
579 if (abort_on_error) {
580 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
581 }
582 return -1;
583 }
584 // Transform the bit vector into a GROUP_AFFINITY struct
585 // and make the system call to set affinity.
586 ga.Group = group;
587 ga.Mask = mask[group];
588 ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
589
590 KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
591 if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
592 DWORD error = GetLastError();
593 if (abort_on_error) {
594 __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
595 __kmp_msg_null);
596 }
597 return error;
598 }
599 } else {
600 if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
601 DWORD error = GetLastError();
602 if (abort_on_error) {
603 __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
604 __kmp_msg_null);
605 }
606 return error;
607 }
608 }
609 return 0;
610 }
611 int get_system_affinity(bool abort_on_error) override {
612 if (__kmp_num_proc_groups > 1) {
613 this->zero();
614 GROUP_AFFINITY ga;
615 KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
616 if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
617 DWORD error = GetLastError();
618 if (abort_on_error) {
619 __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
620 KMP_ERR(error), __kmp_msg_null);
621 }
622 return error;
623 }
624 if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
625 (ga.Mask == 0)) {
626 return -1;
627 }
628 mask[ga.Group] = ga.Mask;
629 } else {
630 mask_t newMask, sysMask, retval;
631 if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
632 DWORD error = GetLastError();
633 if (abort_on_error) {
634 __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
635 KMP_ERR(error), __kmp_msg_null);
636 }
637 return error;
638 }
639 retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
640 if (!retval) {
641 DWORD error = GetLastError();
642 if (abort_on_error) {
643 __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
644 KMP_ERR(error), __kmp_msg_null);
645 }
646 return error;
647 }
648 newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
649 if (!newMask) {
650 DWORD error = GetLastError();
651 if (abort_on_error) {
652 __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
653 KMP_ERR(error), __kmp_msg_null);
654 }
655 }
656 *mask = retval;
657 }
658 return 0;
659 }
660 int get_proc_group() const override {
661 int group = -1;
662 if (__kmp_num_proc_groups == 1) {
663 return 1;
664 }
665 for (int i = 0; i < __kmp_num_proc_groups; i++) {
666 if (mask[i] == 0)
667 continue;
668 if (group >= 0)
669 return -1;
670 group = i;
671 }
672 return group;
673 }
674 };
675 void determine_capable(const char *env_var) override {
676 __kmp_affinity_determine_capable(env_var);
677 }
678 void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
679 KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
680 void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
681 KMPAffinity::Mask *allocate_mask_array(int num) override {
682 return new Mask[num];
683 }
684 void deallocate_mask_array(KMPAffinity::Mask *array) override {
685 Mask *windows_array = static_cast<Mask *>(array);
686 delete[] windows_array;
687 }
688 KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
689 int index) override {
690 Mask *windows_array = static_cast<Mask *>(array);
691 return &(windows_array[index]);
692 }
693 api_type get_api_type() const override { return NATIVE_OS; }
694};
695#endif /* KMP_OS_WINDOWS */
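// Worked example for get_proc_group() above: with two 64-processor groups, a
// mask covering only procs 64-127 has mask[0] == 0 and mask[1] != 0, so the
// group is 1; a mask with bits in both groups yields -1, since a thread can be
// bound to only one processor group at a time through this interface.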
696#endif /* KMP_AFFINITY_SUPPORTED */
697
698// Describe an attribute for a level in the machine topology
699struct kmp_hw_attr_t {
700 int core_type : 8;
701 int core_eff : 8;
702 unsigned valid : 1;
703 unsigned reserved : 15;
704
705 static const int UNKNOWN_CORE_EFF = -1;
706
707 kmp_hw_attr_t()
708 : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
709 valid(0), reserved(0) {}
710 void set_core_type(kmp_hw_core_type_t type) {
711 valid = 1;
712 core_type = type;
713 }
714 void set_core_eff(int eff) {
715 valid = 1;
716 core_eff = eff;
717 }
718 kmp_hw_core_type_t get_core_type() const {
719 return (kmp_hw_core_type_t)core_type;
720 }
721 int get_core_eff() const { return core_eff; }
722 bool is_core_type_valid() const {
723 return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
724 }
725 bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
726 operator bool() const { return valid; }
727 void clear() {
728 core_type = KMP_HW_CORE_TYPE_UNKNOWN;
729 core_eff = UNKNOWN_CORE_EFF;
730 valid = 0;
731 }
732 bool contains(const kmp_hw_attr_t &other) const {
733 if (!valid && !other.valid)
734 return true;
735 if (valid && other.valid) {
736 if (other.is_core_type_valid()) {
737 if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
738 return false;
739 }
740 if (other.is_core_eff_valid()) {
741 if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
742 return false;
743 }
744 return true;
745 }
746 return false;
747 }
748#if KMP_AFFINITY_SUPPORTED
749 bool contains(const kmp_affinity_attrs_t &attr) const {
750 if (!valid && !attr.valid)
751 return true;
752 if (valid && attr.valid) {
753 if (attr.core_type != KMP_HW_CORE_TYPE_UNKNOWN)
754 return (is_core_type_valid() &&
755 (get_core_type() == (kmp_hw_core_type_t)attr.core_type));
756 if (attr.core_eff != UNKNOWN_CORE_EFF)
757 return (is_core_eff_valid() && (get_core_eff() == attr.core_eff));
758 return true;
759 }
760 return false;
761 }
762#endif // KMP_AFFINITY_SUPPORTED
763 bool operator==(const kmp_hw_attr_t &rhs) const {
764 return (rhs.valid == valid && rhs.core_eff == core_eff &&
765 rhs.core_type == core_type);
766 }
767 bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
768};
769
770#if KMP_AFFINITY_SUPPORTED
771KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
772#endif
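// A small illustrative sketch of kmp_hw_attr_t::contains(); the variable names
// below are for the example only:
//
//   kmp_hw_attr_t requested;   // e.g. parsed from a KMP_HW_SUBSET attribute
//   requested.set_core_eff(1);
//   kmp_hw_attr_t actual;      // attribute recorded for a hardware thread
//   actual.set_core_eff(1);
//   // actual.contains(requested) is true: every field that 'requested'
//   // specifies (core efficiency 1) also matches on 'actual'; fields that
//   // 'requested' leaves unset are not checked.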
773
774class kmp_hw_thread_t {
775public:
776 static const int UNKNOWN_ID = -1;
777 static const int MULTIPLE_ID = -2;
778 static int compare_ids(const void *a, const void *b);
779 static int compare_compact(const void *a, const void *b);
780 int ids[KMP_HW_LAST];
781 int sub_ids[KMP_HW_LAST];
782 bool leader;
783 int os_id;
784 kmp_hw_attr_t attrs;
785
786 void print() const;
787 void clear() {
788 for (int i = 0; i < (int)KMP_HW_LAST; ++i)
789 ids[i] = UNKNOWN_ID;
790 leader = false;
791 attrs.clear();
792 }
793};
794
795class kmp_topology_t {
796
797 struct flags_t {
798 int uniform : 1;
799 int reserved : 31;
800 };
801
802 int depth;
803
804 // The following arrays are all 'depth' long and have been
805 // allocated to hold up to KMP_HW_LAST objects if needed, so
806 // layers can be added without reallocating any array.
807
808 // Ordered array of the types in the topology
809 kmp_hw_t *types;
810
811 // Quick topology ratios; for non-uniform topologies, each
812 // ratio holds the max number of itemAs per itemB,
813 // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
814 int *ratio;
815
816 // Storage containing the absolute number of each topology layer
817 int *count;
818
819 // The number of core efficiencies. This is only useful for hybrid
820 // topologies. Core efficiencies will range from 0 to num efficiencies - 1
821 int num_core_efficiencies;
822 int num_core_types;
823 kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
824
825 // The hardware threads array
826 // hw_threads is num_hw_threads long
827 // Each hw_thread's ids and sub_ids are depth deep
828 int num_hw_threads;
829 kmp_hw_thread_t *hw_threads;
830
831 // Equivalence hash where the key is the hardware topology item
832 // and the value is the equivalent hardware topology type in the
833 // types[] array, if the value is KMP_HW_UNKNOWN, then there is no
834 // known equivalence for the topology type
835 kmp_hw_t equivalent[KMP_HW_LAST];
836
837 // Flags describing the topology
838 flags_t flags;
839
840 // Compact value used during sort_compact()
841 int compact;
842
843 // Insert a new topology layer after allocation
844 void _insert_layer(kmp_hw_t type, const int *ids);
845
846#if KMP_GROUP_AFFINITY
847 // Insert topology information about Windows Processor groups
848 void _insert_windows_proc_groups();
849#endif
850
851 // Count each item & get the num x's per y
852 // e.g., get the number of cores and the number of threads per core
853 // for each (x, y) in (KMP_HW_* , KMP_HW_*)
854 void _gather_enumeration_information();
855
856 // Remove layers that don't add information to the topology.
857 // This is done by having the layer take on the id = UNKNOWN_ID (-1)
858 void _remove_radix1_layers();
859
860 // Find out if the topology is uniform
861 void _discover_uniformity();
862
863 // Set all the sub_ids for each hardware thread
864 void _set_sub_ids();
865
866 // Set global affinity variables describing the number of threads per
867 // core, the number of packages, the number of cores per package, and
868 // the number of cores.
869 void _set_globals();
870
871 // Set the last level cache equivalent type
872 void _set_last_level_cache();
873
874 // Return the number of cores with a particular attribute, 'attr'.
875 // If 'find_all' is true, then find all cores on the machine, otherwise find
876 // all cores per the layer 'above'
877 int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
878 bool find_all = false) const;
879
880public:
881 // Force use of allocate()/deallocate()
882 kmp_topology_t() = delete;
883 kmp_topology_t(const kmp_topology_t &t) = delete;
884 kmp_topology_t(kmp_topology_t &&t) = delete;
885 kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
886 kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
887
888 static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
889 static void deallocate(kmp_topology_t *);
890
891 // Functions used in create_map() routines
892 kmp_hw_thread_t &at(int index) {
893 KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
894 return hw_threads[index];
895 }
896 const kmp_hw_thread_t &at(int index) const {
897 KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
898 return hw_threads[index];
899 }
900 int get_num_hw_threads() const { return num_hw_threads; }
901 void sort_ids() {
902 qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
903 kmp_hw_thread_t::compare_ids);
904 }
905 // Check whether the hardware ids are unique; if they are,
906 // return true, otherwise return false
907 bool check_ids() const;
908
909 // Function to call after the create_map() routine
910 void canonicalize();
911 void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
912
913// Functions used after canonicalize() called
914
915#if KMP_AFFINITY_SUPPORTED
916 // Set the granularity for affinity settings
917 void set_granularity(kmp_affinity_t &stgs) const;
918 bool is_close(int hwt1, int hwt2, const kmp_affinity_t &stgs) const;
919 bool restrict_to_mask(const kmp_affin_mask_t *mask);
920 bool filter_hw_subset();
921#endif
922 bool is_uniform() const { return flags.uniform; }
923 // Return the equivalent type for the given type in the topology;
924 // returns KMP_HW_UNKNOWN when there is no equivalent type
925 kmp_hw_t get_equivalent_type(kmp_hw_t type) const {
926 if (type == KMP_HW_UNKNOWN)
927 return KMP_HW_UNKNOWN;
928 return equivalent[type];
929 }
930 // Set type1 = type2
931 void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
932 KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
933 KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
934 kmp_hw_t real_type2 = equivalent[type2];
935 if (real_type2 == KMP_HW_UNKNOWN)
936 real_type2 = type2;
937 equivalent[type1] = real_type2;
938 // This loop is required since any of the types may have been set to
939 // be equivalent to type1. They all must be checked and reset to type2.
940 KMP_FOREACH_HW_TYPE(type) {
941 if (equivalent[type] == type1) {
942 equivalent[type] = real_type2;
943 }
944 }
945 }
946 // Calculate number of types corresponding to level1
947 // per types corresponding to level2 (e.g., number of threads per core)
948 int calculate_ratio(int level1, int level2) const {
949 KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
950 KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
951 int r = 1;
952 for (int level = level1; level > level2; --level)
953 r *= ratio[level];
954 return r;
955 }
956 int get_ratio(int level) const {
957 KMP_DEBUG_ASSERT(level >= 0 && level < depth);
958 return ratio[level];
959 }
960 int get_depth() const { return depth; }
961 kmp_hw_t get_type(int level) const {
962 KMP_DEBUG_ASSERT(level >= 0 && level < depth);
963 return types[level];
964 }
965 int get_level(kmp_hw_t type) const {
966 KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
967 int eq_type = equivalent[type];
968 if (eq_type == KMP_HW_UNKNOWN)
969 return -1;
970 for (int i = 0; i < depth; ++i)
971 if (types[i] == eq_type)
972 return i;
973 return -1;
974 }
975 int get_count(int level) const {
976 KMP_DEBUG_ASSERT(level >= 0 && level < depth);
977 return count[level];
978 }
979 // Return the total number of cores with attribute 'attr'
980 int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
981 return _get_ncores_with_attr(attr, -1, true);
982 }
983 // Return the number of cores with attribute
984 // 'attr' per topology level 'above'
985 int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
986 return _get_ncores_with_attr(attr, above, false);
987 }
988
989#if KMP_AFFINITY_SUPPORTED
990 friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
991 void sort_compact(kmp_affinity_t &affinity) {
992 compact = affinity.compact;
993 qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
994 kmp_hw_thread_t::compare_compact);
995 }
996#endif
997 void print(const char *env_var = "KMP_AFFINITY") const;
998 void dump() const;
999};
1000extern kmp_topology_t *__kmp_topology;
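// Worked example for kmp_topology_t::calculate_ratio(): with the ratio array
// [ 4 packages | 6 cores/package | 2 threads/core ] from the comment above
// (levels 0, 1, 2), calculate_ratio(2, 0) multiplies ratio[2] * ratio[1] =
// 2 * 6 = 12, i.e. the maximum number of hardware threads per package.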
1001
1002class kmp_hw_subset_t {
1003 const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;
1004
1005public:
1006 // Describe a machine topology item in KMP_HW_SUBSET
1007 struct item_t {
1008 kmp_hw_t type;
1009 int num_attrs;
1010 int num[MAX_ATTRS];
1011 int offset[MAX_ATTRS];
1012 kmp_hw_attr_t attr[MAX_ATTRS];
1013 };
1014 // Put parentheses around max to avoid accidental use of the Windows max macro.
1015 const static int USE_ALL = (std::numeric_limits<int>::max)();
1016
1017private:
1018 int depth;
1019 int capacity;
1020 item_t *items;
1021 kmp_uint64 set;
1022 bool absolute;
1023 // The set must be able to handle up to KMP_HW_LAST number of layers
1024 KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
1025 // Sorting the KMP_HW_SUBSET items to follow topology order
1026 // All unknown topology types will be at the beginning of the subset
1027 static int hw_subset_compare(const void *i1, const void *i2) {
1028 kmp_hw_t type1 = ((const item_t *)i1)->type;
1029 kmp_hw_t type2 = ((const item_t *)i2)->type;
1030 int level1 = __kmp_topology->get_level(type1);
1031 int level2 = __kmp_topology->get_level(type2);
1032 return level1 - level2;
1033 }
1034
1035public:
1036 // Force use of allocate()/deallocate()
1037 kmp_hw_subset_t() = delete;
1038 kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
1039 kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
1040 kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
1041 kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;
1042
1043 static kmp_hw_subset_t *allocate() {
1044 int initial_capacity = 5;
1045 kmp_hw_subset_t *retval =
1046 (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
1047 retval->depth = 0;
1048 retval->capacity = initial_capacity;
1049 retval->set = 0ull;
1050 retval->absolute = false;
1051 retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
1052 return retval;
1053 }
1054 static void deallocate(kmp_hw_subset_t *subset) {
1055 __kmp_free(subset->items);
1056 __kmp_free(subset);
1057 }
1058 void set_absolute() { absolute = true; }
1059 bool is_absolute() const { return absolute; }
1060 void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
1061 for (int i = 0; i < depth; ++i) {
1062 // Found an existing item for this layer type
1063 // Add the num, offset, and attr to this item
1064 if (items[i].type == type) {
1065 int idx = items[i].num_attrs++;
1066 if ((size_t)idx >= MAX_ATTRS)
1067 return;
1068 items[i].num[idx] = num;
1069 items[i].offset[idx] = offset;
1070 items[i].attr[idx] = attr;
1071 return;
1072 }
1073 }
1074 if (depth == capacity - 1) {
1075 capacity *= 2;
1076 item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
1077 for (int i = 0; i < depth; ++i)
1078 new_items[i] = items[i];
1079 __kmp_free(items);
1080 items = new_items;
1081 }
1082 items[depth].num_attrs = 1;
1083 items[depth].type = type;
1084 items[depth].num[0] = num;
1085 items[depth].offset[0] = offset;
1086 items[depth].attr[0] = attr;
1087 depth++;
1088 set |= (1ull << type);
1089 }
1090 int get_depth() const { return depth; }
1091 const item_t &at(int index) const {
1092 KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1093 return items[index];
1094 }
1095 item_t &at(int index) {
1096 KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1097 return items[index];
1098 }
1099 void remove(int index) {
1100 KMP_DEBUG_ASSERT(index >= 0 && index < depth);
1101 set &= ~(1ull << items[index].type);
1102 for (int j = index + 1; j < depth; ++j) {
1103 items[j - 1] = items[j];
1104 }
1105 depth--;
1106 }
1107 void sort() {
1108 KMP_DEBUG_ASSERT(__kmp_topology);
1109 qsort(items, depth, sizeof(item_t), hw_subset_compare);
1110 }
1111 bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
1112 void dump() const {
1113 printf("**********************\n");
1114 printf("*** kmp_hw_subset: ***\n");
1115 printf("* depth: %d\n", depth);
1116 printf("* items:\n");
1117 for (int i = 0; i < depth; ++i) {
1118 printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
1119 for (int j = 0; j < items[i].num_attrs; ++j) {
1120 printf(" num: %d, offset: %d, attr: ", items[i].num[j],
1121 items[i].offset[j]);
1122 if (!items[i].attr[j]) {
1123 printf(" (none)\n");
1124 } else {
1125 printf(
1126 " core_type = %s, core_eff = %d\n",
1127 __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
1128 items[i].attr[j].get_core_eff());
1129 }
1130 }
1131 }
1132 printf("* set: 0x%llx\n", set);
1133 printf("* absolute: %d\n", absolute);
1134 printf("**********************\n");
1135 }
1136};
1137extern kmp_hw_subset_t *__kmp_hw_subset;
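// Illustrative sketch only: a KMP_HW_SUBSET value such as "2s,4c,2t" would
// roughly translate into calls like the following on the global subset object,
// using a default-constructed (invalid) attribute and offset 0:
//
//   kmp_hw_attr_t no_attr;
//   __kmp_hw_subset->push_back(2, KMP_HW_SOCKET, 0, no_attr);
//   __kmp_hw_subset->push_back(4, KMP_HW_CORE, 0, no_attr);
//   __kmp_hw_subset->push_back(2, KMP_HW_THREAD, 0, no_attr);
//   __kmp_hw_subset->sort(); // reorder items to follow topology order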
1138
1139/* A structure for holding machine-specific hierarchy info to be computed once
1140 at init. This structure represents a mapping of threads to the actual machine
1141 hierarchy, or to our best guess at what the hierarchy might be, for the
1142 purpose of performing an efficient barrier. In the worst case, when there is
1143 no machine hierarchy information, it produces a tree suitable for a barrier,
1144 similar to the tree used in the hyper barrier. */
1145class hierarchy_info {
1146public:
1147 /* Good default values for number of leaves and branching factor, given no
1148 affinity information. Behaves a bit like hyper barrier. */
1149 static const kmp_uint32 maxLeaves = 4;
1150 static const kmp_uint32 minBranch = 4;
1156 kmp_uint32 maxLevels;
1157
1162 kmp_uint32 depth;
1163 kmp_uint32 base_num_threads;
1164 enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
1165 volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
1166 // 2=initialization in progress
1167 volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
1168
1173 kmp_uint32 *numPerLevel;
1174 kmp_uint32 *skipPerLevel;
1175
1176 void deriveLevels() {
1177 int hier_depth = __kmp_topology->get_depth();
1178 for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
1179 numPerLevel[level] = __kmp_topology->get_ratio(i);
1180 }
1181 }
1182
1183 hierarchy_info()
1184 : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
1185
1186 void fini() {
1187 if (!uninitialized && numPerLevel) {
1188 __kmp_free(numPerLevel);
1189 numPerLevel = NULL;
1190 uninitialized = not_initialized;
1191 }
1192 }
1193
1194 void init(int num_addrs) {
1195 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
1196 &uninitialized, not_initialized, initializing);
1197 if (bool_result == 0) { // Wait for initialization
1198 while (TCR_1(uninitialized) != initialized)
1199 KMP_CPU_PAUSE();
1200 return;
1201 }
1202 KMP_DEBUG_ASSERT(bool_result == 1);
1203
1204 /* Data fields are explicitly initialized here to prevent use of dirty
1205 values observed when the static library is re-initialized multiple times
1206 (e.g. when a non-OpenMP thread repeatedly launches/joins a thread that
1207 uses OpenMP). */
1208 depth = 1;
1209 resizing = 0;
1210 maxLevels = 7;
1211 numPerLevel =
1212 (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
1213 skipPerLevel = &(numPerLevel[maxLevels]);
1214 for (kmp_uint32 i = 0; i < maxLevels;
1215 ++i) { // init numPerLevel[*] to 1 item per level
1216 numPerLevel[i] = 1;
1217 skipPerLevel[i] = 1;
1218 }
1219
1220 // Sort table by physical ID
1221 if (__kmp_topology && __kmp_topology->get_depth() > 0) {
1222 deriveLevels();
1223 } else {
1224 numPerLevel[0] = maxLeaves;
1225 numPerLevel[1] = num_addrs / maxLeaves;
1226 if (num_addrs % maxLeaves)
1227 numPerLevel[1]++;
1228 }
1229
1230 base_num_threads = num_addrs;
1231 for (int i = maxLevels - 1; i >= 0;
1232 --i) // count non-empty levels to get depth
1233 if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
1234 depth++;
1235
1236 kmp_uint32 branch = minBranch;
1237 if (numPerLevel[0] == 1)
1238 branch = num_addrs / maxLeaves;
1239 if (branch < minBranch)
1240 branch = minBranch;
1241 for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
1242 while (numPerLevel[d] > branch ||
1243 (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
1244 if (numPerLevel[d] & 1)
1245 numPerLevel[d]++;
1246 numPerLevel[d] = numPerLevel[d] >> 1;
1247 if (numPerLevel[d + 1] == 1)
1248 depth++;
1249 numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
1250 }
1251 if (numPerLevel[0] == 1) {
1252 branch = branch >> 1;
1253 if (branch < 4)
1254 branch = minBranch;
1255 }
1256 }
1257
1258 for (kmp_uint32 i = 1; i < depth; ++i)
1259 skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
1260 // Fill in hierarchy in the case of oversubscription
1261 for (kmp_uint32 i = depth; i < maxLevels; ++i)
1262 skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1263
1264 uninitialized = initialized; // One writer
1265 }
1266
1267 // Resize the hierarchy if nproc changes to something larger than before
1268 void resize(kmp_uint32 nproc) {
1269 kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
1270 while (bool_result == 0) { // someone else is trying to resize
1271 KMP_CPU_PAUSE();
1272 if (nproc <= base_num_threads) // happy with other thread's resize
1273 return;
1274 else // try to resize
1275 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
1276 }
1277 KMP_DEBUG_ASSERT(bool_result != 0);
1278 if (nproc <= base_num_threads)
1279 return; // happy with other thread's resize
1280
1281 // Calculate new maxLevels
1282 kmp_uint32 old_sz = skipPerLevel[depth - 1];
1283 kmp_uint32 incs = 0, old_maxLevels = maxLevels;
1284 // First see if old maxLevels is enough to contain new size
1285 for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
1286 skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1287 numPerLevel[i - 1] *= 2;
1288 old_sz *= 2;
1289 depth++;
1290 }
1291 if (nproc > old_sz) { // Not enough space, need to expand hierarchy
1292 while (nproc > old_sz) {
1293 old_sz *= 2;
1294 incs++;
1295 depth++;
1296 }
1297 maxLevels += incs;
1298
1299 // Resize arrays
1300 kmp_uint32 *old_numPerLevel = numPerLevel;
1301 kmp_uint32 *old_skipPerLevel = skipPerLevel;
1302 numPerLevel = skipPerLevel = NULL;
1303 numPerLevel =
1304 (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
1305 skipPerLevel = &(numPerLevel[maxLevels]);
1306
1307 // Copy old elements from old arrays
1308 for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
1309 // init numPerLevel[*] to 1 item per level
1310 numPerLevel[i] = old_numPerLevel[i];
1311 skipPerLevel[i] = old_skipPerLevel[i];
1312 }
1313
1314 // Init new elements in arrays to 1
1315 for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
1316 // init numPerLevel[*] to 1 item per level
1317 numPerLevel[i] = 1;
1318 skipPerLevel[i] = 1;
1319 }
1320
1321 // Free old arrays
1322 __kmp_free(old_numPerLevel);
1323 }
1324
1325 // Fill in oversubscription levels of hierarchy
1326 for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
1327 skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1328
1329 base_num_threads = nproc;
1330 resizing = 0; // One writer
1331 }
1332};
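// Worked example (illustrative): for a machine with 4 packages, 4 cores per
// package and 2 threads per core, deriveLevels() fills numPerLevel with
// {2, 4, 4, 1, ...} (leaves first), and init() then computes skipPerLevel as
// {1, 2, 8, 32, ...} via skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1],
// i.e. the number of leaf threads spanned by one subtree at each level.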
1333#endif // KMP_AFFINITY_H