#if KMP_AFFINITY_SUPPORTED
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    bool empty() const override { return hwloc_bitmap_iszero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    bool is_equal(const KMPAffinity::Mask *rhs) const override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      return hwloc_bitmap_isequal(mask, convert->mask);
    }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
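    /* Iteration note: hwloc_bitmap_first()/hwloc_bitmap_next() return -1 once
       no set bit remains, which is why end() can simply return -1.  A scan of
       a mask therefore looks like this (illustrative sketch only):

         for (int cpu = m->begin(); cpu != m->end(); cpu = m->next(cpu)) {
           // cpu is the index of a set bit, i.e. a logical CPU id
         }
    */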
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      long retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "hwloc_get_cpubind()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      long retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_process_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set process affinity operation when not capable");
      int error = 0;
      const hwloc_topology_support *support =
          hwloc_topology_get_support(__kmp_hwloc_topology);
      if (support->cpubind->set_proc_cpubind) {
        int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
                                       HWLOC_CPUBIND_PROCESS);
        if (retval >= 0)
          return 0;
        error = errno;
        if (abort_on_error)
          __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
                      KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // An unsigned long is 32 bits here, so each processor group (up to
        // 64 logical CPUs) occupies two ulongs of the bitmap.
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        // Bits set in more than one group: no single group owns the mask.
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity.flags.verbose) {
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
        }
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity.flags.verbose) {
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
        }
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Affinity is usable only if the system can get/set this thread's
    // binding, topology discovery can see processing units, and no hwloc
    // call has failed so far.
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // hwloc did not work; record the error and disable affinity.
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||    \
    KMP_OS_AIX
#if KMP_OS_LINUX
#include <sys/syscall.h>
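/* Note on the blocks below: the runtime invokes sched_{set,get}affinity
   through syscall() so it does not depend on libc wrappers that may be
   missing on older build systems; the per-architecture syscall numbers are
   therefore pinned and cross-checked here.  The call made later in this file
   looks like:

     syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);

   where 0 selects the calling thread and __kmp_affin_mask_size is the byte
   size of the mask buffer. */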
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_RISCV64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_LOONGARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_RISCV64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_VE
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_S390X
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 239
#elif __NR_sched_setaffinity != 239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 240
#elif __NR_sched_getaffinity != 240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD || KMP_OS_DRAGONFLY
#include <pthread.h>
#include <pthread_np.h>
#elif KMP_OS_AIX
#include <sys/dr.h>
#include <sys/rset.h>
#define VMI_MAXRADS 64 // Maximum number of RADs allowed by AIX.
#define GET_NUMBER_SMT_SETS 0x0004 // Get the number of SMT sets.
extern "C" int syssmt(int flags, int, int, int *);
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned long mask_t;
    typedef decltype(__kmp_affin_mask_size) mask_size_type;
    static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    static const mask_t ONE = 1;
    mask_size_type get_num_mask_types() const {
      return __kmp_affin_mask_size / sizeof(mask_t);
    }

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = (mask_t)0;
    }
    bool empty() const override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        if (mask[i] != (mask_t)0)
          return false;
      return true;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = ~(mask[i]);
    }
    bool is_equal(const KMPAffinity::Mask *rhs) const override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        if (mask[i] != convert->mask[i])
          return false;
      return true;
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override {
      int e;
      __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
      return e;
    }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
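    /* Worked example of the word/bit arithmetic above, assuming a 64-bit
       mask_t: logical CPU 70 maps to word 70 / 64 == 1 and bit 70 % 64 == 6,
       so set(70) performs mask[1] |= (ONE << 6).  ONE is a mask_t, so the
       shift is done at full word width rather than as a 32-bit int. */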
#if KMP_OS_AIX
    // AIX has no API to query which CPUs a thread is bound to, so this
    // routine only reports the full mask of available CPUs.
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      (void)abort_on_error;
      // Set the mask with all CPUs that are available.
      for (int i = 0; i < __kmp_xproc; ++i)
        KMP_CPU_SET(i, this);
      return 0;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int location;
      int gtid = __kmp_entry_gtid();
      int tid = thread_self();
      // Unbind the thread first so it can be rebound to the CPUs in the mask.
      int retval = bindprocessor(BINDTHREAD, tid, PROCESSOR_CLASS_ANY);
      // bindprocessor() binds to a single CPU, so walk the mask and bind to
      // each selected CPU in turn.
      KMP_CPU_SET_ITERATE(location, this) {
        if (KMP_CPU_ISSET(location, this)) {
          retval = bindprocessor(BINDTHREAD, tid, location);
          if (retval == -1 && errno == 1) {
            rsid_t rsid;
            rsethandle_t rsh;
            // Put something in rsh to prevent a compiler warning about
            // uninitialized use.
            rsh = rs_alloc(RS_EMPTY);
            rsid.at_pid = getpid();
            if (RS_DEFAULT_RSET != ra_getrset(R_PROCESS, rsid, 0, rsh)) {
              retval = ra_detachrset(R_PROCESS, rsid, 0);
              retval = bindprocessor(BINDTHREAD, tid, location);
            }
          }
          if (retval == 0) {
            KA_TRACE(10, ("__kmp_set_system_affinity: Done binding "
                          "T#%d to cpu=%d.\n",
                          gtid, location));
            continue;
          }
          int error = errno;
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "bindprocessor()"),
                        KMP_ERR(error), __kmp_msg_null);
            KA_TRACE(10, ("__kmp_set_system_affinity: Error binding "
                          "T#%d to cpu=%d, errno=%d.\n",
                          gtid, location, error));
            return error;
          }
        }
      }
      return 0;
    }
#else // !KMP_OS_AIX
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "pthread_getaffinity_np()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "pthread_setaffinity_np()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#endif // KMP_OS_AIX
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY
          || KMP_OS_AIX */
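/* Usage sketch (illustrative; the exact macro spellings are defined earlier
   in this header, not here): runtime code manipulates masks through the
   KMP_CPU_* macros rather than calling the Mask methods directly, roughly as
   bind_thread() above does:

     kmp_affin_mask_t *m;
     KMP_CPU_ALLOC_ON_STACK(m);
     KMP_CPU_ZERO(m);
     KMP_CPU_SET(which, m);
     __kmp_set_system_affinity(m, TRUE);
     KMP_CPU_FREE_FROM_STACK(m);
*/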
#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    bool empty() const override {
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        if (mask[i])
          return false;
      return true;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    bool is_equal(const KMPAffinity::Mask *rhs) const override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        if (mask[i] != convert->mask[i])
          return false;
      return true;
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
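    /* Layout note: Windows exposes logical processors in processor groups of
       at most 64, and ULONG_PTR is 64 bits on a 64-bit build, so this mask
       keeps one mask_t per processor group.  A global index therefore splits
       as group = i / BITS_PER_MASK_T and in-group bit i % BITS_PER_MASK_T,
       which is the arithmetic used by set()/is_set() above and by
       get_proc_group() below. */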
    int set_process_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups <= 1) {
        if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct and make the
        // system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        // Restore the original thread mask and record it in this object.
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
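// Topology description structures follow: kmp_hw_attr_t captures per-core
// attributes (hybrid core type and efficiency) for one level of the machine
// topology, and kmp_topology_t / kmp_hw_subset_t below build on it.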
struct kmp_hw_attr_t {
  int core_type : 8;
  int core_eff : 8;
  unsigned valid : 1;
  unsigned reserved : 15;

  static const int UNKNOWN_CORE_EFF = -1;

  kmp_hw_attr_t()
      : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
        valid(0), reserved(0) {}
  void set_core_type(kmp_hw_core_type_t type) {
    valid = 1;
    core_type = type;
  }
  void set_core_eff(int eff) {
    valid = 1;
    core_eff = eff;
  }
  kmp_hw_core_type_t get_core_type() const {
    return (kmp_hw_core_type_t)core_type;
  }
  int get_core_eff() const { return core_eff; }
  bool is_core_type_valid() const {
    return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
  }
  bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
  operator bool() const { return valid; }
  void clear() {
    core_type = KMP_HW_CORE_TYPE_UNKNOWN;
    core_eff = UNKNOWN_CORE_EFF;
    valid = 0;
  }
  bool contains(const kmp_hw_attr_t &other) const {
    if (!valid && !other.valid)
      return true;
    if (valid && other.valid) {
      if (other.is_core_type_valid()) {
        if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
          return false;
      }
      if (other.is_core_eff_valid()) {
        if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
          return false;
      }
      return true;
    }
    return false;
  }
#if KMP_AFFINITY_SUPPORTED
  bool contains(const kmp_affinity_attrs_t &attr) const {
    if (!valid && !attr.valid)
      return true;
    if (valid && attr.valid) {
      if (attr.core_type != KMP_HW_CORE_TYPE_UNKNOWN)
        return (is_core_type_valid() &&
                (get_core_type() == (kmp_hw_core_type_t)attr.core_type));
      if (attr.core_eff != UNKNOWN_CORE_EFF)
        return (is_core_eff_valid() && (get_core_eff() == attr.core_eff));
      return true;
    }
    return false;
  }
#endif
  bool operator==(const kmp_hw_attr_t &rhs) const {
    return (rhs.valid == valid && rhs.core_eff == core_eff &&
            rhs.core_type == core_type);
  }
  bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
};

#if KMP_AFFINITY_SUPPORTED
KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
#endif
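// Illustrative use of contains(): when filtering a hybrid machine (e.g. via
// KMP_HW_SUBSET), a hardware thread's attribute is matched against the
// requested attribute with hw_attr.contains(requested); the match succeeds
// when every field the request marks valid (core type and/or efficiency)
// agrees, and two all-default attributes trivially contain each other.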
class kmp_hw_thread_t {
public:
  static const int UNKNOWN_ID = -1;
  static const int MULTIPLE_ID = -2;
  static int compare_ids(const void *a, const void *b);
  static int compare_compact(const void *a, const void *b);
  int ids[KMP_HW_LAST];
  int sub_ids[KMP_HW_LAST];
  bool leader;
  int os_id;
  kmp_hw_attr_t attrs;

  void print() const;
  void clear() {
    for (int i = 0; i < (int)KMP_HW_LAST; ++i)
      ids[i] = UNKNOWN_ID;
    leader = false;
    attrs.clear();
  }
};
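// Reading note: ids[] and sub_ids[] hold one entry per topology level (they
// are sized for the maximum of KMP_HW_LAST levels). For a machine enumerated
// as [socket | core | thread], ids[0] is this hardware thread's socket id,
// ids[1] its core id, and ids[2] its thread id within the core; UNKNOWN_ID
// marks a level that has not been filled in yet.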
class kmp_topology_t {
  struct flags_t {
    int uniform : 1;
    int reserved : 31;
  };

  int depth;

  // Ordered array of the types in the topology, plus per-level ratio
  // (e.g. cores per package) and absolute count of objects.
  kmp_hw_t *types;
  int *ratio;
  int *count;

  // Number of core efficiencies; only useful for hybrid topologies.
  int num_core_efficiencies;
  int num_core_types;
  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];

  // The hardware threads array, num_hw_threads long.
  int num_hw_threads;
  kmp_hw_thread_t *hw_threads;

  // Equivalence table: maps a hardware topology type to the equivalent type
  // in the types[] array, or KMP_HW_UNKNOWN if there is none.
  kmp_hw_t equivalent[KMP_HW_LAST];

  flags_t flags;
  int compact;

  // Insert a new topology layer after allocation.
  void _insert_layer(kmp_hw_t type, const int *ids);

#if KMP_GROUP_AFFINITY
  // Insert topology information about Windows Processor groups.
  void _insert_windows_proc_groups();
#endif

  // Count each item and derive the number of x's per y
  // (e.g., the number of cores, and of threads per core).
  void _gather_enumeration_information();

  // Remove layers that don't add information to the topology.
  void _remove_radix1_layers();

  // Find out if the topology is uniform.
  void _discover_uniformity();

  // Set the last level cache equivalent type.
  void _set_last_level_cache();

  // Return the number of cores with attribute 'attr'. If 'find_all' is true,
  // count all cores on the machine, otherwise count per the layer 'above'.
  int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
                            bool find_all = false) const;

public:
  // Force use of allocate()/deallocate()
  kmp_topology_t() = delete;
  kmp_topology_t(const kmp_topology_t &t) = delete;
  kmp_topology_t(kmp_topology_t &&t) = delete;
  kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
  kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
  static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
  static void deallocate(kmp_topology_t *);

  // Functions used in create_map() routines
  kmp_hw_thread_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  const kmp_hw_thread_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  int get_num_hw_threads() const { return num_hw_threads; }
  void sort_ids() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_ids);
  }
  // Check if the hardware ids are unique; return true if so.
  bool check_ids() const;

  // Function to call after the create_map() routine
  void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);

#if KMP_AFFINITY_SUPPORTED
  // Set the granularity for affinity settings
  void set_granularity(kmp_affinity_t &stgs) const;
  bool is_close(int hwt1, int hwt2, const kmp_affinity_t &stgs) const;
  bool restrict_to_mask(const kmp_affin_mask_t *mask);
  bool filter_hw_subset();
#endif
  bool is_uniform() const { return flags.uniform; }
  // Tell whether a type is a valid type in the topology;
  // returns KMP_HW_UNKNOWN when there is no equivalent type.
  kmp_hw_t get_equivalent_type(kmp_hw_t type) const {
    if (type == KMP_HW_UNKNOWN)
      return KMP_HW_UNKNOWN;
    return equivalent[type];
  }
  // Set type1 = type2
  void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
    kmp_hw_t real_type2 = equivalent[type2];
    if (real_type2 == KMP_HW_UNKNOWN)
      real_type2 = type2;
    equivalent[type1] = real_type2;
    // Any type previously set equivalent to type1 must be redirected to
    // real_type2 as well.
    KMP_FOREACH_HW_TYPE(type) {
      if (equivalent[type] == type1) {
        equivalent[type] = real_type2;
      }
    }
  }
  // Calculate the number of objects at level1 per object at level2
  // (e.g., the number of threads per core).
  int calculate_ratio(int level1, int level2) const {
    KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
    KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
    int r = 1;
    for (int level = level1; level > level2; --level)
      r *= ratio[level];
    return r;
  }
  int get_ratio(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return ratio[level];
  }
  int get_depth() const { return depth; };
  kmp_hw_t get_type(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return types[level];
  }
  int get_level(kmp_hw_t type) const {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
    int eq_type = equivalent[type];
    if (eq_type == KMP_HW_UNKNOWN)
      return -1;
    for (int i = 0; i < depth; ++i)
      if (types[i] == eq_type)
        return i;
    return -1;
  }
  int get_count(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return count[level];
  }
  // Return the total number of cores with attribute 'attr'.
  int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
    return _get_ncores_with_attr(attr, -1, true);
  }
  // Return the number of cores with attribute 'attr' per topology level
  // 'above'.
  int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
    return _get_ncores_with_attr(attr, above, false);
  }

#if KMP_AFFINITY_SUPPORTED
  friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
  void sort_compact(kmp_affinity_t &affinity) {
    compact = affinity.compact;
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_compact);
  }
#endif
  void print(const char *env_var = "KMP_AFFINITY") const;
};
extern kmp_topology_t *__kmp_topology;
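// Illustrative query (a sketch of how the accessors above compose; the level
// variables are hypothetical): with a topology enumerated as
// [socket | core | thread], threads-per-core is the ratio at the thread
// level, and threads-per-socket multiplies the ratios between the two levels:
//
//   int thread_lvl = __kmp_topology->get_level(KMP_HW_THREAD);
//   int socket_lvl = __kmp_topology->get_level(KMP_HW_SOCKET);
//   int thr_per_core = __kmp_topology->get_ratio(thread_lvl);
//   int thr_per_socket =
//       __kmp_topology->calculate_ratio(thread_lvl, socket_lvl);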
class kmp_hw_subset_t {
  const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;

public:
  // Describe a machine topology item in KMP_HW_SUBSET
  struct item_t {
    kmp_hw_t type;
    int num_attrs;
    int num[MAX_ATTRS];
    int offset[MAX_ATTRS];
    kmp_hw_attr_t attr[MAX_ATTRS];
  };
  // Parenthesize max to avoid accidental use of the Windows max macro.
  const static int USE_ALL = (std::numeric_limits<int>::max)();

private:
  int depth;
  int capacity;
  item_t *items;
  kmp_uint64 set;
  bool absolute;
  // The set must be able to handle up to KMP_HW_LAST number of layers
  KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
  // Sort the KMP_HW_SUBSET items to follow topology order; unknown topology
  // types end up at the beginning of the subset.
  static int hw_subset_compare(const void *i1, const void *i2) {
    kmp_hw_t type1 = ((const item_t *)i1)->type;
    kmp_hw_t type2 = ((const item_t *)i2)->type;
    int level1 = __kmp_topology->get_level(type1);
    int level2 = __kmp_topology->get_level(type2);
    return level1 - level2;
  }

public:
  // Force use of allocate()/deallocate()
  kmp_hw_subset_t() = delete;
  kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
  kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;

  static kmp_hw_subset_t *allocate() {
    int initial_capacity = 5;
    kmp_hw_subset_t *retval =
        (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
    retval->depth = 0;
    retval->capacity = initial_capacity;
    retval->set = 0ull;
    retval->absolute = false;
    retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
    return retval;
  }
  static void deallocate(kmp_hw_subset_t *subset) {
    __kmp_free(subset->items);
    __kmp_free(subset);
  }
  void set_absolute() { absolute = true; }
  bool is_absolute() const { return absolute; }
  void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
    // If an item of this layer type already exists, append the new
    // num/offset/attr to it.
    for (int i = 0; i < depth; ++i) {
      if (items[i].type == type) {
        int idx = items[i].num_attrs++;
        if ((size_t)idx >= MAX_ATTRS)
          return;
        items[i].num[idx] = num;
        items[i].offset[idx] = offset;
        items[i].attr[idx] = attr;
        return;
      }
    }
    // Otherwise grow the item array if necessary and add a new item.
    if (depth == capacity - 1) {
      capacity *= 2;
      item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
      for (int i = 0; i < depth; ++i)
        new_items[i] = items[i];
      __kmp_free(items);
      items = new_items;
    }
    items[depth].num_attrs = 1;
    items[depth].type = type;
    items[depth].num[0] = num;
    items[depth].offset[0] = offset;
    items[depth].attr[0] = attr;
    depth++;
    set |= (1ull << type);
  }
  int get_depth() const { return depth; }
  const item_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  item_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  void remove(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    set &= ~(1ull << items[index].type);
    for (int j = index + 1; j < depth; ++j) {
      items[j - 1] = items[j];
    }
    depth--;
  }
  void sort() {
    KMP_DEBUG_ASSERT(__kmp_topology);
    qsort(items, depth, sizeof(item_t), hw_subset_compare);
  }
  bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
  // Canonicalize a non-absolute KMP_HW_SUBSET value: make sure each of
  // {socket, core, thread} appears in the subset, e.g.,
  // 1s,2c => 1s,2c,*t and 2c,1t => *s,2c,1t.
  void canonicalize(const kmp_topology_t *top) {
    if (is_absolute())
      return;
    kmp_hw_t targeted[] = {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD};

    // Inserting a layer on top of the topology is not allowed when the
    // topology does not have that layer.
    for (kmp_hw_t type : targeted)
      if (top->get_level(type) == KMP_HW_UNKNOWN)
        return;

    // For each targeted layer, put it in KMP_HW_SUBSET if not specified.
    for (kmp_hw_t type : targeted) {
      bool found = false;
      for (int i = 0; i < get_depth(); ++i) {
        if (top->get_equivalent_type(items[i].type) == type) {
          found = true;
          break;
        }
      }
      if (!found) {
        push_back(USE_ALL, type, 0, kmp_hw_attr_t{});
      }
    }
  }
  void dump() const {
    printf("**********************\n");
    printf("*** kmp_hw_subset: ***\n");
    printf("* depth: %d\n", depth);
    printf("* items:\n");
    for (int i = 0; i < depth; ++i) {
      printf("  type: %s\n", __kmp_hw_get_keyword(items[i].type));
      for (int j = 0; j < items[i].num_attrs; ++j) {
        printf("    num: %d, offset: %d, attr: ", items[i].num[j],
               items[i].offset[j]);
        if (!items[i].attr[j]) {
          printf(" (none)\n");
        } else {
          printf(
              " core_type = %s, core_eff = %d\n",
              __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
              items[i].attr[j].get_core_eff());
        }
      }
    }
    printf("* set: 0x%llx\n", set);
    printf("* absolute: %d\n", absolute);
    printf("**********************\n");
  }
};
extern kmp_hw_subset_t *__kmp_hw_subset;
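// Example of the canonicalization above (values illustrative): with
// KMP_HW_SUBSET=2c,1t the parser (elsewhere in the runtime) records only the
// core and thread layers; canonicalize() then appends the missing socket
// layer as USE_ALL, which is equivalent to specifying *s,2c,1t. Absolute
// subsets are left untouched.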
/* Machine-specific hierarchy info, computed once at init and used to build an
   efficient barrier tree. */
class hierarchy_info {
public:
  // Good default values for number of leaves and branching factor when no
  // affinity information is available.
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  // Number of levels in the hierarchy; levels are added (doubling capacity)
  // when the machine is oversubscribed.
  kmp_uint32 maxLevels;
  // Depth of the machine hierarchy: levels along the longest root-to-leaf
  // path.
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // holds an init_status value
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  // Level 0 corresponds to leaves; numPerLevel[i] is the number of children
  // the parent of a level-i node has, and skipPerLevel[i] the number of
  // leaves spanned by one level-i node.
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;

  void deriveLevels() {
    int hier_depth = __kmp_topology->get_depth();
    for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
      numPerLevel[level] = __kmp_topology->get_ratio(i);
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    // Re-initialize the fields in case the library is re-initialized.
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels; ++i) { // init arrays to 1
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Derive levels from the detected topology, or fall back to a default
    // two-level shape.
    if (__kmp_topology && __kmp_topology->get_depth() > 0) {
      deriveLevels();
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }
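  /* Worked example of the arrays computed by init() (values illustrative):
     for a machine seen as 2 threads/core, 4 cores/socket, 4 sockets,
     deriveLevels() yields numPerLevel = {2, 4, 4, 1, ...}. The first loop
     above then gives skipPerLevel = {1, 2, 8, ...}: skip[1] = num[0]*skip[0]
     and skip[2] = num[1]*skip[1], i.e. one level-2 node spans 8 leaf threads.
     Levels at or beyond depth simply double the previous skip to make room
     for oversubscription. */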
  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // First see if the old maxLevels is enough to contain the new size.
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space: expand the hierarchy.
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize the arrays, copy the old elements, and init the new ones to 1.
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
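/* Oversubscription sketch (illustrative numbers): if the hierarchy was built
   for base_num_threads = 8 with skipPerLevel = {1, 2, 8} and resize(20) is
   called, the top skip doubles per added level (8 -> 16 -> 32) until it can
   cover 20 threads, with numPerLevel of the level below doubling in step, so
   two extra levels are appended rather than rebuilding the whole tree. */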