14 #include "kmp_affinity.h"
18 #include "kmp_wrapper_getpid.h"
19 #if KMP_USE_HIER_SCHED
20 #include "kmp_dispatch_hier.h"
24 static hierarchy_info machine_hierarchy;
26 void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
28 void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
32 if (TCR_1(machine_hierarchy.uninitialized))
33 machine_hierarchy.init(NULL, nproc);
36 if (nproc > machine_hierarchy.base_num_threads)
37 machine_hierarchy.resize(nproc);
39 depth = machine_hierarchy.depth;
40 KMP_DEBUG_ASSERT(depth > 0);
42 thr_bar->depth = depth;
43 __kmp_type_convert(machine_hierarchy.numPerLevel[0] - 1,
44 &(thr_bar->base_leaf_kids));
45 thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
48 #if KMP_AFFINITY_SUPPORTED
50 const char *__kmp_hw_get_catalog_string(kmp_hw_t type,
bool plural) {
53 return ((plural) ? KMP_I18N_STR(Sockets) : KMP_I18N_STR(Socket));
55 return ((plural) ? KMP_I18N_STR(Dice) : KMP_I18N_STR(Die));
57 return ((plural) ? KMP_I18N_STR(Modules) : KMP_I18N_STR(Module));
59 return ((plural) ? KMP_I18N_STR(Tiles) : KMP_I18N_STR(Tile));
61 return ((plural) ? KMP_I18N_STR(NumaDomains) : KMP_I18N_STR(NumaDomain));
63 return ((plural) ? KMP_I18N_STR(L3Caches) : KMP_I18N_STR(L3Cache));
65 return ((plural) ? KMP_I18N_STR(L2Caches) : KMP_I18N_STR(L2Cache));
67 return ((plural) ? KMP_I18N_STR(L1Caches) : KMP_I18N_STR(L1Cache));
69 return ((plural) ? KMP_I18N_STR(Cores) : KMP_I18N_STR(Core));
71 return ((plural) ? KMP_I18N_STR(Threads) : KMP_I18N_STR(Thread));
72 case KMP_HW_PROC_GROUP:
73 return ((plural) ? KMP_I18N_STR(ProcGroups) : KMP_I18N_STR(ProcGroup));
75 return KMP_I18N_STR(Unknown);
83 static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP,
int nTh,
84 int depth, kmp_hw_t *types) {
85 int preference[KMP_HW_LAST];
86 int top_index1, top_index2;
88 preference[KMP_HW_PROC_GROUP] = 110;
89 preference[KMP_HW_SOCKET] = 100;
90 preference[KMP_HW_CORE] = 95;
91 preference[KMP_HW_THREAD] = 90;
92 preference[KMP_HW_DIE] = 85;
93 preference[KMP_HW_NUMA] = 80;
94 preference[KMP_HW_TILE] = 75;
95 preference[KMP_HW_MODULE] = 73;
96 preference[KMP_HW_L3] = 70;
97 preference[KMP_HW_L2] = 65;
98 preference[KMP_HW_L1] = 60;
101 while (top_index1 < depth - 1 && top_index2 < depth) {
102 KMP_DEBUG_ASSERT(top_index1 >= 0 && top_index1 < depth);
103 KMP_DEBUG_ASSERT(top_index2 >= 0 && top_index2 < depth);
104 kmp_hw_t type1 = types[top_index1];
105 kmp_hw_t type2 = types[top_index2];
106 if (type1 == KMP_HW_SOCKET && type2 == KMP_HW_CORE) {
107 top_index1 = top_index2++;
111 bool all_same =
true;
112 int id1 = addrP[0].first.labels[top_index1];
113 int id2 = addrP[0].first.labels[top_index2];
114 int pref1 = preference[type1];
115 int pref2 = preference[type2];
116 for (
int hwidx = 1; hwidx < nTh; ++hwidx) {
117 if (addrP[hwidx].first.labels[top_index1] == id1 &&
118 addrP[hwidx].first.labels[top_index2] != id2) {
122 if (addrP[hwidx].first.labels[top_index2] != id2)
124 id1 = addrP[hwidx].first.labels[top_index1];
125 id2 = addrP[hwidx].first.labels[top_index2];
129 kmp_hw_t remove_type, keep_type;
130 int remove_layer, remove_layer_ids;
133 remove_layer = remove_layer_ids = top_index2;
137 remove_layer = remove_layer_ids = top_index1;
143 remove_layer_ids = top_index2;
146 for (
int idx = 0; idx < nTh; ++idx) {
147 Address &hw_thread = addrP[idx].first;
148 for (
int d = remove_layer_ids; d < depth - 1; ++d)
149 hw_thread.labels[d] = hw_thread.labels[d + 1];
152 for (
int idx = remove_layer; idx < depth - 1; ++idx)
153 types[idx] = types[idx + 1];
156 top_index1 = top_index2++;
159 KMP_ASSERT(depth > 0);
165 static void __kmp_affinity_gather_enumeration_information(AddrUnsPair *addrP,
170 int previous_id[KMP_HW_LAST];
171 int max[KMP_HW_LAST];
173 for (
int i = 0; i < depth; ++i) {
179 for (
int i = 0; i < nTh; ++i) {
180 Address &hw_thread = addrP[i].first;
181 for (
int layer = 0; layer < depth; ++layer) {
182 int id = hw_thread.labels[layer];
183 if (
id != previous_id[layer]) {
185 for (
int l = layer; l < depth; ++l)
189 for (
int l = layer + 1; l < depth; ++l) {
190 if (max[l] > ratio[l])
197 for (
int layer = 0; layer < depth; ++layer) {
198 previous_id[layer] = hw_thread.labels[layer];
201 for (
int layer = 0; layer < depth; ++layer) {
202 if (max[layer] > ratio[layer])
203 ratio[layer] = max[layer];
208 static bool __kmp_affinity_discover_uniformity(
int depth,
int *ratio,
211 for (
int level = 0; level < depth; ++level)
213 return (num == count[depth - 1]);
217 static inline int __kmp_affinity_calculate_ratio(
int *ratio,
int deep_level,
220 if (deep_level < 0 || shallow_level < 0)
222 for (
int level = deep_level; level > shallow_level; --level)
223 retval *= ratio[level];
227 static void __kmp_affinity_print_topology(AddrUnsPair *addrP,
int len,
228 int depth, kmp_hw_t *types) {
231 __kmp_str_buf_init(&buf);
232 KMP_INFORM(OSProcToPhysicalThreadMap,
"KMP_AFFINITY");
233 for (proc = 0; proc < len; proc++) {
234 for (
int i = 0; i < depth; ++i) {
235 __kmp_str_buf_print(&buf,
"%s %d ", __kmp_hw_get_catalog_string(types[i]),
236 addrP[proc].first.labels[i]);
238 KMP_INFORM(OSProcMapToPack,
"KMP_AFFINITY", addrP[proc].second, buf.str);
239 __kmp_str_buf_clear(&buf);
241 __kmp_str_buf_free(&buf);
246 static void __kmp_affinity_print_topology(AddrUnsPair *address2os,
int len,
247 int depth,
int pkgLevel,
248 int coreLevel,
int threadLevel) {
251 KMP_INFORM(OSProcToPhysicalThreadMap,
"KMP_AFFINITY");
252 for (proc = 0; proc < len; proc++) {
255 __kmp_str_buf_init(&buf);
256 for (level = 0; level < depth; level++) {
257 if (level == threadLevel) {
258 __kmp_str_buf_print(&buf,
"%s ", KMP_I18N_STR(Thread));
259 }
else if (level == coreLevel) {
260 __kmp_str_buf_print(&buf,
"%s ", KMP_I18N_STR(Core));
261 }
else if (level == pkgLevel) {
262 __kmp_str_buf_print(&buf,
"%s ", KMP_I18N_STR(Package));
263 }
else if (level > pkgLevel) {
264 __kmp_str_buf_print(&buf,
"%s_%d ", KMP_I18N_STR(Node),
265 level - pkgLevel - 1);
267 __kmp_str_buf_print(&buf,
"L%d ", level);
269 __kmp_str_buf_print(&buf,
"%d ", address2os[proc].first.labels[level]);
271 KMP_INFORM(OSProcMapToPack,
"KMP_AFFINITY", address2os[proc].second,
273 __kmp_str_buf_free(&buf);
277 bool KMPAffinity::picked_api =
false;
279 void *KMPAffinity::Mask::operator
new(
size_t n) {
return __kmp_allocate(n); }
280 void *KMPAffinity::Mask::operator
new[](
size_t n) {
return __kmp_allocate(n); }
281 void KMPAffinity::Mask::operator
delete(
void *p) { __kmp_free(p); }
282 void KMPAffinity::Mask::operator
delete[](
void *p) { __kmp_free(p); }
283 void *KMPAffinity::operator
new(
size_t n) {
return __kmp_allocate(n); }
284 void KMPAffinity::operator
delete(
void *p) { __kmp_free(p); }
286 void KMPAffinity::pick_api() {
287 KMPAffinity *affinity_dispatch;
293 if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
294 __kmp_affinity_type != affinity_disabled) {
295 affinity_dispatch =
new KMPHwlocAffinity();
299 affinity_dispatch =
new KMPNativeAffinity();
301 __kmp_affinity_dispatch = affinity_dispatch;
305 void KMPAffinity::destroy_api() {
306 if (__kmp_affinity_dispatch != NULL) {
307 delete __kmp_affinity_dispatch;
308 __kmp_affinity_dispatch = NULL;
313 #define KMP_ADVANCE_SCAN(scan) \
314 while (*scan != '\0') { \
322 char *__kmp_affinity_print_mask(
char *buf,
int buf_len,
323 kmp_affin_mask_t *mask) {
324 int start = 0, finish = 0, previous = 0;
327 KMP_ASSERT(buf_len >= 40);
330 char *end = buf + buf_len - 1;
333 if (mask->begin() == mask->end()) {
334 KMP_SNPRINTF(scan, end - scan + 1,
"{<empty>}");
335 KMP_ADVANCE_SCAN(scan);
336 KMP_ASSERT(scan <= end);
341 start = mask->begin();
345 for (finish = mask->next(start), previous = start;
346 finish == previous + 1 && finish != mask->end();
347 finish = mask->next(finish)) {
354 KMP_SNPRINTF(scan, end - scan + 1,
"%s",
",");
355 KMP_ADVANCE_SCAN(scan);
360 if (previous - start > 1) {
361 KMP_SNPRINTF(scan, end - scan + 1,
"%u-%u", start, previous);
364 KMP_SNPRINTF(scan, end - scan + 1,
"%u", start);
365 KMP_ADVANCE_SCAN(scan);
366 if (previous - start > 0) {
367 KMP_SNPRINTF(scan, end - scan + 1,
",%u", previous);
370 KMP_ADVANCE_SCAN(scan);
373 if (start == mask->end())
381 KMP_ASSERT(scan <= end);
384 #undef KMP_ADVANCE_SCAN
390 kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
391 kmp_affin_mask_t *mask) {
392 int start = 0, finish = 0, previous = 0;
397 __kmp_str_buf_clear(buf);
400 if (mask->begin() == mask->end()) {
401 __kmp_str_buf_print(buf,
"%s",
"{<empty>}");
406 start = mask->begin();
410 for (finish = mask->next(start), previous = start;
411 finish == previous + 1 && finish != mask->end();
412 finish = mask->next(finish)) {
419 __kmp_str_buf_print(buf,
"%s",
",");
424 if (previous - start > 1) {
425 __kmp_str_buf_print(buf,
"%u-%u", start, previous);
428 __kmp_str_buf_print(buf,
"%u", start);
429 if (previous - start > 0) {
430 __kmp_str_buf_print(buf,
",%u", previous);
435 if (start == mask->end())
441 void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
444 #if KMP_GROUP_AFFINITY
446 if (__kmp_num_proc_groups > 1) {
448 KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
449 for (group = 0; group < __kmp_num_proc_groups; group++) {
451 int num = __kmp_GetActiveProcessorCount(group);
452 for (i = 0; i < num; i++) {
453 KMP_CPU_SET(i + group * (CHAR_BIT *
sizeof(DWORD_PTR)), mask);
462 for (proc = 0; proc < __kmp_xproc; proc++) {
463 KMP_CPU_SET(proc, mask);
479 static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
481 KMP_DEBUG_ASSERT(numAddrs > 0);
482 int depth = address2os->first.depth;
483 unsigned *counts = (
unsigned *)__kmp_allocate(depth *
sizeof(
unsigned));
484 unsigned *lastLabel = (
unsigned *)__kmp_allocate(depth *
sizeof(
unsigned));
486 for (labCt = 0; labCt < depth; labCt++) {
487 address2os[0].first.childNums[labCt] = counts[labCt] = 0;
488 lastLabel[labCt] = address2os[0].first.labels[labCt];
491 for (i = 1; i < numAddrs; i++) {
492 for (labCt = 0; labCt < depth; labCt++) {
493 if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
495 for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
497 lastLabel[labCt2] = address2os[i].first.labels[labCt2];
500 lastLabel[labCt] = address2os[i].first.labels[labCt];
504 for (labCt = 0; labCt < depth; labCt++) {
505 address2os[i].first.childNums[labCt] = counts[labCt];
507 for (; labCt < (int)Address::maxDepth; labCt++) {
508 address2os[i].first.childNums[labCt] = 0;
511 __kmp_free(lastLabel);
526 kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
528 static int nCoresPerPkg, nPackages;
529 static int __kmp_nThreadsPerCore;
530 #ifndef KMP_DFLT_NTH_CORES
531 static int __kmp_ncores;
533 static int *__kmp_pu_os_idx = NULL;
534 static int nDiesPerPkg = 1;
540 inline static bool __kmp_affinity_uniform_topology() {
541 return __kmp_avail_proc ==
542 (__kmp_nThreadsPerCore * nCoresPerPkg * nDiesPerPkg * nPackages);
547 static inline bool __kmp_hwloc_is_cache_type(hwloc_obj_t obj) {
548 #if HWLOC_API_VERSION >= 0x00020000
549 return hwloc_obj_type_is_cache(obj->type);
551 return obj->type == HWLOC_OBJ_CACHE;
556 static inline kmp_hw_t __kmp_hwloc_type_2_topology_type(hwloc_obj_t obj) {
558 if (__kmp_hwloc_is_cache_type(obj)) {
559 if (obj->attr->cache.type == HWLOC_OBJ_CACHE_INSTRUCTION)
560 return KMP_HW_UNKNOWN;
561 switch (obj->attr->cache.depth) {
565 #if KMP_MIC_SUPPORTED
566 if (__kmp_mic_type == mic3) {
574 return KMP_HW_UNKNOWN;
578 case HWLOC_OBJ_PACKAGE:
579 return KMP_HW_SOCKET;
580 case HWLOC_OBJ_NUMANODE:
585 return KMP_HW_THREAD;
587 return KMP_HW_UNKNOWN;
594 static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
595 hwloc_obj_type_t type) {
598 for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
599 obj->logical_index, type, 0);
601 hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
603 first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
610 static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
612 kmp_hwloc_depth_t depth,
614 if (o->depth == depth) {
620 for (
unsigned i = 0; i < o->arity; i++)
621 sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
625 static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
626 hwloc_obj_type_t type,
628 if (!hwloc_compare_types(o->type, type)) {
634 for (
unsigned i = 0; i < o->arity; i++)
635 sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
641 static int __kmp_hwloc_get_sub_id(hwloc_topology_t t, hwloc_obj_t higher,
644 hwloc_obj_type_t ltype = lower->type;
645 int lindex = lower->logical_index - 1;
648 obj = hwloc_get_obj_by_type(t, ltype, lindex);
649 while (obj && lindex >= 0 &&
650 hwloc_bitmap_isincluded(obj->cpuset, higher->cpuset)) {
652 sub_id = (int)(RCAST(kmp_intptr_t, obj->userdata));
657 obj = hwloc_get_obj_by_type(t, ltype, lindex);
660 lower->userdata = RCAST(
void *, sub_id + 1);
664 static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
665 kmp_i18n_id_t *
const msg_id) {
667 int hw_thread_index, sub_id, nActiveThreads;
669 hwloc_obj_t pu, obj, root, prev;
670 int ratio[KMP_HW_LAST];
671 int count[KMP_HW_LAST];
672 kmp_hw_t types[KMP_HW_LAST];
674 hwloc_topology_t tp = __kmp_hwloc_topology;
675 *msg_id = kmp_i18n_null;
678 kmp_affin_mask_t *oldMask;
679 KMP_CPU_ALLOC(oldMask);
680 __kmp_get_system_affinity(oldMask, TRUE);
682 if (!KMP_AFFINITY_CAPABLE()) {
685 KMP_ASSERT(__kmp_affinity_type == affinity_none);
687 hwloc_obj_t o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0);
689 nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_CORE);
692 o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0);
694 __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_PU);
696 __kmp_nThreadsPerCore = 1;
697 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
698 if (nCoresPerPkg == 0)
700 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
701 if (__kmp_affinity_verbose) {
702 KMP_INFORM(AffNotUsingHwloc,
"KMP_AFFINITY");
703 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
704 if (__kmp_affinity_uniform_topology()) {
705 KMP_INFORM(Uniform,
"KMP_AFFINITY");
707 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
709 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
710 __kmp_nThreadsPerCore, __kmp_ncores);
712 KMP_CPU_FREE(oldMask);
716 root = hwloc_get_root_obj(tp);
720 pu = hwloc_get_pu_obj_by_os_index(tp, __kmp_affin_fullMask->begin());
722 types[depth] = KMP_HW_THREAD;
724 while (obj != root && obj != NULL) {
726 #if HWLOC_API_VERSION >= 0x00020000
727 if (obj->memory_arity) {
729 for (memory = obj->memory_first_child; memory;
730 memory = hwloc_get_next_child(tp, obj, memory)) {
731 if (memory->type == HWLOC_OBJ_NUMANODE)
734 if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
735 types[depth] = KMP_HW_NUMA;
740 type = __kmp_hwloc_type_2_topology_type(obj);
741 if (type != KMP_HW_UNKNOWN) {
746 KMP_ASSERT(depth > 0 && depth <= KMP_HW_LAST);
749 for (
int i = 0, j = depth - 1; i < j; ++i, --j) {
750 kmp_hw_t temp = types[i];
756 AddrUnsPair *retval =
757 (AddrUnsPair *)__kmp_allocate(
sizeof(AddrUnsPair) * __kmp_avail_proc);
758 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
759 __kmp_pu_os_idx = (
int *)__kmp_allocate(
sizeof(
int) * __kmp_avail_proc);
764 while (pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu)) {
765 int index = depth - 1;
766 bool included = KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask);
767 Address hw_thread(depth);
769 hw_thread.labels[index] = pu->logical_index;
770 __kmp_pu_os_idx[hw_thread_index] = pu->os_index;
776 while (obj != root && obj != NULL) {
778 #if HWLOC_API_VERSION >= 0x00020000
782 if (obj->memory_arity) {
784 for (memory = obj->memory_first_child; memory;
785 memory = hwloc_get_next_child(tp, obj, memory)) {
786 if (memory->type == HWLOC_OBJ_NUMANODE)
789 if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
790 sub_id = __kmp_hwloc_get_sub_id(tp, memory, prev);
792 hw_thread.labels[index] = memory->logical_index;
793 hw_thread.labels[index + 1] = sub_id;
800 type = __kmp_hwloc_type_2_topology_type(obj);
801 if (type != KMP_HW_UNKNOWN) {
802 sub_id = __kmp_hwloc_get_sub_id(tp, obj, prev);
804 hw_thread.labels[index] = obj->logical_index;
805 hw_thread.labels[index + 1] = sub_id;
812 retval[hw_thread_index] = AddrUnsPair(hw_thread, pu->os_index);
818 KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
819 KMP_ASSERT(nActiveThreads > 0);
820 if (nActiveThreads == 1) {
821 __kmp_ncores = nPackages = 1;
822 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
823 if (__kmp_affinity_verbose) {
824 KMP_INFORM(AffUsingHwloc,
"KMP_AFFINITY");
825 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
826 KMP_INFORM(Uniform,
"KMP_AFFINITY");
827 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
828 __kmp_nThreadsPerCore, __kmp_ncores);
831 if (__kmp_affinity_type == affinity_none) {
833 KMP_CPU_FREE(oldMask);
839 addr.labels[0] = retval[0].first.labels[0];
840 retval[0].first = addr;
842 if (__kmp_affinity_gran_levels < 0) {
843 __kmp_affinity_gran_levels = 0;
846 if (__kmp_affinity_verbose) {
847 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
850 *address2os = retval;
851 KMP_CPU_FREE(oldMask);
856 qsort(retval, nActiveThreads,
sizeof(*retval),
857 __kmp_affinity_cmp_Address_labels);
861 depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
864 __kmp_affinity_gather_enumeration_information(retval, nActiveThreads, depth,
865 types, ratio, count);
867 for (
int level = 0; level < depth; ++level) {
868 if ((types[level] == KMP_HW_L2 || types[level] == KMP_HW_L3))
869 __kmp_tile_depth = level;
874 int thread_level, core_level, tile_level, numa_level, socket_level;
875 thread_level = core_level = tile_level = numa_level = socket_level = -1;
876 for (
int level = 0; level < depth; ++level) {
877 if (types[level] == KMP_HW_THREAD)
878 thread_level = level;
879 else if (types[level] == KMP_HW_CORE)
881 else if (types[level] == KMP_HW_SOCKET)
882 socket_level = level;
883 else if (types[level] == KMP_HW_TILE)
885 else if (types[level] == KMP_HW_NUMA)
888 __kmp_nThreadsPerCore =
889 __kmp_affinity_calculate_ratio(ratio, thread_level, core_level);
891 __kmp_affinity_calculate_ratio(ratio, core_level, socket_level);
892 if (socket_level >= 0)
893 nPackages = count[socket_level];
897 __kmp_ncores = count[core_level];
901 unsigned uniform = __kmp_affinity_discover_uniformity(depth, ratio, count);
904 if (__kmp_affinity_verbose) {
905 kmp_hw_t numerator_type, denominator_type;
907 __kmp_str_buf_init(&buf);
908 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
910 KMP_INFORM(Uniform,
"KMP_AFFINITY");
912 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
915 __kmp_str_buf_clear(&buf);
918 core_level = depth - 1;
919 int ncores = count[core_level];
921 denominator_type = KMP_HW_UNKNOWN;
922 for (
int level = 0; level < depth; ++level) {
925 numerator_type = types[level];
929 __kmp_str_buf_print(&buf,
"%d %s", c, __kmp_hw_get_catalog_string(
930 numerator_type, plural));
932 __kmp_str_buf_print(&buf,
" x %d %s/%s", c,
933 __kmp_hw_get_catalog_string(numerator_type, plural),
934 __kmp_hw_get_catalog_string(denominator_type));
936 denominator_type = numerator_type;
938 KMP_INFORM(TopologyGeneric,
"KMP_AFFINITY", buf.str, ncores);
939 __kmp_str_buf_free(&buf);
942 if (__kmp_affinity_type == affinity_none) {
944 KMP_CPU_FREE(oldMask);
950 if (__kmp_affinity_gran == affinity_gran_node)
951 __kmp_affinity_gran = affinity_gran_numa;
952 KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default);
953 if (__kmp_affinity_gran_levels < 0) {
954 __kmp_affinity_gran_levels = 0;
955 if ((thread_level >= 0) && (__kmp_affinity_gran > affinity_gran_thread))
956 __kmp_affinity_gran_levels++;
957 if ((core_level >= 0) && (__kmp_affinity_gran > affinity_gran_core))
958 __kmp_affinity_gran_levels++;
959 if ((tile_level >= 0) && (__kmp_affinity_gran > affinity_gran_tile))
960 __kmp_affinity_gran_levels++;
961 if ((numa_level >= 0) && (__kmp_affinity_gran > affinity_gran_numa))
962 __kmp_affinity_gran_levels++;
963 if ((socket_level >= 0) && (__kmp_affinity_gran > affinity_gran_package))
964 __kmp_affinity_gran_levels++;
967 if (__kmp_affinity_verbose)
968 __kmp_affinity_print_topology(retval, nActiveThreads, depth, types);
970 KMP_CPU_FREE(oldMask);
971 *address2os = retval;
979 static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
980 kmp_i18n_id_t *
const msg_id) {
982 *msg_id = kmp_i18n_null;
987 if (!KMP_AFFINITY_CAPABLE()) {
988 KMP_ASSERT(__kmp_affinity_type == affinity_none);
989 __kmp_ncores = nPackages = __kmp_xproc;
990 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
991 if (__kmp_affinity_verbose) {
992 KMP_INFORM(AffFlatTopology,
"KMP_AFFINITY");
993 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
994 KMP_INFORM(Uniform,
"KMP_AFFINITY");
995 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
996 __kmp_nThreadsPerCore, __kmp_ncores);
1005 __kmp_ncores = nPackages = __kmp_avail_proc;
1006 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1007 if (__kmp_affinity_verbose) {
1008 KMP_INFORM(AffCapableUseFlat,
"KMP_AFFINITY");
1009 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1010 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1011 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1012 __kmp_nThreadsPerCore, __kmp_ncores);
1014 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
1015 __kmp_pu_os_idx = (
int *)__kmp_allocate(
sizeof(
int) * __kmp_avail_proc);
1016 if (__kmp_affinity_type == affinity_none) {
1019 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
1020 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
1022 __kmp_pu_os_idx[avail_ct++] = i;
1029 (AddrUnsPair *)__kmp_allocate(
sizeof(**address2os) * __kmp_avail_proc);
1032 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
1034 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
1037 __kmp_pu_os_idx[avail_ct] = i;
1040 (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
1042 if (__kmp_affinity_verbose) {
1043 KMP_INFORM(OSProcToPackage,
"KMP_AFFINITY");
1046 if (__kmp_affinity_gran_levels < 0) {
1049 if (__kmp_affinity_gran > affinity_gran_package) {
1050 __kmp_affinity_gran_levels = 1;
1052 __kmp_affinity_gran_levels = 0;
1058 #if KMP_GROUP_AFFINITY
1064 static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
1065 kmp_i18n_id_t *
const msg_id) {
1067 *msg_id = kmp_i18n_null;
1071 if (!KMP_AFFINITY_CAPABLE()) {
1078 (AddrUnsPair *)__kmp_allocate(
sizeof(**address2os) * __kmp_avail_proc);
1079 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
1080 __kmp_pu_os_idx = (
int *)__kmp_allocate(
sizeof(
int) * __kmp_avail_proc);
1083 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
1085 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
1088 __kmp_pu_os_idx[avail_ct] = i;
1090 addr.labels[0] = i / (CHAR_BIT *
sizeof(DWORD_PTR));
1091 addr.labels[1] = i % (CHAR_BIT *
sizeof(DWORD_PTR));
1092 (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
1094 if (__kmp_affinity_verbose) {
1095 KMP_INFORM(AffOSProcToGroup,
"KMP_AFFINITY", i, addr.labels[0],
1100 if (__kmp_affinity_gran_levels < 0) {
1101 if (__kmp_affinity_gran == affinity_gran_group) {
1102 __kmp_affinity_gran_levels = 1;
1103 }
else if ((__kmp_affinity_gran == affinity_gran_fine) ||
1104 (__kmp_affinity_gran == affinity_gran_thread)) {
1105 __kmp_affinity_gran_levels = 0;
1107 const char *gran_str = NULL;
1108 if (__kmp_affinity_gran == affinity_gran_core) {
1110 }
else if (__kmp_affinity_gran == affinity_gran_package) {
1111 gran_str =
"package";
1112 }
else if (__kmp_affinity_gran == affinity_gran_node) {
1120 __kmp_affinity_gran_levels = 0;
1128 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1146 INTEL_LEVEL_TYPE_INVALID = 0,
1147 INTEL_LEVEL_TYPE_SMT = 1,
1148 INTEL_LEVEL_TYPE_CORE = 2,
1149 INTEL_LEVEL_TYPE_TILE = 3,
1150 INTEL_LEVEL_TYPE_MODULE = 4,
1151 INTEL_LEVEL_TYPE_DIE = 5,
1152 INTEL_LEVEL_TYPE_LAST = 6,
1155 struct cpuid_level_info_t {
1156 unsigned level_type, mask, mask_width, nitems, cache_mask;
1159 template <kmp_u
int32 LSB, kmp_u
int32 MSB>
1160 static inline unsigned __kmp_extract_bits(kmp_uint32 v) {
1161 const kmp_uint32 SHIFT_LEFT =
sizeof(kmp_uint32) * 8 - 1 - MSB;
1162 const kmp_uint32 SHIFT_RIGHT = LSB;
1163 kmp_uint32 retval = v;
1164 retval <<= SHIFT_LEFT;
1165 retval >>= (SHIFT_LEFT + SHIFT_RIGHT);
1169 static kmp_hw_t __kmp_intel_type_2_topology_type(
int intel_type) {
1170 switch (intel_type) {
1171 case INTEL_LEVEL_TYPE_INVALID:
1172 return KMP_HW_SOCKET;
1173 case INTEL_LEVEL_TYPE_SMT:
1174 return KMP_HW_THREAD;
1175 case INTEL_LEVEL_TYPE_CORE:
1178 case INTEL_LEVEL_TYPE_TILE:
1179 return KMP_HW_UNKNOWN;
1180 case INTEL_LEVEL_TYPE_MODULE:
1181 return KMP_HW_UNKNOWN;
1182 case INTEL_LEVEL_TYPE_DIE:
1185 return KMP_HW_UNKNOWN;
1192 __kmp_x2apicid_get_levels(
int leaf,
1193 cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST],
1194 kmp_uint64 known_levels) {
1195 unsigned level, levels_index;
1196 unsigned level_type, mask_width, nitems;
1205 level = levels_index = 0;
1207 __kmp_x86_cpuid(leaf, level, &buf);
1208 level_type = __kmp_extract_bits<8, 15>(buf.ecx);
1209 mask_width = __kmp_extract_bits<0, 4>(buf.eax);
1210 nitems = __kmp_extract_bits<0, 15>(buf.ebx);
1211 if (level_type != INTEL_LEVEL_TYPE_INVALID && nitems == 0)
1214 if (known_levels & (1ull << level_type)) {
1216 KMP_ASSERT(levels_index < INTEL_LEVEL_TYPE_LAST);
1217 levels[levels_index].level_type = level_type;
1218 levels[levels_index].mask_width = mask_width;
1219 levels[levels_index].nitems = nitems;
1223 if (levels_index > 0) {
1224 levels[levels_index - 1].mask_width = mask_width;
1225 levels[levels_index - 1].nitems = nitems;
1229 }
while (level_type != INTEL_LEVEL_TYPE_INVALID);
1232 for (
unsigned i = 0; i < levels_index; ++i) {
1233 if (levels[i].level_type != INTEL_LEVEL_TYPE_INVALID) {
1234 levels[i].mask = ~((-1) << levels[i].mask_width);
1235 levels[i].cache_mask = (-1) << levels[i].mask_width;
1236 for (
unsigned j = 0; j < i; ++j)
1237 levels[i].mask ^= levels[j].mask;
1239 KMP_DEBUG_ASSERT(levels_index > 0);
1240 levels[i].mask = (-1) << levels[i - 1].mask_width;
1241 levels[i].cache_mask = 0;
1244 return levels_index;
1247 static int __kmp_cpuid_mask_width(
int count) {
1250 while ((1 << r) < count)
1255 class apicThreadInfo {
1259 unsigned maxCoresPerPkg;
1260 unsigned maxThreadsPerPkg;
1266 static int __kmp_affinity_cmp_apicThreadInfo_phys_id(
const void *a,
1268 const apicThreadInfo *aa = (
const apicThreadInfo *)a;
1269 const apicThreadInfo *bb = (
const apicThreadInfo *)b;
1270 if (aa->pkgId < bb->pkgId)
1272 if (aa->pkgId > bb->pkgId)
1274 if (aa->coreId < bb->coreId)
1276 if (aa->coreId > bb->coreId)
1278 if (aa->threadId < bb->threadId)
1280 if (aa->threadId > bb->threadId)
1289 static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
1290 kmp_i18n_id_t *
const msg_id) {
1293 *msg_id = kmp_i18n_null;
1296 __kmp_x86_cpuid(0, 0, &buf);
1298 *msg_id = kmp_i18n_str_NoLeaf4Support;
1307 if (!KMP_AFFINITY_CAPABLE()) {
1310 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1316 __kmp_x86_cpuid(1, 0, &buf);
1317 int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
1318 if (maxThreadsPerPkg == 0) {
1319 maxThreadsPerPkg = 1;
1333 __kmp_x86_cpuid(0, 0, &buf);
1335 __kmp_x86_cpuid(4, 0, &buf);
1336 nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
1354 __kmp_ncores = __kmp_xproc;
1355 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1356 __kmp_nThreadsPerCore = 1;
1357 if (__kmp_affinity_verbose) {
1358 KMP_INFORM(AffNotCapableUseLocCpuid,
"KMP_AFFINITY");
1359 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1360 if (__kmp_affinity_uniform_topology()) {
1361 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1363 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
1365 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1366 __kmp_nThreadsPerCore, __kmp_ncores);
1376 kmp_affin_mask_t *oldMask;
1377 KMP_CPU_ALLOC(oldMask);
1378 KMP_ASSERT(oldMask != NULL);
1379 __kmp_get_system_affinity(oldMask, TRUE);
1407 apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
1408 __kmp_avail_proc *
sizeof(apicThreadInfo));
1409 unsigned nApics = 0;
1410 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
1412 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
1415 KMP_DEBUG_ASSERT((
int)nApics < __kmp_avail_proc);
1417 __kmp_affinity_dispatch->bind_thread(i);
1418 threadInfo[nApics].osId = i;
1421 __kmp_x86_cpuid(1, 0, &buf);
1422 if (((buf.edx >> 9) & 1) == 0) {
1423 __kmp_set_system_affinity(oldMask, TRUE);
1424 __kmp_free(threadInfo);
1425 KMP_CPU_FREE(oldMask);
1426 *msg_id = kmp_i18n_str_ApicNotPresent;
1429 threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
1430 threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
1431 if (threadInfo[nApics].maxThreadsPerPkg == 0) {
1432 threadInfo[nApics].maxThreadsPerPkg = 1;
1441 __kmp_x86_cpuid(0, 0, &buf);
1443 __kmp_x86_cpuid(4, 0, &buf);
1444 threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
1446 threadInfo[nApics].maxCoresPerPkg = 1;
1450 int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
1451 threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
1453 int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
1454 int widthT = widthCT - widthC;
1459 __kmp_set_system_affinity(oldMask, TRUE);
1460 __kmp_free(threadInfo);
1461 KMP_CPU_FREE(oldMask);
1462 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1466 int maskC = (1 << widthC) - 1;
1467 threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;
1469 int maskT = (1 << widthT) - 1;
1470 threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
1477 __kmp_set_system_affinity(oldMask, TRUE);
1486 KMP_ASSERT(nApics > 0);
1488 __kmp_ncores = nPackages = 1;
1489 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1490 if (__kmp_affinity_verbose) {
1491 KMP_INFORM(AffUseGlobCpuid,
"KMP_AFFINITY");
1492 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1493 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1494 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1495 __kmp_nThreadsPerCore, __kmp_ncores);
1498 if (__kmp_affinity_type == affinity_none) {
1499 __kmp_free(threadInfo);
1500 KMP_CPU_FREE(oldMask);
1504 *address2os = (AddrUnsPair *)__kmp_allocate(
sizeof(AddrUnsPair));
1506 addr.labels[0] = threadInfo[0].pkgId;
1507 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
1509 if (__kmp_affinity_gran_levels < 0) {
1510 __kmp_affinity_gran_levels = 0;
1513 if (__kmp_affinity_verbose) {
1514 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1517 __kmp_free(threadInfo);
1518 KMP_CPU_FREE(oldMask);
1523 qsort(threadInfo, nApics,
sizeof(*threadInfo),
1524 __kmp_affinity_cmp_apicThreadInfo_phys_id);
1541 __kmp_nThreadsPerCore = 1;
1542 unsigned nCores = 1;
1545 unsigned lastPkgId = threadInfo[0].pkgId;
1546 unsigned coreCt = 1;
1547 unsigned lastCoreId = threadInfo[0].coreId;
1548 unsigned threadCt = 1;
1549 unsigned lastThreadId = threadInfo[0].threadId;
1552 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1553 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1555 for (i = 1; i < nApics; i++) {
1556 if (threadInfo[i].pkgId != lastPkgId) {
1559 lastPkgId = threadInfo[i].pkgId;
1560 if ((
int)coreCt > nCoresPerPkg)
1561 nCoresPerPkg = coreCt;
1563 lastCoreId = threadInfo[i].coreId;
1564 if ((
int)threadCt > __kmp_nThreadsPerCore)
1565 __kmp_nThreadsPerCore = threadCt;
1567 lastThreadId = threadInfo[i].threadId;
1571 prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1572 prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1576 if (threadInfo[i].coreId != lastCoreId) {
1579 lastCoreId = threadInfo[i].coreId;
1580 if ((
int)threadCt > __kmp_nThreadsPerCore)
1581 __kmp_nThreadsPerCore = threadCt;
1583 lastThreadId = threadInfo[i].threadId;
1584 }
else if (threadInfo[i].threadId != lastThreadId) {
1586 lastThreadId = threadInfo[i].threadId;
1588 __kmp_free(threadInfo);
1589 KMP_CPU_FREE(oldMask);
1590 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1596 if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
1597 (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1598 __kmp_free(threadInfo);
1599 KMP_CPU_FREE(oldMask);
1600 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1605 if ((
int)coreCt > nCoresPerPkg)
1606 nCoresPerPkg = coreCt;
1607 if ((
int)threadCt > __kmp_nThreadsPerCore)
1608 __kmp_nThreadsPerCore = threadCt;
1614 __kmp_ncores = nCores;
1615 if (__kmp_affinity_verbose) {
1616 KMP_INFORM(AffUseGlobCpuid,
"KMP_AFFINITY");
1617 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1618 if (__kmp_affinity_uniform_topology()) {
1619 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1621 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
1623 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1624 __kmp_nThreadsPerCore, __kmp_ncores);
1626 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
1627 KMP_DEBUG_ASSERT(nApics == (
unsigned)__kmp_avail_proc);
1628 __kmp_pu_os_idx = (
int *)__kmp_allocate(
sizeof(
int) * __kmp_avail_proc);
1629 for (i = 0; i < nApics; ++i) {
1630 __kmp_pu_os_idx[i] = threadInfo[i].osId;
1632 if (__kmp_affinity_type == affinity_none) {
1633 __kmp_free(threadInfo);
1634 KMP_CPU_FREE(oldMask);
1642 int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
1644 (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
1645 unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
1647 KMP_ASSERT(depth > 0);
1648 *address2os = (AddrUnsPair *)__kmp_allocate(
sizeof(AddrUnsPair) * nApics);
1650 for (i = 0; i < nApics; ++i) {
1651 Address addr(depth);
1652 unsigned os = threadInfo[i].osId;
1655 if (pkgLevel >= 0) {
1656 addr.labels[d++] = threadInfo[i].pkgId;
1658 if (coreLevel >= 0) {
1659 addr.labels[d++] = threadInfo[i].coreId;
1661 if (threadLevel >= 0) {
1662 addr.labels[d++] = threadInfo[i].threadId;
1664 (*address2os)[i] = AddrUnsPair(addr, os);
1667 if (__kmp_affinity_gran_levels < 0) {
1670 __kmp_affinity_gran_levels = 0;
1671 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1672 __kmp_affinity_gran_levels++;
1674 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1675 __kmp_affinity_gran_levels++;
1677 if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1678 __kmp_affinity_gran_levels++;
1682 if (__kmp_affinity_verbose) {
1683 __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1684 coreLevel, threadLevel);
1687 __kmp_free(threadInfo);
1688 KMP_CPU_FREE(oldMask);
1695 static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1696 kmp_i18n_id_t *
const msg_id) {
1698 cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST];
1699 int ratio[KMP_HW_LAST];
1700 int count[KMP_HW_LAST];
1701 kmp_hw_t types[INTEL_LEVEL_TYPE_LAST];
1702 unsigned levels_index;
1704 kmp_uint64 known_levels;
1705 int topology_leaf, highest_leaf, apic_id;
1707 static int leaves[] = {0, 0};
1709 kmp_i18n_id_t leaf_message_id;
1711 KMP_BUILD_ASSERT(
sizeof(known_levels) * CHAR_BIT > KMP_HW_LAST);
1713 *msg_id = kmp_i18n_null;
1716 known_levels = 0ull;
1717 for (
int i = 0; i < INTEL_LEVEL_TYPE_LAST; ++i) {
1718 if (__kmp_intel_type_2_topology_type(i) != KMP_HW_UNKNOWN) {
1719 known_levels |= (1ull << i);
1724 __kmp_x86_cpuid(0, 0, &buf);
1725 highest_leaf = buf.eax;
1730 if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
1733 leaf_message_id = kmp_i18n_str_NoLeaf11Support;
1734 }
else if (__kmp_affinity_top_method == affinity_top_method_x2apicid_1f) {
1737 leaf_message_id = kmp_i18n_str_NoLeaf31Support;
1742 leaf_message_id = kmp_i18n_str_NoLeaf11Support;
1746 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
1748 for (
int i = 0; i < num_leaves; ++i) {
1749 int leaf = leaves[i];
1750 if (highest_leaf < leaf)
1752 __kmp_x86_cpuid(leaf, 0, &buf);
1755 topology_leaf = leaf;
1756 levels_index = __kmp_x2apicid_get_levels(leaf, levels, known_levels);
1757 if (levels_index == 0)
1761 if (topology_leaf == -1 || levels_index == 0) {
1762 *msg_id = leaf_message_id;
1765 KMP_ASSERT(levels_index <= INTEL_LEVEL_TYPE_LAST);
1772 if (!KMP_AFFINITY_CAPABLE()) {
1775 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1777 for (
unsigned i = 0; i < levels_index; ++i) {
1778 if (levels[i].level_type == INTEL_LEVEL_TYPE_SMT) {
1779 __kmp_nThreadsPerCore = levels[i].nitems;
1780 }
else if (levels[i].level_type == INTEL_LEVEL_TYPE_CORE) {
1781 nCoresPerPkg = levels[i].nitems;
1782 }
else if (levels[i].level_type == INTEL_LEVEL_TYPE_DIE) {
1783 nDiesPerPkg = levels[i].nitems;
1786 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1787 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1788 if (__kmp_affinity_verbose) {
1789 KMP_INFORM(AffNotCapableUseLocCpuidL,
"KMP_AFFINITY", topology_leaf);
1790 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1791 if (__kmp_affinity_uniform_topology()) {
1792 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1794 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
1796 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1797 __kmp_nThreadsPerCore, __kmp_ncores);
1807 kmp_affin_mask_t *oldMask;
1808 KMP_CPU_ALLOC(oldMask);
1809 __kmp_get_system_affinity(oldMask, TRUE);
1812 int depth = levels_index;
1813 for (
int i = depth - 1, j = 0; i >= 0; --i, ++j)
1814 types[j] = __kmp_intel_type_2_topology_type(levels[i].level_type);
1815 AddrUnsPair *retval =
1816 (AddrUnsPair *)__kmp_allocate(
sizeof(AddrUnsPair) * __kmp_avail_proc);
1822 KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
1823 cpuid_level_info_t my_levels[INTEL_LEVEL_TYPE_LAST];
1824 unsigned my_levels_index;
1827 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
1830 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1832 __kmp_affinity_dispatch->bind_thread(proc);
1835 __kmp_x86_cpuid(topology_leaf, 0, &buf);
1837 Address addr(depth);
1839 __kmp_x2apicid_get_levels(topology_leaf, my_levels, known_levels);
1840 if (my_levels_index == 0 || my_levels_index != levels_index) {
1841 KMP_CPU_FREE(oldMask);
1842 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1846 for (
unsigned j = 0, idx = depth - 1; j < my_levels_index; ++j, --idx) {
1847 addr.labels[idx] = apic_id & my_levels[j].mask;
1849 addr.labels[idx] >>= my_levels[j - 1].mask_width;
1851 retval[nApics++] = AddrUnsPair(addr, proc);
1856 __kmp_set_system_affinity(oldMask, TRUE);
1859 KMP_ASSERT(nApics > 0);
1862 __kmp_ncores = nPackages = 1;
1863 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1864 if (__kmp_affinity_verbose) {
1865 KMP_INFORM(AffUseGlobCpuidL,
"KMP_AFFINITY", topology_leaf);
1866 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1867 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1868 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1869 __kmp_nThreadsPerCore, __kmp_ncores);
1872 if (__kmp_affinity_type == affinity_none) {
1874 KMP_CPU_FREE(oldMask);
1879 for (
int i = 0; i < depth; ++i)
1880 if (types[i] == KMP_HW_SOCKET) {
1886 addr.labels[0] = retval[0].first.labels[pkg_level];
1887 retval[0].first = addr;
1889 if (__kmp_affinity_gran_levels < 0) {
1890 __kmp_affinity_gran_levels = 0;
1893 if (__kmp_affinity_verbose) {
1894 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1897 *address2os = retval;
1898 KMP_CPU_FREE(oldMask);
1903 qsort(retval, nApics,
sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1905 __kmp_affinity_gather_enumeration_information(retval, nApics, depth, types,
1912 int thread_level, core_level, socket_level, die_level;
1913 thread_level = core_level = die_level = socket_level = -1;
1914 for (
int level = 0; level < depth; ++level) {
1915 if (types[level] == KMP_HW_THREAD)
1916 thread_level = level;
1917 else if (types[level] == KMP_HW_CORE)
1919 else if (types[level] == KMP_HW_DIE)
1921 else if (types[level] == KMP_HW_SOCKET)
1922 socket_level = level;
1924 __kmp_nThreadsPerCore =
1925 __kmp_affinity_calculate_ratio(ratio, thread_level, core_level);
1926 if (die_level > 0) {
1928 __kmp_affinity_calculate_ratio(ratio, die_level, socket_level);
1929 nCoresPerPkg = __kmp_affinity_calculate_ratio(ratio, core_level, die_level);
1932 __kmp_affinity_calculate_ratio(ratio, core_level, socket_level);
1934 if (socket_level >= 0)
1935 nPackages = count[socket_level];
1938 if (core_level >= 0)
1939 __kmp_ncores = count[core_level];
1944 unsigned uniform = __kmp_affinity_discover_uniformity(depth, ratio, count);
1947 if (__kmp_affinity_verbose) {
1948 kmp_hw_t numerator_type, denominator_type;
1949 KMP_INFORM(AffUseGlobCpuidL,
"KMP_AFFINITY", topology_leaf);
1950 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1952 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1954 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
1958 __kmp_str_buf_init(&buf);
1961 core_level = depth - 1;
1962 int ncores = count[core_level];
1964 denominator_type = KMP_HW_UNKNOWN;
1965 for (
int level = 0; level < depth; ++level) {
1968 numerator_type = types[level];
1972 __kmp_str_buf_print(&buf,
"%d %s", c, __kmp_hw_get_catalog_string(
1973 numerator_type, plural));
1975 __kmp_str_buf_print(&buf,
" x %d %s/%s", c,
1976 __kmp_hw_get_catalog_string(numerator_type, plural),
1977 __kmp_hw_get_catalog_string(denominator_type));
1979 denominator_type = numerator_type;
1981 KMP_INFORM(TopologyGeneric,
"KMP_AFFINITY", buf.str, ncores);
1982 __kmp_str_buf_free(&buf);
1985 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
1986 KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
1987 __kmp_pu_os_idx = (
int *)__kmp_allocate(
sizeof(
int) * __kmp_avail_proc);
1988 for (proc = 0; (int)proc < nApics; ++proc) {
1989 __kmp_pu_os_idx[proc] = retval[proc].second;
1991 if (__kmp_affinity_type == affinity_none) {
1993 KMP_CPU_FREE(oldMask);
1999 depth = __kmp_affinity_remove_radix_one_levels(retval, nApics, depth, types);
2000 thread_level = core_level = die_level = socket_level = -1;
2001 for (
int level = 0; level < depth; ++level) {
2002 if (types[level] == KMP_HW_THREAD)
2003 thread_level = level;
2004 else if (types[level] == KMP_HW_CORE)
2006 else if (types[level] == KMP_HW_DIE)
2008 else if (types[level] == KMP_HW_SOCKET)
2009 socket_level = level;
2012 if (__kmp_affinity_gran_levels < 0) {
2015 __kmp_affinity_gran_levels = 0;
2016 if ((thread_level >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
2017 __kmp_affinity_gran_levels++;
2019 if ((core_level >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
2020 __kmp_affinity_gran_levels++;
2022 if ((die_level >= 0) && (__kmp_affinity_gran > affinity_gran_die)) {
2023 __kmp_affinity_gran_levels++;
2025 if (__kmp_affinity_gran > affinity_gran_package) {
2026 __kmp_affinity_gran_levels++;
2030 if (__kmp_affinity_verbose) {
2031 __kmp_affinity_print_topology(retval, nApics, depth, types);
2034 KMP_CPU_FREE(oldMask);
2035 *address2os = retval;
2042 #define threadIdIndex 1
2043 #define coreIdIndex 2
2044 #define pkgIdIndex 3
2045 #define nodeIdIndex 4
2047 typedef unsigned *ProcCpuInfo;
2048 static unsigned maxIndex = pkgIdIndex;
2050 static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(
const void *a,
2053 const unsigned *aa = *(
unsigned *
const *)a;
2054 const unsigned *bb = *(
unsigned *
const *)b;
2055 for (i = maxIndex;; i--) {
2066 #if KMP_USE_HIER_SCHED
2068 static void __kmp_dispatch_set_hierarchy_values() {
2074 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
2075 nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
2076 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
2077 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) && \
2079 if (__kmp_mic_type >= mic3)
2080 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
2083 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
2084 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
2085 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
2086 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
2089 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
2090 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
2091 __kmp_nThreadsPerCore;
2092 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) && \
2094 if (__kmp_mic_type >= mic3)
2095 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
2096 2 * __kmp_nThreadsPerCore;
2099 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
2100 __kmp_nThreadsPerCore;
2101 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
2102 nCoresPerPkg * __kmp_nThreadsPerCore;
2103 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
2104 nCoresPerPkg * __kmp_nThreadsPerCore;
2105 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
2106 nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
2111 int __kmp_dispatch_get_index(
int tid, kmp_hier_layer_e type) {
2112 int index = type + 1;
2113 int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
2114 KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
2115 if (type == kmp_hier_layer_e::LAYER_THREAD)
2117 else if (type == kmp_hier_layer_e::LAYER_LOOP)
2119 KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
2120 if (tid >= num_hw_threads)
2121 tid = tid % num_hw_threads;
2122 return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
2126 int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
2129 KMP_DEBUG_ASSERT(i1 <= i2);
2130 KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
2131 KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
2132 KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
2134 return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
2140 static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
2142 kmp_i18n_id_t *
const msg_id,
2145 *msg_id = kmp_i18n_null;
2150 unsigned num_records = 0;
2152 buf[
sizeof(buf) - 1] = 1;
2153 if (!fgets(buf,
sizeof(buf), f)) {
2158 char s1[] =
"processor";
2159 if (strncmp(buf, s1,
sizeof(s1) - 1) == 0) {
2166 if (KMP_SSCANF(buf,
"node_%u id", &level) == 1) {
2167 if (nodeIdIndex + level >= maxIndex) {
2168 maxIndex = nodeIdIndex + level;
2176 if (num_records == 0) {
2178 *msg_id = kmp_i18n_str_NoProcRecords;
2181 if (num_records > (
unsigned)__kmp_xproc) {
2183 *msg_id = kmp_i18n_str_TooManyProcRecords;
2192 if (fseek(f, 0, SEEK_SET) != 0) {
2194 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
2200 unsigned **threadInfo =
2201 (
unsigned **)__kmp_allocate((num_records + 1) *
sizeof(
unsigned *));
2203 for (i = 0; i <= num_records; i++) {
2205 (
unsigned *)__kmp_allocate((maxIndex + 1) *
sizeof(unsigned));
2208 #define CLEANUP_THREAD_INFO \
2209 for (i = 0; i <= num_records; i++) { \
2210 __kmp_free(threadInfo[i]); \
2212 __kmp_free(threadInfo);
2217 #define INIT_PROC_INFO(p) \
2218 for (__index = 0; __index <= maxIndex; __index++) { \
2219 (p)[__index] = UINT_MAX; \
2222 for (i = 0; i <= num_records; i++) {
2223 INIT_PROC_INFO(threadInfo[i]);
2226 unsigned num_avail = 0;
2233 buf[
sizeof(buf) - 1] = 1;
2234 bool long_line =
false;
2235 if (!fgets(buf,
sizeof(buf), f)) {
2240 for (i = 0; i <= maxIndex; i++) {
2241 if (threadInfo[num_avail][i] != UINT_MAX) {
2249 }
else if (!buf[
sizeof(buf) - 1]) {
2254 #define CHECK_LINE \
2256 CLEANUP_THREAD_INFO; \
2257 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
2263 char s1[] =
"processor";
2264 if (strncmp(buf, s1,
sizeof(s1) - 1) == 0) {
2266 char *p = strchr(buf +
sizeof(s1) - 1,
':');
2268 if ((p == NULL) || (KMP_SSCANF(p + 1,
"%u\n", &val) != 1))
2270 if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
2271 #if KMP_ARCH_AARCH64
2280 threadInfo[num_avail][osIdIndex] = val;
2281 #if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
2285 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
2286 threadInfo[num_avail][osIdIndex]);
2287 __kmp_read_from_file(path,
"%u", &threadInfo[num_avail][pkgIdIndex]);
2289 KMP_SNPRINTF(path,
sizeof(path),
2290 "/sys/devices/system/cpu/cpu%u/topology/core_id",
2291 threadInfo[num_avail][osIdIndex]);
2292 __kmp_read_from_file(path,
"%u", &threadInfo[num_avail][coreIdIndex]);
2296 char s2[] =
"physical id";
2297 if (strncmp(buf, s2,
sizeof(s2) - 1) == 0) {
2299 char *p = strchr(buf +
sizeof(s2) - 1,
':');
2301 if ((p == NULL) || (KMP_SSCANF(p + 1,
"%u\n", &val) != 1))
2303 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
2305 threadInfo[num_avail][pkgIdIndex] = val;
2308 char s3[] =
"core id";
2309 if (strncmp(buf, s3,
sizeof(s3) - 1) == 0) {
2311 char *p = strchr(buf +
sizeof(s3) - 1,
':');
2313 if ((p == NULL) || (KMP_SSCANF(p + 1,
"%u\n", &val) != 1))
2315 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
2317 threadInfo[num_avail][coreIdIndex] = val;
2321 char s4[] =
"thread id";
2322 if (strncmp(buf, s4,
sizeof(s4) - 1) == 0) {
2324 char *p = strchr(buf +
sizeof(s4) - 1,
':');
2326 if ((p == NULL) || (KMP_SSCANF(p + 1,
"%u\n", &val) != 1))
2328 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
2330 threadInfo[num_avail][threadIdIndex] = val;
2334 if (KMP_SSCANF(buf,
"node_%u id", &level) == 1) {
2336 char *p = strchr(buf +
sizeof(s4) - 1,
':');
2338 if ((p == NULL) || (KMP_SSCANF(p + 1,
"%u\n", &val) != 1))
2340 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2341 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
2343 threadInfo[num_avail][nodeIdIndex + level] = val;
2350 if ((*buf != 0) && (*buf !=
'\n')) {
2355 while (((ch = fgetc(f)) != EOF) && (ch !=
'\n'))
2363 if ((
int)num_avail == __kmp_xproc) {
2364 CLEANUP_THREAD_INFO;
2365 *msg_id = kmp_i18n_str_TooManyEntries;
2371 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2372 CLEANUP_THREAD_INFO;
2373 *msg_id = kmp_i18n_str_MissingProcField;
2376 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2377 CLEANUP_THREAD_INFO;
2378 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2383 if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
2384 __kmp_affin_fullMask)) {
2385 INIT_PROC_INFO(threadInfo[num_avail]);
2392 KMP_ASSERT(num_avail <= num_records);
2393 INIT_PROC_INFO(threadInfo[num_avail]);
2398 CLEANUP_THREAD_INFO;
2399 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2403 CLEANUP_THREAD_INFO;
2404 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2409 #if KMP_MIC && REDUCE_TEAM_SIZE
2410 unsigned teamSize = 0;
2422 KMP_ASSERT(num_avail > 0);
2423 KMP_ASSERT(num_avail <= num_records);
2424 if (num_avail == 1) {
2426 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
2427 if (__kmp_affinity_verbose) {
2428 if (!KMP_AFFINITY_CAPABLE()) {
2429 KMP_INFORM(AffNotCapableUseCpuinfo,
"KMP_AFFINITY");
2430 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
2431 KMP_INFORM(Uniform,
"KMP_AFFINITY");
2433 KMP_INFORM(AffCapableUseCpuinfo,
"KMP_AFFINITY");
2434 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
2435 KMP_INFORM(Uniform,
"KMP_AFFINITY");
2439 __kmp_str_buf_init(&buf);
2440 __kmp_str_buf_print(&buf,
"1");
2441 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2442 __kmp_str_buf_print(&buf,
" x 1");
2444 KMP_INFORM(TopologyExtra,
"KMP_AFFINITY", buf.str, 1, 1, 1);
2445 __kmp_str_buf_free(&buf);
2448 if (__kmp_affinity_type == affinity_none) {
2449 CLEANUP_THREAD_INFO;
2453 *address2os = (AddrUnsPair *)__kmp_allocate(
sizeof(AddrUnsPair));
2455 addr.labels[0] = threadInfo[0][pkgIdIndex];
2456 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2458 if (__kmp_affinity_gran_levels < 0) {
2459 __kmp_affinity_gran_levels = 0;
2462 if (__kmp_affinity_verbose) {
2463 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2466 CLEANUP_THREAD_INFO;
2471 qsort(threadInfo, num_avail,
sizeof(*threadInfo),
2472 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2484 (
unsigned *)__kmp_allocate((maxIndex + 1) *
sizeof(unsigned));
2486 (
unsigned *)__kmp_allocate((maxIndex + 1) *
sizeof(unsigned));
2488 (
unsigned *)__kmp_allocate((maxIndex + 1) *
sizeof(unsigned));
2490 (
unsigned *)__kmp_allocate((maxIndex + 1) *
sizeof(unsigned));
2492 bool assign_thread_ids =
false;
2493 unsigned threadIdCt;
2496 restart_radix_check:
2500 if (assign_thread_ids) {
2501 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2502 threadInfo[0][threadIdIndex] = threadIdCt++;
2503 }
else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2504 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2507 for (index = 0; index <= maxIndex; index++) {
2511 lastId[index] = threadInfo[0][index];
2516 for (i = 1; i < num_avail; i++) {
2519 for (index = maxIndex; index >= threadIdIndex; index--) {
2520 if (assign_thread_ids && (index == threadIdIndex)) {
2522 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2523 threadInfo[i][threadIdIndex] = threadIdCt++;
2527 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2528 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2531 if (threadInfo[i][index] != lastId[index]) {
2536 for (index2 = threadIdIndex; index2 < index; index2++) {
2538 if (counts[index2] > maxCt[index2]) {
2539 maxCt[index2] = counts[index2];
2542 lastId[index2] = threadInfo[i][index2];
2546 lastId[index] = threadInfo[i][index];
2548 if (assign_thread_ids && (index > threadIdIndex)) {
2550 #if KMP_MIC && REDUCE_TEAM_SIZE
2553 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
2560 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2561 threadInfo[i][threadIdIndex] = threadIdCt++;
2567 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2568 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2574 if (index < threadIdIndex) {
2578 if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
2583 CLEANUP_THREAD_INFO;
2584 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2590 assign_thread_ids =
true;
2591 goto restart_radix_check;
2595 #if KMP_MIC && REDUCE_TEAM_SIZE
2598 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
2601 for (index = threadIdIndex; index <= maxIndex; index++) {
2602 if (counts[index] > maxCt[index]) {
2603 maxCt[index] = counts[index];
2607 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2608 nCoresPerPkg = maxCt[coreIdIndex];
2609 nPackages = totals[pkgIdIndex];
2612 unsigned prod = totals[maxIndex];
2613 for (index = threadIdIndex; index < maxIndex; index++) {
2614 prod *= maxCt[index];
2616 bool uniform = (prod == totals[threadIdIndex]);
2622 __kmp_ncores = totals[coreIdIndex];
2624 if (__kmp_affinity_verbose) {
2625 if (!KMP_AFFINITY_CAPABLE()) {
2626 KMP_INFORM(AffNotCapableUseCpuinfo,
"KMP_AFFINITY");
2627 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
2629 KMP_INFORM(Uniform,
"KMP_AFFINITY");
2631 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
2634 KMP_INFORM(AffCapableUseCpuinfo,
"KMP_AFFINITY");
2635 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
2637 KMP_INFORM(Uniform,
"KMP_AFFINITY");
2639 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
2643 __kmp_str_buf_init(&buf);
2645 __kmp_str_buf_print(&buf,
"%d", totals[maxIndex]);
2646 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2647 __kmp_str_buf_print(&buf,
" x %d", maxCt[index]);
2649 KMP_INFORM(TopologyExtra,
"KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2650 maxCt[threadIdIndex], __kmp_ncores);
2652 __kmp_str_buf_free(&buf);
2655 #if KMP_MIC && REDUCE_TEAM_SIZE
2657 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2658 __kmp_dflt_team_nth = teamSize;
2659 KA_TRACE(20, (
"__kmp_affinity_create_cpuinfo_map: setting "
2660 "__kmp_dflt_team_nth = %d\n",
2661 __kmp_dflt_team_nth));
2665 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
2666 KMP_DEBUG_ASSERT(num_avail == (
unsigned)__kmp_avail_proc);
2667 __kmp_pu_os_idx = (
int *)__kmp_allocate(
sizeof(
int) * __kmp_avail_proc);
2668 for (i = 0; i < num_avail; ++i) {
2669 __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
2672 if (__kmp_affinity_type == affinity_none) {
2677 CLEANUP_THREAD_INFO;
2686 bool *inMap = (
bool *)__kmp_allocate((maxIndex + 1) *
sizeof(bool));
2687 for (index = threadIdIndex; index < maxIndex; index++) {
2688 KMP_ASSERT(totals[index] >= totals[index + 1]);
2689 inMap[index] = (totals[index] > totals[index + 1]);
2691 inMap[maxIndex] = (totals[maxIndex] > 1);
2692 inMap[pkgIdIndex] =
true;
2695 for (index = threadIdIndex; index <= maxIndex; index++) {
2700 KMP_ASSERT(depth > 0);
2703 *address2os = (AddrUnsPair *)__kmp_allocate(
sizeof(AddrUnsPair) * num_avail);
2706 int threadLevel = -1;
2708 for (i = 0; i < num_avail; ++i) {
2709 Address addr(depth);
2710 unsigned os = threadInfo[i][osIdIndex];
2714 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2715 if (!inMap[src_index]) {
2718 addr.labels[dst_index] = threadInfo[i][src_index];
2719 if (src_index == pkgIdIndex) {
2720 pkgLevel = dst_index;
2721 }
else if (src_index == coreIdIndex) {
2722 coreLevel = dst_index;
2723 }
else if (src_index == threadIdIndex) {
2724 threadLevel = dst_index;
2728 (*address2os)[i] = AddrUnsPair(addr, os);
2731 if (__kmp_affinity_gran_levels < 0) {
2735 __kmp_affinity_gran_levels = 0;
2736 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2737 if (!inMap[src_index]) {
2740 switch (src_index) {
2742 if (__kmp_affinity_gran > affinity_gran_thread) {
2743 __kmp_affinity_gran_levels++;
2748 if (__kmp_affinity_gran > affinity_gran_core) {
2749 __kmp_affinity_gran_levels++;
2754 if (__kmp_affinity_gran > affinity_gran_package) {
2755 __kmp_affinity_gran_levels++;
2762 if (__kmp_affinity_verbose) {
2763 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2764 coreLevel, threadLevel);
2772 CLEANUP_THREAD_INFO;
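// Group the sorted address2os table into "places": hardware threads whose
// topology labels match down to __kmp_affinity_gran_levels share one OS-proc
// mask. Returns the mask array indexed by OS proc id and reports the number
// of unique masks plus the largest OS proc id encountered.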
static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
                                            unsigned *numUnique,
                                            AddrUnsPair *address2os,
                                            unsigned numAddrs) {
2788 KMP_ASSERT(numAddrs > 0);
2789 depth = address2os[0].first.depth;
2792 for (i = numAddrs - 1;; --i) {
2793 unsigned osId = address2os[i].second;
2794 if (osId > maxOsId) {
2800 kmp_affin_mask_t *osId2Mask;
2801 KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));
  qsort(address2os, numAddrs, sizeof(*address2os),
        __kmp_affinity_cmp_Address_labels);
2808 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2809 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
    KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
  if (__kmp_affinity_gran_levels >= (int)depth) {
2813 if (__kmp_affinity_verbose ||
2814 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
2815 KMP_WARNING(AffThreadsMayMigrate);
2823 unsigned unique = 0;
2825 unsigned leader = 0;
2826 Address *leaderAddr = &(address2os[0].first);
2827 kmp_affin_mask_t *sum;
2828 KMP_CPU_ALLOC_ON_STACK(sum);
2830 KMP_CPU_SET(address2os[0].second, sum);
2831 for (i = 1; i < numAddrs; i++) {
2835 if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
2836 KMP_CPU_SET(address2os[i].second, sum);
2842 for (; j < i; j++) {
2843 unsigned osId = address2os[j].second;
2844 KMP_DEBUG_ASSERT(osId <= maxOsId);
2845 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2846 KMP_CPU_COPY(mask, sum);
2847 address2os[j].first.leader = (j == leader);
2853 leaderAddr = &(address2os[i].first);
2855 KMP_CPU_SET(address2os[i].second, sum);
2860 for (; j < i; j++) {
2861 unsigned osId = address2os[j].second;
2862 KMP_DEBUG_ASSERT(osId <= maxOsId);
2863 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2864 KMP_CPU_COPY(mask, sum);
2865 address2os[j].first.leader = (j == leader);
2868 KMP_CPU_FREE_FROM_STACK(sum);
2870 *maxIndex = maxOsId;
2871 *numUnique = unique;
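// Scratch array used while parsing explicit proc/place lists; masks are
// appended one at a time and the array is doubled whenever it fills up.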
2878 static kmp_affin_mask_t *newMasks;
2879 static int numNewMasks;
2880 static int nextNewMask;
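// ADD_MASK appends a mask to newMasks (growing the array when full);
// ADD_MASK_OSID first checks that the OS proc id is valid and available in
// osId2Mask, warning and skipping it otherwise.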
2882 #define ADD_MASK(_mask) \
2884 if (nextNewMask >= numNewMasks) { \
2887 kmp_affin_mask_t *temp; \
2888 KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
2889 for (i = 0; i < numNewMasks / 2; i++) { \
2890 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i); \
2891 kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i); \
2892 KMP_CPU_COPY(dest, src); \
2894 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2); \
2897 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2901 #define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId) \
2903 if (((_osId) > _maxOsId) || \
2904 (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
2905 if (__kmp_affinity_verbose || \
2906 (__kmp_affinity_warnings && \
2907 (__kmp_affinity_type != affinity_none))) { \
2908 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2911 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
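// Parse an explicit KMP_AFFINITY proc list: comma-separated OS proc ids,
// {...} groups that are OR-ed into a single mask, and start-end[:stride]
// ranges, producing one affinity mask per resulting place.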
2917 static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2918 unsigned int *out_numMasks,
2919 const char *proclist,
2920 kmp_affin_mask_t *osId2Mask,
2923 const char *scan = proclist;
2924 const char *next = proclist;
2929 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
2931 kmp_affin_mask_t *sumMask;
2932 KMP_CPU_ALLOC(sumMask);
2936 int start, end, stride;
    if (*next == '\0') {
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist");
    num = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(num >= 0, "bad explicit proc list");
    if ((num > maxOsId) ||
        (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings &&
           (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffIgnoreInvalidProcID, num);
      KMP_CPU_ZERO(sumMask);
      KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
      KMP_ASSERT2((*next >= '0') && (*next <= '9'),
                  "bad explicit proc list");
      num = __kmp_str_to_int(scan, *next);
      KMP_ASSERT2(num >= 0, "bad explicit proc list");
      if ((num > maxOsId) ||
          (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, num);
        KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
    start = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(start >= 0, "bad explicit proc list");
    ADD_MASK_OSID(start, osId2Mask, maxOsId);
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
    end = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(end >= 0, "bad explicit proc list");
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
    stride = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(stride >= 0, "bad explicit proc list");
    KMP_ASSERT2(stride != 0, "bad explicit proc list");
    KMP_ASSERT2(start <= end, "bad explicit proc list");
    KMP_ASSERT2(start >= end, "bad explicit proc list");
    KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
      ADD_MASK_OSID(start, osId2Mask, maxOsId);
    } while (start <= end);
      ADD_MASK_OSID(start, osId2Mask, maxOsId);
    } while (start >= end);
3098 *out_numMasks = nextNewMask;
3099 if (nextNewMask == 0) {
3101 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3104 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
3105 for (i = 0; i < nextNewMask; i++) {
3106 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
3107 kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
3108 KMP_CPU_COPY(dest, src);
3110 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3111 KMP_CPU_FREE(sumMask);
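// Parse one {...} subplace of an OMP_PLACES entry: a starting OS proc id,
// optionally followed by :count and :stride, OR-ing each selected proc into
// tempMask.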
static void __kmp_process_subplace_list(const char **scan,
                                        kmp_affin_mask_t *osId2Mask,
                                        int maxOsId, kmp_affin_mask_t *tempMask,
  int start, count, stride, i;
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
                "bad explicit places list");
    start = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(start >= 0);
    if (**scan == '}' || **scan == ',') {
      if ((start > maxOsId) ||
          (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, start);
        KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
      if (**scan == '}') {
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
                "bad explicit places list");
    count = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(count >= 0);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
      if (**scan == '}') {
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    if (**scan == '+') {
    if (**scan == '-') {
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
                "bad explicit places list");
    stride = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(stride >= 0);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
      if (**scan == '}') {
  KMP_ASSERT2(0, "bad explicit places list");
static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
                                int maxOsId, kmp_affin_mask_t *tempMask,
  if (**scan == '{') {
    __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_ASSERT2(**scan == '}', "bad explicit places list");
  } else if (**scan == '!') {
    __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_CPU_COMPLEMENT(maxOsId, tempMask);
  } else if ((**scan >= '0') && (**scan <= '9')) {
    int num = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(num >= 0);
    if ((num > maxOsId) ||
        (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffIgnoreInvalidProcID, num);
      KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
    KMP_ASSERT2(0, "bad explicit places list");
3302 void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3303 unsigned int *out_numMasks,
3304 const char *placelist,
3305 kmp_affin_mask_t *osId2Mask,
3307 int i, j, count, stride, sign;
3308 const char *scan = placelist;
3309 const char *next = placelist;
3312 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
3318 kmp_affin_mask_t *tempMask;
3319 kmp_affin_mask_t *previousMask;
3320 KMP_CPU_ALLOC(tempMask);
3321 KMP_CPU_ZERO(tempMask);
3322 KMP_CPU_ALLOC(previousMask);
3323 KMP_CPU_ZERO(previousMask);
3327 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
    if (*scan == '\0' || *scan == ',') {
      KMP_CPU_ZERO(tempMask);
      if (*scan == '\0') {
    KMP_ASSERT2(*scan == ':', "bad explicit places list");
    KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
    count = __kmp_str_to_int(scan, *next);
    KMP_ASSERT(count >= 0);
    if (*scan == '\0' || *scan == ',') {
    KMP_ASSERT2(*scan == ':', "bad explicit places list");
    KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
    stride = __kmp_str_to_int(scan, *next);
    KMP_DEBUG_ASSERT(stride >= 0);
3390 for (i = 0; i < count; i++) {
3395 KMP_CPU_COPY(previousMask, tempMask);
3396 ADD_MASK(previousMask);
3397 KMP_CPU_ZERO(tempMask);
3399 KMP_CPU_SET_ITERATE(j, previousMask) {
3400 if (!KMP_CPU_ISSET(j, previousMask)) {
3403 if ((j + stride > maxOsId) || (j + stride < 0) ||
3404 (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
3405 (!KMP_CPU_ISSET(j + stride,
3406 KMP_CPU_INDEX(osId2Mask, j + stride)))) {
3407 if ((__kmp_affinity_verbose ||
3408 (__kmp_affinity_warnings &&
3409 (__kmp_affinity_type != affinity_none))) &&
3411 KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
3415 KMP_CPU_SET(j + stride, tempMask);
3419 KMP_CPU_ZERO(tempMask);
    if (*scan == '\0') {
  KMP_ASSERT2(0, "bad explicit places list");
3435 *out_numMasks = nextNewMask;
3436 if (nextNewMask == 0) {
3438 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3441 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
3442 KMP_CPU_FREE(tempMask);
3443 KMP_CPU_FREE(previousMask);
3444 for (i = 0; i < nextNewMask; i++) {
3445 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
3446 kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
3447 KMP_CPU_COPY(dest, src);
3449 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3453 #undef ADD_MASK_OSID
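// Remove from __kmp_affin_fullMask every PU below the given hwloc object,
// returning how many PUs were skipped.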
static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
      KMP_CPU_CLR(idx, __kmp_affin_fullMask);
      KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
  hwloc_obj_t hT = NULL;
  int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  for (int i = 0; i < N; ++i) {
    KMP_DEBUG_ASSERT(hT);
    unsigned idx = hT->os_index;
    if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
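// Apply the KMP_HW_SUBSET restriction: walk the detected topology (via hwloc
// when available, otherwise the flat package/die/core/thread counters), drop
// hardware threads outside the requested subset, and update the global
// topology counts and __kmp_avail_proc.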
static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
3490 AddrUnsPair *newAddr;
3491 if (__kmp_hws_requested == 0)
3494 if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
3498 hwloc_topology_t tp = __kmp_hwloc_topology;
3499 int nS = 0, nN = 0, nL = 0, nC = 0,
3501 int nCr = 0, nTr = 0;
3502 int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0;
3503 hwloc_obj_t hT, hC, hL, hN, hS;
3507 int numa_support = 0, tile_support = 0;
3508 if (__kmp_pu_os_idx)
3509 hT = hwloc_get_pu_obj_by_os_index(tp,
3510 __kmp_pu_os_idx[__kmp_avail_proc - 1]);
3512 hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
3514 KMP_WARNING(AffHWSubsetUnsupported);
3518 hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
3519 hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
    if (hN != NULL && hN->depth > hS->depth) {
    } else if (__kmp_hws_node.num > 0) {
3524 KMP_WARNING(AffHWSubsetUnsupported);
3528 L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
3529 hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
3531 __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
    } else if (__kmp_hws_tile.num > 0) {
3534 if (__kmp_hws_core.num == 0) {
3535 __kmp_hws_core = __kmp_hws_tile;
3536 __kmp_hws_tile.num = 0;
3539 KMP_WARNING(AffHWSubsetInvalid);
3546 if (__kmp_hws_socket.num == 0)
3547 __kmp_hws_socket.num = nPackages;
3548 if (__kmp_hws_socket.offset >= nPackages) {
3549 KMP_WARNING(AffHWSubsetManySockets);
3554 int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
3556 if (__kmp_hws_node.num == 0)
3557 __kmp_hws_node.num = NN;
3558 if (__kmp_hws_node.offset >= NN) {
3559 KMP_WARNING(AffHWSubsetManyNodes);
3564 int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
3565 if (__kmp_hws_tile.num == 0) {
3566 __kmp_hws_tile.num = NL + 1;
3568 if (__kmp_hws_tile.offset >= NL) {
3569 KMP_WARNING(AffHWSubsetManyTiles);
3572 int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
3574 if (__kmp_hws_core.num == 0)
3575 __kmp_hws_core.num = NC;
3576 if (__kmp_hws_core.offset >= NC) {
3577 KMP_WARNING(AffHWSubsetManyCores);
3581 int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
3583 if (__kmp_hws_core.num == 0)
3584 __kmp_hws_core.num = NC;
3585 if (__kmp_hws_core.offset >= NC) {
3586 KMP_WARNING(AffHWSubsetManyCores);
3593 int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
3594 if (__kmp_hws_tile.num == 0)
3595 __kmp_hws_tile.num = NL;
3596 if (__kmp_hws_tile.offset >= NL) {
3597 KMP_WARNING(AffHWSubsetManyTiles);
3600 int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
3602 if (__kmp_hws_core.num == 0)
3603 __kmp_hws_core.num = NC;
3604 if (__kmp_hws_core.offset >= NC) {
3605 KMP_WARNING(AffHWSubsetManyCores);
3609 int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
3611 if (__kmp_hws_core.num == 0)
3612 __kmp_hws_core.num = NC;
3613 if (__kmp_hws_core.offset >= NC) {
3614 KMP_WARNING(AffHWSubsetManyCores);
3619 if (__kmp_hws_proc.num == 0)
3620 __kmp_hws_proc.num = __kmp_nThreadsPerCore;
3621 if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
3622 KMP_WARNING(AffHWSubsetManyProcs);
    newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *
    int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
    for (int s = 0; s < NP; ++s) {
3635 hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
3636 if (!__kmp_hwloc_obj_has_PUs(tp, hS))
3639 if (nS <= __kmp_hws_socket.offset ||
3640 nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
3641 n_old += __kmp_hwloc_skip_PUs_obj(tp, hS);
3652 __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, &hN);
      for (int n = 0; n < NN; ++n) {
3655 if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
3656 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3660 if (nN <= __kmp_hws_node.offset ||
3661 nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
3663 n_old += __kmp_hwloc_skip_PUs_obj(tp, hN);
3664 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3671 int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
        for (int l = 0; l < NL; ++l) {
3674 if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
3675 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3679 if (nL <= __kmp_hws_tile.offset ||
3680 nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
3682 n_old += __kmp_hwloc_skip_PUs_obj(tp, hL);
3683 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3690 int NC = __kmp_hwloc_count_children_by_type(tp, hL,
3691 HWLOC_OBJ_CORE, &hC);
          for (int c = 0; c < NC; ++c) {
3694 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3695 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3699 if (nC <= __kmp_hws_core.offset ||
3700 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3702 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3703 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3711 int NT = __kmp_hwloc_count_children_by_type(tp, hC,
            for (int t = 0; t < NT; ++t) {
3716 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3717 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3721 if (nT <= __kmp_hws_proc.offset ||
3722 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3724 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3727 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3732 newAddr[n_new] = (*pAddr)[n_old];
3735 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3743 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3745 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3753 __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC);
          for (int c = 0; c < NC; ++c) {
3756 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3757 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3761 if (nC <= __kmp_hws_core.offset ||
3762 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3764 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3765 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3773 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
            for (int t = 0; t < NT; ++t) {
3777 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3778 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3782 if (nT <= __kmp_hws_proc.offset ||
3783 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3785 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
                KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3788 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3793 newAddr[n_new] = (*pAddr)[n_old];
3796 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3804 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3807 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3815 int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
      for (int l = 0; l < NL; ++l) {
3818 if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
3819 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3823 if (nL <= __kmp_hws_tile.offset ||
3824 nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
3826 n_old += __kmp_hwloc_skip_PUs_obj(tp, hL);
3827 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3835 __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
        for (int c = 0; c < NC; ++c) {
3838 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3839 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3843 if (nC <= __kmp_hws_core.offset ||
3844 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3846 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3847 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3856 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
          for (int t = 0; t < NT; ++t) {
3860 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3861 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3865 if (nT <= __kmp_hws_proc.offset ||
3866 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3868 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
              KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3871 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3876 newAddr[n_new] = (*pAddr)[n_old];
3879 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3887 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3889 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3897 __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);
      for (int c = 0; c < NC; ++c) {
3900 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3901 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3905 if (nC <= __kmp_hws_core.offset ||
3906 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3908 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3909 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3918 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
        for (int t = 0; t < NT; ++t) {
3922 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3923 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3927 if (nT <= __kmp_hws_proc.offset ||
3928 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3930 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
            KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3933 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3938 newAddr[n_new] = (*pAddr)[n_old];
3941 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3949 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3961 KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
3962 KMP_DEBUG_ASSERT(nPkg > 0);
3963 KMP_DEBUG_ASSERT(nCpP > 0);
3964 KMP_DEBUG_ASSERT(nTpC > 0);
3965 KMP_DEBUG_ASSERT(nCo > 0);
3966 KMP_DEBUG_ASSERT(nPkg <= nPackages);
3967 KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
3968 KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
3969 KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
3972 nCoresPerPkg = nCpP;
3973 __kmp_nThreadsPerCore = nTpC;
3974 __kmp_avail_proc = n_new;
3980 int n_old = 0, n_new = 0, proc_num = 0;
3981 if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
3982 KMP_WARNING(AffHWSubsetNoHWLOC);
3985 if (__kmp_hws_socket.num == 0)
3986 __kmp_hws_socket.num = nPackages;
3987 if (__kmp_hws_die.num == 0)
3988 __kmp_hws_die.num = nDiesPerPkg;
3989 if (__kmp_hws_core.num == 0)
3990 __kmp_hws_core.num = nCoresPerPkg;
3991 if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore)
3992 __kmp_hws_proc.num = __kmp_nThreadsPerCore;
3993 if (!__kmp_affinity_uniform_topology()) {
3994 KMP_WARNING(AffHWSubsetNonUniform);
3998 KMP_WARNING(AffHWSubsetNonThreeLevel);
4001 if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
4002 KMP_WARNING(AffHWSubsetManySockets);
4005 if (depth == 4 && __kmp_hws_die.offset + __kmp_hws_die.num > nDiesPerPkg) {
4006 KMP_WARNING(AffHWSubsetManyDies);
4009 if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {
4010 KMP_WARNING(AffHWSubsetManyCores);
4015 newAddr = (AddrUnsPair *)__kmp_allocate(
4016 sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_die.num *
4017 __kmp_hws_core.num * __kmp_hws_proc.num);
    for (int i = 0; i < nPackages; ++i) {
4019 if (i < __kmp_hws_socket.offset ||
4020 i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
4022 n_old += nDiesPerPkg * nCoresPerPkg * __kmp_nThreadsPerCore;
4023 if (__kmp_pu_os_idx != NULL) {
          for (int l = 0; l < nDiesPerPkg; ++l) {
            for (int j = 0; j < nCoresPerPkg; ++j) {
              for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
4028 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
        for (int l = 0; l < nDiesPerPkg; ++l) {
4038 if (l < __kmp_hws_die.offset ||
4039 l >= __kmp_hws_die.offset + __kmp_hws_die.num) {
4040 n_old += nCoresPerPkg;
4041 if (__kmp_pu_os_idx != NULL) {
            for (int k = 0; k < nCoresPerPkg; ++k) {
4043 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
          for (int j = 0; j < nCoresPerPkg; ++j) {
4049 if (j < __kmp_hws_core.offset ||
4050 j >= __kmp_hws_core.offset +
4051 __kmp_hws_core.num) {
4052 n_old += __kmp_nThreadsPerCore;
4053 if (__kmp_pu_os_idx != NULL) {
              for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
4055 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num],
4056 __kmp_affin_fullMask);
            for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
4063 if (k < __kmp_hws_proc.num) {
4065 newAddr[n_new] = (*pAddr)[n_old];
4068 if (__kmp_pu_os_idx != NULL)
4069 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num],
4070 __kmp_affin_fullMask);
4081 KMP_DEBUG_ASSERT(n_old ==
4082 nPackages * nDiesPerPkg * nCoresPerPkg *
4083 __kmp_nThreadsPerCore);
4084 KMP_DEBUG_ASSERT(n_new ==
4085 __kmp_hws_socket.num * __kmp_hws_die.num *
4086 __kmp_hws_core.num * __kmp_hws_proc.num);
4087 nPackages = __kmp_hws_socket.num;
4088 nCoresPerPkg = __kmp_hws_core.num;
4089 nDiesPerPkg = __kmp_hws_die.num;
4090 __kmp_nThreadsPerCore = __kmp_hws_proc.num;
4091 __kmp_avail_proc = n_new;
4093 nPackages * nDiesPerPkg * __kmp_hws_core.num;
4099 if (__kmp_affinity_verbose) {
    KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
    __kmp_str_buf_init(&buf);
    __kmp_str_buf_print(&buf, "%d", nPackages);
    KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
    __kmp_str_buf_free(&buf);
4109 if (__kmp_pu_os_idx != NULL) {
4110 __kmp_free(__kmp_pu_os_idx);
4111 __kmp_pu_os_idx = NULL;
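// Helpers for the "balanced" affinity type: locate the core level within the
// address2os labels, count cores, map a proc to its core, and compute the
// maximum number of hardware threads per core.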
static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
                                          int nprocs, int bottom_level) {
  for (int i = 0; i < nprocs; i++) {
    for (int j = bottom_level; j > 0; j--) {
      if (address2os[i].first.labels[j] > 0) {
        if (core_level < (j - 1)) {
static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
                                         int nprocs, int bottom_level,
4141 for (i = 0; i < nprocs; i++) {
4142 for (j = bottom_level; j > core_level; j--) {
4143 if ((i + 1) < nprocs) {
4144 if (address2os[i + 1].first.labels[j] > 0) {
4149 if (j == core_level) {
4153 if (j > core_level) {
static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
                                    int bottom_level, int core_level) {
  return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
                                            int nprocs, int bottom_level,
4174 int maxprocpercore = 0;
4176 if (core_level < bottom_level) {
    for (int i = 0; i < nprocs; i++) {
4178 int percore = address2os[i].first.labels[core_level + 1] + 1;
4180 if (percore > maxprocpercore) {
4181 maxprocpercore = percore;
4187 return maxprocpercore;
4190 static AddrUnsPair *address2os = NULL;
4191 static int *procarr = NULL;
4192 static int __kmp_aff_depth = 0;
4194 #if KMP_USE_HIER_SCHED
4195 #define KMP_EXIT_AFF_NONE \
4196 KMP_ASSERT(__kmp_affinity_type == affinity_none); \
4197 KMP_ASSERT(address2os == NULL); \
4198 __kmp_apply_thread_places(NULL, 0); \
4199 __kmp_create_affinity_none_places(); \
4200 __kmp_dispatch_set_hierarchy_values(); \
4203 #define KMP_EXIT_AFF_NONE \
4204 KMP_ASSERT(__kmp_affinity_type == affinity_none); \
4205 KMP_ASSERT(address2os == NULL); \
4206 __kmp_apply_thread_places(NULL, 0); \
4207 __kmp_create_affinity_none_places(); \
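// For affinity_none, create a single place covering the whole initial mask.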
4213 static void __kmp_create_affinity_none_places() {
4214 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4215 KMP_ASSERT(__kmp_affinity_type == affinity_none);
4216 __kmp_affinity_num_masks = 1;
4217 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4218 kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0);
4219 KMP_CPU_COPY(dest, __kmp_affin_fullMask);
static int __kmp_affinity_cmp_Address_child_num(const void *a,
                                                const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
4225 unsigned depth = aa->depth;
4227 KMP_DEBUG_ASSERT(depth == bb->depth);
  KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
4229 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
4230 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
4231 int j = depth - i - 1;
4232 if (aa->childNums[j] < bb->childNums[j])
4234 if (aa->childNums[j] > bb->childNums[j])
4237 for (; i < depth; i++) {
4238 int j = i - __kmp_affinity_compact;
4239 if (aa->childNums[j] < bb->childNums[j])
4241 if (aa->childNums[j] > bb->childNums[j])
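// Topology-detection driver: choose a detection method (hwloc, x2APIC/APIC
// leaves, /proc/cpuinfo, Windows processor groups, or a flat OS-proc map),
// build address2os, then derive the place masks for the selected affinity
// type.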
static void __kmp_aux_affinity_initialize(void) {
4248 if (__kmp_affinity_masks != NULL) {
4249 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4257 if (__kmp_affin_fullMask == NULL) {
4258 KMP_CPU_ALLOC(__kmp_affin_fullMask);
4260 if (KMP_AFFINITY_CAPABLE()) {
4261 __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
4262 if (__kmp_affinity_respect_mask) {
4265 __kmp_avail_proc = 0;
4266 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
4267 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
4272 if (__kmp_avail_proc > __kmp_xproc) {
4273 if (__kmp_affinity_verbose ||
4274 (__kmp_affinity_warnings &&
4275 (__kmp_affinity_type != affinity_none))) {
4276 KMP_WARNING(ErrorInitializeAffinity);
4278 __kmp_affinity_type = affinity_none;
4279 KMP_AFFINITY_DISABLE();
4283 if (__kmp_affinity_verbose) {
4284 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4285 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4286 __kmp_affin_fullMask);
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
4290 if (__kmp_affinity_verbose) {
4291 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4292 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4293 __kmp_affin_fullMask);
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
4296 __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
4297 __kmp_avail_proc = __kmp_xproc;
    __kmp_affin_fullMask->set_process_affinity(true);
4306 if (__kmp_affinity_gran == affinity_gran_tile &&
4308 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::NATIVE_OS) {
    KMP_WARNING(AffTilesNoHWLOC, "KMP_AFFINITY");
4310 __kmp_affinity_gran = affinity_gran_package;
4314 kmp_i18n_id_t msg_id = kmp_i18n_null;
4318 if ((__kmp_cpuinfo_file != NULL) &&
4319 (__kmp_affinity_top_method == affinity_top_method_all)) {
4320 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
4323 if (__kmp_affinity_top_method == affinity_top_method_all) {
4327 const char *file_name = NULL;
4331 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
4332 if (__kmp_affinity_verbose) {
        KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
4335 if (!__kmp_hwloc_error) {
4336 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
      } else if (depth < 0 && __kmp_affinity_verbose) {
        KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
    } else if (__kmp_affinity_verbose) {
      KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
4348 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4351 if (__kmp_affinity_verbose) {
        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
4356 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
4362 if (__kmp_affinity_verbose) {
4363 if (msg_id != kmp_i18n_null) {
          KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
                     KMP_I18N_STR(DecodingLegacyAPIC));
          KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
                     KMP_I18N_STR(DecodingLegacyAPIC));
4374 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
4386 if (__kmp_affinity_verbose) {
4387 if (msg_id != kmp_i18n_null) {
          KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
                     __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
          KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
4396 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
4404 #if KMP_GROUP_AFFINITY
4406 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
4407 if (__kmp_affinity_verbose) {
        KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
4411 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
4412 KMP_ASSERT(depth != 0);
4418 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
4419 if (file_name == NULL) {
4420 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
      } else if (line == 0) {
4422 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
4424 KMP_INFORM(UsingFlatOSFileLine, file_name, line,
4425 __kmp_i18n_catgets(msg_id));
4431 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
4435 KMP_ASSERT(depth > 0);
4436 KMP_ASSERT(address2os != NULL);
4441 else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
4442 KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
4443 if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
4446 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
4457 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4459 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid ||
4460 __kmp_affinity_top_method == affinity_top_method_x2apicid_1f) {
4461 if (__kmp_affinity_verbose) {
      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
4465 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
4470 KMP_ASSERT(msg_id != kmp_i18n_null);
4471 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
  } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
4478 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
4483 KMP_ASSERT(msg_id != kmp_i18n_null);
4484 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4490 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
4491 const char *filename;
    const char *env_var = nullptr;
    if (__kmp_cpuinfo_file != NULL) {
      filename = __kmp_cpuinfo_file;
      env_var = "KMP_CPUINFO_FILE";
      filename = "/proc/cpuinfo";
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
4506 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
4508 KMP_ASSERT(msg_id != kmp_i18n_null);
4510 KMP_FATAL(FileLineMsgExiting, filename, line,
4511 __kmp_i18n_catgets(msg_id));
4513 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
4516 if (__kmp_affinity_type == affinity_none) {
4517 KMP_ASSERT(depth == 0);
4522 #if KMP_GROUP_AFFINITY
4524 else if (__kmp_affinity_top_method == affinity_top_method_group) {
4525 if (__kmp_affinity_verbose) {
      KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
4529 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
4530 KMP_ASSERT(depth != 0);
4532 KMP_ASSERT(msg_id != kmp_i18n_null);
4533 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4539 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
4540 if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
4544 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
4549 KMP_ASSERT(depth > 0);
4550 KMP_ASSERT(address2os != NULL);
4553 #if KMP_USE_HIER_SCHED
4554 __kmp_dispatch_set_hierarchy_values();
4557 if (address2os == NULL) {
4558 if (KMP_AFFINITY_CAPABLE() &&
4559 (__kmp_affinity_verbose ||
4560 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
4561 KMP_WARNING(ErrorInitializeAffinity);
4563 __kmp_affinity_type = affinity_none;
4564 __kmp_create_affinity_none_places();
4565 KMP_AFFINITY_DISABLE();
4569 if (__kmp_affinity_gran == affinity_gran_tile
4571 && __kmp_tile_depth == 0
    KMP_WARNING(AffTilesNoTiles, "KMP_AFFINITY");
4578 __kmp_apply_thread_places(&address2os, depth);
4583 kmp_affin_mask_t *osId2Mask =
4584 __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
4585 if (__kmp_affinity_gran_levels == 0) {
    KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
4592 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
4594 switch (__kmp_affinity_type) {
4596 case affinity_explicit:
4597 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
4598 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
4599 __kmp_affinity_process_proclist(
4600 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4601 __kmp_affinity_proclist, osId2Mask, maxIndex);
4603 __kmp_affinity_process_placelist(
4604 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4605 __kmp_affinity_proclist, osId2Mask, maxIndex);
4607 if (__kmp_affinity_num_masks == 0) {
4608 if (__kmp_affinity_verbose ||
4609 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
4610 KMP_WARNING(AffNoValidProcID);
4612 __kmp_affinity_type = affinity_none;
4613 __kmp_create_affinity_none_places();
4623 case affinity_logical:
4624 __kmp_affinity_compact = 0;
4625 if (__kmp_affinity_offset) {
4626 __kmp_affinity_offset =
4627 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4631 case affinity_physical:
4632 if (__kmp_nThreadsPerCore > 1) {
4633 __kmp_affinity_compact = 1;
4634 if (__kmp_affinity_compact >= depth) {
4635 __kmp_affinity_compact = 0;
4638 __kmp_affinity_compact = 0;
4640 if (__kmp_affinity_offset) {
4641 __kmp_affinity_offset =
4642 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4646 case affinity_scatter:
4647 if (__kmp_affinity_compact >= depth) {
4648 __kmp_affinity_compact = 0;
4650 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
4654 case affinity_compact:
4655 if (__kmp_affinity_compact >= depth) {
4656 __kmp_affinity_compact = depth - 1;
4660 case affinity_balanced:
4662 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
      KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
4665 __kmp_affinity_type = affinity_none;
4666 __kmp_create_affinity_none_places();
    } else if (!__kmp_affinity_uniform_topology()) {
4670 __kmp_aff_depth = depth;
4672 int core_level = __kmp_affinity_find_core_level(
4673 address2os, __kmp_avail_proc, depth - 1);
4674 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
4675 depth - 1, core_level);
4676 int maxprocpercore = __kmp_affinity_max_proc_per_core(
4677 address2os, __kmp_avail_proc, depth - 1, core_level);
4679 int nproc = ncores * maxprocpercore;
4680 if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
4681 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
          KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
4684 __kmp_affinity_type = affinity_none;
        procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
        for (int i = 0; i < nproc; i++) {
        for (int i = 0; i < __kmp_avail_proc; i++) {
4696 int proc = address2os[i].second;
4698 __kmp_affinity_find_core(address2os, i, depth - 1, core_level);
4700 if (core == lastcore) {
4707 procarr[core * maxprocpercore + inlastcore] = proc;
4710 if (__kmp_affinity_compact >= depth) {
4711 __kmp_affinity_compact = depth - 1;
4716 if (__kmp_affinity_dups) {
4717 __kmp_affinity_num_masks = __kmp_avail_proc;
4719 __kmp_affinity_num_masks = numUnique;
4722 if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
4723 (__kmp_affinity_num_places > 0) &&
      ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
4725 __kmp_affinity_num_masks = __kmp_affinity_num_places;
4728 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
  qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
        __kmp_affinity_cmp_Address_child_num);
4737 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
4738 if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
4741 unsigned osId = address2os[i].second;
4742 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
4743 kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
4744 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
4745 KMP_CPU_COPY(dest, src);
4746 if (++j >= __kmp_affinity_num_masks) {
4750 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
    KMP_ASSERT2(0, "Unexpected affinity setting");
4758 KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
4759 machine_hierarchy.init(address2os, __kmp_avail_proc);
4761 #undef KMP_EXIT_AFF_NONE
void __kmp_affinity_initialize(void) {
4772 int disabled = (__kmp_affinity_type == affinity_disabled);
4773 if (!KMP_AFFINITY_CAPABLE()) {
4774 KMP_ASSERT(disabled);
4777 __kmp_affinity_type = affinity_none;
4779 __kmp_aux_affinity_initialize();
4781 __kmp_affinity_type = affinity_disabled;
void __kmp_affinity_uninitialize(void) {
4786 if (__kmp_affinity_masks != NULL) {
4787 KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4788 __kmp_affinity_masks = NULL;
4790 if (__kmp_affin_fullMask != NULL) {
4791 KMP_CPU_FREE(__kmp_affin_fullMask);
4792 __kmp_affin_fullMask = NULL;
4794 __kmp_affinity_num_masks = 0;
4795 __kmp_affinity_type = affinity_default;
4796 __kmp_affinity_num_places = 0;
4797 if (__kmp_affinity_proclist != NULL) {
4798 __kmp_free(__kmp_affinity_proclist);
4799 __kmp_affinity_proclist = NULL;
4801 if (address2os != NULL) {
4802 __kmp_free(address2os);
4805 if (procarr != NULL) {
4806 __kmp_free(procarr);
4810 if (__kmp_hwloc_topology != NULL) {
4811 hwloc_topology_destroy(__kmp_hwloc_topology);
4812 __kmp_hwloc_topology = NULL;
4815 KMPAffinity::destroy_api();
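// Bind a thread to its initial place when it is first brought up; for
// affinity none/balanced (or proc_bind=false) the full initial mask is used
// instead of a specific place.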
void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
4819 if (!KMP_AFFINITY_CAPABLE()) {
4823 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4824 if (th->th.th_affin_mask == NULL) {
4825 KMP_CPU_ALLOC(th->th.th_affin_mask);
4827 KMP_CPU_ZERO(th->th.th_affin_mask);
4834 kmp_affin_mask_t *mask;
4837 if (KMP_AFFINITY_NON_PROC_BIND) {
4838 if ((__kmp_affinity_type == affinity_none) ||
4839 (__kmp_affinity_type == affinity_balanced)) {
4840 #if KMP_GROUP_AFFINITY
4841 if (__kmp_num_proc_groups > 1) {
4845 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4847 mask = __kmp_affin_fullMask;
4849 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4850 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4851 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4855 (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
4856 #if KMP_GROUP_AFFINITY
4857 if (__kmp_num_proc_groups > 1) {
4861 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4863 mask = __kmp_affin_fullMask;
4867 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4868 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4869 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4873 th->th.th_current_place = i;
4875 th->th.th_new_place = i;
4876 th->th.th_first_place = 0;
4877 th->th.th_last_place = __kmp_affinity_num_masks - 1;
  } else if (KMP_AFFINITY_NON_PROC_BIND) {
4881 th->th.th_first_place = 0;
4882 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4885 if (i == KMP_PLACE_ALL) {
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
    KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4893 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4895 if (__kmp_affinity_verbose
4897 && (__kmp_affinity_type == affinity_none ||
4898 (i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) {
4899 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4900 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4901 th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
4910 if (__kmp_affinity_type == affinity_none) {
4911 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4914 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
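// Move a thread to th_new_place when its place assignment changes (e.g.
// under OMP_PROC_BIND=close/spread) and reapply the corresponding mask.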
void __kmp_affinity_set_place(int gtid) {
4918 if (!KMP_AFFINITY_CAPABLE()) {
4922 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
  KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
                 gtid, th->th.th_new_place, th->th.th_current_place));
4929 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4930 KMP_ASSERT(th->th.th_new_place >= 0);
  KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
4932 if (th->th.th_first_place <= th->th.th_last_place) {
4933 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
4934 (th->th.th_new_place <= th->th.th_last_place));
4936 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
4937 (th->th.th_new_place >= th->th.th_last_place));
4942 kmp_affin_mask_t *mask =
4943 KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
4944 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4945 th->th.th_current_place = th->th.th_new_place;
4947 if (__kmp_affinity_verbose) {
4948 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4949 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4950 th->th.th_affin_mask);
    KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
               __kmp_gettid(), gtid, buf);
4954 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
int __kmp_aux_set_affinity(void **mask) {
4962 if (!KMP_AFFINITY_CAPABLE()) {
4966 gtid = __kmp_entry_gtid();
  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
        "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
4976 if (__kmp_env_consistency_check) {
4977 if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4983 KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
4984 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
        KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4987 if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4992 if (num_procs == 0) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4996 #if KMP_GROUP_AFFINITY
4997 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
5004 th = __kmp_threads[gtid];
5005 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
5006 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
5008 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
5011 th->th.th_current_place = KMP_PLACE_UNDEFINED;
5012 th->th.th_new_place = KMP_PLACE_UNDEFINED;
5013 th->th.th_first_place = 0;
5014 th->th.th_last_place = __kmp_affinity_num_masks - 1;
5017 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
int __kmp_aux_get_affinity(void **mask) {
5027 if (!KMP_AFFINITY_CAPABLE()) {
5031 gtid = __kmp_entry_gtid();
5032 th = __kmp_threads[gtid];
5033 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              th->th.th_affin_mask);
    __kmp_printf(
        "kmp_get_affinity: stored affinity mask for thread %d = %s\n",
5043 if (__kmp_env_consistency_check) {
5044 if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
5051 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
  KA_TRACE(1000, (""); {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              (kmp_affin_mask_t *)(*mask));
    __kmp_printf(
        "kmp_get_affinity: system affinity mask for thread %d = %s\n",
5063 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
5069 int __kmp_aux_get_affinity_max_proc() {
5070 if (!KMP_AFFINITY_CAPABLE()) {
5073 #if KMP_GROUP_AFFINITY
5074 if (__kmp_num_proc_groups > 1) {
    return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
5082 if (!KMP_AFFINITY_CAPABLE()) {
5086 KA_TRACE(1000, (
""); {
5087 int gtid = __kmp_entry_gtid();
5088 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5089 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5090 (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
                       "affinity mask for thread %d = %s\n",
5096 if (__kmp_env_consistency_check) {
5097 if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
5102 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
5105 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
5109 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
5114 if (!KMP_AFFINITY_CAPABLE()) {
5118 KA_TRACE(1000, (
""); {
5119 int gtid = __kmp_entry_gtid();
5120 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5121 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5122 (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
                       "affinity mask for thread %d = %s\n",
5128 if (__kmp_env_consistency_check) {
5129 if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
5134 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
5137 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
5141 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
5146 if (!KMP_AFFINITY_CAPABLE()) {
5150 KA_TRACE(1000, (
""); {
5151 int gtid = __kmp_entry_gtid();
5152 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5153 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5154 (kmp_affin_mask_t *)(*mask));
    __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
                       "affinity mask for thread %d = %s\n",
5160 if (__kmp_env_consistency_check) {
5161 if ((mask == NULL) || (*mask == NULL)) {
      KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
5166 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
5169 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
5173 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
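// KMP_AFFINITY=balanced: distribute nthreads evenly across cores. Uniform
// topologies are handled arithmetically; non-uniform ones use the procarr
// table built during initialization. Threads are then bound at thread or
// core granularity depending on __kmp_affinity_gran.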
void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
5178 KMP_DEBUG_ASSERT(th);
  bool fine_gran = true;
5180 int tid = th->th.th_info.ds.ds_tid;
5182 switch (__kmp_affinity_gran) {
5183 case affinity_gran_fine:
5184 case affinity_gran_thread:
5186 case affinity_gran_core:
5187 if (__kmp_nThreadsPerCore > 1) {
5191 case affinity_gran_package:
5192 if (nCoresPerPkg > 1) {
5200 if (__kmp_affinity_uniform_topology()) {
5204 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
5206 int ncores = __kmp_ncores;
5207 if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
5208 __kmp_nth_per_core = __kmp_avail_proc / nPackages;
5212 int chunk = nthreads / ncores;
5214 int big_cores = nthreads % ncores;
5216 int big_nth = (chunk + 1) * big_cores;
5217 if (tid < big_nth) {
5218 coreID = tid / (chunk + 1);
5219 threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
5221 coreID = (tid - big_cores) / chunk;
5222 threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
5225 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
5226 "Illegal set affinity operation when not capable");
5228 kmp_affin_mask_t *mask = th->th.th_affin_mask;
5232 int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
5233 KMP_CPU_SET(osID, mask);
      for (int i = 0; i < __kmp_nth_per_core; i++) {
5237 osID = address2os[coreID * __kmp_nth_per_core + i].second;
5238 KMP_CPU_SET(osID, mask);
5241 if (__kmp_affinity_verbose) {
5242 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5243 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
      KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
                 __kmp_gettid(), tid, buf);
5247 __kmp_set_system_affinity(mask, TRUE);
5250 kmp_affin_mask_t *mask = th->th.th_affin_mask;
5253 int core_level = __kmp_affinity_find_core_level(
5254 address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
5255 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
5256 __kmp_aff_depth - 1, core_level);
5257 int nth_per_core = __kmp_affinity_max_proc_per_core(
5258 address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
5262 if (nthreads == __kmp_avail_proc) {
5264 int osID = address2os[tid].second;
5265 KMP_CPU_SET(osID, mask);
5267 int core = __kmp_affinity_find_core(address2os, tid,
5268 __kmp_aff_depth - 1, core_level);
        for (int i = 0; i < __kmp_avail_proc; i++) {
5270 int osID = address2os[i].second;
5271 if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
5272 core_level) == core) {
5273 KMP_CPU_SET(osID, mask);
    } else if (nthreads <= ncores) {
      for (int i = 0; i < ncores; i++) {
        for (int j = 0; j < nth_per_core; j++) {
5284 if (procarr[i * nth_per_core + j] != -1) {
          for (int j = 0; j < nth_per_core; j++) {
5292 int osID = procarr[i * nth_per_core + j];
5294 KMP_CPU_SET(osID, mask);
      int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
      int *ncores_with_x_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
      int *ncores_with_x_to_max_procs =
          (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
      for (int i = 0; i <= nth_per_core; i++) {
5319 ncores_with_x_procs[i] = 0;
5320 ncores_with_x_to_max_procs[i] = 0;
      for (int i = 0; i < ncores; i++) {
        for (int j = 0; j < nth_per_core; j++) {
5326 if (procarr[i * nth_per_core + j] != -1) {
5330 nproc_at_core[i] = cnt;
5331 ncores_with_x_procs[cnt]++;
      for (int i = 0; i <= nth_per_core; i++) {
        for (int j = i; j <= nth_per_core; j++) {
5336 ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
5341 int nproc = nth_per_core * ncores;
      int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
      for (int i = 0; i < nproc; i++) {
      for (int j = 1; j <= nth_per_core; j++) {
5352 int cnt = ncores_with_x_to_max_procs[j];
5353 for (
int i = 0; i < ncores; i++) {
5355 if (nproc_at_core[i] == 0) {
5358 for (
int k = 0; k < nth_per_core; k++) {
5359 if (procarr[i * nth_per_core + k] != -1) {
5360 if (newarr[i * nth_per_core + k] == 0) {
5361 newarr[i * nth_per_core + k] = 1;
5367 newarr[i * nth_per_core + k]++;
5375 if (cnt == 0 || nth == 0) {
5386 for (
int i = 0; i < nproc; i++) {
5390 int osID = procarr[i];
5391 KMP_CPU_SET(osID, mask);
5393 int coreID = i / nth_per_core;
5394 for (
int ii = 0; ii < nth_per_core; ii++) {
5395 int osID = procarr[coreID * nth_per_core + ii];
5397 KMP_CPU_SET(osID, mask);
5407 if (__kmp_affinity_verbose) {
5408 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5409 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
    KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
               __kmp_gettid(), tid, buf);
5413 __kmp_set_system_affinity(mask, TRUE);
5417 #if KMP_OS_LINUX || KMP_OS_FREEBSD
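// Exported entry point that resets the calling OpenMP thread's affinity to
// the initial full mask; returns early for non-OpenMP threads or when
// affinity has not been initialized.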
5431 kmp_set_thread_affinity_mask_initial()
5436 int gtid = __kmp_get_gtid();
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "non-omp thread, returning\n"));
  if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                  "affinity not initialized, returning\n"));
  KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
                "set full mask for thread %d\n",
5451 KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
5452 return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);