#include "kmp_affinity.h"
#include "kmp_wrapper_getpid.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#define HWLOC_GROUP_KIND_INTEL_DIE 104

// The machine topology, expressed as a hierarchy of levels.
static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
  kmp_uint32 depth;
  // Lazily initialize the hierarchy on first use.
  if (TCR_1(machine_hierarchy.uninitialized))
    machine_hierarchy.init(NULL, nproc);

  // Adjust the hierarchy in case the number of threads exceeds the original.
  if (nproc > machine_hierarchy.base_num_threads)
    machine_hierarchy.resize(nproc);

  depth = machine_hierarchy.depth;
  KMP_DEBUG_ASSERT(depth > 0);

  thr_bar->depth = depth;
  __kmp_type_convert(machine_hierarchy.numPerLevel[0] - 1,
                     &(thr_bar->base_leaf_kids));
  thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
#if KMP_AFFINITY_SUPPORTED

// Return the localized catalog name of a topology layer.
const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural) {
  switch (type) {
  case KMP_HW_SOCKET:
    return ((plural) ? KMP_I18N_STR(Sockets) : KMP_I18N_STR(Socket));
  case KMP_HW_DIE:
    return ((plural) ? KMP_I18N_STR(Dice) : KMP_I18N_STR(Die));
  case KMP_HW_MODULE:
    return ((plural) ? KMP_I18N_STR(Modules) : KMP_I18N_STR(Module));
  case KMP_HW_TILE:
    return ((plural) ? KMP_I18N_STR(Tiles) : KMP_I18N_STR(Tile));
  case KMP_HW_NUMA:
    return ((plural) ? KMP_I18N_STR(NumaDomains) : KMP_I18N_STR(NumaDomain));
  case KMP_HW_L3:
    return ((plural) ? KMP_I18N_STR(L3Caches) : KMP_I18N_STR(L3Cache));
  case KMP_HW_L2:
    return ((plural) ? KMP_I18N_STR(L2Caches) : KMP_I18N_STR(L2Cache));
  case KMP_HW_L1:
    return ((plural) ? KMP_I18N_STR(L1Caches) : KMP_I18N_STR(L1Cache));
  case KMP_HW_CORE:
    return ((plural) ? KMP_I18N_STR(Cores) : KMP_I18N_STR(Core));
  case KMP_HW_THREAD:
    return ((plural) ? KMP_I18N_STR(Threads) : KMP_I18N_STR(Thread));
  case KMP_HW_PROC_GROUP:
    return ((plural) ? KMP_I18N_STR(ProcGroups) : KMP_I18N_STR(ProcGroup));
  }
  return KMP_I18N_STR(Unknown);
}
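// Example (illustrative): __kmp_hw_get_catalog_string(KMP_HW_CORE, true)
// returns the localized "Cores" string; the verbose topology dumps below
// splice these names into KMP_INFORM messages.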
// Removes layers which are "radix 1", i.e., have only one sub-item per item.
static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh,
                                                  int depth, kmp_hw_t *types) {
  int preference[KMP_HW_LAST];
  int top_index1, top_index2;
  // Set up the preference associated with each topology layer.
  preference[KMP_HW_PROC_GROUP] = 110;
  preference[KMP_HW_SOCKET] = 100;
  preference[KMP_HW_CORE] = 95;
  preference[KMP_HW_THREAD] = 90;
  preference[KMP_HW_DIE] = 85;
  preference[KMP_HW_NUMA] = 80;
  preference[KMP_HW_TILE] = 75;
  preference[KMP_HW_MODULE] = 73;
  preference[KMP_HW_L3] = 70;
  preference[KMP_HW_L2] = 65;
  preference[KMP_HW_L1] = 60;
  top_index1 = 0;
  top_index2 = 1;
  while (top_index1 < depth - 1 && top_index2 < depth) {
    KMP_DEBUG_ASSERT(top_index1 >= 0 && top_index1 < depth);
    KMP_DEBUG_ASSERT(top_index2 >= 0 && top_index2 < depth);
    kmp_hw_t type1 = types[top_index1];
    kmp_hw_t type2 = types[top_index2];
    // Do not remove the socket/core pairing.
    if (type1 == KMP_HW_SOCKET && type2 == KMP_HW_CORE) {
      top_index1 = top_index2++;
      continue;
    }
    bool radix1 = true;
    bool all_same = true;
    unsigned id1 = addrP[0].first.labels[top_index1];
    unsigned id2 = addrP[0].first.labels[top_index2];
    int pref1 = preference[type1];
    int pref2 = preference[type2];
    for (int hwidx = 1; hwidx < nTh; ++hwidx) {
      if (addrP[hwidx].first.labels[top_index1] == id1 &&
          addrP[hwidx].first.labels[top_index2] != id2) {
        radix1 = false;
        break;
      }
      if (addrP[hwidx].first.labels[top_index2] != id2)
        all_same = false;
      id1 = addrP[hwidx].first.labels[top_index1];
      id2 = addrP[hwidx].first.labels[top_index2];
    }
    if (radix1) {
      // Select which of the two layers to remove based on preference.
      kmp_hw_t remove_type, keep_type;
      int remove_layer, remove_layer_ids;
      if (pref1 > pref2) {
        remove_type = type2;
        keep_type = type1;
        remove_layer = remove_layer_ids = top_index2;
      } else {
        remove_type = type1;
        keep_type = type2;
        remove_layer = remove_layer_ids = top_index1;
      }
      // If all ids of the deeper layer are identical, keep the shallower
      // layer's ids instead.
      if (all_same)
        remove_layer_ids = top_index2;
      // Remove the layer: shift the ids in each hw thread and in types[].
      for (int idx = 0; idx < nTh; ++idx) {
        Address &hw_thread = addrP[idx].first;
        for (int d = remove_layer_ids; d < depth - 1; ++d)
          hw_thread.labels[d] = hw_thread.labels[d + 1];
      }
      for (int idx = remove_layer; idx < depth - 1; ++idx)
        types[idx] = types[idx + 1];
      depth--;
    } else {
      top_index1 = top_index2++;
    }
  }
  KMP_ASSERT(depth > 0);
  return depth;
}
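// Illustration: on a machine where every core exposes exactly one hardware
// thread, the thread layer is "radix 1" relative to the core layer, so it is
// folded away and the returned depth shrinks by one.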
// Gather the count of each topology layer and the per-layer ratios:
// ratio[i] holds types[i] per types[i-1]; count[i] holds the absolute number
// of objects of types[i].
static void __kmp_affinity_gather_enumeration_information(AddrUnsPair *addrP,
                                                          int nTh, int depth,
                                                          kmp_hw_t *types,
                                                          int *ratio,
                                                          int *count) {
  int previous_id[KMP_HW_LAST];
  int max[KMP_HW_LAST];
  for (int i = 0; i < depth; ++i) {
    previous_id[i] = -1;
    max[i] = 0;
    count[i] = 0;
    ratio[i] = 0;
  }
  for (int i = 0; i < nTh; ++i) {
    Address &hw_thread = addrP[i].first;
    for (int layer = 0; layer < depth; ++layer) {
      int id = hw_thread.labels[layer];
      if (id != previous_id[layer]) {
        // A new object starts at this layer, so every deeper layer also
        // starts a new object.
        for (int l = layer; l < depth; ++l)
          count[l]++;
        // Keep track of the per-layer ratio statistics.
        max[layer]++;
        for (int l = layer + 1; l < depth; ++l) {
          if (max[l] > ratio[l])
            ratio[l] = max[l];
          max[l] = 1;
        }
        break;
      }
    }
    for (int layer = 0; layer < depth; ++layer) {
      previous_id[layer] = hw_thread.labels[layer];
    }
  }
  for (int layer = 0; layer < depth; ++layer) {
    if (max[layer] > ratio[layer])
      ratio[layer] = max[layer];
  }
}
// Returns true if the topology is uniform.
static bool __kmp_affinity_discover_uniformity(int depth, int *ratio,
                                               int *count) {
  int num = 1;
  for (int level = 0; level < depth; ++level)
    num *= ratio[level];
  return (num == count[depth - 1]);
}
// Multiply the per-level ratios between two levels of the topology.
static inline int __kmp_affinity_calculate_ratio(int *ratio, int deep_level,
                                                 int shallow_level) {
  int retval = 1;
  if (deep_level < 0 || shallow_level < 0)
    return retval;
  for (int level = deep_level; level > shallow_level; --level)
    retval *= ratio[level];
  return retval;
}
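// Illustration: __kmp_affinity_calculate_ratio(ratio, thread_level, core_level)
// multiplies ratio[level] for every level deeper than core_level down to and
// including thread_level, which is how __kmp_nThreadsPerCore is derived in the
// topology-detection routines below.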
// Print the detailed machine topology map, i.e. the physical locations of
// each OS proc, using the generic types[] description.
static void __kmp_affinity_print_topology(AddrUnsPair *addrP, int len,
                                          int depth, kmp_hw_t *types) {
  int proc;
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    for (int i = 0; i < depth; ++i) {
      __kmp_str_buf_print(&buf, "%s %d ", __kmp_hw_get_catalog_string(types[i]),
                          addrP[proc].first.labels[i]);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", addrP[proc].second, buf.str);
    __kmp_str_buf_clear(&buf);
  }
  __kmp_str_buf_free(&buf);
}
// Print the detailed machine topology map using the fixed
// package/core/thread level indices.
static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
                                          int depth, int pkgLevel,
                                          int coreLevel, int threadLevel) {
  int proc;

  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    int level;
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    for (level = 0; level < depth; level++) {
      if (level == threadLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
      } else if (level == coreLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
      } else if (level == pkgLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
      } else if (level > pkgLevel) {
        __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                            level - pkgLevel - 1);
      } else {
        __kmp_str_buf_print(&buf, "L%d ", level);
      }
      __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
               buf.str);
    __kmp_str_buf_free(&buf);
  }
}
bool KMPAffinity::picked_api = false;

void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::pick_api() {
  KMPAffinity *affinity_dispatch;
  if (picked_api)
    return;
#if KMP_USE_HWLOC
  // Only use Hwloc if affinity isn't explicitly disabled and the user
  // requested the Hwloc topology method.
  if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
      __kmp_affinity_type != affinity_disabled) {
    affinity_dispatch = new KMPHwlocAffinity();
  } else
#endif
  {
    affinity_dispatch = new KMPNativeAffinity();
  }
  __kmp_affinity_dispatch = affinity_dispatch;
  picked_api = true;
}

void KMPAffinity::destroy_api() {
  if (__kmp_affinity_dispatch != NULL) {
    delete __kmp_affinity_dispatch;
    __kmp_affinity_dispatch = NULL;
    picked_api = false;
  }
}
#define KMP_ADVANCE_SCAN(scan)                                                 \
  while (*scan != '\0') {                                                      \
    scan++;                                                                    \
  }

// Print the affinity mask to the character array in a pretty format.
char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                kmp_affin_mask_t *mask) {
  int start = 0, finish = 0, previous = 0;
  bool first_range;
  KMP_ASSERT(buf);
  KMP_ASSERT(buf_len >= 40);
  KMP_ASSERT(mask);
  char *scan = buf;
  char *end = buf + buf_len - 1;

  // Check for an empty set.
  if (mask->begin() == mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
    KMP_ADVANCE_SCAN(scan);
    KMP_ASSERT(scan <= end);
    return buf;
  }

  first_range = true;
  start = mask->begin();
  while (1) {
    // Find the next range: [start, previous] is an inclusive range of
    // contiguous bits in the mask.
    for (finish = mask->next(start), previous = start;
         finish == previous + 1 && finish != mask->end();
         finish = mask->next(finish)) {
      previous = finish;
    }

    // The first range does not need a comma printed before it, but the rest
    // of the ranges do.
    if (!first_range) {
      KMP_SNPRINTF(scan, end - scan + 1, "%s", ",");
      KMP_ADVANCE_SCAN(scan);
    } else {
      first_range = false;
    }
    // Range with three or more contiguous bits in the affinity mask.
    if (previous - start > 1) {
      KMP_SNPRINTF(scan, end - scan + 1, "%u-%u", start, previous);
    } else {
      // Range with one or two contiguous bits in the affinity mask.
      KMP_SNPRINTF(scan, end - scan + 1, "%u", start);
      KMP_ADVANCE_SCAN(scan);
      if (previous - start > 0) {
        KMP_SNPRINTF(scan, end - scan + 1, ",%u", previous);
      }
    }
    KMP_ADVANCE_SCAN(scan);
    // Start over with a new start point.
    start = finish;
    if (start == mask->end())
      break;
    // Check for overflow.
    if (end - scan < 2)
      break;
  }

  KMP_ASSERT(scan <= end);
  return buf;
}
#undef KMP_ADVANCE_SCAN
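// Example (illustrative): a mask containing OS procs 0,1,2,3 and 8 is rendered
// as "0-3,8"; runs of three or more contiguous ids collapse to "lo-hi", while
// one- or two-id runs are printed individually.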
// Return the (possibly empty) affinity mask representation in a string buffer.
kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
                                           kmp_affin_mask_t *mask) {
  int start = 0, finish = 0, previous = 0;
  bool first_range;
  KMP_ASSERT(buf);
  KMP_ASSERT(mask);

  __kmp_str_buf_clear(buf);

  // Check for an empty set.
  if (mask->begin() == mask->end()) {
    __kmp_str_buf_print(buf, "%s", "{<empty>}");
    return buf;
  }

  first_range = true;
  start = mask->begin();
  while (1) {
    // Find the next range: [start, previous] is an inclusive range of
    // contiguous bits in the mask.
    for (finish = mask->next(start), previous = start;
         finish == previous + 1 && finish != mask->end();
         finish = mask->next(finish)) {
      previous = finish;
    }

    // Only ranges after the first need a leading comma.
    if (!first_range) {
      __kmp_str_buf_print(buf, "%s", ",");
    } else {
      first_range = false;
    }
    // Range with three or more contiguous bits in the affinity mask.
    if (previous - start > 1) {
      __kmp_str_buf_print(buf, "%u-%u", start, previous);
    } else {
      // Range with one or two contiguous bits in the affinity mask.
      __kmp_str_buf_print(buf, "%u", start);
      if (previous - start > 0) {
        __kmp_str_buf_print(buf, ",%u", previous);
      }
    }
    // Start over with a new start point.
    start = finish;
    if (start == mask->end())
      break;
  }
  return buf;
}
void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
  KMP_CPU_ZERO(mask);
#if KMP_GROUP_AFFINITY
  if (__kmp_num_proc_groups > 1) {
    KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
    for (int group = 0; group < __kmp_num_proc_groups; group++) {
      int num = __kmp_GetActiveProcessorCount(group);
      for (int i = 0; i < num; i++) {
        KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
      }
    }
  } else
#endif /* KMP_GROUP_AFFINITY */
  {
    for (int proc = 0; proc < __kmp_xproc; proc++) {
      KMP_CPU_SET(proc, mask);
    }
  }
}
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the childNums
// vector of the address object.
static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
                                             int numAddrs) {
  KMP_DEBUG_ASSERT(numAddrs > 0);
  int depth = address2os->first.depth;
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  int labCt;
  for (labCt = 0; labCt < depth; labCt++) {
    address2os[0].first.childNums[labCt] = counts[labCt] = 0;
    lastLabel[labCt] = address2os[0].first.labels[labCt];
  }
  int i;
  for (i = 1; i < numAddrs; i++) {
    for (labCt = 0; labCt < depth; labCt++) {
      if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
        int labCt2;
        for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
          counts[labCt2] = 0;
          lastLabel[labCt2] = address2os[i].first.labels[labCt2];
        }
        counts[labCt]++;
        lastLabel[labCt] = address2os[i].first.labels[labCt];
        break;
      }
    }
    for (labCt = 0; labCt < depth; labCt++) {
      address2os[i].first.childNums[labCt] = counts[labCt];
    }
    for (; labCt < (int)Address::maxDepth; labCt++) {
      address2os[i].first.childNums[labCt] = 0;
    }
  }
  __kmp_free(lastLabel);
  __kmp_free(counts);
}
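// After this pass, childNums[] holds, for each address, its ordinal among the
// siblings that share the same labels at all shallower levels, i.e. children
// are renumbered 0..n within each parent.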
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
static int *__kmp_pu_os_idx = NULL;
static int nDiesPerPkg = 1;

// True when every package exposes the same number of dies, cores and threads.
inline static bool __kmp_affinity_uniform_topology() {
  return __kmp_avail_proc ==
         (__kmp_nThreadsPerCore * nCoresPerPkg * nDiesPerPkg * nPackages);
}
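// Example: 2 packages x 1 die x 8 cores x 2 threads = 32; if __kmp_avail_proc
// is also 32 the topology is considered uniform, while a machine with one
// disabled core (30 available procs) would be reported as non-uniform.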
static inline bool __kmp_hwloc_is_cache_type(hwloc_obj_t obj) {
#if HWLOC_API_VERSION >= 0x00020000
  return hwloc_obj_type_is_cache(obj->type);
#else
  return obj->type == HWLOC_OBJ_CACHE;
#endif
}

// Returns the KMP_HW_* type derived from the HWLOC_* type.
static inline kmp_hw_t __kmp_hwloc_type_2_topology_type(hwloc_obj_t obj) {
  if (__kmp_hwloc_is_cache_type(obj)) {
    if (obj->attr->cache.type == HWLOC_OBJ_CACHE_INSTRUCTION)
      return KMP_HW_UNKNOWN;
    switch (obj->attr->cache.depth) {
    case 1:
      return KMP_HW_L1;
    case 2:
#if KMP_MIC_SUPPORTED
      if (__kmp_mic_type == mic3)
        return KMP_HW_TILE;
#endif
      return KMP_HW_L2;
    case 3:
      return KMP_HW_L3;
    }
    return KMP_HW_UNKNOWN;
  }
  switch (obj->type) {
  case HWLOC_OBJ_PACKAGE:
    return KMP_HW_SOCKET;
  case HWLOC_OBJ_NUMANODE:
    return KMP_HW_NUMA;
  case HWLOC_OBJ_CORE:
    return KMP_HW_CORE;
  case HWLOC_OBJ_PU:
    return KMP_HW_THREAD;
  case HWLOC_OBJ_GROUP:
    if (obj->attr->group.kind == HWLOC_GROUP_KIND_INTEL_DIE)
      return KMP_HW_DIE;
    break;
#if HWLOC_API_VERSION >= 0x00020100
  case HWLOC_OBJ_DIE:
    return KMP_HW_DIE;
#endif
  }
  return KMP_HW_UNKNOWN;
}
// Count the number of objects of a given type below obj in the topology tree.
static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
                                           hwloc_obj_type_t type) {
  int retval = 0;
  hwloc_obj_t first;
  for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
                                           obj->logical_index, type, 0);
       first != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology,
                                                       obj->type, first) == obj;
       first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
                                          first)) {
    ++retval;
  }
  return retval;
}

static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
                                               hwloc_obj_t o,
                                               kmp_hwloc_depth_t depth,
                                               hwloc_obj_t *f) {
  if (o->depth == depth) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
  return sum; // will be 0 if none found (as PU arity is 0)
}

static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
                                              hwloc_obj_type_t type,
                                              hwloc_obj_t *f) {
  if (!hwloc_compare_types(o->type, type)) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
  return sum; // will be 0 if none found (as PU arity is 0)
}
static int __kmp_hwloc_get_sub_id(hwloc_topology_t t, hwloc_obj_t higher,
                                  hwloc_obj_t lower) {
  hwloc_obj_t obj;
  hwloc_obj_type_t ltype = lower->type;
  int lindex = lower->logical_index - 1;
  int sub_id = 0;
  // Get the previous lower object.
  obj = hwloc_get_obj_by_type(t, ltype, lindex);
  while (obj && lindex >= 0 &&
         hwloc_bitmap_isincluded(obj->cpuset, higher->cpuset)) {
    if (obj->userdata) {
      sub_id = (int)(RCAST(kmp_intptr_t, obj->userdata));
      break;
    }
    sub_id++;
    lindex--;
    obj = hwloc_get_obj_by_type(t, ltype, lindex);
  }
  // Store sub_id + 1 so that 0 is distinguishable from NULL.
  lower->userdata = RCAST(void *, sub_id + 1);
  return sub_id;
}
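// The sub_id counts how many prior siblings of the same hwloc type fall under
// the same higher-level object; userdata caches the running count (offset by
// one so that 0 can be distinguished from an unset NULL pointer).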
static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
                                           kmp_i18n_id_t *const msg_id) {
  kmp_hw_t type;
  int depth;
  int hw_thread_index, sub_id, nActiveThreads;
  hwloc_obj_t pu, obj, root, prev;
  int ratio[KMP_HW_LAST];
  int count[KMP_HW_LAST];
  kmp_hw_t types[KMP_HW_LAST];

  hwloc_topology_t tp = __kmp_hwloc_topology;
  *msg_id = kmp_i18n_null;

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from hwloc on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);
    // hwloc only guarantees the existence of the PU object, so check
    // PACKAGE and CORE explicitly.
    hwloc_obj_t o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0);
    if (o != NULL)
      nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_CORE);
    else
      nCoresPerPkg = 1; // no PACKAGE found
    o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0);
    if (o != NULL)
      __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_PU);
    else
      __kmp_nThreadsPerCore = 1; // no CORE found
    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    if (nCoresPerPkg == 0)
      nCoresPerPkg = 1; // prevent possible division by 0
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotUsingHwloc, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    KMP_CPU_FREE(oldMask);
    return 0;
  }
  root = hwloc_get_root_obj(tp);

  // Figure out the depth and types in the topology.
  depth = 0;
  pu = hwloc_get_pu_obj_by_os_index(tp, __kmp_affin_fullMask->begin());
  obj = pu;
  types[depth] = KMP_HW_THREAD;
  ++depth;
  while (obj != root && obj != NULL) {
    obj = obj->parent;
#if HWLOC_API_VERSION >= 0x00020000
    if (obj->memory_arity) {
      hwloc_obj_t memory;
      for (memory = obj->memory_first_child; memory;
           memory = hwloc_get_next_child(tp, obj, memory)) {
        if (memory->type == HWLOC_OBJ_NUMANODE)
          break;
      }
      if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
        types[depth] = KMP_HW_NUMA;
        ++depth;
      }
    }
#endif
    type = __kmp_hwloc_type_2_topology_type(obj);
    if (type != KMP_HW_UNKNOWN) {
      types[depth] = type;
      ++depth;
    }
  }
  KMP_ASSERT(depth > 0 && depth <= KMP_HW_LAST);

  // Get the order of the types correct (root first).
  for (int i = 0, j = depth - 1; i < j; ++i, --j) {
    kmp_hw_t temp = types[i];
    types[i] = types[j];
    types[j] = temp;
  }

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
774 while (pu = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, pu)) {
775 int index = depth - 1;
776 bool included = KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask);
777 Address hw_thread(depth);
779 hw_thread.labels[index] = pu->logical_index;
780 __kmp_pu_os_idx[hw_thread_index] = pu->os_index;
786 while (obj != root && obj != NULL) {
788 #if HWLOC_API_VERSION >= 0x00020000
792 if (obj->memory_arity) {
794 for (memory = obj->memory_first_child; memory;
795 memory = hwloc_get_next_child(tp, obj, memory)) {
796 if (memory->type == HWLOC_OBJ_NUMANODE)
799 if (memory && memory->type == HWLOC_OBJ_NUMANODE) {
800 sub_id = __kmp_hwloc_get_sub_id(tp, memory, prev);
802 hw_thread.labels[index] = memory->logical_index;
803 hw_thread.labels[index + 1] = sub_id;
810 type = __kmp_hwloc_type_2_topology_type(obj);
811 if (type != KMP_HW_UNKNOWN) {
812 sub_id = __kmp_hwloc_get_sub_id(tp, obj, prev);
814 hw_thread.labels[index] = obj->logical_index;
815 hw_thread.labels[index + 1] = sub_id;
822 retval[hw_thread_index] = AddrUnsPair(hw_thread, pu->os_index);
828 KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
829 KMP_ASSERT(nActiveThreads > 0);
830 if (nActiveThreads == 1) {
831 __kmp_ncores = nPackages = 1;
832 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }
    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[0];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the table by physical Id.
  qsort(retval, nActiveThreads, sizeof(*retval),
        __kmp_affinity_cmp_Address_labels);

  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
                                                 types);

  __kmp_affinity_gather_enumeration_information(retval, nActiveThreads, depth,
                                                types, ratio, count);

#if KMP_USE_HIER_SCHED
  for (int level = 0; level < depth; ++level) {
    if ((types[level] == KMP_HW_L2 || types[level] == KMP_HW_L3))
      __kmp_tile_depth = level;
  }
#endif
  int thread_level, core_level, tile_level, numa_level, socket_level;
  thread_level = core_level = tile_level = numa_level = socket_level = -1;
  for (int level = 0; level < depth; ++level) {
    if (types[level] == KMP_HW_THREAD)
      thread_level = level;
    else if (types[level] == KMP_HW_CORE)
      core_level = level;
    else if (types[level] == KMP_HW_SOCKET)
      socket_level = level;
    else if (types[level] == KMP_HW_TILE)
      tile_level = level;
    else if (types[level] == KMP_HW_NUMA)
      numa_level = level;
  }
  __kmp_nThreadsPerCore =
      __kmp_affinity_calculate_ratio(ratio, thread_level, core_level);
  nCoresPerPkg =
      __kmp_affinity_calculate_ratio(ratio, core_level, socket_level);
  if (socket_level >= 0)
    nPackages = count[socket_level];
  else
    nPackages = 1;
  if (core_level >= 0)
    __kmp_ncores = count[core_level];
  else
    __kmp_ncores = __kmp_avail_proc;
  // Check to see if the machine topology is uniform.
  unsigned uniform = __kmp_affinity_discover_uniformity(depth, ratio, count);

  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    kmp_hw_t numerator_type, denominator_type;
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }

    __kmp_str_buf_clear(&buf);
928 core_level = depth - 1;
929 int ncores = count[core_level];
931 denominator_type = KMP_HW_UNKNOWN;
932 for (
int level = 0; level < depth; ++level) {
935 numerator_type = types[level];
941 __kmp_hw_get_catalog_string(numerator_type, plural));
943 __kmp_str_buf_print(&buf,
" x %d %s/%s", c,
944 __kmp_hw_get_catalog_string(numerator_type, plural),
945 __kmp_hw_get_catalog_string(denominator_type));
947 denominator_type = numerator_type;
949 KMP_INFORM(TopologyGeneric,
"KMP_AFFINITY", buf.str, ncores);
950 __kmp_str_buf_free(&buf);
953 if (__kmp_affinity_type == affinity_none) {
955 KMP_CPU_FREE(oldMask);
961 if (__kmp_affinity_gran == affinity_gran_node)
962 __kmp_affinity_gran = affinity_gran_numa;
963 KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default);
964 if (__kmp_affinity_gran_levels < 0) {
965 __kmp_affinity_gran_levels = 0;
966 if ((thread_level >= 0) && (__kmp_affinity_gran > affinity_gran_thread))
967 __kmp_affinity_gran_levels++;
968 if ((core_level >= 0) && (__kmp_affinity_gran > affinity_gran_core))
969 __kmp_affinity_gran_levels++;
970 if ((tile_level >= 0) && (__kmp_affinity_gran > affinity_gran_tile))
971 __kmp_affinity_gran_levels++;
972 if ((numa_level >= 0) && (__kmp_affinity_gran > affinity_gran_numa))
973 __kmp_affinity_gran_levels++;
974 if ((socket_level >= 0) && (__kmp_affinity_gran > affinity_gran_package))
975 __kmp_affinity_gran_levels++;
  if (__kmp_affinity_verbose)
    __kmp_affinity_print_topology(retval, nActiveThreads, depth, types);

  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}
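// At this point *address2os holds one AddrUnsPair per available OS proc,
// labelled with the hwloc-derived levels in types[0..depth-1]; the returned
// depth is what the callers use when building the affinity masks.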
static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
                                          kmp_i18n_id_t *const msg_id) {
  *msg_id = kmp_i18n_null;
998 if (!KMP_AFFINITY_CAPABLE()) {
999 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1000 __kmp_ncores = nPackages = __kmp_xproc;
1001 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
1016 __kmp_ncores = nPackages = __kmp_avail_proc;
1017 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
  if (__kmp_affinity_verbose) {
    KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    KMP_INFORM(Uniform, "KMP_AFFINITY");
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  if (__kmp_affinity_type == affinity_none) {
    int avail_ct = 0;
    int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
      if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
        continue;
      __kmp_pu_os_idx[avail_ct++] = i;
    }
    return 0;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
1043 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
1045 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
1048 __kmp_pu_os_idx[avail_ct] = i;
1051 (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
  if (__kmp_affinity_verbose) {
    KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
  }
1057 if (__kmp_affinity_gran_levels < 0) {
1060 if (__kmp_affinity_gran > affinity_gran_package) {
1061 __kmp_affinity_gran_levels = 1;
1063 __kmp_affinity_gran_levels = 0;
1069 #if KMP_GROUP_AFFINITY
static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
                                                kmp_i18n_id_t *const msg_id) {
1078 *msg_id = kmp_i18n_null;
1082 if (!KMP_AFFINITY_CAPABLE()) {
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  unsigned avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i;
    Address addr(2);
    addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
    addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
                 addr.labels[1]);
    }
  }
  if (__kmp_affinity_gran_levels < 0) {
    if (__kmp_affinity_gran == affinity_gran_group) {
      __kmp_affinity_gran_levels = 1;
    } else if ((__kmp_affinity_gran == affinity_gran_fine) ||
               (__kmp_affinity_gran == affinity_gran_thread)) {
      __kmp_affinity_gran_levels = 0;
    } else {
      const char *gran_str = NULL;
      if (__kmp_affinity_gran == affinity_gran_core) {
        gran_str = "core";
      } else if (__kmp_affinity_gran == affinity_gran_package) {
        gran_str = "package";
      } else if (__kmp_affinity_gran == affinity_gran_node) {
        gran_str = "node";
      } else {
        KMP_ASSERT(0);
      }
      // Unsupported granularity for the group topology method: warn and fall
      // back to fine-grain (thread-level) granularity.
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 2;
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// Level types reported by CPUID leaf 0x0B / 0x1F, one topology level per
// sub-leaf.
enum {
  INTEL_LEVEL_TYPE_INVALID = 0, // Package level
  INTEL_LEVEL_TYPE_SMT = 1,
  INTEL_LEVEL_TYPE_CORE = 2,
  INTEL_LEVEL_TYPE_TILE = 3,
  INTEL_LEVEL_TYPE_MODULE = 4,
  INTEL_LEVEL_TYPE_DIE = 5,
  INTEL_LEVEL_TYPE_LAST = 6,
};

struct cpuid_level_info_t {
  unsigned level_type, mask, mask_width, nitems, cache_mask;
};
template <kmp_uint32 LSB, kmp_uint32 MSB>
static inline unsigned __kmp_extract_bits(kmp_uint32 v) {
  const kmp_uint32 SHIFT_LEFT = sizeof(kmp_uint32) * 8 - 1 - MSB;
  const kmp_uint32 SHIFT_RIGHT = LSB;
  kmp_uint32 retval = v;
  retval <<= SHIFT_LEFT;
  retval >>= (SHIFT_LEFT + SHIFT_RIGHT);
  return retval;
}
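// Usage example: __kmp_extract_bits<8, 15>(buf.ecx) returns bits 8..15 of ecx,
// e.g. the CPUID level-type field decoded by __kmp_x2apicid_get_levels() below.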
static kmp_hw_t __kmp_intel_type_2_topology_type(int intel_type) {
  switch (intel_type) {
  case INTEL_LEVEL_TYPE_INVALID:
    return KMP_HW_SOCKET;
  case INTEL_LEVEL_TYPE_SMT:
    return KMP_HW_THREAD;
  case INTEL_LEVEL_TYPE_CORE:
    return KMP_HW_CORE;
  // TODO: add support for the tile and module levels.
  case INTEL_LEVEL_TYPE_TILE:
    return KMP_HW_UNKNOWN;
  case INTEL_LEVEL_TYPE_MODULE:
    return KMP_HW_UNKNOWN;
  case INTEL_LEVEL_TYPE_DIE:
    return KMP_HW_DIE;
  }
  return KMP_HW_UNKNOWN;
}
// This function takes the topology leaf, an array to store the detected
// levels, and a bitmap of the known level types.
// Returns the number of levels detected in the topology.
static unsigned
__kmp_x2apicid_get_levels(int leaf,
                          cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST],
                          kmp_uint64 known_levels) {
  unsigned level, levels_index;
  unsigned level_type, mask_width, nitems;
  kmp_cpuid buf;

  // Unknown levels are folded into the closest enclosing known level.
  level = levels_index = 0;
  do {
    __kmp_x86_cpuid(leaf, level, &buf);
    level_type = __kmp_extract_bits<8, 15>(buf.ecx);
    mask_width = __kmp_extract_bits<0, 4>(buf.eax);
    nitems = __kmp_extract_bits<0, 15>(buf.ebx);
    if (level_type != INTEL_LEVEL_TYPE_INVALID && nitems == 0)
      return 0;

    if (known_levels & (1ull << level_type)) {
      // Add a new level to the topology.
      KMP_ASSERT(levels_index < INTEL_LEVEL_TYPE_LAST);
      levels[levels_index].level_type = level_type;
      levels[levels_index].mask_width = mask_width;
      levels[levels_index].nitems = nitems;
      levels_index++;
    } else {
      // If it is an unknown level, then logically move the previous layer up.
      if (levels_index > 0) {
        levels[levels_index - 1].mask_width = mask_width;
        levels[levels_index - 1].nitems = nitems;
      }
    }
    level++;
  } while (level_type != INTEL_LEVEL_TYPE_INVALID);

  // Set the masks to & with the apic id.
  for (unsigned i = 0; i < levels_index; ++i) {
    if (levels[i].level_type != INTEL_LEVEL_TYPE_INVALID) {
      levels[i].mask = ~((-1) << levels[i].mask_width);
      levels[i].cache_mask = (-1) << levels[i].mask_width;
      for (unsigned j = 0; j < i; ++j)
        levels[i].mask ^= levels[j].mask;
    } else {
      KMP_DEBUG_ASSERT(levels_index > 0);
      levels[i].mask = (-1) << levels[i - 1].mask_width;
      levels[i].cache_mask = 0;
    }
  }
  return levels_index;
}
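// CPUID leaves 0x0B and 0x1F enumerate the x2APIC topology one sub-leaf at a
// time: ECX[15:8] is the level type, EAX[4:0] the id shift width, and
// EBX[15:0] the number of logical processors at that level, which is what the
// __kmp_extract_bits<> calls above decode.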
static int __kmp_cpuid_mask_width(int count) {
  int r = 0;
  while ((1 << r) < count)
    ++r;
  return r;
}
class apicThreadInfo {
public:
  unsigned osId; // param to __kmp_affinity_bind_thread
  unsigned apicId; // from cpuid after binding
  unsigned maxCoresPerPkg; // ""
  unsigned maxThreadsPerPkg; // ""
  unsigned pkgId; // inferred from the above values
  unsigned coreId; // ""
  unsigned threadId; // ""
};

static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
                                                     const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
  if (aa->pkgId < bb->pkgId)
    return -1;
  if (aa->pkgId > bb->pkgId)
    return 1;
  if (aa->coreId < bb->coreId)
    return -1;
  if (aa->coreId > bb->coreId)
    return 1;
  if (aa->threadId < bb->threadId)
    return -1;
  if (aa->threadId > bb->threadId)
    return 1;
  return 0;
}
static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
                                            kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *msg_id = kmp_i18n_null;
1307 __kmp_x86_cpuid(0, 0, &buf);
1309 *msg_id = kmp_i18n_str_NoLeaf4Support;
1318 if (!KMP_AFFINITY_CAPABLE()) {
1321 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1327 __kmp_x86_cpuid(1, 0, &buf);
1328 int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
1329 if (maxThreadsPerPkg == 0) {
1330 maxThreadsPerPkg = 1;
1344 __kmp_x86_cpuid(0, 0, &buf);
1346 __kmp_x86_cpuid(4, 0, &buf);
1347 nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
1365 __kmp_ncores = __kmp_xproc;
1366 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1367 __kmp_nThreadsPerCore = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
1387 kmp_affin_mask_t *oldMask;
1388 KMP_CPU_ALLOC(oldMask);
1389 KMP_ASSERT(oldMask != NULL);
1390 __kmp_get_system_affinity(oldMask, TRUE);
1418 apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
1419 __kmp_avail_proc *
sizeof(apicThreadInfo));
1420 unsigned nApics = 0;
1421 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
1428 __kmp_affinity_dispatch->bind_thread(i);
1429 threadInfo[nApics].osId = i;
1432 __kmp_x86_cpuid(1, 0, &buf);
1433 if (((buf.edx >> 9) & 1) == 0) {
1434 __kmp_set_system_affinity(oldMask, TRUE);
1435 __kmp_free(threadInfo);
1436 KMP_CPU_FREE(oldMask);
1437 *msg_id = kmp_i18n_str_ApicNotPresent;
1440 threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
1441 threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
1442 if (threadInfo[nApics].maxThreadsPerPkg == 0) {
1443 threadInfo[nApics].maxThreadsPerPkg = 1;
1452 __kmp_x86_cpuid(0, 0, &buf);
1454 __kmp_x86_cpuid(4, 0, &buf);
1455 threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
1457 threadInfo[nApics].maxCoresPerPkg = 1;
1461 int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
1462 threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
1464 int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
1465 int widthT = widthCT - widthC;
1470 __kmp_set_system_affinity(oldMask, TRUE);
1471 __kmp_free(threadInfo);
1472 KMP_CPU_FREE(oldMask);
1473 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1477 int maskC = (1 << widthC) - 1;
1478 threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;
1480 int maskT = (1 << widthT) - 1;
1481 threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
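    // Worked example (hypothetical values): with maxThreadsPerPkg = 16 the
    // total width widthCT is 4 and with maxCoresPerPkg = 8 the core width
    // widthC is 3, so widthT = 1; then pkgId = apicId >> 4,
    // coreId = (apicId >> 1) & 0x7 and threadId = apicId & 0x1.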
1488 __kmp_set_system_affinity(oldMask, TRUE);
1497 KMP_ASSERT(nApics > 0);
1499 __kmp_ncores = nPackages = 1;
1500 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
1509 if (__kmp_affinity_type == affinity_none) {
1510 __kmp_free(threadInfo);
1511 KMP_CPU_FREE(oldMask);
    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0].pkgId;
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
1520 if (__kmp_affinity_gran_levels < 0) {
1521 __kmp_affinity_gran_levels = 0;
1524 if (__kmp_affinity_verbose) {
1525 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1528 __kmp_free(threadInfo);
1529 KMP_CPU_FREE(oldMask);
  qsort(threadInfo, nApics, sizeof(*threadInfo),
        __kmp_affinity_cmp_apicThreadInfo_phys_id);
1552 __kmp_nThreadsPerCore = 1;
1553 unsigned nCores = 1;
1556 unsigned lastPkgId = threadInfo[0].pkgId;
1557 unsigned coreCt = 1;
1558 unsigned lastCoreId = threadInfo[0].coreId;
1559 unsigned threadCt = 1;
1560 unsigned lastThreadId = threadInfo[0].threadId;
1563 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1564 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1566 for (i = 1; i < nApics; i++) {
1567 if (threadInfo[i].pkgId != lastPkgId) {
1570 lastPkgId = threadInfo[i].pkgId;
1571 if ((
int)coreCt > nCoresPerPkg)
1572 nCoresPerPkg = coreCt;
1574 lastCoreId = threadInfo[i].coreId;
1575 if ((
int)threadCt > __kmp_nThreadsPerCore)
1576 __kmp_nThreadsPerCore = threadCt;
1578 lastThreadId = threadInfo[i].threadId;
1582 prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1583 prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1587 if (threadInfo[i].coreId != lastCoreId) {
1590 lastCoreId = threadInfo[i].coreId;
1591 if ((
int)threadCt > __kmp_nThreadsPerCore)
1592 __kmp_nThreadsPerCore = threadCt;
1594 lastThreadId = threadInfo[i].threadId;
1595 }
else if (threadInfo[i].threadId != lastThreadId) {
1597 lastThreadId = threadInfo[i].threadId;
1599 __kmp_free(threadInfo);
1600 KMP_CPU_FREE(oldMask);
1601 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1607 if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
1608 (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1609 __kmp_free(threadInfo);
1610 KMP_CPU_FREE(oldMask);
1611 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
  if ((int)coreCt > nCoresPerPkg)
    nCoresPerPkg = coreCt;
  if ((int)threadCt > __kmp_nThreadsPerCore)
    __kmp_nThreadsPerCore = threadCt;
1625 __kmp_ncores = nCores;
  if (__kmp_affinity_verbose) {
    KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (__kmp_affinity_uniform_topology()) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
1640 for (i = 0; i < nApics; ++i) {
1641 __kmp_pu_os_idx[i] = threadInfo[i].osId;
1643 if (__kmp_affinity_type == affinity_none) {
1644 __kmp_free(threadInfo);
1645 KMP_CPU_FREE(oldMask);
  int pkgLevel = 0;
  int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
  int threadLevel =
      (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
  unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

  KMP_ASSERT(depth > 0);
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1661 for (i = 0; i < nApics; ++i) {
1662 Address addr(depth);
1663 unsigned os = threadInfo[i].osId;
1666 if (pkgLevel >= 0) {
1667 addr.labels[d++] = threadInfo[i].pkgId;
1669 if (coreLevel >= 0) {
1670 addr.labels[d++] = threadInfo[i].coreId;
1672 if (threadLevel >= 0) {
1673 addr.labels[d++] = threadInfo[i].threadId;
1675 (*address2os)[i] = AddrUnsPair(addr, os);
1678 if (__kmp_affinity_gran_levels < 0) {
1681 __kmp_affinity_gran_levels = 0;
1682 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1683 __kmp_affinity_gran_levels++;
1685 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1686 __kmp_affinity_gran_levels++;
1688 if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1689 __kmp_affinity_gran_levels++;
1693 if (__kmp_affinity_verbose) {
1694 __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1695 coreLevel, threadLevel);
  __kmp_free(threadInfo);
  KMP_CPU_FREE(oldMask);
  return depth;
}
static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
                                              kmp_i18n_id_t *const msg_id) {
  cpuid_level_info_t levels[INTEL_LEVEL_TYPE_LAST];
  int ratio[KMP_HW_LAST];
  int count[KMP_HW_LAST];
  kmp_hw_t types[INTEL_LEVEL_TYPE_LAST];
  unsigned levels_index;
  kmp_cpuid buf;
  kmp_uint64 known_levels;
  int topology_leaf, highest_leaf, apic_id;
  int num_leaves;
  static int leaves[] = {0, 0};
  kmp_i18n_id_t leaf_message_id;

  KMP_BUILD_ASSERT(sizeof(known_levels) * CHAR_BIT > KMP_HW_LAST);

  *msg_id = kmp_i18n_null;

  // Figure out the known topology levels.
  known_levels = 0ull;
  for (int i = 0; i < INTEL_LEVEL_TYPE_LAST; ++i) {
    if (__kmp_intel_type_2_topology_type(i) != KMP_HW_UNKNOWN) {
      known_levels |= (1ull << i);
    }
  }
  // Get the highest cpuid leaf supported.
  __kmp_x86_cpuid(0, 0, &buf);
  highest_leaf = buf.eax;

  if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
    num_leaves = 1;
    leaves[0] = 11;
    leaf_message_id = kmp_i18n_str_NoLeaf11Support;
  } else if (__kmp_affinity_top_method == affinity_top_method_x2apicid_1f) {
    num_leaves = 1;
    leaves[0] = 31;
    leaf_message_id = kmp_i18n_str_NoLeaf31Support;
  } else {
    num_leaves = 2;
    leaves[0] = 31;
    leaves[1] = 11;
    leaf_message_id = kmp_i18n_str_NoLeaf11Support;
  }

  // Get the topology leaf to use (0x1F preferred, 0x0B as fallback).
  __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
  topology_leaf = -1;
  for (int i = 0; i < num_leaves; ++i) {
    int leaf = leaves[i];
    if (highest_leaf < leaf)
      continue;
    __kmp_x86_cpuid(leaf, 0, &buf);
    if (buf.ebx == 0)
      continue;
    topology_leaf = leaf;
    levels_index = __kmp_x2apicid_get_levels(leaf, levels, known_levels);
    if (levels_index == 0)
      continue;
    break;
  }
  if (topology_leaf == -1 || levels_index == 0) {
    *msg_id = leaf_message_id;
    return -1;
  }
  KMP_ASSERT(levels_index <= INTEL_LEVEL_TYPE_LAST);

  // If we aren't affinity capable, infer the machine topology using only the
  // data available from cpuid on the current thread, and __kmp_xproc.
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(__kmp_affinity_type == affinity_none);
    for (unsigned i = 0; i < levels_index; ++i) {
      if (levels[i].level_type == INTEL_LEVEL_TYPE_SMT) {
        __kmp_nThreadsPerCore = levels[i].nitems;
      } else if (levels[i].level_type == INTEL_LEVEL_TYPE_CORE) {
        nCoresPerPkg = levels[i].nitems;
      } else if (levels[i].level_type == INTEL_LEVEL_TYPE_DIE) {
        nDiesPerPkg = levels[i].nitems;
      }
    }
    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL, "KMP_AFFINITY", topology_leaf);
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // From here on, it is safe to call __kmp_get_system_affinity() and
  // __kmp_set_system_affinity(), even if __kmp_affinity_type == affinity_none.

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);
  // Allocate the data structure to be returned.
  int depth = levels_index;
  for (int i = depth - 1, j = 0; i >= 0; --i, ++j)
    types[j] = __kmp_intel_type_2_topology_type(levels[i].level_type);
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  unsigned int proc;
  int nApics = 0;
  KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
    cpuid_level_info_t my_levels[INTEL_LEVEL_TYPE_LAST];
    unsigned my_levels_index;

    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(proc);

    // Extract the labels for each level from the x2APIC id.
    __kmp_x86_cpuid(topology_leaf, 0, &buf);
    apic_id = buf.edx;
    Address addr(depth);
    my_levels_index =
        __kmp_x2apicid_get_levels(topology_leaf, my_levels, known_levels);
    if (my_levels_index == 0 || my_levels_index != levels_index) {
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }
    for (unsigned j = 0, idx = depth - 1; j < my_levels_index; ++j, --idx) {
      addr.labels[idx] = apic_id & my_levels[j].mask;
      if (j > 0)
        addr.labels[idx] >>= my_levels[j - 1].mask_width;
    }
    retval[nApics++] = AddrUnsPair(addr, proc);
  }

  // We've collected all the info we need; restore the old affinity mask.
  __kmp_set_system_affinity(oldMask, TRUE);
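// Example (hypothetical values): if the SMT level has mask 0x1 (width 1) and
// the core level mask 0x3e (width 6), then for apic_id 0x2b the thread label
// is 0x2b & 0x1 = 1 and the core label is (0x2b & 0x3e) >> 1 = 0x15.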
1870 KMP_ASSERT(nApics > 0);
1873 __kmp_ncores = nPackages = 1;
1874 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffUseGlobCpuidL, "KMP_AFFINITY", topology_leaf);
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }
    // Form an Address object which only includes the package level.
    int pkg_level = 0;
    for (int i = 0; i < depth; ++i)
      if (types[i] == KMP_HW_SOCKET) {
        pkg_level = i;
        break;
      }
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[pkg_level];
    retval[0].first = addr;
1900 if (__kmp_affinity_gran_levels < 0) {
1901 __kmp_affinity_gran_levels = 0;
1904 if (__kmp_affinity_verbose) {
1905 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1908 *address2os = retval;
1909 KMP_CPU_FREE(oldMask);
  // Sort the table by physical Id.
  qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

  __kmp_affinity_gather_enumeration_information(retval, nApics, depth, types,
                                                ratio, count);
  int thread_level, core_level, socket_level, die_level;
  thread_level = core_level = die_level = socket_level = -1;
  for (int level = 0; level < depth; ++level) {
    if (types[level] == KMP_HW_THREAD)
      thread_level = level;
    else if (types[level] == KMP_HW_CORE)
      core_level = level;
    else if (types[level] == KMP_HW_DIE)
      die_level = level;
    else if (types[level] == KMP_HW_SOCKET)
      socket_level = level;
  }
  __kmp_nThreadsPerCore =
      __kmp_affinity_calculate_ratio(ratio, thread_level, core_level);
  if (die_level > 0) {
    nDiesPerPkg =
        __kmp_affinity_calculate_ratio(ratio, die_level, socket_level);
    nCoresPerPkg = __kmp_affinity_calculate_ratio(ratio, core_level, die_level);
  } else {
    nCoresPerPkg =
        __kmp_affinity_calculate_ratio(ratio, core_level, socket_level);
  }
  if (socket_level >= 0)
    nPackages = count[socket_level];
  else
    nPackages = 1;
  if (core_level >= 0)
    __kmp_ncores = count[core_level];
  else
    __kmp_ncores = __kmp_avail_proc;
  // Check to see if the machine topology is uniform.
  unsigned uniform = __kmp_affinity_discover_uniformity(depth, ratio, count);

  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    kmp_hw_t numerator_type, denominator_type;
    KMP_INFORM(AffUseGlobCpuidL, "KMP_AFFINITY", topology_leaf);
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
1969 __kmp_str_buf_init(&buf);
1972 core_level = depth - 1;
1973 int ncores = count[core_level];
1975 denominator_type = KMP_HW_UNKNOWN;
1976 for (
int level = 0; level < depth; ++level) {
1979 numerator_type = types[level];
1983 __kmp_str_buf_print(
1985 __kmp_hw_get_catalog_string(numerator_type, plural));
1987 __kmp_str_buf_print(&buf,
" x %d %s/%s", c,
1988 __kmp_hw_get_catalog_string(numerator_type, plural),
1989 __kmp_hw_get_catalog_string(denominator_type));
1991 denominator_type = numerator_type;
1993 KMP_INFORM(TopologyGeneric,
"KMP_AFFINITY", buf.str, ncores);
1994 __kmp_str_buf_free(&buf);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (proc = 0; (int)proc < nApics; ++proc) {
    __kmp_pu_os_idx[proc] = retval[proc].second;
  }
  if (__kmp_affinity_type == affinity_none) {
2005 KMP_CPU_FREE(oldMask);
2011 depth = __kmp_affinity_remove_radix_one_levels(retval, nApics, depth, types);
2012 thread_level = core_level = die_level = socket_level = -1;
  for (int level = 0; level < depth; ++level) {
    if (types[level] == KMP_HW_THREAD)
      thread_level = level;
    else if (types[level] == KMP_HW_CORE)
      core_level = level;
    else if (types[level] == KMP_HW_DIE)
      die_level = level;
    else if (types[level] == KMP_HW_SOCKET)
      socket_level = level;
  }
2024 if (__kmp_affinity_gran_levels < 0) {
2027 __kmp_affinity_gran_levels = 0;
2028 if ((thread_level >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
2029 __kmp_affinity_gran_levels++;
2031 if ((core_level >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
2032 __kmp_affinity_gran_levels++;
2034 if ((die_level >= 0) && (__kmp_affinity_gran > affinity_gran_die)) {
2035 __kmp_affinity_gran_levels++;
2037 if (__kmp_affinity_gran > affinity_gran_package) {
2038 __kmp_affinity_gran_levels++;
2042 if (__kmp_affinity_verbose) {
2043 __kmp_affinity_print_topology(retval, nApics, depth, types);
2046 KMP_CPU_FREE(oldMask);
2047 *address2os = retval;
#define osIdIndex 0
#define threadIdIndex 1
#define coreIdIndex 2
#define pkgIdIndex 3
#define nodeIdIndex 4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;
static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
                                                  const void *b) {
  unsigned i;
  const unsigned *aa = *(unsigned *const *)a;
  const unsigned *bb = *(unsigned *const *)b;
  for (i = maxIndex;; i--) {
    if (aa[i] < bb[i])
      return -1;
    if (aa[i] > bb[i])
      return 1;
    if (i == osIdIndex)
      break;
  }
  return 0;
}
2078 #if KMP_USE_HIER_SCHED
2080 static void __kmp_dispatch_set_hierarchy_values() {
2086 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
2087 nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
2088 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) &&   \
    KMP_MIC_SUPPORTED
  if (__kmp_mic_type >= mic3)
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
  else
#endif
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
2096 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
2097 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
2098 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
2101 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
2102 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
2103 __kmp_nThreadsPerCore;
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) &&   \
    KMP_MIC_SUPPORTED
  if (__kmp_mic_type >= mic3)
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        2 * __kmp_nThreadsPerCore;
  else
#endif
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        __kmp_nThreadsPerCore;
2113 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
2114 nCoresPerPkg * __kmp_nThreadsPerCore;
2115 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
2116 nCoresPerPkg * __kmp_nThreadsPerCore;
2117 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
2118 nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
// Return the index into the hierarchy for this tid and layer type (L1, L2,
// etc), i.e. this thread's L1, or this thread's L2, etc.
int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {
  int index = type + 1;
  int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
  KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
  if (type == kmp_hier_layer_e::LAYER_THREAD)
    return tid;
  else if (type == kmp_hier_layer_e::LAYER_LOOP)
    return 0;
  KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
  if (tid >= num_hw_threads)
    tid = tid % num_hw_threads;
  return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
}
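// Example: with 2 threads per core, tid 5 maps to L1 (core) index
// (5 / __kmp_hier_threads_per[LAYER_L1 + 1]) % __kmp_hier_max_units[LAYER_L1 + 1]
// = (5 / 2) % __kmp_ncores, i.e. core 2 on a machine with at least 3 cores.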
// Return the number of t1 units per t2 unit.
int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
  int i1 = t1 + 1;
  int i2 = t2 + 1;
  KMP_DEBUG_ASSERT(i1 <= i2);
  KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
  KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
  KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
  // (nthreads/t2) / (nthreads/t1) = t1 / t2
  return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
}
static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
                                             int *line,
                                             kmp_i18n_id_t *const msg_id,
                                             FILE *f) {
  *msg_id = kmp_i18n_null;
  // Scan the file once, counting the number of "processor" (osId) fields and
  // finding the highest value of <n> for any node_<n> field.
  unsigned num_records = 0;
  while (!feof(f)) {
    buf[sizeof(buf) - 1] = 1;
    if (!fgets(buf, sizeof(buf), f)) {
      // Read errors are presumably due to EOF.
      break;
    }

    char s1[] = "processor";
    if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
      num_records++;
      continue;
    }

    unsigned level;
    if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
      // Validate the input field before using it.
      if (level > (unsigned)__kmp_xproc) {
        level = __kmp_xproc;
      }
      if (nodeIdIndex + level >= maxIndex) {
        maxIndex = nodeIdIndex + level;
      }
    }
  }
2192 if (num_records == 0) {
2194 *msg_id = kmp_i18n_str_NoProcRecords;
2197 if (num_records > (
unsigned)__kmp_xproc) {
2199 *msg_id = kmp_i18n_str_TooManyProcRecords;
2208 if (fseek(f, 0, SEEK_SET) != 0) {
2210 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
  unsigned **threadInfo =
      (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
  unsigned i;
  for (i = 0; i <= num_records; i++) {
    threadInfo[i] =
        (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  }
2224 #define CLEANUP_THREAD_INFO \
2225 for (i = 0; i <= num_records; i++) { \
2226 __kmp_free(threadInfo[i]); \
2228 __kmp_free(threadInfo);
2233 #define INIT_PROC_INFO(p) \
2234 for (__index = 0; __index <= maxIndex; __index++) { \
2235 (p)[__index] = UINT_MAX; \
2238 for (i = 0; i <= num_records; i++) {
2239 INIT_PROC_INFO(threadInfo[i]);
2242 unsigned num_avail = 0;
    buf[sizeof(buf) - 1] = 1;
    bool long_line = false;
    if (!fgets(buf, sizeof(buf), f)) {
2256 for (i = 0; i <= maxIndex; i++) {
2257 if (threadInfo[num_avail][i] != UINT_MAX) {
2265 }
else if (!buf[
sizeof(buf) - 1]) {
2270 #define CHECK_LINE \
2272 CLEANUP_THREAD_INFO; \
2273 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
    char s1[] = "processor";
    if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
      CHECK_LINE;
      char *p = strchr(buf + sizeof(s1) - 1, ':');
      unsigned val;
      if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
        goto no_val;
      if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
#if KMP_ARCH_AARCH64
        // Handle the old AArch64 /proc/cpuinfo layout differently: it lists
        // all of the "processor" entries under a single "Processor" heading,
        // so a repeated field just starts the next record.
        ++num_avail;
#else
        goto dup_field;
#endif
      threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
      char path[256];
      KMP_SNPRINTF(
          path, sizeof(path),
          "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
          threadInfo[num_avail][osIdIndex]);
      __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

      KMP_SNPRINTF(path, sizeof(path),
                   "/sys/devices/system/cpu/cpu%u/topology/core_id",
                   threadInfo[num_avail][osIdIndex]);
      __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
      continue;
#else
    char s2[] = "physical id";
    if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
      CHECK_LINE;
      char *p = strchr(buf + sizeof(s2) - 1, ':');
      unsigned val;
      if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
        goto no_val;
      if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
        goto dup_field;
      threadInfo[num_avail][pkgIdIndex] = val;
      continue;
    }
    char s3[] = "core id";
    if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
      CHECK_LINE;
      char *p = strchr(buf + sizeof(s3) - 1, ':');
      unsigned val;
      if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
        goto no_val;
      if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
        goto dup_field;
      threadInfo[num_avail][coreIdIndex] = val;
      continue;
    }
    char s4[] = "thread id";
    if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
      CHECK_LINE;
      char *p = strchr(buf + sizeof(s4) - 1, ':');
      unsigned val;
      if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
        goto no_val;
      if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
        goto dup_field;
      threadInfo[num_avail][threadIdIndex] = val;
      continue;
    }
    unsigned level;
    if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
      CHECK_LINE;
      char *p = strchr(buf + sizeof(s4) - 1, ':');
      unsigned val;
      if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
        goto no_val;
      KMP_ASSERT(nodeIdIndex + level <= maxIndex);
      if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
        goto dup_field;
      threadInfo[num_avail][nodeIdIndex + level] = val;
      continue;
    }
2366 if ((*buf != 0) && (*buf !=
'\n')) {
2371 while (((ch = fgetc(f)) != EOF) && (ch !=
'\n'))
2379 if ((
int)num_avail == __kmp_xproc) {
2380 CLEANUP_THREAD_INFO;
2381 *msg_id = kmp_i18n_str_TooManyEntries;
2387 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2388 CLEANUP_THREAD_INFO;
2389 *msg_id = kmp_i18n_str_MissingProcField;
2392 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2393 CLEANUP_THREAD_INFO;
2394 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2399 if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
2400 __kmp_affin_fullMask)) {
2401 INIT_PROC_INFO(threadInfo[num_avail]);
2408 KMP_ASSERT(num_avail <= num_records);
2409 INIT_PROC_INFO(threadInfo[num_avail]);
2414 CLEANUP_THREAD_INFO;
2415 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2419 CLEANUP_THREAD_INFO;
2420 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2425 #if KMP_MIC && REDUCE_TEAM_SIZE
2426 unsigned teamSize = 0;
2438 KMP_ASSERT(num_avail > 0);
2439 KMP_ASSERT(num_avail <= num_records);
2440 if (num_avail == 1) {
2442 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
    if (__kmp_affinity_verbose) {
      if (!KMP_AFFINITY_CAPABLE()) {
        KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      }
      int index;
      kmp_str_buf_t buf;
      __kmp_str_buf_init(&buf);
      __kmp_str_buf_print(&buf, "1");
      for (index = maxIndex - 1; index > pkgIdIndex; index--) {
        __kmp_str_buf_print(&buf, " x 1");
      }
      KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
      __kmp_str_buf_free(&buf);
    }
    if (__kmp_affinity_type == affinity_none) {
      CLEANUP_THREAD_INFO;
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0][pkgIdIndex];
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    CLEANUP_THREAD_INFO;
    return 1;
  }

  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, num_avail, sizeof(*threadInfo),
        __kmp_affinity_cmp_ProcCpuInfo_phys_id);
  unsigned *counts =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *maxCt =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *totals =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *lastId =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));

  bool assign_thread_ids = false;
  unsigned threadIdCt;
  unsigned index;
restart_radix_check:
  threadIdCt = 0;

  // The first entry seeds the counts; assign a thread id if requested.
  if (assign_thread_ids) {
    if (threadInfo[0][threadIdIndex] == UINT_MAX) {
      threadInfo[0][threadIdIndex] = threadIdCt++;
    } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
      threadIdCt = threadInfo[0][threadIdIndex] + 1;
    }
  }
  for (index = 0; index <= maxIndex; index++) {
    counts[index] = 1;
    maxCt[index] = 1;
    totals[index] = 1;
    lastId[index] = threadInfo[0][index];
  }
2532 for (i = 1; i < num_avail; i++) {
2535 for (index = maxIndex; index >= threadIdIndex; index--) {
2536 if (assign_thread_ids && (index == threadIdIndex)) {
2538 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2539 threadInfo[i][threadIdIndex] = threadIdCt++;
2543 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2544 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2547 if (threadInfo[i][index] != lastId[index]) {
2552 for (index2 = threadIdIndex; index2 < index; index2++) {
2554 if (counts[index2] > maxCt[index2]) {
2555 maxCt[index2] = counts[index2];
2558 lastId[index2] = threadInfo[i][index2];
2562 lastId[index] = threadInfo[i][index];
2564 if (assign_thread_ids && (index > threadIdIndex)) {
2566 #if KMP_MIC && REDUCE_TEAM_SIZE
2569 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
2576 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2577 threadInfo[i][threadIdIndex] = threadIdCt++;
2583 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2584 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2590 if (index < threadIdIndex) {
2594 if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
2599 CLEANUP_THREAD_INFO;
2600 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2606 assign_thread_ids =
true;
2607 goto restart_radix_check;
2611 #if KMP_MIC && REDUCE_TEAM_SIZE
2614 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
2617 for (index = threadIdIndex; index <= maxIndex; index++) {
2618 if (counts[index] > maxCt[index]) {
2619 maxCt[index] = counts[index];
2623 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2624 nCoresPerPkg = maxCt[coreIdIndex];
2625 nPackages = totals[pkgIdIndex];
2628 unsigned prod = totals[maxIndex];
2629 for (index = threadIdIndex; index < maxIndex; index++) {
2630 prod *= maxCt[index];
2632 bool uniform = (prod == totals[threadIdIndex]);
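  // Example: 2 packages x 8 cores x 2 threads gives prod = 32; the topology is
  // uniform only if exactly 32 threads were actually enumerated
  // (totals[threadIdIndex] == 32).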
2638 __kmp_ncores = totals[coreIdIndex];
  if (__kmp_affinity_verbose) {
    if (!KMP_AFFINITY_CAPABLE()) {
      KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    } else {
      KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    }
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
    for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
               maxCt[threadIdIndex], __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }
2671 #if KMP_MIC && REDUCE_TEAM_SIZE
2673 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2674 __kmp_dflt_team_nth = teamSize;
2675 KA_TRACE(20, (
"__kmp_affinity_create_cpuinfo_map: setting "
2676 "__kmp_dflt_team_nth = %d\n",
2677 __kmp_dflt_team_nth));
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < num_avail; ++i) {
    __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
  }
2688 if (__kmp_affinity_type == affinity_none) {
2693 CLEANUP_THREAD_INFO;
  // Count the number of levels which have more nodes than their parent level;
  // such levels are kept in the map. The package level is always kept.
  bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
  for (index = threadIdIndex; index < maxIndex; index++) {
    KMP_ASSERT(totals[index] >= totals[index + 1]);
    inMap[index] = (totals[index] > totals[index + 1]);
  }
  inMap[maxIndex] = (totals[maxIndex] > 1);
  inMap[pkgIdIndex] = true;

  int depth = 0;
  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (inMap[index]) {
      depth++;
    }
  }
  KMP_ASSERT(depth > 0);
  // Construct the data structure that is to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
  int pkgLevel = -1;
  int coreLevel = -1;
  int threadLevel = -1;

  for (i = 0; i < num_avail; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i][osIdIndex];
    int src_index;
    int dst_index = 0;

    for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
      if (!inMap[src_index]) {
        continue;
      }
      addr.labels[dst_index] = threadInfo[i][src_index];
      if (src_index == pkgIdIndex) {
        pkgLevel = dst_index;
      } else if (src_index == coreIdIndex) {
        coreLevel = dst_index;
      } else if (src_index == threadIdIndex) {
        threadLevel = dst_index;
      }
      dst_index++;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }
2747 if (__kmp_affinity_gran_levels < 0) {
2751 __kmp_affinity_gran_levels = 0;
2752 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2753 if (!inMap[src_index]) {
2756 switch (src_index) {
2758 if (__kmp_affinity_gran > affinity_gran_thread) {
2759 __kmp_affinity_gran_levels++;
2764 if (__kmp_affinity_gran > affinity_gran_core) {
2765 __kmp_affinity_gran_levels++;
2770 if (__kmp_affinity_gran > affinity_gran_package) {
2771 __kmp_affinity_gran_levels++;
2778 if (__kmp_affinity_verbose) {
2779 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2780 coreLevel, threadLevel);
2788 CLEANUP_THREAD_INFO;
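// __kmp_create_masks: build one affinity mask per OS proc from the sorted
// address2os table. On return, *maxIndex is the largest OS id seen and
// *numUnique is the number of distinct masks at the current granularity
// (threads whose addresses agree above __kmp_affinity_gran_levels share a
// mask).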
static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
                                            unsigned *numUnique,
                                            AddrUnsPair *address2os,
                                            unsigned numAddrs) {
2804 KMP_ASSERT(numAddrs > 0);
2805 depth = address2os[0].first.depth;
2808 for (i = numAddrs - 1;; --i) {
2809 unsigned osId = address2os[i].second;
2810 if (osId > maxOsId) {
2816 kmp_affin_mask_t *osId2Mask;
2817 KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));
qsort(address2os, numAddrs, sizeof(*address2os),
      __kmp_affinity_cmp_Address_labels);
2824 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2825 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
if (__kmp_affinity_gran_levels >= (int)depth) {
2829 if (__kmp_affinity_verbose ||
2830 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
2831 KMP_WARNING(AffThreadsMayMigrate);
2839 unsigned unique = 0;
2841 unsigned leader = 0;
2842 Address *leaderAddr = &(address2os[0].first);
2843 kmp_affin_mask_t *sum;
2844 KMP_CPU_ALLOC_ON_STACK(sum);
2846 KMP_CPU_SET(address2os[0].second, sum);
2847 for (i = 1; i < numAddrs; i++) {
2851 if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
2852 KMP_CPU_SET(address2os[i].second, sum);
2858 for (; j < i; j++) {
2859 unsigned osId = address2os[j].second;
2860 KMP_DEBUG_ASSERT(osId <= maxOsId);
2861 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2862 KMP_CPU_COPY(mask, sum);
2863 address2os[j].first.leader = (j == leader);
2869 leaderAddr = &(address2os[i].first);
2871 KMP_CPU_SET(address2os[i].second, sum);
2876 for (; j < i; j++) {
2877 unsigned osId = address2os[j].second;
2878 KMP_DEBUG_ASSERT(osId <= maxOsId);
2879 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2880 KMP_CPU_COPY(mask, sum);
2881 address2os[j].first.leader = (j == leader);
2884 KMP_CPU_FREE_FROM_STACK(sum);
2886 *maxIndex = maxOsId;
2887 *numUnique = unique;
2894 static kmp_affin_mask_t *newMasks;
2895 static int numNewMasks;
2896 static int nextNewMask;
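// newMasks is an internal, dynamically grown array of masks shared by the
// proclist/placelist parsers below. ADD_MASK appends a copy of _mask, growing
// the array when it is full; ADD_MASK_OSID first validates the OS id against
// maxOsId and the osId2Mask table, warning about and skipping invalid ids.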
2898 #define ADD_MASK(_mask) \
2900 if (nextNewMask >= numNewMasks) { \
2903 kmp_affin_mask_t *temp; \
2904 KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
2905 for (i = 0; i < numNewMasks / 2; i++) { \
2906 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i); \
2907 kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i); \
2908 KMP_CPU_COPY(dest, src); \
2910 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2); \
2913 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2917 #define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId) \
2919 if (((_osId) > _maxOsId) || \
2920 (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
2921 if (__kmp_affinity_verbose || \
2922 (__kmp_affinity_warnings && \
2923 (__kmp_affinity_type != affinity_none))) { \
2924 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2927 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
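// Parse an explicit KMP_AFFINITY proclist into an array of affinity masks.
// Entries may be single OS proc ids, brace-enclosed groups, or ranges with an
// optional stride (e.g. "3,0-2,{7,8},10-20:2" -- illustrative only); invalid
// OS proc ids are reported and ignored.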
static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
                                            unsigned int *out_numMasks,
                                            const char *proclist,
                                            kmp_affin_mask_t *osId2Mask,
                                            int maxOsId) {
const char *scan = proclist;
const char *next = proclist;
2945 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
2947 kmp_affin_mask_t *sumMask;
2948 KMP_CPU_ALLOC(sumMask);
2952 int start, end, stride;
if (*next == '\0') {
KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist");
num = __kmp_str_to_int(scan, *next);
KMP_ASSERT2(num >= 0, "bad explicit proc list");
2974 if ((num > maxOsId) ||
2975 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2976 if (__kmp_affinity_verbose ||
2977 (__kmp_affinity_warnings &&
2978 (__kmp_affinity_type != affinity_none))) {
2979 KMP_WARNING(AffIgnoreInvalidProcID, num);
2981 KMP_CPU_ZERO(sumMask);
2983 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
num = __kmp_str_to_int(scan, *next);
KMP_ASSERT2(num >= 0, "bad explicit proc list");
3010 if ((num > maxOsId) ||
3011 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3012 if (__kmp_affinity_verbose ||
3013 (__kmp_affinity_warnings &&
3014 (__kmp_affinity_type != affinity_none))) {
3015 KMP_WARNING(AffIgnoreInvalidProcID, num);
3018 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
start = __kmp_str_to_int(scan, *next);
KMP_ASSERT2(start >= 0, "bad explicit proc list");
ADD_MASK_OSID(start, osId2Mask, maxOsId);
KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
end = __kmp_str_to_int(scan, *next);
KMP_ASSERT2(end >= 0, "bad explicit proc list");
KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
stride = __kmp_str_to_int(scan, *next);
KMP_ASSERT2(stride >= 0, "bad explicit proc list");
KMP_ASSERT2(stride != 0, "bad explicit proc list");
KMP_ASSERT2(start <= end, "bad explicit proc list");
KMP_ASSERT2(start >= end, "bad explicit proc list");
KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
ADD_MASK_OSID(start, osId2Mask, maxOsId);
} while (start <= end);
ADD_MASK_OSID(start, osId2Mask, maxOsId);
} while (start >= end);
3114 *out_numMasks = nextNewMask;
3115 if (nextNewMask == 0) {
3117 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3120 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
3121 for (i = 0; i < nextNewMask; i++) {
3122 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
3123 kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
3124 KMP_CPU_COPY(dest, src);
3126 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3127 KMP_CPU_FREE(sumMask);
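// Parse one subplace of a place list: a starting OS proc id, an optional
// ":count", and an optional ":stride" (the stride may be signed), OR-ing each
// selected OS proc into tempMask. Invalid ids are warned about and skipped.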
static void __kmp_process_subplace_list(const char **scan,
                                        kmp_affin_mask_t *osId2Mask,
                                        int maxOsId, kmp_affin_mask_t *tempMask,
                                        int *setSize) {
int start, count, stride, i;
KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
start = __kmp_str_to_int(*scan, *next);
KMP_ASSERT(start >= 0);
if (**scan == '}' || **scan == ',') {
3171 if ((start > maxOsId) ||
3172 (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3173 if (__kmp_affinity_verbose ||
3174 (__kmp_affinity_warnings &&
3175 (__kmp_affinity_type != affinity_none))) {
3176 KMP_WARNING(AffIgnoreInvalidProcID, start);
3179 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
if (**scan == '}') {
KMP_ASSERT2(**scan == ':', "bad explicit places list");
KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
count = __kmp_str_to_int(*scan, *next);
KMP_ASSERT(count >= 0);
if (**scan == '}' || **scan == ',') {
3203 for (i = 0; i < count; i++) {
3204 if ((start > maxOsId) ||
3205 (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3206 if (__kmp_affinity_verbose ||
3207 (__kmp_affinity_warnings &&
3208 (__kmp_affinity_type != affinity_none))) {
3209 KMP_WARNING(AffIgnoreInvalidProcID, start);
3213 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
if (**scan == '}') {
KMP_ASSERT2(**scan == ':', "bad explicit places list");
if (**scan == '+') {
if (**scan == '-') {
KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
stride = __kmp_str_to_int(*scan, *next);
KMP_ASSERT(stride >= 0);
if (**scan == '}' || **scan == ',') {
3254 for (i = 0; i < count; i++) {
3255 if ((start > maxOsId) ||
3256 (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3257 if (__kmp_affinity_verbose ||
3258 (__kmp_affinity_warnings &&
3259 (__kmp_affinity_type != affinity_none))) {
3260 KMP_WARNING(AffIgnoreInvalidProcID, start);
3264 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
if (**scan == '}') {
KMP_ASSERT2(0, "bad explicit places list");
static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
                                int maxOsId, kmp_affin_mask_t *tempMask,
                                int *setSize) {
if (**scan == '{') {
__kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
KMP_ASSERT2(**scan == '}', "bad explicit places list");
} else if (**scan == '!') {
__kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
KMP_CPU_COMPLEMENT(maxOsId, tempMask);
} else if ((**scan >= '0') && (**scan <= '9')) {
int num = __kmp_str_to_int(*scan, *next);
KMP_ASSERT(num >= 0);
3301 if ((num > maxOsId) ||
3302 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3303 if (__kmp_affinity_verbose ||
3304 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
3305 KMP_WARNING(AffIgnoreInvalidProcID, num);
3308 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
KMP_ASSERT2(0, "bad explicit places list");
void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
                                      unsigned int *out_numMasks,
                                      const char *placelist,
                                      kmp_affin_mask_t *osId2Mask,
                                      int maxOsId) {
int i, j, count, stride, sign;
const char *scan = placelist;
const char *next = placelist;
3328 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
3334 kmp_affin_mask_t *tempMask;
3335 kmp_affin_mask_t *previousMask;
3336 KMP_CPU_ALLOC(tempMask);
3337 KMP_CPU_ZERO(tempMask);
3338 KMP_CPU_ALLOC(previousMask);
3339 KMP_CPU_ZERO(previousMask);
3343 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
if (*scan == '\0' || *scan == ',') {
KMP_CPU_ZERO(tempMask);
if (*scan == '\0') {
KMP_ASSERT2(*scan == ':', "bad explicit places list");
KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
count = __kmp_str_to_int(scan, *next);
KMP_ASSERT(count >= 0);
if (*scan == '\0' || *scan == ',') {
KMP_ASSERT2(*scan == ':', "bad explicit places list");
KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
stride = __kmp_str_to_int(scan, *next);
KMP_DEBUG_ASSERT(stride >= 0);
3406 for (i = 0; i < count; i++) {
3411 KMP_CPU_COPY(previousMask, tempMask);
3412 ADD_MASK(previousMask);
3413 KMP_CPU_ZERO(tempMask);
3415 KMP_CPU_SET_ITERATE(j, previousMask) {
3416 if (!KMP_CPU_ISSET(j, previousMask)) {
3419 if ((j + stride > maxOsId) || (j + stride < 0) ||
3420 (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
3421 (!KMP_CPU_ISSET(j + stride,
3422 KMP_CPU_INDEX(osId2Mask, j + stride)))) {
3423 if ((__kmp_affinity_verbose ||
3424 (__kmp_affinity_warnings &&
3425 (__kmp_affinity_type != affinity_none))) &&
3427 KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
3431 KMP_CPU_SET(j + stride, tempMask);
3435 KMP_CPU_ZERO(tempMask);
if (*scan == '\0') {
KMP_ASSERT2(0, "bad explicit places list");
3451 *out_numMasks = nextNewMask;
3452 if (nextNewMask == 0) {
3454 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3457 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
3458 KMP_CPU_FREE(tempMask);
3459 KMP_CPU_FREE(previousMask);
3460 for (i = 0; i < nextNewMask; i++) {
3461 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
3462 kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
3463 KMP_CPU_COPY(dest, src);
3465 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3469 #undef ADD_MASK_OSID
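// Helpers for KMP_HW_SUBSET pruning with hwloc: the first clears every PU
// under object o from __kmp_affin_fullMask, the second reports whether o
// still has any PU left in the full mask.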
3472 static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
3475 hwloc_obj_t hT = NULL;
3476 int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
for (int i = 0; i < N; ++i) {
KMP_DEBUG_ASSERT(hT);
unsigned idx = hT->os_index;
if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
KMP_CPU_CLR(idx, __kmp_affin_fullMask);
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3485 hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
3490 static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
3492 hwloc_obj_t hT = NULL;
3493 int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
for (int i = 0; i < N; ++i) {
3495 KMP_DEBUG_ASSERT(hT);
3496 unsigned idx = hT->os_index;
3497 if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
3499 hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
3506 AddrUnsPair *newAddr;
3507 if (__kmp_hws_requested == 0)
3510 if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
3514 hwloc_topology_t tp = __kmp_hwloc_topology;
3515 int nS = 0, nN = 0, nL = 0, nC = 0,
3517 int nCr = 0, nTr = 0;
3518 int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0;
3519 hwloc_obj_t hT, hC, hL, hN, hS;
3523 int numa_support = 0, tile_support = 0;
3524 if (__kmp_pu_os_idx)
hT = hwloc_get_pu_obj_by_os_index(tp, __kmp_pu_os_idx[__kmp_avail_proc - 1]);
3528 hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
3530 KMP_WARNING(AffHWSubsetUnsupported);
3534 hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
3535 hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
3536 if (hN != NULL && hN->depth > hS->depth) {
} else if (__kmp_hws_node.num > 0) {
3540 KMP_WARNING(AffHWSubsetUnsupported);
3544 L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
3545 hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
3547 __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
} else if (__kmp_hws_tile.num > 0) {
3550 if (__kmp_hws_core.num == 0) {
3551 __kmp_hws_core = __kmp_hws_tile;
3552 __kmp_hws_tile.num = 0;
3555 KMP_WARNING(AffHWSubsetInvalid);
3562 if (__kmp_hws_socket.num == 0)
3563 __kmp_hws_socket.num = nPackages;
3564 if (__kmp_hws_socket.offset >= nPackages) {
3565 KMP_WARNING(AffHWSubsetManySockets);
3570 int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
3572 if (__kmp_hws_node.num == 0)
3573 __kmp_hws_node.num = NN;
3574 if (__kmp_hws_node.offset >= NN) {
3575 KMP_WARNING(AffHWSubsetManyNodes);
3580 int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
3581 if (__kmp_hws_tile.num == 0) {
3582 __kmp_hws_tile.num = NL + 1;
3584 if (__kmp_hws_tile.offset >= NL) {
3585 KMP_WARNING(AffHWSubsetManyTiles);
3588 int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
3590 if (__kmp_hws_core.num == 0)
3591 __kmp_hws_core.num = NC;
3592 if (__kmp_hws_core.offset >= NC) {
3593 KMP_WARNING(AffHWSubsetManyCores);
3597 int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
3599 if (__kmp_hws_core.num == 0)
3600 __kmp_hws_core.num = NC;
3601 if (__kmp_hws_core.offset >= NC) {
3602 KMP_WARNING(AffHWSubsetManyCores);
3609 int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
3610 if (__kmp_hws_tile.num == 0)
3611 __kmp_hws_tile.num = NL;
3612 if (__kmp_hws_tile.offset >= NL) {
3613 KMP_WARNING(AffHWSubsetManyTiles);
3616 int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
3618 if (__kmp_hws_core.num == 0)
3619 __kmp_hws_core.num = NC;
3620 if (__kmp_hws_core.offset >= NC) {
3621 KMP_WARNING(AffHWSubsetManyCores);
3625 int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
3627 if (__kmp_hws_core.num == 0)
3628 __kmp_hws_core.num = NC;
3629 if (__kmp_hws_core.offset >= NC) {
3630 KMP_WARNING(AffHWSubsetManyCores);
3635 if (__kmp_hws_proc.num == 0)
3636 __kmp_hws_proc.num = __kmp_nThreadsPerCore;
3637 if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
3638 KMP_WARNING(AffHWSubsetManyProcs);
3644 newAddr = (AddrUnsPair *)__kmp_allocate(
sizeof(AddrUnsPair) *
3648 int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
for (int s = 0; s < NP; ++s) {
3651 hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
3652 if (!__kmp_hwloc_obj_has_PUs(tp, hS))
3655 if (nS <= __kmp_hws_socket.offset ||
3656 nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
3657 n_old += __kmp_hwloc_skip_PUs_obj(tp, hS);
3668 __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, &hN);
for (int n = 0; n < NN; ++n) {
3671 if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
3672 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3676 if (nN <= __kmp_hws_node.offset ||
3677 nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
3679 n_old += __kmp_hwloc_skip_PUs_obj(tp, hN);
3680 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3687 int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
for (int l = 0; l < NL; ++l) {
3690 if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
3691 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3695 if (nL <= __kmp_hws_tile.offset ||
3696 nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
3698 n_old += __kmp_hwloc_skip_PUs_obj(tp, hL);
3699 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
for (int c = 0; c < NC; ++c) {
3710 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3711 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3715 if (nC <= __kmp_hws_core.offset ||
3716 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3718 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3719 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3727 int NT = __kmp_hwloc_count_children_by_type(tp, hC,
for (int t = 0; t < NT; ++t) {
3732 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3733 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3737 if (nT <= __kmp_hws_proc.offset ||
3738 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3740 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3743 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3748 newAddr[n_new] = (*pAddr)[n_old];
3751 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3759 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3761 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3769 __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC);
for (int c = 0; c < NC; ++c) {
3772 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3773 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3777 if (nC <= __kmp_hws_core.offset ||
3778 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3780 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3781 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3789 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
for (int t = 0; t < NT; ++t) {
3793 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3794 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3798 if (nT <= __kmp_hws_proc.offset ||
3799 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3801 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3804 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3809 newAddr[n_new] = (*pAddr)[n_old];
3812 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3820 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3823 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3831 int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
for (int l = 0; l < NL; ++l) {
3834 if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
3835 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3839 if (nL <= __kmp_hws_tile.offset ||
3840 nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
3842 n_old += __kmp_hwloc_skip_PUs_obj(tp, hL);
3843 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3851 __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
for (int c = 0; c < NC; ++c) {
3854 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3855 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3859 if (nC <= __kmp_hws_core.offset ||
3860 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3862 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3863 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3872 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
for (int t = 0; t < NT; ++t) {
3876 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3877 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3881 if (nT <= __kmp_hws_proc.offset ||
3882 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3884 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3887 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3892 newAddr[n_new] = (*pAddr)[n_old];
3895 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3903 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3905 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3913 __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);
for (int c = 0; c < NC; ++c) {
3916 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3917 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3921 if (nC <= __kmp_hws_core.offset ||
3922 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3924 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3925 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3934 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
for (int t = 0; t < NT; ++t) {
3938 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3939 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3943 if (nT <= __kmp_hws_proc.offset ||
3944 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3946 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3949 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3954 newAddr[n_new] = (*pAddr)[n_old];
3957 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3965 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3977 KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
3978 KMP_DEBUG_ASSERT(nPkg > 0);
3979 KMP_DEBUG_ASSERT(nCpP > 0);
3980 KMP_DEBUG_ASSERT(nTpC > 0);
3981 KMP_DEBUG_ASSERT(nCo > 0);
3982 KMP_DEBUG_ASSERT(nPkg <= nPackages);
3983 KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
3984 KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
3985 KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
3988 nCoresPerPkg = nCpP;
3989 __kmp_nThreadsPerCore = nTpC;
3990 __kmp_avail_proc = n_new;
3996 int n_old = 0, n_new = 0, proc_num = 0;
3997 if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
3998 KMP_WARNING(AffHWSubsetNoHWLOC);
4001 if (__kmp_hws_socket.num == 0)
4002 __kmp_hws_socket.num = nPackages;
4003 if (__kmp_hws_die.num == 0)
4004 __kmp_hws_die.num = nDiesPerPkg;
4005 if (__kmp_hws_core.num == 0)
4006 __kmp_hws_core.num = nCoresPerPkg;
4007 if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore)
4008 __kmp_hws_proc.num = __kmp_nThreadsPerCore;
4009 if (!__kmp_affinity_uniform_topology()) {
4010 KMP_WARNING(AffHWSubsetNonUniform);
4014 KMP_WARNING(AffHWSubsetNonThreeLevel);
4017 if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
4018 KMP_WARNING(AffHWSubsetManySockets);
4021 if (depth == 4 && __kmp_hws_die.offset + __kmp_hws_die.num > nDiesPerPkg) {
4022 KMP_WARNING(AffHWSubsetManyDies);
4025 if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {
4026 KMP_WARNING(AffHWSubsetManyCores);
4031 newAddr = (AddrUnsPair *)__kmp_allocate(
4032 sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_die.num *
4033 __kmp_hws_core.num * __kmp_hws_proc.num);
for (int i = 0; i < nPackages; ++i) {
4035 if (i < __kmp_hws_socket.offset ||
4036 i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
4038 n_old += nDiesPerPkg * nCoresPerPkg * __kmp_nThreadsPerCore;
4039 if (__kmp_pu_os_idx != NULL) {
for (int l = 0; l < nDiesPerPkg; ++l) {
for (int j = 0; j < nCoresPerPkg; ++j) {
for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
for (int l = 0; l < nDiesPerPkg; ++l) {
4054 if (l < __kmp_hws_die.offset ||
4055 l >= __kmp_hws_die.offset + __kmp_hws_die.num) {
4056 n_old += nCoresPerPkg;
4057 if (__kmp_pu_os_idx != NULL) {
for (int k = 0; k < nCoresPerPkg; ++k) {
4059 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
for (int j = 0; j < nCoresPerPkg; ++j) {
4065 if (j < __kmp_hws_core.offset ||
4066 j >= __kmp_hws_core.offset +
4067 __kmp_hws_core.num) {
4068 n_old += __kmp_nThreadsPerCore;
4069 if (__kmp_pu_os_idx != NULL) {
for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
4079 if (k < __kmp_hws_proc.num) {
4081 newAddr[n_new] = (*pAddr)[n_old];
4084 if (__kmp_pu_os_idx != NULL)
KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
4097 KMP_DEBUG_ASSERT(n_old == nPackages * nDiesPerPkg * nCoresPerPkg *
4098 __kmp_nThreadsPerCore);
4099 KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_die.num *
4100 __kmp_hws_core.num * __kmp_hws_proc.num);
4101 nPackages = __kmp_hws_socket.num;
4102 nCoresPerPkg = __kmp_hws_core.num;
4103 nDiesPerPkg = __kmp_hws_die.num;
4104 __kmp_nThreadsPerCore = __kmp_hws_proc.num;
4105 __kmp_avail_proc = n_new;
4107 nPackages * nDiesPerPkg * __kmp_hws_core.num;
4113 if (__kmp_affinity_verbose) {
KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
__kmp_str_buf_init(&buf);
__kmp_str_buf_print(&buf, "%d", nPackages);
KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
           __kmp_nThreadsPerCore, __kmp_ncores);
4120 __kmp_str_buf_free(&buf);
4123 if (__kmp_pu_os_idx != NULL) {
4124 __kmp_free(__kmp_pu_os_idx);
4125 __kmp_pu_os_idx = NULL;
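// Helpers used by balanced affinity: locate the topology level that
// corresponds to cores in the address2os labels, count the cores, map a proc
// index to its core, and compute the maximum number of procs per core.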
static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
                                          int nprocs, int bottom_level) {
for (int i = 0; i < nprocs; i++) {
for (int j = bottom_level; j > 0; j--) {
4137 if (address2os[i].first.labels[j] > 0) {
4138 if (core_level < (j - 1)) {
static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
                                         int nprocs, int bottom_level,
                                         int core_level) {
4155 for (i = 0; i < nprocs; i++) {
4156 for (j = bottom_level; j > core_level; j--) {
4157 if ((i + 1) < nprocs) {
4158 if (address2os[i + 1].first.labels[j] > 0) {
4163 if (j == core_level) {
4167 if (j > core_level) {
static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
                                    int bottom_level, int core_level) {
4178 return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
                                            int nprocs, int bottom_level,
                                            int core_level) {
int maxprocpercore = 0;
4190 if (core_level < bottom_level) {
for (int i = 0; i < nprocs; i++) {
4192 int percore = address2os[i].first.labels[core_level + 1] + 1;
4194 if (percore > maxprocpercore) {
4195 maxprocpercore = percore;
4201 return maxprocpercore;
4204 static AddrUnsPair *address2os = NULL;
4205 static int *procarr = NULL;
4206 static int __kmp_aff_depth = 0;
4208 #if KMP_USE_HIER_SCHED
4209 #define KMP_EXIT_AFF_NONE \
4210 KMP_ASSERT(__kmp_affinity_type == affinity_none); \
4211 KMP_ASSERT(address2os == NULL); \
4212 __kmp_apply_thread_places(NULL, 0); \
4213 __kmp_create_affinity_none_places(); \
4214 __kmp_dispatch_set_hierarchy_values(); \
4217 #define KMP_EXIT_AFF_NONE \
4218 KMP_ASSERT(__kmp_affinity_type == affinity_none); \
4219 KMP_ASSERT(address2os == NULL); \
4220 __kmp_apply_thread_places(NULL, 0); \
4221 __kmp_create_affinity_none_places(); \
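// When affinity ends up disabled or none, publish a single place covering the
// entire initial affinity mask.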
4227 static void __kmp_create_affinity_none_places() {
4228 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4229 KMP_ASSERT(__kmp_affinity_type == affinity_none);
4230 __kmp_affinity_num_masks = 1;
4231 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4232 kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0);
4233 KMP_CPU_COPY(dest, __kmp_affin_fullMask);
static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) {
const Address *aa = &(((const AddrUnsPair *)a)->first);
const Address *bb = &(((const AddrUnsPair *)b)->first);
4239 unsigned depth = aa->depth;
4241 KMP_DEBUG_ASSERT(depth == bb->depth);
KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
4243 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
4244 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
4245 int j = depth - i - 1;
4246 if (aa->childNums[j] < bb->childNums[j])
4248 if (aa->childNums[j] > bb->childNums[j])
4251 for (; i < depth; i++) {
4252 int j = i - __kmp_affinity_compact;
4253 if (aa->childNums[j] < bb->childNums[j])
4255 if (aa->childNums[j] > bb->childNums[j])
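// Main one-time affinity setup: capture the initial (full) mask, detect the
// machine topology with the chosen method (hwloc, x2APIC/APIC ids,
// /proc/cpuinfo, Windows processor groups, or a flat map), apply
// KMP_HW_SUBSET, build per-place masks at the requested granularity, and
// order them according to the affinity type.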
static void __kmp_aux_affinity_initialize(void) {
4262 if (__kmp_affinity_masks != NULL) {
4263 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4271 if (__kmp_affin_fullMask == NULL) {
4272 KMP_CPU_ALLOC(__kmp_affin_fullMask);
4274 if (KMP_AFFINITY_CAPABLE()) {
4275 __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
4276 if (__kmp_affinity_respect_mask) {
4279 __kmp_avail_proc = 0;
4280 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
4281 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
4286 if (__kmp_avail_proc > __kmp_xproc) {
4287 if (__kmp_affinity_verbose ||
4288 (__kmp_affinity_warnings &&
4289 (__kmp_affinity_type != affinity_none))) {
4290 KMP_WARNING(ErrorInitializeAffinity);
4292 __kmp_affinity_type = affinity_none;
4293 KMP_AFFINITY_DISABLE();
4297 if (__kmp_affinity_verbose) {
4298 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4299 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4300 __kmp_affin_fullMask);
KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
4304 if (__kmp_affinity_verbose) {
4305 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4306 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4307 __kmp_affin_fullMask);
KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
4310 __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
4311 __kmp_avail_proc = __kmp_xproc;
__kmp_affin_fullMask->set_process_affinity(true);
4320 if (__kmp_affinity_gran == affinity_gran_tile &&
4322 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::NATIVE_OS) {
KMP_WARNING(AffTilesNoHWLOC, "KMP_AFFINITY");
4324 __kmp_affinity_gran = affinity_gran_package;
4328 kmp_i18n_id_t msg_id = kmp_i18n_null;
4332 if ((__kmp_cpuinfo_file != NULL) &&
4333 (__kmp_affinity_top_method == affinity_top_method_all)) {
4334 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
4337 if (__kmp_affinity_top_method == affinity_top_method_all) {
4341 const char *file_name = NULL;
4345 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
4346 if (__kmp_affinity_verbose) {
KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
4349 if (!__kmp_hwloc_error) {
4350 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
} else if (__kmp_affinity_verbose) {
KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
4360 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4363 if (__kmp_affinity_verbose) {
KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
4368 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
4374 if (__kmp_affinity_verbose) {
4375 if (msg_id != kmp_i18n_null) {
KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
           KMP_I18N_STR(DecodingLegacyAPIC));
KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
4386 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
4398 if (__kmp_affinity_verbose) {
4399 if (msg_id != kmp_i18n_null) {
KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
           "/proc/cpuinfo");
KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
4408 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
4416 #if KMP_GROUP_AFFINITY
4418 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
4419 if (__kmp_affinity_verbose) {
KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
4423 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
4424 KMP_ASSERT(depth != 0);
4430 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
4431 if (file_name == NULL) {
4432 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
} else if (line == 0) {
4434 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
4436 KMP_INFORM(UsingFlatOSFileLine, file_name, line,
4437 __kmp_i18n_catgets(msg_id));
4443 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
4447 KMP_ASSERT(depth > 0);
4448 KMP_ASSERT(address2os != NULL);
4453 else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
4454 KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
4455 if (__kmp_affinity_verbose) {
KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
4458 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
4469 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4471 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid ||
4472 __kmp_affinity_top_method == affinity_top_method_x2apicid_1f) {
4473 if (__kmp_affinity_verbose) {
KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
4477 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
4482 KMP_ASSERT(msg_id != kmp_i18n_null);
4483 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
} else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
4486 if (__kmp_affinity_verbose) {
KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
4490 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
4495 KMP_ASSERT(msg_id != kmp_i18n_null);
4496 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4502 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
4503 const char *filename;
const char *env_var = nullptr;
if (__kmp_cpuinfo_file != NULL) {
filename = __kmp_cpuinfo_file;
env_var = "KMP_CPUINFO_FILE";
filename = "/proc/cpuinfo";
4512 if (__kmp_affinity_verbose) {
KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
4518 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
4520 KMP_ASSERT(msg_id != kmp_i18n_null);
4522 KMP_FATAL(FileLineMsgExiting, filename, line,
4523 __kmp_i18n_catgets(msg_id));
4525 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
4528 if (__kmp_affinity_type == affinity_none) {
4529 KMP_ASSERT(depth == 0);
4534 #if KMP_GROUP_AFFINITY
4536 else if (__kmp_affinity_top_method == affinity_top_method_group) {
4537 if (__kmp_affinity_verbose) {
KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
4541 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
4542 KMP_ASSERT(depth != 0);
4544 KMP_ASSERT(msg_id != kmp_i18n_null);
4545 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4551 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
4552 if (__kmp_affinity_verbose) {
KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
4556 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
4561 KMP_ASSERT(depth > 0);
4562 KMP_ASSERT(address2os != NULL);
4565 #if KMP_USE_HIER_SCHED
4566 __kmp_dispatch_set_hierarchy_values();
4569 if (address2os == NULL) {
4570 if (KMP_AFFINITY_CAPABLE() &&
4571 (__kmp_affinity_verbose ||
4572 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
4573 KMP_WARNING(ErrorInitializeAffinity);
4575 __kmp_affinity_type = affinity_none;
4576 __kmp_create_affinity_none_places();
4577 KMP_AFFINITY_DISABLE();
4581 if (__kmp_affinity_gran == affinity_gran_tile
4583 && __kmp_tile_depth == 0
KMP_WARNING(AffTilesNoTiles, "KMP_AFFINITY");
4590 __kmp_apply_thread_places(&address2os, depth);
4595 kmp_affin_mask_t *osId2Mask =
4596 __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
4597 if (__kmp_affinity_gran_levels == 0) {
KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
4604 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
4606 switch (__kmp_affinity_type) {
4608 case affinity_explicit:
4609 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
4610 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
4611 __kmp_affinity_process_proclist(
4612 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4613 __kmp_affinity_proclist, osId2Mask, maxIndex);
4615 __kmp_affinity_process_placelist(
4616 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4617 __kmp_affinity_proclist, osId2Mask, maxIndex);
4619 if (__kmp_affinity_num_masks == 0) {
4620 if (__kmp_affinity_verbose ||
4621 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
4622 KMP_WARNING(AffNoValidProcID);
4624 __kmp_affinity_type = affinity_none;
4625 __kmp_create_affinity_none_places();
4635 case affinity_logical:
4636 __kmp_affinity_compact = 0;
4637 if (__kmp_affinity_offset) {
4638 __kmp_affinity_offset =
4639 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4643 case affinity_physical:
4644 if (__kmp_nThreadsPerCore > 1) {
4645 __kmp_affinity_compact = 1;
4646 if (__kmp_affinity_compact >= depth) {
4647 __kmp_affinity_compact = 0;
4650 __kmp_affinity_compact = 0;
4652 if (__kmp_affinity_offset) {
4653 __kmp_affinity_offset =
4654 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4658 case affinity_scatter:
4659 if (__kmp_affinity_compact >= depth) {
4660 __kmp_affinity_compact = 0;
4662 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
4666 case affinity_compact:
4667 if (__kmp_affinity_compact >= depth) {
4668 __kmp_affinity_compact = depth - 1;
4672 case affinity_balanced:
4674 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
4677 __kmp_affinity_type = affinity_none;
4678 __kmp_create_affinity_none_places();
} else if (!__kmp_affinity_uniform_topology()) {
4682 __kmp_aff_depth = depth;
4684 int core_level = __kmp_affinity_find_core_level(
4685 address2os, __kmp_avail_proc, depth - 1);
4686 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
4687 depth - 1, core_level);
4688 int maxprocpercore = __kmp_affinity_max_proc_per_core(
4689 address2os, __kmp_avail_proc, depth - 1, core_level);
4691 int nproc = ncores * maxprocpercore;
4692 if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
4693 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
4696 __kmp_affinity_type = affinity_none;
procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
for (int i = 0; i < nproc; i++) {
for (int i = 0; i < __kmp_avail_proc; i++) {
4708 int proc = address2os[i].second;
4710 __kmp_affinity_find_core(address2os, i, depth - 1, core_level);
4712 if (core == lastcore) {
4719 procarr[core * maxprocpercore + inlastcore] = proc;
4722 if (__kmp_affinity_compact >= depth) {
4723 __kmp_affinity_compact = depth - 1;
4728 if (__kmp_affinity_dups) {
4729 __kmp_affinity_num_masks = __kmp_avail_proc;
4731 __kmp_affinity_num_masks = numUnique;
4734 if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
4735 (__kmp_affinity_num_places > 0) &&
((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
4737 __kmp_affinity_num_masks = __kmp_affinity_num_places;
4740 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
      __kmp_affinity_cmp_Address_child_num);
4749 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
4750 if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
4753 unsigned osId = address2os[i].second;
4754 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
4755 kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
4756 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
4757 KMP_CPU_COPY(dest, src);
4758 if (++j >= __kmp_affinity_num_masks) {
4762 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
KMP_ASSERT2(0, "Unexpected affinity setting");
4770 KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
4771 machine_hierarchy.init(address2os, __kmp_avail_proc);
4773 #undef KMP_EXIT_AFF_NONE
void __kmp_affinity_initialize(void) {
4784 int disabled = (__kmp_affinity_type == affinity_disabled);
4785 if (!KMP_AFFINITY_CAPABLE()) {
4786 KMP_ASSERT(disabled);
4789 __kmp_affinity_type = affinity_none;
4791 __kmp_aux_affinity_initialize();
4793 __kmp_affinity_type = affinity_disabled;
void __kmp_affinity_uninitialize(void) {
4798 if (__kmp_affinity_masks != NULL) {
4799 KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4800 __kmp_affinity_masks = NULL;
4802 if (__kmp_affin_fullMask != NULL) {
4803 KMP_CPU_FREE(__kmp_affin_fullMask);
4804 __kmp_affin_fullMask = NULL;
4806 __kmp_affinity_num_masks = 0;
4807 __kmp_affinity_type = affinity_default;
4808 __kmp_affinity_num_places = 0;
4809 if (__kmp_affinity_proclist != NULL) {
4810 __kmp_free(__kmp_affinity_proclist);
4811 __kmp_affinity_proclist = NULL;
4813 if (address2os != NULL) {
4814 __kmp_free(address2os);
4817 if (procarr != NULL) {
4818 __kmp_free(procarr);
4822 if (__kmp_hwloc_topology != NULL) {
4823 hwloc_topology_destroy(__kmp_hwloc_topology);
4824 __kmp_hwloc_topology = NULL;
4827 KMPAffinity::destroy_api();
void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
4831 if (!KMP_AFFINITY_CAPABLE()) {
4835 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4836 if (th->th.th_affin_mask == NULL) {
4837 KMP_CPU_ALLOC(th->th.th_affin_mask);
4839 KMP_CPU_ZERO(th->th.th_affin_mask);
4846 kmp_affin_mask_t *mask;
4849 if (KMP_AFFINITY_NON_PROC_BIND) {
4850 if ((__kmp_affinity_type == affinity_none) ||
4851 (__kmp_affinity_type == affinity_balanced)) {
4852 #if KMP_GROUP_AFFINITY
4853 if (__kmp_num_proc_groups > 1) {
4857 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4859 mask = __kmp_affin_fullMask;
4861 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4862 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4863 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4867 (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
4868 #if KMP_GROUP_AFFINITY
4869 if (__kmp_num_proc_groups > 1) {
4873 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4875 mask = __kmp_affin_fullMask;
4879 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4880 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4881 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4885 th->th.th_current_place = i;
4887 th->th.th_new_place = i;
4888 th->th.th_first_place = 0;
4889 th->th.th_last_place = __kmp_affinity_num_masks - 1;
} else if (KMP_AFFINITY_NON_PROC_BIND) {
4893 th->th.th_first_place = 0;
4894 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4897 if (i == KMP_PLACE_ALL) {
KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4905 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4907 if (__kmp_affinity_verbose
4909 && (__kmp_affinity_type == affinity_none ||
4910 (i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) {
4911 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4912 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4913 th->th.th_affin_mask);
KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
           __kmp_gettid(), gtid, buf);
4922 if (__kmp_affinity_type == affinity_none) {
4923 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4926 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
void __kmp_affinity_set_place(int gtid) {
4930 if (!KMP_AFFINITY_CAPABLE()) {
4934 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
4938 gtid, th->th.th_new_place, th->th.th_current_place));
4941 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4942 KMP_ASSERT(th->th.th_new_place >= 0);
KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
4944 if (th->th.th_first_place <= th->th.th_last_place) {
4945 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
4946 (th->th.th_new_place <= th->th.th_last_place));
4948 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
4949 (th->th.th_new_place >= th->th.th_last_place));
4954 kmp_affin_mask_t *mask =
4955 KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
4956 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4957 th->th.th_current_place = th->th.th_new_place;
4959 if (__kmp_affinity_verbose) {
4960 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4961 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4962 th->th.th_affin_mask);
KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
           __kmp_gettid(), gtid, buf);
4966 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
int __kmp_aux_set_affinity(void **mask) {
4974 if (!KMP_AFFINITY_CAPABLE()) {
4978 gtid = __kmp_entry_gtid();
4981 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4982 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4983 (kmp_affin_mask_t *)(*mask));
4985 "kmp_set_affinity: setting affinity mask for thread %d = %s\n",
4989 if (__kmp_env_consistency_check) {
4990 if ((mask == NULL) || (*mask == NULL)) {
KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4996 KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
4997 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
5000 if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
5005 if (num_procs == 0) {
KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
5009 #if KMP_GROUP_AFFINITY
5010 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
5017 th = __kmp_threads[gtid];
5018 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
5019 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
5021 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
5024 th->th.th_current_place = KMP_PLACE_UNDEFINED;
5025 th->th.th_new_place = KMP_PLACE_UNDEFINED;
5026 th->th.th_first_place = 0;
5027 th->th.th_last_place = __kmp_affinity_num_masks - 1;
5030 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
int __kmp_aux_get_affinity(void **mask) {
5040 if (!KMP_AFFINITY_CAPABLE()) {
5044 gtid = __kmp_entry_gtid();
5045 th = __kmp_threads[gtid];
5046 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
5050 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5051 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5052 th->th.th_affin_mask);
5054 "kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid,
5058 if (__kmp_env_consistency_check) {
5059 if ((mask == NULL) || (*mask == NULL)) {
KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
5066 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
5069 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5070 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5071 (kmp_affin_mask_t *)(*mask));
5073 "kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid,
5081 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
5087 int __kmp_aux_get_affinity_max_proc() {
5088 if (!KMP_AFFINITY_CAPABLE()) {
5091 #if KMP_GROUP_AFFINITY
5092 if (__kmp_num_proc_groups > 1) {
return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
5100 if (!KMP_AFFINITY_CAPABLE()) {
5106 int gtid = __kmp_entry_gtid();
5107 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5108 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5109 (kmp_affin_mask_t *)(*mask));
__kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
                   "affinity mask for thread %d = %s\n",
5115 if (__kmp_env_consistency_check) {
5116 if ((mask == NULL) || (*mask == NULL)) {
KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
5121 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
5124 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
5128 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
5133 if (!KMP_AFFINITY_CAPABLE()) {
5139 int gtid = __kmp_entry_gtid();
5140 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5141 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5142 (kmp_affin_mask_t *)(*mask));
__kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
                   "affinity mask for thread %d = %s\n",
5148 if (__kmp_env_consistency_check) {
5149 if ((mask == NULL) || (*mask == NULL)) {
KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
5154 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
5157 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
5161 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
5166 if (!KMP_AFFINITY_CAPABLE()) {
5172 int gtid = __kmp_entry_gtid();
5173 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5174 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
5175 (kmp_affin_mask_t *)(*mask));
__kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
                   "affinity mask for thread %d = %s\n",
5181 if (__kmp_env_consistency_check) {
5182 if ((mask == NULL) || (*mask == NULL)) {
KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
5187 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
5190 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
5194 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
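// Balanced affinity: spread the team's threads as evenly as possible over
// cores. Uniform topologies are handled arithmetically from the thread id;
// non-uniform ones use the procarr table built during initialization.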
void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
KMP_DEBUG_ASSERT(th);
bool fine_gran = true;
5201 int tid = th->th.th_info.ds.ds_tid;
5203 switch (__kmp_affinity_gran) {
5204 case affinity_gran_fine:
5205 case affinity_gran_thread:
5207 case affinity_gran_core:
5208 if (__kmp_nThreadsPerCore > 1) {
5212 case affinity_gran_package:
5213 if (nCoresPerPkg > 1) {
5221 if (__kmp_affinity_uniform_topology()) {
5225 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
5227 int ncores = __kmp_ncores;
5228 if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
5229 __kmp_nth_per_core = __kmp_avail_proc / nPackages;
5233 int chunk = nthreads / ncores;
5235 int big_cores = nthreads % ncores;
5237 int big_nth = (chunk + 1) * big_cores;
5238 if (tid < big_nth) {
5239 coreID = tid / (chunk + 1);
5240 threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
5242 coreID = (tid - big_cores) / chunk;
5243 threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
5249 kmp_affin_mask_t *mask = th->th.th_affin_mask;
5253 int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
5254 KMP_CPU_SET(osID, mask);
for (int i = 0; i < __kmp_nth_per_core; i++) {
5258 osID = address2os[coreID * __kmp_nth_per_core + i].second;
5259 KMP_CPU_SET(osID, mask);
5262 if (__kmp_affinity_verbose) {
5263 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5264 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
           __kmp_gettid(), tid, buf);
5268 __kmp_set_system_affinity(mask, TRUE);
5271 kmp_affin_mask_t *mask = th->th.th_affin_mask;
5274 int core_level = __kmp_affinity_find_core_level(
5275 address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
5276 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
5277 __kmp_aff_depth - 1, core_level);
5278 int nth_per_core = __kmp_affinity_max_proc_per_core(
5279 address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
5283 if (nthreads == __kmp_avail_proc) {
5285 int osID = address2os[tid].second;
5286 KMP_CPU_SET(osID, mask);
5288 int core = __kmp_affinity_find_core(address2os, tid,
5289 __kmp_aff_depth - 1, core_level);
for (int i = 0; i < __kmp_avail_proc; i++) {
5291 int osID = address2os[i].second;
5292 if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
5293 core_level) == core) {
5294 KMP_CPU_SET(osID, mask);
} else if (nthreads <= ncores) {
for (int i = 0; i < ncores; i++) {
for (int j = 0; j < nth_per_core; j++) {
5305 if (procarr[i * nth_per_core + j] != -1) {
for (int j = 0; j < nth_per_core; j++) {
5313 int osID = procarr[i * nth_per_core + j];
5315 KMP_CPU_SET(osID, mask);
int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
int *ncores_with_x_procs =
    (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
int *ncores_with_x_to_max_procs =
    (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
for (int i = 0; i <= nth_per_core; i++) {
5340 ncores_with_x_procs[i] = 0;
5341 ncores_with_x_to_max_procs[i] = 0;
for (int i = 0; i < ncores; i++) {
for (int j = 0; j < nth_per_core; j++) {
5347 if (procarr[i * nth_per_core + j] != -1) {
5351 nproc_at_core[i] = cnt;
5352 ncores_with_x_procs[cnt]++;
for (int i = 0; i <= nth_per_core; i++) {
for (int j = i; j <= nth_per_core; j++) {
5357 ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
5362 int nproc = nth_per_core * ncores;
int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
for (int i = 0; i < nproc; i++) {
for (int j = 1; j <= nth_per_core; j++) {
5373 int cnt = ncores_with_x_to_max_procs[j];
for (int i = 0; i < ncores; i++) {
if (nproc_at_core[i] == 0) {
for (int k = 0; k < nth_per_core; k++) {
5380 if (procarr[i * nth_per_core + k] != -1) {
5381 if (newarr[i * nth_per_core + k] == 0) {
5382 newarr[i * nth_per_core + k] = 1;
5388 newarr[i * nth_per_core + k]++;
5396 if (cnt == 0 || nth == 0) {
for (int i = 0; i < nproc; i++) {
5411 int osID = procarr[i];
5412 KMP_CPU_SET(osID, mask);
5414 int coreID = i / nth_per_core;
for (int ii = 0; ii < nth_per_core; ii++) {
5416 int osID = procarr[coreID * nth_per_core + ii];
5418 KMP_CPU_SET(osID, mask);
5428 if (__kmp_affinity_verbose) {
5429 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5430 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
           __kmp_gettid(), tid, buf);
5434 __kmp_set_system_affinity(mask, TRUE);
5438 #if KMP_OS_LINUX || KMP_OS_FREEBSD
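// Entry point for resetting the calling thread to the initial full affinity
// mask (Linux/FreeBSD only); returns early for non-OpenMP threads or before
// affinity is initialized.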
5452 kmp_set_thread_affinity_mask_initial()
5457 int gtid = __kmp_get_gtid();
KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
              "non-omp thread, returning\n"));
if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
              "affinity not initialized, returning\n"));
KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
              "set full mask for thread %d\n",
5472 KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
5473 return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);