LLVM OpenMP* Runtime Library
z_Linux_util.cpp
1 /*
2  * z_Linux_util.cpp -- platform specific routines.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp.h"
14 #include "kmp_affinity.h"
15 #include "kmp_i18n.h"
16 #include "kmp_io.h"
17 #include "kmp_itt.h"
18 #include "kmp_lock.h"
19 #include "kmp_stats.h"
20 #include "kmp_str.h"
21 #include "kmp_wait_release.h"
22 #include "kmp_wrapper_getpid.h"
23 
24 #if !KMP_OS_DRAGONFLY && !KMP_OS_FREEBSD && !KMP_OS_NETBSD && !KMP_OS_OPENBSD
25 #include <alloca.h>
26 #endif
27 #include <math.h> // HUGE_VAL.
28 #include <semaphore.h>
29 #include <sys/resource.h>
30 #include <sys/syscall.h>
31 #include <sys/time.h>
32 #include <sys/times.h>
33 #include <unistd.h>
34 
35 #if KMP_OS_LINUX
36 #include <sys/sysinfo.h>
37 #if KMP_USE_FUTEX
38 // We should really include <futex.h>, but that causes compatibility problems on
39 // different Linux* OS distributions that either require that you include (or
40 // break when you try to include) <pci/types.h>. Since all we need is the two
41 // macros below (which are part of the kernel ABI, so can't change) we just
42 // define the constants here and don't include <futex.h>
43 #ifndef FUTEX_WAIT
44 #define FUTEX_WAIT 0
45 #endif
46 #ifndef FUTEX_WAKE
47 #define FUTEX_WAKE 1
48 #endif
49 #endif
50 #elif KMP_OS_DARWIN
51 #include <mach/mach.h>
52 #include <sys/sysctl.h>
53 #elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD
54 #include <sys/types.h>
55 #include <sys/sysctl.h>
56 #include <sys/user.h>
57 #include <pthread_np.h>
58 #elif KMP_OS_NETBSD || KMP_OS_OPENBSD
59 #include <sys/types.h>
60 #include <sys/sysctl.h>
61 #endif
62 
63 #include <ctype.h>
64 #include <dirent.h>
65 #include <fcntl.h>
66 
67 #include "tsan_annotations.h"
68 
69 struct kmp_sys_timer {
70  struct timespec start;
71 };
72 
// Convert a timespec to nanoseconds.
// The multiplication is carried out in 64-bit integer arithmetic so the result
// cannot overflow on targets where long is 32 bits wide, and the scale factor
// is an integer literal rather than a double converted to an integer.
#define TS2NS(timespec)                                                        \
  (((long long)(timespec).tv_sec * 1000000000LL) + (timespec).tv_nsec)
76 
77 static struct kmp_sys_timer __kmp_sys_timer_data;
78 
79 #if KMP_HANDLE_SIGNALS
80 typedef void (*sig_func_t)(int);
81 STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[NSIG];
82 static sigset_t __kmp_sigset;
83 #endif
84 
85 static int __kmp_init_runtime = FALSE;
86 
87 static int __kmp_fork_count = 0;
88 
89 static pthread_condattr_t __kmp_suspend_cond_attr;
90 static pthread_mutexattr_t __kmp_suspend_mutex_attr;
91 
92 static kmp_cond_align_t __kmp_wait_cv;
93 static kmp_mutex_align_t __kmp_wait_mx;
94 
95 kmp_uint64 __kmp_ticks_per_msec = 1000000;
96 
97 #ifdef DEBUG_SUSPEND
98 static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) {
99  KMP_SNPRINTF(buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))",
100  cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock,
101  cond->c_cond.__c_waiting);
102 }
103 #endif
104 
105 #if ((KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED)
106 
107 /* Affinity support */
108 
109 void __kmp_affinity_bind_thread(int which) {
110  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
111  "Illegal set affinity operation when not capable");
112 
113  kmp_affin_mask_t *mask;
114  KMP_CPU_ALLOC_ON_STACK(mask);
115  KMP_CPU_ZERO(mask);
116  KMP_CPU_SET(which, mask);
117  __kmp_set_system_affinity(mask, TRUE);
118  KMP_CPU_FREE_FROM_STACK(mask);
119 }
120 
121 /* Determine if we can access affinity functionality on this version of
122  * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set
123  * __kmp_affin_mask_size to the appropriate value (0 means not capable). */
124 void __kmp_affinity_determine_capable(const char *env_var) {
125  // Check and see if the OS supports thread affinity.
126 
127 #if KMP_OS_LINUX
128 #define KMP_CPU_SET_SIZE_LIMIT (1024 * 1024)
129 #elif KMP_OS_FREEBSD
130 #define KMP_CPU_SET_SIZE_LIMIT (sizeof(cpuset_t))
131 #endif
132 
133 #if KMP_OS_LINUX
134  // If Linux* OS:
135  // If the syscall fails or returns a suggestion for the size,
136  // then we don't have to search for an appropriate size.
137  long gCode;
138  long sCode;
139  unsigned char *buf;
140  buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
141  gCode = syscall(__NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf);
142  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
143  "initial getaffinity call returned %ld errno = %d\n",
144  gCode, errno));
145 
146  // if ((gCode < 0) && (errno == ENOSYS))
147  if (gCode < 0) {
148  // System call not supported
149  if (__kmp_affinity_verbose ||
150  (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) &&
151  (__kmp_affinity_type != affinity_default) &&
152  (__kmp_affinity_type != affinity_disabled))) {
153  int error = errno;
154  kmp_msg_t err_code = KMP_ERR(error);
155  __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
156  err_code, __kmp_msg_null);
157  if (__kmp_generate_warnings == kmp_warnings_off) {
158  __kmp_str_free(&err_code.str);
159  }
160  }
161  KMP_AFFINITY_DISABLE();
162  KMP_INTERNAL_FREE(buf);
163  return;
164  }
165  if (gCode > 0) { // Linux* OS only
166  // The optimal situation: the OS returns the size of the buffer it expects.
167  //
168  // A verification of correct behavior is that setaffinity on a NULL
169  // buffer with the same size fails with errno set to EFAULT.
170  sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL);
171  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
172  "setaffinity for mask size %ld returned %ld errno = %d\n",
173  gCode, sCode, errno));
174  if (sCode < 0) {
175  if (errno == ENOSYS) {
176  if (__kmp_affinity_verbose ||
177  (__kmp_affinity_warnings &&
178  (__kmp_affinity_type != affinity_none) &&
179  (__kmp_affinity_type != affinity_default) &&
180  (__kmp_affinity_type != affinity_disabled))) {
181  int error = errno;
182  kmp_msg_t err_code = KMP_ERR(error);
183  __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var),
184  err_code, __kmp_msg_null);
185  if (__kmp_generate_warnings == kmp_warnings_off) {
186  __kmp_str_free(&err_code.str);
187  }
188  }
189  KMP_AFFINITY_DISABLE();
190  KMP_INTERNAL_FREE(buf);
191  }
192  if (errno == EFAULT) {
193  KMP_AFFINITY_ENABLE(gCode);
194  KA_TRACE(10, ("__kmp_affinity_determine_capable: "
195  "affinity supported (mask size %d)\n",
196  (int)__kmp_affin_mask_size));
197  KMP_INTERNAL_FREE(buf);
198  return;
199  }
200  }
201  }
202 
203  // Call the getaffinity system call repeatedly with increasing set sizes
204  // until we succeed, or reach an upper bound on the search.
205  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
206  "searching for proper set size\n"));
207  int size;
208  for (size = 1; size <= KMP_CPU_SET_SIZE_LIMIT; size *= 2) {
209  gCode = syscall(__NR_sched_getaffinity, 0, size, buf);
210  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
211  "getaffinity for mask size %ld returned %ld errno = %d\n",
212  size, gCode, errno));
213 
214  if (gCode < 0) {
215  if (errno == ENOSYS) {
216  // We shouldn't get here
217  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
218  "inconsistent OS call behavior: errno == ENOSYS for mask "
219  "size %d\n",
220  size));
221  if (__kmp_affinity_verbose ||
222  (__kmp_affinity_warnings &&
223  (__kmp_affinity_type != affinity_none) &&
224  (__kmp_affinity_type != affinity_default) &&
225  (__kmp_affinity_type != affinity_disabled))) {
226  int error = errno;
227  kmp_msg_t err_code = KMP_ERR(error);
228  __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
229  err_code, __kmp_msg_null);
230  if (__kmp_generate_warnings == kmp_warnings_off) {
231  __kmp_str_free(&err_code.str);
232  }
233  }
234  KMP_AFFINITY_DISABLE();
235  KMP_INTERNAL_FREE(buf);
236  return;
237  }
238  continue;
239  }
240 
241  sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL);
242  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
243  "setaffinity for mask size %ld returned %ld errno = %d\n",
244  gCode, sCode, errno));
245  if (sCode < 0) {
246  if (errno == ENOSYS) { // Linux* OS only
247  // We shouldn't get here
248  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
249  "inconsistent OS call behavior: errno == ENOSYS for mask "
250  "size %d\n",
251  size));
252  if (__kmp_affinity_verbose ||
253  (__kmp_affinity_warnings &&
254  (__kmp_affinity_type != affinity_none) &&
255  (__kmp_affinity_type != affinity_default) &&
256  (__kmp_affinity_type != affinity_disabled))) {
257  int error = errno;
258  kmp_msg_t err_code = KMP_ERR(error);
259  __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var),
260  err_code, __kmp_msg_null);
261  if (__kmp_generate_warnings == kmp_warnings_off) {
262  __kmp_str_free(&err_code.str);
263  }
264  }
265  KMP_AFFINITY_DISABLE();
266  KMP_INTERNAL_FREE(buf);
267  return;
268  }
269  if (errno == EFAULT) {
270  KMP_AFFINITY_ENABLE(gCode);
271  KA_TRACE(10, ("__kmp_affinity_determine_capable: "
272  "affinity supported (mask size %d)\n",
273  (int)__kmp_affin_mask_size));
274  KMP_INTERNAL_FREE(buf);
275  return;
276  }
277  }
278  }
279 #elif KMP_OS_FREEBSD
280  long gCode;
281  unsigned char *buf;
282  buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
283  gCode = pthread_getaffinity_np(pthread_self(), KMP_CPU_SET_SIZE_LIMIT,
284  reinterpret_cast<cpuset_t *>(buf));
285  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
286  "initial getaffinity call returned %d errno = %d\n",
287  gCode, errno));
288  if (gCode == 0) {
289  KMP_AFFINITY_ENABLE(KMP_CPU_SET_SIZE_LIMIT);
290  KA_TRACE(10, ("__kmp_affinity_determine_capable: "
291  "affinity supported (mask size %d)\n",
292  (int)__kmp_affin_mask_size));
293  KMP_INTERNAL_FREE(buf);
294  return;
295  }
296 #endif
297  // save uncaught error code
298  // int error = errno;
299  KMP_INTERNAL_FREE(buf);
300  // restore uncaught error code, will be printed at the next KMP_WARNING below
301  // errno = error;
302 
303  // Affinity is not supported
304  KMP_AFFINITY_DISABLE();
305  KA_TRACE(10, ("__kmp_affinity_determine_capable: "
306  "cannot determine mask size - affinity not supported\n"));
307  if (__kmp_affinity_verbose ||
308  (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) &&
309  (__kmp_affinity_type != affinity_default) &&
310  (__kmp_affinity_type != affinity_disabled))) {
311  KMP_WARNING(AffCantGetMaskSize, env_var);
312  }
313 }
314 
315 #endif // KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
316 
317 #if KMP_USE_FUTEX
318 
319 int __kmp_futex_determine_capable() {
320  int loc = 0;
321  long rc = syscall(__NR_futex, &loc, FUTEX_WAKE, 1, NULL, NULL, 0);
322  int retval = (rc == 0) || (errno != ENOSYS);
323 
324  KA_TRACE(10,
325  ("__kmp_futex_determine_capable: rc = %d errno = %d\n", rc, errno));
326  KA_TRACE(10, ("__kmp_futex_determine_capable: futex syscall%s supported\n",
327  retval ? "" : " not"));
328 
329  return retval;
330 }
331 
332 #endif // KMP_USE_FUTEX
333 
334 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (!KMP_ASM_INTRINS)
335 /* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to
336  use compare_and_store for these routines */
337 
338 kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 d) {
339  kmp_int8 old_value, new_value;
340 
341  old_value = TCR_1(*p);
342  new_value = old_value | d;
343 
344  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
345  KMP_CPU_PAUSE();
346  old_value = TCR_1(*p);
347  new_value = old_value | d;
348  }
349  return old_value;
350 }
351 
352 kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 d) {
353  kmp_int8 old_value, new_value;
354 
355  old_value = TCR_1(*p);
356  new_value = old_value & d;
357 
358  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
359  KMP_CPU_PAUSE();
360  old_value = TCR_1(*p);
361  new_value = old_value & d;
362  }
363  return old_value;
364 }
365 
366 kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 d) {
367  kmp_uint32 old_value, new_value;
368 
369  old_value = TCR_4(*p);
370  new_value = old_value | d;
371 
372  while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
373  KMP_CPU_PAUSE();
374  old_value = TCR_4(*p);
375  new_value = old_value | d;
376  }
377  return old_value;
378 }
379 
380 kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 d) {
381  kmp_uint32 old_value, new_value;
382 
383  old_value = TCR_4(*p);
384  new_value = old_value & d;
385 
386  while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
387  KMP_CPU_PAUSE();
388  old_value = TCR_4(*p);
389  new_value = old_value & d;
390  }
391  return old_value;
392 }
393 
394 #if KMP_ARCH_X86
395 kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 d) {
396  kmp_int8 old_value, new_value;
397 
398  old_value = TCR_1(*p);
399  new_value = old_value + d;
400 
401  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
402  KMP_CPU_PAUSE();
403  old_value = TCR_1(*p);
404  new_value = old_value + d;
405  }
406  return old_value;
407 }
408 
409 kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 d) {
410  kmp_int64 old_value, new_value;
411 
412  old_value = TCR_8(*p);
413  new_value = old_value + d;
414 
415  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
416  KMP_CPU_PAUSE();
417  old_value = TCR_8(*p);
418  new_value = old_value + d;
419  }
420  return old_value;
421 }
422 #endif /* KMP_ARCH_X86 */
423 
424 kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 d) {
425  kmp_uint64 old_value, new_value;
426 
427  old_value = TCR_8(*p);
428  new_value = old_value | d;
429  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
430  KMP_CPU_PAUSE();
431  old_value = TCR_8(*p);
432  new_value = old_value | d;
433  }
434  return old_value;
435 }
436 
437 kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 d) {
438  kmp_uint64 old_value, new_value;
439 
440  old_value = TCR_8(*p);
441  new_value = old_value & d;
442  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
443  KMP_CPU_PAUSE();
444  old_value = TCR_8(*p);
445  new_value = old_value & d;
446  }
447  return old_value;
448 }
449 
450 #endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! KMP_ASM_INTRINS) */
451 
452 void __kmp_terminate_thread(int gtid) {
453  int status;
454  kmp_info_t *th = __kmp_threads[gtid];
455 
456  if (!th)
457  return;
458 
459 #ifdef KMP_CANCEL_THREADS
460  KA_TRACE(10, ("__kmp_terminate_thread: kill (%d)\n", gtid));
461  status = pthread_cancel(th->th.th_info.ds.ds_thread);
462  if (status != 0 && status != ESRCH) {
463  __kmp_fatal(KMP_MSG(CantTerminateWorkerThread), KMP_ERR(status),
464  __kmp_msg_null);
465  }
466 #endif
467  KMP_YIELD(TRUE);
468 } //
469 
470 /* Set thread stack info according to values returned by pthread_getattr_np().
471  If values are unreasonable, assume call failed and use incremental stack
472  refinement method instead. Returns TRUE if the stack parameters could be
473  determined exactly, FALSE if incremental refinement is necessary. */
474 static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) {
475  int stack_data;
476 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
477  KMP_OS_HURD
478  pthread_attr_t attr;
479  int status;
480  size_t size = 0;
481  void *addr = 0;
482 
483  /* Always do incremental stack refinement for ubermaster threads since the
484  initial thread stack range can be reduced by sibling thread creation so
485  pthread_attr_getstack may cause thread gtid aliasing */
486  if (!KMP_UBER_GTID(gtid)) {
487 
488  /* Fetch the real thread attributes */
489  status = pthread_attr_init(&attr);
490  KMP_CHECK_SYSFAIL("pthread_attr_init", status);
491 #if KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD
492  status = pthread_attr_get_np(pthread_self(), &attr);
493  KMP_CHECK_SYSFAIL("pthread_attr_get_np", status);
494 #else
495  status = pthread_getattr_np(pthread_self(), &attr);
496  KMP_CHECK_SYSFAIL("pthread_getattr_np", status);
497 #endif
498  status = pthread_attr_getstack(&attr, &addr, &size);
499  KMP_CHECK_SYSFAIL("pthread_attr_getstack", status);
500  KA_TRACE(60,
501  ("__kmp_set_stack_info: T#%d pthread_attr_getstack returned size:"
502  " %lu, low addr: %p\n",
503  gtid, size, addr));
504  status = pthread_attr_destroy(&attr);
505  KMP_CHECK_SYSFAIL("pthread_attr_destroy", status);
506  }
507 
508  if (size != 0 && addr != 0) { // was stack parameter determination successful?
509  /* Store the correct base and size */
510  TCW_PTR(th->th.th_info.ds.ds_stackbase, (((char *)addr) + size));
511  TCW_PTR(th->th.th_info.ds.ds_stacksize, size);
512  TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);
513  return TRUE;
514  }
515 #endif /* KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD \
516  || KMP_OS_HURD */
517  /* Use incremental refinement starting from initial conservative estimate */
518  TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
519  TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
520  TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
521  return FALSE;
522 }
523 
524 static void *__kmp_launch_worker(void *thr) {
525  int status, old_type, old_state;
526 #ifdef KMP_BLOCK_SIGNALS
527  sigset_t new_set, old_set;
528 #endif /* KMP_BLOCK_SIGNALS */
529  void *exit_val;
530 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
531  KMP_OS_OPENBSD || KMP_OS_HURD
532  void *volatile padding = 0;
533 #endif
534  int gtid;
535 
536  gtid = ((kmp_info_t *)thr)->th.th_info.ds.ds_gtid;
537  __kmp_gtid_set_specific(gtid);
538 #ifdef KMP_TDATA_GTID
539  __kmp_gtid = gtid;
540 #endif
541 #if KMP_STATS_ENABLED
542  // set thread local index to point to thread-specific stats
543  __kmp_stats_thread_ptr = ((kmp_info_t *)thr)->th.th_stats;
544  __kmp_stats_thread_ptr->startLife();
545  KMP_SET_THREAD_STATE(IDLE);
546  KMP_INIT_PARTITIONED_TIMERS(OMP_idle);
547 #endif
548 
549 #if USE_ITT_BUILD
550  __kmp_itt_thread_name(gtid);
551 #endif /* USE_ITT_BUILD */
552 
553 #if KMP_AFFINITY_SUPPORTED
554  __kmp_affinity_set_init_mask(gtid, FALSE);
555 #endif
556 
557 #ifdef KMP_CANCEL_THREADS
558  status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type);
559  KMP_CHECK_SYSFAIL("pthread_setcanceltype", status);
560  // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads?
561  status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state);
562  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
563 #endif
564 
565 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
566  // Set FP control regs to be a copy of the parallel initialization thread's.
567  __kmp_clear_x87_fpu_status_word();
568  __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
569  __kmp_load_mxcsr(&__kmp_init_mxcsr);
570 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
571 
572 #ifdef KMP_BLOCK_SIGNALS
573  status = sigfillset(&new_set);
574  KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status);
575  status = pthread_sigmask(SIG_BLOCK, &new_set, &old_set);
576  KMP_CHECK_SYSFAIL("pthread_sigmask", status);
577 #endif /* KMP_BLOCK_SIGNALS */
578 
579 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
580  KMP_OS_OPENBSD
581  if (__kmp_stkoffset > 0 && gtid > 0) {
582  padding = KMP_ALLOCA(gtid * __kmp_stkoffset);
583  }
584 #endif
585 
586  KMP_MB();
587  __kmp_set_stack_info(gtid, (kmp_info_t *)thr);
588 
589  __kmp_check_stack_overlap((kmp_info_t *)thr);
590 
591  exit_val = __kmp_launch_thread((kmp_info_t *)thr);
592 
593 #ifdef KMP_BLOCK_SIGNALS
594  status = pthread_sigmask(SIG_SETMASK, &old_set, NULL);
595  KMP_CHECK_SYSFAIL("pthread_sigmask", status);
596 #endif /* KMP_BLOCK_SIGNALS */
597 
598  return exit_val;
599 }
600 
601 #if KMP_USE_MONITOR
602 /* The monitor thread controls all of the threads in the complex */
603 
604 static void *__kmp_launch_monitor(void *thr) {
605  int status, old_type, old_state;
606 #ifdef KMP_BLOCK_SIGNALS
607  sigset_t new_set;
608 #endif /* KMP_BLOCK_SIGNALS */
609  struct timespec interval;
610 
611  KMP_MB(); /* Flush all pending memory write invalidates. */
612 
613  KA_TRACE(10, ("__kmp_launch_monitor: #1 launched\n"));
614 
615  /* register us as the monitor thread */
616  __kmp_gtid_set_specific(KMP_GTID_MONITOR);
617 #ifdef KMP_TDATA_GTID
618  __kmp_gtid = KMP_GTID_MONITOR;
619 #endif
620 
621  KMP_MB();
622 
623 #if USE_ITT_BUILD
624  // Instruct Intel(R) Threading Tools to ignore monitor thread.
625  __kmp_itt_thread_ignore();
626 #endif /* USE_ITT_BUILD */
627 
628  __kmp_set_stack_info(((kmp_info_t *)thr)->th.th_info.ds.ds_gtid,
629  (kmp_info_t *)thr);
630 
631  __kmp_check_stack_overlap((kmp_info_t *)thr);
632 
633 #ifdef KMP_CANCEL_THREADS
634  status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type);
635  KMP_CHECK_SYSFAIL("pthread_setcanceltype", status);
636  // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads?
637  status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state);
638  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
639 #endif
640 
641 #if KMP_REAL_TIME_FIX
642  // This is a potential fix which allows application with real-time scheduling
643  // policy work. However, decision about the fix is not made yet, so it is
644  // disabled by default.
645  { // Are program started with real-time scheduling policy?
646  int sched = sched_getscheduler(0);
647  if (sched == SCHED_FIFO || sched == SCHED_RR) {
648  // Yes, we are a part of real-time application. Try to increase the
649  // priority of the monitor.
650  struct sched_param param;
651  int max_priority = sched_get_priority_max(sched);
652  int rc;
653  KMP_WARNING(RealTimeSchedNotSupported);
654  sched_getparam(0, &param);
655  if (param.sched_priority < max_priority) {
656  param.sched_priority += 1;
657  rc = sched_setscheduler(0, sched, &param);
658  if (rc != 0) {
659  int error = errno;
660  kmp_msg_t err_code = KMP_ERR(error);
661  __kmp_msg(kmp_ms_warning, KMP_MSG(CantChangeMonitorPriority),
662  err_code, KMP_MSG(MonitorWillStarve), __kmp_msg_null);
663  if (__kmp_generate_warnings == kmp_warnings_off) {
664  __kmp_str_free(&err_code.str);
665  }
666  }
667  } else {
668  // We cannot abort here, because number of CPUs may be enough for all
669  // the threads, including the monitor thread, so application could
670  // potentially work...
671  __kmp_msg(kmp_ms_warning, KMP_MSG(RunningAtMaxPriority),
672  KMP_MSG(MonitorWillStarve), KMP_HNT(RunningAtMaxPriority),
673  __kmp_msg_null);
674  }
675  }
676  // AC: free thread that waits for monitor started
677  TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
678  }
679 #endif // KMP_REAL_TIME_FIX
680 
681  KMP_MB(); /* Flush all pending memory write invalidates. */
682 
683  if (__kmp_monitor_wakeups == 1) {
684  interval.tv_sec = 1;
685  interval.tv_nsec = 0;
686  } else {
687  interval.tv_sec = 0;
688  interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups);
689  }
690 
691  KA_TRACE(10, ("__kmp_launch_monitor: #2 monitor\n"));
692 
693  while (!TCR_4(__kmp_global.g.g_done)) {
694  struct timespec now;
695  struct timeval tval;
696 
697  /* This thread monitors the state of the system */
698 
699  KA_TRACE(15, ("__kmp_launch_monitor: update\n"));
700 
701  status = gettimeofday(&tval, NULL);
702  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
703  TIMEVAL_TO_TIMESPEC(&tval, &now);
704 
705  now.tv_sec += interval.tv_sec;
706  now.tv_nsec += interval.tv_nsec;
707 
708  if (now.tv_nsec >= KMP_NSEC_PER_SEC) {
709  now.tv_sec += 1;
710  now.tv_nsec -= KMP_NSEC_PER_SEC;
711  }
712 
713  status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
714  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
715  // AC: the monitor should not fall asleep if g_done has been set
716  if (!TCR_4(__kmp_global.g.g_done)) { // check once more under mutex
717  status = pthread_cond_timedwait(&__kmp_wait_cv.c_cond,
718  &__kmp_wait_mx.m_mutex, &now);
719  if (status != 0) {
720  if (status != ETIMEDOUT && status != EINTR) {
721  KMP_SYSFAIL("pthread_cond_timedwait", status);
722  }
723  }
724  }
725  status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
726  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
727 
728  TCW_4(__kmp_global.g.g_time.dt.t_value,
729  TCR_4(__kmp_global.g.g_time.dt.t_value) + 1);
730 
731  KMP_MB(); /* Flush all pending memory write invalidates. */
732  }
733 
734  KA_TRACE(10, ("__kmp_launch_monitor: #3 cleanup\n"));
735 
736 #ifdef KMP_BLOCK_SIGNALS
737  status = sigfillset(&new_set);
738  KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status);
739  status = pthread_sigmask(SIG_UNBLOCK, &new_set, NULL);
740  KMP_CHECK_SYSFAIL("pthread_sigmask", status);
741 #endif /* KMP_BLOCK_SIGNALS */
742 
743  KA_TRACE(10, ("__kmp_launch_monitor: #4 finished\n"));
744 
745  if (__kmp_global.g.g_abort != 0) {
746  /* now we need to terminate the worker threads */
747  /* the value of t_abort is the signal we caught */
748 
749  int gtid;
750 
751  KA_TRACE(10, ("__kmp_launch_monitor: #5 terminate sig=%d\n",
752  __kmp_global.g.g_abort));
753 
754  /* terminate the OpenMP worker threads */
755  /* TODO this is not valid for sibling threads!!
756  * the uber master might not be 0 anymore.. */
757  for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
758  __kmp_terminate_thread(gtid);
759 
760  __kmp_cleanup();
761 
762  KA_TRACE(10, ("__kmp_launch_monitor: #6 raise sig=%d\n",
763  __kmp_global.g.g_abort));
764 
765  if (__kmp_global.g.g_abort > 0)
766  raise(__kmp_global.g.g_abort);
767  }
768 
769  KA_TRACE(10, ("__kmp_launch_monitor: #7 exit\n"));
770 
771  return thr;
772 }
773 #endif // KMP_USE_MONITOR
774 
775 void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) {
776  pthread_t handle;
777  pthread_attr_t thread_attr;
778  int status;
779 
780  th->th.th_info.ds.ds_gtid = gtid;
781 
782 #if KMP_STATS_ENABLED
783  // sets up worker thread stats
784  __kmp_acquire_tas_lock(&__kmp_stats_lock, gtid);
785 
786  // th->th.th_stats is used to transfer thread-specific stats-pointer to
787  // __kmp_launch_worker. So when thread is created (goes into
788  // __kmp_launch_worker) it will set its thread local pointer to
789  // th->th.th_stats
790  if (!KMP_UBER_GTID(gtid)) {
791  th->th.th_stats = __kmp_stats_list->push_back(gtid);
792  } else {
793  // For root threads, __kmp_stats_thread_ptr is set in __kmp_register_root(),
794  // so set the th->th.th_stats field to it.
795  th->th.th_stats = __kmp_stats_thread_ptr;
796  }
797  __kmp_release_tas_lock(&__kmp_stats_lock, gtid);
798 
799 #endif // KMP_STATS_ENABLED
800 
801  if (KMP_UBER_GTID(gtid)) {
802  KA_TRACE(10, ("__kmp_create_worker: uber thread (%d)\n", gtid));
803  th->th.th_info.ds.ds_thread = pthread_self();
804  __kmp_set_stack_info(gtid, th);
805  __kmp_check_stack_overlap(th);
806  return;
807  }
808 
809  KA_TRACE(10, ("__kmp_create_worker: try to create thread (%d)\n", gtid));
810 
811  KMP_MB(); /* Flush all pending memory write invalidates. */
812 
813 #ifdef KMP_THREAD_ATTR
814  status = pthread_attr_init(&thread_attr);
815  if (status != 0) {
816  __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null);
817  }
818  status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
819  if (status != 0) {
820  __kmp_fatal(KMP_MSG(CantSetWorkerState), KMP_ERR(status), __kmp_msg_null);
821  }
822 
823  /* Set stack size for this thread now.
824  The multiple of 2 is there because on some machines, requesting an unusual
825  stacksize causes the thread to have an offset before the dummy alloca()
826  takes place to create the offset. Since we want the user to have a
827  sufficient stacksize AND support a stack offset, we alloca() twice the
828  offset so that the upcoming alloca() does not eliminate any premade offset,
829  and also gives the user the stack space they requested for all threads */
830  stack_size += gtid * __kmp_stkoffset * 2;
831 
832 #if defined(__ANDROID__) && __ANDROID_API__ < 19
833  // Round the stack size to a multiple of the page size. Older versions of
834  // Android (until KitKat) would fail pthread_attr_setstacksize with EINVAL
835  // if the stack size was not a multiple of the page size.
836  stack_size = (stack_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
837 #endif
838 
839  KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
840  "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n",
841  gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));
842 
843 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
844  status = pthread_attr_setstacksize(&thread_attr, stack_size);
845 #ifdef KMP_BACKUP_STKSIZE
846  if (status != 0) {
847  if (!__kmp_env_stksize) {
848  stack_size = KMP_BACKUP_STKSIZE + gtid * __kmp_stkoffset;
849  __kmp_stksize = KMP_BACKUP_STKSIZE;
850  KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
851  "__kmp_stksize = %lu bytes, (backup) final stacksize = %lu "
852  "bytes\n",
853  gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));
854  status = pthread_attr_setstacksize(&thread_attr, stack_size);
855  }
856  }
857 #endif /* KMP_BACKUP_STKSIZE */
858  if (status != 0) {
859  __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
860  KMP_HNT(ChangeWorkerStackSize), __kmp_msg_null);
861  }
862 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
863 
864 #endif /* KMP_THREAD_ATTR */
865 
866  status =
867  pthread_create(&handle, &thread_attr, __kmp_launch_worker, (void *)th);
868  if (status != 0 || !handle) { // ??? Why do we check handle??
869 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
870  if (status == EINVAL) {
871  __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
872  KMP_HNT(IncreaseWorkerStackSize), __kmp_msg_null);
873  }
874  if (status == ENOMEM) {
875  __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
876  KMP_HNT(DecreaseWorkerStackSize), __kmp_msg_null);
877  }
878 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
879  if (status == EAGAIN) {
880  __kmp_fatal(KMP_MSG(NoResourcesForWorkerThread), KMP_ERR(status),
881  KMP_HNT(Decrease_NUM_THREADS), __kmp_msg_null);
882  }
883  KMP_SYSFAIL("pthread_create", status);
884  }
885 
886  th->th.th_info.ds.ds_thread = handle;
887 
888 #ifdef KMP_THREAD_ATTR
889  status = pthread_attr_destroy(&thread_attr);
890  if (status) {
891  kmp_msg_t err_code = KMP_ERR(status);
892  __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code,
893  __kmp_msg_null);
894  if (__kmp_generate_warnings == kmp_warnings_off) {
895  __kmp_str_free(&err_code.str);
896  }
897  }
898 #endif /* KMP_THREAD_ATTR */
899 
900  KMP_MB(); /* Flush all pending memory write invalidates. */
901 
902  KA_TRACE(10, ("__kmp_create_worker: done creating thread (%d)\n", gtid));
903 
904 } // __kmp_create_worker
905 
906 #if KMP_USE_MONITOR
907 void __kmp_create_monitor(kmp_info_t *th) {
908  pthread_t handle;
909  pthread_attr_t thread_attr;
910  size_t size;
911  int status;
912  int auto_adj_size = FALSE;
913 
914  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
915  // We don't need monitor thread in case of MAX_BLOCKTIME
916  KA_TRACE(10, ("__kmp_create_monitor: skipping monitor thread because of "
917  "MAX blocktime\n"));
918  th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op
919  th->th.th_info.ds.ds_gtid = 0;
920  return;
921  }
922  KA_TRACE(10, ("__kmp_create_monitor: try to create monitor\n"));
923 
924  KMP_MB(); /* Flush all pending memory write invalidates. */
925 
926  th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
927  th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
928 #if KMP_REAL_TIME_FIX
929  TCW_4(__kmp_global.g.g_time.dt.t_value,
930  -1); // Will use it for synchronization a bit later.
931 #else
932  TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
933 #endif // KMP_REAL_TIME_FIX
934 
935 #ifdef KMP_THREAD_ATTR
936  if (__kmp_monitor_stksize == 0) {
937  __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
938  auto_adj_size = TRUE;
939  }
940  status = pthread_attr_init(&thread_attr);
941  if (status != 0) {
942  __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null);
943  }
944  status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
945  if (status != 0) {
946  __kmp_fatal(KMP_MSG(CantSetMonitorState), KMP_ERR(status), __kmp_msg_null);
947  }
948 
949 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
950  status = pthread_attr_getstacksize(&thread_attr, &size);
951  KMP_CHECK_SYSFAIL("pthread_attr_getstacksize", status);
952 #else
953  size = __kmp_sys_min_stksize;
954 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
955 #endif /* KMP_THREAD_ATTR */
956 
957  if (__kmp_monitor_stksize == 0) {
958  __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
959  }
960  if (__kmp_monitor_stksize < __kmp_sys_min_stksize) {
961  __kmp_monitor_stksize = __kmp_sys_min_stksize;
962  }
963 
964  KA_TRACE(10, ("__kmp_create_monitor: default stacksize = %lu bytes,"
965  "requested stacksize = %lu bytes\n",
966  size, __kmp_monitor_stksize));
967 
968 retry:
969 
970 /* Set stack size for this thread now. */
971 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
972  KA_TRACE(10, ("__kmp_create_monitor: setting stacksize = %lu bytes,",
973  __kmp_monitor_stksize));
974  status = pthread_attr_setstacksize(&thread_attr, __kmp_monitor_stksize);
975  if (status != 0) {
976  if (auto_adj_size) {
977  __kmp_monitor_stksize *= 2;
978  goto retry;
979  }
980  kmp_msg_t err_code = KMP_ERR(status);
981  __kmp_msg(kmp_ms_warning, // should this be fatal? BB
982  KMP_MSG(CantSetMonitorStackSize, (long int)__kmp_monitor_stksize),
983  err_code, KMP_HNT(ChangeMonitorStackSize), __kmp_msg_null);
984  if (__kmp_generate_warnings == kmp_warnings_off) {
985  __kmp_str_free(&err_code.str);
986  }
987  }
988 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
989 
990  status =
991  pthread_create(&handle, &thread_attr, __kmp_launch_monitor, (void *)th);
992 
993  if (status != 0) {
994 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
995  if (status == EINVAL) {
996  if (auto_adj_size && (__kmp_monitor_stksize < (size_t)0x40000000)) {
997  __kmp_monitor_stksize *= 2;
998  goto retry;
999  }
1000  __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
1001  KMP_ERR(status), KMP_HNT(IncreaseMonitorStackSize),
1002  __kmp_msg_null);
1003  }
1004  if (status == ENOMEM) {
1005  __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
1006  KMP_ERR(status), KMP_HNT(DecreaseMonitorStackSize),
1007  __kmp_msg_null);
1008  }
1009 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
1010  if (status == EAGAIN) {
1011  __kmp_fatal(KMP_MSG(NoResourcesForMonitorThread), KMP_ERR(status),
1012  KMP_HNT(DecreaseNumberOfThreadsInUse), __kmp_msg_null);
1013  }
1014  KMP_SYSFAIL("pthread_create", status);
1015  }
1016 
1017  th->th.th_info.ds.ds_thread = handle;
1018 
1019 #if KMP_REAL_TIME_FIX
1020  // Wait for the monitor thread is really started and set its *priority*.
1021  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) ==
1022  sizeof(__kmp_global.g.g_time.dt.t_value));
1023  __kmp_wait_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value, -1,
1024  &__kmp_neq_4, NULL);
1025 #endif // KMP_REAL_TIME_FIX
1026 
1027 #ifdef KMP_THREAD_ATTR
1028  status = pthread_attr_destroy(&thread_attr);
1029  if (status != 0) {
1030  kmp_msg_t err_code = KMP_ERR(status);
1031  __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code,
1032  __kmp_msg_null);
1033  if (__kmp_generate_warnings == kmp_warnings_off) {
1034  __kmp_str_free(&err_code.str);
1035  }
1036  }
1037 #endif
1038 
1039  KMP_MB(); /* Flush all pending memory write invalidates. */
1040 
1041  KA_TRACE(10, ("__kmp_create_monitor: monitor created %#.8lx\n",
1042  th->th.th_info.ds.ds_thread));
1043 
1044 } // __kmp_create_monitor
1045 #endif // KMP_USE_MONITOR
1046 
// Terminate the calling thread, passing `exit_status` (widened to a pointer)
// as the thread's exit value.
void __kmp_exit_thread(int exit_status) {
  void *thread_result = (void *)(intptr_t)exit_status;
  pthread_exit(thread_result);
} // __kmp_exit_thread
1050 
1051 #if KMP_USE_MONITOR
1052 void __kmp_resume_monitor();
1053 
1054 void __kmp_reap_monitor(kmp_info_t *th) {
1055  int status;
1056  void *exit_val;
1057 
1058  KA_TRACE(10, ("__kmp_reap_monitor: try to reap monitor thread with handle"
1059  " %#.8lx\n",
1060  th->th.th_info.ds.ds_thread));
1061 
1062  // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
1063  // If both tid and gtid are 0, it means the monitor did not ever start.
1064  // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
1065  KMP_DEBUG_ASSERT(th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid);
1066  if (th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR) {
1067  KA_TRACE(10, ("__kmp_reap_monitor: monitor did not start, returning\n"));
1068  return;
1069  }
1070 
1071  KMP_MB(); /* Flush all pending memory write invalidates. */
1072 
1073  /* First, check to see whether the monitor thread exists to wake it up. This
1074  is to avoid performance problem when the monitor sleeps during
1075  blocktime-size interval */
1076 
1077  status = pthread_kill(th->th.th_info.ds.ds_thread, 0);
1078  if (status != ESRCH) {
1079  __kmp_resume_monitor(); // Wake up the monitor thread
1080  }
1081  KA_TRACE(10, ("__kmp_reap_monitor: try to join with monitor\n"));
1082  status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val);
1083  if (exit_val != th) {
1084  __kmp_fatal(KMP_MSG(ReapMonitorError), KMP_ERR(status), __kmp_msg_null);
1085  }
1086 
1087  th->th.th_info.ds.ds_tid = KMP_GTID_DNE;
1088  th->th.th_info.ds.ds_gtid = KMP_GTID_DNE;
1089 
1090  KA_TRACE(10, ("__kmp_reap_monitor: done reaping monitor thread with handle"
1091  " %#.8lx\n",
1092  th->th.th_info.ds.ds_thread));
1093 
1094  KMP_MB(); /* Flush all pending memory write invalidates. */
1095 }
1096 #endif // KMP_USE_MONITOR
1097 
1098 void __kmp_reap_worker(kmp_info_t *th) {
1099  int status;
1100  void *exit_val;
1101 
1102  KMP_MB(); /* Flush all pending memory write invalidates. */
1103 
1104  KA_TRACE(
1105  10, ("__kmp_reap_worker: try to reap T#%d\n", th->th.th_info.ds.ds_gtid));
1106 
1107  status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val);
1108 #ifdef KMP_DEBUG
1109  /* Don't expose these to the user until we understand when they trigger */
1110  if (status != 0) {
1111  __kmp_fatal(KMP_MSG(ReapWorkerError), KMP_ERR(status), __kmp_msg_null);
1112  }
1113  if (exit_val != th) {
1114  KA_TRACE(10, ("__kmp_reap_worker: worker T#%d did not reap properly, "
1115  "exit_val = %p\n",
1116  th->th.th_info.ds.ds_gtid, exit_val));
1117  }
1118 #endif /* KMP_DEBUG */
1119 
1120  KA_TRACE(10, ("__kmp_reap_worker: done reaping T#%d\n",
1121  th->th.th_info.ds.ds_gtid));
1122 
1123  KMP_MB(); /* Flush all pending memory write invalidates. */
1124 }
1125 
1126 #if KMP_HANDLE_SIGNALS
1127 
// Signal handler that deliberately ignores the signal (SIG_IGN-like behavior
// expressed as a real handler function).
static void __kmp_null_handler(int signo) {
  (void)signo; // intentionally unused
} // __kmp_null_handler
1131 
1132 static void __kmp_team_handler(int signo) {
1133  if (__kmp_global.g.g_abort == 0) {
1134 /* Stage 1 signal handler, let's shut down all of the threads */
1135 #ifdef KMP_DEBUG
1136  __kmp_debug_printf("__kmp_team_handler: caught signal = %d\n", signo);
1137 #endif
1138  switch (signo) {
1139  case SIGHUP:
1140  case SIGINT:
1141  case SIGQUIT:
1142  case SIGILL:
1143  case SIGABRT:
1144  case SIGFPE:
1145  case SIGBUS:
1146  case SIGSEGV:
1147 #ifdef SIGSYS
1148  case SIGSYS:
1149 #endif
1150  case SIGTERM:
1151  if (__kmp_debug_buf) {
1152  __kmp_dump_debug_buffer();
1153  }
1154  __kmp_unregister_library(); // cleanup shared memory
1155  KMP_MB(); // Flush all pending memory write invalidates.
1156  TCW_4(__kmp_global.g.g_abort, signo);
1157  KMP_MB(); // Flush all pending memory write invalidates.
1158  TCW_4(__kmp_global.g.g_done, TRUE);
1159  KMP_MB(); // Flush all pending memory write invalidates.
1160  break;
1161  default:
1162 #ifdef KMP_DEBUG
1163  __kmp_debug_printf("__kmp_team_handler: unknown signal type");
1164 #endif
1165  break;
1166  }
1167  }
1168 } // __kmp_team_handler
1169 
1170 static void __kmp_sigaction(int signum, const struct sigaction *act,
1171  struct sigaction *oldact) {
1172  int rc = sigaction(signum, act, oldact);
1173  KMP_CHECK_SYSFAIL_ERRNO("sigaction", rc);
1174 }
1175 
1176 static void __kmp_install_one_handler(int sig, sig_func_t handler_func,
1177  int parallel_init) {
1178  KMP_MB(); // Flush all pending memory write invalidates.
1179  KB_TRACE(60,
1180  ("__kmp_install_one_handler( %d, ..., %d )\n", sig, parallel_init));
1181  if (parallel_init) {
1182  struct sigaction new_action;
1183  struct sigaction old_action;
1184  new_action.sa_handler = handler_func;
1185  new_action.sa_flags = 0;
1186  sigfillset(&new_action.sa_mask);
1187  __kmp_sigaction(sig, &new_action, &old_action);
1188  if (old_action.sa_handler == __kmp_sighldrs[sig].sa_handler) {
1189  sigaddset(&__kmp_sigset, sig);
1190  } else {
1191  // Restore/keep user's handler if one previously installed.
1192  __kmp_sigaction(sig, &old_action, NULL);
1193  }
1194  } else {
1195  // Save initial/system signal handlers to see if user handlers installed.
1196  __kmp_sigaction(sig, NULL, &__kmp_sighldrs[sig]);
1197  }
1198  KMP_MB(); // Flush all pending memory write invalidates.
1199 } // __kmp_install_one_handler
1200 
1201 static void __kmp_remove_one_handler(int sig) {
1202  KB_TRACE(60, ("__kmp_remove_one_handler( %d )\n", sig));
1203  if (sigismember(&__kmp_sigset, sig)) {
1204  struct sigaction old;
1205  KMP_MB(); // Flush all pending memory write invalidates.
1206  __kmp_sigaction(sig, &__kmp_sighldrs[sig], &old);
1207  if ((old.sa_handler != __kmp_team_handler) &&
1208  (old.sa_handler != __kmp_null_handler)) {
1209  // Restore the users signal handler.
1210  KB_TRACE(10, ("__kmp_remove_one_handler: oops, not our handler, "
1211  "restoring: sig=%d\n",
1212  sig));
1213  __kmp_sigaction(sig, &old, NULL);
1214  }
1215  sigdelset(&__kmp_sigset, sig);
1216  KMP_MB(); // Flush all pending memory write invalidates.
1217  }
1218 } // __kmp_remove_one_handler
1219 
1220 void __kmp_install_signals(int parallel_init) {
1221  KB_TRACE(10, ("__kmp_install_signals( %d )\n", parallel_init));
1222  if (__kmp_handle_signals || !parallel_init) {
1223  // If ! parallel_init, we do not install handlers, just save original
1224  // handlers. Let us do it even __handle_signals is 0.
1225  sigemptyset(&__kmp_sigset);
1226  __kmp_install_one_handler(SIGHUP, __kmp_team_handler, parallel_init);
1227  __kmp_install_one_handler(SIGINT, __kmp_team_handler, parallel_init);
1228  __kmp_install_one_handler(SIGQUIT, __kmp_team_handler, parallel_init);
1229  __kmp_install_one_handler(SIGILL, __kmp_team_handler, parallel_init);
1230  __kmp_install_one_handler(SIGABRT, __kmp_team_handler, parallel_init);
1231  __kmp_install_one_handler(SIGFPE, __kmp_team_handler, parallel_init);
1232  __kmp_install_one_handler(SIGBUS, __kmp_team_handler, parallel_init);
1233  __kmp_install_one_handler(SIGSEGV, __kmp_team_handler, parallel_init);
1234 #ifdef SIGSYS
1235  __kmp_install_one_handler(SIGSYS, __kmp_team_handler, parallel_init);
1236 #endif // SIGSYS
1237  __kmp_install_one_handler(SIGTERM, __kmp_team_handler, parallel_init);
1238 #ifdef SIGPIPE
1239  __kmp_install_one_handler(SIGPIPE, __kmp_team_handler, parallel_init);
1240 #endif // SIGPIPE
1241  }
1242 } // __kmp_install_signals
1243 
1244 void __kmp_remove_signals(void) {
1245  int sig;
1246  KB_TRACE(10, ("__kmp_remove_signals()\n"));
1247  for (sig = 1; sig < NSIG; ++sig) {
1248  __kmp_remove_one_handler(sig);
1249  }
1250 } // __kmp_remove_signals
1251 
1252 #endif // KMP_HANDLE_SIGNALS
1253 
// Set the calling thread's cancel state to `new_state` (only when
// KMP_CANCEL_THREADS is defined). The previous state is expected to have been
// PTHREAD_CANCEL_DISABLE.
void __kmp_enable(int new_state) {
#ifdef KMP_CANCEL_THREADS
  int prev_state;
  int rc = pthread_setcancelstate(new_state, &prev_state);
  KMP_CHECK_SYSFAIL("pthread_setcancelstate", rc);
  KMP_DEBUG_ASSERT(prev_state == PTHREAD_CANCEL_DISABLE);
#endif
}
1262 
// Disable cancellation for the calling thread (only when KMP_CANCEL_THREADS is
// defined), storing the previous cancel state in `*old_state`.
void __kmp_disable(int *old_state) {
#ifdef KMP_CANCEL_THREADS
  int rc = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, old_state);
  KMP_CHECK_SYSFAIL("pthread_setcancelstate", rc);
#endif
}
1270 
1271 static void __kmp_atfork_prepare(void) {
1272  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
1273  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1274 }
1275 
1276 static void __kmp_atfork_parent(void) {
1277  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1278  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
1279 }
1280 
1281 /* Reset the library so execution in the child starts "all over again" with
1282  clean data structures in initial states. Don't worry about freeing memory
1283  allocated by parent, just abandon it to be safe. */
1284 static void __kmp_atfork_child(void) {
1285  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1286  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
1287  /* TODO make sure this is done right for nested/sibling */
1288  // ATT: Memory leaks are here? TODO: Check it and fix.
1289  /* KMP_ASSERT( 0 ); */
1290 
1291  ++__kmp_fork_count;
1292 
1293 #if KMP_AFFINITY_SUPPORTED
1294 #if KMP_OS_LINUX || KMP_OS_FREEBSD
1295  // reset the affinity in the child to the initial thread
1296  // affinity in the parent
1297  kmp_set_thread_affinity_mask_initial();
1298 #endif
1299  // Set default not to bind threads tightly in the child (we’re expecting
1300  // over-subscription after the fork and this can improve things for
1301  // scripting languages that use OpenMP inside process-parallel code).
1302  __kmp_affinity_type = affinity_none;
1303  if (__kmp_nested_proc_bind.bind_types != NULL) {
1304  __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
1305  }
1306  __kmp_affinity_masks = NULL;
1307  __kmp_affinity_num_masks = 0;
1308 #endif // KMP_AFFINITY_SUPPORTED
1309 
1310 #if KMP_USE_MONITOR
1311  __kmp_init_monitor = 0;
1312 #endif
1313  __kmp_init_parallel = FALSE;
1314  __kmp_init_middle = FALSE;
1315  __kmp_init_serial = FALSE;
1316  TCW_4(__kmp_init_gtid, FALSE);
1317  __kmp_init_common = FALSE;
1318 
1319  TCW_4(__kmp_init_user_locks, FALSE);
1320 #if !KMP_USE_DYNAMIC_LOCK
1321  __kmp_user_lock_table.used = 1;
1322  __kmp_user_lock_table.allocated = 0;
1323  __kmp_user_lock_table.table = NULL;
1324  __kmp_lock_blocks = NULL;
1325 #endif
1326 
1327  __kmp_all_nth = 0;
1328  TCW_4(__kmp_nth, 0);
1329 
1330  __kmp_thread_pool = NULL;
1331  __kmp_thread_pool_insert_pt = NULL;
1332  __kmp_team_pool = NULL;
1333 
1334  /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate
1335  here so threadprivate doesn't use stale data */
1336  KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n",
1337  __kmp_threadpriv_cache_list));
1338 
1339  while (__kmp_threadpriv_cache_list != NULL) {
1340 
1341  if (*__kmp_threadpriv_cache_list->addr != NULL) {
1342  KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n",
1343  &(*__kmp_threadpriv_cache_list->addr)));
1344 
1345  *__kmp_threadpriv_cache_list->addr = NULL;
1346  }
1347  __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next;
1348  }
1349 
1350  __kmp_init_runtime = FALSE;
1351 
1352  /* reset statically initialized locks */
1353  __kmp_init_bootstrap_lock(&__kmp_initz_lock);
1354  __kmp_init_bootstrap_lock(&__kmp_stdio_lock);
1355  __kmp_init_bootstrap_lock(&__kmp_console_lock);
1356  __kmp_init_bootstrap_lock(&__kmp_task_team_lock);
1357 
1358 #if USE_ITT_BUILD
1359  __kmp_itt_reset(); // reset ITT's global state
1360 #endif /* USE_ITT_BUILD */
1361 
1362  __kmp_serial_initialize();
1363 
1364  /* This is necessary to make sure no stale data is left around */
1365  /* AC: customers complain that we use unsafe routines in the atfork
1366  handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen
1367  in dynamic_link when check the presence of shared tbbmalloc library.
1368  Suggestion is to make the library initialization lazier, similar
1369  to what done for __kmpc_begin(). */
1370  // TODO: synchronize all static initializations with regular library
1371  // startup; look at kmp_global.cpp and etc.
1372  //__kmp_internal_begin ();
1373 }
1374 
1375 void __kmp_register_atfork(void) {
1376  if (__kmp_need_register_atfork) {
1377  int status = pthread_atfork(__kmp_atfork_prepare, __kmp_atfork_parent,
1378  __kmp_atfork_child);
1379  KMP_CHECK_SYSFAIL("pthread_atfork", status);
1380  __kmp_need_register_atfork = FALSE;
1381  }
1382 }
1383 
1384 void __kmp_suspend_initialize(void) {
1385  int status;
1386  status = pthread_mutexattr_init(&__kmp_suspend_mutex_attr);
1387  KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
1388  status = pthread_condattr_init(&__kmp_suspend_cond_attr);
1389  KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
1390 }
1391 
1392 void __kmp_suspend_initialize_thread(kmp_info_t *th) {
1393  ANNOTATE_HAPPENS_AFTER(&th->th.th_suspend_init_count);
1394  int old_value = KMP_ATOMIC_LD_RLX(&th->th.th_suspend_init_count);
1395  int new_value = __kmp_fork_count + 1;
1396  // Return if already initialized
1397  if (old_value == new_value)
1398  return;
1399  // Wait, then return if being initialized
1400  if (old_value == -1 || !__kmp_atomic_compare_store(
1401  &th->th.th_suspend_init_count, old_value, -1)) {
1402  while (KMP_ATOMIC_LD_ACQ(&th->th.th_suspend_init_count) != new_value) {
1403  KMP_CPU_PAUSE();
1404  }
1405  } else {
1406  // Claim to be the initializer and do initializations
1407  int status;
1408  status = pthread_cond_init(&th->th.th_suspend_cv.c_cond,
1409  &__kmp_suspend_cond_attr);
1410  KMP_CHECK_SYSFAIL("pthread_cond_init", status);
1411  status = pthread_mutex_init(&th->th.th_suspend_mx.m_mutex,
1412  &__kmp_suspend_mutex_attr);
1413  KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
1414  KMP_ATOMIC_ST_REL(&th->th.th_suspend_init_count, new_value);
1415  ANNOTATE_HAPPENS_BEFORE(&th->th.th_suspend_init_count);
1416  }
1417 }
1418 
1419 void __kmp_suspend_uninitialize_thread(kmp_info_t *th) {
1420  if (KMP_ATOMIC_LD_ACQ(&th->th.th_suspend_init_count) > __kmp_fork_count) {
1421  /* this means we have initialize the suspension pthread objects for this
1422  thread in this instance of the process */
1423  int status;
1424 
1425  status = pthread_cond_destroy(&th->th.th_suspend_cv.c_cond);
1426  if (status != 0 && status != EBUSY) {
1427  KMP_SYSFAIL("pthread_cond_destroy", status);
1428  }
1429  status = pthread_mutex_destroy(&th->th.th_suspend_mx.m_mutex);
1430  if (status != 0 && status != EBUSY) {
1431  KMP_SYSFAIL("pthread_mutex_destroy", status);
1432  }
1433  --th->th.th_suspend_init_count;
1434  KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&th->th.th_suspend_init_count) ==
1435  __kmp_fork_count);
1436  }
1437 }
1438 
1439 // return true if lock obtained, false otherwise
1440 int __kmp_try_suspend_mx(kmp_info_t *th) {
1441  return (pthread_mutex_trylock(&th->th.th_suspend_mx.m_mutex) == 0);
1442 }
1443 
1444 void __kmp_lock_suspend_mx(kmp_info_t *th) {
1445  int status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
1446  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1447 }
1448 
1449 void __kmp_unlock_suspend_mx(kmp_info_t *th) {
1450  int status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1451  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1452 }
1453 
1454 /* This routine puts the calling thread to sleep after setting the
1455  sleep bit for the indicated flag variable to true. */
1456 template <class C>
1457 static inline void __kmp_suspend_template(int th_gtid, C *flag) {
1458  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_suspend);
1459  kmp_info_t *th = __kmp_threads[th_gtid];
1460  int status;
1461  typename C::flag_t old_spin;
1462 
1463  KF_TRACE(30, ("__kmp_suspend_template: T#%d enter for flag = %p\n", th_gtid,
1464  flag->get()));
1465 
1466  __kmp_suspend_initialize_thread(th);
1467 
1468  __kmp_lock_suspend_mx(th);
1469 
1470  KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n",
1471  th_gtid, flag->get()));
1472 
1473  /* TODO: shouldn't this use release semantics to ensure that
1474  __kmp_suspend_initialize_thread gets called first? */
1475  old_spin = flag->set_sleeping();
1476  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1477  __kmp_pause_status != kmp_soft_paused) {
1478  flag->unset_sleeping();
1479  __kmp_unlock_suspend_mx(th);
1480  return;
1481  }
1482  KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x,"
1483  " was %x\n",
1484  th_gtid, flag->get(), flag->load(), old_spin));
1485 
1486  if (flag->done_check_val(old_spin)) {
1487  old_spin = flag->unset_sleeping();
1488  KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
1489  "for spin(%p)\n",
1490  th_gtid, flag->get()));
1491  } else {
1492  /* Encapsulate in a loop as the documentation states that this may
1493  "with low probability" return when the condition variable has
1494  not been signaled or broadcast */
1495  int deactivated = FALSE;
1496  TCW_PTR(th->th.th_sleep_loc, (void *)flag);
1497 
1498  while (flag->is_sleeping()) {
1499 #ifdef DEBUG_SUSPEND
1500  char buffer[128];
1501  __kmp_suspend_count++;
1502  __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1503  __kmp_printf("__kmp_suspend_template: suspending T#%d: %s\n", th_gtid,
1504  buffer);
1505 #endif
1506  // Mark the thread as no longer active (only in the first iteration of the
1507  // loop).
1508  if (!deactivated) {
1509  th->th.th_active = FALSE;
1510  if (th->th.th_active_in_pool) {
1511  th->th.th_active_in_pool = FALSE;
1512  KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
1513  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
1514  }
1515  deactivated = TRUE;
1516  }
1517 
1518 #if USE_SUSPEND_TIMEOUT
1519  struct timespec now;
1520  struct timeval tval;
1521  int msecs;
1522 
1523  status = gettimeofday(&tval, NULL);
1524  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1525  TIMEVAL_TO_TIMESPEC(&tval, &now);
1526 
1527  msecs = (4 * __kmp_dflt_blocktime) + 200;
1528  now.tv_sec += msecs / 1000;
1529  now.tv_nsec += (msecs % 1000) * 1000;
1530 
1531  KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "
1532  "pthread_cond_timedwait\n",
1533  th_gtid));
1534  status = pthread_cond_timedwait(&th->th.th_suspend_cv.c_cond,
1535  &th->th.th_suspend_mx.m_mutex, &now);
1536 #else
1537  KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform"
1538  " pthread_cond_wait\n",
1539  th_gtid));
1540  status = pthread_cond_wait(&th->th.th_suspend_cv.c_cond,
1541  &th->th.th_suspend_mx.m_mutex);
1542 #endif // USE_SUSPEND_TIMEOUT
1543 
1544  if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {
1545  KMP_SYSFAIL("pthread_cond_wait", status);
1546  }
1547 #ifdef KMP_DEBUG
1548  if (status == ETIMEDOUT) {
1549  if (flag->is_sleeping()) {
1550  KF_TRACE(100,
1551  ("__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid));
1552  } else {
1553  KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit "
1554  "not set!\n",
1555  th_gtid));
1556  }
1557  } else if (flag->is_sleeping()) {
1558  KF_TRACE(100,
1559  ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));
1560  }
1561 #endif
1562  } // while
1563 
1564  // Mark the thread as active again (if it was previous marked as inactive)
1565  if (deactivated) {
1566  th->th.th_active = TRUE;
1567  if (TCR_4(th->th.th_in_pool)) {
1568  KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
1569  th->th.th_active_in_pool = TRUE;
1570  }
1571  }
1572  }
1573 #ifdef DEBUG_SUSPEND
1574  {
1575  char buffer[128];
1576  __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1577  __kmp_printf("__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid,
1578  buffer);
1579  }
1580 #endif
1581 
1582  __kmp_unlock_suspend_mx(th);
1583  KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
1584 }
1585 
1586 template <bool C, bool S>
1587 void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag) {
1588  __kmp_suspend_template(th_gtid, flag);
1589 }
1590 template <bool C, bool S>
1591 void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
1592  __kmp_suspend_template(th_gtid, flag);
1593 }
1594 void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
1595  __kmp_suspend_template(th_gtid, flag);
1596 }
1597 
1598 template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
1599 template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
1600 template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
1601 
1602 /* This routine signals the thread specified by target_gtid to wake up
1603  after setting the sleep bit indicated by the flag argument to FALSE.
1604  The target thread must already have called __kmp_suspend_template() */
1605 template <class C>
1606 static inline void __kmp_resume_template(int target_gtid, C *flag) {
1607  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
1608  kmp_info_t *th = __kmp_threads[target_gtid];
1609  int status;
1610 
1611 #ifdef KMP_DEBUG
1612  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
1613 #endif
1614 
1615  KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n",
1616  gtid, target_gtid));
1617  KMP_DEBUG_ASSERT(gtid != target_gtid);
1618 
1619  __kmp_suspend_initialize_thread(th);
1620 
1621  __kmp_lock_suspend_mx(th);
1622 
1623  if (!flag) { // coming from __kmp_null_resume_wrapper
1624  flag = (C *)CCAST(void *, th->th.th_sleep_loc);
1625  }
1626 
1627  // First, check if the flag is null or its type has changed. If so, someone
1628  // else woke it up.
1629  if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type
1630  // simply shows what flag was cast to
1631  KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
1632  "awake: flag(%p)\n",
1633  gtid, target_gtid, NULL));
1634  __kmp_unlock_suspend_mx(th);
1635  return;
1636  } else { // if multiple threads are sleeping, flag should be internally
1637  // referring to a specific thread here
1638  typename C::flag_t old_spin = flag->unset_sleeping();
1639  if (!flag->is_sleeping_val(old_spin)) {
1640  KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
1641  "awake: flag(%p): "
1642  "%u => %u\n",
1643  gtid, target_gtid, flag->get(), old_spin, flag->load()));
1644  __kmp_unlock_suspend_mx(th);
1645  return;
1646  }
1647  KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
1648  "sleep bit for flag's loc(%p): "
1649  "%u => %u\n",
1650  gtid, target_gtid, flag->get(), old_spin, flag->load()));
1651  }
1652  TCW_PTR(th->th.th_sleep_loc, NULL);
1653 
1654 #ifdef DEBUG_SUSPEND
1655  {
1656  char buffer[128];
1657  __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1658  __kmp_printf("__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid,
1659  target_gtid, buffer);
1660  }
1661 #endif
1662  status = pthread_cond_signal(&th->th.th_suspend_cv.c_cond);
1663  KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
1664  __kmp_unlock_suspend_mx(th);
1665  KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
1666  " for T#%d\n",
1667  gtid, target_gtid));
1668 }
1669 
1670 template <bool C, bool S>
1671 void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag) {
1672  __kmp_resume_template(target_gtid, flag);
1673 }
1674 template <bool C, bool S>
1675 void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
1676  __kmp_resume_template(target_gtid, flag);
1677 }
1678 void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
1679  __kmp_resume_template(target_gtid, flag);
1680 }
1681 
1682 template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
1683 template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
1684 
1685 #if KMP_USE_MONITOR
1686 void __kmp_resume_monitor() {
1687  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
1688  int status;
1689 #ifdef KMP_DEBUG
1690  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
1691  KF_TRACE(30, ("__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", gtid,
1692  KMP_GTID_MONITOR));
1693  KMP_DEBUG_ASSERT(gtid != KMP_GTID_MONITOR);
1694 #endif
1695  status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
1696  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1697 #ifdef DEBUG_SUSPEND
1698  {
1699  char buffer[128];
1700  __kmp_print_cond(buffer, &__kmp_wait_cv.c_cond);
1701  __kmp_printf("__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid,
1702  KMP_GTID_MONITOR, buffer);
1703  }
1704 #endif
1705  status = pthread_cond_signal(&__kmp_wait_cv.c_cond);
1706  KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
1707  status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
1708  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1709  KF_TRACE(30, ("__kmp_resume_monitor: T#%d exiting after signaling wake up"
1710  " for T#%d\n",
1711  gtid, KMP_GTID_MONITOR));
1712 }
1713 #endif // KMP_USE_MONITOR
1714 
// Give up the processor via sched_yield(); its return value is ignored.
void __kmp_yield() {
  (void)sched_yield();
}
1716 
1717 void __kmp_gtid_set_specific(int gtid) {
1718  if (__kmp_init_gtid) {
1719  int status;
1720  status = pthread_setspecific(__kmp_gtid_threadprivate_key,
1721  (void *)(intptr_t)(gtid + 1));
1722  KMP_CHECK_SYSFAIL("pthread_setspecific", status);
1723  } else {
1724  KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n"));
1725  }
1726 }
1727 
1728 int __kmp_gtid_get_specific() {
1729  int gtid;
1730  if (!__kmp_init_gtid) {
1731  KA_TRACE(50, ("__kmp_gtid_get_specific: runtime shutdown, returning "
1732  "KMP_GTID_SHUTDOWN\n"));
1733  return KMP_GTID_SHUTDOWN;
1734  }
1735  gtid = (int)(size_t)pthread_getspecific(__kmp_gtid_threadprivate_key);
1736  if (gtid == 0) {
1737  gtid = KMP_GTID_DNE;
1738  } else {
1739  gtid--;
1740  }
1741  KA_TRACE(50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
1742  __kmp_gtid_threadprivate_key, gtid));
1743  return gtid;
1744 }
1745 
// Return the user CPU time, in seconds, consumed by this process plus its
// waited-for children (tms_utime + tms_cutime as reported by times()).
//
// times() counts in clock ticks whose rate is sysconf(_SC_CLK_TCK), which is
// not the same unit as CLOCKS_PER_SEC (the unit of clock()). Returns 0.0 if
// times() fails, so the tms buffer is never read uninitialized.
double __kmp_read_cpu_time(void) {
  struct tms buffer;

  if (times(&buffer) == (clock_t)-1) {
    return 0.0;
  }

  long ticks_per_sec = sysconf(_SC_CLK_TCK);
  if (ticks_per_sec <= 0) {
    // Tick rate unavailable: keep the historical divisor as a last resort.
    ticks_per_sec = (long)CLOCKS_PER_SEC;
  }

  return (double)(buffer.tms_utime + buffer.tms_cutime) /
         (double)ticks_per_sec;
}
1755 
1756 int __kmp_read_system_info(struct kmp_sys_info *info) {
1757  int status;
1758  struct rusage r_usage;
1759 
1760  memset(info, 0, sizeof(*info));
1761 
1762  status = getrusage(RUSAGE_SELF, &r_usage);
1763  KMP_CHECK_SYSFAIL_ERRNO("getrusage", status);
1764 
1765  // The maximum resident set size utilized (in kilobytes)
1766  info->maxrss = r_usage.ru_maxrss;
1767  // The number of page faults serviced without any I/O
1768  info->minflt = r_usage.ru_minflt;
1769  // The number of page faults serviced that required I/O
1770  info->majflt = r_usage.ru_majflt;
1771  // The number of times a process was "swapped" out of memory
1772  info->nswap = r_usage.ru_nswap;
1773  // The number of times the file system had to perform input
1774  info->inblock = r_usage.ru_inblock;
1775  // The number of times the file system had to perform output
1776  info->oublock = r_usage.ru_oublock;
1777  // The number of times a context switch was voluntarily
1778  info->nvcsw = r_usage.ru_nvcsw;
1779  // The number of times a context switch was forced
1780  info->nivcsw = r_usage.ru_nivcsw;
1781 
1782  return (status != 0);
1783 }
1784 
1785 void __kmp_read_system_time(double *delta) {
1786  double t_ns;
1787  struct timeval tval;
1788  struct timespec stop;
1789  int status;
1790 
1791  status = gettimeofday(&tval, NULL);
1792  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1793  TIMEVAL_TO_TIMESPEC(&tval, &stop);
1794  t_ns = (double)(TS2NS(stop) - TS2NS(__kmp_sys_timer_data.start));
1795  *delta = (t_ns * 1e-9);
1796 }
1797 
1798 void __kmp_clear_system_time(void) {
1799  struct timeval tval;
1800  int status;
1801  status = gettimeofday(&tval, NULL);
1802  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1803  TIMEVAL_TO_TIMESPEC(&tval, &__kmp_sys_timer_data.start);
1804 }
1805 
1806 static int __kmp_get_xproc(void) {
1807 
1808  int r = 0;
1809 
1810 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
1811  KMP_OS_OPENBSD || KMP_OS_HURD
1812 
1813  __kmp_type_convert(sysconf(_SC_NPROCESSORS_ONLN), &(r));
1814 
1815 #elif KMP_OS_DARWIN
1816 
1817  // Bug C77011 High "OpenMP Threads and number of active cores".
1818 
1819  // Find the number of available CPUs.
1820  kern_return_t rc;
1821  host_basic_info_data_t info;
1822  mach_msg_type_number_t num = HOST_BASIC_INFO_COUNT;
1823  rc = host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&info, &num);
1824  if (rc == 0 && num == HOST_BASIC_INFO_COUNT) {
1825  // Cannot use KA_TRACE() here because this code works before trace support
1826  // is initialized.
1827  r = info.avail_cpus;
1828  } else {
1829  KMP_WARNING(CantGetNumAvailCPU);
1830  KMP_INFORM(AssumedNumCPU);
1831  }
1832 
1833 #else
1834 
1835 #error "Unknown or unsupported OS."
1836 
1837 #endif
1838 
1839  return r > 0 ? r : 2; /* guess value of 2 if OS told us 0 */
1840 
1841 } // __kmp_get_xproc
1842 
/* Open the file at path and parse its contents with the scanf-style format,
   storing converted values through the variadic pointer arguments.
   Returns whatever vfscanf() returned (the number of items assigned, or EOF),
   or 0 if the file could not be opened. */
int __kmp_read_from_file(char const *path, char const *format, ...) {
  int result;
  va_list args;

  // Open the file first so the failure path never leaves a va_list started.
  FILE *f = fopen(path, "rb");
  if (f == NULL)
    return 0;

  va_start(args, format);
  result = vfscanf(f, format, args);
  va_end(args); // every va_start must be matched by va_end before returning
  fclose(f);

  return result;
}
1856 
1857 void __kmp_runtime_initialize(void) {
1858  int status;
1859  pthread_mutexattr_t mutex_attr;
1860  pthread_condattr_t cond_attr;
1861 
1862  if (__kmp_init_runtime) {
1863  return;
1864  }
1865 
1866 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1867  if (!__kmp_cpuinfo.initialized) {
1868  __kmp_query_cpuid(&__kmp_cpuinfo);
1869  }
1870 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1871 
1872  __kmp_xproc = __kmp_get_xproc();
1873 
1874 #if !KMP_32_BIT_ARCH
1875  struct rlimit rlim;
1876  // read stack size of calling thread, save it as default for worker threads;
1877  // this should be done before reading environment variables
1878  status = getrlimit(RLIMIT_STACK, &rlim);
1879  if (status == 0) { // success?
1880  __kmp_stksize = rlim.rlim_cur;
1881  __kmp_check_stksize(&__kmp_stksize); // check value and adjust if needed
1882  }
1883 #endif /* KMP_32_BIT_ARCH */
1884 
1885  if (sysconf(_SC_THREADS)) {
1886 
1887  /* Query the maximum number of threads */
1888  __kmp_type_convert(sysconf(_SC_THREAD_THREADS_MAX), &(__kmp_sys_max_nth));
1889  if (__kmp_sys_max_nth == -1) {
1890  /* Unlimited threads for NPTL */
1891  __kmp_sys_max_nth = INT_MAX;
1892  } else if (__kmp_sys_max_nth <= 1) {
1893  /* Can't tell, just use PTHREAD_THREADS_MAX */
1894  __kmp_sys_max_nth = KMP_MAX_NTH;
1895  }
1896 
1897  /* Query the minimum stack size */
1898  __kmp_sys_min_stksize = sysconf(_SC_THREAD_STACK_MIN);
1899  if (__kmp_sys_min_stksize <= 1) {
1900  __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
1901  }
1902  }
1903 
1904  /* Set up minimum number of threads to switch to TLS gtid */
1905  __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
1906 
1907  status = pthread_key_create(&__kmp_gtid_threadprivate_key,
1908  __kmp_internal_end_dest);
1909  KMP_CHECK_SYSFAIL("pthread_key_create", status);
1910  status = pthread_mutexattr_init(&mutex_attr);
1911  KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
1912  status = pthread_mutex_init(&__kmp_wait_mx.m_mutex, &mutex_attr);
1913  KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
1914  status = pthread_mutexattr_destroy(&mutex_attr);
1915  KMP_CHECK_SYSFAIL("pthread_mutexattr_destroy", status);
1916  status = pthread_condattr_init(&cond_attr);
1917  KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
1918  status = pthread_cond_init(&__kmp_wait_cv.c_cond, &cond_attr);
1919  KMP_CHECK_SYSFAIL("pthread_cond_init", status);
1920  status = pthread_condattr_destroy(&cond_attr);
1921  KMP_CHECK_SYSFAIL("pthread_condattr_destroy", status);
1922 #if USE_ITT_BUILD
1923  __kmp_itt_initialize();
1924 #endif /* USE_ITT_BUILD */
1925 
1926  __kmp_init_runtime = TRUE;
1927 }
1928 
1929 void __kmp_runtime_destroy(void) {
1930  int status;
1931 
1932  if (!__kmp_init_runtime) {
1933  return; // Nothing to do.
1934  }
1935 
1936 #if USE_ITT_BUILD
1937  __kmp_itt_destroy();
1938 #endif /* USE_ITT_BUILD */
1939 
1940  status = pthread_key_delete(__kmp_gtid_threadprivate_key);
1941  KMP_CHECK_SYSFAIL("pthread_key_delete", status);
1942 
1943  status = pthread_mutex_destroy(&__kmp_wait_mx.m_mutex);
1944  if (status != 0 && status != EBUSY) {
1945  KMP_SYSFAIL("pthread_mutex_destroy", status);
1946  }
1947  status = pthread_cond_destroy(&__kmp_wait_cv.c_cond);
1948  if (status != 0 && status != EBUSY) {
1949  KMP_SYSFAIL("pthread_cond_destroy", status);
1950  }
1951 #if KMP_AFFINITY_SUPPORTED
1952  __kmp_affinity_uninitialize();
1953 #endif
1954 
1955  __kmp_init_runtime = FALSE;
1956 }
1957 
/* Put the calling thread to sleep for roughly millis milliseconds. The request
   is rounded to the nearest whole second before being handed to sleep(), so
   values from 0 to 499 do not sleep at all. */
/* NOTE: not currently used anywhere */
void __kmp_thread_sleep(int millis) {
  const int whole_seconds = (millis + 500) / 1000;
  sleep(whole_seconds);
}
1961 
1962 /* Calculate the elapsed wall clock time for the user */
1963 void __kmp_elapsed(double *t) {
1964  int status;
1965 #ifdef FIX_SGI_CLOCK
1966  struct timespec ts;
1967 
1968  status = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
1969  KMP_CHECK_SYSFAIL_ERRNO("clock_gettime", status);
1970  *t =
1971  (double)ts.tv_nsec * (1.0 / (double)KMP_NSEC_PER_SEC) + (double)ts.tv_sec;
1972 #else
1973  struct timeval tv;
1974 
1975  status = gettimeofday(&tv, NULL);
1976  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1977  *t =
1978  (double)tv.tv_usec * (1.0 / (double)KMP_USEC_PER_SEC) + (double)tv.tv_sec;
1979 #endif
1980 }
1981 
/* Store in *t the length of one clock tick in seconds, computed as the
   reciprocal of CLOCKS_PER_SEC. */
void __kmp_elapsed_tick(double *t) {
  const double ticks_per_second = (double)CLOCKS_PER_SEC;
  *t = 1 / ticks_per_second;
}
1984 
1985 /* Return the current time stamp in nsec */
1986 kmp_uint64 __kmp_now_nsec() {
1987  struct timeval t;
1988  gettimeofday(&t, NULL);
1989  kmp_uint64 nsec = (kmp_uint64)KMP_NSEC_PER_SEC * (kmp_uint64)t.tv_sec +
1990  (kmp_uint64)1000 * (kmp_uint64)t.tv_usec;
1991  return nsec;
1992 }
1993 
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
/* Measure hardware timestamp ticks per millisecond: spin until the hardware
   timestamp has advanced by a fixed number of ticks, time that spin with
   __kmp_now_nsec(), and store the ratio in __kmp_ticks_per_msec. The global is
   left untouched when the measurement yields nothing usable. */
void __kmp_initialize_system_tick() {
  kmp_uint64 delay = 100000; // 50~100 usec on most machines.
  kmp_uint64 start_nsec = __kmp_now_nsec();
  kmp_uint64 goal = __kmp_hardware_timestamp() + delay;

  // Busy-wait until the hardware timestamp reaches the goal.
  kmp_uint64 now;
  do {
    now = __kmp_hardware_timestamp();
  } while (now < goal);

  kmp_uint64 elapsed_nsec = __kmp_now_nsec() - start_nsec;
  if (elapsed_nsec > 0) {
    // Ticks actually spent: the requested delay plus the overshoot past goal.
    kmp_uint64 ticks_spent = delay + (now - goal);
    kmp_uint64 tpms = (kmp_uint64)1e6 * ticks_spent / elapsed_nsec;
    if (tpms > 0) {
      __kmp_ticks_per_msec = tpms;
    }
  }
}
#endif
2012 
2013 /* Determine whether the given address is mapped into the current address
2014  space. */
2015 
2016 int __kmp_is_address_mapped(void *addr) {
2017 
2018  int found = 0;
2019  int rc;
2020 
2021 #if KMP_OS_LINUX || KMP_OS_HURD
2022 
2023  /* On GNUish OSes, read the /proc/<pid>/maps pseudo-file to get all the
2024  address ranges mapped into the address space. */
2025 
2026  char *name = __kmp_str_format("/proc/%d/maps", getpid());
2027  FILE *file = NULL;
2028 
2029  file = fopen(name, "r");
2030  KMP_ASSERT(file != NULL);
2031 
2032  for (;;) {
2033 
2034  void *beginning = NULL;
2035  void *ending = NULL;
2036  char perms[5];
2037 
2038  rc = fscanf(file, "%p-%p %4s %*[^\n]\n", &beginning, &ending, perms);
2039  if (rc == EOF) {
2040  break;
2041  }
2042  KMP_ASSERT(rc == 3 &&
2043  KMP_STRLEN(perms) == 4); // Make sure all fields are read.
2044 
2045  // Ending address is not included in the region, but beginning is.
2046  if ((addr >= beginning) && (addr < ending)) {
2047  perms[2] = 0; // 3th and 4th character does not matter.
2048  if (strcmp(perms, "rw") == 0) {
2049  // Memory we are looking for should be readable and writable.
2050  found = 1;
2051  }
2052  break;
2053  }
2054  }
2055 
2056  // Free resources.
2057  fclose(file);
2058  KMP_INTERNAL_FREE(name);
2059 #elif KMP_OS_FREEBSD
2060  char *buf;
2061  size_t lstsz;
2062  int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_VMMAP, getpid()};
2063  rc = sysctl(mib, 4, NULL, &lstsz, NULL, 0);
2064  if (rc < 0)
2065  return 0;
2066  // We pass from number of vm entry's semantic
2067  // to size of whole entry map list.
2068  lstsz = lstsz * 4 / 3;
2069  buf = reinterpret_cast<char *>(kmpc_malloc(lstsz));
2070  rc = sysctl(mib, 4, buf, &lstsz, NULL, 0);
2071  if (rc < 0) {
2072  kmpc_free(buf);
2073  return 0;
2074  }
2075 
2076  char *lw = buf;
2077  char *up = buf + lstsz;
2078 
2079  while (lw < up) {
2080  struct kinfo_vmentry *cur = reinterpret_cast<struct kinfo_vmentry *>(lw);
2081  size_t cursz = cur->kve_structsize;
2082  if (cursz == 0)
2083  break;
2084  void *start = reinterpret_cast<void *>(cur->kve_start);
2085  void *end = reinterpret_cast<void *>(cur->kve_end);
2086  // Readable/Writable addresses within current map entry
2087  if ((addr >= start) && (addr < end)) {
2088  if ((cur->kve_protection & KVME_PROT_READ) != 0 &&
2089  (cur->kve_protection & KVME_PROT_WRITE) != 0) {
2090  found = 1;
2091  break;
2092  }
2093  }
2094  lw += cursz;
2095  }
2096  kmpc_free(buf);
2097 
2098 #elif KMP_OS_DARWIN
2099 
2100  /* On OS X*, /proc pseudo filesystem is not available. Try to read memory
2101  using vm interface. */
2102 
2103  int buffer;
2104  vm_size_t count;
2105  rc = vm_read_overwrite(
2106  mach_task_self(), // Task to read memory of.
2107  (vm_address_t)(addr), // Address to read from.
2108  1, // Number of bytes to be read.
2109  (vm_address_t)(&buffer), // Address of buffer to save read bytes in.
2110  &count // Address of var to save number of read bytes in.
2111  );
2112  if (rc == 0) {
2113  // Memory successfully read.
2114  found = 1;
2115  }
2116 
2117 #elif KMP_OS_NETBSD
2118 
2119  int mib[5];
2120  mib[0] = CTL_VM;
2121  mib[1] = VM_PROC;
2122  mib[2] = VM_PROC_MAP;
2123  mib[3] = getpid();
2124  mib[4] = sizeof(struct kinfo_vmentry);
2125 
2126  size_t size;
2127  rc = sysctl(mib, __arraycount(mib), NULL, &size, NULL, 0);
2128  KMP_ASSERT(!rc);
2129  KMP_ASSERT(size);
2130 
2131  size = size * 4 / 3;
2132  struct kinfo_vmentry *kiv = (struct kinfo_vmentry *)KMP_INTERNAL_MALLOC(size);
2133  KMP_ASSERT(kiv);
2134 
2135  rc = sysctl(mib, __arraycount(mib), kiv, &size, NULL, 0);
2136  KMP_ASSERT(!rc);
2137  KMP_ASSERT(size);
2138 
2139  for (size_t i = 0; i < size; i++) {
2140  if (kiv[i].kve_start >= (uint64_t)addr &&
2141  kiv[i].kve_end <= (uint64_t)addr) {
2142  found = 1;
2143  break;
2144  }
2145  }
2146  KMP_INTERNAL_FREE(kiv);
2147 #elif KMP_OS_OPENBSD
2148 
2149  int mib[3];
2150  mib[0] = CTL_KERN;
2151  mib[1] = KERN_PROC_VMMAP;
2152  mib[2] = getpid();
2153 
2154  size_t size;
2155  uint64_t end;
2156  rc = sysctl(mib, 3, NULL, &size, NULL, 0);
2157  KMP_ASSERT(!rc);
2158  KMP_ASSERT(size);
2159  end = size;
2160 
2161  struct kinfo_vmentry kiv = {.kve_start = 0};
2162 
2163  while ((rc = sysctl(mib, 3, &kiv, &size, NULL, 0)) == 0) {
2164  KMP_ASSERT(size);
2165  if (kiv.kve_end == end)
2166  break;
2167 
2168  if (kiv.kve_start >= (uint64_t)addr && kiv.kve_end <= (uint64_t)addr) {
2169  found = 1;
2170  break;
2171  }
2172  kiv.kve_start += 1;
2173  }
2174 #elif KMP_OS_DRAGONFLY
2175 
2176  // FIXME(DragonFly): Implement this
2177  found = 1;
2178 
2179 #else
2180 
2181 #error "Unknown or unsupported OS"
2182 
2183 #endif
2184 
2185  return found;
2186 
2187 } // __kmp_is_address_mapped
2188 
2189 #ifdef USE_LOAD_BALANCE
2190 
2191 #if KMP_OS_DARWIN || KMP_OS_NETBSD
2192 
2193 // The function returns the rounded value of the system load average
2194 // during given time interval which depends on the value of
2195 // __kmp_load_balance_interval variable (default is 60 sec, other values
2196 // may be 300 sec or 900 sec).
2197 // It returns -1 in case of error.
2198 int __kmp_get_load_balance(int max) {
2199  double averages[3];
2200  int ret_avg = 0;
2201 
2202  int res = getloadavg(averages, 3);
2203 
2204  // Check __kmp_load_balance_interval to determine which of averages to use.
2205  // getloadavg() may return the number of samples less than requested that is
2206  // less than 3.
2207  if (__kmp_load_balance_interval < 180 && (res >= 1)) {
2208  ret_avg = (int)averages[0]; // 1 min
2209  } else if ((__kmp_load_balance_interval >= 180 &&
2210  __kmp_load_balance_interval < 600) &&
2211  (res >= 2)) {
2212  ret_avg = (int)averages[1]; // 5 min
2213  } else if ((__kmp_load_balance_interval >= 600) && (res == 3)) {
2214  ret_avg = (int)averages[2]; // 15 min
2215  } else { // Error occurred
2216  return -1;
2217  }
2218 
2219  return ret_avg;
2220 }
2221 
2222 #else // Linux* OS
2223 
2224 // The function returns number of running (not sleeping) threads, or -1 in case
2225 // of error. Error could be reported if Linux* OS kernel too old (without
2226 // "/proc" support). Counting running threads stops if max running threads
2227 // encountered.
2228 int __kmp_get_load_balance(int max) {
2229  static int permanent_error = 0;
2230  static int glb_running_threads = 0; // Saved count of the running threads for
2231  // the thread balance algorithm
2232  static double glb_call_time = 0; /* Thread balance algorithm call time */
2233 
2234  int running_threads = 0; // Number of running threads in the system.
2235 
2236  DIR *proc_dir = NULL; // Handle of "/proc/" directory.
2237  struct dirent *proc_entry = NULL;
2238 
2239  kmp_str_buf_t task_path; // "/proc/<pid>/task/<tid>/" path.
2240  DIR *task_dir = NULL; // Handle of "/proc/<pid>/task/<tid>/" directory.
2241  struct dirent *task_entry = NULL;
2242  int task_path_fixed_len;
2243 
2244  kmp_str_buf_t stat_path; // "/proc/<pid>/task/<tid>/stat" path.
2245  int stat_file = -1;
2246  int stat_path_fixed_len;
2247 
2248  int total_processes = 0; // Total number of processes in system.
2249  int total_threads = 0; // Total number of threads in system.
2250 
2251  double call_time = 0.0;
2252 
2253  __kmp_str_buf_init(&task_path);
2254  __kmp_str_buf_init(&stat_path);
2255 
2256  __kmp_elapsed(&call_time);
2257 
2258  if (glb_call_time &&
2259  (call_time - glb_call_time < __kmp_load_balance_interval)) {
2260  running_threads = glb_running_threads;
2261  goto finish;
2262  }
2263 
2264  glb_call_time = call_time;
2265 
2266  // Do not spend time on scanning "/proc/" if we have a permanent error.
2267  if (permanent_error) {
2268  running_threads = -1;
2269  goto finish;
2270  }
2271 
2272  if (max <= 0) {
2273  max = INT_MAX;
2274  }
2275 
2276  // Open "/proc/" directory.
2277  proc_dir = opendir("/proc");
2278  if (proc_dir == NULL) {
2279  // Cannot open "/prroc/". Probably the kernel does not support it. Return an
2280  // error now and in subsequent calls.
2281  running_threads = -1;
2282  permanent_error = 1;
2283  goto finish;
2284  }
2285 
2286  // Initialize fixed part of task_path. This part will not change.
2287  __kmp_str_buf_cat(&task_path, "/proc/", 6);
2288  task_path_fixed_len = task_path.used; // Remember number of used characters.
2289 
2290  proc_entry = readdir(proc_dir);
2291  while (proc_entry != NULL) {
2292  // Proc entry is a directory and name starts with a digit. Assume it is a
2293  // process' directory.
2294  if (proc_entry->d_type == DT_DIR && isdigit(proc_entry->d_name[0])) {
2295 
2296  ++total_processes;
2297  // Make sure init process is the very first in "/proc", so we can replace
2298  // strcmp( proc_entry->d_name, "1" ) == 0 with simpler total_processes ==
2299  // 1. We are going to check that total_processes == 1 => d_name == "1" is
2300  // true (where "=>" is implication). Since C++ does not have => operator,
2301  // let us replace it with its equivalent: a => b == ! a || b.
2302  KMP_DEBUG_ASSERT(total_processes != 1 ||
2303  strcmp(proc_entry->d_name, "1") == 0);
2304 
2305  // Construct task_path.
2306  task_path.used = task_path_fixed_len; // Reset task_path to "/proc/".
2307  __kmp_str_buf_cat(&task_path, proc_entry->d_name,
2308  KMP_STRLEN(proc_entry->d_name));
2309  __kmp_str_buf_cat(&task_path, "/task", 5);
2310 
2311  task_dir = opendir(task_path.str);
2312  if (task_dir == NULL) {
2313  // Process can finish between reading "/proc/" directory entry and
2314  // opening process' "task/" directory. So, in general case we should not
2315  // complain, but have to skip this process and read the next one. But on
2316  // systems with no "task/" support we will spend lot of time to scan
2317  // "/proc/" tree again and again without any benefit. "init" process
2318  // (its pid is 1) should exist always, so, if we cannot open
2319  // "/proc/1/task/" directory, it means "task/" is not supported by
2320  // kernel. Report an error now and in the future.
2321  if (strcmp(proc_entry->d_name, "1") == 0) {
2322  running_threads = -1;
2323  permanent_error = 1;
2324  goto finish;
2325  }
2326  } else {
2327  // Construct fixed part of stat file path.
2328  __kmp_str_buf_clear(&stat_path);
2329  __kmp_str_buf_cat(&stat_path, task_path.str, task_path.used);
2330  __kmp_str_buf_cat(&stat_path, "/", 1);
2331  stat_path_fixed_len = stat_path.used;
2332 
2333  task_entry = readdir(task_dir);
2334  while (task_entry != NULL) {
2335  // It is a directory and name starts with a digit.
2336  if (proc_entry->d_type == DT_DIR && isdigit(task_entry->d_name[0])) {
2337  ++total_threads;
2338 
2339  // Construct complete stat file path. Easiest way would be:
2340  // __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str,
2341  // task_entry->d_name );
2342  // but seriae of __kmp_str_buf_cat works a bit faster.
2343  stat_path.used =
2344  stat_path_fixed_len; // Reset stat path to its fixed part.
2345  __kmp_str_buf_cat(&stat_path, task_entry->d_name,
2346  KMP_STRLEN(task_entry->d_name));
2347  __kmp_str_buf_cat(&stat_path, "/stat", 5);
2348 
2349  // Note: Low-level API (open/read/close) is used. High-level API
2350  // (fopen/fclose) works ~ 30 % slower.
2351  stat_file = open(stat_path.str, O_RDONLY);
2352  if (stat_file == -1) {
2353  // We cannot report an error because task (thread) can terminate
2354  // just before reading this file.
2355  } else {
2356  /* Content of "stat" file looks like:
2357  24285 (program) S ...
2358 
2359  It is a single line (if program name does not include funny
2360  symbols). First number is a thread id, then name of executable
2361  file name in paretheses, then state of the thread. We need just
2362  thread state.
2363 
2364  Good news: Length of program name is 15 characters max. Longer
2365  names are truncated.
2366 
2367  Thus, we need rather short buffer: 15 chars for program name +
2368  2 parenthesis, + 3 spaces + ~7 digits of pid = 37.
2369 
2370  Bad news: Program name may contain special symbols like space,
2371  closing parenthesis, or even new line. This makes parsing
2372  "stat" file not 100 % reliable. In case of fanny program names
2373  parsing may fail (report incorrect thread state).
2374 
2375  Parsing "status" file looks more promissing (due to different
2376  file structure and escaping special symbols) but reading and
2377  parsing of "status" file works slower.
2378  -- ln
2379  */
2380  char buffer[65];
2381  ssize_t len;
2382  len = read(stat_file, buffer, sizeof(buffer) - 1);
2383  if (len >= 0) {
2384  buffer[len] = 0;
2385  // Using scanf:
2386  // sscanf( buffer, "%*d (%*s) %c ", & state );
2387  // looks very nice, but searching for a closing parenthesis
2388  // works a bit faster.
2389  char *close_parent = strstr(buffer, ") ");
2390  if (close_parent != NULL) {
2391  char state = *(close_parent + 2);
2392  if (state == 'R') {
2393  ++running_threads;
2394  if (running_threads >= max) {
2395  goto finish;
2396  }
2397  }
2398  }
2399  }
2400  close(stat_file);
2401  stat_file = -1;
2402  }
2403  }
2404  task_entry = readdir(task_dir);
2405  }
2406  closedir(task_dir);
2407  task_dir = NULL;
2408  }
2409  }
2410  proc_entry = readdir(proc_dir);
2411  }
2412 
2413  // There _might_ be a timing hole where the thread executing this
2414  // code get skipped in the load balance, and running_threads is 0.
2415  // Assert in the debug builds only!!!
2416  KMP_DEBUG_ASSERT(running_threads > 0);
2417  if (running_threads <= 0) {
2418  running_threads = 1;
2419  }
2420 
2421 finish: // Clean up and exit.
2422  if (proc_dir != NULL) {
2423  closedir(proc_dir);
2424  }
2425  __kmp_str_buf_free(&task_path);
2426  if (task_dir != NULL) {
2427  closedir(task_dir);
2428  }
2429  __kmp_str_buf_free(&stat_path);
2430  if (stat_file != -1) {
2431  close(stat_file);
2432  }
2433 
2434  glb_running_threads = running_threads;
2435 
2436  return running_threads;
2437 
2438 } // __kmp_get_load_balance
2439 
2440 #endif // KMP_OS_DARWIN
2441 
2442 #endif // USE_LOAD_BALANCE
2443 
2444 #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \
2445  ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \
2446  KMP_ARCH_PPC64 || KMP_ARCH_RISCV64)
2447 
2448 // we really only need the case with 1 argument, because CLANG always build
2449 // a struct of pointers to shared variables referenced in the outlined function
2450 int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
2451  void *p_argv[]
2452 #if OMPT_SUPPORT
2453  ,
2454  void **exit_frame_ptr
2455 #endif
2456 ) {
2457 #if OMPT_SUPPORT
2458  *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
2459 #endif
2460 
2461  switch (argc) {
2462  default:
2463  fprintf(stderr, "Too many args to microtask: %d!\n", argc);
2464  fflush(stderr);
2465  exit(-1);
2466  case 0:
2467  (*pkfn)(&gtid, &tid);
2468  break;
2469  case 1:
2470  (*pkfn)(&gtid, &tid, p_argv[0]);
2471  break;
2472  case 2:
2473  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
2474  break;
2475  case 3:
2476  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
2477  break;
2478  case 4:
2479  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]);
2480  break;
2481  case 5:
2482  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]);
2483  break;
2484  case 6:
2485  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2486  p_argv[5]);
2487  break;
2488  case 7:
2489  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2490  p_argv[5], p_argv[6]);
2491  break;
2492  case 8:
2493  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2494  p_argv[5], p_argv[6], p_argv[7]);
2495  break;
2496  case 9:
2497  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2498  p_argv[5], p_argv[6], p_argv[7], p_argv[8]);
2499  break;
2500  case 10:
2501  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2502  p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]);
2503  break;
2504  case 11:
2505  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2506  p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
2507  break;
2508  case 12:
2509  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2510  p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2511  p_argv[11]);
2512  break;
2513  case 13:
2514  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2515  p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2516  p_argv[11], p_argv[12]);
2517  break;
2518  case 14:
2519  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2520  p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2521  p_argv[11], p_argv[12], p_argv[13]);
2522  break;
2523  case 15:
2524  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2525  p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2526  p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
2527  break;
2528  }
2529 
2530  return 1;
2531 }
2532 
2533 #endif
2534 
2535 // Functions for hidden helper task
2536 namespace {
2537 // Condition variable for initializing hidden helper team
2538 pthread_cond_t hidden_helper_threads_initz_cond_var;
2539 pthread_mutex_t hidden_helper_threads_initz_lock;
2540 volatile int hidden_helper_initz_signaled = FALSE;
2541 
2542 // Condition variable for deinitializing hidden helper team
2543 pthread_cond_t hidden_helper_threads_deinitz_cond_var;
2544 pthread_mutex_t hidden_helper_threads_deinitz_lock;
2545 volatile int hidden_helper_deinitz_signaled = FALSE;
2546 
2547 // Condition variable for the wrapper function of main thread
2548 pthread_cond_t hidden_helper_main_thread_cond_var;
2549 pthread_mutex_t hidden_helper_main_thread_lock;
2550 volatile int hidden_helper_main_thread_signaled = FALSE;
2551 
2552 // Semaphore for worker threads. We don't use condition variable here in case
2553 // that when multiple signals are sent at the same time, only one thread might
2554 // be waken.
2555 sem_t hidden_helper_task_sem;
2556 } // namespace
2557 
2558 void __kmp_hidden_helper_worker_thread_wait() {
2559  int status = sem_wait(&hidden_helper_task_sem);
2560  KMP_CHECK_SYSFAIL("sem_wait", status);
2561 }
2562 
2563 void __kmp_do_initialize_hidden_helper_threads() {
2564  // Initialize condition variable
2565  int status =
2566  pthread_cond_init(&hidden_helper_threads_initz_cond_var, nullptr);
2567  KMP_CHECK_SYSFAIL("pthread_cond_init", status);
2568 
2569  status = pthread_cond_init(&hidden_helper_threads_deinitz_cond_var, nullptr);
2570  KMP_CHECK_SYSFAIL("pthread_cond_init", status);
2571 
2572  status = pthread_cond_init(&hidden_helper_main_thread_cond_var, nullptr);
2573  KMP_CHECK_SYSFAIL("pthread_cond_init", status);
2574 
2575  status = pthread_mutex_init(&hidden_helper_threads_initz_lock, nullptr);
2576  KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
2577 
2578  status = pthread_mutex_init(&hidden_helper_threads_deinitz_lock, nullptr);
2579  KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
2580 
2581  status = pthread_mutex_init(&hidden_helper_main_thread_lock, nullptr);
2582  KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
2583 
2584  // Initialize the semaphore
2585  status = sem_init(&hidden_helper_task_sem, 0, 0);
2586  KMP_CHECK_SYSFAIL("sem_init", status);
2587 
2588  // Create a new thread to finish initialization
2589  pthread_t handle;
2590  status = pthread_create(
2591  &handle, nullptr,
2592  [](void *) -> void * {
2593  __kmp_hidden_helper_threads_initz_routine();
2594  return nullptr;
2595  },
2596  nullptr);
2597  KMP_CHECK_SYSFAIL("pthread_create", status);
2598 }
2599 
2600 void __kmp_hidden_helper_threads_initz_wait() {
2601  // Initial thread waits here for the completion of the initialization. The
2602  // condition variable will be notified by main thread of hidden helper teams.
2603  int status = pthread_mutex_lock(&hidden_helper_threads_initz_lock);
2604  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
2605 
2606  if (!TCR_4(hidden_helper_initz_signaled)) {
2607  status = pthread_cond_wait(&hidden_helper_threads_initz_cond_var,
2608  &hidden_helper_threads_initz_lock);
2609  KMP_CHECK_SYSFAIL("pthread_cond_wait", status);
2610  }
2611 
2612  status = pthread_mutex_unlock(&hidden_helper_threads_initz_lock);
2613  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
2614 }
2615 
2616 void __kmp_hidden_helper_initz_release() {
2617  // After all initialization, reset __kmp_init_hidden_helper_threads to false.
2618  int status = pthread_mutex_lock(&hidden_helper_threads_initz_lock);
2619  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
2620 
2621  status = pthread_cond_signal(&hidden_helper_threads_initz_cond_var);
2622  KMP_CHECK_SYSFAIL("pthread_cond_wait", status);
2623 
2624  TCW_SYNC_4(hidden_helper_initz_signaled, TRUE);
2625 
2626  status = pthread_mutex_unlock(&hidden_helper_threads_initz_lock);
2627  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
2628 }
2629 
2630 void __kmp_hidden_helper_main_thread_wait() {
2631  // The main thread of hidden helper team will be blocked here. The
2632  // condition variable can only be signal in the destructor of RTL.
2633  int status = pthread_mutex_lock(&hidden_helper_main_thread_lock);
2634  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
2635 
2636  if (!TCR_4(hidden_helper_main_thread_signaled)) {
2637  status = pthread_cond_wait(&hidden_helper_main_thread_cond_var,
2638  &hidden_helper_main_thread_lock);
2639  KMP_CHECK_SYSFAIL("pthread_cond_wait", status);
2640  }
2641 
2642  status = pthread_mutex_unlock(&hidden_helper_main_thread_lock);
2643  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
2644 }
2645 
2646 void __kmp_hidden_helper_main_thread_release() {
2647  // The initial thread of OpenMP RTL should call this function to wake up the
2648  // main thread of hidden helper team.
2649  int status = pthread_mutex_lock(&hidden_helper_main_thread_lock);
2650  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
2651 
2652  status = pthread_cond_signal(&hidden_helper_main_thread_cond_var);
2653  KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
2654 
2655  // The hidden helper team is done here
2656  TCW_SYNC_4(hidden_helper_main_thread_signaled, TRUE);
2657 
2658  status = pthread_mutex_unlock(&hidden_helper_main_thread_lock);
2659  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
2660 }
2661 
2662 void __kmp_hidden_helper_worker_thread_signal() {
2663  int status = sem_post(&hidden_helper_task_sem);
2664  KMP_CHECK_SYSFAIL("sem_post", status);
2665 }
2666 
2667 void __kmp_hidden_helper_threads_deinitz_wait() {
2668  // Initial thread waits here for the completion of the deinitialization. The
2669  // condition variable will be notified by main thread of hidden helper teams.
2670  int status = pthread_mutex_lock(&hidden_helper_threads_deinitz_lock);
2671  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
2672 
2673  if (!TCR_4(hidden_helper_deinitz_signaled)) {
2674  status = pthread_cond_wait(&hidden_helper_threads_deinitz_cond_var,
2675  &hidden_helper_threads_deinitz_lock);
2676  KMP_CHECK_SYSFAIL("pthread_cond_wait", status);
2677  }
2678 
2679  status = pthread_mutex_unlock(&hidden_helper_threads_deinitz_lock);
2680  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
2681 }
2682 
2683 void __kmp_hidden_helper_threads_deinitz_release() {
2684  int status = pthread_mutex_lock(&hidden_helper_threads_deinitz_lock);
2685  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
2686 
2687  status = pthread_cond_signal(&hidden_helper_threads_deinitz_cond_var);
2688  KMP_CHECK_SYSFAIL("pthread_cond_wait", status);
2689 
2690  TCW_SYNC_4(hidden_helper_deinitz_signaled, TRUE);
2691 
2692  status = pthread_mutex_unlock(&hidden_helper_threads_deinitz_lock);
2693  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
2694 }
2695 
2696 // end of file //
// Cross-reference left over from the documentation viewer (not part of this
// file): KMP_INIT_PARTITIONED_TIMERS(name) -- initializes the partitioned
// timers to begin with name. Definition: kmp_stats.h:933