1 /*
2  * z_Windows_NT_util.c -- platform specific routines.
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_i18n.h"
19 #include "kmp_io.h"
20 #include "kmp_wait_release.h"
21 
22 /* This code is related to the NtQuerySystemInformation() function. This function
23  is used in the load balance algorithm for OMP_DYNAMIC=true to find the
24  number of running threads in the system. */
25 
26 #include <ntstatus.h>
27 #include <ntsecapi.h> // UNICODE_STRING
28 
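// The kernel structures declared below are internal to ntdll and are not fully
// exposed by the public SDK headers, so minimal local definitions are given
// here; the KMP_BUILD_ASSERT checks that follow guard against layout drift.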
29 enum SYSTEM_INFORMATION_CLASS {
30  SystemProcessInformation = 5
31 }; // SYSTEM_INFORMATION_CLASS
32 
33 struct CLIENT_ID {
34  HANDLE UniqueProcess;
35  HANDLE UniqueThread;
36 }; // struct CLIENT_ID
37 
38 enum THREAD_STATE {
39  StateInitialized,
40  StateReady,
41  StateRunning,
42  StateStandby,
43  StateTerminated,
44  StateWait,
45  StateTransition,
46  StateUnknown
47 }; // enum THREAD_STATE
48 
49 struct VM_COUNTERS {
50  SIZE_T PeakVirtualSize;
51  SIZE_T VirtualSize;
52  ULONG PageFaultCount;
53  SIZE_T PeakWorkingSetSize;
54  SIZE_T WorkingSetSize;
55  SIZE_T QuotaPeakPagedPoolUsage;
56  SIZE_T QuotaPagedPoolUsage;
57  SIZE_T QuotaPeakNonPagedPoolUsage;
58  SIZE_T QuotaNonPagedPoolUsage;
59  SIZE_T PagefileUsage;
60  SIZE_T PeakPagefileUsage;
61  SIZE_T PrivatePageCount;
62 }; // struct VM_COUNTERS
63 
64 struct SYSTEM_THREAD {
65  LARGE_INTEGER KernelTime;
66  LARGE_INTEGER UserTime;
67  LARGE_INTEGER CreateTime;
68  ULONG WaitTime;
69  LPVOID StartAddress;
70  CLIENT_ID ClientId;
71  DWORD Priority;
72  LONG BasePriority;
73  ULONG ContextSwitchCount;
74  THREAD_STATE State;
75  ULONG WaitReason;
76 }; // SYSTEM_THREAD
77 
78 KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, KernelTime ) == 0 );
79 #if KMP_ARCH_X86
80  KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 28 );
81  KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 52 );
82 #else
83  KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 32 );
84  KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 68 );
85 #endif
86 
87 struct SYSTEM_PROCESS_INFORMATION {
88  ULONG NextEntryOffset;
89  ULONG NumberOfThreads;
90  LARGE_INTEGER Reserved[ 3 ];
91  LARGE_INTEGER CreateTime;
92  LARGE_INTEGER UserTime;
93  LARGE_INTEGER KernelTime;
94  UNICODE_STRING ImageName;
95  DWORD BasePriority;
96  HANDLE ProcessId;
97  HANDLE ParentProcessId;
98  ULONG HandleCount;
99  ULONG Reserved2[ 2 ];
100  VM_COUNTERS VMCounters;
101  IO_COUNTERS IOCounters;
102  SYSTEM_THREAD Threads[ 1 ];
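 // The record actually contains NumberOfThreads entries;
 // [ 1 ] is the old-style flexible-array idiom.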
103 }; // SYSTEM_PROCESS_INFORMATION
104 typedef SYSTEM_PROCESS_INFORMATION * PSYSTEM_PROCESS_INFORMATION;
105 
106 KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, NextEntryOffset ) == 0 );
107 KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, CreateTime ) == 32 );
108 KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ImageName ) == 56 );
109 #if KMP_ARCH_X86
110  KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 68 );
111  KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 76 );
112  KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 88 );
113  KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 136 );
114  KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 184 );
115 #else
116  KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 80 );
117  KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 96 );
118  KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 112 );
119  KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 208 );
120  KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 256 );
121 #endif
122 
123 typedef NTSTATUS (NTAPI *NtQuerySystemInformation_t)( SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG );
124 NtQuerySystemInformation_t NtQuerySystemInformation = NULL;
125 
126 HMODULE ntdll = NULL;
127 
128 /* End of NtQuerySystemInformation()-related code */
129 
130 #if KMP_GROUP_AFFINITY
131 static HMODULE kernel32 = NULL;
132 #endif /* KMP_GROUP_AFFINITY */
133 
134 /* ----------------------------------------------------------------------------------- */
135 /* ----------------------------------------------------------------------------------- */
136 
137 #if KMP_HANDLE_SIGNALS
138  typedef void (* sig_func_t )( int );
139  static sig_func_t __kmp_sighldrs[ NSIG ];
140  static int __kmp_siginstalled[ NSIG ];
141 #endif
142 
143 static HANDLE __kmp_monitor_ev;
144 static kmp_int64 __kmp_win32_time;
145 double __kmp_win32_tick;
146 
147 int __kmp_init_runtime = FALSE;
148 CRITICAL_SECTION __kmp_win32_section;
149 
150 void
151 __kmp_win32_mutex_init( kmp_win32_mutex_t *mx )
152 {
153  InitializeCriticalSection( & mx->cs );
154 #if USE_ITT_BUILD
155  __kmp_itt_system_object_created( & mx->cs, "Critical Section" );
156 #endif /* USE_ITT_BUILD */
157 }
158 
159 void
160 __kmp_win32_mutex_destroy( kmp_win32_mutex_t *mx )
161 {
162  DeleteCriticalSection( & mx->cs );
163 }
164 
165 void
166 __kmp_win32_mutex_lock( kmp_win32_mutex_t *mx )
167 {
168  EnterCriticalSection( & mx->cs );
169 }
170 
171 void
172 __kmp_win32_mutex_unlock( kmp_win32_mutex_t *mx )
173 {
174  LeaveCriticalSection( & mx->cs );
175 }
176 
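/* kmp_win32_cond_t emulates a condition variable with a manual-reset event plus
 a generation count (the classic Win32 condvar scheme): a broadcast records how
 many waiters to release and bumps the generation, and each waiter loops until
 it observes a release intended for its own generation. */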
177 void
178 __kmp_win32_cond_init( kmp_win32_cond_t *cv )
179 {
180  cv->waiters_count_ = 0;
181  cv->wait_generation_count_ = 0;
182  cv->release_count_ = 0;
183 
184  /* Initialize the critical section */
185  __kmp_win32_mutex_init( & cv->waiters_count_lock_ );
186 
187  /* Create a manual-reset event. */
188  cv->event_ = CreateEvent( NULL, // no security
189  TRUE, // manual-reset
190  FALSE, // non-signaled initially
191  NULL ); // unnamed
192 #if USE_ITT_BUILD
193  __kmp_itt_system_object_created( cv->event_, "Event" );
194 #endif /* USE_ITT_BUILD */
195 }
196 
197 void
198 __kmp_win32_cond_destroy( kmp_win32_cond_t *cv )
199 {
200  __kmp_win32_mutex_destroy( & cv->waiters_count_lock_ );
201  __kmp_free_handle( cv->event_ );
202  memset( cv, '\0', sizeof( *cv ) );
203 }
204 
205 /* TODO associate cv with a team instead of a thread so as to optimize
206  * the case where we wake up a whole team */
207 
208 void
209 __kmp_win32_cond_wait( kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx, kmp_info_t *th, int need_decrease_load )
210 {
211  int my_generation;
212  int last_waiter;
213 
214  /* Avoid race conditions */
215  __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );
216 
217  /* Increment count of waiters */
218  cv->waiters_count_++;
219 
220  /* Store current generation in our activation record. */
221  my_generation = cv->wait_generation_count_;
222 
223  __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );
224  __kmp_win32_mutex_unlock( mx );
225 
226  for (;;) {
227  int wait_done;
228 
229  /* Wait until the event is signaled */
230  WaitForSingleObject( cv->event_, INFINITE );
231 
232  __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );
233 
234  /* Exit the loop when <cv->event_> is signaled, a broadcast has
235  * advanced the generation past ours, and releases from that
236  * broadcast are still pending. */
237  wait_done = ( cv->release_count_ > 0 ) &&
238  ( cv->wait_generation_count_ != my_generation );
239 
240  __kmp_win32_mutex_unlock( &cv->waiters_count_lock_);
241 
242  /* A stray semicolon used to follow this if statement; it looked
243  * like a bug, so it was removed. */
244  if( wait_done )
245  break;
246  }
247 
248  __kmp_win32_mutex_lock( mx );
249  __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );
250 
251  cv->waiters_count_--;
252  cv->release_count_--;
253 
254  last_waiter = ( cv->release_count_ == 0 );
255 
256  __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );
257 
258  if( last_waiter ) {
259  /* We're the last waiter to be notified, so reset the manual event. */
260  ResetEvent( cv->event_ );
261  }
262 }
263 
264 void
265 __kmp_win32_cond_broadcast( kmp_win32_cond_t *cv )
266 {
267  __kmp_win32_mutex_lock( &cv->waiters_count_lock_ );
268 
269  if( cv->waiters_count_ > 0 ) {
270  SetEvent( cv->event_ );
271  /* Release all the threads in this generation. */
272 
273  cv->release_count_ = cv->waiters_count_;
274 
275  /* Start a new generation. */
276  cv->wait_generation_count_++;
277  }
278 
279  __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ );
280 }
281 
282 void
283 __kmp_win32_cond_signal( kmp_win32_cond_t *cv )
284 {
285  __kmp_win32_cond_broadcast( cv );
286 }
287 
288 /* ------------------------------------------------------------------------ */
289 /* ------------------------------------------------------------------------ */
290 
291 void
292 __kmp_enable( int new_state )
293 {
294  if (__kmp_init_runtime)
295  LeaveCriticalSection( & __kmp_win32_section );
296 }
297 
298 void
299 __kmp_disable( int *old_state )
300 {
301  *old_state = 0;
302 
303  if (__kmp_init_runtime)
304  EnterCriticalSection( & __kmp_win32_section );
305 }
306 
307 void
308 __kmp_suspend_initialize( void )
309 {
310  /* do nothing */
311 }
312 
313 static void
314 __kmp_suspend_initialize_thread( kmp_info_t *th )
315 {
316  if ( ! TCR_4( th->th.th_suspend_init ) ) {
317  /* this means we haven't initialized the suspension objects for this thread
318  in this instance of the process */
319  __kmp_win32_cond_init( &th->th.th_suspend_cv );
320  __kmp_win32_mutex_init( &th->th.th_suspend_mx );
321  TCW_4( th->th.th_suspend_init, TRUE );
322  }
323 }
324 
325 void
326 __kmp_suspend_uninitialize_thread( kmp_info_t *th )
327 {
328  if ( TCR_4( th->th.th_suspend_init ) ) {
329  /* this means we have initialized the suspension objects for this thread
330  in this instance of the process */
331  __kmp_win32_cond_destroy( & th->th.th_suspend_cv );
332  __kmp_win32_mutex_destroy( & th->th.th_suspend_mx );
333  TCW_4( th->th.th_suspend_init, FALSE );
334  }
335 }
336 
337 /* This routine puts the calling thread to sleep after setting the
338  * sleep bit for the indicated flag variable to true.
339  */
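/* The protocol: set the sleep bit while holding th_suspend_mx, re-check the
 flag to catch a release that raced with us, and only then block on
 th_suspend_cv. The resuming thread clears the sleep bit before signaling, so
 a spurious wakeup simply re-enters the wait loop. */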
340 template <class C>
341 static inline void __kmp_suspend_template( int th_gtid, C *flag )
342 {
343  kmp_info_t *th = __kmp_threads[th_gtid];
344  int status;
345  typename C::flag_t old_spin;
346 
347  KF_TRACE( 30, ("__kmp_suspend_template: T#%d enter for flag's loc(%p)\n", th_gtid, flag->get() ) );
348 
349  __kmp_suspend_initialize_thread( th );
350  __kmp_win32_mutex_lock( &th->th.th_suspend_mx );
351 
352  KF_TRACE( 10, ( "__kmp_suspend_template: T#%d setting sleep bit for flag's loc(%p)\n",
353  th_gtid, flag->get() ) );
354 
355  /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread
356  gets called first?
357  */
358  old_spin = flag->set_sleeping();
359 
360  KF_TRACE( 5, ( "__kmp_suspend_template: T#%d set sleep bit for flag's loc(%p)==%d\n",
361  th_gtid, flag->get(), *(flag->get()) ) );
362 
363  if ( flag->done_check_val(old_spin) ) {
364  old_spin = flag->unset_sleeping();
365  KF_TRACE( 5, ( "__kmp_suspend_template: T#%d false alarm, reset sleep bit for flag's loc(%p)\n",
366  th_gtid, flag->get()) );
367  } else {
368 #ifdef DEBUG_SUSPEND
369  __kmp_suspend_count++;
370 #endif
371  /* Encapsulate in a loop as the documentation states that this may
372  * "with low probability" return when the condition variable has
373  * not been signaled or broadcast
374  */
375  int deactivated = FALSE;
376  TCW_PTR(th->th.th_sleep_loc, (void *)flag);
377  while ( flag->is_sleeping() ) {
378  KF_TRACE( 15, ("__kmp_suspend_template: T#%d about to perform kmp_win32_cond_wait()\n",
379  th_gtid ) );
380  // Mark the thread as no longer active (only in the first iteration of the loop).
381  if ( ! deactivated ) {
382  th->th.th_active = FALSE;
383  if ( th->th.th_active_in_pool ) {
384  th->th.th_active_in_pool = FALSE;
385  KMP_TEST_THEN_DEC32(
386  (kmp_int32 *) &__kmp_thread_pool_active_nth );
387  KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
388  }
389  deactivated = TRUE;
390 
391  __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 );
392  }
393  else {
394  __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 );
395  }
396 
397 #ifdef KMP_DEBUG
398  if( flag->is_sleeping() ) {
399  KF_TRACE( 100, ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid ));
400  }
401 #endif /* KMP_DEBUG */
402 
403  } // while
404 
 405  // Mark the thread as active again (if it was previously marked as inactive)
406  if ( deactivated ) {
407  th->th.th_active = TRUE;
408  if ( TCR_4(th->th.th_in_pool) ) {
409  KMP_TEST_THEN_INC32(
410  (kmp_int32 *) &__kmp_thread_pool_active_nth );
411  th->th.th_active_in_pool = TRUE;
412  }
413  }
414  }
415 
416  __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
417 
418  KF_TRACE( 30, ("__kmp_suspend_template: T#%d exit\n", th_gtid ) );
419 }
420 
421 void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
422  __kmp_suspend_template(th_gtid, flag);
423 }
424 void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
425  __kmp_suspend_template(th_gtid, flag);
426 }
427 void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
428  __kmp_suspend_template(th_gtid, flag);
429 }
430 
431 
432 /* This routine signals the thread specified by target_gtid to wake up
433  * after setting the sleep bit indicated by the flag argument to FALSE
434  */
435 template <class C>
436 static inline void __kmp_resume_template( int target_gtid, C *flag )
437 {
438  kmp_info_t *th = __kmp_threads[target_gtid];
439  int status;
440 
441 #ifdef KMP_DEBUG
442  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
443 #endif
444 
445  KF_TRACE( 30, ( "__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", gtid, target_gtid ) );
446 
447  __kmp_suspend_initialize_thread( th );
448  __kmp_win32_mutex_lock( &th->th.th_suspend_mx );
449 
450  if (!flag) { // coming from __kmp_null_resume_wrapper
451  flag = (C *)th->th.th_sleep_loc;
452  }
453 
454  // First, check if the flag is null or its type has changed. If so, someone else woke it up.
455  if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type simply shows what flag was cast to
456  KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p)\n",
457  gtid, target_gtid, NULL ) );
458  __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
459  return;
460  }
461  else {
462  typename C::flag_t old_spin = flag->unset_sleeping();
463  if ( !flag->is_sleeping_val(old_spin) ) {
464  KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p): "
465  "%u => %u\n",
466  gtid, target_gtid, flag->get(), old_spin, *(flag->get()) ) );
467  __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
468  return;
469  }
470  }
471  TCW_PTR(th->th.th_sleep_loc, NULL);
472 
473  KF_TRACE( 5, ( "__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep bit for flag's loc(%p)\n",
474  gtid, target_gtid, flag->get() ) );
475 
476  __kmp_win32_cond_signal( &th->th.th_suspend_cv );
477  __kmp_win32_mutex_unlock( &th->th.th_suspend_mx );
478 
479  KF_TRACE( 30, ( "__kmp_resume_template: T#%d exiting after signaling wake up for T#%d\n",
480  gtid, target_gtid ) );
481 }
482 
483 void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
484  __kmp_resume_template(target_gtid, flag);
485 }
486 void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
487  __kmp_resume_template(target_gtid, flag);
488 }
489 void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
490  __kmp_resume_template(target_gtid, flag);
491 }
492 
493 
494 /* ------------------------------------------------------------------------ */
495 /* ------------------------------------------------------------------------ */
496 
497 void
498 __kmp_yield( int cond )
499 {
500  if (cond)
501  Sleep(0);
502 }
503 
504 /* ------------------------------------------------------------------------ */
505 /* ------------------------------------------------------------------------ */
506 
507 void
508 __kmp_gtid_set_specific( int gtid )
509 {
510  if( __kmp_init_gtid ) {
511  KA_TRACE( 50, ("__kmp_gtid_set_specific: T#%d key:%d\n",
512  gtid, __kmp_gtid_threadprivate_key ));
513  if( ! TlsSetValue( __kmp_gtid_threadprivate_key, (LPVOID)(gtid+1)) )
514  KMP_FATAL( TLSSetValueFailed );
515  } else {
516  KA_TRACE( 50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n" ) );
517  }
518 }
519 
520 int
521 __kmp_gtid_get_specific()
522 {
523  int gtid;
524  if( !__kmp_init_gtid ) {
525  KA_TRACE( 50, ("__kmp_gtid_get_specific: runtime shutdown, returning KMP_GTID_SHUTDOWN\n" ) );
526  return KMP_GTID_SHUTDOWN;
527  }
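 /* The gtid is stored biased by +1 (see __kmp_gtid_set_specific):
 TlsGetValue() returns 0 both when no value has been set and on failure,
 so a raw 0 here means "does not exist". */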
528  gtid = (int)(kmp_intptr_t)TlsGetValue( __kmp_gtid_threadprivate_key );
529  if ( gtid == 0 ) {
530  gtid = KMP_GTID_DNE;
531  }
532  else {
533  gtid--;
534  }
535  KA_TRACE( 50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
536  __kmp_gtid_threadprivate_key, gtid ));
537  return gtid;
538 }
539 
540 /* ------------------------------------------------------------------------ */
541 /* ------------------------------------------------------------------------ */
542 
543 #if KMP_GROUP_AFFINITY
544 
545 //
546 // Only 1 DWORD in the mask should have any procs set.
547 // Return the appropriate index, or -1 for an invalid mask.
548 //
549 int
550 __kmp_get_proc_group( kmp_affin_mask_t const *mask )
551 {
552  int i;
553  int group = -1;
554  for (i = 0; i < __kmp_num_proc_groups; i++) {
555 #if KMP_USE_HWLOC
 556  // On Windows* OS, the long type is always 32 bits
557  unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2);
558  unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2+1);
559  if (first_32_bits == 0 && second_32_bits == 0) {
560  continue;
561  }
562 #else
563  if (mask[i] == 0) {
564  continue;
565  }
566 #endif
567  if (group >= 0) {
568  return -1;
569  }
570  group = i;
571  }
572  return group;
573 }
574 
575 #endif /* KMP_GROUP_AFFINITY */
576 
577 int
578 __kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error )
579 {
580 #if KMP_USE_HWLOC
581  int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
582  if (retval >= 0) {
583  return 0;
584  }
585  int error = errno;
586  if (abort_on_error) {
587  __kmp_msg(
588  kmp_ms_fatal,
589  KMP_MSG( FatalSysError ),
590  KMP_ERR( error ),
591  __kmp_msg_null
592  );
593  }
594  return error;
595 #else
596 # if KMP_GROUP_AFFINITY
597 
598  if (__kmp_num_proc_groups > 1) {
599  //
600  // Check for a valid mask.
601  //
602  GROUP_AFFINITY ga;
603  int group = __kmp_get_proc_group( mask );
604  if (group < 0) {
605  if (abort_on_error) {
606  KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
607  }
608  return -1;
609  }
610 
611  //
612  // Transform the bit vector into a GROUP_AFFINITY struct
613  // and make the system call to set affinity.
614  //
615  ga.Group = group;
616  ga.Mask = mask[group];
617  ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
618 
619  KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
620  if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
621  DWORD error = GetLastError();
622  if (abort_on_error) {
623  __kmp_msg(
624  kmp_ms_fatal,
625  KMP_MSG( CantSetThreadAffMask ),
626  KMP_ERR( error ),
627  __kmp_msg_null
628  );
629  }
630  return error;
631  }
632  }
633  else
634 
635 # endif /* KMP_GROUP_AFFINITY */
636 
637  {
638  if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
639  DWORD error = GetLastError();
640  if (abort_on_error) {
641  __kmp_msg(
642  kmp_ms_fatal,
643  KMP_MSG( CantSetThreadAffMask ),
644  KMP_ERR( error ),
645  __kmp_msg_null
646  );
647  }
648  return error;
649  }
650  }
651 #endif /* KMP_USE_HWLOC */
652  return 0;
653 }
654 
655 int
656 __kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error )
657 {
658 #if KMP_USE_HWLOC
659  int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD);
660  if (retval >= 0) {
661  return 0;
662  }
663  int error = errno;
664  if (abort_on_error) {
665  __kmp_msg(
666  kmp_ms_fatal,
667  KMP_MSG( FatalSysError ),
668  KMP_ERR( error ),
669  __kmp_msg_null
670  );
671  }
672  return error;
673 #else /* KMP_USE_HWLOC */
674 # if KMP_GROUP_AFFINITY
675 
676  if (__kmp_num_proc_groups > 1) {
677  KMP_CPU_ZERO(mask);
678  GROUP_AFFINITY ga;
679  KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
680 
681  if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
682  DWORD error = GetLastError();
683  if (abort_on_error) {
684  __kmp_msg(
685  kmp_ms_fatal,
686  KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
687  KMP_ERR(error),
688  __kmp_msg_null
689  );
690  }
691  return error;
692  }
693 
 694  if ((ga.Group < 0) || (ga.Group >= __kmp_num_proc_groups)
695  || (ga.Mask == 0)) {
696  return -1;
697  }
698 
699  mask[ga.Group] = ga.Mask;
700  }
701  else
702 
703 # endif /* KMP_GROUP_AFFINITY */
704 
705  {
706  kmp_affin_mask_t newMask, sysMask, retval;
707 
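 /* There is no GetThreadAffinityMask() API. SetThreadAffinityMask() returns
 the previous mask, so the current thread mask is read by setting the full
 process mask and then restoring the returned value. */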
708  if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
709  DWORD error = GetLastError();
710  if (abort_on_error) {
711  __kmp_msg(
712  kmp_ms_fatal,
713  KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
714  KMP_ERR(error),
715  __kmp_msg_null
716  );
717  }
718  return error;
719  }
720  retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
721  if (! retval) {
722  DWORD error = GetLastError();
723  if (abort_on_error) {
724  __kmp_msg(
725  kmp_ms_fatal,
726  KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
727  KMP_ERR(error),
728  __kmp_msg_null
729  );
730  }
731  return error;
732  }
733  newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
734  if (! newMask) {
735  DWORD error = GetLastError();
736  if (abort_on_error) {
737  __kmp_msg(
738  kmp_ms_fatal,
739  KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
740  KMP_ERR(error),
741  __kmp_msg_null
742  );
743  }
744  }
745  *mask = retval;
746  }
747 #endif /* KMP_USE_HWLOC */
748  return 0;
749 }
750 
751 void
752 __kmp_affinity_bind_thread( int proc )
753 {
754 #if KMP_USE_HWLOC
755  kmp_affin_mask_t *mask;
756  KMP_CPU_ALLOC_ON_STACK(mask);
757  KMP_CPU_ZERO(mask);
758  KMP_CPU_SET(proc, mask);
759  __kmp_set_system_affinity(mask, TRUE);
760  KMP_CPU_FREE_FROM_STACK(mask);
761 #else /* KMP_USE_HWLOC */
762 # if KMP_GROUP_AFFINITY
763 
764  if (__kmp_num_proc_groups > 1) {
765  //
766  // Form the GROUP_AFFINITY struct directly, rather than filling
767  // out a bit vector and calling __kmp_set_system_affinity().
768  //
769  GROUP_AFFINITY ga;
770  KMP_DEBUG_ASSERT((proc >= 0) && (proc < (__kmp_num_proc_groups
771  * CHAR_BIT * sizeof(DWORD_PTR))));
772  ga.Group = proc / (CHAR_BIT * sizeof(DWORD_PTR));
773  ga.Mask = (unsigned long long)1 << (proc % (CHAR_BIT * sizeof(DWORD_PTR)));
774  ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
775 
776  KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
777  if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
778  DWORD error = GetLastError();
779  if (__kmp_affinity_verbose) { // AC: continue silently if not verbose
780  __kmp_msg(
781  kmp_ms_warning,
782  KMP_MSG( CantSetThreadAffMask ),
783  KMP_ERR( error ),
784  __kmp_msg_null
785  );
786  }
787  }
788  }
789  else
790 
791 # endif /* KMP_GROUP_AFFINITY */
792 
793  {
794  kmp_affin_mask_t mask;
795  KMP_CPU_ZERO(&mask);
796  KMP_CPU_SET(proc, &mask);
797  __kmp_set_system_affinity(&mask, TRUE);
798  }
799 #endif /* KMP_USE_HWLOC */
800 }
801 
802 void
803 __kmp_affinity_determine_capable( const char *env_var )
804 {
805  //
806  // All versions of Windows* OS (since Win '95) support SetThreadAffinityMask().
807  //
808 
809 #if KMP_GROUP_AFFINITY
810  KMP_AFFINITY_ENABLE(__kmp_num_proc_groups*sizeof(kmp_affin_mask_t));
811 #else
812  KMP_AFFINITY_ENABLE(sizeof(kmp_affin_mask_t));
813 #endif
814 
815  KA_TRACE( 10, (
816  "__kmp_affinity_determine_capable: "
817  "Windows* OS affinity interface functional (mask size = %" KMP_SIZE_T_SPEC ").\n",
818  __kmp_affin_mask_size
819  ) );
820 }
821 
822 double
823 __kmp_read_cpu_time( void )
824 {
825  FILETIME CreationTime, ExitTime, KernelTime, UserTime;
826  int status;
827  double cpu_time;
828 
829  cpu_time = 0;
830 
831  status = GetProcessTimes( GetCurrentProcess(), &CreationTime,
832  &ExitTime, &KernelTime, &UserTime );
833 
834  if (status) {
835  double sec = 0;
836 
837  sec += KernelTime.dwHighDateTime;
838  sec += UserTime.dwHighDateTime;
839 
840  /* Shift left by 32 bits */
841  sec *= (double) (1 << 16) * (double) (1 << 16);
842 
843  sec += KernelTime.dwLowDateTime;
844  sec += UserTime.dwLowDateTime;
845 
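 /* FILETIME counts 100-nanosecond intervals; convert to seconds. */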
846  cpu_time += (sec * 100.0) / KMP_NSEC_PER_SEC;
847  }
848 
849  return cpu_time;
850 }
851 
852 int
853 __kmp_read_system_info( struct kmp_sys_info *info )
854 {
855  info->maxrss = 0; /* the maximum resident set size utilized (in kilobytes) */
856  info->minflt = 0; /* the number of page faults serviced without any I/O */
857  info->majflt = 0; /* the number of page faults serviced that required I/O */
858  info->nswap = 0; /* the number of times a process was "swapped" out of memory */
859  info->inblock = 0; /* the number of times the file system had to perform input */
860  info->oublock = 0; /* the number of times the file system had to perform output */
 861  info->nvcsw = 0; /* the number of times a context switch was voluntarily invoked */
862  info->nivcsw = 0; /* the number of times a context switch was forced */
863 
864  return 1;
865 }
866 
867 /* ------------------------------------------------------------------------ */
868 /* ------------------------------------------------------------------------ */
869 
870 
871 void
872 __kmp_runtime_initialize( void )
873 {
874  SYSTEM_INFO info;
875  kmp_str_buf_t path;
876  UINT path_size;
877 
878  if ( __kmp_init_runtime ) {
879  return;
880  };
881 
882 #if KMP_DYNAMIC_LIB
883  /* Pin dynamic library for the lifetime of application */
884  {
885  // First, turn off error message boxes
886  UINT err_mode = SetErrorMode (SEM_FAILCRITICALERRORS);
887  HMODULE h;
888  BOOL ret = GetModuleHandleEx( GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS
889  |GET_MODULE_HANDLE_EX_FLAG_PIN,
890  (LPCTSTR)&__kmp_serial_initialize, &h);
891  KMP_DEBUG_ASSERT2(h && ret, "OpenMP RTL cannot find itself loaded");
892  SetErrorMode (err_mode); // Restore error mode
893  KA_TRACE( 10, ("__kmp_runtime_initialize: dynamic library pinned\n") );
894  }
895 #endif
896 
897  InitializeCriticalSection( & __kmp_win32_section );
898 #if USE_ITT_BUILD
899  __kmp_itt_system_object_created( & __kmp_win32_section, "Critical Section" );
900 #endif /* USE_ITT_BUILD */
901  __kmp_initialize_system_tick();
902 
903  #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
904  if ( ! __kmp_cpuinfo.initialized ) {
905  __kmp_query_cpuid( & __kmp_cpuinfo );
906  }; // if
907  #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
908 
909  /* Set up minimum number of threads to switch to TLS gtid */
910  #if KMP_OS_WINDOWS && ! defined KMP_DYNAMIC_LIB
911  // Windows* OS, static library.
912  /*
913  A new thread may use stack space previously used by another, now terminated, thread.
914  On Windows* OS, in the case of static linking, we do not know the moment of thread
915  termination, and our structures (__kmp_threads and __kmp_root arrays) still keep info
916  about dead threads. This leads to a problem in the __kmp_get_global_thread_id()
917  function: it wrongly finds a gtid (by searching through the stack addresses of all
918  known threads) for an unregistered foreign thread.
919 
920  Setting __kmp_tls_gtid_min to 0 works around this problem: __kmp_get_global_thread_id()
921  does not search through stacks, but gets the gtid from TLS immediately.
922 
923  --ln
924  */
925  __kmp_tls_gtid_min = 0;
926  #else
927  __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
928  #endif
929 
930  /* for the static library */
931  if ( !__kmp_gtid_threadprivate_key ) {
932  __kmp_gtid_threadprivate_key = TlsAlloc();
933  if( __kmp_gtid_threadprivate_key == TLS_OUT_OF_INDEXES ) {
934  KMP_FATAL( TLSOutOfIndexes );
935  }
936  }
937 
938 
939  //
940  // Load ntdll.dll.
941  //
942  /*
943  A simple
944  GetModuleHandle( "ntdll.dll" )
945  is not suitable due to a security issue (see
946  http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have to specify the full
947  path to the library.
948  */
949  __kmp_str_buf_init( & path );
950  path_size = GetSystemDirectory( path.str, path.size );
951  KMP_DEBUG_ASSERT( path_size > 0 );
952  if ( path_size >= path.size ) {
953  //
954  // Buffer is too short. Expand the buffer and try again.
955  //
956  __kmp_str_buf_reserve( & path, path_size );
957  path_size = GetSystemDirectory( path.str, path.size );
958  KMP_DEBUG_ASSERT( path_size > 0 );
959  }; // if
960  if ( path_size > 0 && path_size < path.size ) {
961  // Now we have the system directory name in the buffer.
962  // Append a backslash and the DLL name to form the full path.
963  // Append backslash and name of dll to form full path,
964  //
965  path.used = path_size;
966  __kmp_str_buf_print( & path, "\\%s", "ntdll.dll" );
967 
968  //
969  // Now load ntdll using full path.
970  //
971  ntdll = GetModuleHandle( path.str );
972  }
973 
974  KMP_DEBUG_ASSERT( ntdll != NULL );
975  if ( ntdll != NULL ) {
976  NtQuerySystemInformation = (NtQuerySystemInformation_t) GetProcAddress( ntdll, "NtQuerySystemInformation" );
977  }
978  KMP_DEBUG_ASSERT( NtQuerySystemInformation != NULL );
979 
980 #if KMP_GROUP_AFFINITY
981  //
982  // Load kernel32.dll.
983  // Same caveat - must use full system path name.
984  //
985  if ( path_size > 0 && path_size < path.size ) {
986  //
987  // Truncate the buffer back to just the system path length,
988  // discarding "\\ntdll.dll", and replace it with "kernel32.dll".
989  //
990  path.used = path_size;
991  __kmp_str_buf_print( & path, "\\%s", "kernel32.dll" );
992 
993  //
994  // Load kernel32.dll using full path.
995  //
996  kernel32 = GetModuleHandle( path.str );
997  KA_TRACE( 10, ("__kmp_runtime_initialize: kernel32.dll = %s\n", path.str ) );
998 
999  //
1000  // Load the function pointers to kernel32.dll routines
1001  // that may or may not exist on this system.
1002  //
1003  if ( kernel32 != NULL ) {
1004  __kmp_GetActiveProcessorCount = (kmp_GetActiveProcessorCount_t) GetProcAddress( kernel32, "GetActiveProcessorCount" );
1005  __kmp_GetActiveProcessorGroupCount = (kmp_GetActiveProcessorGroupCount_t) GetProcAddress( kernel32, "GetActiveProcessorGroupCount" );
1006  __kmp_GetThreadGroupAffinity = (kmp_GetThreadGroupAffinity_t) GetProcAddress( kernel32, "GetThreadGroupAffinity" );
1007  __kmp_SetThreadGroupAffinity = (kmp_SetThreadGroupAffinity_t) GetProcAddress( kernel32, "SetThreadGroupAffinity" );
1008 
1009  KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorCount = %p\n", __kmp_GetActiveProcessorCount ) );
1010  KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorGroupCount = %p\n", __kmp_GetActiveProcessorGroupCount ) );
1011  KA_TRACE( 10, ("__kmp_runtime_initialize:__kmp_GetThreadGroupAffinity = %p\n", __kmp_GetThreadGroupAffinity ) );
1012  KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_SetThreadGroupAffinity = %p\n", __kmp_SetThreadGroupAffinity ) );
1013  KA_TRACE( 10, ("__kmp_runtime_initialize: sizeof(kmp_affin_mask_t) = %d\n", sizeof(kmp_affin_mask_t) ) );
1014 
1015  //
1016  // See if group affinity is supported on this system.
1017  // If so, calculate the #groups and #procs.
1018  //
1019  // Group affinity was introduced with Windows* 7 OS and
1020  // Windows* Server 2008 R2 OS.
1021  //
1022  if ( ( __kmp_GetActiveProcessorCount != NULL )
1023  && ( __kmp_GetActiveProcessorGroupCount != NULL )
1024  && ( __kmp_GetThreadGroupAffinity != NULL )
1025  && ( __kmp_SetThreadGroupAffinity != NULL )
1026  && ( ( __kmp_num_proc_groups
1027  = __kmp_GetActiveProcessorGroupCount() ) > 1 ) ) {
1028  //
1029  // Calculate the total number of active OS procs.
1030  //
1031  int i;
1032 
1033  KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) );
1034 
1035  __kmp_xproc = 0;
1036 
1037  for ( i = 0; i < __kmp_num_proc_groups; i++ ) {
1038  DWORD size = __kmp_GetActiveProcessorCount( i );
1039  __kmp_xproc += size;
1040  KA_TRACE( 10, ("__kmp_runtime_initialize: proc group %d size = %d\n", i, size ) );
1041  }
1042  }
1043  else {
1044  KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) );
1045  }
1046  }
1047  }
1048  if ( __kmp_num_proc_groups <= 1 ) {
1049  GetSystemInfo( & info );
1050  __kmp_xproc = info.dwNumberOfProcessors;
1051  }
1052 #else
1053  GetSystemInfo( & info );
1054  __kmp_xproc = info.dwNumberOfProcessors;
1055 #endif /* KMP_GROUP_AFFINITY */
1056 
1057  //
1058  // If the OS said there were 0 procs, take a guess and use a value of 2.
1059  // This is done for Linux* OS, also. Do we need error / warning?
1060  //
1061  if ( __kmp_xproc <= 0 ) {
1062  __kmp_xproc = 2;
1063  }
1064 
1065  KA_TRACE( 5, ("__kmp_runtime_initialize: total processors = %d\n", __kmp_xproc) );
1066 
1067  __kmp_str_buf_free( & path );
1068 
1069 #if USE_ITT_BUILD
1070  __kmp_itt_initialize();
1071 #endif /* USE_ITT_BUILD */
1072 
1073  __kmp_init_runtime = TRUE;
1074 } // __kmp_runtime_initialize
1075 
1076 void
1077 __kmp_runtime_destroy( void )
1078 {
1079  if ( ! __kmp_init_runtime ) {
1080  return;
1081  }
1082 
1083 #if USE_ITT_BUILD
1084  __kmp_itt_destroy();
1085 #endif /* USE_ITT_BUILD */
1086 
1087  /* we can't DeleteCriticalsection( & __kmp_win32_section ); */
1088  /* due to the KX_TRACE() commands */
1089  KA_TRACE( 40, ("__kmp_runtime_destroy\n" ));
1090 
1091  if( __kmp_gtid_threadprivate_key ) {
1092  TlsFree( __kmp_gtid_threadprivate_key );
1093  __kmp_gtid_threadprivate_key = 0;
1094  }
1095 
1096  __kmp_affinity_uninitialize();
1097  DeleteCriticalSection( & __kmp_win32_section );
1098 
1099  ntdll = NULL;
1100  NtQuerySystemInformation = NULL;
1101 
1102 #if KMP_GROUP_AFFINITY
1103  kernel32 = NULL;
1104  __kmp_GetActiveProcessorCount = NULL;
1105  __kmp_GetActiveProcessorGroupCount = NULL;
1106  __kmp_GetThreadGroupAffinity = NULL;
1107  __kmp_SetThreadGroupAffinity = NULL;
1108 #endif // KMP_GROUP_AFFINITY
1109 
1110  __kmp_init_runtime = FALSE;
1111 }
1112 
1113 
1114 void
1115 __kmp_terminate_thread( int gtid )
1116 {
1117  kmp_info_t *th = __kmp_threads[ gtid ];
1118 
1119  if( !th ) return;
1120 
1121  KA_TRACE( 10, ("__kmp_terminate_thread: kill (%d)\n", gtid ) );
1122 
1123  if (TerminateThread( th->th.th_info.ds.ds_thread, (DWORD) -1) == FALSE) {
1124  /* It's OK, the thread may have exited already */
1125  }
1126  __kmp_free_handle( th->th.th_info.ds.ds_thread );
1127 }
1128 
1129 /* ------------------------------------------------------------------------ */
1130 /* ------------------------------------------------------------------------ */
1131 
1132 void
1133 __kmp_clear_system_time( void )
1134 {
1135  BOOL status;
1136  LARGE_INTEGER time;
1137  status = QueryPerformanceCounter( & time );
1138  __kmp_win32_time = (kmp_int64) time.QuadPart;
1139 }
1140 
1141 void
1142 __kmp_initialize_system_tick( void )
1143 {
1144  {
1145  BOOL status;
1146  LARGE_INTEGER freq;
1147 
1148  status = QueryPerformanceFrequency( & freq );
1149  if (! status) {
1150  DWORD error = GetLastError();
1151  __kmp_msg(
1152  kmp_ms_fatal,
1153  KMP_MSG( FunctionError, "QueryPerformanceFrequency()" ),
1154  KMP_ERR( error ),
1155  __kmp_msg_null
1156  );
1157 
1158  }
1159  else {
1160  __kmp_win32_tick = ((double) 1.0) / (double) freq.QuadPart;
1161  }
1162  }
1163 }
1164 
1165 /* Calculate the elapsed wall clock time for the user */
1166 
1167 void
1168 __kmp_elapsed( double *t )
1169 {
1170  BOOL status;
1171  LARGE_INTEGER now;
1172  status = QueryPerformanceCounter( & now );
1173  *t = ((double) now.QuadPart) * __kmp_win32_tick;
1174 }
1175 
1176 /* Calculate the elapsed wall clock tick for the user */
1177 
1178 void
1179 __kmp_elapsed_tick( double *t )
1180 {
1181  *t = __kmp_win32_tick;
1182 }
1183 
1184 void
1185 __kmp_read_system_time( double *delta )
1186 {
1187  if (delta != NULL) {
1188  BOOL status;
1189  LARGE_INTEGER now;
1190 
1191  status = QueryPerformanceCounter( & now );
1192 
1193  *delta = ((double) (((kmp_int64) now.QuadPart) - __kmp_win32_time))
1194  * __kmp_win32_tick;
1195  }
1196 }
1197 
1198 /* ------------------------------------------------------------------------ */
1199 /* ------------------------------------------------------------------------ */
1200 
1201 void * __stdcall
1202 __kmp_launch_worker( void *arg )
1203 {
1204  volatile void *stack_data;
1205  void *exit_val;
1206  void *padding = 0;
1207  kmp_info_t *this_thr = (kmp_info_t *) arg;
1208  int gtid;
1209 
1210  gtid = this_thr->th.th_info.ds.ds_gtid;
1211  __kmp_gtid_set_specific( gtid );
1212 #ifdef KMP_TDATA_GTID
1213  #error "This define causes problems with LoadLibrary() + declspec(thread) " \
1214  "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \
1215  "reference: http://support.microsoft.com/kb/118816"
1216  //__kmp_gtid = gtid;
1217 #endif
1218 
1219 #if USE_ITT_BUILD
1220  __kmp_itt_thread_name( gtid );
1221 #endif /* USE_ITT_BUILD */
1222 
1223  __kmp_affinity_set_init_mask( gtid, FALSE );
1224 
1225 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1226  //
1227  // Set the FP control regs to be a copy of
1228  // the parallel initialization thread's.
1229  //
1230  __kmp_clear_x87_fpu_status_word();
1231  __kmp_load_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
1232  __kmp_load_mxcsr( &__kmp_init_mxcsr );
1233 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1234 
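 /* Stagger worker stacks by gtid * __kmp_stkoffset so that the same stack
 offsets in different threads do not all map to the same cache sets. */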
1235  if ( __kmp_stkoffset > 0 && gtid > 0 ) {
1236  padding = KMP_ALLOCA( gtid * __kmp_stkoffset );
1237  }
1238 
1239  KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive );
1240  this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId();
1241  TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE );
1242 
1243  if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid
1244  TCW_PTR(this_thr->th.th_info.ds.ds_stackbase, &stack_data);
1245  KMP_ASSERT( this_thr -> th.th_info.ds.ds_stackgrow == FALSE );
1246  __kmp_check_stack_overlap( this_thr );
1247  }
1248  KMP_MB();
1249  exit_val = __kmp_launch_thread( this_thr );
1250  KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive );
1251  TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE );
1252  KMP_MB();
1253  return exit_val;
1254 }
1255 
1256 /* The monitor thread controls all of the threads in the complex */
1257 
1258 void * __stdcall
1259 __kmp_launch_monitor( void *arg )
1260 {
1261  DWORD wait_status;
1262  kmp_thread_t monitor;
1263  int status;
1264  int interval;
1265  kmp_info_t *this_thr = (kmp_info_t *) arg;
1266 
1267  KMP_DEBUG_ASSERT(__kmp_init_monitor);
1268  TCW_4( __kmp_init_monitor, 2 ); // AC: Signal the library that monitor has started
1269  // TODO: hide "2" in enum (like {true,false,started})
1270  this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId();
1271  TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE );
1272 
1273  KMP_MB(); /* Flush all pending memory write invalidates. */
1274  KA_TRACE( 10, ("__kmp_launch_monitor: launched\n" ) );
1275 
1276  monitor = GetCurrentThread();
1277 
1278  /* set thread priority */
1279  status = SetThreadPriority( monitor, THREAD_PRIORITY_HIGHEST );
1280  if (! status) {
1281  DWORD error = GetLastError();
1282  __kmp_msg(
1283  kmp_ms_fatal,
1284  KMP_MSG( CantSetThreadPriority ),
1285  KMP_ERR( error ),
1286  __kmp_msg_null
1287  );
1288  }
1289 
1290  /* register us as monitor */
1291  __kmp_gtid_set_specific( KMP_GTID_MONITOR );
1292 #ifdef KMP_TDATA_GTID
1293  #error "This define causes problems with LoadLibrary() + declspec(thread) " \
1294  "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \
1295  "reference: http://support.microsoft.com/kb/118816"
1296  //__kmp_gtid = KMP_GTID_MONITOR;
1297 #endif
1298 
1299 #if USE_ITT_BUILD
1300  __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore monitor thread.
1301 #endif /* USE_ITT_BUILD */
1302 
1303  KMP_MB(); /* Flush all pending memory write invalidates. */
1304 
1305  interval = ( 1000 / __kmp_monitor_wakeups ); /* in milliseconds */
1306 
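 /* The monitor wakes up __kmp_monitor_wakeups times per second and bumps the
 global time counter; blocked worker threads compare against it to decide
 when their blocktime has expired. */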
1307  while (! TCR_4(__kmp_global.g.g_done)) {
1308  /* This thread monitors the state of the system */
1309 
1310  KA_TRACE( 15, ( "__kmp_launch_monitor: update\n" ) );
1311 
1312  wait_status = WaitForSingleObject( __kmp_monitor_ev, interval );
1313 
1314  if (wait_status == WAIT_TIMEOUT) {
1315  TCW_4( __kmp_global.g.g_time.dt.t_value,
1316  TCR_4( __kmp_global.g.g_time.dt.t_value ) + 1 );
1317  }
1318 
1319  KMP_MB(); /* Flush all pending memory write invalidates. */
1320  }
1321 
1322  KA_TRACE( 10, ("__kmp_launch_monitor: finished\n" ) );
1323 
1324  status = SetThreadPriority( monitor, THREAD_PRIORITY_NORMAL );
1325  if (! status) {
1326  DWORD error = GetLastError();
1327  __kmp_msg(
1328  kmp_ms_fatal,
1329  KMP_MSG( CantSetThreadPriority ),
1330  KMP_ERR( error ),
1331  __kmp_msg_null
1332  );
1333  }
1334 
1335  if (__kmp_global.g.g_abort != 0) {
1336  /* now we need to terminate the worker threads */
1337  /* the value of t_abort is the signal we caught */
1338 
1339  int gtid;
1340 
1341  KA_TRACE( 10, ("__kmp_launch_monitor: terminate sig=%d\n", (__kmp_global.g.g_abort) ) );
1342 
1343  /* terminate the OpenMP worker threads */
1344  /* TODO this is not valid for sibling threads!!
1345  * the uber master might not be 0 anymore.. */
1346  for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
1347  __kmp_terminate_thread( gtid );
1348 
1349  __kmp_cleanup();
1350 
1351  Sleep( 0 );
1352 
1353  KA_TRACE( 10, ("__kmp_launch_monitor: raise sig=%d\n", (__kmp_global.g.g_abort) ) );
1354 
1355  if (__kmp_global.g.g_abort > 0) {
1356  raise( __kmp_global.g.g_abort );
1357  }
1358  }
1359 
1360  TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE );
1361 
1362  KMP_MB();
1363  return arg;
1364 }
1365 
1366 void
1367 __kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size )
1368 {
1369  kmp_thread_t handle;
1370  DWORD idThread;
1371 
1372  KA_TRACE( 10, ("__kmp_create_worker: try to create thread (%d)\n", gtid ) );
1373 
1374  th->th.th_info.ds.ds_gtid = gtid;
1375 
1376  if ( KMP_UBER_GTID(gtid) ) {
1377  int stack_data;
1378 
1379  /* TODO: GetCurrentThread() returns a pseudo-handle that is unsuitable for other threads to use.
1380  Is it appropriate to just use GetCurrentThread? When should we close this handle? When
1381  unregistering the root?
1382  */
1383  {
1384  BOOL rc;
1385  rc = DuplicateHandle(
1386  GetCurrentProcess(),
1387  GetCurrentThread(),
1388  GetCurrentProcess(),
1389  &th->th.th_info.ds.ds_thread,
1390  0,
1391  FALSE,
1392  DUPLICATE_SAME_ACCESS
1393  );
1394  KMP_ASSERT( rc );
1395  KA_TRACE( 10, (" __kmp_create_worker: ROOT Handle duplicated, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
1396  (LPVOID)th,
1397  th->th.th_info.ds.ds_thread ) );
1398  th->th.th_info.ds.ds_thread_id = GetCurrentThreadId();
1399  }
1400  if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid
1401  /* we will dynamically update the stack range if gtid_mode == 1 */
1402  TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
1403  TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
1404  TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
1405  __kmp_check_stack_overlap( th );
1406  }
1407  }
1408  else {
1409  KMP_MB(); /* Flush all pending memory write invalidates. */
1410 
1411  /* Set stack size for this thread now. */
1412  KA_TRACE( 10, ( "__kmp_create_worker: stack_size = %" KMP_SIZE_T_SPEC
1413  " bytes\n", stack_size ) );
1414 
1415  stack_size += gtid * __kmp_stkoffset;
1416 
1417  TCW_PTR(th->th.th_info.ds.ds_stacksize, stack_size);
1418  TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);
1419 
1420  KA_TRACE( 10, ( "__kmp_create_worker: (before) stack_size = %"
1421  KMP_SIZE_T_SPEC
1422  " bytes, &__kmp_launch_worker = %p, th = %p, "
1423  "&idThread = %p\n",
1424  (SIZE_T) stack_size,
1425  (LPTHREAD_START_ROUTINE) & __kmp_launch_worker,
1426  (LPVOID) th, &idThread ) );
1427 
1428  handle = CreateThread( NULL, (SIZE_T) stack_size,
1429  (LPTHREAD_START_ROUTINE) __kmp_launch_worker,
1430  (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread );
1431 
1432  KA_TRACE( 10, ( "__kmp_create_worker: (after) stack_size = %"
1433  KMP_SIZE_T_SPEC
1434  " bytes, &__kmp_launch_worker = %p, th = %p, "
1435  "idThread = %u, handle = %" KMP_UINTPTR_SPEC "\n",
1436  (SIZE_T) stack_size,
1437  (LPTHREAD_START_ROUTINE) & __kmp_launch_worker,
1438  (LPVOID) th, idThread, handle ) );
1439 
1440  if ( handle == 0 ) {
1441  DWORD error = GetLastError();
1442  __kmp_msg(kmp_ms_fatal, KMP_MSG( CantCreateThread ), KMP_ERR( error ), __kmp_msg_null);
1443  } else {
1444  th->th.th_info.ds.ds_thread = handle;
1445  }
1446 
1447  KMP_MB(); /* Flush all pending memory write invalidates. */
1448  }
1449 
1450  KA_TRACE( 10, ("__kmp_create_worker: done creating thread (%d)\n", gtid ) );
1451 }
1452 
1453 int
1454 __kmp_still_running(kmp_info_t *th) {
1455  return (WAIT_TIMEOUT == WaitForSingleObject( th->th.th_info.ds.ds_thread, 0));
1456 }
1457 
1458 void
1459 __kmp_create_monitor( kmp_info_t *th )
1460 {
1461  kmp_thread_t handle;
1462  DWORD idThread;
1463  int ideal, new_ideal;
1464 
1465  if( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
 1466  // We don't need a monitor thread in the case of MAX_BLOCKTIME
1467  KA_TRACE( 10, ("__kmp_create_monitor: skipping monitor thread because of MAX blocktime\n" ) );
1468  th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op
1469  th->th.th_info.ds.ds_gtid = 0;
1470  TCW_4( __kmp_init_monitor, 2 ); // Signal to stop waiting for monitor creation
1471  return;
1472  }
1473  KA_TRACE( 10, ("__kmp_create_monitor: try to create monitor\n" ) );
1474 
1475  KMP_MB(); /* Flush all pending memory write invalidates. */
1476 
1477  __kmp_monitor_ev = CreateEvent( NULL, TRUE, FALSE, NULL );
1478  if ( __kmp_monitor_ev == NULL ) {
1479  DWORD error = GetLastError();
1480  __kmp_msg(
1481  kmp_ms_fatal,
1482  KMP_MSG( CantCreateEvent ),
1483  KMP_ERR( error ),
1484  __kmp_msg_null
1485  );
1486  }; // if
1487 #if USE_ITT_BUILD
1488  __kmp_itt_system_object_created( __kmp_monitor_ev, "Event" );
1489 #endif /* USE_ITT_BUILD */
1490 
1491  th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
1492  th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
1493 
1494  // FIXME - on Windows* OS, if __kmp_monitor_stksize = 0, figure out how
1495  // to automatically expand stacksize based on CreateThread error code.
1496  if ( __kmp_monitor_stksize == 0 ) {
1497  __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
1498  }
1499  if ( __kmp_monitor_stksize < __kmp_sys_min_stksize ) {
1500  __kmp_monitor_stksize = __kmp_sys_min_stksize;
1501  }
1502 
1503  KA_TRACE( 10, ("__kmp_create_monitor: requested stacksize = %d bytes\n",
1504  (int) __kmp_monitor_stksize ) );
1505 
1506  TCW_4( __kmp_global.g.g_time.dt.t_value, 0 );
1507 
1508  handle = CreateThread( NULL, (SIZE_T) __kmp_monitor_stksize,
1509  (LPTHREAD_START_ROUTINE) __kmp_launch_monitor,
1510  (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread );
1511  if (handle == 0) {
1512  DWORD error = GetLastError();
1513  __kmp_msg(
1514  kmp_ms_fatal,
1515  KMP_MSG( CantCreateThread ),
1516  KMP_ERR( error ),
1517  __kmp_msg_null
1518  );
1519  }
1520  else
1521  th->th.th_info.ds.ds_thread = handle;
1522 
1523  KMP_MB(); /* Flush all pending memory write invalidates. */
1524 
1525  KA_TRACE( 10, ("__kmp_create_monitor: monitor created %p\n",
1526  (void *) th->th.th_info.ds.ds_thread ) );
1527 }
1528 
1529 /*
1530  Check to see if thread is still alive.
1531 
1532  NOTE: The ExitProcess(code) system call causes all threads to terminate
1533  with an exit_val = code. Because of this we cannot rely on
1534  exit_val having any particular value. So this routine may
1535  return STILL_ACTIVE in exit_val even after the thread is dead.
1536 */
1537 
1538 int
1539 __kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val )
1540 {
1541  DWORD rc;
1542  rc = GetExitCodeThread( th->th.th_info.ds.ds_thread, exit_val );
1543  if ( rc == 0 ) {
1544  DWORD error = GetLastError();
1545  __kmp_msg(
1546  kmp_ms_fatal,
1547  KMP_MSG( FunctionError, "GetExitCodeThread()" ),
1548  KMP_ERR( error ),
1549  __kmp_msg_null
1550  );
1551  }; // if
1552  return ( *exit_val == STILL_ACTIVE );
1553 }
1554 
1555 
1556 void
1557 __kmp_exit_thread(
1558  int exit_status
1559 ) {
1560  ExitThread( exit_status );
1561 } // __kmp_exit_thread
1562 
1563 /*
1564  This is a common part for both __kmp_reap_worker() and __kmp_reap_monitor().
1565 */
1566 static void
1567 __kmp_reap_common( kmp_info_t * th )
1568 {
1569  DWORD exit_val;
1570 
1571  KMP_MB(); /* Flush all pending memory write invalidates. */
1572 
1573  KA_TRACE( 10, ( "__kmp_reap_common: try to reap (%d)\n", th->th.th_info.ds.ds_gtid ) );
1574 
1575  /*
1576  2006-10-19:
1577 
1578  There are two opposite situations:
1579 
1580  1. Windows* OS keeps the thread alive after it resets the ds_alive flag and exits
1581  from the thread function. (For example, see C70770/Q394281 "unloading of dll based
1582  on OMP is very slow".)
1583  2. Windows* OS may kill the thread before it resets the ds_alive flag.
1584 
1585  The right solution seems to be waiting for *either* thread termination *or* ds_alive resetting.
1586 
1587  */
1588 
1589  {
1590  // TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize KMP_WAIT_YIELD to
1591  // cover this usage also.
1592  void * obj = NULL;
1593  register kmp_uint32 spins;
1594 #if USE_ITT_BUILD
1595  KMP_FSYNC_SPIN_INIT( obj, (void*) & th->th.th_info.ds.ds_alive );
1596 #endif /* USE_ITT_BUILD */
1597  KMP_INIT_YIELD( spins );
1598  do {
1599 #if USE_ITT_BUILD
1600  KMP_FSYNC_SPIN_PREPARE( obj );
1601 #endif /* USE_ITT_BUILD */
1602  __kmp_is_thread_alive( th, &exit_val );
1603  KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
1604  KMP_YIELD_SPIN( spins );
1605  } while ( exit_val == STILL_ACTIVE && TCR_4( th->th.th_info.ds.ds_alive ) );
1606 #if USE_ITT_BUILD
1607  if ( exit_val == STILL_ACTIVE ) {
1608  KMP_FSYNC_CANCEL( obj );
1609  } else {
1610  KMP_FSYNC_SPIN_ACQUIRED( obj );
1611  }; // if
1612 #endif /* USE_ITT_BUILD */
1613  }
1614 
1615  __kmp_free_handle( th->th.th_info.ds.ds_thread );
1616 
1617  /*
1618  * NOTE: The ExitProcess(code) system call causes all threads to terminate
1619  * with an exit_val = code. Because of this we cannot rely on
1620  * exit_val having any particular value.
1621  */
1622  if ( exit_val == STILL_ACTIVE ) {
1623  KA_TRACE( 1, ( "__kmp_reap_common: thread still active.\n" ) );
1624  } else if ( (void *) exit_val != (void *) th) {
1625  KA_TRACE( 1, ( "__kmp_reap_common: ExitProcess / TerminateThread used?\n" ) );
1626  }; // if
1627 
1628  KA_TRACE( 10,
1629  (
1630  "__kmp_reap_common: done reaping (%d), handle = %" KMP_UINTPTR_SPEC "\n",
1631  th->th.th_info.ds.ds_gtid,
1632  th->th.th_info.ds.ds_thread
1633  )
1634  );
1635 
1636  th->th.th_info.ds.ds_thread = 0;
1637  th->th.th_info.ds.ds_tid = KMP_GTID_DNE;
1638  th->th.th_info.ds.ds_gtid = KMP_GTID_DNE;
1639  th->th.th_info.ds.ds_thread_id = 0;
1640 
1641  KMP_MB(); /* Flush all pending memory write invalidates. */
1642 }
1643 
1644 void
1645 __kmp_reap_monitor( kmp_info_t *th )
1646 {
1647  int status;
1648 
1649  KA_TRACE( 10, ("__kmp_reap_monitor: try to reap %p\n",
1650  (void *) th->th.th_info.ds.ds_thread ) );
1651 
1652  // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
1653  // If both tid and gtid are 0, it means the monitor did not ever start.
1654  // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
1655  KMP_DEBUG_ASSERT( th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid );
1656  if ( th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR ) {
1657  KA_TRACE( 10, ("__kmp_reap_monitor: monitor did not start, returning\n") );
1658  return;
1659  }; // if
1660 
1661  KMP_MB(); /* Flush all pending memory write invalidates. */
1662 
1663  status = SetEvent( __kmp_monitor_ev );
1664  if ( status == FALSE ) {
1665  DWORD error = GetLastError();
1666  __kmp_msg(
1667  kmp_ms_fatal,
1668  KMP_MSG( CantSetEvent ),
1669  KMP_ERR( error ),
1670  __kmp_msg_null
1671  );
1672  }
1673  KA_TRACE( 10, ( "__kmp_reap_monitor: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) );
1674  __kmp_reap_common( th );
1675 
1676  __kmp_free_handle( __kmp_monitor_ev );
1677 
1678  KMP_MB(); /* Flush all pending memory write invalidates. */
1679 }
1680 
1681 void
1682 __kmp_reap_worker( kmp_info_t * th )
1683 {
1684  KA_TRACE( 10, ( "__kmp_reap_worker: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) );
1685  __kmp_reap_common( th );
1686 }
1687 
1688 /* ------------------------------------------------------------------------ */
1689 /* ------------------------------------------------------------------------ */
1690 
1691 #if KMP_HANDLE_SIGNALS
1692 
1693 
1694 static void
1695 __kmp_team_handler( int signo )
1696 {
1697  if ( __kmp_global.g.g_abort == 0 ) {
1698  // Stage 1 signal handler, let's shut down all of the threads.
1699  if ( __kmp_debug_buf ) {
1700  __kmp_dump_debug_buffer();
1701  }; // if
1702  KMP_MB(); // Flush all pending memory write invalidates.
1703  TCW_4( __kmp_global.g.g_abort, signo );
1704  KMP_MB(); // Flush all pending memory write invalidates.
1705  TCW_4( __kmp_global.g.g_done, TRUE );
1706  KMP_MB(); // Flush all pending memory write invalidates.
1707  }
1708 } // __kmp_team_handler
1709 
1710 
1711 
1712 static
1713 sig_func_t __kmp_signal( int signum, sig_func_t handler ) {
1714  sig_func_t old = signal( signum, handler );
1715  if ( old == SIG_ERR ) {
1716  int error = errno;
1717  __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "signal" ), KMP_ERR( error ), __kmp_msg_null );
1718  }; // if
1719  return old;
1720 }
1721 
1722 static void
1723 __kmp_install_one_handler(
1724  int sig,
1725  sig_func_t handler,
1726  int parallel_init
1727 ) {
1728  sig_func_t old;
1729  KMP_MB(); /* Flush all pending memory write invalidates. */
1730  KB_TRACE( 60, ("__kmp_install_one_handler: called: sig=%d\n", sig ) );
1731  if ( parallel_init ) {
1732  old = __kmp_signal( sig, handler );
 1733  // SIG_DFL on Windows* OS is NULL or 0.
1734  if ( old == __kmp_sighldrs[ sig ] ) {
1735  __kmp_siginstalled[ sig ] = 1;
1736  } else {
1737  // Restore/keep user's handler if one previously installed.
1738  old = __kmp_signal( sig, old );
1739  }; // if
1740  } else {
 1741  // Save the initial/system signal handlers to see if user handlers were installed.
 1742  // 2009-09-23: This is dead code. On Windows* OS __kmp_install_signals is called once with
 1743  // parallel_init == TRUE.
1744  old = __kmp_signal( sig, SIG_DFL );
1745  __kmp_sighldrs[ sig ] = old;
1746  __kmp_signal( sig, old );
1747  }; // if
1748  KMP_MB(); /* Flush all pending memory write invalidates. */
1749 } // __kmp_install_one_handler
1750 
1751 static void
1752 __kmp_remove_one_handler( int sig ) {
1753  if ( __kmp_siginstalled[ sig ] ) {
1754  sig_func_t old;
1755  KMP_MB(); // Flush all pending memory write invalidates.
1756  KB_TRACE( 60, ( "__kmp_remove_one_handler: called: sig=%d\n", sig ) );
1757  old = __kmp_signal( sig, __kmp_sighldrs[ sig ] );
1758  if ( old != __kmp_team_handler ) {
1759  KB_TRACE( 10, ( "__kmp_remove_one_handler: oops, not our handler, restoring: sig=%d\n", sig ) );
1760  old = __kmp_signal( sig, old );
1761  }; // if
1762  __kmp_sighldrs[ sig ] = NULL;
1763  __kmp_siginstalled[ sig ] = 0;
1764  KMP_MB(); // Flush all pending memory write invalidates.
1765  }; // if
1766 } // __kmp_remove_one_handler
1767 
1768 
1769 void
1770 __kmp_install_signals( int parallel_init )
1771 {
1772  KB_TRACE( 10, ( "__kmp_install_signals: called\n" ) );
1773  if ( ! __kmp_handle_signals ) {
1774  KB_TRACE( 10, ( "__kmp_install_signals: KMP_HANDLE_SIGNALS is false - handlers not installed\n" ) );
1775  return;
1776  }; // if
1777  __kmp_install_one_handler( SIGINT, __kmp_team_handler, parallel_init );
1778  __kmp_install_one_handler( SIGILL, __kmp_team_handler, parallel_init );
1779  __kmp_install_one_handler( SIGABRT, __kmp_team_handler, parallel_init );
1780  __kmp_install_one_handler( SIGFPE, __kmp_team_handler, parallel_init );
1781  __kmp_install_one_handler( SIGSEGV, __kmp_team_handler, parallel_init );
1782  __kmp_install_one_handler( SIGTERM, __kmp_team_handler, parallel_init );
1783 } // __kmp_install_signals
1784 
1785 
1786 void
1787 __kmp_remove_signals( void )
1788 {
1789  int sig;
1790  KB_TRACE( 10, ("__kmp_remove_signals: called\n" ) );
1791  for ( sig = 1; sig < NSIG; ++ sig ) {
1792  __kmp_remove_one_handler( sig );
1793  }; // for sig
1794 } // __kmp_remove_signals
1795 
1796 
1797 #endif // KMP_HANDLE_SIGNALS
1798 
1799 /* Put the thread to sleep for a time period */
1800 void
1801 __kmp_thread_sleep( int millis )
1802 {
1803  DWORD status;
1804 
1805  status = SleepEx( (DWORD) millis, FALSE );
1806  if ( status ) {
1807  DWORD error = GetLastError();
1808  __kmp_msg(
1809  kmp_ms_fatal,
1810  KMP_MSG( FunctionError, "SleepEx()" ),
1811  KMP_ERR( error ),
1812  __kmp_msg_null
1813  );
1814  }
1815 }
1816 
1817 /* Determine whether the given address is mapped into the current address space. */
1818 int
1819 __kmp_is_address_mapped( void * addr )
1820 {
1821  DWORD status;
1822  MEMORY_BASIC_INFORMATION lpBuffer;
1823  SIZE_T dwLength;
1824 
1825  dwLength = sizeof(MEMORY_BASIC_INFORMATION);
1826 
1827  status = VirtualQuery( addr, &lpBuffer, dwLength );
1828 
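 /* Consider the address mapped only if the region is committed and its page
 protection allows data access; reserved, free, no-access, and execute-only
 pages do not count. */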
1829  return !((( lpBuffer.State == MEM_RESERVE) || ( lpBuffer.State == MEM_FREE )) ||
1830  (( lpBuffer.Protect == PAGE_NOACCESS ) || ( lpBuffer.Protect == PAGE_EXECUTE )));
1831 }
1832 
1833 kmp_uint64
1834 __kmp_hardware_timestamp(void)
1835 {
1836  kmp_uint64 r = 0;
1837 
1838  QueryPerformanceCounter((LARGE_INTEGER*) &r);
1839  return r;
1840 }
1841 
1842 /* Free handle and check the error code */
1843 void
1844 __kmp_free_handle( kmp_thread_t tHandle )
1845 {
 1846 /* Also called with a parameter of type HANDLE; kmp_thread_t is therefore assumed to be defined as HANDLE. */
1847  BOOL rc;
1848  rc = CloseHandle( tHandle );
1849  if ( !rc ) {
1850  DWORD error = GetLastError();
1851  __kmp_msg(
1852  kmp_ms_fatal,
1853  KMP_MSG( CantCloseHandle ),
1854  KMP_ERR( error ),
1855  __kmp_msg_null
1856  );
1857  }
1858 }
1859 
1860 int
1861 __kmp_get_load_balance( int max ) {
1862 
1863  static ULONG glb_buff_size = 100 * 1024;
1864 
 1865  static int glb_running_threads = 0; /* Saved count of the running threads for the thread balance algorithm */
1866  static double glb_call_time = 0; /* Thread balance algorithm call time */
1867 
1868  int running_threads = 0; // Number of running threads in the system.
1869  NTSTATUS status = 0;
1870  ULONG buff_size = 0;
1871  ULONG info_size = 0;
1872  void * buffer = NULL;
1873  PSYSTEM_PROCESS_INFORMATION spi = NULL;
1874  int first_time = 1;
1875 
 1876  double call_time = 0.0;
1877 
1878  __kmp_elapsed( & call_time );
1879 
1880  if ( glb_call_time &&
1881  ( call_time - glb_call_time < __kmp_load_balance_interval ) ) {
1882  running_threads = glb_running_threads;
1883  goto finish;
1884  }
1885  glb_call_time = call_time;
1886 
1887  // Do not spend time on running algorithm if we have a permanent error.
1888  if ( NtQuerySystemInformation == NULL ) {
1889  running_threads = -1;
1890  goto finish;
1891  }; // if
1892 
1893  if ( max <= 0 ) {
1894  max = INT_MAX;
1895  }; // if
1896 
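 /* NtQuerySystemInformation() fails with STATUS_INFO_LENGTH_MISMATCH when the
 buffer is too small; start from the cached size and keep doubling the
 buffer until the process snapshot fits. */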
1897  do {
1898 
1899  if ( first_time ) {
1900  buff_size = glb_buff_size;
1901  } else {
1902  buff_size = 2 * buff_size;
1903  }
1904 
1905  buffer = KMP_INTERNAL_REALLOC( buffer, buff_size );
1906  if ( buffer == NULL ) {
1907  running_threads = -1;
1908  goto finish;
1909  }; // if
1910  status = NtQuerySystemInformation( SystemProcessInformation, buffer, buff_size, & info_size );
1911  first_time = 0;
1912 
1913  } while ( status == STATUS_INFO_LENGTH_MISMATCH );
1914  glb_buff_size = buff_size;
1915 
1916  #define CHECK( cond ) \
1917  { \
1918  KMP_DEBUG_ASSERT( cond ); \
1919  if ( ! ( cond ) ) { \
1920  running_threads = -1; \
1921  goto finish; \
1922  } \
1923  }
1924 
1925  CHECK( buff_size >= info_size );
1926  spi = PSYSTEM_PROCESS_INFORMATION( buffer );
1927  for ( ; ; ) {
1928  ptrdiff_t offset = uintptr_t( spi ) - uintptr_t( buffer );
1929  CHECK( 0 <= offset && offset + sizeof( SYSTEM_PROCESS_INFORMATION ) < info_size );
1930  HANDLE pid = spi->ProcessId;
1931  ULONG num = spi->NumberOfThreads;
1932  CHECK( num >= 1 );
1933  size_t spi_size = sizeof( SYSTEM_PROCESS_INFORMATION ) + sizeof( SYSTEM_THREAD ) * ( num - 1 );
1934  CHECK( offset + spi_size < info_size ); // Make sure process info record fits the buffer.
1935  if ( spi->NextEntryOffset != 0 ) {
1936  CHECK( spi_size <= spi->NextEntryOffset ); // And do not overlap with the next record.
1937  }; // if
1938  // pid == 0 corresponds to the System Idle Process. It always has running threads
1939  // on all cores. So, we don't consider the running threads of this process.
1940  if ( pid != 0 ) {
1941  for ( int i = 0; i < num; ++ i ) {
1942  THREAD_STATE state = spi->Threads[ i ].State;
 1943  // Count only threads in the Running state; Ready threads are not
 1944  // counted, so the count reflects threads actually occupying a core.
1945  if ( state == StateRunning ) {
1946  ++ running_threads;
1947  // Stop counting running threads if the number is already greater than
1948  // the number of available cores
1949  if ( running_threads >= max ) {
1950  goto finish;
1951  }
1952  } // if
1953  }; // for i
1954  } // if
1955  if ( spi->NextEntryOffset == 0 ) {
1956  break;
1957  }; // if
1958  spi = PSYSTEM_PROCESS_INFORMATION( uintptr_t( spi ) + spi->NextEntryOffset );
1959  }; // forever
1960 
1961  #undef CHECK
1962 
1963  finish: // Clean up and exit.
1964 
1965  if ( buffer != NULL ) {
1966  KMP_INTERNAL_FREE( buffer );
1967  }; // if
1968 
1969  glb_running_threads = running_threads;
1970 
1971  return running_threads;
1972 
1973 } //__kmp_get_load_balance()
1974