#include "kmp_error.h"
#include "kmp_stats.h"
#include "ompt-internal.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

/* __kmpc_begin */
if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE( 10, ( "__kmpc_begin: called\n" ) );
}

/* __kmpc_end */
if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE( 10, ( "__kmpc_end: called\n" ) );
    KA_TRACE( 30, ( "__kmpc_end\n" ));
    __kmp_internal_end_thread( -1 );
}
/* __kmpc_global_thread_num */
kmp_int32 gtid = __kmp_entry_gtid();
KC_TRACE( 10, ( "__kmpc_global_thread_num: T#%d\n", gtid ) );
return gtid;

/* __kmpc_global_num_threads */
KC_TRACE( 10, ( "__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );
return TCR_4(__kmp_nth);

/* __kmpc_bound_thread_num */
KC_TRACE( 10, ( "__kmpc_bound_thread_num: called\n" ) );
return __kmp_tid_from_gtid( __kmp_entry_gtid() );

/* __kmpc_bound_num_threads */
KC_TRACE( 10, ( "__kmpc_bound_num_threads: called\n" ) );
return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
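/*
 * Illustrative sketch (not part of this file): how compiler-generated or
 * tooling code might query the thread bookkeeping entry points above.  The
 * helper name is hypothetical and <stdio.h> is assumed to be available.
 */
#if 0
static void example_report_thread_ids( ident_t *loc )
{
    kmp_int32 gtid = __kmpc_global_thread_num( loc );  /* global thread number */
    kmp_int32 tid  = __kmpc_bound_thread_num( loc );   /* thread number within the team */
    kmp_int32 nth  = __kmpc_bound_num_threads( loc );  /* size of the innermost team */
    printf( "gtid=%d tid=%d team_size=%d\n", gtid, tid, nth );
}
#endif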
/* __kmpc_ok_to_fork: parallel-region filtering driven by __kmp_par_range */
if (__kmp_par_range == 0) {
    return TRUE;
}
semi2 = strchr(semi2, ';');
semi2 = strchr(semi2 + 1, ';');
if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
        name--;
    }
    if ((*name == '/') || (*name == ';')) {
        name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
        return __kmp_par_range < 0;
    }
}
semi3 = strchr(semi2 + 1, ';');
if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2)
        && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
        return __kmp_par_range < 0;
    }
}
if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
        return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
}

/* __kmpc_in_parallel */
return __kmp_entry_thread() -> th.th_root -> r.r_active;
/* __kmpc_push_num_threads */
KA_TRACE( 20, ( "__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads ) );
__kmp_push_num_threads( loc, global_tid, num_threads );

void
__kmpc_pop_num_threads( ident_t *loc, kmp_int32 global_tid )
{
    KA_TRACE( 20, ( "__kmpc_pop_num_threads: enter\n" ) );
}

void
__kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
{
    KA_TRACE( 20, ( "__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
                    global_tid, proc_bind ) );
    __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
}
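/*
 * Illustrative sketch (not part of this file): roughly how a compiler might
 * lower "#pragma omp parallel num_threads(4)" onto these entry points.  The
 * outlined routine and the exact __kmpc_fork_call prototype used here are
 * assumptions for the example; kmp.h is the authoritative interface.
 */
#if 0
static void example_outlined_body( kmp_int32 *gtid, kmp_int32 *btid )
{
    /* ...body of the parallel region... */
}

static void example_lower_parallel( ident_t *loc )
{
    kmp_int32 gtid = __kmpc_global_thread_num( loc );
    __kmpc_push_num_threads( loc, gtid, 4 );                       /* applies to the next fork only */
    __kmpc_fork_call( loc, 0, (kmpc_micro)example_outlined_body ); /* 0 shared arguments */
}
#endif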
/* __kmpc_fork_call */
int gtid = __kmp_entry_gtid();
#if (KMP_STATS_ENABLED)
va_start( ap, microtask );
int tid = __kmp_tid_from_gtid( gtid );
kmp_info_t *master_th = __kmp_threads[ gtid ];
kmp_team_t *parent_team = master_th->th.th_team;
parent_team->t.t_implicit_task_taskdata[tid].
    ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);
#if INCLUDE_SSC_MARKS
__kmp_fork_call( loc, gtid, fork_context_intel,
        VOLATILE_CAST(void *) microtask,
        VOLATILE_CAST(microtask_t) microtask,
        VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
#if INCLUDE_SSC_MARKS
__kmp_join_call( loc, gtid );
parent_team->t.t_implicit_task_taskdata[tid].
    ompt_task_info.frame.reenter_runtime_frame = 0;

/* __kmpc_push_num_teams */
KA_TRACE( 20, ( "__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
                global_tid, num_teams, num_threads ) );
__kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
/* __kmpc_fork_teams */
int gtid = __kmp_entry_gtid();
kmp_info_t *this_thr = __kmp_threads[ gtid ];
va_start( ap, microtask );

this_thr->th.th_teams_microtask = microtask;
this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level;

kmp_team_t *parent_team = this_thr->th.th_team;
int tid = __kmp_tid_from_gtid( gtid );
parent_team->t.t_implicit_task_taskdata[tid].
    ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(0);

if ( this_thr->th.th_teams_size.nteams == 0 ) {
    __kmp_push_num_teams( loc, gtid, 0, 0 );
}
KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

__kmp_fork_call( loc, gtid, fork_context_intel,
        VOLATILE_CAST(void *) microtask,
        VOLATILE_CAST(microtask_t) __kmp_teams_master,
        VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
__kmp_join_call( loc, gtid );

parent_team->t.t_implicit_task_taskdata[tid].
    ompt_task_info.frame.reenter_runtime_frame = NULL;

this_thr->th.th_teams_microtask = NULL;
this_thr->th.th_teams_level = 0;
*(kmp_int64*)(&this_thr->th.th_teams_size) = 0L;

int
__kmpc_invoke_task_func( int gtid )
{
    return __kmp_invoke_task_func( gtid );
}
/* __kmpc_serialized_parallel */
__kmp_serialized_parallel(loc, global_tid);

/* __kmpc_end_serialized_parallel */
kmp_internal_control_t *top;
kmp_info_t *this_thr;
kmp_team_t *serial_team;

KC_TRACE( 10, ( "__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );

if( ! TCR_4( __kmp_init_parallel ) )
    __kmp_parallel_initialize();

this_thr    = __kmp_threads[ global_tid ];
serial_team = this_thr->th.th_serial_team;

kmp_task_team_t * task_team = this_thr->th.th_task_team;
if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL) );

KMP_DEBUG_ASSERT( serial_team );
KMP_ASSERT( serial_team -> t.t_serialized );
KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );

/* If necessary, pop the internal control stack values and replace the team values */
top = serial_team -> t.t_control_stack_top;
if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
    copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top );
    serial_team -> t.t_control_stack_top = top -> next;
}

serial_team -> t.t_level--;

/* pop dispatch buffers stack */
KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
serial_team->t.t_dispatch->th_disp_buffer =
    serial_team->t.t_dispatch->th_disp_buffer->next;
__kmp_free( disp_buffer );

-- serial_team -> t.t_serialized;
if ( serial_team -> t.t_serialized == 0 ) {

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
        __kmp_clear_x87_fpu_status_word();
        __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
        __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
    }
#endif

    this_thr -> th.th_team = serial_team -> t.t_parent;
    this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;

    this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc;
    this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0];
    this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;

    this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
        t.t_dispatch[ serial_team -> t.t_master_tid ];

    __kmp_pop_current_task_from_thread( this_thr );

    KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
    this_thr -> th.th_current_task -> td_flags.executing = 1;

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
        KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
                        global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
    }
} else {
    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
                        global_tid, serial_team, serial_team -> t.t_serialized ) );
    }
}

if ( __kmp_env_consistency_check )
    __kmp_pop_parallel( global_tid, NULL );
/* __kmpc_flush */
KC_TRACE( 10, ( "__kmpc_flush: called\n" ) );

#if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
if ( ! __kmp_cpuinfo.initialized ) {
    __kmp_query_cpuid( & __kmp_cpuinfo );
}
if ( ! __kmp_cpuinfo.sse2 ) {
#elif KMP_COMPILER_MSVC
__sync_synchronize();
#endif // KMP_COMPILER_ICC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
#error Unknown or unsupported architecture

/* __kmpc_barrier */
KC_TRACE( 10, ( "__kmpc_barrier: called T#%d\n", global_tid ) );

if (! TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

if ( __kmp_env_consistency_check ) {
    if ( loc == 0 ) {
        KMP_WARNING( ConstructIdentInvalid );
    }
    __kmp_check_barrier( global_tid, ct_barrier, loc );
}

__kmp_threads[ global_tid ]->th.th_ident = loc;
__kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
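/*
 * Illustrative sketch (not part of this file): an explicit "#pragma omp barrier"
 * is typically lowered to a single call of __kmpc_barrier with the current
 * global thread number.  The wrapper name is hypothetical.
 */
#if 0
static void example_explicit_barrier( ident_t *loc )
{
    __kmpc_barrier( loc, __kmpc_global_thread_num( loc ) );
}
#endif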
/* __kmpc_master */
KC_TRACE( 10, ( "__kmpc_master: called T#%d\n", global_tid ) );

if( ! TCR_4( __kmp_init_parallel ) )
    __kmp_parallel_initialize();

if( KMP_MASTER_GTID( global_tid )) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
}

#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled &&
    ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
    kmp_info_t *this_thr = __kmp_threads[ global_tid ];
    kmp_team_t *team = this_thr -> th.th_team;

    int tid = __kmp_tid_from_gtid( global_tid );
    ompt_callbacks.ompt_callback(ompt_event_master_begin)(
        team->t.ompt_team_info.parallel_id,
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
}
#endif

if ( __kmp_env_consistency_check ) {
#if KMP_USE_DYNAMIC_LOCK
    __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 );
    __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 );
#else
    __kmp_push_sync( global_tid, ct_master, loc, NULL );
    __kmp_check_sync( global_tid, ct_master, loc, NULL );
#endif
}

/* __kmpc_end_master */
KC_TRACE( 10, ( "__kmpc_end_master: called T#%d\n", global_tid ) );

KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
kmp_team_t *team = this_thr -> th.th_team;
if (ompt_enabled &&
    ompt_callbacks.ompt_callback(ompt_event_master_end)) {
    int tid = __kmp_tid_from_gtid( global_tid );
    ompt_callbacks.ompt_callback(ompt_event_master_end)(
        team->t.ompt_team_info.parallel_id,
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
}
#endif

if ( __kmp_env_consistency_check ) {
    KMP_WARNING( ThreadIdentInvalid );
    if( KMP_MASTER_GTID( global_tid ))
        __kmp_pop_sync( global_tid, ct_master, loc );
}
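/*
 * Illustrative sketch (not part of this file): "#pragma omp master" is lowered
 * to a guarded region; only the thread for which __kmpc_master returns 1 runs
 * the body and then calls __kmpc_end_master.  The helper name is hypothetical.
 */
#if 0
static void example_master_region( ident_t *loc, kmp_int32 gtid )
{
    if ( __kmpc_master( loc, gtid ) ) {
        /* ...master-only work... */
        __kmpc_end_master( loc, gtid );
    }
}
#endif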
/* __kmpc_ordered */
KMP_DEBUG_ASSERT( __kmp_init_serial );

KC_TRACE( 10, ( "__kmpc_ordered: called T#%d\n", gtid ));

if (! TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

__kmp_itt_ordered_prep( gtid );

th = __kmp_threads[ gtid ];

#if OMPT_SUPPORT && OMPT_TRACE
th->th.ompt_thread_info.wait_id = (uint64_t) loc;
th->th.ompt_thread_info.state = ompt_state_wait_ordered;

if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
    ompt_callbacks.ompt_callback(ompt_event_wait_ordered)(
        th->th.ompt_thread_info.wait_id);
}
#endif

if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
    (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
else
    __kmp_parallel_deo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_TRACE
th->th.ompt_thread_info.state = ompt_state_work_parallel;
th->th.ompt_thread_info.wait_id = 0;

if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
    ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)(
        th->th.ompt_thread_info.wait_id);
}
#endif

__kmp_itt_ordered_start( gtid );

/* __kmpc_end_ordered */
KC_TRACE( 10, ( "__kmpc_end_ordered: called T#%d\n", gtid ) );

__kmp_itt_ordered_end( gtid );

th = __kmp_threads[ gtid ];

if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
    (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
else
    __kmp_parallel_dxo( & gtid, & cid, loc );

#if OMPT_SUPPORT && OMPT_BLAME
if (ompt_enabled &&
    ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
    ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
        th->th.ompt_thread_info.wait_id);
}
#endif
#if KMP_USE_DYNAMIC_LOCK

/* internal: lazily install an indirect lock behind a critical-section pointer */
static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_indirect_locktag_t tag)
{
    kmp_indirect_lock_t **lck;
    lck = (kmp_indirect_lock_t **)crit;
    kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
    KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
    KMP_SET_I_LOCK_LOCATION(ilk, loc);
    KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
    KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
    __kmp_itt_critical_creating(ilk->lock, loc);
    int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk);
    if (status == 0) {
        __kmp_itt_critical_destroyed(ilk->lock);
    }
    KMP_DEBUG_ASSERT(*lck != NULL);
}

/* fast-path lock macros used when KMP_USE_INLINED_TAS / KMP_USE_INLINED_FUTEX are enabled */

#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) {                                                              \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                                         \
    if (l->lk.poll != KMP_LOCK_FREE(tas) ||                                                             \
        ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
        KMP_FSYNC_PREPARE(l);                                                                           \
        KMP_INIT_YIELD(spins);                                                                          \
        if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {                   \
            KMP_YIELD_SPIN(spins);                                                                      \
        }                                                                                               \
        kmp_backoff_t backoff = __kmp_spin_backoff_params;                                              \
        while (l->lk.poll != KMP_LOCK_FREE(tas) ||                                                      \
               ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \
            __kmp_spin_backoff(&backoff);                                                               \
            if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {               \
                KMP_YIELD_SPIN(spins);                                                                  \
            }                                                                                           \
        }                                                                                               \
    }                                                                                                   \
    KMP_FSYNC_ACQUIRED(l);                                                                              \
}

#define KMP_TEST_TAS_LOCK(lock, gtid, rc) {                                                             \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                                         \
    rc = l->lk.poll == KMP_LOCK_FREE(tas) &&                                                            \
         KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas));    \
}

#define KMP_RELEASE_TAS_LOCK(lock, gtid) {                                                              \
    TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas));                                       \
}

#if KMP_USE_FUTEX

# include <sys/syscall.h>
# define FUTEX_WAIT 0
# define FUTEX_WAKE 1

#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) {                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                                   \
    kmp_int32 gtid_code = (gtid+1) << 1;                                                                \
    KMP_FSYNC_PREPARE(ftx);                                                                             \
    kmp_int32 poll_val;                                                                                 \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),               \
            KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {                                \
        kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                                                  \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \
            continue;                                                                                   \
        }                                                                                               \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                                            \
        if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) {    \
            continue;                                                                                   \
        }                                                                                               \
    }                                                                                                   \
    KMP_FSYNC_ACQUIRED(ftx);                                                                            \
}

#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) {                                                           \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                                   \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1 << 1, futex))) { \
        KMP_FSYNC_ACQUIRED(ftx);                                                                        \
    }                                                                                                   \
}

#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) {                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                                                   \
    KMP_FSYNC_RELEASING(ftx);                                                                           \
    kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));                       \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                                                 \
        syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);       \
    }                                                                                                   \
    KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));                  \
}

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p
__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
{
    kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

    kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );

    if ( lck == NULL ) {
        lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
        __kmp_init_user_lock_with_checks( lck );
        __kmp_set_user_lock_location( lck, loc );
        __kmp_itt_critical_creating( lck );

        int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
        if ( status == 0 ) {
            /* another thread beat us to it; use the lock it installed */
            __kmp_itt_critical_destroyed( lck );
            __kmp_destroy_user_lock_with_checks( lck );
            __kmp_user_lock_free( &idx, gtid, lck );
            lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
            KMP_DEBUG_ASSERT( lck != NULL );
        }
    }
    return lck;
}
#endif // KMP_USE_DYNAMIC_LOCK

/* __kmpc_critical */
#if KMP_USE_DYNAMIC_LOCK
__kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
KMP_TIME_PARTITIONED_BLOCK(OMP_critical_wait);
kmp_user_lock_p lck;

KC_TRACE( 10, ( "__kmpc_critical: called T#%d\n", global_tid ) );

KMP_CHECK_USER_LOCK_INIT();

if ( ( __kmp_user_lock_kind == lk_tas )
     && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
    lck = (kmp_user_lock_p)crit;
}
else if ( ( __kmp_user_lock_kind == lk_futex )
          && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
    lck = (kmp_user_lock_p)crit;
}
else {
    lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
}

if ( __kmp_env_consistency_check )
    __kmp_push_sync( global_tid, ct_critical, loc, lck );

__kmp_itt_critical_acquiring( lck );
__kmp_acquire_user_lock_with_checks( lck, global_tid );
__kmp_itt_critical_acquired( lck );

KA_TRACE( 15, ( "__kmpc_critical: done T#%d\n", global_tid ));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

/* map an OpenMP lock hint to an internal dynamic lock sequence */
static __forceinline kmp_dyna_lockseq_t
__kmp_map_hint_to_lock(uintptr_t hint)
{
#if KMP_USE_TSX
# define KMP_TSX_LOCK(seq) lockseq_##seq
#else
# define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
# define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
# define KMP_CPUINFO_RTM 0
#endif

    /* hints that do not require further logic */
    if (hint & kmp_lock_hint_hle)
        return KMP_TSX_LOCK(hle);
    if (hint & kmp_lock_hint_rtm)
        return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm): __kmp_user_lock_seq;
    if (hint & kmp_lock_hint_adaptive)
        return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive): __kmp_user_lock_seq;

    /* contradictory hint combinations fall back to the default lock */
    if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
        return __kmp_user_lock_seq;
    if ((hint & omp_lock_hint_speculative) && (hint & omp_lock_hint_nonspeculative))
        return __kmp_user_lock_seq;

    if (hint & omp_lock_hint_contended)
        return lockseq_queuing;

    if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
        return lockseq_tas;

    if (hint & omp_lock_hint_speculative)
        return KMP_TSX_LOCK(hle);

    return __kmp_user_lock_seq;
}
void
__kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit, uintptr_t hint )
{
    kmp_user_lock_p lck;

    KC_TRACE( 10, ( "__kmpc_critical: called T#%d\n", global_tid ) );

    kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
        KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(lckseq));
    } else {
        __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }

    if (KMP_EXTRACT_D_TAG(lk) != 0) {
        /* direct (fast) lock stored inline in the critical name */
        lck = (kmp_user_lock_p)lk;
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
        }
        __kmp_itt_critical_acquiring(lck);
# if KMP_USE_INLINED_TAS
        if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
            KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
        }
# elif KMP_USE_INLINED_FUTEX
        if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
            KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
        }
# endif
        KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    } else {
        /* indirect lock: the critical name holds a pointer to the lock object */
        kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint));
        }
        __kmp_itt_critical_acquiring(lck);
        KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
    }

    __kmp_itt_critical_acquired( lck );

    KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
    KA_TRACE( 15, ( "__kmpc_critical: done T#%d\n", global_tid ));
}
#endif // KMP_USE_DYNAMIC_LOCK

/* __kmpc_end_critical */
kmp_user_lock_p lck;
KC_TRACE( 10, ( "__kmpc_end_critical: called T#%d\n", global_tid ));

#if KMP_USE_DYNAMIC_LOCK
if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
        __kmp_pop_sync(global_tid, ct_critical, loc);
    }
    __kmp_itt_critical_releasing( lck );
# if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
        KMP_RELEASE_TAS_LOCK(lck, global_tid);
    }
# elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
        KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    }
# endif
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
} else {
    kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    if (__kmp_env_consistency_check) {
        __kmp_pop_sync(global_tid, ct_critical, loc);
    }
    __kmp_itt_critical_releasing( lck );
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
}

#else // KMP_USE_DYNAMIC_LOCK

if ( ( __kmp_user_lock_kind == lk_tas )
     && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
    lck = (kmp_user_lock_p)crit;
}
else if ( ( __kmp_user_lock_kind == lk_futex )
          && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
    lck = (kmp_user_lock_p)crit;
}
else {
    lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
}

KMP_ASSERT(lck != NULL);

if ( __kmp_env_consistency_check )
    __kmp_pop_sync( global_tid, ct_critical, loc );

__kmp_itt_critical_releasing( lck );
__kmp_release_user_lock_with_checks( lck, global_tid );

#if OMPT_SUPPORT && OMPT_BLAME
if (ompt_enabled &&
    ompt_callbacks.ompt_callback(ompt_event_release_critical)) {
    ompt_callbacks.ompt_callback(ompt_event_release_critical)(
        (uint64_t) lck);
}
#endif

#endif // KMP_USE_DYNAMIC_LOCK
KMP_POP_PARTITIONED_TIMER();
KA_TRACE( 15, ( "__kmpc_end_critical: done T#%d\n", global_tid ));
/* __kmpc_barrier_master */
KC_TRACE( 10, ( "__kmpc_barrier_master: called T#%d\n", global_tid ) );

if (! TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

if ( __kmp_env_consistency_check )
    __kmp_check_barrier( global_tid, ct_barrier, loc );

__kmp_threads[global_tid]->th.th_ident = loc;
status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );

return (status != 0) ? 0 : 1;

/* __kmpc_end_barrier_master */
KC_TRACE( 10, ( "__kmpc_end_barrier_master: called T#%d\n", global_tid ));
__kmp_end_split_barrier ( bs_plain_barrier, global_tid );

/* __kmpc_barrier_master_nowait */
KC_TRACE( 10, ( "__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));

if (! TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

if ( __kmp_env_consistency_check ) {
    if ( loc == 0 ) {
        KMP_WARNING( ConstructIdentInvalid );
    }
    __kmp_check_barrier( global_tid, ct_barrier, loc );
}

__kmp_threads[global_tid]->th.th_ident = loc;
__kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

if ( __kmp_env_consistency_check ) {
    if ( global_tid < 0 ) {
        KMP_WARNING( ThreadIdentInvalid );
    }
    __kmp_pop_sync( global_tid, ct_master, loc );
}
/* __kmpc_single */
kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );

KMP_PUSH_PARTITIONED_TIMER(OMP_single);

#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
kmp_team_t *team = this_thr -> th.th_team;
int tid = __kmp_tid_from_gtid( global_tid );

if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
    ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)(
        team->t.ompt_team_info.parallel_id,
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
        team->t.ompt_team_info.microtask);
}
if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
    ompt_callbacks.ompt_callback(ompt_event_single_others_begin)(
        team->t.ompt_team_info.parallel_id,
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
}
this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
#endif

/* __kmpc_end_single */
__kmp_exit_single( global_tid );
KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
kmp_team_t *team = this_thr -> th.th_team;
int tid = __kmp_tid_from_gtid( global_tid );

if (ompt_enabled &&
    ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
    ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)(
        team->t.ompt_team_info.parallel_id,
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
}
#endif
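/*
 * Illustrative sketch (not part of this file): "#pragma omp single" is lowered
 * so that only the thread for which __kmpc_single returns 1 executes the block
 * and then calls __kmpc_end_single; the others skip it.  The helper name is
 * hypothetical.
 */
#if 0
static void example_single_region( ident_t *loc, kmp_int32 gtid )
{
    if ( __kmpc_single( loc, gtid ) ) {
        /* ...work executed by exactly one thread... */
        __kmpc_end_single( loc, gtid );
    }
    __kmpc_barrier( loc, gtid );   /* implicit barrier unless nowait was given */
}
#endif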
/* __kmpc_for_static_fini */
KE_TRACE( 10, ( "__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_TRACE
if (ompt_enabled &&
    ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
    ompt_callbacks.ompt_callback(ompt_event_loop_end)(
        team_info->parallel_id, task_info->task_id);
}
#endif

if ( __kmp_env_consistency_check )
    __kmp_pop_workshare( global_tid, ct_pdo, loc );
/* OpenMP / KMP API wrappers around the internal auxiliary routines */

void
ompc_set_num_threads( int arg )
{
    __kmp_set_num_threads( arg, __kmp_entry_gtid() );
}

void
ompc_set_dynamic( int flag )
{
    kmp_info_t *thread;
    thread = __kmp_entry_thread();
    __kmp_save_internal_controls( thread );
    set__dynamic( thread, flag ? TRUE : FALSE );
}

void
ompc_set_nested( int flag )
{
    kmp_info_t *thread;
    thread = __kmp_entry_thread();
    __kmp_save_internal_controls( thread );
    set__nested( thread, flag ? TRUE : FALSE );
}

void
ompc_set_max_active_levels( int max_active_levels )
{
    __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
}

void
ompc_set_schedule( omp_sched_t kind, int modifier )
{
    __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
}

int
ompc_get_ancestor_thread_num( int level )
{
    return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
}

int
ompc_get_team_size( int level )
{
    return __kmp_get_team_size( __kmp_entry_gtid(), level );
}

void
kmpc_set_stacksize( int arg )
{
    __kmp_aux_set_stacksize( arg );
}

void
kmpc_set_stacksize_s( size_t arg )
{
    __kmp_aux_set_stacksize( arg );
}

void
kmpc_set_blocktime( int arg )
{
    int gtid, tid;
    kmp_info_t *thread;

    gtid = __kmp_entry_gtid();
    tid = __kmp_tid_from_gtid(gtid);
    thread = __kmp_thread_from_gtid(gtid);

    __kmp_aux_set_blocktime( arg, thread, tid );
}

void
kmpc_set_library( int arg )
{
    __kmp_user_set_library( (enum library_type)arg );
}

void
kmpc_set_defaults( char const * str )
{
    __kmp_aux_set_defaults( str, KMP_STRLEN( str ) );
}

void
kmpc_set_disp_num_buffers( int arg )
{
    /* ignored after serial initialization: teams may already hold dispatch buffers */
    if( __kmp_init_serial == 0 && arg > 0 )
        __kmp_dispatch_num_buffers = arg;
}

int
kmpc_set_affinity_mask_proc( int proc, void **mask )
{
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
    return -1;
#else
    if ( ! TCR_4(__kmp_init_middle) ) {
        __kmp_middle_initialize();
    }
    return __kmp_aux_set_affinity_mask_proc( proc, mask );
#endif
}

int
kmpc_unset_affinity_mask_proc( int proc, void **mask )
{
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
    return -1;
#else
    if ( ! TCR_4(__kmp_init_middle) ) {
        __kmp_middle_initialize();
    }
    return __kmp_aux_unset_affinity_mask_proc( proc, mask );
#endif
}

int
kmpc_get_affinity_mask_proc( int proc, void **mask )
{
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
    return -1;
#else
    if ( ! TCR_4(__kmp_init_middle) ) {
        __kmp_middle_initialize();
    }
    return __kmp_aux_get_affinity_mask_proc( proc, mask );
#endif
}
/* __kmpc_copyprivate */
KC_TRACE( 10, ( "__kmpc_copyprivate: called T#%d\n", gtid ));

data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;

if ( __kmp_env_consistency_check ) {
    if ( loc == 0 ) {
        KMP_WARNING( ConstructIdentInvalid );
    }
}

/* the thread that executed the single stores its private data */
if (didit) *data_ptr = cpy_data;

/* barrier so every thread sees *data_ptr before copying */
__kmp_threads[gtid]->th.th_ident = loc;
__kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );

if (! didit) (*cpy_func)( cpy_data, *data_ptr );

/* second barrier: the source data must stay alive until all copies finish */
__kmp_threads[gtid]->th.th_ident = loc;
__kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
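/*
 * Illustrative sketch (not part of this file): how a compiler might lower
 * "#pragma omp single copyprivate(x)".  The __kmpc_copyprivate prototype used
 * here (with cpy_size and didit) and the helper names are assumptions for the
 * example; kmp.h is the authoritative interface.
 */
#if 0
static void example_copy_int( void *dst, void *src )
{
    *(int *)dst = *(int *)src;               /* copy routine generated per copyprivate list */
}

static void example_single_copyprivate( ident_t *loc, kmp_int32 gtid, int *x )
{
    kmp_int32 didit = 0;
    if ( __kmpc_single( loc, gtid ) ) {
        *x = 42;                             /* value produced by the single thread */
        didit = 1;
        __kmpc_end_single( loc, gtid );
    }
    __kmpc_copyprivate( loc, gtid, sizeof(int), x, example_copy_int, didit );
}
#endif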
/* lock API entry points map onto the user-lock layer via these aliases */
#define INIT_LOCK                 __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK          __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK              __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED        __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK       __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK              __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK       __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK                 __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK          __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK              __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK       __kmp_destroy_nested_user_lock_with_checks

#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_lock_with_hint( ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
{
    if (KMP_IS_D_LOCK(seq)) {
        KMP_INIT_D_LOCK(lock, seq);
        __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
    } else {
        KMP_INIT_I_LOCK(lock, seq);
        kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
        __kmp_itt_lock_creating(ilk->lock, loc);
    }
}

static __forceinline void
__kmp_init_nest_lock_with_hint( ident_t *loc, void **lock, kmp_dyna_lockseq_t seq)
{
    /* nested locks are always indirect: map the requested sequence to its nested variant */
    if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
        seq = __kmp_user_lock_seq;
    switch (seq) {
        seq = lockseq_nested_tas;
        seq = lockseq_nested_futex;
    case lockseq_ticket:
        seq = lockseq_nested_ticket;
    case lockseq_queuing:
        seq = lockseq_nested_queuing;
        seq = lockseq_nested_drdpa;
        seq = lockseq_nested_queuing;
    }
    KMP_INIT_I_LOCK(lock, seq);
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
}
void
__kmpc_init_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
{
    KMP_DEBUG_ASSERT(__kmp_init_serial);
    if (__kmp_env_consistency_check && user_lock == NULL) {
        KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
    }
    __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
}

void
__kmpc_init_nest_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint)
{
    KMP_DEBUG_ASSERT(__kmp_init_serial);
    if (__kmp_env_consistency_check && user_lock == NULL) {
        KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
    }
    __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
}
#endif // KMP_USE_DYNAMIC_LOCK

/* initialize the lock */
void
__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK
    KMP_DEBUG_ASSERT(__kmp_init_serial);
    if (__kmp_env_consistency_check && user_lock == NULL) {
        KMP_FATAL(LockIsUninitialized, "omp_init_lock");
    }
    __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#else // KMP_USE_DYNAMIC_LOCK

    static char const * const func = "omp_init_lock";
    kmp_user_lock_p lck;
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    KMP_CHECK_USER_LOCK_INIT();

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
    }

    INIT_LOCK( lck );
    __kmp_set_user_lock_location( lck, loc );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_init_lock)) {
        ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck);
    }
#endif

    __kmp_itt_lock_creating( lck );
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock

/* initialize the nestable lock */
void
__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK
    KMP_DEBUG_ASSERT(__kmp_init_serial);
    if (__kmp_env_consistency_check && user_lock == NULL) {
        KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
    }
    __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#else // KMP_USE_DYNAMIC_LOCK

    static char const * const func = "omp_init_nest_lock";
    kmp_user_lock_p lck;
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( __kmp_env_consistency_check ) {
        if ( user_lock == NULL ) {
            KMP_FATAL( LockIsUninitialized, func );
        }
    }

    KMP_CHECK_USER_LOCK_INIT();

    if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
        + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
                   <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_user_lock_allocate( user_lock, gtid, 0 );
    }

    INIT_NESTED_LOCK( lck );
    __kmp_set_user_lock_location( lck, loc );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) {
        ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck);
    }
#endif

    __kmp_itt_lock_creating( lck );
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock

void
__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
        lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
        lck = (kmp_user_lock_p)user_lock;
    }
    __kmp_itt_lock_destroyed(lck);
    KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
    }

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) {
        ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck);
    }
#endif

    __kmp_itt_lock_destroyed( lck );
    DESTROY_LOCK( lck );

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        /* nothing to free: the lock lives inside the user's lock variable */
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        /* nothing to free */
    }
    else {
        __kmp_user_lock_free( user_lock, gtid, lck );
    }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock

void
__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK

    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
    __kmp_itt_lock_destroyed(ilk->lock);
    KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
        + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
                   <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
    }

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) {
        ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck);
    }
#endif

    __kmp_itt_lock_destroyed( lck );
    DESTROY_NESTED_LOCK( lck );

    if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
        + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        /* nothing to free: the lock lives inside the user's lock variable */
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
                   <= OMP_NEST_LOCK_T_SIZE ) ) {
        /* nothing to free */
    }
    else {
        __kmp_user_lock_free( user_lock, gtid, lck );
    }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock

void
__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK
    int tag = KMP_EXTRACT_D_TAG(user_lock);
    __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
# if KMP_USE_INLINED_TAS
    if (tag == locktag_tas && !__kmp_env_consistency_check) {
        KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
    }
# elif KMP_USE_INLINED_FUTEX
    if (tag == locktag_futex && !__kmp_env_consistency_check) {
        KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
    }
# endif
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
    }

    __kmp_itt_lock_acquiring( lck );
    ACQUIRE_LOCK( lck, gtid );
    __kmp_itt_lock_acquired( lck );

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) {
        ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck);
    }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_set_lock

void
__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
#if KMP_USE_DYNAMIC_LOCK
    __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
    KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_TRACE
#endif

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
        + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
                   <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
    }

    __kmp_itt_lock_acquiring( lck );
    ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status );
    __kmp_itt_lock_acquired( lck );

#if OMPT_SUPPORT && OMPT_TRACE
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
        if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first))
            ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck);
    } else {
        if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next))
            ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck);
    }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_set_nest_lock

void
__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
#if KMP_USE_DYNAMIC_LOCK
    int tag = KMP_EXTRACT_D_TAG(user_lock);
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
# if KMP_USE_INLINED_TAS
    if (tag == locktag_tas && !__kmp_env_consistency_check) {
        KMP_RELEASE_TAS_LOCK(user_lock, gtid);
    }
# elif KMP_USE_INLINED_FUTEX
    if (tag == locktag_futex && !__kmp_env_consistency_check) {
        KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
    }
# endif
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        /* "fast" path: release the embedded TAS lock directly */
        __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
        TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
        return;
#else
        lck = (kmp_user_lock_p)user_lock;
#endif
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
    }

    __kmp_itt_lock_releasing( lck );
    RELEASE_LOCK( lck, gtid );

#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_release_lock)) {
        ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
    }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_unset_lock

void
__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
#if KMP_USE_DYNAMIC_LOCK
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
    KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
        + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        /* "fast" path: operate on the embedded nested TAS lock directly */
        kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
        __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
        if ( --(tl->lk.depth_locked) == 0 ) {
            TCW_4(tl->lk.poll, 0);
        }
        return;
#else
        lck = (kmp_user_lock_p)user_lock;
#endif
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
                   <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
    }

    __kmp_itt_lock_releasing( lck );

    release_status = RELEASE_NESTED_LOCK( lck, gtid );
#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled) {
        if (release_status == KMP_LOCK_RELEASED) {
            if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) {
                ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)(
                    (uint64_t) lck);
            }
        } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) {
            ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)(
                (uint64_t) lck);
        }
    }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_unset_nest_lock

/* __kmpc_test_lock: non-blocking acquire */
int
__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
#if KMP_USE_DYNAMIC_LOCK
    int tag = KMP_EXTRACT_D_TAG(user_lock);
    __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
# if KMP_USE_INLINED_TAS
    if (tag == locktag_tas && !__kmp_env_consistency_check) {
        KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
    }
# elif KMP_USE_INLINED_FUTEX
    if (tag == locktag_futex && !__kmp_env_consistency_check) {
        KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
    }
# endif
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
    if (rc) {
        __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
    } else {
        __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    }

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas )
         && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
    }

    __kmp_itt_lock_acquiring( lck );

    rc = TEST_LOCK( lck, gtid );
    if ( rc ) {
        __kmp_itt_lock_acquired( lck );
    } else {
        __kmp_itt_lock_cancelled( lck );
    }
    return ( rc ? FTN_TRUE : FTN_FALSE );
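/*
 * Illustrative sketch (not part of this file): the omp_*_lock user API maps
 * onto the __kmpc_*_lock entry points above, with the lock object passed as a
 * void**.  The helper name is hypothetical.
 */
#if 0
static void example_lock_usage( ident_t *loc, kmp_int32 gtid )
{
    void *lk = NULL;                               /* storage the compiler allocates for omp_lock_t */
    __kmpc_init_lock( loc, gtid, &lk );
    __kmpc_set_lock( loc, gtid, &lk );             /* blocking acquire */
    /* ...critical work... */
    __kmpc_unset_lock( loc, gtid, &lk );
    if ( __kmpc_test_lock( loc, gtid, &lk ) ) {    /* non-blocking acquire, nonzero on success */
        __kmpc_unset_lock( loc, gtid, &lk );
    }
    __kmpc_destroy_lock( loc, gtid, &lk );
}
#endif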
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_test_lock

/* __kmpc_test_nest_lock: non-blocking acquire of a nestable lock */
int
__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
#if KMP_USE_DYNAMIC_LOCK
    __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
    rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
    if (rc) {
        __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
    } else {
        __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    }

#else // KMP_USE_DYNAMIC_LOCK

    kmp_user_lock_p lck;

    if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
        + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else if ( ( __kmp_user_lock_kind == lk_futex )
              && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
                   <= OMP_NEST_LOCK_T_SIZE ) ) {
        lck = (kmp_user_lock_p)user_lock;
    }
    else {
        lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
    }

    __kmp_itt_lock_acquiring( lck );

    rc = TEST_NESTED_LOCK( lck, gtid );
    if ( rc ) {
        __kmp_itt_lock_acquired( lck );
    } else {
        __kmp_itt_lock_cancelled( lck );
    }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_test_nest_lock

/* reduction support */

#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
        ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )

#define __KMP_GET_REDUCTION_METHOD(gtid) \
        ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )

static __forceinline void
__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {

    kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

    kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
        KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
        __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq));
    }

    if (KMP_EXTRACT_D_TAG(lk) != 0) {
        lck = (kmp_user_lock_p)lk;
        KMP_DEBUG_ASSERT(lck != NULL);
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
        }
        KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    } else {
        kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
        KMP_DEBUG_ASSERT(lck != NULL);
        if (__kmp_env_consistency_check) {
            __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
        }
        KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
    }

#else // KMP_USE_DYNAMIC_LOCK

    if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
        lck = (kmp_user_lock_p)crit;
    }
    else {
        lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
    }
    KMP_DEBUG_ASSERT( lck != NULL );

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_critical, loc, lck );

    __kmp_acquire_user_lock_with_checks( lck, global_tid );
#endif // KMP_USE_DYNAMIC_LOCK
}

static __forceinline void
__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {

    kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
        lck = (kmp_user_lock_p)crit;
        if (__kmp_env_consistency_check)
            __kmp_pop_sync(global_tid, ct_critical, loc);
        KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    } else {
        kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
        if (__kmp_env_consistency_check)
            __kmp_pop_sync(global_tid, ct_critical, loc);
        KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
    }

#else // KMP_USE_DYNAMIC_LOCK

    if ( __kmp_base_user_lock_size > 32 ) {
        lck = *( (kmp_user_lock_p *) crit );
        KMP_ASSERT( lck != NULL );
    } else {
        lck = (kmp_user_lock_p) crit;
    }

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( global_tid, ct_critical, loc );

    __kmp_release_user_lock_with_checks( lck, global_tid );
#endif // KMP_USE_DYNAMIC_LOCK
}

kmp_int32
__kmpc_reduce_nowait(
    ident_t *loc, kmp_int32 global_tid,
    kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck ) {

    PACKED_REDUCTION_METHOD_T packed_reduction_method;
    int teams_swapped = 0, task_state;

    KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    /* check correctness of reduce block nesting */
#if KMP_USE_DYNAMIC_LOCK
    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
#else
    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
#endif

#if OMP_40_ENABLED
    th = __kmp_thread_from_gtid(global_tid);
    if( th->th.th_teams_microtask ) {
        /* reduction directly inside a teams construct: temporarily act as part of the parent team */
        team = th->th.th_team;
        if( team->t.t_level == th->th.th_teams_level ) {
            KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid);
            teams_swapped = 1;
            th->th.th_info.ds.ds_tid = team->t.t_master_tid;
            th->th.th_team = team->t.t_parent;
            th->th.th_team_nproc = th->th.th_team->t.t_nproc;
            th->th.th_task_team = th->th.th_team->t.t_task_team[0];
            task_state = th->th.th_task_state;
            th->th.th_task_state = 0;
        }
    }
#endif // OMP_40_ENABLED

    packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
    __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );

    } else if( packed_reduction_method == empty_reduce_block ) {

    } else if( packed_reduction_method == atomic_reduce_block ) {

        if ( __kmp_env_consistency_check )
            __kmp_pop_sync( global_tid, ct_reduce, loc );

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

        __kmp_threads[global_tid]->th.th_ident = loc;
        retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
        retval = ( retval != 0 ) ? ( 0 ) : ( 1 );

        if ( __kmp_env_consistency_check ) {
            __kmp_pop_sync( global_tid, ct_reduce, loc );
        }
    }

#if OMP_40_ENABLED
    if( teams_swapped ) {
        /* restore the original teams state */
        th->th.th_info.ds.ds_tid = 0;
        th->th.th_team = team;
        th->th.th_team_nproc = team->t.t_nproc;
        th->th.th_task_team = team->t.t_task_team[task_state];
        th->th.th_task_state = task_state;
    }
#endif

    KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
PACKED_REDUCTION_METHOD_T packed_reduction_method;

KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );

packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );

if( packed_reduction_method == critical_reduce_block ) {

    __kmp_end_critical_section_reduce_block( loc, global_tid, lck );

} else if( packed_reduction_method == empty_reduce_block ) {

} else if( packed_reduction_method == atomic_reduce_block ) {

} else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

}

if ( __kmp_env_consistency_check )
    __kmp_pop_sync( global_tid, ct_reduce, loc );

KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
kmp_int32
__kmpc_reduce(
    ident_t *loc, kmp_int32 global_tid,
    kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
    void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck )
{
    PACKED_REDUCTION_METHOD_T packed_reduction_method;

    KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    /* check correctness of reduce block nesting */
#if KMP_USE_DYNAMIC_LOCK
    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 );
#else
    if ( __kmp_env_consistency_check )
        __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
#endif

    packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
    __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );

    if( packed_reduction_method == critical_reduce_block ) {

        __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );

    } else if( packed_reduction_method == empty_reduce_block ) {

    } else if( packed_reduction_method == atomic_reduce_block ) {

    } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

        __kmp_threads[global_tid]->th.th_ident = loc;
        retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
        retval = ( retval != 0 ) ? ( 0 ) : ( 1 );

        if ( __kmp_env_consistency_check ) {
            __kmp_pop_sync( global_tid, ct_reduce, loc );
        }
    }

    KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
PACKED_REDUCTION_METHOD_T packed_reduction_method;

KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );

packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );

if( packed_reduction_method == critical_reduce_block ) {

    __kmp_end_critical_section_reduce_block( loc, global_tid, lck );

    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

} else if( packed_reduction_method == empty_reduce_block ) {

    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

} else if( packed_reduction_method == atomic_reduce_block ) {

    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );

} else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {

    __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );

}

if ( __kmp_env_consistency_check )
    __kmp_pop_sync( global_tid, ct_reduce, loc );

KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* returns the identifier of the current task */
kmp_uint64
__kmpc_get_taskid() {

    kmp_int32 gtid;
    kmp_info_t * thread;

    gtid = __kmp_get_gtid();
    if ( gtid < 0 ) {
        return 0;
    }
    thread = __kmp_thread_from_gtid( gtid );
    return thread->th.th_current_task->td_task_id;

}

/* returns the identifier of the parent of the current task */
kmp_uint64
__kmpc_get_parent_taskid() {

    kmp_int32 gtid;
    kmp_info_t * thread;
    kmp_taskdata_t * parent_task;

    gtid = __kmp_get_gtid();
    if ( gtid < 0 ) {
        return 0;
    }
    thread = __kmp_thread_from_gtid( gtid );
    parent_task = thread->th.th_current_task->td_parent;
    return ( parent_task == NULL ? 0 : parent_task->td_task_id );

}
void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
{
    if ( ! __kmp_init_serial ) {
        __kmp_serial_initialize();
    }
    __kmp_place_num_sockets = nS;
    __kmp_place_socket_offset = sO;
    __kmp_place_num_cores = nC;
    __kmp_place_core_offset = cO;
    __kmp_place_num_threads_per_core = nT;
}
void
__kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, struct kmp_dim * dims)
{
    int j, idx;
    kmp_int64 last, trace_count;
    kmp_info_t *th = __kmp_threads[gtid];
    kmp_team_t *team = th->th.th_team;
    kmp_uint32 *flags;
    kmp_disp_t *pr_buf = th->th.th_dispatch;
    dispatch_shared_info_t *sh_buf;

    KA_TRACE(20,("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
                 gtid, num_dims, !team->t.t_serialized));
    KMP_DEBUG_ASSERT(dims != NULL);
    KMP_DEBUG_ASSERT(num_dims > 0);

    if( team->t.t_serialized ) {
        KA_TRACE(20,("__kmpc_doacross_init() exit: serialized team\n"));
        return; // no dependencies if the team is serialized
    }
    KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
    idx = pr_buf->th_doacross_buf_idx++; // advance the private buffer index for the next loop
    sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

    // Save bounds info into the allocated private buffer.
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
    pr_buf->th_doacross_info =
        (kmp_int64*)__kmp_thread_malloc(th, sizeof(kmp_int64)*(4 * num_dims + 1));
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
    pr_buf->th_doacross_info[0] = (kmp_int64)num_dims; // first element is the number of dimensions
    // Also save the address of num_done so it can be reached later without the buffer index.
    pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
    pr_buf->th_doacross_info[2] = dims[0].lo;
    pr_buf->th_doacross_info[3] = dims[0].up;
    pr_buf->th_doacross_info[4] = dims[0].st;
    last = 5;
    for( j = 1; j < num_dims; ++j ) {
        kmp_int64 range_length; // range lengths of all dimensions except dims[0]
        if( dims[j].st == 1 ) { // most common case
            range_length = dims[j].up - dims[j].lo + 1;
        } else {
            if( dims[j].st > 0 ) {
                KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
                range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
            } else { // negative increment
                KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
                range_length = (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
            }
        }
        pr_buf->th_doacross_info[last++] = range_length;
        pr_buf->th_doacross_info[last++] = dims[j].lo;
        pr_buf->th_doacross_info[last++] = dims[j].up;
        pr_buf->th_doacross_info[last++] = dims[j].st;
    }

    // Compute the total trip count, starting with the range of dims[0]
    // (which is not kept in the buffer).
    if( dims[0].st == 1 ) { // most common case
        trace_count = dims[0].up - dims[0].lo + 1;
    } else if( dims[0].st > 0 ) {
        KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
        trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
    } else { // negative increment
        KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
        trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
    }
    for( j = 1; j < num_dims; ++j ) {
        trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use the kept range lengths
    }
    KMP_DEBUG_ASSERT(trace_count > 0);

    // If the shared buffer is still occupied by a previous loop, wait for it to be released.
    if( idx != sh_buf->doacross_buf_idx ) {
        __kmp_wait_yield_4( (kmp_uint32*)&sh_buf->doacross_buf_idx, idx, __kmp_eq_4, NULL );
    }
    // Determine whether we are the first thread: after the CAS the first thread sees NULL,
    // others see 1 while initialization is in progress, or the allocated pointer afterwards.
    flags = (kmp_uint32*)KMP_COMPARE_AND_STORE_RET64(
        (kmp_int64*)&sh_buf->doacross_flags, NULL, (kmp_int64)1);
    if( flags == NULL ) {
        // We are the first thread: allocate the array of iteration flags, one bit per iteration.
        kmp_int64 size = trace_count / 8 + 8; // in bytes
        sh_buf->doacross_flags = (kmp_uint32*)__kmp_thread_calloc(th, size, 1);
    } else if( (kmp_int64)flags == 1 ) {
        // Initialization is still in progress; wait until it completes.
        while( (volatile kmp_int64)sh_buf->doacross_flags == 1 ) {
            KMP_YIELD(TRUE);
        }
    }
    KMP_DEBUG_ASSERT((kmp_int64)sh_buf->doacross_flags > 1); // should be a valid pointer now
    pr_buf->th_doacross_flags = sh_buf->doacross_flags; // private copy, avoids touching the shared buffer per iteration
    KA_TRACE(20,("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void
__kmpc_doacross_wait(ident_t *loc, int gtid, long long *vec)
{
    kmp_int32 shft, num_dims, i;
    kmp_uint32 flag;
    kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
    kmp_info_t *th = __kmp_threads[gtid];
    kmp_team_t *team = th->th.th_team;
    kmp_disp_t *pr_buf;
    kmp_int64 lo, up, st;

    KA_TRACE(20,("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
    if( team->t.t_serialized ) {
        KA_TRACE(20,("__kmpc_doacross_wait() exit: serialized team\n"));
        return; // no dependencies if the team is serialized
    }

    // Calculate the sequential iteration number and check for out-of-bounds values.
    pr_buf = th->th.th_dispatch;
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
    num_dims = pr_buf->th_doacross_info[0];
    lo = pr_buf->th_doacross_info[2];
    up = pr_buf->th_doacross_info[3];
    st = pr_buf->th_doacross_info[4];
    if( st == 1 ) { // most common case
        if( vec[0] < lo || vec[0] > up ) {
            KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
                         gtid, vec[0], lo, up));
            return;
        }
        iter_number = vec[0] - lo;
    } else if( st > 0 ) {
        if( vec[0] < lo || vec[0] > up ) {
            KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
                         gtid, vec[0], lo, up));
            return;
        }
        iter_number = (kmp_uint64)(vec[0] - lo) / st;
    } else { // negative increment
        if( vec[0] > lo || vec[0] < up ) {
            KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
                         gtid, vec[0], lo, up));
            return;
        }
        iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
    }
    for( i = 1; i < num_dims; ++i ) {
        kmp_int64 iter, ln;
        kmp_int32 j = i * 4;
        ln = pr_buf->th_doacross_info[j + 1];
        lo = pr_buf->th_doacross_info[j + 2];
        up = pr_buf->th_doacross_info[j + 3];
        st = pr_buf->th_doacross_info[j + 4];
        if( st == 1 ) {
            if( vec[i] < lo || vec[i] > up ) {
                KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
                             gtid, vec[i], lo, up));
                return;
            }
            iter = vec[i] - lo;
        } else if( st > 0 ) {
            if( vec[i] < lo || vec[i] > up ) {
                KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
                             gtid, vec[i], lo, up));
                return;
            }
            iter = (kmp_uint64)(vec[i] - lo) / st;
        } else { // st < 0
            if( vec[i] > lo || vec[i] < up ) {
                KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n",
                             gtid, vec[i], lo, up));
                return;
            }
            iter = (kmp_uint64)(lo - vec[i]) / (-st);
        }
        iter_number = iter + ln * iter_number; // fold this dimension into the linear index
    }
    shft = iter_number % 32; // use 32-bit flag granularity
    iter_number >>= 5;       // index of the 32-bit flag word
    flag = 1 << shft;
    while( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 ) {
        KMP_YIELD(TRUE); // spin until the awaited iteration is posted
    }
    KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
                 gtid, (iter_number<<5)+shft));
}
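/*
 * Illustrative only: a standalone sketch of the index linearization used by
 * __kmpc_doacross_wait()/__kmpc_doacross_post(), restricted to positive strides for
 * brevity. Worked example: for a 2-D nest with dim 0 = {lo=0, up=9, st=1} and
 * dim 1 = {lo=0, up=4, st=1} (range length 5), the vector {3, 2} maps to 2 + 5*3 = 17,
 * i.e. bit 17 % 32 = 17 of flag word 17 >> 5 = 0.
 */
static kmp_uint64 doacross_linear_index(int num_dims, const long long *vec,
                                        const kmp_int64 *lo, const kmp_int64 *st,
                                        const kmp_int64 *range_len /* range_len[0] unused */) {
    kmp_uint64 iter_number = (kmp_uint64)(vec[0] - lo[0]) / st[0];
    for (int i = 1; i < num_dims; ++i) {
        kmp_uint64 iter = (kmp_uint64)(vec[i] - lo[i]) / st[i];
        iter_number = iter + (kmp_uint64)range_len[i] * iter_number;
    }
    return iter_number; // flag word = iter_number >> 5, bit within the word = iter_number % 32
}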
void
__kmpc_doacross_post(ident_t *loc, int gtid, long long *vec)
{
    kmp_int32 shft, num_dims, i;
    kmp_uint32 flag;
    kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
    kmp_info_t *th = __kmp_threads[gtid];
    kmp_team_t *team = th->th.th_team;
    kmp_disp_t *pr_buf;
    kmp_int64 lo, st;

    KA_TRACE(20,("__kmpc_doacross_post() enter: called T#%d\n", gtid));
    if( team->t.t_serialized ) {
        KA_TRACE(20,("__kmpc_doacross_post() exit: serialized team\n"));
        return; // no dependencies if the team is serialized
    }

    // Calculate the sequential iteration number, as in "wait" but without the
    // out-of-bounds checks.
    pr_buf = th->th.th_dispatch;
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
    num_dims = pr_buf->th_doacross_info[0];
    lo = pr_buf->th_doacross_info[2];
    st = pr_buf->th_doacross_info[4];
    if( st == 1 ) { // most common case
        iter_number = vec[0] - lo;
    } else if( st > 0 ) {
        iter_number = (kmp_uint64)(vec[0] - lo) / st;
    } else { // negative increment
        iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
    }
    for( i = 1; i < num_dims; ++i ) {
        kmp_int64 iter, ln;
        kmp_int32 j = i * 4;
        ln = pr_buf->th_doacross_info[j + 1];
        lo = pr_buf->th_doacross_info[j + 2];
        st = pr_buf->th_doacross_info[j + 4];
        if( st == 1 ) {
            iter = vec[i] - lo;
        } else if( st > 0 ) {
            iter = (kmp_uint64)(vec[i] - lo) / st;
        } else { // st < 0
            iter = (kmp_uint64)(lo - vec[i]) / (-st);
        }
        iter_number = iter + ln * iter_number; // fold this dimension into the linear index
    }
    shft = iter_number % 32; // use 32-bit flag granularity
    iter_number >>= 5;       // index of the 32-bit flag word
    flag = 1 << shft;
    if( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 )
        KMP_TEST_THEN_OR32( (kmp_int32*)&pr_buf->th_doacross_flags[iter_number], (kmp_int32)flag );
    KA_TRACE(20,("__kmpc_doacross_post() exit: T#%d iter %lld posted\n",
                 gtid, (iter_number<<5)+shft));
}
void
__kmpc_doacross_fini(ident_t *loc, int gtid)
{
    kmp_int64 num_done;
    kmp_info_t *th = __kmp_threads[gtid];
    kmp_team_t *team = th->th.th_team;
    kmp_disp_t *pr_buf = th->th.th_dispatch;

    KA_TRACE(20,("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
    if( team->t.t_serialized ) {
        KA_TRACE(20,("__kmpc_doacross_fini() exit: serialized team %p\n", team));
        return; // nothing to do
    }
    num_done = KMP_TEST_THEN_INC64((kmp_int64*)pr_buf->th_doacross_info[1]) + 1;
    if( num_done == th->th.th_team_nproc ) {
        // We are the last thread; free the shared resources.
        int idx = pr_buf->th_doacross_buf_idx - 1;
        dispatch_shared_info_t *sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
        KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == (kmp_int64)&sh_buf->doacross_num_done);
        KMP_DEBUG_ASSERT(num_done == (kmp_int64)sh_buf->doacross_num_done);
        KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
        __kmp_thread_free(th, (void*)sh_buf->doacross_flags);
        sh_buf->doacross_flags = NULL;
        sh_buf->doacross_num_done = 0;
        sh_buf->doacross_buf_idx += __kmp_dispatch_num_buffers; // free the buffer for future re-use
    }
    // Free private resources (the buffer index itself is kept).
    __kmp_thread_free(th, (void*)pr_buf->th_doacross_info);
    pr_buf->th_doacross_info = NULL;
    KA_TRACE(20,("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}