#if defined(_WIN32_WINNT) && defined(_M_IX86)
#define _WIN32_WINNT 0x0502
#endif

#include "kmp_error.h"
#include "kmp_stats.h"
#if KMP_OS_WINDOWS && KMP_ARCH_X86
#include <float.h>   // _control87() used by the guided-analytical FPCW workaround below
#endif

#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif

/* ------------------------------------------------------------------------ */

// template for integer type limits used by the dispatch templates
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    static const int mn = 0x80000000;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffff;
    static const unsigned int mn = 0x00000000;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    static const long long mn = 0x8000000000000000LL;
};
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffLL;
    static const unsigned long long mn = 0x0000000000000000LL;
};
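/* Illustrative sketch (not part of the runtime): these limits are used to saturate
   a computed bound instead of letting it wrap, e.g. the way __kmp_dist_get_bounds()
   below clamps the per-team upper bound:

       if ( *pupper < *plower )           // overflow while stepping upward
           *pupper = i_maxmin< T >::mx;   // saturate at the type maximum
*/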
#if KMP_STATIC_STEAL_ENABLED

    // replaces dispatch_private_info{32,64} structures and dispatch_private_info{32,64}_t types
    template< typename T >
    struct dispatch_private_infoXX_template {
        typedef typename traits_t< T >::unsigned_t  UT;
        typedef typename traits_t< T >::signed_t    ST;
        UT count;                // current chunk index (unsigned)
        T  ub;
        T  lb;
        ST st;                   // signed stride
        UT tc;                   // trip count (unsigned)
        T  static_steal_counter; // for static_steal only

        /* parm[1-4] are used in different ways by different scheduling algorithms;
           KMP_ALIGN( 32 ) keeps parm1-4 together in one cache line. */
        struct KMP_ALIGN( 32 ) {
            T  parm1;
            T  parm2;
            T  parm3;
            T  parm4;
        };

        UT ordered_lower;
        UT ordered_upper;
#if KMP_OS_WINDOWS
        T  last_upper;
#endif /* KMP_OS_WINDOWS */
    };

#else /* KMP_STATIC_STEAL_ENABLED */

    // same private descriptor without the steal-specific layout
    template< typename T >
    struct dispatch_private_infoXX_template {
        typedef typename traits_t< T >::unsigned_t  UT;
        typedef typename traits_t< T >::signed_t    ST;
        T  lb;
        T  ub;
        ST st;            // signed stride
        UT tc;            // trip count (unsigned)

        T  parm1;
        T  parm2;
        T  parm3;
        T  parm4;

        UT count;         // current chunk index (unsigned)

        UT ordered_lower;
        UT ordered_upper;
#if KMP_OS_WINDOWS
        T  last_upper;
#endif /* KMP_OS_WINDOWS */
    };

#endif /* KMP_STATIC_STEAL_ENABLED */
template< typename T >
struct KMP_ALIGN_CACHE dispatch_private_info_template {
    // duplicate alignment here, otherwise size of structure is not correct in our compiler
    union KMP_ALIGN_CACHE private_info_tmpl {
        dispatch_private_infoXX_template< T > p;
        dispatch_private_info64_t             p64;
    } u;
    enum sched_type schedule;       /* scheduling algorithm */
    kmp_uint32      ordered;        /* ordered clause specified */
    kmp_uint32      ordered_bumped;
    kmp_int32       ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size
    dispatch_private_info * next;   /* stack of buffers for nest of serial regions */
    kmp_uint32      nomerge;        /* don't merge iters if serialized */
    kmp_uint32      type_size;
    enum cons_type  pushed_ws;
};
// replaces dispatch_shared_info{32,64} structures and dispatch_shared_info{32,64}_t types
template< typename UT >
struct dispatch_shared_infoXX_template {
    /* chunk index under dynamic, number of idle threads under static-steal;
       iteration index otherwise */
    volatile UT     iteration;
    volatile UT     num_done;
    volatile UT     ordered_iteration;
    UT              ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size
};
// replaces dispatch_shared_info structure and dispatch_shared_info_t type
template< typename UT >
struct dispatch_shared_info_template {
    // we need union here to keep the structure size
    union shared_info_tmpl {
        dispatch_shared_infoXX_template< UT > s;
        dispatch_shared_info64_t              s64;
    } u;
    volatile kmp_uint32     buffer_index;
    volatile kmp_int32      doacross_buf_idx;  // teamwise index
    kmp_uint32             *doacross_flags;    // array of iteration flags (0/1)
    kmp_int32               doacross_num_done; // count finished threads
};

/* ------------------------------------------------------------------------ */
#undef USE_TEST_LOCKS

// test_then_add template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_add( volatile T *p, T d );

template<>
__forceinline kmp_int32
test_then_add< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 d )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_ADD32( p, d );
    return r;
}

template<>
__forceinline kmp_int64
test_then_add< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 d )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_ADD64( p, d );
    return r;
}

// test_then_inc_acq template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_inc_acq( volatile T *p );

template<>
__forceinline kmp_int32
test_then_inc_acq< kmp_int32 >( volatile kmp_int32 *p )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_INC_ACQ32( p );
    return r;
}

template<>
__forceinline kmp_int64
test_then_inc_acq< kmp_int64 >( volatile kmp_int64 *p )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_INC_ACQ64( p );
    return r;
}

// test_then_inc template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_inc( volatile T *p );

template<>
__forceinline kmp_int32
test_then_inc< kmp_int32 >( volatile kmp_int32 *p )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_INC32( p );
    return r;
}

template<>
__forceinline kmp_int64
test_then_inc< kmp_int64 >( volatile kmp_int64 *p )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_INC64( p );
    return r;
}

// compare_and_swap template (general template should NOT be used)
template< typename T >
static __forceinline kmp_int32
compare_and_swap( volatile T *p, T c, T s );

template<>
__forceinline kmp_int32
compare_and_swap< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s )
{
    return KMP_COMPARE_AND_STORE_REL32( p, c, s );
}

template<>
__forceinline kmp_int32
compare_and_swap< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s )
{
    return KMP_COMPARE_AND_STORE_REL64( p, c, s );
}
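/* Illustrative sketch (not part of the runtime): the typed wrappers above let the
   templated dispatch code bump shared counters without spelling out the operand
   width, e.g. claiming the next chunk index the same way the dynamic_chunked case
   of __kmp_dispatch_next() does further below:

       UT chunk_index = test_then_inc_acq< ST >( (volatile ST *) & sh->u.s.iteration );
       UT first_iter  = chunk_index * chunk_size;   // chunk_size is hypothetical here
*/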
/* Spin wait loop that first does pause, then yield.
   Waits until the predicate returns non-zero when called with *spinner and check.
   Does NOT put threads to sleep. */
template< typename UT >
static UT  // unsigned 4- or 8-byte type
__kmp_wait_yield( volatile UT * spinner,
                  UT            checker,
                  kmp_uint32 (* pred)( UT, UT )
                  USE_ITT_BUILD_ARG( void * obj )   // Higher-level synchronization object, or NULL.
                  )
{
    // note: we may not belong to a team at this point
    register volatile UT         * spin          = spinner;
    register          UT           check         = checker;
    register          kmp_uint32   spins;
    register          kmp_uint32 (*f) ( UT, UT ) = pred;
    register          UT           r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while(!f(r = *spin, check))
    {
        KMP_FSYNC_SPIN_PREPARE( obj );
        // if we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield;
        // pause is in the following code
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}
template< typename UT >
static kmp_uint32 __kmp_eq( UT value, UT checker) {
    return value == checker;
}

template< typename UT >
static kmp_uint32 __kmp_neq( UT value, UT checker) {
    return value != checker;
}

template< typename UT >
static kmp_uint32 __kmp_lt( UT value, UT checker) {
    return value < checker;
}

template< typename UT >
static kmp_uint32 __kmp_ge( UT value, UT checker) {
    return value >= checker;
}

template< typename UT >
static kmp_uint32 __kmp_le( UT value, UT checker) {
    return value <= checker;
}
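/* Illustrative sketch (not part of the runtime): __kmp_wait_yield() pairs a spin
   target with one of the predicates above. For example, __kmp_dispatch_init()
   below waits for its dispatch buffer to be recycled with:

       __kmp_wait_yield< kmp_uint32 >( &sh->buffer_index, my_buffer_index,
                                       __kmp_eq< kmp_uint32 >
                                       USE_ITT_BUILD_ARG( NULL ) );
*/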
/* ------------------------------------------------------------------------ */

static void
__kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( gtid_ref );

    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_root -> r.r_active
          && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) {
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL );
#endif
        }
    }
}
template< typename UT >
static void
__kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    typedef typename traits_t< UT >::signed_t    ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL );
#endif
        }
    }

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_sh_current );
        UT lower;

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );
        }
        lower = pr->u.p.ordered_lower;

#if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ]
                );
            }
        }
#endif /* !defined(KMP_GOMP_COMPAT) */

        KMP_MB();
#ifdef KMP_DEBUG
        {
            const char * buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );
        }
#endif

        __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                USE_ITT_BUILD_ARG( NULL )
                                );
        KMP_MB();  /* is this necessary? */
#ifdef KMP_DEBUG
        {
            const char * buff;
            buff = __kmp_str_format(
                "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );
        }
#endif
    }
    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid ) );
}
static void
__kmp_dispatch_dxo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    kmp_info_t *th;

    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) {
            __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref );
        }
    }
}
template< typename UT >
static void
__kmp_dispatch_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    typedef typename traits_t< UT >::signed_t    ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
            __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref );
        }
    }

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th -> th.th_dispatch -> th_dispatch_sh_current );

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );
        }

        KMP_FSYNC_RELEASING( & sh->u.s.ordered_iteration );
#if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped != 0 ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                /* How to test it? - OM */
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ]
                );
            }
        }
#endif /* !defined(KMP_GOMP_COMPAT) */

        KMP_MB();       /* Flush all pending memory write invalidates.  */

        pr->ordered_bumped += 1;

        KD_TRACE(1000, ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
                        gtid, pr->ordered_bumped ) );

        KMP_MB();       /* Flush all pending memory write invalidates.  */

        /* TODO use general release procedure? */
        test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );

        KMP_MB();       /* Flush all pending memory write invalidates.  */
    }
    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid ) );
}
/* Computes and returns x to the power of y, where y must be a non-negative integer */
template< typename UT >
static __forceinline long double
__kmp_pow(long double x, UT y) {
    long double s = 1.0L;

    KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
    // y is unsigned, so y >= 0 always holds
    while(y) {
        if ( y & 1 )
            s *= x;
        x *= x;
        y >>= 1;
    }
    return s;
}
/* Computes and returns the number of unassigned iterations after idx chunks have
   been assigned (the total number of unassigned iterations in chunks with index
   greater than or equal to idx). */
template< typename T >
static __inline typename traits_t< T >::unsigned_t
__kmp_dispatch_guided_remaining(
    T                                  tc,
    typename traits_t< T >::floating_t base,
    typename traits_t< T >::unsigned_t idx
) {
    typedef typename traits_t< T >::unsigned_t  UT;

    long double x = tc * __kmp_pow< UT >(base, idx);
    UT r = (UT) x;
    if ( x == r )
        return r;
    return r + 1;
}

// Parameters of the guided-iterative algorithm:
//   p2 = n * nproc * ( chunk + 1 )  // point of switching to dynamic
//   p3 = 1 / ( n * nproc )          // remaining iterations multiplier
// by default n = 2. For example with n = 3 the chunk distribution will be more flat.
// With n = 1 the first chunk is the same as for static schedule, i.e. trip / nproc.
static int    guided_int_param = 2;
static double guided_flt_param = 0.5; // = 1.0 / guided_int_param;
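/* Illustrative sketch (not part of the runtime): under the guided-iterative scheme
   each grab takes roughly remaining * guided_flt_param / nproc iterations. With
   nproc = 4 and tc = 1000 the chunk sizes decay geometrically, approximately
   125, 109, 95, ... until the remainder drops below
   parm2 = guided_int_param * nproc * (chunk + 1), at which point the code falls
   back to plain dynamic chunks (numbers above are only an example of the decay). */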
// UT - unsigned flavor of T, ST - signed flavor of T,
// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8
template< typename T >
static void
__kmp_dispatch_init(
    ident_t                        * loc,
    int                              gtid,
    enum sched_type                  schedule,
    T                                lb,
    T                                ub,
    typename traits_t< T >::signed_t st,
    typename traits_t< T >::signed_t chunk,
    int                              push_ws
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    typedef typename traits_t< T >::floating_t  DBL;
    static const int ___kmp_size_type = sizeof( UT );

    int                                            active;
    T                                              tc;
    kmp_info_t *                                   th;
    kmp_team_t *                                   team;
    kmp_uint32                                     my_buffer_index;
    dispatch_private_info_template< T >          * pr;
    dispatch_shared_info_template< UT > volatile * sh;

    KMP_BUILD_ASSERT( sizeof( dispatch_private_info_template< T > ) == sizeof( dispatch_private_info ) );
    KMP_BUILD_ASSERT( sizeof( dispatch_shared_info_template< UT > ) == sizeof( dispatch_shared_info ) );

    if ( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_INIT();
#endif
#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
            traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) );
        __kmp_str_free( &buff );
    }
#endif
    /* setup data */
    th     = __kmp_threads[ gtid ];
    team   = th -> th.th_team;
    active = ! team -> t.t_serialized;
    th->th.th_ident = loc;

#if USE_ITT_BUILD
    kmp_uint64 cur_chunk = chunk;
    int itt_need_metadata_reporting = __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
        th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1;
#endif
    if ( ! active ) {
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */
    } else {
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        my_buffer_index = th->th.th_dispatch->th_disp_index ++;

        /* What happens when number of threads changes, need to resize buffer? */
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % __kmp_dispatch_num_buffers ] );
        sh = reinterpret_cast< dispatch_shared_info_template< UT > volatile * >
            ( &team -> t.t_disp_buffer[ my_buffer_index % __kmp_dispatch_num_buffers ] );
    }

#if ( KMP_STATIC_STEAL_ENABLED )
    if ( SCHEDULE_HAS_NONMONOTONIC(schedule) )
        // AC: we now have only one implementation of stealing, so use it
        schedule = kmp_sch_static_steal;
    else
#endif
        schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);

    /* Pick up the nomerge/ordered bits from the scheduling type */
    if ( (schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper) ) {
        pr->nomerge = TRUE;
        schedule = (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower));
    } else {
        pr->nomerge = FALSE;
    }
    pr->type_size = ___kmp_size_type; // remember the size of variables
    if ( kmp_ord_lower & schedule ) {
        pr->ordered = TRUE;
        schedule = (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower));
    } else {
        pr->ordered = FALSE;
    }

    if ( schedule == kmp_sch_static ) {
        schedule = __kmp_static;
    } else {
        if ( schedule == kmp_sch_runtime ) {
            // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if not specified)
            schedule = team -> t.t_sched.r_sched_type;
            // Detail the schedule if needed
            if ( schedule == kmp_sch_guided_chunked ) {
                schedule = __kmp_guided;
            } else if ( schedule == kmp_sch_static ) {
                schedule = __kmp_static;
            }
            // Use the chunk size specified by OMP_SCHEDULE (or default if not specified)
            chunk = team -> t.t_sched.chunk;
#if USE_ITT_BUILD
            cur_chunk = chunk;
#endif
#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
                    traits_t< ST >::spec );
                KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
                __kmp_str_free( &buff );
            }
#endif
        } else {
            if ( schedule == kmp_sch_guided_chunked ) {
                schedule = __kmp_guided;
            }
            if ( chunk <= 0 ) {
                chunk = KMP_DEFAULT_CHUNK;
            }
        }

        if ( schedule == kmp_sch_auto ) {
            // mapping and differentiation: in the __kmp_do_serial_initialize()
            schedule = __kmp_auto;
#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n",
                    traits_t< ST >::spec );
                KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
                __kmp_str_free( &buff );
            }
#endif
        }

        /* guided analytical not safe for too many threads */
        if ( schedule == kmp_sch_guided_analytical_chunked && th->th.th_team_nproc > 1<<20 ) {
            schedule = kmp_sch_guided_iterative_chunked;
            KMP_WARNING( DispatchManyThreads );
        }
        pr->u.p.parm1 = chunk;
    }
    KMP_ASSERT2( (kmp_sch_lower < schedule && schedule < kmp_sch_upper),
                 "unknown scheduling type" );
    if ( __kmp_env_consistency_check ) {
        if ( st == 0 ) {
            __kmp_error_construct(
                kmp_i18n_msg_CnsLoopIncrZeroProhibited,
                ( pr->ordered ? ct_pdo_ordered : ct_pdo ), loc
            );
        }
    }

    // compute trip count
    if ( st == 1 ) {                                 // most common case
        tc = ( ub >= lb ) ? ub - lb + 1 : 0;         // zero-trip if ub < lb
    } else if ( st < 0 ) {
        // cast to unsigned is needed so the division is correct regardless of the result type
        tc = ( lb >= ub ) ? (UT)(lb - ub) / (-st) + 1 : 0;
    } else {
        tc = ( ub >= lb ) ? (UT)(ub - lb) / st + 1 : 0;
    }

    // These counters expand to nothing when statistics are disabled.
    if (schedule == __kmp_static) {
        KMP_COUNT_BLOCK(OMP_FOR_static);
        KMP_COUNT_VALUE(FOR_static_iterations, tc);
    } else {
        KMP_COUNT_BLOCK(OMP_FOR_dynamic);
        KMP_COUNT_VALUE(FOR_dynamic_iterations, tc);
    }

    pr->u.p.lb = lb;
    pr->u.p.ub = ub;
    pr->u.p.st = st;
    pr->u.p.tc = tc;

#if KMP_OS_WINDOWS
    pr->u.p.last_upper = ub + st;
#endif /* KMP_OS_WINDOWS */

    /* NOTE: only the active parallel region(s) has active ordered sections */
    if ( active ) {
        if ( pr->ordered == 0 ) {
            th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error;
            th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error;
        } else {
            pr->ordered_bumped = 0;

            pr->u.p.ordered_lower = 1;
            pr->u.p.ordered_upper = 0;

            th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >;
            th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >;
        }
    }

    if ( __kmp_env_consistency_check ) {
        enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
        if ( push_ws ) {
            __kmp_push_workshare( gtid, ws, loc );
            pr->pushed_ws = ws;
        } else {
            __kmp_check_workshare( gtid, ws, loc );
            pr->pushed_ws = ct_none;
        }
    }
    switch ( schedule ) {
#if ( KMP_STATIC_STEAL_ENABLED )
    case kmp_sch_static_steal:
        {
            T nproc = th->th.th_team_nproc;
            T ntc, init;

            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) );

            ntc = (tc % chunk ? 1 : 0) + tc / chunk;
            if ( nproc > 1 && ntc >= nproc ) {
                T id = __kmp_tid_from_gtid(gtid);
                T small_chunk, extras;

                small_chunk = ntc / nproc;
                extras = ntc % nproc;

                init = id * small_chunk + ( id < extras ? id : extras );
                pr->u.p.count = init;
                pr->u.p.ub = init + small_chunk + ( id < extras ? 1 : 0 );

                pr->u.p.parm2 = lb;
                pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
                if ( ___kmp_size_type > 4 ) {
                    // use a dynamically allocated per-thread lock for 8-byte types;
                    // freed in __kmp_dispatch_next when status == 0
                    KMP_DEBUG_ASSERT(th->th.th_dispatch->th_steal_lock == NULL);
                    th->th.th_dispatch->th_steal_lock =
                        (kmp_lock_t*)__kmp_allocate( sizeof(kmp_lock_t) );
                    __kmp_init_lock(th->th.th_dispatch->th_steal_lock);
                }
                break;
            } else {
                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n",
                               gtid ) );
                schedule = kmp_sch_static_balanced;
                /* too few iterations: fall-through to kmp_sch_static_balanced */
            } // if
            /* FALL-THROUGH to static balanced */
        } // case
#endif
    case kmp_sch_static_balanced:
        {
            T nproc = th->th.th_team_nproc;
            T init, limit;

            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n",
                           gtid ) );

            if ( nproc > 1 ) {
                T id = __kmp_tid_from_gtid(gtid);

                if ( tc < nproc ) {
                    if ( id < tc ) {
                        init = id;
                        limit = id;
                        pr->u.p.parm1 = (id == tc - 1);  /* parm1 stores *plastiter */
                    } else {
                        pr->u.p.count = 1;  /* means no more chunks to execute */
                        pr->u.p.parm1 = FALSE;
                        break;
                    }
                } else {
                    T small_chunk = tc / nproc;
                    T extras = tc % nproc;
                    init = id * small_chunk + (id < extras ? id : extras);
                    limit = init + small_chunk - (id < extras ? 0 : 1);
                    pr->u.p.parm1 = (id == nproc - 1);
                }
            } else {
                if ( tc > 0 ) {
                    init = 0;
                    limit = tc - 1;
                    pr->u.p.parm1 = TRUE;
                } else {
                    // zero trip count
                    pr->u.p.count = 1;  /* means no more chunks to execute */
                    pr->u.p.parm1 = FALSE;
                    break;
                }
            }
#if USE_ITT_BUILD
            // Calculate chunk for metadata report
            if ( itt_need_metadata_reporting )
                cur_chunk = limit - init + 1;
#endif
            if ( st == 1 ) {
                pr->u.p.lb = lb + init;
                pr->u.p.ub = lb + limit;
            } else {
                T ub_tmp = lb + limit * st;   // calculated upper bound, "ub" is user-defined upper bound
                pr->u.p.lb = lb + init * st;
                // adjust upper bound to "ub" if needed, so that MS lastprivate will match it exactly
                if ( st > 0 ) {
                    pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp );
                } else {
                    pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp );
                }
            }
            if ( pr->ordered ) {
                pr->u.p.ordered_lower = init;
                pr->u.p.ordered_upper = limit;
            }
            break;
        } // case
    case kmp_sch_guided_iterative_chunked :
        {
            T nproc = th->th.th_team_nproc;
            KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid));

            if ( nproc > 1 ) {
                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    /* chunk size too large, switch to dynamic */
                    schedule = kmp_sch_dynamic_chunked;
                } else {
                    // when remaining iters become less than parm2 - switch to dynamic
                    pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 );
                    *(double*)&pr->u.p.parm3 = guided_flt_param / nproc; // may occupy parm3 and parm4
                }
            } else {
                KD_TRACE(100,("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid));
                schedule = kmp_sch_static_greedy;
                /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
                KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
                pr->u.p.parm1 = tc;
            } // if
        } // case
        break;
    case kmp_sch_guided_analytical_chunked:
        {
            T nproc = th->th.th_team_nproc;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", gtid));

            if ( nproc > 1 ) {
                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    /* chunk size too large, switch to dynamic */
                    schedule = kmp_sch_dynamic_chunked;
                } else {
                    /* commonly used term: (2 nproc - 1)/(2 nproc) */
                    DBL x;

#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    // save original FPCW and set precision to 64-bit, as Windows* OS
                    // on IA-32 architecture defaults to 53-bit
                    unsigned int oldFpcw = _control87(0,0);
                    _control87(_PC_64,_MCW_PC);
#endif
                    /* value used for comparison in solver for cross-over point */
                    long double target = ((long double)chunk * 2 + 1) * nproc / tc;

                    /* crossover point--chunk indexes equal to or greater than this
                       point switch to dynamic-style scheduling */
                    UT cross;

                    /* commonly used term: (2 nproc - 1)/(2 nproc) */
                    x = (long double)1.0 - (long double)0.5 / nproc;

#ifdef KMP_DEBUG
                    { // test natural alignment
                        struct _test_a {
                            char a;
                            union {
                                char b;
                                DBL  d;
                            };
                        } t;
                        ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
                        KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 );
                    }
#endif // KMP_DEBUG

                    /* save the term in thread private dispatch structure */
                    *(DBL*)&pr->u.p.parm3 = x;

                    /* solve for the crossover point to the nearest integer i for which C_i <= chunk */
                    {
                        UT          left, right, mid;
                        long double p;

                        /* estimate initial upper and lower bound; the initial value of right
                           only affects solver performance, not correctness */
                        right = 229;
                        p = __kmp_pow< UT >(x,right);
                        if ( p > target ) {
                            do {
                                p *= p;
                                right <<= 1;
                            } while(p>target && right < (1<<27));
                            left = right >> 1; /* lower bound is previous (failed) estimate of upper bound */
                        } else {
                            left = 0;
                        }

                        /* bisection root-finding method */
                        while ( left + 1 < right ) {
                            mid = (left + right) / 2;
                            if ( __kmp_pow< UT >(x,mid) > target ) {
                                left = mid;
                            } else {
                                right = mid;
                            }
                        } // while
                        cross = right;
                    }
                    /* assert sanity of computed crossover point */
                    KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target);

                    /* save the crossover point in thread private dispatch structure */
                    pr->u.p.parm2 = cross;

#if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) )
    #define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3)
#else
    #define GUIDED_ANALYTICAL_WORKAROUND (x)
#endif
                    /* dynamic-style scheduling offset */
                    pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    // restore FPCW
                    _control87(oldFpcw,_MCW_PC);
#endif
                } // if
            } else {
                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",
                               gtid ) );
                schedule = kmp_sch_static_greedy;
                /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
                pr->u.p.parm1 = tc;
            } // if
        } // case
        break;
    case kmp_sch_static_greedy:
        KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
        pr->u.p.parm1 = ( th->th.th_team_nproc > 1 ) ?
            ( tc + th->th.th_team_nproc - 1 ) / th->th.th_team_nproc :
            tc;
        break;
    case kmp_sch_static_chunked :
    case kmp_sch_dynamic_chunked :
        if ( pr->u.p.parm1 <= 0 ) {
            pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
        }
        KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid));
        break;
    case kmp_sch_trapezoidal :
        {
            /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */

            T parm1, parm2, parm3, parm4;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) );

            parm1 = chunk;

            /* F : size of the first cycle */
            parm2 = ( tc / (2 * th->th.th_team_nproc) );
            if ( parm2 < 1 ) {
                parm2 = 1;
            }

            /* L : size of the last cycle.  Make sure the last cycle
               is not larger than the first cycle. */
            if ( parm1 < 1 ) {
                parm1 = 1;
            } else if ( parm1 > parm2 ) {
                parm1 = parm2;
            }

            /* N : number of cycles */
            parm3 = ( parm2 + parm1 );
            parm3 = ( 2 * tc + parm3 - 1) / parm3;
            if ( parm3 < 2 ) {
                parm3 = 2;
            }

            /* sigma : decreasing increment of the trapezoid */
            parm4 = ( parm3 - 1 );
            parm4 = ( parm2 - parm1 ) / parm4;

            pr->u.p.parm1 = parm1;
            pr->u.p.parm2 = parm2;
            pr->u.p.parm3 = parm3;
            pr->u.p.parm4 = parm4;
        } // case
        break;
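        /* Illustrative sketch (not part of the runtime): with tc = 1000 iterations,
           4 threads and chunk = 10, the trapezoid parameters come out as
           parm2 = 1000/(2*4) = 125 (first chunk), parm1 = 10 (last chunk),
           parm3 = (2*1000 + 135 - 1)/135 = 15 (number of chunks),
           parm4 = (125-10)/14 = 8 (per-chunk decrement), so successive chunks
           shrink roughly 125, 117, 109, ... toward 10. */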
    default:
        {
            __kmp_msg(
                kmp_ms_fatal,                        // Severity
                KMP_MSG( UnknownSchedTypeDetected ), // Primary message
                KMP_HNT( GetNewerLibrary ),          // Hint
                __kmp_msg_null                       // Variadic argument list terminator
            );
        }
        break;
    } // switch
    pr->schedule = schedule;
    if ( active ) {
        /* The name of this buffer should be my_buffer_index when it's free to use it */

        KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n",
                       gtid, my_buffer_index, sh->buffer_index) );
        __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 >
                                        USE_ITT_BUILD_ARG( NULL )
                                        );
        // Note: KMP_WAIT_YIELD() cannot be used there: buffer index and my_buffer_index are
        // *always* 32-bit integers.
        KMP_MB();  /* is this necessary? */
        KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n",
                       gtid, my_buffer_index, sh->buffer_index) );

        th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr;
        th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*) sh;
#if USE_ITT_BUILD
        if ( pr->ordered ) {
            __kmp_itt_ordered_init( gtid );
        }
        // Report loop metadata
        if ( itt_need_metadata_reporting ) {
            // Only report metadata by master of active team at level 1
            kmp_uint64 schedtype = 0;
            switch ( schedule ) {
            case kmp_sch_static_chunked:
            case kmp_sch_static_balanced:   // Chunk is calculated in the switch above
                break;
            case kmp_sch_static_greedy:
                cur_chunk = pr->u.p.parm1;
                break;
            case kmp_sch_dynamic_chunked:
                schedtype = 1;
                break;
            case kmp_sch_guided_iterative_chunked:
            case kmp_sch_guided_analytical_chunked:
                schedtype = 2;
                break;
            default:
                schedtype = 3;
                break;
            }
            __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk);
        }
#endif /* USE_ITT_BUILD */
    } // if ( active )
#ifdef KMP_DEBUG
    {
        const char * buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \
            " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \
            " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
            traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec,
            traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec,
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec );
        KD_TRACE(10, ( buff,
            gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
            pr->u.p.st, pr->u.p.tc, pr->u.p.count,
            pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
            pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) );
        __kmp_str_free( &buff );
    }
#endif
#if ( KMP_STATIC_STEAL_ENABLED )
    if ( schedule == kmp_sch_static_steal ) {
        // Other threads inspect this counter when searching for a victim;
        // incrementing it signals that stealing from this thread is now allowed.
        volatile T * p = &pr->u.p.static_steal_counter;
        *p = *p + 1;
    }
#endif // ( KMP_STATIC_STEAL_ENABLED )

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
        ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif
}
/* For ordered loops, either __kmp_dispatch_finish() should be called after
 * every iteration, or __kmp_dispatch_finish_chunk() should be called after
 * every chunk of iterations.  If the ordered section(s) were not executed
 * for this iteration (or every iteration in this chunk), we need to set the
 * ordered iteration counters so that the next thread can proceed. */
template< typename UT >
static void
__kmp_dispatch_finish( int gtid, ident_t *loc )
{
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {

        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        if ( pr->ordered_bumped ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;
        } else {
            UT lower = pr->u.p.ordered_lower;

#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
                __kmp_str_free( &buff );
            }
#endif

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL)
                                   );
            KMP_MB();  /* is this necessary? */
#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
                __kmp_str_free( &buff );
            }
#endif

            test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
        }
    }
    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid ) );
}
#ifdef KMP_GOMP_COMPAT

template< typename UT >
static void
__kmp_dispatch_finish_chunk( int gtid, ident_t *loc )
{
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        UT lower = pr->u.p.ordered_lower;
        UT upper = pr->u.p.ordered_upper;
        UT inc = upper - lower + 1;

        if ( pr->ordered_bumped == inc ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;
        } else {
            inc -= pr->ordered_bumped;

#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish_chunk: T#%%d before wait: " \
                    "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) );
                __kmp_str_free( &buff );
            }
#endif

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL)
                                   );

            KMP_MB();  /* is this necessary? */
            KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;
#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish_chunk: T#%%d after wait: " \
                    "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) );
                __kmp_str_free( &buff );
            }
#endif

            test_then_add< ST >( (volatile ST *) & sh->u.s.ordered_iteration, inc);
        }
    }
    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) );
}

#endif /* KMP_GOMP_COMPAT */
/* Define a macro for exiting __kmp_dispatch_next(). If status is 0
 * (no more work), then tell OMPT the loop is over. In some cases
 * kmp_dispatch_fini() is not called. */
#if OMPT_SUPPORT && OMPT_TRACE
#define OMPT_LOOP_END                                                          \
    if (status == 0) {                                                         \
        if (ompt_enabled &&                                                    \
            ompt_callbacks.ompt_callback(ompt_event_loop_end)) {               \
            ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);        \
            ompt_task_info_t *task_info = __ompt_get_taskinfo(0);              \
            ompt_callbacks.ompt_callback(ompt_event_loop_end)(                 \
                team_info->parallel_id, task_info->task_id);                   \
        }                                                                      \
    }
#else
#define OMPT_LOOP_END // no-op
#endif

template< typename T >
static int
__kmp_dispatch_next(
    ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub,
    typename traits_t< T >::signed_t *p_st
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    typedef typename traits_t< T >::floating_t  DBL;
#if ( KMP_STATIC_STEAL_ENABLED )
    static const int ___kmp_size_type = sizeof( UT );
#endif

    // Note: schedule(runtime) is counted here even if the actual runtime schedule is static.
    KMP_TIME_PARTITIONED_BLOCK(FOR_dynamic_scheduling);

    int                                   status;
    dispatch_private_info_template< T > * pr;
    kmp_info_t                          * th   = __kmp_threads[ gtid ];
    kmp_team_t                          * team = th -> th.th_team;

    KMP_DEBUG_ASSERT( p_lb && p_ub && p_st ); // AC: these cannot be NULL
#ifdef KMP_DEBUG
    {
        const char * buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last ) );
        __kmp_str_free( &buff );
    }
#endif
    if ( team -> t.t_serialized ) {
        /* NOTE: serialize this dispatch because we are not at the active level */
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */
        KMP_DEBUG_ASSERT( pr );

        if ( (status = (pr->u.p.tc != 0)) == 0 ) {
            *p_lb = 0;
            *p_ub = 0;
            if ( p_st != NULL )
                *p_st = 0;
            if ( __kmp_env_consistency_check ) {
                if ( pr->pushed_ws != ct_none ) {
                    pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                }
            }
        } else if ( pr->nomerge ) {
            kmp_int32 last;
            T         start;
            UT        limit, trip, init;
            ST        incr;
            T         chunk = pr->u.p.parm1;

            KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) );

            init = chunk * pr->u.p.count++;
            trip = pr->u.p.tc - 1;

            if ( (status = (init <= trip)) == 0 ) {
                *p_lb = 0;
                *p_ub = 0;
                if ( p_st != NULL )
                    *p_st = 0;
                if ( __kmp_env_consistency_check ) {
                    if ( pr->pushed_ws != ct_none ) {
                        pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                    }
                }
            } else {
                start = pr->u.p.lb;
                limit = chunk + init - 1;
                incr  = pr->u.p.st;

                if ( (last = (limit >= trip)) != 0 ) {
                    limit = trip;
#if KMP_OS_WINDOWS
                    pr->u.p.last_upper = pr->u.p.ub;
#endif /* KMP_OS_WINDOWS */
                }
                if ( p_last != NULL )
                    *p_last = last;
                if ( p_st != NULL )
                    *p_st = incr;
                if ( incr == 1 ) {
                    *p_lb = start + init;
                    *p_ub = start + limit;
                } else {
                    *p_lb = start + init * incr;
                    *p_ub = start + limit * incr;
                }

                if ( pr->ordered ) {
                    pr->u.p.ordered_lower = init;
                    pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                    {
                        const char * buff;
                        buff = __kmp_str_format(
                            "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t< UT >::spec, traits_t< UT >::spec );
                        KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                        __kmp_str_free( &buff );
                    }
#endif
                } // if
            } // if
        } else {
            pr->u.p.tc = 0;
            *p_lb = pr->u.p.lb;
            *p_ub = pr->u.p.ub;
#if KMP_OS_WINDOWS
            pr->u.p.last_upper = *p_ub;
#endif /* KMP_OS_WINDOWS */
            if ( p_last != NULL )
                *p_last = TRUE;
            if ( p_st != NULL )
                *p_st = pr->u.p.st;
        } // if
#ifdef KMP_DEBUG
        {
            const char * buff;
            buff = __kmp_str_format(
                "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \
                "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status) );
            __kmp_str_free( &buff );
        }
#endif
#if INCLUDE_SSC_MARKS
        SSC_MARK_DISPATCH_NEXT();
#endif
        OMPT_LOOP_END;
        return status;
    } else {
        kmp_int32 last = 0;
        dispatch_shared_info_template< UT > *sh;
        T         start;
        ST        incr;
        UT        limit, trip, init;

        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        KMP_DEBUG_ASSERT( pr );
        sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( sh );

        if ( pr->u.p.tc == 0 ) {
            // zero trip count
            status = 0;
        } else {
            switch (pr->schedule) {
#if ( KMP_STATIC_STEAL_ENABLED )
            case kmp_sch_static_steal:
                {
                    T chunk = pr->u.p.parm1;
                    int nproc = th->th.th_team_nproc;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) );

                    trip = pr->u.p.tc - 1;

                    if ( ___kmp_size_type > 4 ) {
                        // use lock for 8-byte induction variables
                        kmp_lock_t * lck = th->th.th_dispatch->th_steal_lock;
                        KMP_DEBUG_ASSERT(lck != NULL);
                        if ( pr->u.p.count < (UT)pr->u.p.ub ) {
                            __kmp_acquire_lock(lck, gtid);
                            // try to get own chunk of iterations
                            init = ( pr->u.p.count )++;
                            status = ( init < (UT)pr->u.p.ub );
                            __kmp_release_lock(lck, gtid);
                        } else {
                            status = 0; // no own chunks
                        }
                        if ( !status ) { // try to steal
                            kmp_info_t **other_threads = team->t.t_threads;
                            int          while_limit = nproc; // nproc attempts to find a victim
                            int          while_index = 0;
                            while ( ( !status ) && ( while_limit != ++while_index ) ) {
                                T remaining;
                                T victimIdx    = pr->u.p.parm4;
                                T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
                                dispatch_private_info_template< T > * victim =
                                    reinterpret_cast< dispatch_private_info_template< T >* >
                                    (other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current);
                                while( ( victim == NULL || victim == pr ||
                                       ( *(volatile T*)&victim->u.p.static_steal_counter !=
                                         *(volatile T*)&pr->u.p.static_steal_counter ) ) &&
                                       oldVictimIdx != victimIdx )
                                {
                                    victimIdx = (victimIdx + 1) % nproc;
                                    victim = reinterpret_cast< dispatch_private_info_template< T >* >
                                        (other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current);
                                }
                                if( !victim ||
                                    ( *(volatile T *)&victim->u.p.static_steal_counter !=
                                      *(volatile T *)&pr->u.p.static_steal_counter ) )
                                {
                                    continue; // try once more (nproc attempts in total);
                                              // victims are not ready yet to participate in stealing
                                }
                                if( victim->u.p.count + 2 > (UT)victim->u.p.ub ) {
                                    pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid
                                    continue; // not enough chunks to steal, go to next victim
                                }

                                lck = other_threads[victimIdx]->th.th_dispatch->th_steal_lock;
                                KMP_ASSERT(lck != NULL);
                                __kmp_acquire_lock(lck, gtid);
                                limit = victim->u.p.ub; // keep initial ub
                                if( victim->u.p.count >= limit ||
                                    (remaining = limit - victim->u.p.count) < 2 )
                                {
                                    __kmp_release_lock(lck, gtid);
                                    pr->u.p.parm4 = (victimIdx + 1) % nproc; // next victim
                                    continue; // not enough chunks to steal
                                }
                                // stealing succeeded: reduce victim's ub by 1/4 of undone chunks or by 1
                                if( remaining > 3 ) {
                                    init = ( victim->u.p.ub -= (remaining>>2) );
                                } else {
                                    init = ( victim->u.p.ub -= 1 ); // steal 1 chunk of 2 or 3 remaining
                                }
                                __kmp_release_lock(lck, gtid);

                                KMP_DEBUG_ASSERT(init + 1 <= limit);
                                pr->u.p.parm4 = victimIdx; // remember victim to steal from
                                status = 1;
                                while_index = 0;
                                // now update own count and ub with stolen range, excluding init chunk
                                __kmp_acquire_lock(th->th.th_dispatch->th_steal_lock, gtid);
                                pr->u.p.count = init + 1;
                                pr->u.p.ub = limit;
                                __kmp_release_lock(th->th.th_dispatch->th_steal_lock, gtid);
                            } // while (search for victim)
                        } // if (try to find victim and steal)
                    } else {
                        // 4-byte induction variable: use 8-byte CAS for the pair (count, ub)
                        typedef union {
                            struct {
                                UT count;
                                T  ub;
                            } p;
                            kmp_int64 b;
                        } union_i4;
                        // All operations on 'count' or 'ub' must be combined atomically together.
                        {
                            union_i4 vold, vnew;
                            vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
                            vnew = vold;
                            vnew.p.count++;
                            while( ! KMP_COMPARE_AND_STORE_ACQ64(
                                        ( volatile kmp_int64* )&pr->u.p.count,
                                        *VOLATILE_CAST(kmp_int64 *)&vold.b,
                                        *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
                                KMP_CPU_PAUSE();
                                vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
                                vnew = vold;
                                vnew.p.count++;
                            }
                            vnew = vold;
                            init = vnew.p.count;
                            status = ( init < (UT)vnew.p.ub ) ;
                        }

                        if( !status ) {
                            kmp_info_t **other_threads = team->t.t_threads;
                            int          while_limit = nproc; // nproc attempts to find a victim
                            int          while_index = 0;

                            while ( ( !status ) && ( while_limit != ++while_index ) ) {
                                union_i4  vold, vnew;
                                kmp_int32 remaining;
                                T         victimIdx    = pr->u.p.parm4;
                                T         oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
                                dispatch_private_info_template< T > * victim =
                                    reinterpret_cast< dispatch_private_info_template< T >* >
                                    (other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current);
                                while( (victim == NULL || victim == pr ||
                                       (*(volatile T*)&victim->u.p.static_steal_counter !=
                                        *(volatile T*)&pr->u.p.static_steal_counter)) &&
                                       oldVictimIdx != victimIdx )
                                {
                                    victimIdx = (victimIdx + 1) % nproc;
                                    victim = reinterpret_cast< dispatch_private_info_template< T >* >
                                        ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current );
                                }
                                if( !victim ||
                                    ( *(volatile T *)&victim->u.p.static_steal_counter !=
                                      *(volatile T *)&pr->u.p.static_steal_counter ) )
                                {
                                    continue; // try once more (nproc attempts in total);
                                              // victims are not ready yet to participate in stealing
                                }
                                pr->u.p.parm4 = victimIdx; // new victim found
                                while( 1 ) { // CAS loop if victim has enough chunks to steal
                                    vold.b = *( volatile kmp_int64 * )( &victim->u.p.count );
                                    vnew = vold;

                                    KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * (UT)chunk <= trip );
                                    if ( vnew.p.count >= (UT)vnew.p.ub ||
                                        (remaining = vnew.p.ub - vnew.p.count) < 2 )
                                    {
                                        pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start victim id
                                        break; // not enough chunks to steal, go to next victim
                                    }
                                    if( remaining > 3 ) {
                                        vnew.p.ub -= (remaining>>2); // try to steal 1/4 of remaining
                                    } else {
                                        vnew.p.ub -= 1;              // steal 1 chunk of 2 or 3 remaining
                                    }
                                    KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
                                    if ( KMP_COMPARE_AND_STORE_ACQ64(
                                            ( volatile kmp_int64 * )&victim->u.p.count,
                                            *VOLATILE_CAST(kmp_int64 *)&vold.b,
                                            *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
                                        // stealing succeeded; now update own count and ub
                                        status = 1;
                                        while_index = 0;
                                        init = vnew.p.ub;
                                        vold.p.count = init + 1;
#if KMP_ARCH_X86
                                        KMP_XCHG_FIXED64(( volatile kmp_int64 * )(&pr->u.p.count), vold.b);
#else
                                        *( volatile kmp_int64 * )(&pr->u.p.count) = vold.b;
#endif
                                        break;
                                    } // if (check CAS result)
                                    KMP_CPU_PAUSE(); // CAS failed, repeat attempt
                                } // while (try to steal from particular victim)
                            } // while (search for victim)
                        } // if (try to find victim and steal)
                    } // if (4-byte induction variable)
                    if ( !status ) {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL ) *p_st = 0;
                    } else {
                        start = pr->u.p.parm2;
                        init *= chunk;
                        limit = chunk + init - 1;
                        incr  = pr->u.p.st;

                        KMP_DEBUG_ASSERT(init <= trip);
                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;
                        if ( p_st != NULL ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        } // if
                    } // if
                    break;
                } // case
#endif // ( KMP_STATIC_STEAL_ENABLED )
            case kmp_sch_static_balanced:
                {
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) );
                    if ( (status = !pr->u.p.count) != 0 ) {  /* check if thread has any iterations to do */
                        pr->u.p.count = 1;
                        *p_lb = pr->u.p.lb;
                        *p_ub = pr->u.p.ub;
                        last = pr->u.p.parm1;
                        if ( p_st != NULL )
                            *p_st = pr->u.p.st;
                    } else {  /* no iterations to do */
                        pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
                    }
                    if ( pr->ordered ) {
#ifdef KMP_DEBUG
                        {
                            const char * buff;
                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
#endif
                    } // if
                } // case
                break;
            case kmp_sch_static_greedy:  /* original code for kmp_sch_static_greedy was merged here */
            case kmp_sch_static_chunked:
                {
                    T parm1;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n",
                                   gtid ) );
                    parm1 = pr->u.p.parm1;

                    trip = pr->u.p.tc - 1;
                    init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));

                    if ( (status = (init <= trip)) != 0 ) {
                        start = pr->u.p.lb;
                        incr  = pr->u.p.st;
                        limit = parm1 + init - 1;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != NULL ) *p_st = incr;

                        pr->u.p.count += th->th.th_team_nproc;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        } // if
                    } // if
                } // case
                break;
            case kmp_sch_dynamic_chunked:
                {
                    T chunk = pr->u.p.parm1;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
                                   gtid ) );

                    init = chunk * test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
                    trip = pr->u.p.tc - 1;

                    if ( (status = (init <= trip)) == 0 ) {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL ) *p_st = 0;
                    } else {
                        start = pr->u.p.lb;
                        limit = chunk + init - 1;
                        incr  = pr->u.p.st;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != NULL ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        } // if
                    } // if
                } // case
                break;
            case kmp_sch_guided_iterative_chunked:
                {
                    T chunkspec = pr->u.p.parm1;
                    KD_TRACE(100,
                        ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid));
                    trip = pr->u.p.tc;
                    // Start atomic part of calculations
                    while(1) {
                        ST remaining;              // signed, because can be < 0
                        init = sh->u.s.iteration;  // shared value
                        remaining = trip - init;
                        if ( remaining <= 0 ) {    // AC: need to compare with 0 first
                            // nothing to do, don't try atomic op
                            status = 0;
                            break;
                        }
                        if ( (T)remaining < pr->u.p.parm2 ) { // compare with K*nproc*(chunk+1), K=2 by default
                            // use dynamic-style schedule
                            // atomically increment iterations, get old value
                            init = test_then_add<ST>( (ST*)&sh->u.s.iteration, (ST)chunkspec );
                            remaining = trip - init;
                            if (remaining <= 0) {
                                status = 0;    // all iterations got by other threads
                            } else {
                                // got some iterations to work on
                                status = 1;
                                if ( (T)remaining > chunkspec ) {
                                    limit = init + chunkspec - 1;
                                } else {
                                    last = 1;   // the last chunk
                                    limit = init + remaining - 1;
                                } // if
                            } // if
                            break;
                        } // if
                        limit = init + (UT)( remaining * *(double*)&pr->u.p.parm3 ); // divide by K*nproc
                        if ( compare_and_swap<ST>( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) {
                            // CAS was successful, chunk obtained
                            status = 1;
                            --limit;
                            break;
                        } // if
                    } // while
                    if ( status != 0 ) {
                        start = pr->u.p.lb;
                        incr = pr->u.p.st;
                        if ( p_st != NULL )
                            *p_st = incr;
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        } // if
                    } else {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL )
                            *p_st = 0;
                    } // if
                } // case
                break;
            case kmp_sch_guided_analytical_chunked:
                {
                    T  chunkspec = pr->u.p.parm1;
                    UT chunkIdx;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* for storing original FPCW value for Windows* OS on IA-32 architecture */
                    unsigned int oldFpcw;
                    unsigned int fpcwSet = 0;
#endif
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n",
                                   gtid ) );

                    trip = pr->u.p.tc;

                    KMP_DEBUG_ASSERT(th->th.th_team_nproc > 1);
                    KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)th->th.th_team_nproc < trip);

                    while(1) { /* this while loop is a safeguard against unexpected zero chunk sizes */
                        chunkIdx = test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
                        if ( chunkIdx >= (UT)pr->u.p.parm2 ) {
                            --trip;
                            /* use dynamic-style scheduling */
                            init = chunkIdx * chunkspec + pr->u.p.count;
                            /* need to verify init > 0 in case of overflow in the above calculation */
                            if ( (status = (init > 0 && init <= trip)) != 0 ) {
                                limit = init + chunkspec -1;

                                if ( (last = (limit >= trip)) != 0 )
                                    limit = trip;
                            }
                            break;
                        } else {
                            /* use exponential-style scheduling */
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                            /* if not already done, save original FPCW and set precision to 64-bit,
                               as Windows* OS on IA-32 architecture defaults to 53-bit */
                            if ( !fpcwSet ) {
                                oldFpcw = _control87(0,0);
                                _control87(_PC_64,_MCW_PC);
                                fpcwSet = 0x30000;
                            }
#endif
                            if ( chunkIdx ) {
                                init = __kmp_dispatch_guided_remaining< T >(
                                           trip, *( DBL * )&pr->u.p.parm3, chunkIdx );
                                KMP_DEBUG_ASSERT(init);
                                init = trip - init;
                            } else
                                init = 0;
                            limit = trip - __kmp_dispatch_guided_remaining< T >(
                                               trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 );
                            KMP_ASSERT(init <= limit);
                            if ( init < limit ) {
                                KMP_DEBUG_ASSERT(limit <= trip);
                                --limit;
                                status = 1;
                                break;
                            } // if
                        } // if
                    } // while (1)
#if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* restore FPCW if necessary;
                       check fpcwSet flag first because oldFpcw can be uninitialized here */
                    if ( fpcwSet && ( oldFpcw & fpcwSet ) )
                        _control87(oldFpcw,_MCW_PC);
#endif
                    if ( status != 0 ) {
                        start = pr->u.p.lb;
                        incr = pr->u.p.st;
                        if ( p_st != NULL )
                            *p_st = incr;
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        } // if
                    } else {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL )
                            *p_st = 0;
                    } // if
                } // case
                break;
            case kmp_sch_trapezoidal:
                {
                    UT index;
                    T  parm2 = pr->u.p.parm2;
                    T  parm3 = pr->u.p.parm3;
                    T  parm4 = pr->u.p.parm4;
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",
                                   gtid ) );

                    index = test_then_inc< ST >( (volatile ST *) & sh->u.s.iteration );

                    init = ( index * ( (2*parm2) - (index-1)*parm4 ) ) / 2;
                    trip = pr->u.p.tc - 1;

                    if ( (status = ((T)index < parm3 && init <= trip)) == 0 ) {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL ) *p_st = 0;
                    } else {
                        start = pr->u.p.lb;
                        limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1;
                        incr  = pr->u.p.st;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != NULL ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
                            {
                                const char * buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
#endif
                        } // if
                    } // if
                } // case
                break;
            default:
                {
                    __kmp_msg(
                        kmp_ms_fatal,                        // Severity
                        KMP_MSG( UnknownSchedTypeDetected ), // Primary message
                        KMP_HNT( GetNewerLibrary ),          // Hint
                        __kmp_msg_null                       // Variadic argument list terminator
                    );
                }
                break;
            } // switch
        } // if tc == 0

        if ( status == 0 ) {
            UT num_done;

            num_done = test_then_inc< ST >( (volatile ST *) & sh->u.s.num_done );
#ifdef KMP_DEBUG
            {
                const char * buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
                    traits_t< UT >::spec );
                KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) );
                __kmp_str_free( &buff );
            }
#endif

            if ( (ST)num_done == th->th.th_team_nproc - 1 ) {
#if ( KMP_STATIC_STEAL_ENABLED )
                if( pr->schedule == kmp_sch_static_steal && ___kmp_size_type > 4 ) {
                    int i;
                    kmp_info_t **other_threads = team->t.t_threads;
                    // loop complete, safe to destroy locks used for stealing
                    for( i = 0; i < th->th.th_team_nproc; ++i ) {
                        kmp_lock_t * lck = other_threads[i]->th.th_dispatch->th_steal_lock;
                        KMP_ASSERT(lck != NULL);
                        __kmp_destroy_lock( lck );
                        __kmp_free( lck );
                        other_threads[i]->th.th_dispatch->th_steal_lock = NULL;
                    }
                }
#endif
                /* NOTE: release this buffer to be reused */

                KMP_MB();       /* Flush all pending memory write invalidates.  */

                sh->u.s.num_done = 0;
                sh->u.s.iteration = 0;

                /* TODO replace with general release procedure? */
                if ( pr->ordered ) {
                    sh->u.s.ordered_iteration = 0;
                }

                KMP_MB();       /* Flush all pending memory write invalidates.  */

                sh -> buffer_index += __kmp_dispatch_num_buffers;
                KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
                               gtid, sh->buffer_index) );

                KMP_MB();       /* Flush all pending memory write invalidates.  */
            } // if

            if ( __kmp_env_consistency_check ) {
                if ( pr->pushed_ws != ct_none ) {
                    pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                }
            }

            th -> th.th_dispatch -> th_deo_fcn = NULL;
            th -> th.th_dispatch -> th_dxo_fcn = NULL;
            th -> th.th_dispatch -> th_dispatch_sh_current = NULL;
            th -> th.th_dispatch -> th_dispatch_pr_current = NULL;
        } // if (status == 0)
#if KMP_OS_WINDOWS
        else if ( last ) {
            pr->u.p.last_upper = pr->u.p.ub;
        }
#endif /* KMP_OS_WINDOWS */
        if ( p_last != NULL && status != 0 )
            *p_last = last;
    } // if

#ifdef KMP_DEBUG
    {
        const char * buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d normal case: " \
            "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status ) );
        __kmp_str_free( &buff );
    }
#endif
#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_NEXT();
#endif
    OMPT_LOOP_END;
    return status;
}
template< typename T >
static void
__kmp_dist_get_bounds(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t  incr
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    register kmp_uint32  team_id;
    register kmp_uint32  nteams;
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t *         th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper );
    KE_TRACE( 10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
#ifdef KMP_DEBUG
    {
        const char * buff;
        buff = __kmp_str_format( "__kmpc_dist_get_bounds: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *plastiter, *plower, *pupper, incr ) );
        __kmp_str_free( &buff );
    }
#endif

    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal: the direction of the bounds contradicts the sign of the increment.
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    team = th->th.th_team;
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else if ( incr > 0 ) {
        // upper-lower can exceed the limit of signed type
        trip_count = (UT)(*pupper - *plower) / incr + 1;
    } else {
        trip_count = (UT)(*plower - *pupper) / ( -incr ) + 1;
    }

    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // Unknown static scheduling type.
        // only some teams get single iteration, others get nothing
        if( team_id < trip_count ) {
            *pupper = *plower = *plower + team_id * incr;
        } else {
            *plower = *pupper + incr; // zero-trip loop
        }
        if( plastiter != NULL )
            *plastiter = ( team_id == trip_count - 1 );
    } else {
        if( __kmp_static == kmp_sch_static_balanced ) {
            register UT chunk = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunk + ( team_id < extras ? team_id : extras ) );
            *pupper = *plower + chunk * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                // Unknown static scheduling type.
            *plower += team_id * chunk_inc_count;
            *pupper = *plower + chunk_inc_count - incr;
            // Check/correct bounds if needed
            if( incr > 0 ) {
                if( *pupper < *plower )
                    *pupper = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupper > upper - incr;
                if( *pupper > upper )
                    *pupper = upper; // tracker C73258
            } else {
                if( *pupper > *plower )
                    *pupper = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupper < upper - incr;
                if( *pupper < upper )
                    *pupper = upper; // tracker C73258
            }
        }
    }
}
//-----------------------------------------------------------------------------------------
// Dispatch routines
//    Transfer call to template< type T >
//    __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule,
//                         T lb, T ub, ST st, ST chunk )
extern "C" {

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param schedule Schedule type
@param lb  Lower bound
@param ub  Upper bound
@param st  Step (or increment if you prefer)
@param chunk The chunk size to block with

This function prepares the runtime to start a dynamically scheduled for loop, saving the loop
arguments. These functions are all identical apart from the types of the arguments.
*/
void
__kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                        kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

/*!
See @ref __kmpc_dispatch_init_4
*/
void
__kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                         kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

/*!
See @ref __kmpc_dispatch_init_4
*/
void
__kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                        kmp_int64 lb, kmp_int64 ub,
                        kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

/*!
See @ref __kmpc_dispatch_init_4
*/
void
__kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                         kmp_uint64 lb, kmp_uint64 ub,
                         kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
/*!
See @ref __kmpc_dispatch_init_4

The difference from the __kmpc_dispatch_init set of functions is that these functions
are called for the composite distribute parallel for construct, so before dispatching
the regular iterations the per-team iteration space has to be computed.
*/
void
__kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                             kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                              kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                             kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_int64 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                              kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
/*!
@param loc Source code location
@param gtid Global thread id
@param p_last Pointer to a flag set to one if this is the last chunk or zero otherwise
@param p_lb   Pointer to the lower bound for the next chunk of work
@param p_ub   Pointer to the upper bound for the next chunk of work
@param p_st   Pointer to the stride for the next chunk of work
@return one if there is work to be done, zero otherwise

Get the next dynamically allocated chunk of work for this thread.
If there is no more work, then the lb, ub and stride need not be modified.
*/
int
__kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                        kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st )
{
    return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

/*!
See @ref __kmpc_dispatch_next_4
*/
int
__kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                         kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st )
{
    return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

/*!
See @ref __kmpc_dispatch_next_4
*/
int
__kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                        kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st )
{
    return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

/*!
See @ref __kmpc_dispatch_next_4
*/
int
__kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                         kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st )
{
    return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}
/*!
@param loc Source code location
@param gtid Global thread id

Mark the end of a dynamic loop.
*/
void
__kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
}

/*!
See @ref __kmpc_dispatch_fini_4
*/
void
__kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
}

/*!
See @ref __kmpc_dispatch_fini_4
*/
void
__kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
}

/*!
See @ref __kmpc_dispatch_fini_4
*/
void
__kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
}
/*! @} */
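/* Illustrative sketch (not part of the runtime): a compiler lowers
   "#pragma omp for schedule(dynamic, chunk)" into calls to the entry points above,
   roughly as follows (N, chunk and body() are hypothetical):

       kmp_int32 lb, ub, st, last;
       __kmpc_dispatch_init_4( &loc, gtid, kmp_sch_dynamic_chunked, 0, N - 1, 1, chunk );
       while ( __kmpc_dispatch_next_4( &loc, gtid, &last, &lb, &ub, &st ) ) {
           for ( kmp_int32 i = lb; i <= ub; i += st )
               body( i );
       }

   In this sketch __kmpc_dispatch_fini_4() is only needed for ordered loops; for a
   plain dynamic loop the final __kmpc_dispatch_next_4() returning zero releases
   the dispatch buffer. */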
//-----------------------------------------------------------------------------------------
// Non-template routines from kmp_dispatch.cpp used in other sources

kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value == checker;
}

kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value != checker;
}

kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) {
    return value < checker;
}

kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) {
    return value >= checker;
}

kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) {
    return value <= checker;
}
kmp_uint32
__kmp_wait_yield_4(volatile kmp_uint32 * spinner,
                   kmp_uint32            checker,
                   kmp_uint32 (* pred)( kmp_uint32, kmp_uint32 ),
                   void                * obj    // Higher-level synchronization object, or NULL.
                   )
{
    // note: we may not belong to a team at this point
    register volatile kmp_uint32         * spin          = spinner;
    register          kmp_uint32           check         = checker;
    register          kmp_uint32           spins;
    register          kmp_uint32 (*f) ( kmp_uint32, kmp_uint32 ) = pred;
    register          kmp_uint32           r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while(!f(r = TCR_4(*spin), check)) {
        KMP_FSYNC_SPIN_PREPARE( obj );
        /* if we are oversubscribed, or have waited a bit, yield;
           pause is in the following code */
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}
void
__kmp_wait_yield_4_ptr(void *spinner,
                       kmp_uint32 checker,
                       kmp_uint32 (*pred)( void *, kmp_uint32 ),
                       void *obj    // Higher-level synchronization object, or NULL.
                       )
{
    // note: we may not belong to a team at this point
    register void       *spin  = spinner;
    register kmp_uint32  check = checker;
    register kmp_uint32  spins;
    register kmp_uint32 (*f) ( void *, kmp_uint32 ) = pred;

    KMP_FSYNC_SPIN_INIT( obj, spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while ( !f( spin, check ) ) {
        KMP_FSYNC_SPIN_PREPARE( obj );
        /* if we are oversubscribed, or have waited a bit, yield;
           pause is in the following code */
        KMP_YIELD( TCR_4( __kmp_nth ) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
}
/* ------------------------------------------------------------------------ */

#ifdef KMP_GOMP_COMPAT

void
__kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                           kmp_int32 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk,
                                      push_ws );
}

void
__kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                            kmp_int32 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk,
                                       push_ws );
}

void
__kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                           kmp_int64 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk,
                                      push_ws );
}

void
__kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                            kmp_int64 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk,
                                       push_ws );
}

void
__kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
}

#endif /* KMP_GOMP_COMPAT */

} // extern "C"