16 #include "kmp_error.h" 20 #include "kmp_stats.h" 23 #include "ompt-specific.h" 26 #define MAX_MESSAGE 512 42 if ((env = getenv(
"KMP_INITIAL_THREAD_BIND")) != NULL &&
43 __kmp_str_match_true(env)) {
44 __kmp_middle_initialize();
45 KC_TRACE(10, (
"__kmpc_begin: middle initialization called\n"));
46 }
else if (__kmp_ignore_mppbeg() == FALSE) {
48 __kmp_internal_begin();
49 KC_TRACE(10, (
"__kmpc_begin: called\n"));
67 if (__kmp_ignore_mppend() == FALSE) {
68 KC_TRACE(10, (
"__kmpc_end: called\n"));
69 KA_TRACE(30, (
"__kmpc_end\n"));
71 __kmp_internal_end_thread(-1);
73 #if KMP_OS_WINDOWS && OMPT_SUPPORT 78 if (ompt_enabled.enabled)
79 __kmp_internal_end_library(__kmp_gtid_get_specific());
102 kmp_int32 gtid = __kmp_entry_gtid();
104 KC_TRACE(10, (
"__kmpc_global_thread_num: T#%d\n", gtid));
125 (
"__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
127 return TCR_4(__kmp_all_nth);
137 KC_TRACE(10, (
"__kmpc_bound_thread_num: called\n"));
138 return __kmp_tid_from_gtid(__kmp_entry_gtid());
147 KC_TRACE(10, (
"__kmpc_bound_num_threads: called\n"));
149 return __kmp_entry_thread()->th.th_team->t.t_nproc;
169 if (__kmp_par_range == 0) {
176 semi2 = strchr(semi2,
';');
180 semi2 = strchr(semi2 + 1,
';');
184 if (__kmp_par_range_filename[0]) {
185 const char *name = semi2 - 1;
186 while ((name > loc->
psource) && (*name !=
'/') && (*name !=
';')) {
189 if ((*name ==
'/') || (*name ==
';')) {
192 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
193 return __kmp_par_range < 0;
196 semi3 = strchr(semi2 + 1,
';');
197 if (__kmp_par_range_routine[0]) {
198 if ((semi3 != NULL) && (semi3 > semi2) &&
199 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
200 return __kmp_par_range < 0;
203 if (KMP_SSCANF(semi3 + 1,
"%d", &line_no) == 1) {
204 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
205 return __kmp_par_range > 0;
207 return __kmp_par_range < 0;
221 return __kmp_entry_thread()->th.th_root->r.r_active;
234 kmp_int32 num_threads) {
235 KA_TRACE(20, (
"__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
236 global_tid, num_threads));
238 __kmp_push_num_threads(loc, global_tid, num_threads);
241 void __kmpc_pop_num_threads(
ident_t *loc, kmp_int32 global_tid) {
242 KA_TRACE(20, (
"__kmpc_pop_num_threads: enter\n"));
247 void __kmpc_push_proc_bind(
ident_t *loc, kmp_int32 global_tid,
248 kmp_int32 proc_bind) {
249 KA_TRACE(20, (
"__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
252 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
266 int gtid = __kmp_entry_gtid();
268 #if (KMP_STATS_ENABLED) 272 if (previous_state == stats_state_e::SERIAL_REGION) {
273 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
275 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
288 va_start(ap, microtask);
291 ompt_frame_t *ompt_frame;
292 if (ompt_enabled.enabled) {
293 kmp_info_t *master_th = __kmp_threads[gtid];
294 kmp_team_t *parent_team = master_th->th.th_team;
295 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
297 ompt_frame = &(lwt->ompt_task_info.frame);
299 int tid = __kmp_tid_from_gtid(gtid);
301 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
303 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
304 OMPT_STORE_RETURN_ADDRESS(gtid);
308 #if INCLUDE_SSC_MARKS 311 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
312 VOLATILE_CAST(microtask_t) microtask,
313 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
315 #
if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
321 #if INCLUDE_SSC_MARKS 324 __kmp_join_call(loc, gtid
334 #if KMP_STATS_ENABLED 335 if (previous_state == stats_state_e::SERIAL_REGION) {
336 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
338 KMP_POP_PARTITIONED_TIMER();
340 #endif // KMP_STATS_ENABLED 355 kmp_int32 num_teams, kmp_int32 num_threads) {
357 (
"__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
358 global_tid, num_teams, num_threads));
360 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
375 int gtid = __kmp_entry_gtid();
376 kmp_info_t *this_thr = __kmp_threads[gtid];
378 va_start(ap, microtask);
380 #if KMP_STATS_ENABLED 383 if (previous_state == stats_state_e::SERIAL_REGION) {
384 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
386 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
391 this_thr->th.th_teams_microtask = microtask;
392 this_thr->th.th_teams_level =
393 this_thr->th.th_team->t.t_level;
396 kmp_team_t *parent_team = this_thr->th.th_team;
397 int tid = __kmp_tid_from_gtid(gtid);
398 if (ompt_enabled.enabled) {
399 parent_team->t.t_implicit_task_taskdata[tid]
400 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
402 OMPT_STORE_RETURN_ADDRESS(gtid);
407 if (this_thr->th.th_teams_size.nteams == 0) {
408 __kmp_push_num_teams(loc, gtid, 0, 0);
410 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
411 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
412 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
414 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
415 VOLATILE_CAST(microtask_t)
417 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
418 #
if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
424 __kmp_join_call(loc, gtid
432 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
433 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
434 this_thr->th.th_cg_roots = tmp->up;
435 KA_TRACE(100, (
"__kmpc_fork_teams: Thread %p popping node %p and moving up" 436 " to node %p. cg_nthreads was %d\n",
437 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
438 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
439 int i = tmp->cg_nthreads--;
444 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
445 this_thr->th.th_current_task->td_icvs.thread_limit =
446 this_thr->th.th_cg_roots->cg_thread_limit;
448 this_thr->th.th_teams_microtask = NULL;
449 this_thr->th.th_teams_level = 0;
450 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
452 #if KMP_STATS_ENABLED 453 if (previous_state == stats_state_e::SERIAL_REGION) {
454 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
456 KMP_POP_PARTITIONED_TIMER();
458 #endif // KMP_STATS_ENABLED 465 int __kmpc_invoke_task_func(
int gtid) {
return __kmp_invoke_task_func(gtid); }
484 OMPT_STORE_RETURN_ADDRESS(global_tid);
486 __kmp_serialized_parallel(loc, global_tid);
497 kmp_internal_control_t *top;
498 kmp_info_t *this_thr;
499 kmp_team_t *serial_team;
502 (
"__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
510 if (!TCR_4(__kmp_init_parallel))
511 __kmp_parallel_initialize();
513 __kmp_resume_if_soft_paused();
515 this_thr = __kmp_threads[global_tid];
516 serial_team = this_thr->th.th_serial_team;
518 kmp_task_team_t *task_team = this_thr->th.th_task_team;
520 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
521 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
524 KMP_DEBUG_ASSERT(serial_team);
525 KMP_ASSERT(serial_team->t.t_serialized);
526 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
527 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
528 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
529 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
532 if (ompt_enabled.enabled &&
533 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
534 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
535 if (ompt_enabled.ompt_callback_implicit_task) {
536 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
537 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
538 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
542 ompt_data_t *parent_task_data;
543 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
545 if (ompt_enabled.ompt_callback_parallel_end) {
546 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
547 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
548 ompt_parallel_invoker_program | ompt_parallel_team,
549 OMPT_LOAD_RETURN_ADDRESS(global_tid));
551 __ompt_lw_taskteam_unlink(this_thr);
552 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
558 top = serial_team->t.t_control_stack_top;
559 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
560 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
561 serial_team->t.t_control_stack_top = top->next;
566 serial_team->t.t_level--;
569 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
571 dispatch_private_info_t *disp_buffer =
572 serial_team->t.t_dispatch->th_disp_buffer;
573 serial_team->t.t_dispatch->th_disp_buffer =
574 serial_team->t.t_dispatch->th_disp_buffer->next;
575 __kmp_free(disp_buffer);
577 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;
579 --serial_team->t.t_serialized;
580 if (serial_team->t.t_serialized == 0) {
584 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 585 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
586 __kmp_clear_x87_fpu_status_word();
587 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
588 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
592 this_thr->th.th_team = serial_team->t.t_parent;
593 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
596 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
597 this_thr->th.th_team_master =
598 serial_team->t.t_parent->t.t_threads[0];
599 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
602 this_thr->th.th_dispatch =
603 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
605 __kmp_pop_current_task_from_thread(this_thr);
607 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
608 this_thr->th.th_current_task->td_flags.executing = 1;
610 if (__kmp_tasking_mode != tskm_immediate_exec) {
612 this_thr->th.th_task_team =
613 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
615 (
"__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " 617 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
620 if (__kmp_tasking_mode != tskm_immediate_exec) {
621 KA_TRACE(20, (
"__kmpc_end_serialized_parallel: T#%d decreasing nesting " 622 "depth of serial team %p to %d\n",
623 global_tid, serial_team, serial_team->t.t_serialized));
627 if (__kmp_env_consistency_check)
628 __kmp_pop_parallel(global_tid, NULL);
630 if (ompt_enabled.enabled)
631 this_thr->th.ompt_thread_info.state =
632 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
633 : ompt_state_work_parallel);
646 KC_TRACE(10, (
"__kmpc_flush: called\n"));
651 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) 665 if (!__kmp_cpuinfo.initialized) {
666 __kmp_query_cpuid(&__kmp_cpuinfo);
668 if (!__kmp_cpuinfo.sse2) {
673 #elif KMP_COMPILER_MSVC 676 __sync_synchronize();
677 #endif // KMP_COMPILER_ICC 680 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ 697 #error Unknown or unsupported architecture 700 #if OMPT_SUPPORT && OMPT_OPTIONAL 701 if (ompt_enabled.ompt_callback_flush) {
702 ompt_callbacks.ompt_callback(ompt_callback_flush)(
703 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
718 KC_TRACE(10, (
"__kmpc_barrier: called T#%d\n", global_tid));
720 if (!TCR_4(__kmp_init_parallel))
721 __kmp_parallel_initialize();
723 __kmp_resume_if_soft_paused();
725 if (__kmp_env_consistency_check) {
727 KMP_WARNING(ConstructIdentInvalid);
729 __kmp_check_barrier(global_tid, ct_barrier, loc);
733 ompt_frame_t *ompt_frame;
734 if (ompt_enabled.enabled) {
735 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
736 if (ompt_frame->enter_frame.ptr == NULL)
737 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
738 OMPT_STORE_RETURN_ADDRESS(global_tid);
741 __kmp_threads[global_tid]->th.th_ident = loc;
749 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
750 #if OMPT_SUPPORT && OMPT_OPTIONAL 751 if (ompt_enabled.enabled) {
752 ompt_frame->enter_frame = ompt_data_none;
767 KC_TRACE(10, (
"__kmpc_master: called T#%d\n", global_tid));
769 if (!TCR_4(__kmp_init_parallel))
770 __kmp_parallel_initialize();
772 __kmp_resume_if_soft_paused();
774 if (KMP_MASTER_GTID(global_tid)) {
776 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
780 #if OMPT_SUPPORT && OMPT_OPTIONAL 782 if (ompt_enabled.ompt_callback_master) {
783 kmp_info_t *this_thr = __kmp_threads[global_tid];
784 kmp_team_t *team = this_thr->th.th_team;
786 int tid = __kmp_tid_from_gtid(global_tid);
787 ompt_callbacks.ompt_callback(ompt_callback_master)(
788 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
789 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
790 OMPT_GET_RETURN_ADDRESS(0));
795 if (__kmp_env_consistency_check) {
796 #if KMP_USE_DYNAMIC_LOCK 798 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
800 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
803 __kmp_push_sync(global_tid, ct_master, loc, NULL);
805 __kmp_check_sync(global_tid, ct_master, loc, NULL);
821 KC_TRACE(10, (
"__kmpc_end_master: called T#%d\n", global_tid));
823 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
824 KMP_POP_PARTITIONED_TIMER();
826 #if OMPT_SUPPORT && OMPT_OPTIONAL 827 kmp_info_t *this_thr = __kmp_threads[global_tid];
828 kmp_team_t *team = this_thr->th.th_team;
829 if (ompt_enabled.ompt_callback_master) {
830 int tid = __kmp_tid_from_gtid(global_tid);
831 ompt_callbacks.ompt_callback(ompt_callback_master)(
832 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
833 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
834 OMPT_GET_RETURN_ADDRESS(0));
838 if (__kmp_env_consistency_check) {
840 KMP_WARNING(ThreadIdentInvalid);
842 if (KMP_MASTER_GTID(global_tid))
843 __kmp_pop_sync(global_tid, ct_master, loc);
857 KMP_DEBUG_ASSERT(__kmp_init_serial);
859 KC_TRACE(10, (
"__kmpc_ordered: called T#%d\n", gtid));
861 if (!TCR_4(__kmp_init_parallel))
862 __kmp_parallel_initialize();
864 __kmp_resume_if_soft_paused();
867 __kmp_itt_ordered_prep(gtid);
871 th = __kmp_threads[gtid];
873 #if OMPT_SUPPORT && OMPT_OPTIONAL 877 if (ompt_enabled.enabled) {
878 OMPT_STORE_RETURN_ADDRESS(gtid);
879 team = __kmp_team_from_gtid(gtid);
880 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
882 th->th.ompt_thread_info.wait_id = lck;
883 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
886 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
887 if (ompt_enabled.ompt_callback_mutex_acquire) {
888 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
889 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
895 if (th->th.th_dispatch->th_deo_fcn != 0)
896 (*th->th.th_dispatch->th_deo_fcn)(>id, &cid, loc);
898 __kmp_parallel_deo(>id, &cid, loc);
900 #if OMPT_SUPPORT && OMPT_OPTIONAL 901 if (ompt_enabled.enabled) {
903 th->th.ompt_thread_info.state = ompt_state_work_parallel;
904 th->th.ompt_thread_info.wait_id = 0;
907 if (ompt_enabled.ompt_callback_mutex_acquired) {
908 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
909 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
915 __kmp_itt_ordered_start(gtid);
930 KC_TRACE(10, (
"__kmpc_end_ordered: called T#%d\n", gtid));
933 __kmp_itt_ordered_end(gtid);
937 th = __kmp_threads[gtid];
939 if (th->th.th_dispatch->th_dxo_fcn != 0)
940 (*th->th.th_dispatch->th_dxo_fcn)(>id, &cid, loc);
942 __kmp_parallel_dxo(>id, &cid, loc);
944 #if OMPT_SUPPORT && OMPT_OPTIONAL 945 OMPT_STORE_RETURN_ADDRESS(gtid);
946 if (ompt_enabled.ompt_callback_mutex_released) {
947 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
949 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
950 ->t.t_ordered.dt.t_value,
951 OMPT_LOAD_RETURN_ADDRESS(gtid));
956 #if KMP_USE_DYNAMIC_LOCK 958 static __forceinline
void 959 __kmp_init_indirect_csptr(kmp_critical_name *crit,
ident_t const *loc,
960 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
964 kmp_indirect_lock_t **lck;
965 lck = (kmp_indirect_lock_t **)crit;
966 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
967 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
968 KMP_SET_I_LOCK_LOCATION(ilk, loc);
969 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
971 (
"__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
973 __kmp_itt_critical_creating(ilk->lock, loc);
975 int status = KMP_COMPARE_AND_STORE_PTR(lck,
nullptr, ilk);
978 __kmp_itt_critical_destroyed(ilk->lock);
984 KMP_DEBUG_ASSERT(*lck != NULL);
988 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ 990 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 991 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 992 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 993 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 994 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \ 996 KMP_FSYNC_PREPARE(l); \ 997 KMP_INIT_YIELD(spins); \ 998 kmp_backoff_t backoff = __kmp_spin_backoff_params; \ 1000 if (TCR_4(__kmp_nth) > \ 1001 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ 1004 KMP_YIELD_SPIN(spins); \ 1006 __kmp_spin_backoff(&backoff); \ 1008 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \ 1009 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \ 1011 KMP_FSYNC_ACQUIRED(l); \ 1015 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ 1017 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ 1018 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \ 1019 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \ 1020 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \ 1021 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \ 1025 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \ 1026 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); } 1030 #include <sys/syscall.h> 1033 #define FUTEX_WAIT 0 1036 #define FUTEX_WAKE 1 1040 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ 1042 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1043 kmp_int32 gtid_code = (gtid + 1) << 1; \ 1045 KMP_FSYNC_PREPARE(ftx); \ 1046 kmp_int32 poll_val; \ 1047 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ 1048 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1049 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ 1050 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ 1052 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ 1054 KMP_LOCK_BUSY(1, futex))) { \ 1057 poll_val |= KMP_LOCK_BUSY(1, futex); \ 1060 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ 1061 NULL, NULL, 0)) != 0) { \ 1066 KMP_FSYNC_ACQUIRED(ftx); \ 1070 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ 1072 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1073 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ 1074 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ 1075 KMP_FSYNC_ACQUIRED(ftx); \ 1083 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ 1085 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ 1087 KMP_FSYNC_RELEASING(ftx); \ 1088 kmp_int32 poll_val = \ 1089 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ 1090 if (KMP_LOCK_STRIP(poll_val) & 1) { \ 1091 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ 1092 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ 1095 KMP_YIELD_OVERSUB(); \ 1098 #endif // KMP_USE_FUTEX 1100 #else // KMP_USE_DYNAMIC_LOCK 1102 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1105 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1108 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1115 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1116 __kmp_init_user_lock_with_checks(lck);
1117 __kmp_set_user_lock_location(lck, loc);
1119 __kmp_itt_critical_creating(lck);
1130 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1135 __kmp_itt_critical_destroyed(lck);
1139 __kmp_destroy_user_lock_with_checks(lck);
1140 __kmp_user_lock_free(&idx, gtid, lck);
1141 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1142 KMP_DEBUG_ASSERT(lck != NULL);
1148 #endif // KMP_USE_DYNAMIC_LOCK 1161 kmp_critical_name *crit) {
1162 #if KMP_USE_DYNAMIC_LOCK 1163 #if OMPT_SUPPORT && OMPT_OPTIONAL 1164 OMPT_STORE_RETURN_ADDRESS(global_tid);
1165 #endif // OMPT_SUPPORT 1169 #if OMPT_SUPPORT && OMPT_OPTIONAL 1170 ompt_state_t prev_state = ompt_state_undefined;
1171 ompt_thread_info_t ti;
1173 kmp_user_lock_p lck;
1175 KC_TRACE(10, (
"__kmpc_critical: called T#%d\n", global_tid));
1179 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1180 KMP_CHECK_USER_LOCK_INIT();
1182 if ((__kmp_user_lock_kind == lk_tas) &&
1183 (
sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1184 lck = (kmp_user_lock_p)crit;
1187 else if ((__kmp_user_lock_kind == lk_futex) &&
1188 (
sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1189 lck = (kmp_user_lock_p)crit;
1193 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1196 if (__kmp_env_consistency_check)
1197 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1205 __kmp_itt_critical_acquiring(lck);
1207 #if OMPT_SUPPORT && OMPT_OPTIONAL 1208 OMPT_STORE_RETURN_ADDRESS(gtid);
1209 void *codeptr_ra = NULL;
1210 if (ompt_enabled.enabled) {
1211 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1213 prev_state = ti.state;
1214 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1215 ti.state = ompt_state_wait_critical;
1218 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1219 if (ompt_enabled.ompt_callback_mutex_acquire) {
1220 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1221 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1222 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1228 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1231 __kmp_itt_critical_acquired(lck);
1233 #if OMPT_SUPPORT && OMPT_OPTIONAL 1234 if (ompt_enabled.enabled) {
1236 ti.state = prev_state;
1240 if (ompt_enabled.ompt_callback_mutex_acquired) {
1241 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1242 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1246 KMP_POP_PARTITIONED_TIMER();
1248 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1249 KA_TRACE(15, (
"__kmpc_critical: done T#%d\n", global_tid));
1250 #endif // KMP_USE_DYNAMIC_LOCK 1253 #if KMP_USE_DYNAMIC_LOCK 1256 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1258 #define KMP_TSX_LOCK(seq) lockseq_##seq 1260 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq 1263 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1264 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) 1266 #define KMP_CPUINFO_RTM 0 1270 if (hint & kmp_lock_hint_hle)
1271 return KMP_TSX_LOCK(hle);
1272 if (hint & kmp_lock_hint_rtm)
1273 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1274 if (hint & kmp_lock_hint_adaptive)
1275 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1278 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1279 return __kmp_user_lock_seq;
1280 if ((hint & omp_lock_hint_speculative) &&
1281 (hint & omp_lock_hint_nonspeculative))
1282 return __kmp_user_lock_seq;
1285 if (hint & omp_lock_hint_contended)
1286 return lockseq_queuing;
1289 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1293 if (hint & omp_lock_hint_speculative)
1294 return KMP_TSX_LOCK(hle);
1296 return __kmp_user_lock_seq;
1299 #if OMPT_SUPPORT && OMPT_OPTIONAL 1300 #if KMP_USE_DYNAMIC_LOCK 1301 static kmp_mutex_impl_t
1302 __ompt_get_mutex_impl_type(
void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1304 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1309 return kmp_mutex_impl_queuing;
1312 return kmp_mutex_impl_spin;
1315 return kmp_mutex_impl_speculative;
1318 return kmp_mutex_impl_none;
1320 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1323 switch (ilock->type) {
1325 case locktag_adaptive:
1327 return kmp_mutex_impl_speculative;
1329 case locktag_nested_tas:
1330 return kmp_mutex_impl_spin;
1332 case locktag_nested_futex:
1334 case locktag_ticket:
1335 case locktag_queuing:
1337 case locktag_nested_ticket:
1338 case locktag_nested_queuing:
1339 case locktag_nested_drdpa:
1340 return kmp_mutex_impl_queuing;
1342 return kmp_mutex_impl_none;
1347 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1348 switch (__kmp_user_lock_kind) {
1350 return kmp_mutex_impl_spin;
1357 return kmp_mutex_impl_queuing;
1362 return kmp_mutex_impl_speculative;
1365 return kmp_mutex_impl_none;
1368 #endif // KMP_USE_DYNAMIC_LOCK 1369 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 1385 kmp_critical_name *crit, uint32_t hint) {
1387 kmp_user_lock_p lck;
1388 #if OMPT_SUPPORT && OMPT_OPTIONAL 1389 ompt_state_t prev_state = ompt_state_undefined;
1390 ompt_thread_info_t ti;
1392 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1394 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1397 KC_TRACE(10, (
"__kmpc_critical: called T#%d\n", global_tid));
1399 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1401 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1403 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1404 if (KMP_IS_D_LOCK(lckseq)) {
1405 KMP_COMPARE_AND_STORE_ACQ32((
volatile kmp_int32 *)crit, 0,
1406 KMP_GET_D_TAG(lckseq));
1408 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1414 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1415 lck = (kmp_user_lock_p)lk;
1416 if (__kmp_env_consistency_check) {
1417 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1418 __kmp_map_hint_to_lock(hint));
1421 __kmp_itt_critical_acquiring(lck);
1423 #if OMPT_SUPPORT && OMPT_OPTIONAL 1424 if (ompt_enabled.enabled) {
1425 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1427 prev_state = ti.state;
1428 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1429 ti.state = ompt_state_wait_critical;
1432 if (ompt_enabled.ompt_callback_mutex_acquire) {
1433 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1434 ompt_mutex_critical, (
unsigned int)hint,
1435 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1440 #if KMP_USE_INLINED_TAS 1441 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1442 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1444 #elif KMP_USE_INLINED_FUTEX 1445 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1446 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1450 KMP_D_LOCK_FUNC(lk,
set)(lk, global_tid);
1453 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1455 if (__kmp_env_consistency_check) {
1456 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1457 __kmp_map_hint_to_lock(hint));
1460 __kmp_itt_critical_acquiring(lck);
1462 #if OMPT_SUPPORT && OMPT_OPTIONAL 1463 if (ompt_enabled.enabled) {
1464 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1466 prev_state = ti.state;
1467 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1468 ti.state = ompt_state_wait_critical;
1471 if (ompt_enabled.ompt_callback_mutex_acquire) {
1472 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1473 ompt_mutex_critical, (
unsigned int)hint,
1474 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1479 KMP_I_LOCK_FUNC(ilk,
set)(lck, global_tid);
1481 KMP_POP_PARTITIONED_TIMER();
1484 __kmp_itt_critical_acquired(lck);
1486 #if OMPT_SUPPORT && OMPT_OPTIONAL 1487 if (ompt_enabled.enabled) {
1489 ti.state = prev_state;
1493 if (ompt_enabled.ompt_callback_mutex_acquired) {
1494 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1495 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1500 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1501 KA_TRACE(15, (
"__kmpc_critical: done T#%d\n", global_tid));
1504 #endif // KMP_USE_DYNAMIC_LOCK 1516 kmp_critical_name *crit) {
1517 kmp_user_lock_p lck;
1519 KC_TRACE(10, (
"__kmpc_end_critical: called T#%d\n", global_tid));
1521 #if KMP_USE_DYNAMIC_LOCK 1522 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1523 lck = (kmp_user_lock_p)crit;
1524 KMP_ASSERT(lck != NULL);
1525 if (__kmp_env_consistency_check) {
1526 __kmp_pop_sync(global_tid, ct_critical, loc);
1529 __kmp_itt_critical_releasing(lck);
1531 #if KMP_USE_INLINED_TAS 1532 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1533 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1535 #elif KMP_USE_INLINED_FUTEX 1536 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1537 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1541 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1544 kmp_indirect_lock_t *ilk =
1545 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1546 KMP_ASSERT(ilk != NULL);
1548 if (__kmp_env_consistency_check) {
1549 __kmp_pop_sync(global_tid, ct_critical, loc);
1552 __kmp_itt_critical_releasing(lck);
1554 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1557 #else // KMP_USE_DYNAMIC_LOCK 1559 if ((__kmp_user_lock_kind == lk_tas) &&
1560 (
sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1561 lck = (kmp_user_lock_p)crit;
1564 else if ((__kmp_user_lock_kind == lk_futex) &&
1565 (
sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1566 lck = (kmp_user_lock_p)crit;
1570 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1573 KMP_ASSERT(lck != NULL);
1575 if (__kmp_env_consistency_check)
1576 __kmp_pop_sync(global_tid, ct_critical, loc);
1579 __kmp_itt_critical_releasing(lck);
1583 __kmp_release_user_lock_with_checks(lck, global_tid);
1585 #endif // KMP_USE_DYNAMIC_LOCK 1587 #if OMPT_SUPPORT && OMPT_OPTIONAL 1590 OMPT_STORE_RETURN_ADDRESS(global_tid);
1591 if (ompt_enabled.ompt_callback_mutex_released) {
1592 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1593 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1594 OMPT_LOAD_RETURN_ADDRESS(0));
1598 KMP_POP_PARTITIONED_TIMER();
1599 KA_TRACE(15, (
"__kmpc_end_critical: done T#%d\n", global_tid));
1614 KC_TRACE(10, (
"__kmpc_barrier_master: called T#%d\n", global_tid));
1616 if (!TCR_4(__kmp_init_parallel))
1617 __kmp_parallel_initialize();
1619 __kmp_resume_if_soft_paused();
1621 if (__kmp_env_consistency_check)
1622 __kmp_check_barrier(global_tid, ct_barrier, loc);
1625 ompt_frame_t *ompt_frame;
1626 if (ompt_enabled.enabled) {
1627 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1628 if (ompt_frame->enter_frame.ptr == NULL)
1629 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1630 OMPT_STORE_RETURN_ADDRESS(global_tid);
1634 __kmp_threads[global_tid]->th.th_ident = loc;
1636 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1637 #if OMPT_SUPPORT && OMPT_OPTIONAL 1638 if (ompt_enabled.enabled) {
1639 ompt_frame->enter_frame = ompt_data_none;
1643 return (status != 0) ? 0 : 1;
1656 KC_TRACE(10, (
"__kmpc_end_barrier_master: called T#%d\n", global_tid));
1658 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1674 KC_TRACE(10, (
"__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1676 if (!TCR_4(__kmp_init_parallel))
1677 __kmp_parallel_initialize();
1679 __kmp_resume_if_soft_paused();
1681 if (__kmp_env_consistency_check) {
1683 KMP_WARNING(ConstructIdentInvalid);
1685 __kmp_check_barrier(global_tid, ct_barrier, loc);
1689 ompt_frame_t *ompt_frame;
1690 if (ompt_enabled.enabled) {
1691 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1692 if (ompt_frame->enter_frame.ptr == NULL)
1693 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1694 OMPT_STORE_RETURN_ADDRESS(global_tid);
1698 __kmp_threads[global_tid]->th.th_ident = loc;
1700 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1701 #if OMPT_SUPPORT && OMPT_OPTIONAL 1702 if (ompt_enabled.enabled) {
1703 ompt_frame->enter_frame = ompt_data_none;
1709 if (__kmp_env_consistency_check) {
1713 if (global_tid < 0) {
1714 KMP_WARNING(ThreadIdentInvalid);
1720 __kmp_pop_sync(global_tid, ct_master, loc);
1740 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1745 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1748 #if OMPT_SUPPORT && OMPT_OPTIONAL 1749 kmp_info_t *this_thr = __kmp_threads[global_tid];
1750 kmp_team_t *team = this_thr->th.th_team;
1751 int tid = __kmp_tid_from_gtid(global_tid);
1753 if (ompt_enabled.enabled) {
1755 if (ompt_enabled.ompt_callback_work) {
1756 ompt_callbacks.ompt_callback(ompt_callback_work)(
1757 ompt_work_single_executor, ompt_scope_begin,
1758 &(team->t.ompt_team_info.parallel_data),
1759 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1760 1, OMPT_GET_RETURN_ADDRESS(0));
1763 if (ompt_enabled.ompt_callback_work) {
1764 ompt_callbacks.ompt_callback(ompt_callback_work)(
1765 ompt_work_single_other, ompt_scope_begin,
1766 &(team->t.ompt_team_info.parallel_data),
1767 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1768 1, OMPT_GET_RETURN_ADDRESS(0));
1769 ompt_callbacks.ompt_callback(ompt_callback_work)(
1770 ompt_work_single_other, ompt_scope_end,
1771 &(team->t.ompt_team_info.parallel_data),
1772 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1773 1, OMPT_GET_RETURN_ADDRESS(0));
1792 __kmp_exit_single(global_tid);
1793 KMP_POP_PARTITIONED_TIMER();
1795 #if OMPT_SUPPORT && OMPT_OPTIONAL 1796 kmp_info_t *this_thr = __kmp_threads[global_tid];
1797 kmp_team_t *team = this_thr->th.th_team;
1798 int tid = __kmp_tid_from_gtid(global_tid);
1800 if (ompt_enabled.ompt_callback_work) {
1801 ompt_callbacks.ompt_callback(ompt_callback_work)(
1802 ompt_work_single_executor, ompt_scope_end,
1803 &(team->t.ompt_team_info.parallel_data),
1804 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1805 OMPT_GET_RETURN_ADDRESS(0));
1818 KMP_POP_PARTITIONED_TIMER();
1819 KE_TRACE(10, (
"__kmpc_for_static_fini called T#%d\n", global_tid));
1821 #if OMPT_SUPPORT && OMPT_OPTIONAL 1822 if (ompt_enabled.ompt_callback_work) {
1823 ompt_work_t ompt_work_type = ompt_work_loop;
1824 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1825 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1829 ompt_work_type = ompt_work_loop;
1831 ompt_work_type = ompt_work_sections;
1833 ompt_work_type = ompt_work_distribute;
1838 KMP_DEBUG_ASSERT(ompt_work_type);
1840 ompt_callbacks.ompt_callback(ompt_callback_work)(
1841 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1842 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1845 if (__kmp_env_consistency_check)
1846 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1852 void ompc_set_num_threads(
int arg) {
1854 __kmp_set_num_threads(arg, __kmp_entry_gtid());
1857 void ompc_set_dynamic(
int flag) {
1861 thread = __kmp_entry_thread();
1863 __kmp_save_internal_controls(thread);
1865 set__dynamic(thread, flag ? TRUE : FALSE);
1868 void ompc_set_nested(
int flag) {
1872 thread = __kmp_entry_thread();
1874 __kmp_save_internal_controls(thread);
1876 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1879 void ompc_set_max_active_levels(
int max_active_levels) {
1884 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1887 void ompc_set_schedule(omp_sched_t kind,
int modifier) {
1889 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1892 int ompc_get_ancestor_thread_num(
int level) {
1893 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1896 int ompc_get_team_size(
int level) {
1897 return __kmp_get_team_size(__kmp_entry_gtid(), level);
1902 void ompc_set_affinity_format(
char const *format) {
1903 if (!__kmp_init_serial) {
1904 __kmp_serial_initialize();
1906 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1907 format, KMP_STRLEN(format) + 1);
1910 size_t ompc_get_affinity_format(
char *buffer,
size_t size) {
1912 if (!__kmp_init_serial) {
1913 __kmp_serial_initialize();
1915 format_size = KMP_STRLEN(__kmp_affinity_format);
1916 if (buffer && size) {
1917 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1923 void ompc_display_affinity(
char const *format) {
1925 if (!TCR_4(__kmp_init_middle)) {
1926 __kmp_middle_initialize();
1928 gtid = __kmp_get_gtid();
1929 __kmp_aux_display_affinity(gtid, format);
1932 size_t ompc_capture_affinity(
char *buffer,
size_t buf_size,
1933 char const *format) {
1935 size_t num_required;
1936 kmp_str_buf_t capture_buf;
1937 if (!TCR_4(__kmp_init_middle)) {
1938 __kmp_middle_initialize();
1940 gtid = __kmp_get_gtid();
1941 __kmp_str_buf_init(&capture_buf);
1942 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1943 if (buffer && buf_size) {
1944 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1945 capture_buf.used + 1);
1947 __kmp_str_buf_free(&capture_buf);
1948 return num_required;
1951 void kmpc_set_stacksize(
int arg) {
1953 __kmp_aux_set_stacksize(arg);
1956 void kmpc_set_stacksize_s(
size_t arg) {
1958 __kmp_aux_set_stacksize(arg);
1961 void kmpc_set_blocktime(
int arg) {
1965 gtid = __kmp_entry_gtid();
1966 tid = __kmp_tid_from_gtid(gtid);
1967 thread = __kmp_thread_from_gtid(gtid);
1969 __kmp_aux_set_blocktime(arg, thread, tid);
1972 void kmpc_set_library(
int arg) {
1974 __kmp_user_set_library((
enum library_type)arg);
1977 void kmpc_set_defaults(
char const *str) {
1979 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1982 void kmpc_set_disp_num_buffers(
int arg) {
1985 if (__kmp_init_serial == 0 && arg > 0)
1986 __kmp_dispatch_num_buffers = arg;
1989 int kmpc_set_affinity_mask_proc(
int proc,
void **mask) {
1990 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 1993 if (!TCR_4(__kmp_init_middle)) {
1994 __kmp_middle_initialize();
1996 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2000 int kmpc_unset_affinity_mask_proc(
int proc,
void **mask) {
2001 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2004 if (!TCR_4(__kmp_init_middle)) {
2005 __kmp_middle_initialize();
2007 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2011 int kmpc_get_affinity_mask_proc(
int proc,
void **mask) {
2012 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2015 if (!TCR_4(__kmp_init_middle)) {
2016 __kmp_middle_initialize();
2018 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2068 void *cpy_data,
void (*cpy_func)(
void *,
void *),
2072 KC_TRACE(10, (
"__kmpc_copyprivate: called T#%d\n", gtid));
2076 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2078 if (__kmp_env_consistency_check) {
2080 KMP_WARNING(ConstructIdentInvalid);
2087 *data_ptr = cpy_data;
2090 ompt_frame_t *ompt_frame;
2091 if (ompt_enabled.enabled) {
2092 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2093 if (ompt_frame->enter_frame.ptr == NULL)
2094 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2095 OMPT_STORE_RETURN_ADDRESS(gtid);
2100 __kmp_threads[gtid]->th.th_ident = loc;
2102 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2105 (*cpy_func)(cpy_data, *data_ptr);
2111 if (ompt_enabled.enabled) {
2112 OMPT_STORE_RETURN_ADDRESS(gtid);
2116 __kmp_threads[gtid]->th.th_ident = loc;
2119 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2120 #if OMPT_SUPPORT && OMPT_OPTIONAL 2121 if (ompt_enabled.enabled) {
2122 ompt_frame->enter_frame = ompt_data_none;
2129 #define INIT_LOCK __kmp_init_user_lock_with_checks 2130 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2131 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2132 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2133 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2134 #define ACQUIRE_NESTED_LOCK_TIMED \ 2135 __kmp_acquire_nested_user_lock_with_checks_timed 2136 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2137 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2138 #define TEST_LOCK __kmp_test_user_lock_with_checks 2139 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2140 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2141 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2146 #if KMP_USE_DYNAMIC_LOCK 2149 static __forceinline
void __kmp_init_lock_with_hint(
ident_t *loc,
void **lock,
2150 kmp_dyna_lockseq_t seq) {
2151 if (KMP_IS_D_LOCK(seq)) {
2152 KMP_INIT_D_LOCK(lock, seq);
2154 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2157 KMP_INIT_I_LOCK(lock, seq);
2159 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2160 __kmp_itt_lock_creating(ilk->lock, loc);
2166 static __forceinline
void 2167 __kmp_init_nest_lock_with_hint(
ident_t *loc,
void **lock,
2168 kmp_dyna_lockseq_t seq) {
2171 if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2172 seq = __kmp_user_lock_seq;
2176 seq = lockseq_nested_tas;
2180 seq = lockseq_nested_futex;
2183 case lockseq_ticket:
2184 seq = lockseq_nested_ticket;
2186 case lockseq_queuing:
2187 seq = lockseq_nested_queuing;
2190 seq = lockseq_nested_drdpa;
2193 seq = lockseq_nested_queuing;
2195 KMP_INIT_I_LOCK(lock, seq);
2197 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2198 __kmp_itt_lock_creating(ilk->lock, loc);
2203 void __kmpc_init_lock_with_hint(
ident_t *loc, kmp_int32 gtid,
void **user_lock,
2205 KMP_DEBUG_ASSERT(__kmp_init_serial);
2206 if (__kmp_env_consistency_check && user_lock == NULL) {
2207 KMP_FATAL(LockIsUninitialized,
"omp_init_lock_with_hint");
2210 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2212 #if OMPT_SUPPORT && OMPT_OPTIONAL 2214 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2216 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2217 if (ompt_enabled.ompt_callback_lock_init) {
2218 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2219 ompt_mutex_lock, (omp_lock_hint_t)hint,
2220 __ompt_get_mutex_impl_type(user_lock),
2221 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2227 void __kmpc_init_nest_lock_with_hint(
ident_t *loc, kmp_int32 gtid,
2228 void **user_lock, uintptr_t hint) {
2229 KMP_DEBUG_ASSERT(__kmp_init_serial);
2230 if (__kmp_env_consistency_check && user_lock == NULL) {
2231 KMP_FATAL(LockIsUninitialized,
"omp_init_nest_lock_with_hint");
2234 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2236 #if OMPT_SUPPORT && OMPT_OPTIONAL 2238 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2240 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2241 if (ompt_enabled.ompt_callback_lock_init) {
2242 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2243 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2244 __ompt_get_mutex_impl_type(user_lock),
2245 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2250 #endif // KMP_USE_DYNAMIC_LOCK 2253 void __kmpc_init_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2254 #if KMP_USE_DYNAMIC_LOCK 2256 KMP_DEBUG_ASSERT(__kmp_init_serial);
2257 if (__kmp_env_consistency_check && user_lock == NULL) {
2258 KMP_FATAL(LockIsUninitialized,
"omp_init_lock");
2260 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2262 #if OMPT_SUPPORT && OMPT_OPTIONAL 2264 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2266 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2267 if (ompt_enabled.ompt_callback_lock_init) {
2268 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2269 ompt_mutex_lock, omp_lock_hint_none,
2270 __ompt_get_mutex_impl_type(user_lock),
2271 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2275 #else // KMP_USE_DYNAMIC_LOCK 2277 static char const *
const func =
"omp_init_lock";
2278 kmp_user_lock_p lck;
2279 KMP_DEBUG_ASSERT(__kmp_init_serial);
2281 if (__kmp_env_consistency_check) {
2282 if (user_lock == NULL) {
2283 KMP_FATAL(LockIsUninitialized, func);
2287 KMP_CHECK_USER_LOCK_INIT();
2289 if ((__kmp_user_lock_kind == lk_tas) &&
2290 (
sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2291 lck = (kmp_user_lock_p)user_lock;
2294 else if ((__kmp_user_lock_kind == lk_futex) &&
2295 (
sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2296 lck = (kmp_user_lock_p)user_lock;
2300 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2303 __kmp_set_user_lock_location(lck, loc);
2305 #if OMPT_SUPPORT && OMPT_OPTIONAL 2307 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2309 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2310 if (ompt_enabled.ompt_callback_lock_init) {
2311 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2312 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2313 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2318 __kmp_itt_lock_creating(lck);
2321 #endif // KMP_USE_DYNAMIC_LOCK 2325 void __kmpc_init_nest_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2326 #if KMP_USE_DYNAMIC_LOCK 2328 KMP_DEBUG_ASSERT(__kmp_init_serial);
2329 if (__kmp_env_consistency_check && user_lock == NULL) {
2330 KMP_FATAL(LockIsUninitialized,
"omp_init_nest_lock");
2332 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2334 #if OMPT_SUPPORT && OMPT_OPTIONAL 2336 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2338 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2339 if (ompt_enabled.ompt_callback_lock_init) {
2340 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2341 ompt_mutex_nest_lock, omp_lock_hint_none,
2342 __ompt_get_mutex_impl_type(user_lock),
2343 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2347 #else // KMP_USE_DYNAMIC_LOCK 2349 static char const *
const func =
"omp_init_nest_lock";
2350 kmp_user_lock_p lck;
2351 KMP_DEBUG_ASSERT(__kmp_init_serial);
2353 if (__kmp_env_consistency_check) {
2354 if (user_lock == NULL) {
2355 KMP_FATAL(LockIsUninitialized, func);
2359 KMP_CHECK_USER_LOCK_INIT();
2361 if ((__kmp_user_lock_kind == lk_tas) &&
2362 (
sizeof(lck->tas.lk.poll) +
sizeof(lck->tas.lk.depth_locked) <=
2363 OMP_NEST_LOCK_T_SIZE)) {
2364 lck = (kmp_user_lock_p)user_lock;
2367 else if ((__kmp_user_lock_kind == lk_futex) &&
2368 (
sizeof(lck->futex.lk.poll) +
sizeof(lck->futex.lk.depth_locked) <=
2369 OMP_NEST_LOCK_T_SIZE)) {
2370 lck = (kmp_user_lock_p)user_lock;
2374 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2377 INIT_NESTED_LOCK(lck);
2378 __kmp_set_user_lock_location(lck, loc);
2380 #if OMPT_SUPPORT && OMPT_OPTIONAL 2382 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2384 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2385 if (ompt_enabled.ompt_callback_lock_init) {
2386 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2387 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2388 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2393 __kmp_itt_lock_creating(lck);
2396 #endif // KMP_USE_DYNAMIC_LOCK 2399 void __kmpc_destroy_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2400 #if KMP_USE_DYNAMIC_LOCK 2403 kmp_user_lock_p lck;
2404 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2405 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2407 lck = (kmp_user_lock_p)user_lock;
2409 __kmp_itt_lock_destroyed(lck);
2411 #if OMPT_SUPPORT && OMPT_OPTIONAL 2413 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2415 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2416 if (ompt_enabled.ompt_callback_lock_destroy) {
2417 kmp_user_lock_p lck;
2418 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2419 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2421 lck = (kmp_user_lock_p)user_lock;
2423 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2424 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2427 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2429 kmp_user_lock_p lck;
2431 if ((__kmp_user_lock_kind == lk_tas) &&
2432 (
sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2433 lck = (kmp_user_lock_p)user_lock;
2436 else if ((__kmp_user_lock_kind == lk_futex) &&
2437 (
sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2438 lck = (kmp_user_lock_p)user_lock;
2442 lck = __kmp_lookup_user_lock(user_lock,
"omp_destroy_lock");
2445 #if OMPT_SUPPORT && OMPT_OPTIONAL 2447 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2449 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2450 if (ompt_enabled.ompt_callback_lock_destroy) {
2451 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2452 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2457 __kmp_itt_lock_destroyed(lck);
2461 if ((__kmp_user_lock_kind == lk_tas) &&
2462 (
sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2466 else if ((__kmp_user_lock_kind == lk_futex) &&
2467 (
sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2472 __kmp_user_lock_free(user_lock, gtid, lck);
2474 #endif // KMP_USE_DYNAMIC_LOCK 2478 void __kmpc_destroy_nest_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2479 #if KMP_USE_DYNAMIC_LOCK 2482 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2483 __kmp_itt_lock_destroyed(ilk->lock);
2485 #if OMPT_SUPPORT && OMPT_OPTIONAL 2487 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2489 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2490 if (ompt_enabled.ompt_callback_lock_destroy) {
2491 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2492 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2495 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2497 #else // KMP_USE_DYNAMIC_LOCK 2499 kmp_user_lock_p lck;
2501 if ((__kmp_user_lock_kind == lk_tas) &&
2502 (
sizeof(lck->tas.lk.poll) +
sizeof(lck->tas.lk.depth_locked) <=
2503 OMP_NEST_LOCK_T_SIZE)) {
2504 lck = (kmp_user_lock_p)user_lock;
2507 else if ((__kmp_user_lock_kind == lk_futex) &&
2508 (
sizeof(lck->futex.lk.poll) +
sizeof(lck->futex.lk.depth_locked) <=
2509 OMP_NEST_LOCK_T_SIZE)) {
2510 lck = (kmp_user_lock_p)user_lock;
2514 lck = __kmp_lookup_user_lock(user_lock,
"omp_destroy_nest_lock");
2517 #if OMPT_SUPPORT && OMPT_OPTIONAL 2519 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2521 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2522 if (ompt_enabled.ompt_callback_lock_destroy) {
2523 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2524 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2529 __kmp_itt_lock_destroyed(lck);
2532 DESTROY_NESTED_LOCK(lck);
2534 if ((__kmp_user_lock_kind == lk_tas) &&
2535 (
sizeof(lck->tas.lk.poll) +
sizeof(lck->tas.lk.depth_locked) <=
2536 OMP_NEST_LOCK_T_SIZE)) {
2540 else if ((__kmp_user_lock_kind == lk_futex) &&
2541 (
sizeof(lck->futex.lk.poll) +
sizeof(lck->futex.lk.depth_locked) <=
2542 OMP_NEST_LOCK_T_SIZE)) {
2547 __kmp_user_lock_free(user_lock, gtid, lck);
2549 #endif // KMP_USE_DYNAMIC_LOCK 2552 void __kmpc_set_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2554 #if KMP_USE_DYNAMIC_LOCK 2555 int tag = KMP_EXTRACT_D_TAG(user_lock);
2557 __kmp_itt_lock_acquiring(
2561 #if OMPT_SUPPORT && OMPT_OPTIONAL 2563 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2565 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2566 if (ompt_enabled.ompt_callback_mutex_acquire) {
2567 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2568 ompt_mutex_lock, omp_lock_hint_none,
2569 __ompt_get_mutex_impl_type(user_lock),
2570 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2573 #if KMP_USE_INLINED_TAS 2574 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2575 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2577 #elif KMP_USE_INLINED_FUTEX 2578 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2579 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2583 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2586 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2588 #if OMPT_SUPPORT && OMPT_OPTIONAL 2589 if (ompt_enabled.ompt_callback_mutex_acquired) {
2590 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2591 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2595 #else // KMP_USE_DYNAMIC_LOCK 2597 kmp_user_lock_p lck;
2599 if ((__kmp_user_lock_kind == lk_tas) &&
2600 (
sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2601 lck = (kmp_user_lock_p)user_lock;
2604 else if ((__kmp_user_lock_kind == lk_futex) &&
2605 (
sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2606 lck = (kmp_user_lock_p)user_lock;
2610 lck = __kmp_lookup_user_lock(user_lock,
"omp_set_lock");
2614 __kmp_itt_lock_acquiring(lck);
2616 #if OMPT_SUPPORT && OMPT_OPTIONAL 2618 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2620 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2621 if (ompt_enabled.ompt_callback_mutex_acquire) {
2622 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2623 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2624 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2628 ACQUIRE_LOCK(lck, gtid);
2631 __kmp_itt_lock_acquired(lck);
2634 #if OMPT_SUPPORT && OMPT_OPTIONAL 2635 if (ompt_enabled.ompt_callback_mutex_acquired) {
2636 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2637 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2641 #endif // KMP_USE_DYNAMIC_LOCK 2644 void __kmpc_set_nest_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2645 #if KMP_USE_DYNAMIC_LOCK 2648 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2650 #if OMPT_SUPPORT && OMPT_OPTIONAL 2652 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2654 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2655 if (ompt_enabled.enabled) {
2656 if (ompt_enabled.ompt_callback_mutex_acquire) {
2657 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2658 ompt_mutex_nest_lock, omp_lock_hint_none,
2659 __ompt_get_mutex_impl_type(user_lock),
2660 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2664 int acquire_status =
2665 KMP_D_LOCK_FUNC(user_lock,
set)((kmp_dyna_lock_t *)user_lock, gtid);
2666 (void) acquire_status;
2668 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2671 #if OMPT_SUPPORT && OMPT_OPTIONAL 2672 if (ompt_enabled.enabled) {
2673 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2674 if (ompt_enabled.ompt_callback_mutex_acquired) {
2676 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2677 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2681 if (ompt_enabled.ompt_callback_nest_lock) {
2683 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2684 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2690 #else // KMP_USE_DYNAMIC_LOCK 2692 kmp_user_lock_p lck;
2694 if ((__kmp_user_lock_kind == lk_tas) &&
2695 (
sizeof(lck->tas.lk.poll) +
sizeof(lck->tas.lk.depth_locked) <=
2696 OMP_NEST_LOCK_T_SIZE)) {
2697 lck = (kmp_user_lock_p)user_lock;
2700 else if ((__kmp_user_lock_kind == lk_futex) &&
2701 (
sizeof(lck->futex.lk.poll) +
sizeof(lck->futex.lk.depth_locked) <=
2702 OMP_NEST_LOCK_T_SIZE)) {
2703 lck = (kmp_user_lock_p)user_lock;
2707 lck = __kmp_lookup_user_lock(user_lock,
"omp_set_nest_lock");
2711 __kmp_itt_lock_acquiring(lck);
2713 #if OMPT_SUPPORT && OMPT_OPTIONAL 2715 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2717 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2718 if (ompt_enabled.enabled) {
2719 if (ompt_enabled.ompt_callback_mutex_acquire) {
2720 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2721 ompt_mutex_nest_lock, omp_lock_hint_none,
2722 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2728 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2731 __kmp_itt_lock_acquired(lck);
2734 #if OMPT_SUPPORT && OMPT_OPTIONAL 2735 if (ompt_enabled.enabled) {
2736 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2737 if (ompt_enabled.ompt_callback_mutex_acquired) {
2739 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2740 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2743 if (ompt_enabled.ompt_callback_nest_lock) {
2745 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2746 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
/* release the lock for the lock object pointed to by user_lock */
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  // Fast path for TAS locks that fit in the user's lock word.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

  __kmp_itt_lock_releasing(lck);

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
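
/* Illustrative sketch, not part of the runtime: __kmpc_set_lock /
   __kmpc_unset_lock back the plain omp_set_lock() / omp_unset_lock() API.
   Names below are hypothetical.

     #include <omp.h>

     static omp_lock_t lock;         // omp_init_lock(&lock) done elsewhere

     void push(int v, int *buf, int *n) {
       omp_set_lock(&lock);          // blocks until the lock is acquired
       buf[(*n)++] = v;
       omp_unset_lock(&lock);        // release; other threads may now enter
     }
*/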
/* release the lock for the nestable lock object pointed to by user_lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void) release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path for nestable TAS locks
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

  __kmp_itt_lock_releasing(lck);

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the lock; returns TRUE on success, FALSE otherwise */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

  __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

#endif // KMP_USE_DYNAMIC_LOCK
}
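
/* Illustrative sketch, not part of the runtime: __kmpc_test_lock backs
   omp_test_lock(), which returns nonzero only if the lock was acquired
   without blocking. Names below are hypothetical.

     #include <omp.h>

     static omp_lock_t lock;

     void try_update(int *shared, int mine) {
       if (omp_test_lock(&lock)) {   // acquired: safe to touch shared state
         *shared += mine;
         omp_unset_lock(&lock);
       } else {
         // lock busy: skip or do other useful work instead of blocking
       }
     }
*/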
/* try to acquire the nestable lock; returns the new nesting count on success,
   0 if the lock is held by another thread */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

#endif // KMP_USE_DYNAMIC_LOCK
}
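
/* Illustrative sketch, not part of the runtime: __kmpc_test_nest_lock backs
   omp_test_nest_lock(), whose return value is the new nesting depth (0 if the
   lock is held by another thread). Names below are hypothetical.

     #include <omp.h>

     static omp_nest_lock_t nlock;

     int try_enter(void) {
       int depth = omp_test_nest_lock(&nlock);  // 0 means "busy, not acquired"
       if (depth > 0) {
         // ... critical work ...
         omp_unset_nest_lock(&nlock);
       }
       return depth;
     }
*/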
/* interface to fast scalable reduce methods routines */

// keep the selected method in a thread-local structure for cross-function
// usage: it is read back by the __kmpc_end_reduce* entry points
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// used in a critical-section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if the lock is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and the set operation.
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // Fast reduction code uses 32-byte critical sections; if there is not enough
  // space in the name, fall back to a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);
#endif // KMP_USE_DYNAMIC_LOCK
}

// used in a critical-section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);
#endif // KMP_USE_DYNAMIC_LOCK
}

static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside the teams construct.
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // Reduction at the teams construct: swap teams temporarily.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;
      return 1;
    }
  }
  return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore master thread's partition.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
/* Non-blocking ("nowait") form of the reduction entry point. */
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                               kmp_int32 num_vars, size_t reduce_size,
                               void *reduce_data,
                               void (*reduce_func)(void *lhs_data,
                                                   void *rhs_data),
                               kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  // Reduction clause cannot be used as a stand-alone directive, but make sure
  // the runtime is initialized anyway.
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // team size == 1: no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads pop here because __kmpc_end_reduce_nowait() is not
    // generated for the atomic case
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except master pop here (they do not reach
    // __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
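
/* Illustrative sketch, not part of the runtime: roughly how compiler-generated
   code is expected to drive this entry point. The combiner my_combiner(), the
   variables and the critical name are hypothetical; the return-value
   convention (1 = combine serially then call __kmpc_end_reduce_nowait,
   2 = combine with atomics, 0 = nothing to do) follows the code above.

     ret = __kmpc_reduce_nowait(loc, gtid, 1, sizeof(data), &data,
                                my_combiner, &crit_name);
     if (ret == 1) {
       my_combiner(&shared, &data);              // combine private copy
       __kmpc_end_reduce_nowait(loc, gtid, &crit_name);
     } else if (ret == 2) {
       // combine using an atomic update instead
     }
*/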
/* Finish the execution of a reduce nowait. */
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // team size == 1: no synchronization is required

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither master nor workers should get here (this call is not generated
    // for the atomic case)

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here

  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}
/* Blocking form of the reduction: includes an implicit barrier. */
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    // team size == 1: no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// this barrier is visible to customers and profiling tools (it is a
// terminating barrier on constructs if NOWAIT is not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames

    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except master pop here
    // (none of the other workers will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}
/* Finish the execution of a blocking reduce; lck must be the same pointer that
   was passed to the matching __kmpc_reduce() call. */
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier is visible to customers and profiling tools (it is a
  // terminating barrier on constructs if NOWAIT is not specified)

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

// team size == 1: no synchronization is required
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}
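
/* Illustrative sketch, not part of the runtime: the user-level construct that
   is lowered onto the __kmpc_reduce* entry points. Variable names are
   hypothetical; only the OpenMP pragma is real.

     double sum = 0.0;
     #pragma omp parallel for reduction(+ : sum)
     for (int i = 0; i < n; ++i)
       sum += a[i];   // each thread reduces into a private copy; the copies
                      // are then combined by one of the methods selected above
*/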
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

kmp_uint64 __kmpc_get_taskid() {
  kmp_int32 gtid;
  kmp_info_t *thread;
  gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;
}

kmp_uint64 __kmpc_get_parent_taskid() {
  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;
  gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);
}
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // next shared buffer index for the loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into the allocated private buffer.
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Also save the address of num_done so it can be read later without knowing
  // the buffer index.
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64 range_length; // range of this dimension (dims[0] not kept here)
    if (dims[j].st == 1) { // most common case
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count, starting with the range of dims[0].
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if the shared buffer is still occupied by a previous loop.
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free.
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // After the CAS the first thread gets NULL, others get 1 while the
  // initialization is in progress, or the allocated pointer afterwards.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread: allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, one bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
// initialization is still in progress: wait for it to finish
#if KMP_32_BIT_ARCH
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // private copy to avoid touching the shared
                              // buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5;       // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate the sequential iteration number (same as in "wait", but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5;       // divided by 32
  flag = 1 << shft;
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread: free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (the buffer index itself is kept forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
/* OpenMP 5.0 memory management support */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
/* end of OpenMP 5.0 memory management support */

int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // can't pause if the runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
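
/* Illustrative sketch, not part of the runtime: omp_alloc()/omp_free() as seen
   from user code. omp_default_mem_allocator is a predefined OpenMP 5.0
   allocator; the buffer name is hypothetical.

     #include <omp.h>

     double *buf = (double *)omp_alloc(1024 * sizeof(double),
                                       omp_default_mem_allocator);
     // ... use buf ...
     omp_free(buf, omp_default_mem_allocator);
*/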