#include "ompt-specific.h"

#define MAX_MESSAGE 512
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);

#if KMP_OS_WINDOWS && OMPT_SUPPORT

  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);

  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());

  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
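
// Note: the filter below implements the parallel-range check driven by the
// __kmp_par_range_* globals. It walks the ident_t 'psource' string, whose
// fields are separated by ';' (source file, routine name, line number), and
// compares the file basename, the routine name, and the line number against
// __kmp_par_range_filename, __kmp_par_range_routine, and the
// [__kmp_par_range_lb, __kmp_par_range_ub] bounds. A mismatch yields
// (__kmp_par_range < 0); a line number inside the range yields
// (__kmp_par_range > 0).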
  if (__kmp_par_range == 0) {

  semi2 = strchr(semi2, ';');

  semi2 = strchr(semi2 + 1, ';');

  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {

    if ((*name == '/') || (*name == ';')) {

    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;

  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;

  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;

    return __kmp_par_range < 0;

  return __kmp_entry_thread()->th.th_root->r.r_active;
                            kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);

void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 global_tid,
                                    kmp_int32 num_threads, int severity,
                                    const char *message) {
  __kmp_push_num_threads(loc, global_tid, num_threads);
  __kmp_set_strict_num_threads(loc, global_tid, severity, message);
                                 kmp_uint32 list_length,
                                 kmp_int32 *num_threads_list) {
  KA_TRACE(20, ("__kmpc_push_num_threads_list: enter T#%d num_threads_list=",

  KA_TRACE(20, ("%d", num_threads_list[0]));

  for (kmp_uint32 i = 1; i < list_length; ++i)
    KA_TRACE(20, (", %d", num_threads_list[i]));

  KA_TRACE(20, ("\n"));

  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
void __kmpc_push_num_threads_list_strict(ident_t *loc, kmp_int32 global_tid,
                                         kmp_uint32 list_length,
                                         kmp_int32 *num_threads_list,
                                         int severity, const char *message) {
  __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
  __kmp_set_strict_num_threads(loc, global_tid, severity, message);

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,

  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);

    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);

  va_start(ap, microtask);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    kmp_info_t *master_th = __kmp_threads[gtid];
    ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) microtask,
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,

  __kmp_join_call(loc, gtid

  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);

  KMP_POP_PARTITIONED_TIMER();
                          kmp_int32 cond, void *args) {
  int gtid = __kmp_entry_gtid();

  void *exit_frame_ptr;

    __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,

    __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
                          kmp_int32 num_teams, kmp_int32 num_threads) {

      ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
       global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);

                            kmp_int32 thread_limit) {
  __kmp_assert_valid_gtid(global_tid);
  kmp_info_t *thread = __kmp_threads[global_tid];
  if (thread_limit > 0)
    thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;

                             kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];

  va_start(ap, microtask);

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);

    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);

  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level;

  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);

  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master,
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);

  KMP_POP_PARTITIONED_TIMER();
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
  __kmp_assert_valid_gtid(global_tid);

  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_serialized_parallel(loc, global_tid);
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

      ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered))
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);

    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));

    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;

  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;

  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);

    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);

  if (serial_team->t.t_serialized > 1) {
    __kmp_pop_task_team_node(this_thr, serial_team);

  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);

    __kmp_pop_current_task_from_thread(this_thr);

    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
    this_thr->th.th_team_master = serial_team->t.t_parent->t.t_threads[0];
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {

      KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 ||
                       serial_team->t.t_primary_task_state == 1);
      this_thr->th.th_task_state =
          (kmp_uint8)serial_team->t.t_primary_task_state;

      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];

          ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
           global_tid, this_thr->th.th_task_team, this_thr->th.th_team));

#if KMP_AFFINITY_SUPPORTED
    if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
      __kmp_reset_root_init_mask(global_tid);

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));

  serial_team->t.t_level--;

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);

  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
  KC_TRACE(10, ("__kmpc_flush: called\n"));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {

      KMP_WARNING(ConstructIdentInvalid);

    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {

    KMP_PUSH_PARTITIONED_TIMER(OMP_master);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  if (ompt_enabled.ompt_callback_masked) {
    kmp_info_t *this_thr = __kmp_threads[global_tid];
    kmp_team_t *team = this_thr->th.th_team;

    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK

      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);

      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);

      __kmp_push_sync(global_tid, ct_master, loc, NULL);

      __kmp_check_sync(global_tid, ct_master, loc, NULL);

  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);

    KMP_PUSH_PARTITIONED_TIMER(OMP_masked);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  if (ompt_enabled.ompt_callback_masked) {
    kmp_info_t *this_thr = __kmp_threads[global_tid];
    kmp_team_t *team = this_thr->th.th_team;
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK

      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);

      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);

      __kmp_push_sync(global_tid, ct_masked, loc, NULL);

      __kmp_check_sync(global_tid, ct_masked, loc, NULL);

  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  __kmp_itt_ordered_prep(gtid);

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL

  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;

    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {

    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  __kmp_itt_ordered_start(gtid);
  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  __kmp_itt_ordered_end(gtid);

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {

  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);

      ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));

  __kmp_itt_critical_creating(ilk->lock, loc);

  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);

    __kmp_itt_critical_destroyed(ilk->lock);

  KMP_DEBUG_ASSERT(*lck != NULL);
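
// The KMP_ACQUIRE/TEST/RELEASE_TAS_LOCK macros below are the inlined fast
// path for test-and-set locks used by critical sections and the omp_*_lock
// entry points: acquire first tries a relaxed load plus an acquire
// compare-and-swap on lk.poll and only on contention falls into a
// yield/backoff spin loop; release is a single release store of
// KMP_LOCK_FREE(tas).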
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD_SPIN(spins); \
        __kmp_spin_backoff(&backoff); \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    KMP_FSYNC_ACQUIRED(l); \

#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \

#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
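
// The futex-based counterparts below follow the usual futex lock protocol:
// acquire installs the owner via compare-and-swap on lk.poll and, when
// contended, sets the low "waiter" bit and parks the thread in a
// syscall(__NR_futex, ..., FUTEX_WAIT, ...); release exchanges the poll word
// back to KMP_LOCK_FREE(futex) and issues FUTEX_WAKE if the waiter bit was
// observed.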
#include <sys/syscall.h>

#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         KMP_LOCK_BUSY(1, futex))) { \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
    KMP_FSYNC_ACQUIRED(ftx); \

#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \

#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    KMP_YIELD_OVERSUB(); \
static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,

  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);

    __kmp_itt_critical_creating(lck);

    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

      __kmp_itt_critical_destroyed(lck);

      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;

  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;

    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {

    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
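
// __kmp_map_hint_to_lock (below) translates an omp_lock_hint_t /
// kmp_lock_hint_t value into a dynamic lock sequence: TSX-based sequences
// (hle, rtm_queuing, rtm_spin, adaptive) are chosen only when KMP_CPUINFO_RTM
// reports hardware support, contradictory hint combinations fall back to
// __kmp_user_lock_seq, and omp_lock_hint_contended selects a queuing lock.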
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {

#define KMP_TSX_LOCK(seq) lockseq_##seq

#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)

#define KMP_CPUINFO_RTM 0

  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))

  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK

static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {

  switch (KMP_EXTRACT_D_TAG(user_lock)) {

    return kmp_mutex_impl_queuing;

    return kmp_mutex_impl_spin;

  case locktag_rtm_spin:
    return kmp_mutex_impl_speculative;

    return kmp_mutex_impl_none;

  ilock = KMP_LOOKUP_I_LOCK(user_lock);

  switch (ilock->type) {

  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;

  case locktag_nested_tas:
    return kmp_mutex_impl_spin;

  case locktag_nested_futex:

  case locktag_ticket:
  case locktag_queuing:

  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;

    return kmp_mutex_impl_none;

static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {

    return kmp_mutex_impl_spin;

    return kmp_mutex_impl_queuing;

  case lk_rtm_queuing:

    return kmp_mutex_impl_speculative;

    return kmp_mutex_impl_none;
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {

  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);

  if (KMP_IS_D_LOCK(lockseq)) {
    KMP_COMPARE_AND_STORE_ACQ32(
        (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
        KMP_GET_D_TAG(lockseq));

    __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));

  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));

    __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;

      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,

#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);

#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);

      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);

    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);

    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));

    __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;

      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,

    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);

  KMP_POP_PARTITIONED_TIMER();

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {

    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
                          kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);

    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);

    __kmp_itt_critical_releasing(lck);

#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);

#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);

      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);

    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);

    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);

    __kmp_itt_critical_releasing(lck);

    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_itt_critical_releasing(lck);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  return (status != 0) ? 0 : 1;

  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {

      KMP_WARNING(ConstructIdentInvalid);

    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  if (__kmp_env_consistency_check) {

      __kmp_pop_sync(global_tid, ct_master, loc);
  __kmp_assert_valid_gtid(global_tid);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

    KMP_PUSH_PARTITIONED_TIMER(OMP_single);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {

    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_single_executor, ompt_scope_begin,
          &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          1, OMPT_GET_RETURN_ADDRESS(0));

    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_single_other, ompt_scope_begin,
          &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          1, OMPT_GET_RETURN_ADDRESS(0));
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_single_other, ompt_scope_end,
          &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          1, OMPT_GET_RETURN_ADDRESS(0));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop_static;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);

      ompt_work_type = ompt_work_loop_static;

      ompt_work_type = ompt_work_sections;

      ompt_work_type = ompt_work_distribute;

      KMP_DEBUG_ASSERT(ompt_work_type);

    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
void ompc_set_num_threads(int arg) {

  __kmp_set_num_threads(arg, __kmp_entry_gtid());

void ompc_set_dynamic(int flag) {

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? true : false);

void ompc_set_nested(int flag) {

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);

void ompc_set_max_active_levels(int max_active_levels) {

  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);

void ompc_set_schedule(omp_sched_t kind, int modifier) {

  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);

void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();

  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);

size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {

  if (!__kmp_init_serial) {
    __kmp_serial_initialize();

  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,

void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {

  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
      __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);

  __kmp_aux_display_affinity(gtid, format);

size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
                                              char const *format) {

  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
      __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);

  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);

  __kmp_str_buf_free(&capture_buf);
  return num_required;
void kmpc_set_stacksize(int arg) {

  __kmp_aux_set_stacksize(arg);

void kmpc_set_stacksize_s(size_t arg) {

  __kmp_aux_set_stacksize(arg);

void kmpc_set_blocktime(int arg) {
  int gtid, tid, bt = arg;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_convert_blocktime(&bt);
  __kmp_aux_set_blocktime(bt, thread, tid);

void kmpc_set_library(int arg) {

  __kmp_user_set_library((enum library_type)arg);

void kmpc_set_defaults(char const *str) {

  __kmp_aux_set_defaults(str, KMP_STRLEN(str));

void kmpc_set_disp_num_buffers(int arg) {

  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
      arg <= KMP_MAX_DISP_NUM_BUFF) {
    __kmp_dispatch_num_buffers = arg;

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED

  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  return __kmp_aux_set_affinity_mask_proc(proc, mask);

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED

  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED

  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
                        void *cpy_data, void (*cpy_func)(void *, void *),

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {

      KMP_WARNING(ConstructIdentInvalid);

  *data_ptr = cpy_data;

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  (*cpy_func)(cpy_data, *data_ptr);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
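
// The two __kmp_barrier calls above implement the usual copyprivate protocol:
// the first barrier publishes the source thread's cpy_data pointer through
// the team's t_copypriv_data field so the other threads can run cpy_func to
// copy it into their private variables; the second barrier keeps the source
// data alive until all copies have completed.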
  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {

      KMP_WARNING(ConstructIdentInvalid);

  *data_ptr = cpy_data;

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
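
// These aliases let the lock entry points below (in their non
// KMP_USE_DYNAMIC_LOCK branches) call into the consistency-checking
// user-lock API (the *_with_checks routines) without naming it directly.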
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);

    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);

    KMP_INIT_I_LOCK(lock, seq);

    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);

static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {

  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
      seq == lockseq_rtm_spin || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;

    seq = lockseq_nested_tas;

    seq = lockseq_nested_futex;

  case lockseq_ticket:
    seq = lockseq_nested_ticket;

  case lockseq_queuing:
    seq = lockseq_nested_queuing;

    seq = lockseq_nested_drdpa;

    seq = lockseq_nested_queuing;

  KMP_INIT_I_LOCK(lock, seq);

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
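
// __kmp_init_nest_lock_with_hint (above) first discards TSX-based sequences,
// which have no nestable variant, and then promotes the remaining sequence to
// its nested counterpart (tas -> nested_tas, ticket -> nested_ticket, and so
// on), defaulting to a nested queuing lock, before initializing the indirect
// lock object.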
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);

  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_creating(lck);
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_creating(lck);
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;

    lck = (kmp_user_lock_p)user_lock;

  __kmp_itt_lock_destroyed(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_destroyed(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {

    __kmp_user_lock_free(user_lock, gtid, lck);
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_destroyed(lck);

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {

    __kmp_user_lock_free(user_lock, gtid, lck);
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {

#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);

  __kmp_itt_lock_acquiring(

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);

#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);

    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);

  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);

  ACQUIRE_LOCK(lck, gtid);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;

  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {

        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,

      if (ompt_enabled.ompt_callback_nest_lock) {

        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {

        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);

      if (ompt_enabled.ompt_callback_nest_lock) {

        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  // release the lock
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

  __kmp_itt_lock_releasing(lck);

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

  __kmp_itt_lock_releasing(lck);

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif
#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

  __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);
#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the nested lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;
#endif // KMP_USE_DYNAMIC_LOCK
}
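/* A minimal usage sketch (not part of the runtime build): the __kmpc_*_lock
   entries above exist to support the user-level OpenMP lock API. The sketch
   below only uses standard omp.h routines; the mapping from these routines to
   the __kmpc entries is an assumption about how a compiler may lower them. */
#if 0
#include <omp.h>
#include <stdio.h>

int lock_usage_example(void) {
  omp_lock_t l;
  omp_init_lock(&l);
#pragma omp parallel num_threads(4)
  {
    if (omp_test_lock(&l)) { // non-blocking attempt (cf. __kmpc_test_lock)
      printf("thread %d got the lock without waiting\n", omp_get_thread_num());
      omp_unset_lock(&l); // release (cf. __kmpc_unset_lock)
    } else {
      omp_set_lock(&l); // blocking acquire (cf. __kmpc_set_lock)
      omp_unset_lock(&l);
    }
  }
  omp_destroy_lock(&l);
  return 0;
}
#endif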
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as
  // a serial overhead span (although it's used for an internal purpose only)
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation.
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we
  // switch to a pointer version.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside the teams construct
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // this is reduction at teams construct
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // check that tid == 0
      // Let's swap teams temporarily for the reduction
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;
      return 1;
    }
  }
  return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped by __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  __kmp_type_convert(task_state, &(th->th.th_task_state));
}
kmp_int32 __kmpc_reduce_nowait(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // usage: if team size == 1, no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block: this barrier should be invisible to a customer and
// to the threading profile tool (it's used for an internal purpose)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except the primary thread should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    KMP_ASSERT(0); // should never reach this block: unexpected method
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
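/* A hedged sketch (not generated by any particular compiler) of the call
   pattern __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait() are designed for:
   the return value selects whether this thread combines into the shared
   variable (1), performs an atomic update (2), or does nothing (0). The names
   loc_ref, reduce_fn, reduce_crit, sum and local_sum are illustrative only. */
#if 0
static void reduce_fn(void *lhs, void *rhs) {
  *(int *)lhs += *(int *)rhs; // combine two private partial sums
}

static kmp_critical_name reduce_crit; // zero-initialized critical name

static void reduce_tail(ident_t *loc_ref, kmp_int32 gtid, int *sum,
                        int local_sum) {
  switch (__kmpc_reduce_nowait(loc_ref, gtid, /*num_vars=*/1, sizeof(int),
                               &local_sum, reduce_fn, &reduce_crit)) {
  case 1: // this thread combines directly into the shared variable
    *sum += local_sum;
    __kmpc_end_reduce_nowait(loc_ref, gtid, &reduce_crit);
    break;
  case 2: // runtime chose the atomic reduction method
#pragma omp atomic
    *sum += local_sum;
    break;
  default: // 0: nothing to do on this thread
    break;
  }
}
#endif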
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10,
           ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;
  } else if (packed_reduction_method == empty_reduce_block) {
    // usage: if team size == 1, no synchronization is required
    OMPT_REDUCTION_END;
  } else if (packed_reduction_method == atomic_reduce_block) {
    // neither primary thread nor other workers should get here
    // (code gen does not generate this call for the atomic method)
  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {
    // only the primary thread gets here
  } else {
    KMP_ASSERT(0); // should never reach this block: unexpected method
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}
kmp_int32 __kmpc_reduce(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    // usage: if team size == 1, no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// case tree_reduce_block: this barrier should be visible to a customer and to
// the threading profile tool (it's a terminating barrier on constructs if
// NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames

    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except the primary thread should do this pop here
    // (none of the other workers except primary will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: primary thread
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    KMP_ASSERT(0); // should never reach this block: unexpected method
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

    OMPT_REDUCTION_END;

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_END;

// usage: if team size == 1, no synchronization is required
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the primary thread executes here (primary releases all other
    // workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {
    KMP_ASSERT(0); // should never reach this block: unexpected method
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD
kmp_uint64 __kmpc_get_taskid() {
  kmp_int32 gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;
} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {
  kmp_int32 gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);
} // __kmpc_get_parent_taskid
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  __kmp_assert_valid_gtid(gtid);
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // advance index of shared buffer
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into the allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also the address of num_done in order to access it later without
  // knowing the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64 range_length; // range of each dimension but the first dims[0]
    if (dims[j].st == 1) { // most common case
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count; start with range of dims[0] which we don't
  // need to keep in the buffer
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if shared buffer is not occupied by another loop
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
  // Check if we are the first thread: after the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise
#if KMP_32_BIT_ARCH
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size =
        (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) { // initialization is still in progress
#if KMP_32_BIT_ARCH
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy to avoid touching the
  // shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  SimpleVLA<ompt_dependence_t> deps(num_dims);
#endif
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_sink;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_sink;
#endif
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }

  // calculate the sequential iteration number (same as in "wait" but without
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  SimpleVLA<ompt_dependence_t> deps(num_dims);
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done =
      KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep the buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
}

void *omp_aligned_alloc(size_t align, size_t size,
                        omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
}

void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
}

void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
                         omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
}

void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                  omp_allocator_handle_t free_allocator) {
  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                       free_allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
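/* A minimal usage sketch of the OpenMP memory-management API wrapped above,
   using only the standard predefined allocator from omp.h. */
#if 0
#include <omp.h>
#include <string.h>

void allocator_example(void) {
  // 1 KiB from the default allocator, plus a 64-byte aligned allocation.
  double *p = (double *)omp_alloc(1024, omp_default_mem_alloc);
  double *q = (double *)omp_aligned_alloc(64, 1024, omp_default_mem_alloc);
  if (p)
    memset(p, 0, 1024);
  omp_free(q, omp_default_mem_alloc);
  omp_free(p, omp_default_mem_alloc);
}
#endif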
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // can't pause if the runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
void __kmpc_error(ident_t *loc, int severity, const char *message) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
    ompt_callbacks.ompt_callback(ompt_callback_error)(
        (ompt_severity_t)severity, message, KMP_STRLEN(message),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT

  char *src_loc;
  if (loc && loc->psource) {
    kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
    src_loc =
        __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
    __kmp_str_loc_free(&str_loc);
  } else {
    src_loc = __kmp_str_format("unknown");
  }

  if (severity == severity_warning)
    KMP_WARNING(UserDirectedWarning, src_loc, message);
  else
    KMP_FATAL(UserDirectedError, src_loc, message);

  __kmp_str_free(&src_loc);
}
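/* A hedged sketch of the OpenMP 5.1 `error` directive that this entry point
   reports; whether a given compiler lowers the execution-time form to
   __kmpc_error is an assumption, the directive itself is standard. */
#if 0
void error_directive_example(int bad_input) {
  if (bad_input) {
// severity(warning) keeps executing; severity(fatal) would terminate.
#pragma omp error at(execution) severity(warning) message("bad input, continuing")
  }
}
#endif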
// Mark begin of scope directive.
void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
// reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_begin,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}

// Mark end of scope directive.
void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
// reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}
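/* A hedged sketch of the OpenMP 5.1 `scope` construct whose begin/end events
   the two entries above report to OMPT tools; the reduction clause shown is
   just one typical use of the construct. */
#if 0
void scope_example(void) {
  int sum = 0;
#pragma omp parallel
  {
#pragma omp scope reduction(+ : sum) // entry/exit reported as ompt_work_scope
    {
      sum += 1;
    }
  }
}
#endif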
#ifdef KMP_USE_VERSION_SYMBOLS
// Have to undef these from omp.h so they aren't translated into their ompc
// counterparts in the KMP_VERSION_OMPC_SYMBOL macros below.
#ifdef omp_set_affinity_format
#undef omp_set_affinity_format
#endif
#ifdef omp_get_affinity_format
#undef omp_get_affinity_format
#endif
#ifdef omp_display_affinity
#undef omp_display_affinity
#endif
#ifdef omp_capture_affinity
#undef omp_capture_affinity
#endif
KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
                        "OMP_5.0");
#endif // KMP_USE_VERSION_SYMBOLS