LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#define __KMP_IMP
14#include "omp.h" /* extern "C" declarations of user-visible routines */
15#include "kmp.h"
16#include "kmp_error.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_lock.h"
20#include "kmp_stats.h"
21#include "kmp_utils.h"
22#include "ompt-specific.h"
23
24#define MAX_MESSAGE 512
25
26// flags will be used in future, e.g. to implement openmp_strict library
27// restrictions
28
37void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
38 // By default __kmpc_begin() is no-op.
39 char *env;
40 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
41 __kmp_str_match_true(env)) {
42 __kmp_middle_initialize();
43 __kmp_assign_root_init_mask();
44 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
45 } else if (__kmp_ignore_mppbeg() == FALSE) {
46 // By default __kmp_ignore_mppbeg() returns TRUE.
47 __kmp_internal_begin();
48 KC_TRACE(10, ("__kmpc_begin: called\n"));
49 }
50}
51
60void __kmpc_end(ident_t *loc) {
61 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
62 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
63 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
64 // returns FALSE and __kmpc_end() will unregister this root (it can cause
65 // library shut down).
66 if (__kmp_ignore_mppend() == FALSE) {
67 KC_TRACE(10, ("__kmpc_end: called\n"));
68 KA_TRACE(30, ("__kmpc_end\n"));
69
70 __kmp_internal_end_thread(-1);
71 }
72#if KMP_OS_WINDOWS && OMPT_SUPPORT
73 // Normal exit process on Windows does not allow worker threads of the final
74 // parallel region to finish reporting their events, so shutting down the
75 // library here fixes the issue at least for the cases where __kmpc_end() is
76 // placed properly.
77 if (ompt_enabled.enabled)
78 __kmp_internal_end_library(__kmp_gtid_get_specific());
79#endif
80}
81
100kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
101 kmp_int32 gtid = __kmp_entry_gtid();
102
103 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
104
105 return gtid;
106}
107
122kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
123 KC_TRACE(10,
124 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
125
126 return TCR_4(__kmp_all_nth);
127}
128
135kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
136 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
137 return __kmp_tid_from_gtid(__kmp_entry_gtid());
138}
139
145kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
146 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
147
148 return __kmp_entry_thread()->th.th_team->t.t_nproc;
149}
150
157kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
158#ifndef KMP_DEBUG
159
160 return TRUE;
161
162#else
163
164 const char *semi2;
165 const char *semi3;
166 int line_no;
167
168 if (__kmp_par_range == 0) {
169 return TRUE;
170 }
171 semi2 = loc->psource;
172 if (semi2 == NULL) {
173 return TRUE;
174 }
175 semi2 = strchr(semi2, ';');
176 if (semi2 == NULL) {
177 return TRUE;
178 }
179 semi2 = strchr(semi2 + 1, ';');
180 if (semi2 == NULL) {
181 return TRUE;
182 }
183 if (__kmp_par_range_filename[0]) {
184 const char *name = semi2 - 1;
185 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
186 name--;
187 }
188 if ((*name == '/') || (*name == ';')) {
189 name++;
190 }
191 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
192 return __kmp_par_range < 0;
193 }
194 }
195 semi3 = strchr(semi2 + 1, ';');
196 if (__kmp_par_range_routine[0]) {
197 if ((semi3 != NULL) && (semi3 > semi2) &&
198 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
199 return __kmp_par_range < 0;
200 }
201 }
202 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
203 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
204 return __kmp_par_range > 0;
205 }
206 return __kmp_par_range < 0;
207 }
208 return TRUE;
209
210#endif /* KMP_DEBUG */
211}
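// --- Illustrative note (not part of the upstream file) ---
// __kmpc_ok_to_fork() above filters parallel regions by parsing the
// semicolon-separated ident_t::psource string (";file;routine;line;line;;").
// A hypothetical location for a region starting at foo.c:42 could look like
// the initializer below: the basename "foo.c" is compared against
// __kmp_par_range_filename, "bar" against __kmp_par_range_routine, and 42
// against [__kmp_par_range_lb, __kmp_par_range_ub], all typically configured
// through the KMP_PAR_RANGE environment variable.
#if 0
static ident_t example_loc = {0, KMP_IDENT_KMPC, 0, 0,
                              ";/home/user/foo.c;bar;42;48;;"};
#endif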
212
219kmp_int32 __kmpc_in_parallel(ident_t *loc) {
220 return __kmp_entry_thread()->th.th_root->r.r_active;
221}
222
232void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
233 kmp_int32 num_threads) {
234 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
235 global_tid, num_threads));
236 __kmp_assert_valid_gtid(global_tid);
237 __kmp_push_num_threads(loc, global_tid, num_threads);
238}
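// --- Illustrative sketch (not part of the upstream file) ---
// For "#pragma omp parallel num_threads(4)" a compiler typically emits the
// push immediately before the fork; the pushed value only affects the next
// parallel region. The function and microtask names below are hypothetical.
#if 0
static void example_num_threads(ident_t *loc, kmpc_micro microtask) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  __kmpc_push_num_threads(loc, gtid, 4);
  __kmpc_fork_call(loc, /*argc=*/0, microtask);
}
#endif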
239
240void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 global_tid,
241 kmp_int32 num_threads, int severity,
242 const char *message) {
243 __kmp_push_num_threads(loc, global_tid, num_threads);
244 __kmp_set_strict_num_threads(loc, global_tid, severity, message);
245}
246
260void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid,
261 kmp_uint32 list_length,
262 kmp_int32 *num_threads_list) {
263 KA_TRACE(20, ("__kmpc_push_num_threads_list: enter T#%d num_threads_list=",
264 global_tid));
265 KA_TRACE(20, ("%d", num_threads_list[0]));
266#ifdef KMP_DEBUG
267 for (kmp_uint32 i = 1; i < list_length; ++i)
268 KA_TRACE(20, (", %d", num_threads_list[i]));
269#endif
270 KA_TRACE(20, ("\n"));
271
272 __kmp_assert_valid_gtid(global_tid);
273 __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
274}
275
276void __kmpc_push_num_threads_list_strict(ident_t *loc, kmp_int32 global_tid,
277 kmp_uint32 list_length,
278 kmp_int32 *num_threads_list,
279 int severity, const char *message) {
280 __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
281 __kmp_set_strict_num_threads(loc, global_tid, severity, message);
282}
283
284void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
285 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
286 /* the num_threads are automatically popped */
287}
288
289void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
290 kmp_int32 proc_bind) {
291 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
292 proc_bind));
293 __kmp_assert_valid_gtid(global_tid);
294 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
295}
296
307void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
308 int gtid = __kmp_entry_gtid();
309
310#if (KMP_STATS_ENABLED)
311 // If we were in a serial region, then stop the serial timer, record
312 // the event, and start parallel region timer
313 stats_state_e previous_state = KMP_GET_THREAD_STATE();
314 if (previous_state == stats_state_e::SERIAL_REGION) {
315 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
316 } else {
317 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
318 }
319 int inParallel = __kmpc_in_parallel(loc);
320 if (inParallel) {
321 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
322 } else {
323 KMP_COUNT_BLOCK(OMP_PARALLEL);
324 }
325#endif
326
327 // maybe saving thr_state is enough here
328 {
329 va_list ap;
330 va_start(ap, microtask);
331
332#if OMPT_SUPPORT
333 ompt_frame_t *ompt_frame;
334 if (ompt_enabled.enabled) {
335 kmp_info_t *master_th = __kmp_threads[gtid];
336 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
337 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
338 }
339 OMPT_STORE_RETURN_ADDRESS(gtid);
340#endif
341
342#if INCLUDE_SSC_MARKS
343 SSC_MARK_FORKING();
344#endif
345 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
346 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
347 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
348 kmp_va_addr_of(ap));
349#if INCLUDE_SSC_MARKS
350 SSC_MARK_JOINING();
351#endif
352 __kmp_join_call(loc, gtid
353#if OMPT_SUPPORT
354 ,
355 fork_context_intel
356#endif
357 );
358
359 va_end(ap);
360
361#if OMPT_SUPPORT
362 if (ompt_enabled.enabled) {
363 ompt_frame->enter_frame = ompt_data_none;
364 }
365#endif
366 }
367
368#if KMP_STATS_ENABLED
369 if (previous_state == stats_state_e::SERIAL_REGION) {
370 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
371 KMP_SET_THREAD_STATE(previous_state);
372 } else {
373 KMP_POP_PARTITIONED_TIMER();
374 }
375#endif // KMP_STATS_ENABLED
376}
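// --- Illustrative sketch (not part of the upstream file) ---
// A compiler typically lowers "#pragma omp parallel" by outlining the region
// body into a "microtask" whose first two parameters point to the global and
// bound thread ids, followed by one pointer per shared variable, and by
// passing the shared variables' addresses as varargs to __kmpc_fork_call.
// All names below are hypothetical.
#if 0
static void example_microtask(kmp_int32 *gtid, kmp_int32 *bound_tid,
                              int *shared_x) {
  // Region body: executed once by every thread of the new team.
  __kmp_printf("T#%d sees shared_x=%d\n", *gtid, *shared_x);
}
static void example_parallel(ident_t *loc) {
  int x = 42;
  // argc == 1 because one shared variable is passed (by address).
  __kmpc_fork_call(loc, 1, (kmpc_micro)example_microtask, &x);
}
#endif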
377
388void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
389 kmp_int32 cond, void *args) {
390 int gtid = __kmp_entry_gtid();
391 if (cond) {
392 if (args)
393 __kmpc_fork_call(loc, argc, microtask, args);
394 else
395 __kmpc_fork_call(loc, argc, microtask);
396 } else {
397 __kmpc_serialized_parallel(loc, gtid);
398
399#if OMPT_SUPPORT
400 void *exit_frame_ptr;
401#endif
402
403 if (args)
404 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
405 /*npr=*/0,
406 /*argc=*/1, &args
407#if OMPT_SUPPORT
408 ,
409 &exit_frame_ptr
410#endif
411 );
412 else
413 __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
414 /*npr=*/0,
415 /*argc=*/0,
416 /*args=*/nullptr
417#if OMPT_SUPPORT
418 ,
419 &exit_frame_ptr
420#endif
421 );
422
423 __kmpc_end_serialized_parallel(loc, gtid);
424 }
425}
426
438void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
439 kmp_int32 num_teams, kmp_int32 num_threads) {
440 KA_TRACE(20,
441 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
442 global_tid, num_teams, num_threads));
443 __kmp_assert_valid_gtid(global_tid);
444 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
445}
446
457void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
458 kmp_int32 thread_limit) {
459 __kmp_assert_valid_gtid(global_tid);
460 kmp_info_t *thread = __kmp_threads[global_tid];
461 if (thread_limit > 0)
462 thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
463}
464
481void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
482 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
483 kmp_int32 num_threads) {
484 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
485 " num_teams_ub=%d num_threads=%d\n",
486 global_tid, num_teams_lb, num_teams_ub, num_threads));
487 __kmp_assert_valid_gtid(global_tid);
488 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
489 num_threads);
490}
491
502void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
503 ...) {
504 int gtid = __kmp_entry_gtid();
505 kmp_info_t *this_thr = __kmp_threads[gtid];
506 va_list ap;
507 va_start(ap, microtask);
508
509#if KMP_STATS_ENABLED
510 KMP_COUNT_BLOCK(OMP_TEAMS);
511 stats_state_e previous_state = KMP_GET_THREAD_STATE();
512 if (previous_state == stats_state_e::SERIAL_REGION) {
513 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
514 } else {
515 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
516 }
517#endif
518
519 // remember teams entry point and nesting level
520 this_thr->th.th_teams_microtask = microtask;
521 this_thr->th.th_teams_level =
522 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
523
524#if OMPT_SUPPORT
525 kmp_team_t *parent_team = this_thr->th.th_team;
526 int tid = __kmp_tid_from_gtid(gtid);
527 if (ompt_enabled.enabled) {
528 parent_team->t.t_implicit_task_taskdata[tid]
529 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
530 }
531 OMPT_STORE_RETURN_ADDRESS(gtid);
532#endif
533
534 // check if __kmpc_push_num_teams called, set default number of teams
535 // otherwise
536 if (this_thr->th.th_teams_size.nteams == 0) {
537 __kmp_push_num_teams(loc, gtid, 0, 0);
538 }
539 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
540 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
541 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
542
543 __kmp_fork_call(
544 loc, gtid, fork_context_intel, argc,
545 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
546 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
547 __kmp_join_call(loc, gtid
548#if OMPT_SUPPORT
549 ,
550 fork_context_intel
551#endif
552 );
553
554 // Pop current CG root off list
555 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
556 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
557 this_thr->th.th_cg_roots = tmp->up;
558 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
559 " to node %p. cg_nthreads was %d\n",
560 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
561 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
562 int i = tmp->cg_nthreads--;
563 if (i == 1) { // check if we are the last thread in CG (not always the case)
564 __kmp_free(tmp);
565 }
566 // Restore current task's thread_limit from CG root
567 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
568 this_thr->th.th_current_task->td_icvs.thread_limit =
569 this_thr->th.th_cg_roots->cg_thread_limit;
570
571 this_thr->th.th_teams_microtask = NULL;
572 this_thr->th.th_teams_level = 0;
573 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
574 va_end(ap);
575#if KMP_STATS_ENABLED
576 if (previous_state == stats_state_e::SERIAL_REGION) {
577 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
578 KMP_SET_THREAD_STATE(previous_state);
579 } else {
580 KMP_POP_PARTITIONED_TIMER();
581 }
582#endif // KMP_STATS_ENABLED
583}
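// --- Illustrative sketch (not part of the upstream file) ---
// For "#pragma omp teams num_teams(2) thread_limit(8)" a compiler would
// typically push the requested teams geometry and then fork with the teams
// entry point; the microtask follows the same outlining convention as for
// __kmpc_fork_call. Names below are hypothetical.
#if 0
static void example_teams(ident_t *loc, kmpc_micro teams_microtask) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  __kmpc_push_num_teams(loc, gtid, /*num_teams=*/2, /*num_threads=*/8);
  __kmpc_fork_teams(loc, /*argc=*/0, teams_microtask);
}
#endif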
584
585// I don't think this function should ever have been exported.
586// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
587// openmp code ever called it, but it's been exported from the RTL for so
588// long that I'm afraid to remove the definition.
589int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
590
603void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
604 // The implementation is now in kmp_runtime.cpp so that it can share static
605 // functions with kmp_fork_call since the tasks to be done are similar in
606 // each case.
607 __kmp_assert_valid_gtid(global_tid);
608#if OMPT_SUPPORT
609 OMPT_STORE_RETURN_ADDRESS(global_tid);
610#endif
611 __kmp_serialized_parallel(loc, global_tid);
612}
613
621void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
622 kmp_internal_control_t *top;
623 kmp_info_t *this_thr;
624 kmp_team_t *serial_team;
625
626 KC_TRACE(10,
627 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
628
629 /* skip all this code for autopar serialized loops since it results in
630 unacceptable overhead */
631 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
632 return;
633
634 // Not autopar code
635 __kmp_assert_valid_gtid(global_tid);
636 if (!TCR_4(__kmp_init_parallel))
637 __kmp_parallel_initialize();
638
639 __kmp_resume_if_soft_paused();
640
641 this_thr = __kmp_threads[global_tid];
642 serial_team = this_thr->th.th_serial_team;
643
644 kmp_task_team_t *task_team = this_thr->th.th_task_team;
645 // we need to wait for the proxy tasks before finishing the thread
646 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
647 task_team->tt.tt_hidden_helper_task_encountered))
648 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
649
650 KMP_MB();
651 KMP_DEBUG_ASSERT(serial_team);
652 KMP_ASSERT(serial_team->t.t_serialized);
653 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
654 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
655 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
656 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
657
658#if OMPT_SUPPORT
659 if (ompt_enabled.enabled &&
660 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
661 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
662 if (ompt_enabled.ompt_callback_implicit_task) {
663 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
664 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
665 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
666 }
667
668 // clear the task id only after unlinking the task
669 ompt_data_t *parent_task_data;
670 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
671
672 if (ompt_enabled.ompt_callback_parallel_end) {
673 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
674 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
675 ompt_parallel_invoker_program | ompt_parallel_team,
676 OMPT_LOAD_RETURN_ADDRESS(global_tid));
677 }
678 __ompt_lw_taskteam_unlink(this_thr);
679 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
680 }
681#endif
682
683 /* If necessary, pop the internal control stack values and replace the team
684 * values */
685 top = serial_team->t.t_control_stack_top;
686 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
687 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
688 serial_team->t.t_control_stack_top = top->next;
689 __kmp_free(top);
690 }
691
692 /* pop dispatch buffers stack */
693 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
694 {
695 dispatch_private_info_t *disp_buffer =
696 serial_team->t.t_dispatch->th_disp_buffer;
697 serial_team->t.t_dispatch->th_disp_buffer =
698 serial_team->t.t_dispatch->th_disp_buffer->next;
699 __kmp_free(disp_buffer);
700 }
701
702 /* pop the task team stack */
703 if (serial_team->t.t_serialized > 1) {
704 __kmp_pop_task_team_node(this_thr, serial_team);
705 }
706
707 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
708
709 --serial_team->t.t_serialized;
710 if (serial_team->t.t_serialized == 0) {
711
712 /* return to the parallel section */
713
714#if KMP_ARCH_X86 || KMP_ARCH_X86_64
715 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
716 __kmp_clear_x87_fpu_status_word();
717 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
718 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
719 }
720#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
721
722 __kmp_pop_current_task_from_thread(this_thr);
723#if OMPD_SUPPORT
724 if (ompd_state & OMPD_ENABLE_BP)
725 ompd_bp_parallel_end();
726#endif
727
728 this_thr->th.th_team = serial_team->t.t_parent;
729 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
730
731 /* restore values cached in the thread */
732 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
733 this_thr->th.th_team_master =
734 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
735 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
736
737 /* TODO the below shouldn't need to be adjusted for serialized teams */
738 this_thr->th.th_dispatch =
739 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
740
741 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
742 this_thr->th.th_current_task->td_flags.executing = 1;
743
744 if (__kmp_tasking_mode != tskm_immediate_exec) {
745 // Restore task state from serial team structure
746 KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 ||
747 serial_team->t.t_primary_task_state == 1);
748 this_thr->th.th_task_state =
749 (kmp_uint8)serial_team->t.t_primary_task_state;
750 // Copy the task team from the new child / old parent team to the thread.
751 this_thr->th.th_task_team =
752 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
753 KA_TRACE(20,
754 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
755 "team %p\n",
756 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
757 }
758#if KMP_AFFINITY_SUPPORTED
759 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
760 __kmp_reset_root_init_mask(global_tid);
761 }
762#endif
763 } else {
764 if (__kmp_tasking_mode != tskm_immediate_exec) {
765 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
766 "depth of serial team %p to %d\n",
767 global_tid, serial_team, serial_team->t.t_serialized));
768 }
769 }
770
771 serial_team->t.t_level--;
772 if (__kmp_env_consistency_check)
773 __kmp_pop_parallel(global_tid, NULL);
774#if OMPT_SUPPORT
775 if (ompt_enabled.enabled)
776 this_thr->th.ompt_thread_info.state =
777 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
778 : ompt_state_work_parallel);
779#endif
780}
781
790void __kmpc_flush(ident_t *loc) {
791 KC_TRACE(10, ("__kmpc_flush: called\n"));
792
793 /* need explicit __mf() here since use volatile instead in library */
794 KMP_MFENCE(); /* Flush all pending memory write invalidates. */
795
796#if OMPT_SUPPORT && OMPT_OPTIONAL
797 if (ompt_enabled.ompt_callback_flush) {
798 ompt_callbacks.ompt_callback(ompt_callback_flush)(
799 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
800 }
801#endif
802}
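// --- Illustrative sketch (not part of the upstream file) ---
// A standalone "#pragma omp flush" is typically lowered to a single call:
#if 0
static void example_flush(ident_t *loc) { __kmpc_flush(loc); }
#endif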
803
804/* -------------------------------------------------------------------------- */
812void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
813 KMP_COUNT_BLOCK(OMP_BARRIER);
814 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
815 __kmp_assert_valid_gtid(global_tid);
816
817 if (!TCR_4(__kmp_init_parallel))
818 __kmp_parallel_initialize();
819
820 __kmp_resume_if_soft_paused();
821
822 if (__kmp_env_consistency_check) {
823 if (loc == 0) {
824 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
825 }
826 __kmp_check_barrier(global_tid, ct_barrier, loc);
827 }
828
829#if OMPT_SUPPORT
830 ompt_frame_t *ompt_frame;
831 if (ompt_enabled.enabled) {
832 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
833 if (ompt_frame->enter_frame.ptr == NULL)
834 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
835 }
836 OMPT_STORE_RETURN_ADDRESS(global_tid);
837#endif
838 __kmp_threads[global_tid]->th.th_ident = loc;
839 // TODO: explicit barrier_wait_id:
840 // this function is called when 'barrier' directive is present or
841 // implicit barrier at the end of a worksharing construct.
842 // 1) better to add a per-thread barrier counter to a thread data structure
843 // 2) set to 0 when a new team is created
844 // 3) no sync is required
845
846 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
847#if OMPT_SUPPORT && OMPT_OPTIONAL
848 if (ompt_enabled.enabled) {
849 ompt_frame->enter_frame = ompt_data_none;
850 }
851#endif
852}
853
854/* The BARRIER for a MASTER section is always explicit */
861kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
862 int status = 0;
863
864 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
865 __kmp_assert_valid_gtid(global_tid);
866
867 if (!TCR_4(__kmp_init_parallel))
868 __kmp_parallel_initialize();
869
870 __kmp_resume_if_soft_paused();
871
872 if (KMP_MASTER_GTID(global_tid)) {
873 KMP_COUNT_BLOCK(OMP_MASTER);
874 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
875 status = 1;
876 }
877
878#if OMPT_SUPPORT && OMPT_OPTIONAL
879 if (status) {
880 if (ompt_enabled.ompt_callback_masked) {
881 kmp_info_t *this_thr = __kmp_threads[global_tid];
882 kmp_team_t *team = this_thr->th.th_team;
883
884 int tid = __kmp_tid_from_gtid(global_tid);
885 ompt_callbacks.ompt_callback(ompt_callback_masked)(
886 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
887 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
888 OMPT_GET_RETURN_ADDRESS(0));
889 }
890 }
891#endif
892
893 if (__kmp_env_consistency_check) {
894#if KMP_USE_DYNAMIC_LOCK
895 if (status)
896 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
897 else
898 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
899#else
900 if (status)
901 __kmp_push_sync(global_tid, ct_master, loc, NULL);
902 else
903 __kmp_check_sync(global_tid, ct_master, loc, NULL);
904#endif
905 }
906
907 return status;
908}
909
918void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
919 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
920 __kmp_assert_valid_gtid(global_tid);
921 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
922 KMP_POP_PARTITIONED_TIMER();
923
924#if OMPT_SUPPORT && OMPT_OPTIONAL
925 kmp_info_t *this_thr = __kmp_threads[global_tid];
926 kmp_team_t *team = this_thr->th.th_team;
927 if (ompt_enabled.ompt_callback_masked) {
928 int tid = __kmp_tid_from_gtid(global_tid);
929 ompt_callbacks.ompt_callback(ompt_callback_masked)(
930 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
931 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
932 OMPT_GET_RETURN_ADDRESS(0));
933 }
934#endif
935
936 if (__kmp_env_consistency_check) {
937 if (KMP_MASTER_GTID(global_tid))
938 __kmp_pop_sync(global_tid, ct_master, loc);
939 }
940}
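// --- Illustrative sketch (not part of the upstream file) ---
// "#pragma omp master" is typically lowered as a guarded region; note that
// the construct has no implied barrier. Names below are hypothetical.
#if 0
static void example_master(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  if (__kmpc_master(loc, gtid)) {
    // ... body, executed by the primary thread only ...
    __kmpc_end_master(loc, gtid);
  }
}
#endif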
941
950kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
951 int status = 0;
952 int tid;
953 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
954 __kmp_assert_valid_gtid(global_tid);
955
956 if (!TCR_4(__kmp_init_parallel))
957 __kmp_parallel_initialize();
958
959 __kmp_resume_if_soft_paused();
960
961 tid = __kmp_tid_from_gtid(global_tid);
962 if (tid == filter) {
963 KMP_COUNT_BLOCK(OMP_MASKED);
964 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
965 status = 1;
966 }
967
968#if OMPT_SUPPORT && OMPT_OPTIONAL
969 if (status) {
970 if (ompt_enabled.ompt_callback_masked) {
971 kmp_info_t *this_thr = __kmp_threads[global_tid];
972 kmp_team_t *team = this_thr->th.th_team;
973 ompt_callbacks.ompt_callback(ompt_callback_masked)(
974 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
975 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
976 OMPT_GET_RETURN_ADDRESS(0));
977 }
978 }
979#endif
980
981 if (__kmp_env_consistency_check) {
982#if KMP_USE_DYNAMIC_LOCK
983 if (status)
984 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
985 else
986 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
987#else
988 if (status)
989 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
990 else
991 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
992#endif
993 }
994
995 return status;
996}
997
1006void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
1007 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
1008 __kmp_assert_valid_gtid(global_tid);
1009 KMP_POP_PARTITIONED_TIMER();
1010
1011#if OMPT_SUPPORT && OMPT_OPTIONAL
1012 kmp_info_t *this_thr = __kmp_threads[global_tid];
1013 kmp_team_t *team = this_thr->th.th_team;
1014 if (ompt_enabled.ompt_callback_masked) {
1015 int tid = __kmp_tid_from_gtid(global_tid);
1016 ompt_callbacks.ompt_callback(ompt_callback_masked)(
1017 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
1018 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1019 OMPT_GET_RETURN_ADDRESS(0));
1020 }
1021#endif
1022
1023 if (__kmp_env_consistency_check) {
1024 __kmp_pop_sync(global_tid, ct_masked, loc);
1025 }
1026}
1027
1035void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
1036 int cid = 0;
1037 kmp_info_t *th;
1038 KMP_DEBUG_ASSERT(__kmp_init_serial);
1039
1040 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
1041 __kmp_assert_valid_gtid(gtid);
1042
1043 if (!TCR_4(__kmp_init_parallel))
1044 __kmp_parallel_initialize();
1045
1046 __kmp_resume_if_soft_paused();
1047
1048#if USE_ITT_BUILD
1049 __kmp_itt_ordered_prep(gtid);
1050// TODO: ordered_wait_id
1051#endif /* USE_ITT_BUILD */
1052
1053 th = __kmp_threads[gtid];
1054
1055#if OMPT_SUPPORT && OMPT_OPTIONAL
1056 kmp_team_t *team;
1057 ompt_wait_id_t lck;
1058 void *codeptr_ra;
1059 OMPT_STORE_RETURN_ADDRESS(gtid);
1060 if (ompt_enabled.enabled) {
1061 team = __kmp_team_from_gtid(gtid);
1062 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
1063 /* OMPT state update */
1064 th->th.ompt_thread_info.wait_id = lck;
1065 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
1066
1067 /* OMPT event callback */
1068 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1069 if (ompt_enabled.ompt_callback_mutex_acquire) {
1070 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1071 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
1072 codeptr_ra);
1073 }
1074 }
1075#endif
1076
1077 if (th->th.th_dispatch->th_deo_fcn != 0)
1078 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
1079 else
1080 __kmp_parallel_deo(&gtid, &cid, loc);
1081
1082#if OMPT_SUPPORT && OMPT_OPTIONAL
1083 if (ompt_enabled.enabled) {
1084 /* OMPT state update */
1085 th->th.ompt_thread_info.state = ompt_state_work_parallel;
1086 th->th.ompt_thread_info.wait_id = 0;
1087
1088 /* OMPT event callback */
1089 if (ompt_enabled.ompt_callback_mutex_acquired) {
1090 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1091 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1092 }
1093 }
1094#endif
1095
1096#if USE_ITT_BUILD
1097 __kmp_itt_ordered_start(gtid);
1098#endif /* USE_ITT_BUILD */
1099}
1100
1108void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1109 int cid = 0;
1110 kmp_info_t *th;
1111
1112 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1113 __kmp_assert_valid_gtid(gtid);
1114
1115#if USE_ITT_BUILD
1116 __kmp_itt_ordered_end(gtid);
1117// TODO: ordered_wait_id
1118#endif /* USE_ITT_BUILD */
1119
1120 th = __kmp_threads[gtid];
1121
1122 if (th->th.th_dispatch->th_dxo_fcn != 0)
1123 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1124 else
1125 __kmp_parallel_dxo(&gtid, &cid, loc);
1126
1127#if OMPT_SUPPORT && OMPT_OPTIONAL
1128 OMPT_STORE_RETURN_ADDRESS(gtid);
1129 if (ompt_enabled.ompt_callback_mutex_released) {
1130 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1131 ompt_mutex_ordered,
1132 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1133 ->t.t_ordered.dt.t_value,
1134 OMPT_LOAD_RETURN_ADDRESS(gtid));
1135 }
1136#endif
1137}
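// --- Illustrative sketch (not part of the upstream file) ---
// Inside a loop compiled with an ordered clause, the body of
// "#pragma omp ordered" is bracketed by the two calls above so that the
// iterations enter the region in iteration order. Names are hypothetical.
#if 0
static void example_ordered_iteration(ident_t *loc, kmp_int32 gtid) {
  __kmpc_ordered(loc, gtid);
  // ... statements that must execute in iteration order ...
  __kmpc_end_ordered(loc, gtid);
}
#endif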
1138
1139#if KMP_USE_DYNAMIC_LOCK
1140
1141static __forceinline void
1142__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1143 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1144 // Pointer to the allocated indirect lock is written to crit, while indexing
1145 // is ignored.
1146 void *idx;
1147 kmp_indirect_lock_t **lck;
1148 lck = (kmp_indirect_lock_t **)crit;
1149 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1150 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1151 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1152 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1153 KA_TRACE(20,
1154 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1155#if USE_ITT_BUILD
1156 __kmp_itt_critical_creating(ilk->lock, loc);
1157#endif
1158 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1159 if (status == 0) {
1160#if USE_ITT_BUILD
1161 __kmp_itt_critical_destroyed(ilk->lock);
1162#endif
1163 // We don't really need to destroy the unclaimed lock here since it will be
1164 // cleaned up at program exit.
1165 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1166 }
1167 KMP_DEBUG_ASSERT(*lck != NULL);
1168}
1169
1170// Fast-path acquire tas lock
1171#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1172 { \
1173 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1174 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1175 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1176 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1177 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1178 kmp_uint32 spins; \
1179 KMP_FSYNC_PREPARE(l); \
1180 KMP_INIT_YIELD(spins); \
1181 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1182 do { \
1183 if (TCR_4(__kmp_nth) > \
1184 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1185 KMP_YIELD(TRUE); \
1186 } else { \
1187 KMP_YIELD_SPIN(spins); \
1188 } \
1189 __kmp_spin_backoff(&backoff); \
1190 } while ( \
1191 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1192 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1193 } \
1194 KMP_FSYNC_ACQUIRED(l); \
1195 }
1196
1197// Fast-path test tas lock
1198#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1199 { \
1200 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1201 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1202 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1203 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1204 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1205 }
1206
1207// Fast-path release tas lock
1208#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1209 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1210
1211#if KMP_USE_FUTEX
1212
1213#include <sys/syscall.h>
1214#include <unistd.h>
1215#ifndef FUTEX_WAIT
1216#define FUTEX_WAIT 0
1217#endif
1218#ifndef FUTEX_WAKE
1219#define FUTEX_WAKE 1
1220#endif
1221
1222// Fast-path acquire futex lock
1223#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1224 { \
1225 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1226 kmp_int32 gtid_code = (gtid + 1) << 1; \
1227 KMP_MB(); \
1228 KMP_FSYNC_PREPARE(ftx); \
1229 kmp_int32 poll_val; \
1230 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1231 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1232 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1233 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1234 if (!cond) { \
1235 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1236 poll_val | \
1237 KMP_LOCK_BUSY(1, futex))) { \
1238 continue; \
1239 } \
1240 poll_val |= KMP_LOCK_BUSY(1, futex); \
1241 } \
1242 kmp_int32 rc; \
1243 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1244 NULL, NULL, 0)) != 0) { \
1245 continue; \
1246 } \
1247 gtid_code |= 1; \
1248 } \
1249 KMP_FSYNC_ACQUIRED(ftx); \
1250 }
1251
1252// Fast-path test futex lock
1253#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1254 { \
1255 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1256 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1257 KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1258 KMP_FSYNC_ACQUIRED(ftx); \
1259 rc = TRUE; \
1260 } else { \
1261 rc = FALSE; \
1262 } \
1263 }
1264
1265// Fast-path release futex lock
1266#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1267 { \
1268 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1269 KMP_MB(); \
1270 KMP_FSYNC_RELEASING(ftx); \
1271 kmp_int32 poll_val = \
1272 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1273 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1274 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1275 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1276 } \
1277 KMP_MB(); \
1278 KMP_YIELD_OVERSUB(); \
1279 }
1280
1281#endif // KMP_USE_FUTEX
1282
1283#else // KMP_USE_DYNAMIC_LOCK
1284
1285static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1286 ident_t const *loc,
1287 kmp_int32 gtid) {
1288 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1289
1290 // Because of the double-check, the following load doesn't need to be volatile
1291 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1292
1293 if (lck == NULL) {
1294 void *idx;
1295
1296 // Allocate & initialize the lock.
1297 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1298 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1299 __kmp_init_user_lock_with_checks(lck);
1300 __kmp_set_user_lock_location(lck, loc);
1301#if USE_ITT_BUILD
1302 __kmp_itt_critical_creating(lck);
1303// __kmp_itt_critical_creating() should be called *before* the first usage
1304// of the underlying lock. It is the only place where we can guarantee it.
1305// There is a chance the lock will be destroyed without ever being used, but
1306// that is not a problem: this is not a real event seen by the user, it merely
1307// sets a name for the object (lock). See more details in kmp_itt.h.
1308#endif /* USE_ITT_BUILD */
1309
1310 // Use a cmpxchg instruction to slam the start of the critical section with
1311 // the lock pointer. If another thread beat us to it, deallocate the lock,
1312 // and use the lock that the other thread allocated.
1313 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1314
1315 if (status == 0) {
1316// Deallocate the lock and reload the value.
1317#if USE_ITT_BUILD
1318 __kmp_itt_critical_destroyed(lck);
1319// Let ITT know the lock is destroyed and the same memory location may be reused
1320// for another purpose.
1321#endif /* USE_ITT_BUILD */
1322 __kmp_destroy_user_lock_with_checks(lck);
1323 __kmp_user_lock_free(&idx, gtid, lck);
1324 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1325 KMP_DEBUG_ASSERT(lck != NULL);
1326 }
1327 }
1328 return lck;
1329}
1330
1331#endif // KMP_USE_DYNAMIC_LOCK
1332
1343void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1344 kmp_critical_name *crit) {
1345#if KMP_USE_DYNAMIC_LOCK
1346#if OMPT_SUPPORT && OMPT_OPTIONAL
1347 OMPT_STORE_RETURN_ADDRESS(global_tid);
1348#endif // OMPT_SUPPORT
1349 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1350#else
1351 KMP_COUNT_BLOCK(OMP_CRITICAL);
1352#if OMPT_SUPPORT && OMPT_OPTIONAL
1353 ompt_state_t prev_state = ompt_state_undefined;
1354 ompt_thread_info_t ti;
1355#endif
1356 kmp_user_lock_p lck;
1357
1358 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1359 __kmp_assert_valid_gtid(global_tid);
1360
1361 // TODO: add THR_OVHD_STATE
1362
1363 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1364 KMP_CHECK_USER_LOCK_INIT();
1365
1366 if ((__kmp_user_lock_kind == lk_tas) &&
1367 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1368 lck = (kmp_user_lock_p)crit;
1369 }
1370#if KMP_USE_FUTEX
1371 else if ((__kmp_user_lock_kind == lk_futex) &&
1372 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1373 lck = (kmp_user_lock_p)crit;
1374 }
1375#endif
1376 else { // ticket, queuing or drdpa
1377 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1378 }
1379
1380 if (__kmp_env_consistency_check)
1381 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1382
1383 // Since the critical directive binds to all threads, not just the current
1384 // team, we have to check this even if we are in a serialized team.
1385 // Also, even if we are the uber thread, we still have to acquire the lock,
1386 // as we have to contend with sibling threads.
1387
1388#if USE_ITT_BUILD
1389 __kmp_itt_critical_acquiring(lck);
1390#endif /* USE_ITT_BUILD */
1391#if OMPT_SUPPORT && OMPT_OPTIONAL
1392 OMPT_STORE_RETURN_ADDRESS(global_tid);
1393 void *codeptr_ra = NULL;
1394 if (ompt_enabled.enabled) {
1395 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1396 /* OMPT state update */
1397 prev_state = ti.state;
1398 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1399 ti.state = ompt_state_wait_critical;
1400
1401 /* OMPT event callback */
1402 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1403 if (ompt_enabled.ompt_callback_mutex_acquire) {
1404 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1405 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1406 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1407 }
1408 }
1409#endif
1410 // Value of 'crit' should be good for using as a critical_id of the critical
1411 // section directive.
1412 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1413
1414#if USE_ITT_BUILD
1415 __kmp_itt_critical_acquired(lck);
1416#endif /* USE_ITT_BUILD */
1417#if OMPT_SUPPORT && OMPT_OPTIONAL
1418 if (ompt_enabled.enabled) {
1419 /* OMPT state update */
1420 ti.state = prev_state;
1421 ti.wait_id = 0;
1422
1423 /* OMPT event callback */
1424 if (ompt_enabled.ompt_callback_mutex_acquired) {
1425 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1426 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1427 }
1428 }
1429#endif
1430 KMP_POP_PARTITIONED_TIMER();
1431
1432 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1433 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1434#endif // KMP_USE_DYNAMIC_LOCK
1435}
1436
1437#if KMP_USE_DYNAMIC_LOCK
1438
1439// Converts the given hint to an internal lock implementation
1440static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1441#if KMP_USE_TSX
1442#define KMP_TSX_LOCK(seq) lockseq_##seq
1443#else
1444#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1445#endif
1446
1447#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1448#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1449#else
1450#define KMP_CPUINFO_RTM 0
1451#endif
1452
1453 // Hints that do not require further logic
1454 if (hint & kmp_lock_hint_hle)
1455 return KMP_TSX_LOCK(hle);
1456 if (hint & kmp_lock_hint_rtm)
1457 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1458 if (hint & kmp_lock_hint_adaptive)
1459 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1460
1461 // Rule out conflicting hints first by returning the default lock
1462 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1463 return __kmp_user_lock_seq;
1464 if ((hint & omp_lock_hint_speculative) &&
1465 (hint & omp_lock_hint_nonspeculative))
1466 return __kmp_user_lock_seq;
1467
1468 // Do not even consider speculation when it appears to be contended
1469 if (hint & omp_lock_hint_contended)
1470 return lockseq_queuing;
1471
1472 // Uncontended lock without speculation
1473 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1474 return lockseq_tas;
1475
1476 // Use RTM lock for speculation
1477 if (hint & omp_lock_hint_speculative)
1478 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1479
1480 return __kmp_user_lock_seq;
1481}
1482
1483#if OMPT_SUPPORT && OMPT_OPTIONAL
1484#if KMP_USE_DYNAMIC_LOCK
1485static kmp_mutex_impl_t
1486__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1487 if (user_lock) {
1488 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1489 case 0:
1490 break;
1491#if KMP_USE_FUTEX
1492 case locktag_futex:
1493 return kmp_mutex_impl_queuing;
1494#endif
1495 case locktag_tas:
1496 return kmp_mutex_impl_spin;
1497#if KMP_USE_TSX
1498 case locktag_hle:
1499 case locktag_rtm_spin:
1500 return kmp_mutex_impl_speculative;
1501#endif
1502 default:
1503 return kmp_mutex_impl_none;
1504 }
1505 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1506 }
1507 KMP_ASSERT(ilock);
1508 switch (ilock->type) {
1509#if KMP_USE_TSX
1510 case locktag_adaptive:
1511 case locktag_rtm_queuing:
1512 return kmp_mutex_impl_speculative;
1513#endif
1514 case locktag_nested_tas:
1515 return kmp_mutex_impl_spin;
1516#if KMP_USE_FUTEX
1517 case locktag_nested_futex:
1518#endif
1519 case locktag_ticket:
1520 case locktag_queuing:
1521 case locktag_drdpa:
1522 case locktag_nested_ticket:
1523 case locktag_nested_queuing:
1524 case locktag_nested_drdpa:
1525 return kmp_mutex_impl_queuing;
1526 default:
1527 return kmp_mutex_impl_none;
1528 }
1529}
1530#else
1531// For locks without dynamic binding
1532static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1533 switch (__kmp_user_lock_kind) {
1534 case lk_tas:
1535 return kmp_mutex_impl_spin;
1536#if KMP_USE_FUTEX
1537 case lk_futex:
1538#endif
1539 case lk_ticket:
1540 case lk_queuing:
1541 case lk_drdpa:
1542 return kmp_mutex_impl_queuing;
1543#if KMP_USE_TSX
1544 case lk_hle:
1545 case lk_rtm_queuing:
1546 case lk_rtm_spin:
1547 case lk_adaptive:
1548 return kmp_mutex_impl_speculative;
1549#endif
1550 default:
1551 return kmp_mutex_impl_none;
1552 }
1553}
1554#endif // KMP_USE_DYNAMIC_LOCK
1555#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1556
1570void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1571 kmp_critical_name *crit, uint32_t hint) {
1572 KMP_COUNT_BLOCK(OMP_CRITICAL);
1573 kmp_user_lock_p lck;
1574#if OMPT_SUPPORT && OMPT_OPTIONAL
1575 ompt_state_t prev_state = ompt_state_undefined;
1576 ompt_thread_info_t ti;
1577 // This is the case, if called from __kmpc_critical:
1578 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1579 if (!codeptr)
1580 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1581#endif
1582
1583 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1584 __kmp_assert_valid_gtid(global_tid);
1585
1586 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1587 // Check if it is initialized.
1588 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1589 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1590 if (*lk == 0) {
1591 if (KMP_IS_D_LOCK(lockseq)) {
1592 KMP_COMPARE_AND_STORE_ACQ32(
1593 (volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
1594 KMP_GET_D_TAG(lockseq));
1595 } else {
1596 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1597 }
1598 }
1599 // Branch for accessing the actual lock object and set operation. This
1600 // branching is inevitable since this lock initialization does not follow the
1601 // normal dispatch path (lock table is not used).
1602 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1603 lck = (kmp_user_lock_p)lk;
1604 if (__kmp_env_consistency_check) {
1605 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1606 __kmp_map_hint_to_lock(hint));
1607 }
1608#if USE_ITT_BUILD
1609 __kmp_itt_critical_acquiring(lck);
1610#endif
1611#if OMPT_SUPPORT && OMPT_OPTIONAL
1612 if (ompt_enabled.enabled) {
1613 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1614 /* OMPT state update */
1615 prev_state = ti.state;
1616 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1617 ti.state = ompt_state_wait_critical;
1618
1619 /* OMPT event callback */
1620 if (ompt_enabled.ompt_callback_mutex_acquire) {
1621 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1622 ompt_mutex_critical, (unsigned int)hint,
1623 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1624 codeptr);
1625 }
1626 }
1627#endif
1628#if KMP_USE_INLINED_TAS
1629 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1630 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1631 } else
1632#elif KMP_USE_INLINED_FUTEX
1633 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1634 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1635 } else
1636#endif
1637 {
1638 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1639 }
1640 } else {
1641 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1642 lck = ilk->lock;
1643 if (__kmp_env_consistency_check) {
1644 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1645 __kmp_map_hint_to_lock(hint));
1646 }
1647#if USE_ITT_BUILD
1648 __kmp_itt_critical_acquiring(lck);
1649#endif
1650#if OMPT_SUPPORT && OMPT_OPTIONAL
1651 if (ompt_enabled.enabled) {
1652 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1653 /* OMPT state update */
1654 prev_state = ti.state;
1655 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1656 ti.state = ompt_state_wait_critical;
1657
1658 /* OMPT event callback */
1659 if (ompt_enabled.ompt_callback_mutex_acquire) {
1660 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1661 ompt_mutex_critical, (unsigned int)hint,
1662 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1663 codeptr);
1664 }
1665 }
1666#endif
1667 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1668 }
1669 KMP_POP_PARTITIONED_TIMER();
1670
1671#if USE_ITT_BUILD
1672 __kmp_itt_critical_acquired(lck);
1673#endif /* USE_ITT_BUILD */
1674#if OMPT_SUPPORT && OMPT_OPTIONAL
1675 if (ompt_enabled.enabled) {
1676 /* OMPT state update */
1677 ti.state = prev_state;
1678 ti.wait_id = 0;
1679
1680 /* OMPT event callback */
1681 if (ompt_enabled.ompt_callback_mutex_acquired) {
1682 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1683 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1684 }
1685 }
1686#endif
1687
1688 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1689 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1690} // __kmpc_critical_with_hint
1691
1692#endif // KMP_USE_DYNAMIC_LOCK
1693
1703void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1704 kmp_critical_name *crit) {
1705 kmp_user_lock_p lck;
1706
1707 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1708
1709#if KMP_USE_DYNAMIC_LOCK
1710 int locktag = KMP_EXTRACT_D_TAG(crit);
1711 if (locktag) {
1712 lck = (kmp_user_lock_p)crit;
1713 KMP_ASSERT(lck != NULL);
1714 if (__kmp_env_consistency_check) {
1715 __kmp_pop_sync(global_tid, ct_critical, loc);
1716 }
1717#if USE_ITT_BUILD
1718 __kmp_itt_critical_releasing(lck);
1719#endif
1720#if KMP_USE_INLINED_TAS
1721 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1722 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1723 } else
1724#elif KMP_USE_INLINED_FUTEX
1725 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1726 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1727 } else
1728#endif
1729 {
1730 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1731 }
1732 } else {
1733 kmp_indirect_lock_t *ilk =
1734 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1735 KMP_ASSERT(ilk != NULL);
1736 lck = ilk->lock;
1737 if (__kmp_env_consistency_check) {
1738 __kmp_pop_sync(global_tid, ct_critical, loc);
1739 }
1740#if USE_ITT_BUILD
1741 __kmp_itt_critical_releasing(lck);
1742#endif
1743 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1744 }
1745
1746#else // KMP_USE_DYNAMIC_LOCK
1747
1748 if ((__kmp_user_lock_kind == lk_tas) &&
1749 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1750 lck = (kmp_user_lock_p)crit;
1751 }
1752#if KMP_USE_FUTEX
1753 else if ((__kmp_user_lock_kind == lk_futex) &&
1754 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1755 lck = (kmp_user_lock_p)crit;
1756 }
1757#endif
1758 else { // ticket, queuing or drdpa
1759 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1760 }
1761
1762 KMP_ASSERT(lck != NULL);
1763
1764 if (__kmp_env_consistency_check)
1765 __kmp_pop_sync(global_tid, ct_critical, loc);
1766
1767#if USE_ITT_BUILD
1768 __kmp_itt_critical_releasing(lck);
1769#endif /* USE_ITT_BUILD */
1770 // Value of 'crit' should be good for using as a critical_id of the critical
1771 // section directive.
1772 __kmp_release_user_lock_with_checks(lck, global_tid);
1773
1774#endif // KMP_USE_DYNAMIC_LOCK
1775
1776#if OMPT_SUPPORT && OMPT_OPTIONAL
1777 /* OMPT release event triggers after lock is released; place here to trigger
1778 * for all #if branches */
1779 OMPT_STORE_RETURN_ADDRESS(global_tid);
1780 if (ompt_enabled.ompt_callback_mutex_released) {
1781 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1782 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1783 OMPT_LOAD_RETURN_ADDRESS(0));
1784 }
1785#endif
1786
1787 KMP_POP_PARTITIONED_TIMER();
1788 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1789}
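// --- Illustrative sketch (not part of the upstream file) ---
// "#pragma omp critical (tag)" is typically lowered around a statically
// allocated, zero-initialized kmp_critical_name; every critical construct
// with the same name shares the same lock object. Names are hypothetical.
#if 0
static kmp_critical_name example_crit_tag; // zero-filled lock placeholder
static void example_critical(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  __kmpc_critical(loc, gtid, &example_crit_tag);
  // ... body: at most one thread at a time, program-wide for this name ...
  __kmpc_end_critical(loc, gtid, &example_crit_tag);
}
#endif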
1790
1800kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1801 int status;
1802 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1803 __kmp_assert_valid_gtid(global_tid);
1804
1805 if (!TCR_4(__kmp_init_parallel))
1806 __kmp_parallel_initialize();
1807
1808 __kmp_resume_if_soft_paused();
1809
1810 if (__kmp_env_consistency_check)
1811 __kmp_check_barrier(global_tid, ct_barrier, loc);
1812
1813#if OMPT_SUPPORT
1814 ompt_frame_t *ompt_frame;
1815 if (ompt_enabled.enabled) {
1816 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1817 if (ompt_frame->enter_frame.ptr == NULL)
1818 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1819 }
1820 OMPT_STORE_RETURN_ADDRESS(global_tid);
1821#endif
1822#if USE_ITT_NOTIFY
1823 __kmp_threads[global_tid]->th.th_ident = loc;
1824#endif
1825 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1826#if OMPT_SUPPORT && OMPT_OPTIONAL
1827 if (ompt_enabled.enabled) {
1828 ompt_frame->enter_frame = ompt_data_none;
1829 }
1830#endif
1831
1832 return (status != 0) ? 0 : 1;
1833}
1834
1844void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1845 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1846 __kmp_assert_valid_gtid(global_tid);
1847 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1848}
1849
1860kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1861 kmp_int32 ret;
1862 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1863 __kmp_assert_valid_gtid(global_tid);
1864
1865 if (!TCR_4(__kmp_init_parallel))
1866 __kmp_parallel_initialize();
1867
1868 __kmp_resume_if_soft_paused();
1869
1870 if (__kmp_env_consistency_check) {
1871 if (loc == 0) {
1872 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1873 }
1874 __kmp_check_barrier(global_tid, ct_barrier, loc);
1875 }
1876
1877#if OMPT_SUPPORT
1878 ompt_frame_t *ompt_frame;
1879 if (ompt_enabled.enabled) {
1880 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1881 if (ompt_frame->enter_frame.ptr == NULL)
1882 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1883 }
1884 OMPT_STORE_RETURN_ADDRESS(global_tid);
1885#endif
1886#if USE_ITT_NOTIFY
1887 __kmp_threads[global_tid]->th.th_ident = loc;
1888#endif
1889 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1890#if OMPT_SUPPORT && OMPT_OPTIONAL
1891 if (ompt_enabled.enabled) {
1892 ompt_frame->enter_frame = ompt_data_none;
1893 }
1894#endif
1895
1896 ret = __kmpc_master(loc, global_tid);
1897
1898 if (__kmp_env_consistency_check) {
1899 /* there's no __kmpc_end_master called; so the (stats) */
1900 /* actions of __kmpc_end_master are done here */
1901 if (ret) {
1902 /* only one thread should do the pop since only */
1903 /* one did the push (see __kmpc_master()) */
1904 __kmp_pop_sync(global_tid, ct_master, loc);
1905 }
1906 }
1907
1908 return (ret);
1909}
1910
1911/* The BARRIER for a SINGLE process section is always explicit */
1923kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1924 __kmp_assert_valid_gtid(global_tid);
1925 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1926
1927 if (rc) {
1928 // We are going to execute the single statement, so we should count it.
1929 KMP_COUNT_BLOCK(OMP_SINGLE);
1930 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1931 }
1932
1933#if OMPT_SUPPORT && OMPT_OPTIONAL
1934 kmp_info_t *this_thr = __kmp_threads[global_tid];
1935 kmp_team_t *team = this_thr->th.th_team;
1936 int tid = __kmp_tid_from_gtid(global_tid);
1937
1938 if (ompt_enabled.enabled) {
1939 if (rc) {
1940 if (ompt_enabled.ompt_callback_work) {
1941 ompt_callbacks.ompt_callback(ompt_callback_work)(
1942 ompt_work_single_executor, ompt_scope_begin,
1943 &(team->t.ompt_team_info.parallel_data),
1944 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1945 1, OMPT_GET_RETURN_ADDRESS(0));
1946 }
1947 } else {
1948 if (ompt_enabled.ompt_callback_work) {
1949 ompt_callbacks.ompt_callback(ompt_callback_work)(
1950 ompt_work_single_other, ompt_scope_begin,
1951 &(team->t.ompt_team_info.parallel_data),
1952 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1953 1, OMPT_GET_RETURN_ADDRESS(0));
1954 ompt_callbacks.ompt_callback(ompt_callback_work)(
1955 ompt_work_single_other, ompt_scope_end,
1956 &(team->t.ompt_team_info.parallel_data),
1957 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1958 1, OMPT_GET_RETURN_ADDRESS(0));
1959 }
1960 }
1961 }
1962#endif
1963
1964 return rc;
1965}
1966
1976void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1977 __kmp_assert_valid_gtid(global_tid);
1978 __kmp_exit_single(global_tid);
1979 KMP_POP_PARTITIONED_TIMER();
1980
1981#if OMPT_SUPPORT && OMPT_OPTIONAL
1982 kmp_info_t *this_thr = __kmp_threads[global_tid];
1983 kmp_team_t *team = this_thr->th.th_team;
1984 int tid = __kmp_tid_from_gtid(global_tid);
1985
1986 if (ompt_enabled.ompt_callback_work) {
1987 ompt_callbacks.ompt_callback(ompt_callback_work)(
1988 ompt_work_single_executor, ompt_scope_end,
1989 &(team->t.ompt_team_info.parallel_data),
1990 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1991 OMPT_GET_RETURN_ADDRESS(0));
1992 }
1993#endif
1994}
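// --- Illustrative sketch (not part of the upstream file) ---
// "#pragma omp single" (without nowait) is typically lowered as below; the
// trailing __kmpc_barrier implements the construct's implicit barrier.
// Names below are hypothetical.
#if 0
static void example_single(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  if (__kmpc_single(loc, gtid)) {
    // ... body, executed by exactly one thread of the team ...
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_barrier(loc, gtid);
}
#endif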
1995
2003void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
2004 KMP_POP_PARTITIONED_TIMER();
2005 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
2006
2007#if OMPT_SUPPORT && OMPT_OPTIONAL
2008 if (ompt_enabled.ompt_callback_work) {
2009 ompt_work_t ompt_work_type = ompt_work_loop_static;
2010 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
2011 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2012 // Determine workshare type
2013 if (loc != NULL) {
2014 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
2015 ompt_work_type = ompt_work_loop_static;
2016 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
2017 ompt_work_type = ompt_work_sections;
2018 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
2019 ompt_work_type = ompt_work_distribute;
2020 } else {
2021 // use default set above.
2022 // a warning about this case is provided in __kmpc_for_static_init
2023 }
2024 KMP_DEBUG_ASSERT(ompt_work_type);
2025 }
2026 ompt_callbacks.ompt_callback(ompt_callback_work)(
2027 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
2028 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
2029 }
2030#endif
2031 if (__kmp_env_consistency_check)
2032 __kmp_pop_workshare(global_tid, ct_pdo, loc);
2033}
2034
2035// User routines which take C-style arguments (call by value)
2036// different from the Fortran equivalent routines
2037
2038void ompc_set_num_threads(int arg) {
2039 // !!!!! TODO: check the per-task binding
2040 __kmp_set_num_threads(arg, __kmp_entry_gtid());
2041}
2042
2043void ompc_set_dynamic(int flag) {
2044 kmp_info_t *thread;
2045
2046 /* For the thread-private implementation of the internal controls */
2047 thread = __kmp_entry_thread();
2048
2049 __kmp_save_internal_controls(thread);
2050
2051 set__dynamic(thread, flag ? true : false);
2052}
2053
2054void ompc_set_nested(int flag) {
2055 kmp_info_t *thread;
2056
2057 /* For the thread-private internal controls implementation */
2058 thread = __kmp_entry_thread();
2059
2060 __kmp_save_internal_controls(thread);
2061
2062 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
2063}
2064
2065void ompc_set_max_active_levels(int max_active_levels) {
2066 /* TO DO */
2067 /* we want per-task implementation of this internal control */
2068
2069 /* For the per-thread internal controls implementation */
2070 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
2071}
2072
2073void ompc_set_schedule(omp_sched_t kind, int modifier) {
2074 // !!!!! TODO: check the per-task binding
2075 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
2076}
2077
2078int ompc_get_ancestor_thread_num(int level) {
2079 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
2080}
2081
2082int ompc_get_team_size(int level) {
2083 return __kmp_get_team_size(__kmp_entry_gtid(), level);
2084}
2085
2086/* OpenMP 5.0 Affinity Format API */
2087void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2088 if (!__kmp_init_serial) {
2089 __kmp_serial_initialize();
2090 }
2091 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2092 format, KMP_STRLEN(format) + 1);
2093}
2094
2095size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2096 size_t format_size;
2097 if (!__kmp_init_serial) {
2098 __kmp_serial_initialize();
2099 }
2100 format_size = KMP_STRLEN(__kmp_affinity_format);
2101 if (buffer && size) {
2102 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2103 format_size + 1);
2104 }
2105 return format_size;
2106}
2107
2108void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2109 int gtid;
2110 if (!TCR_4(__kmp_init_middle)) {
2111 __kmp_middle_initialize();
2112 }
2113 __kmp_assign_root_init_mask();
2114 gtid = __kmp_get_gtid();
2115#if KMP_AFFINITY_SUPPORTED
2116 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2117 __kmp_affinity.flags.reset) {
2118 __kmp_reset_root_init_mask(gtid);
2119 }
2120#endif
2121 __kmp_aux_display_affinity(gtid, format);
2122}
2123
2124size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2125 char const *format) {
2126 int gtid;
2127 size_t num_required;
2128 kmp_str_buf_t capture_buf;
2129 if (!TCR_4(__kmp_init_middle)) {
2130 __kmp_middle_initialize();
2131 }
2132 __kmp_assign_root_init_mask();
2133 gtid = __kmp_get_gtid();
2134#if KMP_AFFINITY_SUPPORTED
2135 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2136 __kmp_affinity.flags.reset) {
2137 __kmp_reset_root_init_mask(gtid);
2138 }
2139#endif
2140 __kmp_str_buf_init(&capture_buf);
2141 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2142 if (buffer && buf_size) {
2143 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2144 capture_buf.used + 1);
2145 }
2146 __kmp_str_buf_free(&capture_buf);
2147 return num_required;
2148}
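// Usage sketch (an assumption about user-level code, not part of the source):
// these entry points back the OpenMP 5.0 omp_*_affinity routines; the size
// returned by omp_capture_affinity() (num_required above) supports a two-pass
// buffer-sizing pattern. The format string below is purely illustrative.
//
//   #include <omp.h>
//   #include <stdlib.h>
//   size_t need = omp_capture_affinity(NULL, 0, "%P: %{thread_affinity}");
//   char *buf = (char *)malloc(need + 1);
//   omp_capture_affinity(buf, need + 1, "%P: %{thread_affinity}");
//   /* ... use buf ... */
//   free(buf);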
2149
2150void kmpc_set_stacksize(int arg) {
2151 // __kmp_aux_set_stacksize initializes the library if needed
2152 __kmp_aux_set_stacksize(arg);
2153}
2154
2155void kmpc_set_stacksize_s(size_t arg) {
2156 // __kmp_aux_set_stacksize initializes the library if needed
2157 __kmp_aux_set_stacksize(arg);
2158}
2159
2160void kmpc_set_blocktime(int arg) {
2161 int gtid, tid, bt = arg;
2162 kmp_info_t *thread;
2163
2164 gtid = __kmp_entry_gtid();
2165 tid = __kmp_tid_from_gtid(gtid);
2166 thread = __kmp_thread_from_gtid(gtid);
2167
2168 __kmp_aux_convert_blocktime(&bt);
2169 __kmp_aux_set_blocktime(bt, thread, tid);
2170}
2171
2172void kmpc_set_library(int arg) {
2173 // __kmp_user_set_library initializes the library if needed
2174 __kmp_user_set_library((enum library_type)arg);
2175}
2176
2177void kmpc_set_defaults(char const *str) {
2178 // __kmp_aux_set_defaults initializes the library if needed
2179 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2180}
2181
2182void kmpc_set_disp_num_buffers(int arg) {
2183 // ignore after initialization because some teams have already
2184 // allocated dispatch buffers
2185 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2186 arg <= KMP_MAX_DISP_NUM_BUFF) {
2187 __kmp_dispatch_num_buffers = arg;
2188 }
2189}
2190
2191int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2192#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2193 return -1;
2194#else
2195 if (!TCR_4(__kmp_init_middle)) {
2196 __kmp_middle_initialize();
2197 }
2198 __kmp_assign_root_init_mask();
2199 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2200#endif
2201}
2202
2203int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2204#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2205 return -1;
2206#else
2207 if (!TCR_4(__kmp_init_middle)) {
2208 __kmp_middle_initialize();
2209 }
2210 __kmp_assign_root_init_mask();
2211 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2212#endif
2213}
2214
2215int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2216#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2217 return -1;
2218#else
2219 if (!TCR_4(__kmp_init_middle)) {
2220 __kmp_middle_initialize();
2221 }
2222 __kmp_assign_root_init_mask();
2223 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2224#endif
2225}
2226
2227/* -------------------------------------------------------------------------- */
2272void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2273 void *cpy_data, void (*cpy_func)(void *, void *),
2274 kmp_int32 didit) {
2275 void **data_ptr;
2276 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2277 __kmp_assert_valid_gtid(gtid);
2278
2279 KMP_MB();
2280
2281 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2282
2283 if (__kmp_env_consistency_check) {
2284 if (loc == 0) {
2285 KMP_WARNING(ConstructIdentInvalid);
2286 }
2287 }
2288
2289 // ToDo: Optimize the following two barriers into some kind of split barrier
2290
2291 if (didit)
2292 *data_ptr = cpy_data;
2293
2294#if OMPT_SUPPORT
2295 ompt_frame_t *ompt_frame;
2296 if (ompt_enabled.enabled) {
2297 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2298 if (ompt_frame->enter_frame.ptr == NULL)
2299 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2300 }
2301 OMPT_STORE_RETURN_ADDRESS(gtid);
2302#endif
2303/* This barrier is not a barrier region boundary */
2304#if USE_ITT_NOTIFY
2305 __kmp_threads[gtid]->th.th_ident = loc;
2306#endif
2307 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2308
2309 if (!didit)
2310 (*cpy_func)(cpy_data, *data_ptr);
2311
2312 // Consider next barrier a user-visible barrier for barrier region boundaries
2313 // Nesting checks are already handled by the single construct checks
2314 {
2315#if OMPT_SUPPORT
2316 OMPT_STORE_RETURN_ADDRESS(gtid);
2317#endif
2318#if USE_ITT_NOTIFY
2319 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2320// tasks can overwrite the location)
2321#endif
2322 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2323#if OMPT_SUPPORT && OMPT_OPTIONAL
2324 if (ompt_enabled.enabled) {
2325 ompt_frame->enter_frame = ompt_data_none;
2326 }
2327#endif
2328 }
2329}
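// Illustrative sketch (an assumption about typical codegen, not part of the
// original source): for '#pragma omp single copyprivate(x)' the compiler
// passes each thread's own &x as cpy_data, the __kmpc_single() result as
// didit, and a helper that copies the executor's value into the caller's
// private copy as cpy_func (invoked above as cpy_func(cpy_data, *data_ptr)).
//
//   static void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }
//   ...
//   kmp_int32 didit = __kmpc_single(&loc, gtid);
//   if (didit) {
//     x = compute_x();                // body of the single region
//     __kmpc_end_single(&loc, gtid);
//   }
//   __kmpc_copyprivate(&loc, gtid, sizeof(x), &x, copy_x, didit);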
2330
2331/* --------------------------------------------------------------------------*/
2348void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2349 void **data_ptr;
2350
2351 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2352
2353 KMP_MB();
2354
2355 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2356
2357 if (__kmp_env_consistency_check) {
2358 if (loc == 0) {
2359 KMP_WARNING(ConstructIdentInvalid);
2360 }
2361 }
2362
2363 // ToDo: Optimize the following barrier
2364
2365 if (cpy_data)
2366 *data_ptr = cpy_data;
2367
2368#if OMPT_SUPPORT
2369 ompt_frame_t *ompt_frame;
2370 if (ompt_enabled.enabled) {
2371 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2372 if (ompt_frame->enter_frame.ptr == NULL)
2373 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2374 OMPT_STORE_RETURN_ADDRESS(gtid);
2375 }
2376#endif
2377/* This barrier is not a barrier region boundary */
2378#if USE_ITT_NOTIFY
2379 __kmp_threads[gtid]->th.th_ident = loc;
2380#endif
2381 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2382
2383 return *data_ptr;
2384}
2385
2386/* -------------------------------------------------------------------------- */
2387
2388#define INIT_LOCK __kmp_init_user_lock_with_checks
2389#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2390#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2391#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2392#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2393#define ACQUIRE_NESTED_LOCK_TIMED \
2394 __kmp_acquire_nested_user_lock_with_checks_timed
2395#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2396#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2397#define TEST_LOCK __kmp_test_user_lock_with_checks
2398#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2399#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2400#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2401
2402// TODO: Make check abort messages use location info & pass it into
2403// with_checks routines
2404
2405#if KMP_USE_DYNAMIC_LOCK
2406
2407// internal lock initializer
2408static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2409 kmp_dyna_lockseq_t seq) {
2410 if (KMP_IS_D_LOCK(seq)) {
2411 KMP_INIT_D_LOCK(lock, seq);
2412#if USE_ITT_BUILD
2413 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2414#endif
2415 } else {
2416 KMP_INIT_I_LOCK(lock, seq);
2417#if USE_ITT_BUILD
2418 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2419 __kmp_itt_lock_creating(ilk->lock, loc);
2420#endif
2421 }
2422}
2423
2424// internal nest lock initializer
2425static __forceinline void
2426__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2427 kmp_dyna_lockseq_t seq) {
2428#if KMP_USE_TSX
2429 // Don't have nested lock implementation for speculative locks
2430 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2431 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2432 seq = __kmp_user_lock_seq;
2433#endif
2434 switch (seq) {
2435 case lockseq_tas:
2436 seq = lockseq_nested_tas;
2437 break;
2438#if KMP_USE_FUTEX
2439 case lockseq_futex:
2440 seq = lockseq_nested_futex;
2441 break;
2442#endif
2443 case lockseq_ticket:
2444 seq = lockseq_nested_ticket;
2445 break;
2446 case lockseq_queuing:
2447 seq = lockseq_nested_queuing;
2448 break;
2449 case lockseq_drdpa:
2450 seq = lockseq_nested_drdpa;
2451 break;
2452 default:
2453 seq = lockseq_nested_queuing;
2454 }
2455 KMP_INIT_I_LOCK(lock, seq);
2456#if USE_ITT_BUILD
2457 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2458 __kmp_itt_lock_creating(ilk->lock, loc);
2459#endif
2460}
2461
2462/* initialize the lock with a hint */
2463void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2464 uintptr_t hint) {
2465 KMP_DEBUG_ASSERT(__kmp_init_serial);
2466 if (__kmp_env_consistency_check && user_lock == NULL) {
2467 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2468 }
2469
2470 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2471
2472#if OMPT_SUPPORT && OMPT_OPTIONAL
2473 // This is the case, if called from omp_init_lock_with_hint:
2474 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2475 if (!codeptr)
2476 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2477 if (ompt_enabled.ompt_callback_lock_init) {
2478 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2479 ompt_mutex_lock, (omp_lock_hint_t)hint,
2480 __ompt_get_mutex_impl_type(user_lock),
2481 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2482 }
2483#endif
2484}
2485
2486/* initialize the lock with a hint */
2487void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2488 void **user_lock, uintptr_t hint) {
2489 KMP_DEBUG_ASSERT(__kmp_init_serial);
2490 if (__kmp_env_consistency_check && user_lock == NULL) {
2491 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2492 }
2493
2494 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2495
2496#if OMPT_SUPPORT && OMPT_OPTIONAL
2497 // This is the case, if called from omp_init_lock_with_hint:
2498 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2499 if (!codeptr)
2500 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2501 if (ompt_enabled.ompt_callback_lock_init) {
2502 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2503 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2504 __ompt_get_mutex_impl_type(user_lock),
2505 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2506 }
2507#endif
2508}
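// Usage sketch (an assumption about user-level code, not part of the source):
// these entry points back omp_init_lock_with_hint() and
// omp_init_nest_lock_with_hint(); the hint is mapped to a lock sequence by
// __kmp_map_hint_to_lock(), after which the lock is used like any other.
//
//   #include <omp.h>
//   omp_lock_t l;
//   omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
//   omp_set_lock(&l);
//   /* critical work */
//   omp_unset_lock(&l);
//   omp_destroy_lock(&l);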
2509
2510#endif // KMP_USE_DYNAMIC_LOCK
2511
2512/* initialize the lock */
2513void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2514#if KMP_USE_DYNAMIC_LOCK
2515
2516 KMP_DEBUG_ASSERT(__kmp_init_serial);
2517 if (__kmp_env_consistency_check && user_lock == NULL) {
2518 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2519 }
2520 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2521
2522#if OMPT_SUPPORT && OMPT_OPTIONAL
2523 // This is the case, if called from omp_init_lock_with_hint:
2524 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2525 if (!codeptr)
2526 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2527 if (ompt_enabled.ompt_callback_lock_init) {
2528 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2529 ompt_mutex_lock, omp_lock_hint_none,
2530 __ompt_get_mutex_impl_type(user_lock),
2531 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2532 }
2533#endif
2534
2535#else // KMP_USE_DYNAMIC_LOCK
2536
2537 static char const *const func = "omp_init_lock";
2538 kmp_user_lock_p lck;
2539 KMP_DEBUG_ASSERT(__kmp_init_serial);
2540
2541 if (__kmp_env_consistency_check) {
2542 if (user_lock == NULL) {
2543 KMP_FATAL(LockIsUninitialized, func);
2544 }
2545 }
2546
2547 KMP_CHECK_USER_LOCK_INIT();
2548
2549 if ((__kmp_user_lock_kind == lk_tas) &&
2550 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2551 lck = (kmp_user_lock_p)user_lock;
2552 }
2553#if KMP_USE_FUTEX
2554 else if ((__kmp_user_lock_kind == lk_futex) &&
2555 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2556 lck = (kmp_user_lock_p)user_lock;
2557 }
2558#endif
2559 else {
2560 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2561 }
2562 INIT_LOCK(lck);
2563 __kmp_set_user_lock_location(lck, loc);
2564
2565#if OMPT_SUPPORT && OMPT_OPTIONAL
2566 // This is the case, if called from omp_init_lock_with_hint:
2567 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2568 if (!codeptr)
2569 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2570 if (ompt_enabled.ompt_callback_lock_init) {
2571 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2572 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2573 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2574 }
2575#endif
2576
2577#if USE_ITT_BUILD
2578 __kmp_itt_lock_creating(lck);
2579#endif /* USE_ITT_BUILD */
2580
2581#endif // KMP_USE_DYNAMIC_LOCK
2582} // __kmpc_init_lock
2583
2584/* initialize the lock */
2585void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2586#if KMP_USE_DYNAMIC_LOCK
2587
2588 KMP_DEBUG_ASSERT(__kmp_init_serial);
2589 if (__kmp_env_consistency_check && user_lock == NULL) {
2590 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2591 }
2592 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2593
2594#if OMPT_SUPPORT && OMPT_OPTIONAL
2595 // This is the case, if called from omp_init_lock_with_hint:
2596 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2597 if (!codeptr)
2598 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2599 if (ompt_enabled.ompt_callback_lock_init) {
2600 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2601 ompt_mutex_nest_lock, omp_lock_hint_none,
2602 __ompt_get_mutex_impl_type(user_lock),
2603 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2604 }
2605#endif
2606
2607#else // KMP_USE_DYNAMIC_LOCK
2608
2609 static char const *const func = "omp_init_nest_lock";
2610 kmp_user_lock_p lck;
2611 KMP_DEBUG_ASSERT(__kmp_init_serial);
2612
2613 if (__kmp_env_consistency_check) {
2614 if (user_lock == NULL) {
2615 KMP_FATAL(LockIsUninitialized, func);
2616 }
2617 }
2618
2619 KMP_CHECK_USER_LOCK_INIT();
2620
2621 if ((__kmp_user_lock_kind == lk_tas) &&
2622 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2623 OMP_NEST_LOCK_T_SIZE)) {
2624 lck = (kmp_user_lock_p)user_lock;
2625 }
2626#if KMP_USE_FUTEX
2627 else if ((__kmp_user_lock_kind == lk_futex) &&
2628 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2629 OMP_NEST_LOCK_T_SIZE)) {
2630 lck = (kmp_user_lock_p)user_lock;
2631 }
2632#endif
2633 else {
2634 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2635 }
2636
2637 INIT_NESTED_LOCK(lck);
2638 __kmp_set_user_lock_location(lck, loc);
2639
2640#if OMPT_SUPPORT && OMPT_OPTIONAL
2641 // This is the case, if called from omp_init_lock_with_hint:
2642 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2643 if (!codeptr)
2644 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2645 if (ompt_enabled.ompt_callback_lock_init) {
2646 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2647 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2648 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2649 }
2650#endif
2651
2652#if USE_ITT_BUILD
2653 __kmp_itt_lock_creating(lck);
2654#endif /* USE_ITT_BUILD */
2655
2656#endif // KMP_USE_DYNAMIC_LOCK
2657} // __kmpc_init_nest_lock
2658
2659void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2660#if KMP_USE_DYNAMIC_LOCK
2661
2662#if USE_ITT_BUILD
2663 kmp_user_lock_p lck;
2664 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2665 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2666 } else {
2667 lck = (kmp_user_lock_p)user_lock;
2668 }
2669 __kmp_itt_lock_destroyed(lck);
2670#endif
2671#if OMPT_SUPPORT && OMPT_OPTIONAL
2672 // This is the case, if called from omp_init_lock_with_hint:
2673 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2674 if (!codeptr)
2675 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2676 if (ompt_enabled.ompt_callback_lock_destroy) {
2677 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2678 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2679 }
2680#endif
2681 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2682#else
2683 kmp_user_lock_p lck;
2684
2685 if ((__kmp_user_lock_kind == lk_tas) &&
2686 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2687 lck = (kmp_user_lock_p)user_lock;
2688 }
2689#if KMP_USE_FUTEX
2690 else if ((__kmp_user_lock_kind == lk_futex) &&
2691 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2692 lck = (kmp_user_lock_p)user_lock;
2693 }
2694#endif
2695 else {
2696 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2697 }
2698
2699#if OMPT_SUPPORT && OMPT_OPTIONAL
2700 // This is the case, if called from omp_init_lock_with_hint:
2701 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2702 if (!codeptr)
2703 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2704 if (ompt_enabled.ompt_callback_lock_destroy) {
2705 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2706 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2707 }
2708#endif
2709
2710#if USE_ITT_BUILD
2711 __kmp_itt_lock_destroyed(lck);
2712#endif /* USE_ITT_BUILD */
2713 DESTROY_LOCK(lck);
2714
2715 if ((__kmp_user_lock_kind == lk_tas) &&
2716 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2717 ;
2718 }
2719#if KMP_USE_FUTEX
2720 else if ((__kmp_user_lock_kind == lk_futex) &&
2721 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2722 ;
2723 }
2724#endif
2725 else {
2726 __kmp_user_lock_free(user_lock, gtid, lck);
2727 }
2728#endif // KMP_USE_DYNAMIC_LOCK
2729} // __kmpc_destroy_lock
2730
2731/* destroy the lock */
2732void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2733#if KMP_USE_DYNAMIC_LOCK
2734
2735#if USE_ITT_BUILD
2736 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2737 __kmp_itt_lock_destroyed(ilk->lock);
2738#endif
2739#if OMPT_SUPPORT && OMPT_OPTIONAL
2740 // This is the case, if called from omp_init_lock_with_hint:
2741 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2742 if (!codeptr)
2743 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2744 if (ompt_enabled.ompt_callback_lock_destroy) {
2745 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2746 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2747 }
2748#endif
2749 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2750
2751#else // KMP_USE_DYNAMIC_LOCK
2752
2753 kmp_user_lock_p lck;
2754
2755 if ((__kmp_user_lock_kind == lk_tas) &&
2756 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2757 OMP_NEST_LOCK_T_SIZE)) {
2758 lck = (kmp_user_lock_p)user_lock;
2759 }
2760#if KMP_USE_FUTEX
2761 else if ((__kmp_user_lock_kind == lk_futex) &&
2762 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2763 OMP_NEST_LOCK_T_SIZE)) {
2764 lck = (kmp_user_lock_p)user_lock;
2765 }
2766#endif
2767 else {
2768 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2769 }
2770
2771#if OMPT_SUPPORT && OMPT_OPTIONAL
2772 // This is the case, if called from omp_init_lock_with_hint:
2773 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2774 if (!codeptr)
2775 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2776 if (ompt_enabled.ompt_callback_lock_destroy) {
2777 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2778 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2779 }
2780#endif
2781
2782#if USE_ITT_BUILD
2783 __kmp_itt_lock_destroyed(lck);
2784#endif /* USE_ITT_BUILD */
2785
2786 DESTROY_NESTED_LOCK(lck);
2787
2788 if ((__kmp_user_lock_kind == lk_tas) &&
2789 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2790 OMP_NEST_LOCK_T_SIZE)) {
2791 ;
2792 }
2793#if KMP_USE_FUTEX
2794 else if ((__kmp_user_lock_kind == lk_futex) &&
2795 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2796 OMP_NEST_LOCK_T_SIZE)) {
2797 ;
2798 }
2799#endif
2800 else {
2801 __kmp_user_lock_free(user_lock, gtid, lck);
2802 }
2803#endif // KMP_USE_DYNAMIC_LOCK
2804} // __kmpc_destroy_nest_lock
2805
2806void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2807 KMP_COUNT_BLOCK(OMP_set_lock);
2808#if KMP_USE_DYNAMIC_LOCK
2809 int tag = KMP_EXTRACT_D_TAG(user_lock);
2810#if USE_ITT_BUILD
2811 __kmp_itt_lock_acquiring(
2812 (kmp_user_lock_p)
2813 user_lock); // itt function will get to the right lock object.
2814#endif
2815#if OMPT_SUPPORT && OMPT_OPTIONAL
2816 // This is the case, if called from omp_init_lock_with_hint:
2817 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2818 if (!codeptr)
2819 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2820 if (ompt_enabled.ompt_callback_mutex_acquire) {
2821 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2822 ompt_mutex_lock, omp_lock_hint_none,
2823 __ompt_get_mutex_impl_type(user_lock),
2824 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2825 }
2826#endif
2827#if KMP_USE_INLINED_TAS
2828 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2829 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2830 } else
2831#elif KMP_USE_INLINED_FUTEX
2832 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2833 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2834 } else
2835#endif
2836 {
2837 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2838 }
2839#if USE_ITT_BUILD
2840 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2841#endif
2842#if OMPT_SUPPORT && OMPT_OPTIONAL
2843 if (ompt_enabled.ompt_callback_mutex_acquired) {
2844 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2845 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2846 }
2847#endif
2848
2849#else // KMP_USE_DYNAMIC_LOCK
2850
2851 kmp_user_lock_p lck;
2852
2853 if ((__kmp_user_lock_kind == lk_tas) &&
2854 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2855 lck = (kmp_user_lock_p)user_lock;
2856 }
2857#if KMP_USE_FUTEX
2858 else if ((__kmp_user_lock_kind == lk_futex) &&
2859 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2860 lck = (kmp_user_lock_p)user_lock;
2861 }
2862#endif
2863 else {
2864 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2865 }
2866
2867#if USE_ITT_BUILD
2868 __kmp_itt_lock_acquiring(lck);
2869#endif /* USE_ITT_BUILD */
2870#if OMPT_SUPPORT && OMPT_OPTIONAL
2871 // This is the case, if called from omp_init_lock_with_hint:
2872 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2873 if (!codeptr)
2874 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2875 if (ompt_enabled.ompt_callback_mutex_acquire) {
2876 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2877 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2878 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2879 }
2880#endif
2881
2882 ACQUIRE_LOCK(lck, gtid);
2883
2884#if USE_ITT_BUILD
2885 __kmp_itt_lock_acquired(lck);
2886#endif /* USE_ITT_BUILD */
2887
2888#if OMPT_SUPPORT && OMPT_OPTIONAL
2889 if (ompt_enabled.ompt_callback_mutex_acquired) {
2890 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2891 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2892 }
2893#endif
2894
2895#endif // KMP_USE_DYNAMIC_LOCK
2896}
2897
2898void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2899#if KMP_USE_DYNAMIC_LOCK
2900
2901#if USE_ITT_BUILD
2902 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2903#endif
2904#if OMPT_SUPPORT && OMPT_OPTIONAL
2905 // This is the case, if called from omp_init_lock_with_hint:
2906 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2907 if (!codeptr)
2908 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2909 if (ompt_enabled.enabled) {
2910 if (ompt_enabled.ompt_callback_mutex_acquire) {
2911 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2912 ompt_mutex_nest_lock, omp_lock_hint_none,
2913 __ompt_get_mutex_impl_type(user_lock),
2914 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2915 }
2916 }
2917#endif
2918 int acquire_status =
2919 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2920 (void)acquire_status;
2921#if USE_ITT_BUILD
2922 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2923#endif
2924
2925#if OMPT_SUPPORT && OMPT_OPTIONAL
2926 if (ompt_enabled.enabled) {
2927 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2928 if (ompt_enabled.ompt_callback_mutex_acquired) {
2929 // lock_first
2930 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2931 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2932 codeptr);
2933 }
2934 } else {
2935 if (ompt_enabled.ompt_callback_nest_lock) {
2936 // lock_next
2937 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2938 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2939 }
2940 }
2941 }
2942#endif
2943
2944#else // KMP_USE_DYNAMIC_LOCK
2945 int acquire_status;
2946 kmp_user_lock_p lck;
2947
2948 if ((__kmp_user_lock_kind == lk_tas) &&
2949 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2950 OMP_NEST_LOCK_T_SIZE)) {
2951 lck = (kmp_user_lock_p)user_lock;
2952 }
2953#if KMP_USE_FUTEX
2954 else if ((__kmp_user_lock_kind == lk_futex) &&
2955 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2956 OMP_NEST_LOCK_T_SIZE)) {
2957 lck = (kmp_user_lock_p)user_lock;
2958 }
2959#endif
2960 else {
2961 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2962 }
2963
2964#if USE_ITT_BUILD
2965 __kmp_itt_lock_acquiring(lck);
2966#endif /* USE_ITT_BUILD */
2967#if OMPT_SUPPORT && OMPT_OPTIONAL
2968 // This is the case, if called from omp_init_lock_with_hint:
2969 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2970 if (!codeptr)
2971 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2972 if (ompt_enabled.enabled) {
2973 if (ompt_enabled.ompt_callback_mutex_acquire) {
2974 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2975 ompt_mutex_nest_lock, omp_lock_hint_none,
2976 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2977 codeptr);
2978 }
2979 }
2980#endif
2981
2982 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2983
2984#if USE_ITT_BUILD
2985 __kmp_itt_lock_acquired(lck);
2986#endif /* USE_ITT_BUILD */
2987
2988#if OMPT_SUPPORT && OMPT_OPTIONAL
2989 if (ompt_enabled.enabled) {
2990 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2991 if (ompt_enabled.ompt_callback_mutex_acquired) {
2992 // lock_first
2993 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2994 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2995 }
2996 } else {
2997 if (ompt_enabled.ompt_callback_nest_lock) {
2998 // lock_next
2999 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3000 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3001 }
3002 }
3003 }
3004#endif
3005
3006#endif // KMP_USE_DYNAMIC_LOCK
3007}
3008
3009void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3010#if KMP_USE_DYNAMIC_LOCK
3011
3012 int tag = KMP_EXTRACT_D_TAG(user_lock);
3013#if USE_ITT_BUILD
3014 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3015#endif
3016#if KMP_USE_INLINED_TAS
3017 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3018 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
3019 } else
3020#elif KMP_USE_INLINED_FUTEX
3021 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3022 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
3023 } else
3024#endif
3025 {
3026 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3027 }
3028
3029#if OMPT_SUPPORT && OMPT_OPTIONAL
3030 // This is the case, if called from omp_init_lock_with_hint:
3031 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3032 if (!codeptr)
3033 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3034 if (ompt_enabled.ompt_callback_mutex_released) {
3035 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3036 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3037 }
3038#endif
3039
3040#else // KMP_USE_DYNAMIC_LOCK
3041
3042 kmp_user_lock_p lck;
3043
3044 /* Can't use serial interval since not block structured */
3045 /* release the lock */
3046
3047 if ((__kmp_user_lock_kind == lk_tas) &&
3048 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3049#if KMP_OS_LINUX && \
3050 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3051// "fast" path implemented to fix customer performance issue
3052#if USE_ITT_BUILD
3053 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3054#endif /* USE_ITT_BUILD */
3055 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
3056 KMP_MB();
3057
3058#if OMPT_SUPPORT && OMPT_OPTIONAL
3059 // This is the case, if called from omp_init_lock_with_hint:
3060 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3061 if (!codeptr)
3062 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3063 if (ompt_enabled.ompt_callback_mutex_released) {
3064 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3065 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3066 }
3067#endif
3068
3069 return;
3070#else
3071 lck = (kmp_user_lock_p)user_lock;
3072#endif
3073 }
3074#if KMP_USE_FUTEX
3075 else if ((__kmp_user_lock_kind == lk_futex) &&
3076 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3077 lck = (kmp_user_lock_p)user_lock;
3078 }
3079#endif
3080 else {
3081 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
3082 }
3083
3084#if USE_ITT_BUILD
3085 __kmp_itt_lock_releasing(lck);
3086#endif /* USE_ITT_BUILD */
3087
3088 RELEASE_LOCK(lck, gtid);
3089
3090#if OMPT_SUPPORT && OMPT_OPTIONAL
3091 // This is the case, if called from omp_init_lock_with_hint:
3092 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3093 if (!codeptr)
3094 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3095 if (ompt_enabled.ompt_callback_mutex_released) {
3096 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3097 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3098 }
3099#endif
3100
3101#endif // KMP_USE_DYNAMIC_LOCK
3102}
3103
3104/* release the lock */
3105void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3106#if KMP_USE_DYNAMIC_LOCK
3107
3108#if USE_ITT_BUILD
3109 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3110#endif
3111 int release_status =
3112 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3113 (void)release_status;
3114
3115#if OMPT_SUPPORT && OMPT_OPTIONAL
3116 // This is the case, if called from omp_init_lock_with_hint:
3117 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3118 if (!codeptr)
3119 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3120 if (ompt_enabled.enabled) {
3121 if (release_status == KMP_LOCK_RELEASED) {
3122 if (ompt_enabled.ompt_callback_mutex_released) {
3123 // release_lock_last
3124 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3125 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3126 codeptr);
3127 }
3128 } else if (ompt_enabled.ompt_callback_nest_lock) {
3129 // release_lock_prev
3130 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3131 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3132 }
3133 }
3134#endif
3135
3136#else // KMP_USE_DYNAMIC_LOCK
3137
3138 kmp_user_lock_p lck;
3139
3140 /* Can't use serial interval since not block structured */
3141
3142 if ((__kmp_user_lock_kind == lk_tas) &&
3143 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3144 OMP_NEST_LOCK_T_SIZE)) {
3145#if KMP_OS_LINUX && \
3146 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3147 // "fast" path implemented to fix customer performance issue
3148 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3149#if USE_ITT_BUILD
3150 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3151#endif /* USE_ITT_BUILD */
3152
3153#if OMPT_SUPPORT && OMPT_OPTIONAL
3154 int release_status = KMP_LOCK_STILL_HELD;
3155#endif
3156
3157 if (--(tl->lk.depth_locked) == 0) {
3158 TCW_4(tl->lk.poll, 0);
3159#if OMPT_SUPPORT && OMPT_OPTIONAL
3160 release_status = KMP_LOCK_RELEASED;
3161#endif
3162 }
3163 KMP_MB();
3164
3165#if OMPT_SUPPORT && OMPT_OPTIONAL
3166 // This is the case, if called from omp_init_lock_with_hint:
3167 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3168 if (!codeptr)
3169 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3170 if (ompt_enabled.enabled) {
3171 if (release_status == KMP_LOCK_RELEASED) {
3172 if (ompt_enabled.ompt_callback_mutex_released) {
3173 // release_lock_last
3174 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3175 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3176 }
3177 } else if (ompt_enabled.ompt_callback_nest_lock) {
3178 // release_lock_previous
3179 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3180 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3181 }
3182 }
3183#endif
3184
3185 return;
3186#else
3187 lck = (kmp_user_lock_p)user_lock;
3188#endif
3189 }
3190#if KMP_USE_FUTEX
3191 else if ((__kmp_user_lock_kind == lk_futex) &&
3192 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3193 OMP_NEST_LOCK_T_SIZE)) {
3194 lck = (kmp_user_lock_p)user_lock;
3195 }
3196#endif
3197 else {
3198 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3199 }
3200
3201#if USE_ITT_BUILD
3202 __kmp_itt_lock_releasing(lck);
3203#endif /* USE_ITT_BUILD */
3204
3205 int release_status;
3206 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3207#if OMPT_SUPPORT && OMPT_OPTIONAL
3208 // This is the case, if called from omp_init_lock_with_hint:
3209 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3210 if (!codeptr)
3211 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3212 if (ompt_enabled.enabled) {
3213 if (release_status == KMP_LOCK_RELEASED) {
3214 if (ompt_enabled.ompt_callback_mutex_released) {
3215 // release_lock_last
3216 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3217 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3218 }
3219 } else if (ompt_enabled.ompt_callback_nest_lock) {
3220 // release_lock_previous
3221 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3222 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3223 }
3224 }
3225#endif
3226
3227#endif // KMP_USE_DYNAMIC_LOCK
3228}
3229
3230/* try to acquire the lock */
3231int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3232 KMP_COUNT_BLOCK(OMP_test_lock);
3233
3234#if KMP_USE_DYNAMIC_LOCK
3235 int rc;
3236 int tag = KMP_EXTRACT_D_TAG(user_lock);
3237#if USE_ITT_BUILD
3238 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3239#endif
3240#if OMPT_SUPPORT && OMPT_OPTIONAL
3241 // This is the case, if called from omp_init_lock_with_hint:
3242 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3243 if (!codeptr)
3244 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3245 if (ompt_enabled.ompt_callback_mutex_acquire) {
3246 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3247 ompt_mutex_test_lock, omp_lock_hint_none,
3248 __ompt_get_mutex_impl_type(user_lock),
3249 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3250 }
3251#endif
3252#if KMP_USE_INLINED_TAS
3253 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3254 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3255 } else
3256#elif KMP_USE_INLINED_FUTEX
3257 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3258 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3259 } else
3260#endif
3261 {
3262 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3263 }
3264 if (rc) {
3265#if USE_ITT_BUILD
3266 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3267#endif
3268#if OMPT_SUPPORT && OMPT_OPTIONAL
3269 if (ompt_enabled.ompt_callback_mutex_acquired) {
3270 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3271 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3272 }
3273#endif
3274 return FTN_TRUE;
3275 } else {
3276#if USE_ITT_BUILD
3277 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3278#endif
3279 return FTN_FALSE;
3280 }
3281
3282#else // KMP_USE_DYNAMIC_LOCK
3283
3284 kmp_user_lock_p lck;
3285 int rc;
3286
3287 if ((__kmp_user_lock_kind == lk_tas) &&
3288 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3289 lck = (kmp_user_lock_p)user_lock;
3290 }
3291#if KMP_USE_FUTEX
3292 else if ((__kmp_user_lock_kind == lk_futex) &&
3293 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3294 lck = (kmp_user_lock_p)user_lock;
3295 }
3296#endif
3297 else {
3298 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3299 }
3300
3301#if USE_ITT_BUILD
3302 __kmp_itt_lock_acquiring(lck);
3303#endif /* USE_ITT_BUILD */
3304#if OMPT_SUPPORT && OMPT_OPTIONAL
3305 // This is the case, if called from omp_init_lock_with_hint:
3306 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3307 if (!codeptr)
3308 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3309 if (ompt_enabled.ompt_callback_mutex_acquire) {
3310 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3311 ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3312 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3313 }
3314#endif
3315
3316 rc = TEST_LOCK(lck, gtid);
3317#if USE_ITT_BUILD
3318 if (rc) {
3319 __kmp_itt_lock_acquired(lck);
3320 } else {
3321 __kmp_itt_lock_cancelled(lck);
3322 }
3323#endif /* USE_ITT_BUILD */
3324#if OMPT_SUPPORT && OMPT_OPTIONAL
3325 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3326 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3327 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3328 }
3329#endif
3330
3331 return (rc ? FTN_TRUE : FTN_FALSE);
3332
3333 /* Can't use serial interval since not block structured */
3334
3335#endif // KMP_USE_DYNAMIC_LOCK
3336}
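// Usage sketch (an assumption about user-level code, not part of the source):
// omp_test_lock() maps onto this entry point and returns nonzero only when
// the lock was actually acquired, so callers can poll without blocking.
//
//   while (!omp_test_lock(&l)) {
//     do_other_work();   // hypothetical helper
//   }
//   /* lock held here */
//   omp_unset_lock(&l);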
3337
3338/* try to acquire the lock */
3339int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3340#if KMP_USE_DYNAMIC_LOCK
3341 int rc;
3342#if USE_ITT_BUILD
3343 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3344#endif
3345#if OMPT_SUPPORT && OMPT_OPTIONAL
3346 // This is the case, if called from omp_init_lock_with_hint:
3347 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3348 if (!codeptr)
3349 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3350 if (ompt_enabled.ompt_callback_mutex_acquire) {
3351 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3352 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3353 __ompt_get_mutex_impl_type(user_lock),
3354 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3355 }
3356#endif
3357 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3358#if USE_ITT_BUILD
3359 if (rc) {
3360 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3361 } else {
3362 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3363 }
3364#endif
3365#if OMPT_SUPPORT && OMPT_OPTIONAL
3366 if (ompt_enabled.enabled && rc) {
3367 if (rc == 1) {
3368 if (ompt_enabled.ompt_callback_mutex_acquired) {
3369 // lock_first
3370 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3371 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3372 codeptr);
3373 }
3374 } else {
3375 if (ompt_enabled.ompt_callback_nest_lock) {
3376 // lock_next
3377 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3378 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3379 }
3380 }
3381 }
3382#endif
3383 return rc;
3384
3385#else // KMP_USE_DYNAMIC_LOCK
3386
3387 kmp_user_lock_p lck;
3388 int rc;
3389
3390 if ((__kmp_user_lock_kind == lk_tas) &&
3391 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3392 OMP_NEST_LOCK_T_SIZE)) {
3393 lck = (kmp_user_lock_p)user_lock;
3394 }
3395#if KMP_USE_FUTEX
3396 else if ((__kmp_user_lock_kind == lk_futex) &&
3397 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3398 OMP_NEST_LOCK_T_SIZE)) {
3399 lck = (kmp_user_lock_p)user_lock;
3400 }
3401#endif
3402 else {
3403 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3404 }
3405
3406#if USE_ITT_BUILD
3407 __kmp_itt_lock_acquiring(lck);
3408#endif /* USE_ITT_BUILD */
3409
3410#if OMPT_SUPPORT && OMPT_OPTIONAL
3411 // This is the case, if called from omp_init_lock_with_hint:
3412 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3413 if (!codeptr)
3414 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3415 if (ompt_enabled.enabled &&
3416 ompt_enabled.ompt_callback_mutex_acquire) {
3417 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3418 ompt_mutex_test_nest_lock, omp_lock_hint_none,
3419 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3420 codeptr);
3421 }
3422#endif
3423
3424 rc = TEST_NESTED_LOCK(lck, gtid);
3425#if USE_ITT_BUILD
3426 if (rc) {
3427 __kmp_itt_lock_acquired(lck);
3428 } else {
3429 __kmp_itt_lock_cancelled(lck);
3430 }
3431#endif /* USE_ITT_BUILD */
3432#if OMPT_SUPPORT && OMPT_OPTIONAL
3433 if (ompt_enabled.enabled && rc) {
3434 if (rc == 1) {
3435 if (ompt_enabled.ompt_callback_mutex_acquired) {
3436 // lock_first
3437 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3438 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3439 }
3440 } else {
3441 if (ompt_enabled.ompt_callback_nest_lock) {
3442 // lock_next
3443 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3444 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3445 }
3446 }
3447 }
3448#endif
3449 return rc;
3450
3451 /* Can't use serial interval since not block structured */
3452
3453#endif // KMP_USE_DYNAMIC_LOCK
3454}
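// Usage sketch (an assumption about user-level code, not part of the source):
// omp_test_nest_lock() returns the new nesting count on success and 0 on
// failure, which is why rc == 1 above is reported as the first acquisition
// and rc > 1 as a re-acquisition by the owning thread.
//
//   omp_nest_lock_t nl;
//   omp_init_nest_lock(&nl);
//   if (omp_test_nest_lock(&nl)) {      // returns 1: first acquisition
//     (void)omp_test_nest_lock(&nl);    // returns 2: owner re-acquired
//     omp_unset_nest_lock(&nl);
//     omp_unset_nest_lock(&nl);
//   }
//   omp_destroy_nest_lock(&nl);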
3455
3456// Interface to fast scalable reduce methods routines
3457
3458// keep the selected method in a thread local structure for cross-function
3459// usage: will be used in __kmpc_end_reduce* functions;
3460// another solution: to re-determine the method one more time in
3461// __kmpc_end_reduce* functions (new prototype required then)
3462// AT: which solution is better?
3463#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3464 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3465
3466#define __KMP_GET_REDUCTION_METHOD(gtid) \
3467 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3468
3469// description of the packed_reduction_method variable: look at the macros in
3470// kmp.h
3471
3472// used in a critical section reduce block
3473static __forceinline void
3474__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3475 kmp_critical_name *crit) {
3476
3477 // this lock was visible to a customer and to the threading profile tool as a
3478 // serial overhead span (although it's used for an internal purpose only)
3479 // why was it visible in the previous implementation?
3480 // should we keep it visible in the new reduce block?
3481 kmp_user_lock_p lck;
3482
3483#if KMP_USE_DYNAMIC_LOCK
3484
3485 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3486 // Check if it is initialized.
3487 if (*lk == 0) {
3488 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3489 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3490 KMP_GET_D_TAG(__kmp_user_lock_seq));
3491 } else {
3492 __kmp_init_indirect_csptr(crit, loc, global_tid,
3493 KMP_GET_I_TAG(__kmp_user_lock_seq));
3494 }
3495 }
3496 // Branch for accessing the actual lock object and set operation. This
3497 // branching is inevitable since this lock initialization does not follow the
3498 // normal dispatch path (lock table is not used).
3499 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3500 lck = (kmp_user_lock_p)lk;
3501 KMP_DEBUG_ASSERT(lck != NULL);
3502 if (__kmp_env_consistency_check) {
3503 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3504 }
3505 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3506 } else {
3507 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3508 lck = ilk->lock;
3509 KMP_DEBUG_ASSERT(lck != NULL);
3510 if (__kmp_env_consistency_check) {
3511 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3512 }
3513 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3514 }
3515
3516#else // KMP_USE_DYNAMIC_LOCK
3517
3518 // We know that the fast reduction code is only emitted by Intel compilers
3519 // with 32 byte critical sections. If there isn't enough space, then we
3520 // have to use a pointer.
3521 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3522 lck = (kmp_user_lock_p)crit;
3523 } else {
3524 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3525 }
3526 KMP_DEBUG_ASSERT(lck != NULL);
3527
3528 if (__kmp_env_consistency_check)
3529 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3530
3531 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3532
3533#endif // KMP_USE_DYNAMIC_LOCK
3534}
3535
3536// used in a critical section reduce block
3537static __forceinline void
3538__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3539 kmp_critical_name *crit) {
3540
3541 kmp_user_lock_p lck;
3542
3543#if KMP_USE_DYNAMIC_LOCK
3544
3545 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3546 lck = (kmp_user_lock_p)crit;
3547 if (__kmp_env_consistency_check)
3548 __kmp_pop_sync(global_tid, ct_critical, loc);
3549 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3550 } else {
3551 kmp_indirect_lock_t *ilk =
3552 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3553 if (__kmp_env_consistency_check)
3554 __kmp_pop_sync(global_tid, ct_critical, loc);
3555 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3556 }
3557
3558#else // KMP_USE_DYNAMIC_LOCK
3559
3560 // We know that the fast reduction code is only emitted by Intel compilers
3561 // with 32 byte critical sections. If there isn't enough space, then we have
3562 // to use a pointer.
3563 if (__kmp_base_user_lock_size > 32) {
3564 lck = *((kmp_user_lock_p *)crit);
3565 KMP_ASSERT(lck != NULL);
3566 } else {
3567 lck = (kmp_user_lock_p)crit;
3568 }
3569
3570 if (__kmp_env_consistency_check)
3571 __kmp_pop_sync(global_tid, ct_critical, loc);
3572
3573 __kmp_release_user_lock_with_checks(lck, global_tid);
3574
3575#endif // KMP_USE_DYNAMIC_LOCK
3576} // __kmp_end_critical_section_reduce_block
3577
3578static __forceinline int
3579__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3580 int *task_state) {
3581 kmp_team_t *team;
3582
3583 // Check whether we are inside a teams construct.
3584 if (th->th.th_teams_microtask) {
3585 *team_p = team = th->th.th_team;
3586 if (team->t.t_level == th->th.th_teams_level) {
3587 // This is reduction at teams construct.
3588 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3589 // Let's swap teams temporarily for the reduction.
3590 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3591 th->th.th_team = team->t.t_parent;
3592 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3593 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3594 *task_state = th->th.th_task_state;
3595 th->th.th_task_state = 0;
3596
3597 return 1;
3598 }
3599 }
3600 return 0;
3601}
3602
3603static __forceinline void
3604__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3605 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3606 th->th.th_info.ds.ds_tid = 0;
3607 th->th.th_team = team;
3608 th->th.th_team_nproc = team->t.t_nproc;
3609 th->th.th_task_team = team->t.t_task_team[task_state];
3610 __kmp_type_convert(task_state, &(th->th.th_task_state));
3611}
3612
3613/* 2.a.i. Reduce Block without a terminating barrier */
3629kmp_int32
3630__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3631 size_t reduce_size, void *reduce_data,
3632 void (*reduce_func)(void *lhs_data, void *rhs_data),
3633 kmp_critical_name *lck) {
3634
3635 KMP_COUNT_BLOCK(REDUCE_nowait);
3636 int retval = 0;
3637 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3638 kmp_info_t *th;
3639 kmp_team_t *team;
3640 int teams_swapped = 0, task_state;
3641 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3642 __kmp_assert_valid_gtid(global_tid);
3643
3644 // why do we need this initialization here at all?
3645 // A reduction clause cannot be used as a stand-alone directive.
3646
3647 // do not call __kmp_serial_initialize(), it will be called by
3648 // __kmp_parallel_initialize() if needed
3649 // possible detection of false-positive race by the threadchecker ???
3650 if (!TCR_4(__kmp_init_parallel))
3651 __kmp_parallel_initialize();
3652
3653 __kmp_resume_if_soft_paused();
3654
3655// check correctness of reduce block nesting
3656#if KMP_USE_DYNAMIC_LOCK
3657 if (__kmp_env_consistency_check)
3658 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3659#else
3660 if (__kmp_env_consistency_check)
3661 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3662#endif
3663
3664 th = __kmp_thread_from_gtid(global_tid);
3665 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3666
3667 // the packed_reduction_method value will be reused by the __kmp_end_reduce*
3668 // functions, so the value must be kept in a variable
3669 // the variable should be either a construct-specific or a thread-specific
3670 // property, not a team-specific property
3671 // (a thread can reach the next reduce block on the next construct, and the
3672 // reduce method may differ on that construct)
3673 // an ident_t "loc" parameter could be used as a construct-specific property
3674 // (but what if loc == 0?)
3675 // (both a construct-specific and a team-specific variable would be shared,
3676 // so unnecessary extra synchronization would be needed)
3677 // a thread-specific variable is better with regard to both issues above (next
3678 // construct and extra syncs)
3679 // a thread-specific "th_local.packed_reduction_method" variable is used currently
3680 // each thread executes the 'determine' and 'set' lines (no need to restrict
3681 // this to one thread, which avoids unnecessary extra syncs)
3682
3683 packed_reduction_method = __kmp_determine_reduction_method(
3684 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3685 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3686
3687 OMPT_REDUCTION_DECL(th, global_tid);
3688 if (packed_reduction_method == critical_reduce_block) {
3689
3690 OMPT_REDUCTION_BEGIN;
3691
3692 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3693 retval = 1;
3694
3695 } else if (packed_reduction_method == empty_reduce_block) {
3696
3697 OMPT_REDUCTION_BEGIN;
3698
3699 // usage: if team size == 1, no synchronization is required ( Intel
3700 // platforms only )
3701 retval = 1;
3702
3703 } else if (packed_reduction_method == atomic_reduce_block) {
3704
3705 retval = 2;
3706
3707 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3708 // won't be called by the code gen)
3709 // (this is not ideal: the checking block has been closed by
3710 // this 'pop',
3711 // but the atomic operation has not been executed yet; it will execute
3712 // slightly later, literally on the next instruction)
3713 if (__kmp_env_consistency_check)
3714 __kmp_pop_sync(global_tid, ct_reduce, loc);
3715
3716 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3717 tree_reduce_block)) {
3718
3719// AT: performance issue: a real barrier here
3720// AT: (if the primary thread is slow, other threads are blocked here waiting
3721// for the primary thread to come and release them)
3722// AT: (this is not what a customer expects when specifying the NOWAIT clause)
3723// AT: (specifying NOWAIT won't improve performance here and may be
3724// confusing to a customer)
3725// AT: another implementation of *barrier_gather*nowait() (or some other design)
3726// might be faster and more in line with the sense of NOWAIT
3727// AT: TODO: run the EPCC test and compare times
3728
3729// this barrier should be invisible to a customer and to the threading profile
3730// tool (it's neither a terminating barrier nor customer's code, it's
3731// used for an internal purpose)
3732#if OMPT_SUPPORT
3733 // JP: can this barrier potentially lead to task scheduling?
3734 // JP: as long as there is a barrier in the implementation, OMPT should and
3735 // will provide the barrier events
3736 // so we set-up the necessary frame/return addresses.
3737 ompt_frame_t *ompt_frame;
3738 if (ompt_enabled.enabled) {
3739 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3740 if (ompt_frame->enter_frame.ptr == NULL)
3741 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3742 }
3743 OMPT_STORE_RETURN_ADDRESS(global_tid);
3744#endif
3745#if USE_ITT_NOTIFY
3746 __kmp_threads[global_tid]->th.th_ident = loc;
3747#endif
3748 retval =
3749 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3750 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3751 retval = (retval != 0) ? (0) : (1);
3752#if OMPT_SUPPORT && OMPT_OPTIONAL
3753 if (ompt_enabled.enabled) {
3754 ompt_frame->enter_frame = ompt_data_none;
3755 }
3756#endif
3757
3758 // all other workers except primary thread should do this pop here
3759 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3760 if (__kmp_env_consistency_check) {
3761 if (retval == 0) {
3762 __kmp_pop_sync(global_tid, ct_reduce, loc);
3763 }
3764 }
3765
3766 } else {
3767
3768 // should never reach this block
3769 KMP_ASSERT(0); // "unexpected method"
3770 }
3771 if (teams_swapped) {
3772 __kmp_restore_swapped_teams(th, team, task_state);
3773 }
3774 KA_TRACE(
3775 10,
3776 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3777 global_tid, packed_reduction_method, retval));
3778
3779 return retval;
3780}
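// Illustrative sketch (an assumption about typical codegen, not part of the
// original source): the caller dispatches on the return value; 1 selects the
// path that must be closed with __kmpc_end_reduce_nowait(), 2 selects the
// atomic path, and 0 means a tree-reduction worker with nothing left to do.
// 'sum', 'global_sum', 'reduce_fn' and 'crit_name' stand in for the
// compiler-generated reduction local, target, combiner and critical name.
//
//   switch (__kmpc_reduce_nowait(&loc, gtid, 1, sizeof(sum), &sum, reduce_fn,
//                                &crit_name)) {
//   case 1:
//     global_sum += sum;                       // combine, then close
//     __kmpc_end_reduce_nowait(&loc, gtid, &crit_name);
//     break;
//   case 2:
//     __kmpc_atomic_fixed4_add(&loc, gtid, &global_sum, sum); // atomic combine
//     break;
//   default:
//     break;
//   }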
3781
3790void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3791 kmp_critical_name *lck) {
3792
3793 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3794
3795 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3796 __kmp_assert_valid_gtid(global_tid);
3797
3798 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3799
3800 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3801
3802 if (packed_reduction_method == critical_reduce_block) {
3803
3804 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3805 OMPT_REDUCTION_END;
3806
3807 } else if (packed_reduction_method == empty_reduce_block) {
3808
3809 // usage: if team size == 1, no synchronization is required ( on Intel
3810 // platforms only )
3811
3812 OMPT_REDUCTION_END;
3813
3814 } else if (packed_reduction_method == atomic_reduce_block) {
3815
3816 // neither primary thread nor other workers should get here
3817 // (code gen does not generate this call in case 2: atomic reduce block)
3818 // actually it would be better to remove this else-if entirely;
3819 // after removal this value will be checked by the 'else' branch and will assert
3820
3821 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3822 tree_reduce_block)) {
3823
3824 // only primary thread gets here
3825 // OMPT: tree reduction is annotated in the barrier code
3826
3827 } else {
3828
3829 // should never reach this block
3830 KMP_ASSERT(0); // "unexpected method"
3831 }
3832
3833 if (__kmp_env_consistency_check)
3834 __kmp_pop_sync(global_tid, ct_reduce, loc);
3835
3836 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3837 global_tid, packed_reduction_method));
3838
3839 return;
3840}
3841
3842/* 2.a.ii. Reduce Block with a terminating barrier */
3843
3859kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3860 size_t reduce_size, void *reduce_data,
3861 void (*reduce_func)(void *lhs_data, void *rhs_data),
3862 kmp_critical_name *lck) {
3863 KMP_COUNT_BLOCK(REDUCE_wait);
3864 int retval = 0;
3865 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3866 kmp_info_t *th;
3867 kmp_team_t *team;
3868 int teams_swapped = 0, task_state;
3869
3870 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3871 __kmp_assert_valid_gtid(global_tid);
3872
3873 // why do we need this initialization here at all?
3874 // A reduction clause cannot be a stand-alone directive.
3875
3876 // do not call __kmp_serial_initialize(); it will be called by
3877 // __kmp_parallel_initialize() if needed
3878 // possible false-positive race reported by the thread checker???
3879 if (!TCR_4(__kmp_init_parallel))
3880 __kmp_parallel_initialize();
3881
3882 __kmp_resume_if_soft_paused();
3883
3884// check correctness of reduce block nesting
3885#if KMP_USE_DYNAMIC_LOCK
3886 if (__kmp_env_consistency_check)
3887 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3888#else
3889 if (__kmp_env_consistency_check)
3890 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3891#endif
3892
3893 th = __kmp_thread_from_gtid(global_tid);
3894 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3895
3896 packed_reduction_method = __kmp_determine_reduction_method(
3897 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3898 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3899
3900 OMPT_REDUCTION_DECL(th, global_tid);
3901
3902 if (packed_reduction_method == critical_reduce_block) {
3903
3904 OMPT_REDUCTION_BEGIN;
3905 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3906 retval = 1;
3907
3908 } else if (packed_reduction_method == empty_reduce_block) {
3909
3910 OMPT_REDUCTION_BEGIN;
3911 // usage: if team size == 1, no synchronization is required (Intel
3912 // platforms only)
3913 retval = 1;
3914
3915 } else if (packed_reduction_method == atomic_reduce_block) {
3916
3917 retval = 2;
3918
3919 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3920 tree_reduce_block)) {
3921
3922// case tree_reduce_block:
3923// this barrier should be visible to the customer and to the threading profile
3924// tool (it is a terminating barrier on constructs if NOWAIT is not specified)
3925#if OMPT_SUPPORT
3926 ompt_frame_t *ompt_frame;
3927 if (ompt_enabled.enabled) {
3928 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3929 if (ompt_frame->enter_frame.ptr == NULL)
3930 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3931 }
3932 OMPT_STORE_RETURN_ADDRESS(global_tid);
3933#endif
3934#if USE_ITT_NOTIFY
3935 __kmp_threads[global_tid]->th.th_ident =
3936 loc; // needed for correct notification of frames
3937#endif
3938 retval =
3939 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3940 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3941 retval = (retval != 0) ? (0) : (1);
3942#if OMPT_SUPPORT && OMPT_OPTIONAL
3943 if (ompt_enabled.enabled) {
3944 ompt_frame->enter_frame = ompt_data_none;
3945 }
3946#endif
3947
3948 // all workers except the primary thread should do this pop here
3949 // (no worker other than the primary will enter __kmpc_end_reduce())
3950 if (__kmp_env_consistency_check) {
3951 if (retval == 0) { // 0: all other workers; 1: primary thread
3952 __kmp_pop_sync(global_tid, ct_reduce, loc);
3953 }
3954 }
3955
3956 } else {
3957
3958 // should never reach this block
3959 KMP_ASSERT(0); // "unexpected method"
3960 }
3961 if (teams_swapped) {
3962 __kmp_restore_swapped_teams(th, team, task_state);
3963 }
3964
3965 KA_TRACE(10,
3966 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3967 global_tid, packed_reduction_method, retval));
3968 return retval;
3969}
3970
3981void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3982 kmp_critical_name *lck) {
3983
3984 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3985 kmp_info_t *th;
3986 kmp_team_t *team;
3987 int teams_swapped = 0, task_state;
3988
3989 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3990 __kmp_assert_valid_gtid(global_tid);
3991
3992 th = __kmp_thread_from_gtid(global_tid);
3993 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3994
3995 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3996
3997 // this barrier should be visible to the customer and to the threading profile
3998 // tool (it is a terminating barrier on constructs if NOWAIT is not specified)
3999 OMPT_REDUCTION_DECL(th, global_tid);
4000
4001 if (packed_reduction_method == critical_reduce_block) {
4002 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
4003
4004 OMPT_REDUCTION_END;
4005
4006// TODO: implicit barrier: should be exposed
4007#if OMPT_SUPPORT
4008 ompt_frame_t *ompt_frame;
4009 if (ompt_enabled.enabled) {
4010 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4011 if (ompt_frame->enter_frame.ptr == NULL)
4012 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4013 }
4014 OMPT_STORE_RETURN_ADDRESS(global_tid);
4015#endif
4016#if USE_ITT_NOTIFY
4017 __kmp_threads[global_tid]->th.th_ident = loc;
4018#endif
4019 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4020#if OMPT_SUPPORT && OMPT_OPTIONAL
4021 if (ompt_enabled.enabled) {
4022 ompt_frame->enter_frame = ompt_data_none;
4023 }
4024#endif
4025
4026 } else if (packed_reduction_method == empty_reduce_block) {
4027
4028 OMPT_REDUCTION_END;
4029
4030// usage: if team size == 1, no synchronization is required (Intel platforms only)
4031
4032// TODO: implicit barrier: should be exposed
4033#if OMPT_SUPPORT
4034 ompt_frame_t *ompt_frame;
4035 if (ompt_enabled.enabled) {
4036 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4037 if (ompt_frame->enter_frame.ptr == NULL)
4038 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4039 }
4040 OMPT_STORE_RETURN_ADDRESS(global_tid);
4041#endif
4042#if USE_ITT_NOTIFY
4043 __kmp_threads[global_tid]->th.th_ident = loc;
4044#endif
4045 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4046#if OMPT_SUPPORT && OMPT_OPTIONAL
4047 if (ompt_enabled.enabled) {
4048 ompt_frame->enter_frame = ompt_data_none;
4049 }
4050#endif
4051
4052 } else if (packed_reduction_method == atomic_reduce_block) {
4053
4054#if OMPT_SUPPORT
4055 ompt_frame_t *ompt_frame;
4056 if (ompt_enabled.enabled) {
4057 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4058 if (ompt_frame->enter_frame.ptr == NULL)
4059 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4060 }
4061 OMPT_STORE_RETURN_ADDRESS(global_tid);
4062#endif
4063// TODO: implicit barrier: should be exposed
4064#if USE_ITT_NOTIFY
4065 __kmp_threads[global_tid]->th.th_ident = loc;
4066#endif
4067 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4068#if OMPT_SUPPORT && OMPT_OPTIONAL
4069 if (ompt_enabled.enabled) {
4070 ompt_frame->enter_frame = ompt_data_none;
4071 }
4072#endif
4073
4074 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
4075 tree_reduce_block)) {
4076
4077 // only the primary thread executes here (it releases all other workers)
4078 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
4079 global_tid);
4080
4081 } else {
4082
4083 // should never reach this block
4084 KMP_ASSERT(0); // "unexpected method"
4085 }
4086 if (teams_swapped) {
4087 __kmp_restore_swapped_teams(th, team, task_state);
4088 }
4089
4090 if (__kmp_env_consistency_check)
4091 __kmp_pop_sync(global_tid, ct_reduce, loc);
4092
4093 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4094 global_tid, packed_reduction_method));
4095
4096 return;
4097}
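// Illustrative sketch (not part of the runtime source): the blocking
// counterpart of the nowait case above. Assuming the compiler lowers
// "#pragma omp for reduction(+ : sum)" (no nowait) onto
// __kmpc_reduce()/__kmpc_end_reduce(), the pattern is the same except that
// __kmpc_end_reduce() is also called on the atomic path (case 2), since it is
// the call that executes the terminating barrier. Names are hypothetical.
//
//   int ret = __kmpc_reduce(loc, gtid, /*num_vars=*/1, sizeof(local_sum),
//                           &local_sum, reduce_sum_func, &crit);
//   if (ret == 1) {
//     shared_sum += local_sum;
//     __kmpc_end_reduce(loc, gtid, &crit);
//   } else if (ret == 2) {
//     /* atomic update of shared_sum */
//     __kmpc_end_reduce(loc, gtid, &crit);
//   }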
4098
4099#undef __KMP_GET_REDUCTION_METHOD
4100#undef __KMP_SET_REDUCTION_METHOD
4101
4102/* end of interface to fast scalable reduce routines */
4103
4104kmp_uint64 __kmpc_get_taskid() {
4105
4106 kmp_int32 gtid;
4107 kmp_info_t *thread;
4108
4109 gtid = __kmp_get_gtid();
4110 if (gtid < 0) {
4111 return 0;
4112 }
4113 thread = __kmp_thread_from_gtid(gtid);
4114 return thread->th.th_current_task->td_task_id;
4115
4116} // __kmpc_get_taskid
4117
4118kmp_uint64 __kmpc_get_parent_taskid() {
4119
4120 kmp_int32 gtid;
4121 kmp_info_t *thread;
4122 kmp_taskdata_t *parent_task;
4123
4124 gtid = __kmp_get_gtid();
4125 if (gtid < 0) {
4126 return 0;
4127 }
4128 thread = __kmp_thread_from_gtid(gtid);
4129 parent_task = thread->th.th_current_task->td_parent;
4130 return (parent_task == NULL ? 0 : parent_task->td_task_id);
4131
4132} // __kmpc_get_parent_taskid
4133
4145void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4146 const struct kmp_dim *dims) {
4147 __kmp_assert_valid_gtid(gtid);
4148 int j, idx;
4149 kmp_int64 last, trace_count;
4150 kmp_info_t *th = __kmp_threads[gtid];
4151 kmp_team_t *team = th->th.th_team;
4152 kmp_uint32 *flags;
4153 kmp_disp_t *pr_buf = th->th.th_dispatch;
4154 dispatch_shared_info_t *sh_buf;
4155
4156 KA_TRACE(
4157 20,
4158 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4159 gtid, num_dims, !team->t.t_serialized));
4160 KMP_DEBUG_ASSERT(dims != NULL);
4161 KMP_DEBUG_ASSERT(num_dims > 0);
4162
4163 if (team->t.t_serialized) {
4164 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4165 return; // no dependencies if team is serialized
4166 }
4167 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4168 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4169 // the next loop
4170 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4171
4172 // Save bounds info into allocated private buffer
4173 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4174 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4175 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4176 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4177 pr_buf->th_doacross_info[0] =
4178 (kmp_int64)num_dims; // first element is number of dimensions
4179 // Also save the address of num_done so it can be accessed later without
4180 // knowing the buffer index
4181 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4182 pr_buf->th_doacross_info[2] = dims[0].lo;
4183 pr_buf->th_doacross_info[3] = dims[0].up;
4184 pr_buf->th_doacross_info[4] = dims[0].st;
4185 last = 5;
4186 for (j = 1; j < num_dims; ++j) {
4187 kmp_int64
4188 range_length; // keeps the range of each dimension except the first, dims[0]
4189 if (dims[j].st == 1) { // most common case
4190 // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
4191 range_length = dims[j].up - dims[j].lo + 1;
4192 } else {
4193 if (dims[j].st > 0) {
4194 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4195 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4196 } else { // negative increment
4197 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4198 range_length =
4199 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4200 }
4201 }
4202 pr_buf->th_doacross_info[last++] = range_length;
4203 pr_buf->th_doacross_info[last++] = dims[j].lo;
4204 pr_buf->th_doacross_info[last++] = dims[j].up;
4205 pr_buf->th_doacross_info[last++] = dims[j].st;
4206 }
4207
4208 // Compute total trip count.
4209 // Start with range of dims[0] which we don't need to keep in the buffer.
4210 if (dims[0].st == 1) { // most common case
4211 trace_count = dims[0].up - dims[0].lo + 1;
4212 } else if (dims[0].st > 0) {
4213 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4214 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4215 } else { // negative increment
4216 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4217 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4218 }
4219 for (j = 1; j < num_dims; ++j) {
4220 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4221 }
4222 KMP_DEBUG_ASSERT(trace_count > 0);
4223
4224 // Check whether the shared buffer is still occupied by another loop
4225 // (the one that used buffer index idx - __kmp_dispatch_num_buffers)
4226 if (idx != sh_buf->doacross_buf_idx) {
4227 // Shared buffer is occupied, wait for it to be free
4228 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4229 __kmp_eq_4, NULL);
4230 }
4231#if KMP_32_BIT_ARCH
4232 // Check if we are the first thread. After the CAS the first thread gets 0,
4233 // others get 1 if initialization is in progress, allocated pointer otherwise.
4234 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4235 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4236 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4237#else
4238 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4239 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4240#endif
4241 if (flags == NULL) {
4242 // we are the first thread, allocate the array of flags
4243 size_t size =
4244 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4245 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4246 KMP_MB();
4247 sh_buf->doacross_flags = flags;
4248 } else if (flags == (kmp_uint32 *)1) {
4249#if KMP_32_BIT_ARCH
4250 // initialization is still in progress, need to wait
4251 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4252#else
4253 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4254#endif
4255 KMP_YIELD(TRUE);
4256 KMP_MB();
4257 } else {
4258 KMP_MB();
4259 }
4260 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4261 pr_buf->th_doacross_flags =
4262 sh_buf->doacross_flags; // save a private copy so the shared buffer
4263 // is not touched on each iteration
4264 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4265}
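// Illustrative sketch (not part of the runtime source): what a compiler might
// pass in "dims" for a doubly nested "ordered(2)" doacross loop. The loop
// bounds are hypothetical; each kmp_dim carries the lower bound (lo), upper
// bound (up) and stride (st) of one loop level, exactly the fields read back
// above.
//
//   // for (i = 0; i <= 99; i++)
//   //   for (j = 10; j <= 50; j += 2) { ... }
//   struct kmp_dim dims[2] = {{/*lo=*/0, /*up=*/99, /*st=*/1},
//                             {/*lo=*/10, /*up=*/50, /*st=*/2}};
//   __kmpc_doacross_init(loc, gtid, /*num_dims=*/2, dims);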
4266
4267void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4268 __kmp_assert_valid_gtid(gtid);
4269 kmp_int64 shft;
4270 size_t num_dims, i;
4271 kmp_uint32 flag;
4272 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4273 kmp_info_t *th = __kmp_threads[gtid];
4274 kmp_team_t *team = th->th.th_team;
4275 kmp_disp_t *pr_buf;
4276 kmp_int64 lo, up, st;
4277
4278 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4279 if (team->t.t_serialized) {
4280 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4281 return; // no dependencies if team is serialized
4282 }
4283
4284 // calculate sequential iteration number and check out-of-bounds condition
4285 pr_buf = th->th.th_dispatch;
4286 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4287 num_dims = (size_t)pr_buf->th_doacross_info[0];
4288 lo = pr_buf->th_doacross_info[2];
4289 up = pr_buf->th_doacross_info[3];
4290 st = pr_buf->th_doacross_info[4];
4291#if OMPT_SUPPORT && OMPT_OPTIONAL
4292 SimpleVLA<ompt_dependence_t> deps(num_dims);
4293#endif
4294 if (st == 1) { // most common case
4295 if (vec[0] < lo || vec[0] > up) {
4296 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4297 "bounds [%lld,%lld]\n",
4298 gtid, vec[0], lo, up));
4299 return;
4300 }
4301 iter_number = vec[0] - lo;
4302 } else if (st > 0) {
4303 if (vec[0] < lo || vec[0] > up) {
4304 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4305 "bounds [%lld,%lld]\n",
4306 gtid, vec[0], lo, up));
4307 return;
4308 }
4309 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4310 } else { // negative increment
4311 if (vec[0] > lo || vec[0] < up) {
4312 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4313 "bounds [%lld,%lld]\n",
4314 gtid, vec[0], lo, up));
4315 return;
4316 }
4317 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4318 }
4319#if OMPT_SUPPORT && OMPT_OPTIONAL
4320 deps[0].variable.value = iter_number;
4321 deps[0].dependence_type = ompt_dependence_type_sink;
4322#endif
4323 for (i = 1; i < num_dims; ++i) {
4324 kmp_int64 iter, ln;
4325 size_t j = i * 4;
4326 ln = pr_buf->th_doacross_info[j + 1];
4327 lo = pr_buf->th_doacross_info[j + 2];
4328 up = pr_buf->th_doacross_info[j + 3];
4329 st = pr_buf->th_doacross_info[j + 4];
4330 if (st == 1) {
4331 if (vec[i] < lo || vec[i] > up) {
4332 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4333 "bounds [%lld,%lld]\n",
4334 gtid, vec[i], lo, up));
4335 return;
4336 }
4337 iter = vec[i] - lo;
4338 } else if (st > 0) {
4339 if (vec[i] < lo || vec[i] > up) {
4340 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4341 "bounds [%lld,%lld]\n",
4342 gtid, vec[i], lo, up));
4343 return;
4344 }
4345 iter = (kmp_uint64)(vec[i] - lo) / st;
4346 } else { // st < 0
4347 if (vec[i] > lo || vec[i] < up) {
4348 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4349 "bounds [%lld,%lld]\n",
4350 gtid, vec[i], lo, up));
4351 return;
4352 }
4353 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4354 }
4355 iter_number = iter + ln * iter_number;
4356#if OMPT_SUPPORT && OMPT_OPTIONAL
4357 deps[i].variable.value = iter;
4358 deps[i].dependence_type = ompt_dependence_type_sink;
4359#endif
4360 }
4361 shft = iter_number % 32; // use 32-bit granularity
4362 iter_number >>= 5; // divided by 32
4363 flag = 1 << shft;
4364 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4365 KMP_YIELD(TRUE);
4366 }
4367 KMP_MB();
4368#if OMPT_SUPPORT && OMPT_OPTIONAL
4369 if (ompt_enabled.ompt_callback_dependences) {
4370 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4371 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4372 }
4373#endif
4374 KA_TRACE(20,
4375 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4376 gtid, (iter_number << 5) + shft));
4377}
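// Worked example (illustrative only) of the linearization above, using the
// hypothetical 2-D loop sketched after __kmpc_doacross_init(): the second
// dimension has range_length ln = (50 - 10) / 2 + 1 = 21. For vec = {3, 14}:
//   dimension 0: iter_number = (3 - 0) / 1 = 3
//   dimension 1: iter = (14 - 10) / 2 = 2, iter_number = 2 + 21 * 3 = 65
// The wait then spins on bit 65 % 32 = 1 of flags word 65 >> 5 = 2 until the
// matching __kmpc_doacross_post() sets it.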
4378
4379void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4380 __kmp_assert_valid_gtid(gtid);
4381 kmp_int64 shft;
4382 size_t num_dims, i;
4383 kmp_uint32 flag;
4384 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4385 kmp_info_t *th = __kmp_threads[gtid];
4386 kmp_team_t *team = th->th.th_team;
4387 kmp_disp_t *pr_buf;
4388 kmp_int64 lo, st;
4389
4390 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4391 if (team->t.t_serialized) {
4392 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4393 return; // no dependencies if team is serialized
4394 }
4395
4396 // calculate sequential iteration number (same as in "wait" but no
4397 // out-of-bounds checks)
4398 pr_buf = th->th.th_dispatch;
4399 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4400 num_dims = (size_t)pr_buf->th_doacross_info[0];
4401 lo = pr_buf->th_doacross_info[2];
4402 st = pr_buf->th_doacross_info[4];
4403#if OMPT_SUPPORT && OMPT_OPTIONAL
4404 SimpleVLA<ompt_dependence_t> deps(num_dims);
4405#endif
4406 if (st == 1) { // most common case
4407 iter_number = vec[0] - lo;
4408 } else if (st > 0) {
4409 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4410 } else { // negative increment
4411 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4412 }
4413#if OMPT_SUPPORT && OMPT_OPTIONAL
4414 deps[0].variable.value = iter_number;
4415 deps[0].dependence_type = ompt_dependence_type_source;
4416#endif
4417 for (i = 1; i < num_dims; ++i) {
4418 kmp_int64 iter, ln;
4419 size_t j = i * 4;
4420 ln = pr_buf->th_doacross_info[j + 1];
4421 lo = pr_buf->th_doacross_info[j + 2];
4422 st = pr_buf->th_doacross_info[j + 4];
4423 if (st == 1) {
4424 iter = vec[i] - lo;
4425 } else if (st > 0) {
4426 iter = (kmp_uint64)(vec[i] - lo) / st;
4427 } else { // st < 0
4428 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4429 }
4430 iter_number = iter + ln * iter_number;
4431#if OMPT_SUPPORT && OMPT_OPTIONAL
4432 deps[i].variable.value = iter;
4433 deps[i].dependence_type = ompt_dependence_type_source;
4434#endif
4435 }
4436#if OMPT_SUPPORT && OMPT_OPTIONAL
4437 if (ompt_enabled.ompt_callback_dependences) {
4438 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4439 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4440 }
4441#endif
4442 shft = iter_number % 32; // use 32-bit granularity
4443 iter_number >>= 5; // divided by 32
4444 flag = 1 << shft;
4445 KMP_MB();
4446 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4447 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4448 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4449 (iter_number << 5) + shft));
4450}
4451
4452void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4453 __kmp_assert_valid_gtid(gtid);
4454 kmp_int32 num_done;
4455 kmp_info_t *th = __kmp_threads[gtid];
4456 kmp_team_t *team = th->th.th_team;
4457 kmp_disp_t *pr_buf = th->th.th_dispatch;
4458
4459 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4460 if (team->t.t_serialized) {
4461 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4462 return; // nothing to do
4463 }
4464 num_done =
4465 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4466 if (num_done == th->th.th_team_nproc) {
4467 // we are the last thread, need to free shared resources
4468 int idx = pr_buf->th_doacross_buf_idx - 1;
4469 dispatch_shared_info_t *sh_buf =
4470 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4471 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4472 (kmp_int64)&sh_buf->doacross_num_done);
4473 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4474 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4475 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4476 sh_buf->doacross_flags = NULL;
4477 sh_buf->doacross_num_done = 0;
4478 sh_buf->doacross_buf_idx +=
4479 __kmp_dispatch_num_buffers; // free buffer for future re-use
4480 }
4481 // free private resources (need to keep buffer index forever)
4482 pr_buf->th_doacross_flags = NULL;
4483 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4484 pr_buf->th_doacross_info = NULL;
4485 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4486}
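// Illustrative sketch (not part of the runtime source) of the per-thread call
// sequence a compiler might emit for an "ordered(1)" doacross loop that uses
// "depend(sink: i-1)" and "depend(source)"; worksharing/chunking details are
// omitted and the variable names are hypothetical.
//
//   __kmpc_doacross_init(loc, gtid, /*num_dims=*/1, dims);
//   for (i = my_lower; i <= my_upper; ++i) {
//     kmp_int64 sink_vec[1] = {i - 1};
//     __kmpc_doacross_wait(loc, gtid, sink_vec);   // ordered depend(sink: i-1)
//     /* loop body */
//     kmp_int64 source_vec[1] = {i};
//     __kmpc_doacross_post(loc, gtid, source_vec); // ordered depend(source)
//   }
//   __kmpc_doacross_fini(loc, gtid);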
4487
4488/* OpenMP 5.1 Memory Management routines */
4489void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4490 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4491}
4492
4493void *omp_aligned_alloc(size_t align, size_t size,
4494 omp_allocator_handle_t allocator) {
4495 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4496}
4497
4498void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4499 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4500}
4501
4502void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4503 omp_allocator_handle_t allocator) {
4504 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4505}
4506
4507void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4508 omp_allocator_handle_t free_allocator) {
4509 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4510 free_allocator);
4511}
4512
4513void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4514 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4515}
4516/* end of OpenMP 5.1 Memory Management routines */
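// Illustrative usage sketch for the OpenMP 5.1 allocation entry points above,
// using the predefined allocator omp_default_mem_alloc; the buffer name and
// sizes are hypothetical.
//
//   double *buf = (double *)omp_aligned_alloc(64, 1024 * sizeof(double),
//                                             omp_default_mem_alloc);
//   if (buf != NULL) {
//     /* ... use buf ... */
//     omp_free(buf, omp_default_mem_alloc);
//   }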
4517
4518int __kmpc_get_target_offload(void) {
4519 if (!__kmp_init_serial) {
4520 __kmp_serial_initialize();
4521 }
4522 return __kmp_target_offload;
4523}
4524
4525int __kmpc_pause_resource(kmp_pause_status_t level) {
4526 if (!__kmp_init_serial) {
4527 return 1; // Can't pause if runtime is not initialized
4528 }
4529 return __kmp_pause_resource(level);
4530}
4531
4532void __kmpc_error(ident_t *loc, int severity, const char *message) {
4533 if (!__kmp_init_serial)
4534 __kmp_serial_initialize();
4535
4536 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4537
4538#if OMPT_SUPPORT
4539 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4540 ompt_callbacks.ompt_callback(ompt_callback_error)(
4541 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4542 OMPT_GET_RETURN_ADDRESS(0));
4543 }
4544#endif // OMPT_SUPPORT
4545
4546 char *src_loc;
4547 if (loc && loc->psource) {
4548 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4549 src_loc =
4550 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4551 __kmp_str_loc_free(&str_loc);
4552 } else {
4553 src_loc = __kmp_str_format("unknown");
4554 }
4555
4556 if (severity == severity_warning)
4557 KMP_WARNING(UserDirectedWarning, src_loc, message);
4558 else
4559 KMP_FATAL(UserDirectedError, src_loc, message);
4560
4561 __kmp_str_free(&src_loc);
4562}
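// Illustrative sketch (not part of the runtime source): __kmpc_error() is the
// kind of entry point a compiler would use to lower the OpenMP 5.1 "error"
// directive with execution-time semantics; the exact lowering is an assumption
// and the message text is hypothetical.
//
//   // #pragma omp error at(execution) severity(warning) message("stage skipped")
//   __kmpc_error(loc, severity_warning, "stage skipped");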
4563
4564// Mark begin of scope directive.
4565void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4566// reserved is for future extension of the scope directive and is not used.
4567#if OMPT_SUPPORT && OMPT_OPTIONAL
4568 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4569 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4570 int tid = __kmp_tid_from_gtid(gtid);
4571 ompt_callbacks.ompt_callback(ompt_callback_work)(
4572 ompt_work_scope, ompt_scope_begin,
4573 &(team->t.ompt_team_info.parallel_data),
4574 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4575 OMPT_GET_RETURN_ADDRESS(0));
4576 }
4577#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4578}
4579
4580// Mark end of scope directive
4581void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4582// reserved is for future extension of the scope directive and is not used.
4583#if OMPT_SUPPORT && OMPT_OPTIONAL
4584 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4585 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4586 int tid = __kmp_tid_from_gtid(gtid);
4587 ompt_callbacks.ompt_callback(ompt_callback_work)(
4588 ompt_work_scope, ompt_scope_end,
4589 &(team->t.ompt_team_info.parallel_data),
4590 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4591 OMPT_GET_RETURN_ADDRESS(0));
4592 }
4593#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4594}
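// Illustrative sketch (not part of the runtime source): assuming the compiler
// brackets an OpenMP 5.1 "scope" construct with the two entry points above,
// each encountering thread would execute roughly the following.
//
//   // #pragma omp scope private(tmp)
//   __kmpc_scope(loc, gtid, /*reserved=*/NULL);
//   /* ... structured block using the private tmp ... */
//   __kmpc_end_scope(loc, gtid, /*reserved=*/NULL);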
4595
4596#ifdef KMP_USE_VERSION_SYMBOLS
4597// For GOMP compatibility there are two versions of each omp_* API.
4598// One is the plain C symbol and one is the Fortran symbol with an appended
4599// underscore. When we implement a specific ompc_* version of an omp_*
4600// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4601// instead of the Fortran versions in kmp_ftn_entry.h
4602extern "C" {
4603// Have to undef these from omp.h so they aren't translated into
4604// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4605#ifdef omp_set_affinity_format
4606#undef omp_set_affinity_format
4607#endif
4608#ifdef omp_get_affinity_format
4609#undef omp_get_affinity_format
4610#endif
4611#ifdef omp_display_affinity
4612#undef omp_display_affinity
4613#endif
4614#ifdef omp_capture_affinity
4615#undef omp_capture_affinity
4616#endif
4617KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4618 "OMP_5.0");
4619KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4620 "OMP_5.0");
4621KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4622 "OMP_5.0");
4623KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4624 "OMP_5.0");
4625} // extern "C"
4626#endif