LLVM OpenMP* Runtime Library
kmp_sched.cpp
/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is constant inside any dispatch loop, but it may
   change between parallel regions. __kmp_max_nth is the largest value
   __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// format specifiers for debug prints (d, u, lld, llu, ld)
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
40
#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      KMP_DEBUG_ASSERT(i != 0);                                                \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

#if USE_ITT_BUILD || defined KMP_DEBUG
static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}
#endif

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  // Clear the monotonic/nonmonotonic modifier bits (they are ignored here)
  schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such... */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop_static;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop_static;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let the compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0, stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper that are not
  // "distribute" schedules, the only useful ones are dynamic, and those cannot
  // reach this point because this code path is executed only for static
  // schedules. So any value above kmp_ord_upper means a DISTRIBUTE construct.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
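  // Worked example (illustration only): lower=0, upper=9, incr=4 gives
  // trip_count = (9 - 0) / 4 + 1 = 3, i.e. the iterations 0, 4, 8. The cast
  // to the unsigned type UT keeps the division well-defined even when
  // upper - lower does not fit in the signed type T.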

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }
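  // Overflow example (illustration only): for kmp_int32 bounds
  // lower=INT32_MIN, upper=INT32_MAX, incr=1 the true count 2^32 wraps to 0
  // in the 32-bit unsigned trip_count, so trip_count == 0 with distinct
  // bounds reliably signals an iteration range that is too large.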

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      KMP_DEBUG_ASSERT(nth != 0);
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
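  // Worked example for the balanced split above (illustration only):
  // trip_count=10, nth=4 gives small_chunk=2, extras=2, so threads 0 and 1
  // each take 3 iterations and threads 2 and 3 take 2 (3+3+2+2 = 10). The
  // greedy split instead gives each thread ceil(10/4)=3 iterations and the
  // upper-bound clipping leaves the last thread with just 1.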
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    KMP_DEBUG_ASSERT(chunk != 0);
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
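  // Worked example for the chunked case (illustration only): trip_count=10,
  // chunk=2, nth=3 gives nchunks=5; thread tid starts at chunk tid and then
  // advances by *pstride = span * nth to chunks tid+3, tid+6, ... The last
  // chunk has index 4, so thread 4 % 3 = 1 reports the last iteration.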
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    KMP_DEBUG_ASSERT(nth != 0);
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL) {
      KMP_DEBUG_ASSERT(chunk != 0);
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    }
    break;
  }
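  // Note on the chunk adjustment above (illustration only): the expression
  // (span + chunk - 1) & ~(chunk - 1) rounds span up to a multiple of chunk
  // via a bit mask, e.g. span=10, chunk=4 -> (10+3) & ~3 = 12, which is only
  // valid when chunk is a power of two. That assumption holds for
  // schedule(simd: static), where chunk is typically a SIMD width.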
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      KMP_DEBUG_ASSERT(nth != 0);
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are kept by the compiler and checked here, e.g.:
      // for(i=10;i<0;++i) // lower >= upper - run-time check
      // for(i=0;i>10;--i) // lower <= upper - run-time check
      // for(i=0;i>10;++i) // incr > 0 - compile-time check
      // for(i=10;i<0;--i) // incr < 0 - compile-time check
      // The compiler does not check the following illegal loops:
      // for(i=0;i<10;i+=incr) // where incr<0
      // for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only the primary threads of some teams get a single iteration; all
    // other threads get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    KMP_DEBUG_ASSERT(nteams != 0);
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      KMP_DEBUG_ASSERT(incr != 0);
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        KMP_DEBUG_ASSERT(nth != 0);
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL) {
        KMP_DEBUG_ASSERT(chunk != 0);
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      }
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
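
// Worked example for the two-level split above (illustration only): with
// trip_count=100 over 4 teams under the balanced scheme, each team receives
// 25 iterations as its distribute chunk; the team's 25 iterations are then
// re-split across its nth threads by the inner switch, exactly as in
// __kmp_for_static_init.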

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing the next chunks. The last-iteration flag is set for
  // the team that will execute the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are kept by the compiler and checked here, e.g.:
      // for(i=10;i<0;++i) // lower >= upper - run-time check
      // for(i=0;i>10;--i) // lower <= upper - run-time check
      // for(i=0;i>10;++i) // incr > 0 - compile-time check
      // for(i=10;i<0;--i) // incr < 0 - compile-time check
      // The compiler does not check the following illegal loops:
      // for(i=0;i<10;i+=incr) // where incr<0
      // for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL) {
    KMP_DEBUG_ASSERT(chunk != 0);
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  }
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
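
// Worked example for dist_schedule(static, chunk) above (illustration only):
// lower=0, upper=99, incr=1, chunk=4, nteams=3. Then span=4, *p_st=12, and
// team 1 gets its first chunk [4, 7]; its next chunks start at 16, 28, ...
// (advanced by *p_st). The last of the 25 chunks has index 24, so team
// 24 % 3 = 0 reports the last iteration.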

//------------------------------------------------------------------------------
extern "C" {

void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
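
// A hedged sketch (not part of this file) of the calls a compiler typically
// emits for "#pragma omp for schedule(static)" over i in [0, n); "loc",
// "gtid", and "body" are placeholders supplied by the compiler:
//
//   kmp_int32 lower = 0, upper = n - 1, stride = 1, lastiter = 0;
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &lastiter, &lower,
//                            &upper, &stride, /*incr=*/1, /*chunk=*/1);
//   for (kmp_int32 i = lower; i <= upper; ++i)
//     body(i);
//   __kmpc_for_static_fini(&loc, gtid); // closes the worksharing region
//
// Bounds are inclusive on entry and are tightened in place to the calling
// thread's subrange.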

void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif

void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
} // extern "C"