LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- compiler front-end ("kfront") linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 
22 #if OMPT_SUPPORT
23 #include "ompt-specific.h"
24 #endif
25 
26 #define MAX_MESSAGE 512
27 
28 // flags will be used in the future, e.g., to implement openmp_strict library
29 // restrictions
30 
39 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
40  // By default __kmpc_begin() is a no-op.
41  char *env;
42  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
43  __kmp_str_match_true(env)) {
44  __kmp_middle_initialize();
45  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
46  } else if (__kmp_ignore_mppbeg() == FALSE) {
47  // By default __kmp_ignore_mppbeg() returns TRUE.
48  __kmp_internal_begin();
49  KC_TRACE(10, ("__kmpc_begin: called\n"));
50  }
51 }
52 
61 void __kmpc_end(ident_t *loc) {
62  // By default, __kmp_ignore_mppend() returns TRUE, which makes the
63  // __kmpc_end() call a no-op. However, this can be overridden with the
64  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
65  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister
66  // this root (which can cause library shutdown).
67  if (__kmp_ignore_mppend() == FALSE) {
68  KC_TRACE(10, ("__kmpc_end: called\n"));
69  KA_TRACE(30, ("__kmpc_end\n"));
70 
71  __kmp_internal_end_thread(-1);
72  }
73 #if KMP_OS_WINDOWS && OMPT_SUPPORT
74  // Normal exit process on Windows does not allow worker threads of the final
75  // parallel region to finish reporting their events, so shutting down the
76  // library here fixes the issue at least for the cases where __kmpc_end() is
77  // placed properly.
78  if (ompt_enabled.enabled)
79  __kmp_internal_end_library(__kmp_gtid_get_specific());
80 #endif
81 }
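// Illustrative example (editor's sketch, not part of the upstream source):
// compiler-generated code brackets the whole translation unit with these two
// entry points, roughly as below; "loc" stands for a compiler-emitted
// ident_t source-location descriptor and is assumed for illustration only.
//
//   __kmpc_begin(&loc, 0);  // usually a no-op (see KMP_INITIAL_THREAD_BIND)
//   /* ... code containing OpenMP constructs ... */
//   __kmpc_end(&loc);       // usually a no-op unless KMP_IGNORE_MPPEND=0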
82 
101 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
102  kmp_int32 gtid = __kmp_entry_gtid();
103 
104  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
105 
106  return gtid;
107 }
108 
123 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
124  KC_TRACE(10,
125  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
126 
127  return TCR_4(__kmp_all_nth);
128 }
129 
136 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
137  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
138  return __kmp_tid_from_gtid(__kmp_entry_gtid());
139 }
140 
146 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
147  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
148 
149  return __kmp_entry_thread()->th.th_team->t.t_nproc;
150 }
151 
158 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
159 #ifndef KMP_DEBUG
160 
161  return TRUE;
162 
163 #else
164 
165  const char *semi2;
166  const char *semi3;
167  int line_no;
168 
169  if (__kmp_par_range == 0) {
170  return TRUE;
171  }
172  semi2 = loc->psource;
173  if (semi2 == NULL) {
174  return TRUE;
175  }
176  semi2 = strchr(semi2, ';');
177  if (semi2 == NULL) {
178  return TRUE;
179  }
180  semi2 = strchr(semi2 + 1, ';');
181  if (semi2 == NULL) {
182  return TRUE;
183  }
184  if (__kmp_par_range_filename[0]) {
185  const char *name = semi2 - 1;
186  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
187  name--;
188  }
189  if ((*name == '/') || (*name == ';')) {
190  name++;
191  }
192  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
193  return __kmp_par_range < 0;
194  }
195  }
196  semi3 = strchr(semi2 + 1, ';');
197  if (__kmp_par_range_routine[0]) {
198  if ((semi3 != NULL) && (semi3 > semi2) &&
199  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
200  return __kmp_par_range < 0;
201  }
202  }
203  if ((semi3 != NULL) && (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1)) {
204  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
205  return __kmp_par_range > 0;
206  }
207  return __kmp_par_range < 0;
208  }
209  return TRUE;
210 
211 #endif /* KMP_DEBUG */
212 }
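// Editor's note (illustrative, hedged): the parsing above relies on the
// ident_t::psource convention of semicolon-separated fields, e.g.
//
//   ";file.c;routine_name;first_line;last_line;;"
//
// KMP_PAR_RANGE filtering then matches the second field against
// __kmp_par_range_filename, the third against __kmp_par_range_routine, and
// the fourth (read with %d) against [__kmp_par_range_lb, __kmp_par_range_ub].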
213 
220 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
221  return __kmp_entry_thread()->th.th_root->r.r_active;
222 }
223 
233 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
234  kmp_int32 num_threads) {
235  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
236  global_tid, num_threads));
237 
238  __kmp_push_num_threads(loc, global_tid, num_threads);
239 }
240 
241 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
242  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
243 
244  /* the num_threads are automatically popped */
245 }
246 
247 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
248  kmp_int32 proc_bind) {
249  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
250  proc_bind));
251 
252  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
253 }
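// Illustrative example (editor's sketch): the push routines above only stage
// values for the next fork; a compiler typically emits, for
//
//   #pragma omp parallel num_threads(4) proc_bind(close)
//
// something like
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_push_num_threads(&loc, gtid, 4);
//   __kmpc_push_proc_bind(&loc, gtid, proc_bind_close);
//   __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_body);
//
// where "outlined_body" is a hypothetical outlined microtask.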
254 
265 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
266  int gtid = __kmp_entry_gtid();
267 
268 #if (KMP_STATS_ENABLED)
269  // If we were in a serial region, then stop the serial timer, record
270  // the event, and start parallel region timer
271  stats_state_e previous_state = KMP_GET_THREAD_STATE();
272  if (previous_state == stats_state_e::SERIAL_REGION) {
273  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
274  } else {
275  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
276  }
277  int inParallel = __kmpc_in_parallel(loc);
278  if (inParallel) {
279  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
280  } else {
281  KMP_COUNT_BLOCK(OMP_PARALLEL);
282  }
283 #endif
284 
285  // maybe saving thr_state would be enough here
286  {
287  va_list ap;
288  va_start(ap, microtask);
289 
290 #if OMPT_SUPPORT
291  ompt_frame_t *ompt_frame;
292  if (ompt_enabled.enabled) {
293  kmp_info_t *master_th = __kmp_threads[gtid];
294  kmp_team_t *parent_team = master_th->th.th_team;
295  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
296  if (lwt)
297  ompt_frame = &(lwt->ompt_task_info.frame);
298  else {
299  int tid = __kmp_tid_from_gtid(gtid);
300  ompt_frame = &(
301  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
302  }
303  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
304  OMPT_STORE_RETURN_ADDRESS(gtid);
305  }
306 #endif
307 
308 #if INCLUDE_SSC_MARKS
309  SSC_MARK_FORKING();
310 #endif
311  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
312  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
313  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
314 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
315 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
316  &ap
317 #else
318  ap
319 #endif
320  );
321 #if INCLUDE_SSC_MARKS
322  SSC_MARK_JOINING();
323 #endif
324  __kmp_join_call(loc, gtid
325 #if OMPT_SUPPORT
326  ,
327  fork_context_intel
328 #endif
329  );
330 
331  va_end(ap);
332  }
333 
334 #if KMP_STATS_ENABLED
335  if (previous_state == stats_state_e::SERIAL_REGION) {
336  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
337  } else {
338  KMP_POP_PARTITIONED_TIMER();
339  }
340 #endif // KMP_STATS_ENABLED
341 }
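// Illustrative example (editor's sketch, names are placeholders): for
//
//   int x = 0;
//   #pragma omp parallel shared(x)
//   { use(&x); }
//
// the compiler outlines the region body into a kmpc_micro function and
// forwards the shared variables through the varargs of __kmpc_fork_call:
//
//   static void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *x) {
//     use(x);
//   }
//   ...
//   __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &x);
//
// i.e. argc counts the trailing arguments that the runtime passes to every
// invocation of the microtask.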
342 
354 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
355  kmp_int32 num_teams, kmp_int32 num_threads) {
356  KA_TRACE(20,
357  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
358  global_tid, num_teams, num_threads));
359 
360  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
361 }
362 
373 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
374  ...) {
375  int gtid = __kmp_entry_gtid();
376  kmp_info_t *this_thr = __kmp_threads[gtid];
377  va_list ap;
378  va_start(ap, microtask);
379 
380 #if KMP_STATS_ENABLED
381  KMP_COUNT_BLOCK(OMP_TEAMS);
382  stats_state_e previous_state = KMP_GET_THREAD_STATE();
383  if (previous_state == stats_state_e::SERIAL_REGION) {
384  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
385  } else {
386  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
387  }
388 #endif
389 
390  // remember teams entry point and nesting level
391  this_thr->th.th_teams_microtask = microtask;
392  this_thr->th.th_teams_level =
393  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
394 
395 #if OMPT_SUPPORT
396  kmp_team_t *parent_team = this_thr->th.th_team;
397  int tid = __kmp_tid_from_gtid(gtid);
398  if (ompt_enabled.enabled) {
399  parent_team->t.t_implicit_task_taskdata[tid]
400  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
401  }
402  OMPT_STORE_RETURN_ADDRESS(gtid);
403 #endif
404 
405  // check if __kmpc_push_num_teams was called; if not, set the default
406  // number of teams
407  if (this_thr->th.th_teams_size.nteams == 0) {
408  __kmp_push_num_teams(loc, gtid, 0, 0);
409  }
410  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
411  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
412  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
413 
414  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
415  VOLATILE_CAST(microtask_t)
416  __kmp_teams_master, // "wrapped" task
417  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
418 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
419  &ap
420 #else
421  ap
422 #endif
423  );
424  __kmp_join_call(loc, gtid
425 #if OMPT_SUPPORT
426  ,
427  fork_context_intel
428 #endif
429  );
430 
431  // Pop current CG root off list
432  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
433  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
434  this_thr->th.th_cg_roots = tmp->up;
435  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
436  " to node %p. cg_nthreads was %d\n",
437  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
438  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
439  int i = tmp->cg_nthreads--;
440  if (i == 1) { // check if we are the last thread in CG (not always the case)
441  __kmp_free(tmp);
442  }
443  // Restore current task's thread_limit from CG root
444  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
445  this_thr->th.th_current_task->td_icvs.thread_limit =
446  this_thr->th.th_cg_roots->cg_thread_limit;
447 
448  this_thr->th.th_teams_microtask = NULL;
449  this_thr->th.th_teams_level = 0;
450  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
451  va_end(ap);
452 #if KMP_STATS_ENABLED
453  if (previous_state == stats_state_e::SERIAL_REGION) {
454  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
455  } else {
456  KMP_POP_PARTITIONED_TIMER();
457  }
458 #endif // KMP_STATS_ENABLED
459 }
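// Illustrative example (editor's sketch): for
//
//   #pragma omp teams num_teams(2) thread_limit(8)
//   { teams_body(); }
//
// a compiler typically emits
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_push_num_teams(&loc, gtid, 2, 8);
//   __kmpc_fork_teams(&loc, 0, (kmpc_micro)outlined_teams_body);
//
// "outlined_teams_body" is a hypothetical kmpc_micro function; without
// num_teams/thread_limit clauses the __kmp_push_num_teams(loc, gtid, 0, 0)
// default path above is taken instead.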
460 
461 // I don't think this function should ever have been exported.
462 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
463 // openmp code ever called it, but it's been exported from the RTL for so
464 // long that I'm afraid to remove the definition.
465 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
466 
479 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
480 // The implementation is now in kmp_runtime.cpp so that it can share static
481 // functions with kmp_fork_call since the tasks to be done are similar in
482 // each case.
483 #if OMPT_SUPPORT
484  OMPT_STORE_RETURN_ADDRESS(global_tid);
485 #endif
486  __kmp_serialized_parallel(loc, global_tid);
487 }
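// Illustrative example (editor's sketch): a parallel region with a false
// if-clause, e.g.
//
//   #pragma omp parallel if(0)
//   { body(); }
//
// can be lowered as a serialized bracket instead of a fork:
//
//   __kmpc_serialized_parallel(&loc, gtid);
//   body();   // executed by the encountering thread only
//   __kmpc_end_serialized_parallel(&loc, gtid);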
488 
496 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
497  kmp_internal_control_t *top;
498  kmp_info_t *this_thr;
499  kmp_team_t *serial_team;
500 
501  KC_TRACE(10,
502  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
503 
504  /* skip all this code for autopar serialized loops since it results in
505  unacceptable overhead */
506  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
507  return;
508 
509  // Not autopar code
510  if (!TCR_4(__kmp_init_parallel))
511  __kmp_parallel_initialize();
512 
513  __kmp_resume_if_soft_paused();
514 
515  this_thr = __kmp_threads[global_tid];
516  serial_team = this_thr->th.th_serial_team;
517 
518  kmp_task_team_t *task_team = this_thr->th.th_task_team;
519  // we need to wait for the proxy tasks before finishing the thread
520  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
521  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
522 
523  KMP_MB();
524  KMP_DEBUG_ASSERT(serial_team);
525  KMP_ASSERT(serial_team->t.t_serialized);
526  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
527  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
528  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
529  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
530 
531 #if OMPT_SUPPORT
532  if (ompt_enabled.enabled &&
533  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
534  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
535  if (ompt_enabled.ompt_callback_implicit_task) {
536  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
537  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
538  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
539  }
540 
541  // clear the task id only after unlinking the task
542  ompt_data_t *parent_task_data;
543  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
544 
545  if (ompt_enabled.ompt_callback_parallel_end) {
546  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
547  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
548  ompt_parallel_invoker_program | ompt_parallel_team,
549  OMPT_LOAD_RETURN_ADDRESS(global_tid));
550  }
551  __ompt_lw_taskteam_unlink(this_thr);
552  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
553  }
554 #endif
555 
556  /* If necessary, pop the internal control stack values and replace the team
557  * values */
558  top = serial_team->t.t_control_stack_top;
559  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
560  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
561  serial_team->t.t_control_stack_top = top->next;
562  __kmp_free(top);
563  }
564 
565  // if( serial_team -> t.t_serialized > 1 )
566  serial_team->t.t_level--;
567 
568  /* pop dispatch buffers stack */
569  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
570  {
571  dispatch_private_info_t *disp_buffer =
572  serial_team->t.t_dispatch->th_disp_buffer;
573  serial_team->t.t_dispatch->th_disp_buffer =
574  serial_team->t.t_dispatch->th_disp_buffer->next;
575  __kmp_free(disp_buffer);
576  }
577  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
578 
579  --serial_team->t.t_serialized;
580  if (serial_team->t.t_serialized == 0) {
581 
582 /* return to the parallel section */
583 
584 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
585  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
586  __kmp_clear_x87_fpu_status_word();
587  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
588  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
589  }
590 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
591 
592  this_thr->th.th_team = serial_team->t.t_parent;
593  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
594 
595  /* restore values cached in the thread */
596  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
597  this_thr->th.th_team_master =
598  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
599  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
600 
601  /* TODO the below shouldn't need to be adjusted for serialized teams */
602  this_thr->th.th_dispatch =
603  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
604 
605  __kmp_pop_current_task_from_thread(this_thr);
606 
607  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
608  this_thr->th.th_current_task->td_flags.executing = 1;
609 
610  if (__kmp_tasking_mode != tskm_immediate_exec) {
611  // Copy the task team from the new child / old parent team to the thread.
612  this_thr->th.th_task_team =
613  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
614  KA_TRACE(20,
615  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
616  "team %p\n",
617  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
618  }
619  } else {
620  if (__kmp_tasking_mode != tskm_immediate_exec) {
621  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
622  "depth of serial team %p to %d\n",
623  global_tid, serial_team, serial_team->t.t_serialized));
624  }
625  }
626 
627  if (__kmp_env_consistency_check)
628  __kmp_pop_parallel(global_tid, NULL);
629 #if OMPT_SUPPORT
630  if (ompt_enabled.enabled)
631  this_thr->th.ompt_thread_info.state =
632  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
633  : ompt_state_work_parallel);
634 #endif
635 }
636 
645 void __kmpc_flush(ident_t *loc) {
646  KC_TRACE(10, ("__kmpc_flush: called\n"));
647 
648  /* need explicit __mf() here since the library uses volatile instead */
649  KMP_MB(); /* Flush all pending memory write invalidates. */
650 
651 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
652 #if KMP_MIC
653 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
654 // We shouldn't need it, though, since the ABI rules require that
655 // * If the compiler generates NGO stores it also generates the fence
656 // * If users hand-code NGO stores they should insert the fence
657 // therefore no incomplete unordered stores should be visible.
658 #else
659  // C74404
660  // This is to address non-temporal store instructions (sfence needed).
661  // The clflush instruction is also addressed (mfence needed).
662  // Probably the non-temporal load instruction movntdqa should also be
663  // addressed.
664  // mfence is an SSE2 instruction. Do not execute it if the CPU lacks SSE2.
665  if (!__kmp_cpuinfo.initialized) {
666  __kmp_query_cpuid(&__kmp_cpuinfo);
667  }
668  if (!__kmp_cpuinfo.sse2) {
669  // CPU cannot execute SSE2 instructions.
670  } else {
671 #if KMP_COMPILER_ICC
672  _mm_mfence();
673 #elif KMP_COMPILER_MSVC
674  MemoryBarrier();
675 #else
676  __sync_synchronize();
677 #endif // KMP_COMPILER_ICC
678  }
679 #endif // KMP_MIC
680 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
681  KMP_ARCH_RISCV64)
682 // Nothing to see here move along
683 #elif KMP_ARCH_PPC64
684 // Nothing needed here (we have a real MB above).
685 #if KMP_OS_CNK
686  // The flushing thread needs to yield here; this prevents a
687  // busy-waiting thread from saturating the pipeline. flush is
688  // often used in loops like this:
689  // while (!flag) {
690  // #pragma omp flush(flag)
691  // }
692  // and adding the yield here is good for at least a 10x speedup
693  // when running >2 threads per core (on the NAS LU benchmark).
694  __kmp_yield();
695 #endif
696 #else
697 #error Unknown or unsupported architecture
698 #endif
699 
700 #if OMPT_SUPPORT && OMPT_OPTIONAL
701  if (ompt_enabled.ompt_callback_flush) {
702  ompt_callbacks.ompt_callback(ompt_callback_flush)(
703  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
704  }
705 #endif
706 }
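// Illustrative example (editor's sketch): "#pragma omp flush" lowers to a
// direct call of this entry point; the spin-wait pattern mentioned in the
// PPC64 comment above looks like
//
//   while (!flag) {
//     __kmpc_flush(&loc);   // #pragma omp flush(flag)
//   }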
707 
708 /* -------------------------------------------------------------------------- */
716 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
717  KMP_COUNT_BLOCK(OMP_BARRIER);
718  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
719 
720  if (!TCR_4(__kmp_init_parallel))
721  __kmp_parallel_initialize();
722 
723  __kmp_resume_if_soft_paused();
724 
725  if (__kmp_env_consistency_check) {
726  if (loc == 0) {
727  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
728  }
729  __kmp_check_barrier(global_tid, ct_barrier, loc);
730  }
731 
732 #if OMPT_SUPPORT
733  ompt_frame_t *ompt_frame;
734  if (ompt_enabled.enabled) {
735  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
736  if (ompt_frame->enter_frame.ptr == NULL)
737  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
738  OMPT_STORE_RETURN_ADDRESS(global_tid);
739  }
740 #endif
741  __kmp_threads[global_tid]->th.th_ident = loc;
742  // TODO: explicit barrier_wait_id:
743  // this function is called when the 'barrier' directive is present or at
744  // the implicit barrier at the end of a worksharing construct.
745  // 1) better to add a per-thread barrier counter to a thread data structure
746  // 2) set to 0 when a new team is created
747  // 3) no sync is required
748 
749  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
750 #if OMPT_SUPPORT && OMPT_OPTIONAL
751  if (ompt_enabled.enabled) {
752  ompt_frame->enter_frame = ompt_data_none;
753  }
754 #endif
755 }
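// Illustrative example (editor's sketch): "#pragma omp barrier" lowers to
//
//   __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
//
// the same entry point also implements the implicit barrier at the end of
// worksharing constructs that carry no nowait clause.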
756 
757 /* The BARRIER for a MASTER section is always explicit */
764 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
765  int status = 0;
766 
767  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
768 
769  if (!TCR_4(__kmp_init_parallel))
770  __kmp_parallel_initialize();
771 
772  __kmp_resume_if_soft_paused();
773 
774  if (KMP_MASTER_GTID(global_tid)) {
775  KMP_COUNT_BLOCK(OMP_MASTER);
776  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
777  status = 1;
778  }
779 
780 #if OMPT_SUPPORT && OMPT_OPTIONAL
781  if (status) {
782  if (ompt_enabled.ompt_callback_master) {
783  kmp_info_t *this_thr = __kmp_threads[global_tid];
784  kmp_team_t *team = this_thr->th.th_team;
785 
786  int tid = __kmp_tid_from_gtid(global_tid);
787  ompt_callbacks.ompt_callback(ompt_callback_master)(
788  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
789  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
790  OMPT_GET_RETURN_ADDRESS(0));
791  }
792  }
793 #endif
794 
795  if (__kmp_env_consistency_check) {
796 #if KMP_USE_DYNAMIC_LOCK
797  if (status)
798  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
799  else
800  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
801 #else
802  if (status)
803  __kmp_push_sync(global_tid, ct_master, loc, NULL);
804  else
805  __kmp_check_sync(global_tid, ct_master, loc, NULL);
806 #endif
807  }
808 
809  return status;
810 }
811 
820 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
821  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
822 
823  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
824  KMP_POP_PARTITIONED_TIMER();
825 
826 #if OMPT_SUPPORT && OMPT_OPTIONAL
827  kmp_info_t *this_thr = __kmp_threads[global_tid];
828  kmp_team_t *team = this_thr->th.th_team;
829  if (ompt_enabled.ompt_callback_master) {
830  int tid = __kmp_tid_from_gtid(global_tid);
831  ompt_callbacks.ompt_callback(ompt_callback_master)(
832  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
833  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
834  OMPT_GET_RETURN_ADDRESS(0));
835  }
836 #endif
837 
838  if (__kmp_env_consistency_check) {
839  if (global_tid < 0)
840  KMP_WARNING(ThreadIdentInvalid);
841 
842  if (KMP_MASTER_GTID(global_tid))
843  __kmp_pop_sync(global_tid, ct_master, loc);
844  }
845 }
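// Illustrative example (editor's sketch): "#pragma omp master" lowers to a
// guarded call pair; only the thread for which __kmpc_master() returns 1
// runs the block and the matching __kmpc_end_master():
//
//   if (__kmpc_master(&loc, gtid)) {
//     master_body();   // placeholder for the master block
//     __kmpc_end_master(&loc, gtid);
//   }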
846 
854 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
855  int cid = 0;
856  kmp_info_t *th;
857  KMP_DEBUG_ASSERT(__kmp_init_serial);
858 
859  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
860 
861  if (!TCR_4(__kmp_init_parallel))
862  __kmp_parallel_initialize();
863 
864  __kmp_resume_if_soft_paused();
865 
866 #if USE_ITT_BUILD
867  __kmp_itt_ordered_prep(gtid);
868 // TODO: ordered_wait_id
869 #endif /* USE_ITT_BUILD */
870 
871  th = __kmp_threads[gtid];
872 
873 #if OMPT_SUPPORT && OMPT_OPTIONAL
874  kmp_team_t *team;
875  ompt_wait_id_t lck;
876  void *codeptr_ra;
877  if (ompt_enabled.enabled) {
878  OMPT_STORE_RETURN_ADDRESS(gtid);
879  team = __kmp_team_from_gtid(gtid);
880  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
881  /* OMPT state update */
882  th->th.ompt_thread_info.wait_id = lck;
883  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
884 
885  /* OMPT event callback */
886  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
887  if (ompt_enabled.ompt_callback_mutex_acquire) {
888  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
889  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
890  codeptr_ra);
891  }
892  }
893 #endif
894 
895  if (th->th.th_dispatch->th_deo_fcn != 0)
896  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
897  else
898  __kmp_parallel_deo(&gtid, &cid, loc);
899 
900 #if OMPT_SUPPORT && OMPT_OPTIONAL
901  if (ompt_enabled.enabled) {
902  /* OMPT state update */
903  th->th.ompt_thread_info.state = ompt_state_work_parallel;
904  th->th.ompt_thread_info.wait_id = 0;
905 
906  /* OMPT event callback */
907  if (ompt_enabled.ompt_callback_mutex_acquired) {
908  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
909  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
910  }
911  }
912 #endif
913 
914 #if USE_ITT_BUILD
915  __kmp_itt_ordered_start(gtid);
916 #endif /* USE_ITT_BUILD */
917 }
918 
926 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
927  int cid = 0;
928  kmp_info_t *th;
929 
930  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
931 
932 #if USE_ITT_BUILD
933  __kmp_itt_ordered_end(gtid);
934 // TODO: ordered_wait_id
935 #endif /* USE_ITT_BUILD */
936 
937  th = __kmp_threads[gtid];
938 
939  if (th->th.th_dispatch->th_dxo_fcn != 0)
940  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
941  else
942  __kmp_parallel_dxo(&gtid, &cid, loc);
943 
944 #if OMPT_SUPPORT && OMPT_OPTIONAL
945  OMPT_STORE_RETURN_ADDRESS(gtid);
946  if (ompt_enabled.ompt_callback_mutex_released) {
947  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
948  ompt_mutex_ordered,
949  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
950  ->t.t_ordered.dt.t_value,
951  OMPT_LOAD_RETURN_ADDRESS(gtid));
952  }
953 #endif
954 }
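// Illustrative example (editor's sketch): inside a loop compiled with an
// ordered clause, each "#pragma omp ordered" block is bracketed as
//
//   __kmpc_ordered(&loc, gtid);
//   ordered_body();   // placeholder for the ordered block
//   __kmpc_end_ordered(&loc, gtid);
//
// the th_deo_fcn/th_dxo_fcn dispatch hooks used above allow schedule-specific
// enter/exit code to replace the default __kmp_parallel_deo/__kmp_parallel_dxo.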
955 
956 #if KMP_USE_DYNAMIC_LOCK
957 
958 static __forceinline void
959 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
960  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
961  // Pointer to the allocated indirect lock is written to crit, while indexing
962  // is ignored.
963  void *idx;
964  kmp_indirect_lock_t **lck;
965  lck = (kmp_indirect_lock_t **)crit;
966  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
967  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
968  KMP_SET_I_LOCK_LOCATION(ilk, loc);
969  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
970  KA_TRACE(20,
971  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
972 #if USE_ITT_BUILD
973  __kmp_itt_critical_creating(ilk->lock, loc);
974 #endif
975  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
976  if (status == 0) {
977 #if USE_ITT_BUILD
978  __kmp_itt_critical_destroyed(ilk->lock);
979 #endif
980  // We don't really need to destroy the unclaimed lock here since it will be
981  // cleaned up at program exit.
982  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
983  }
984  KMP_DEBUG_ASSERT(*lck != NULL);
985 }
986 
987 // Fast-path acquire tas lock
988 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
989  { \
990  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
991  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
992  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
993  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
994  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
995  kmp_uint32 spins; \
996  KMP_FSYNC_PREPARE(l); \
997  KMP_INIT_YIELD(spins); \
998  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
999  do { \
1000  if (TCR_4(__kmp_nth) > \
1001  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1002  KMP_YIELD(TRUE); \
1003  } else { \
1004  KMP_YIELD_SPIN(spins); \
1005  } \
1006  __kmp_spin_backoff(&backoff); \
1007  } while ( \
1008  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1009  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1010  } \
1011  KMP_FSYNC_ACQUIRED(l); \
1012  }
1013 
1014 // Fast-path test tas lock
1015 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1016  { \
1017  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1018  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1019  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1020  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1021  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1022  }
1023 
1024 // Fast-path release tas lock
1025 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1026  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
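// Editor's sketch (illustration only, not used by the runtime): the three
// TAS fast-path macros above implement, in effect, this protocol on the
// atomic poll word, where "free" is KMP_LOCK_FREE(tas) and "busy" is
// KMP_LOCK_BUSY(gtid + 1, tas):
//
//   acquire: while (poll != free || !CAS_acquire(&poll, free, busy))
//              yield_or_spin_with_backoff();
//   test:    rc = (poll == free) && CAS_acquire(&poll, free, busy);
//   release: store_release(&poll, free);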
1027 
1028 #if KMP_USE_FUTEX
1029 
1030 #include <sys/syscall.h>
1031 #include <unistd.h>
1032 #ifndef FUTEX_WAIT
1033 #define FUTEX_WAIT 0
1034 #endif
1035 #ifndef FUTEX_WAKE
1036 #define FUTEX_WAKE 1
1037 #endif
1038 
1039 // Fast-path acquire futex lock
1040 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1041  { \
1042  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1043  kmp_int32 gtid_code = (gtid + 1) << 1; \
1044  KMP_MB(); \
1045  KMP_FSYNC_PREPARE(ftx); \
1046  kmp_int32 poll_val; \
1047  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1048  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1049  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1050  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1051  if (!cond) { \
1052  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1053  poll_val | \
1054  KMP_LOCK_BUSY(1, futex))) { \
1055  continue; \
1056  } \
1057  poll_val |= KMP_LOCK_BUSY(1, futex); \
1058  } \
1059  kmp_int32 rc; \
1060  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1061  NULL, NULL, 0)) != 0) { \
1062  continue; \
1063  } \
1064  gtid_code |= 1; \
1065  } \
1066  KMP_FSYNC_ACQUIRED(ftx); \
1067  }
1068 
1069 // Fast-path test futex lock
1070 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1071  { \
1072  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1073  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1074  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
1075  KMP_FSYNC_ACQUIRED(ftx); \
1076  rc = TRUE; \
1077  } else { \
1078  rc = FALSE; \
1079  } \
1080  }
1081 
1082 // Fast-path release futex lock
1083 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1084  { \
1085  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1086  KMP_MB(); \
1087  KMP_FSYNC_RELEASING(ftx); \
1088  kmp_int32 poll_val = \
1089  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1090  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1091  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1092  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1093  } \
1094  KMP_MB(); \
1095  KMP_YIELD_OVERSUB(); \
1096  }
1097 
1098 #endif // KMP_USE_FUTEX
1099 
1100 #else // KMP_USE_DYNAMIC_LOCK
1101 
1102 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1103  ident_t const *loc,
1104  kmp_int32 gtid) {
1105  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1106 
1107  // Because of the double-check, the following load doesn't need to be volatile
1108  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1109 
1110  if (lck == NULL) {
1111  void *idx;
1112 
1113  // Allocate & initialize the lock.
1114  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1115  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1116  __kmp_init_user_lock_with_checks(lck);
1117  __kmp_set_user_lock_location(lck, loc);
1118 #if USE_ITT_BUILD
1119  __kmp_itt_critical_creating(lck);
1120 // __kmp_itt_critical_creating() should be called *before* the first use of
1121 // the underlying lock. This is the only place where we can guarantee it.
1122 // There is a chance the lock will be destroyed without ever being used, but
1123 // that is not a problem: this is not a real event seen by the user, it only
1124 // sets a name for the object (lock). See more details in kmp_itt.h.
1125 #endif /* USE_ITT_BUILD */
1126 
1127  // Use a cmpxchg instruction to slam the start of the critical section with
1128  // the lock pointer. If another thread beat us to it, deallocate the lock,
1129  // and use the lock that the other thread allocated.
1130  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1131 
1132  if (status == 0) {
1133 // Deallocate the lock and reload the value.
1134 #if USE_ITT_BUILD
1135  __kmp_itt_critical_destroyed(lck);
1136 // Let ITT know the lock is destroyed and the same memory location may be reused
1137 // for another purpose.
1138 #endif /* USE_ITT_BUILD */
1139  __kmp_destroy_user_lock_with_checks(lck);
1140  __kmp_user_lock_free(&idx, gtid, lck);
1141  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1142  KMP_DEBUG_ASSERT(lck != NULL);
1143  }
1144  }
1145  return lck;
1146 }
1147 
1148 #endif // KMP_USE_DYNAMIC_LOCK
1149 
1160 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1161  kmp_critical_name *crit) {
1162 #if KMP_USE_DYNAMIC_LOCK
1163 #if OMPT_SUPPORT && OMPT_OPTIONAL
1164  OMPT_STORE_RETURN_ADDRESS(global_tid);
1165 #endif // OMPT_SUPPORT
1166  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1167 #else
1168  KMP_COUNT_BLOCK(OMP_CRITICAL);
1169 #if OMPT_SUPPORT && OMPT_OPTIONAL
1170  ompt_state_t prev_state = ompt_state_undefined;
1171  ompt_thread_info_t ti;
1172 #endif
1173  kmp_user_lock_p lck;
1174 
1175  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1176 
1177  // TODO: add THR_OVHD_STATE
1178 
1179  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1180  KMP_CHECK_USER_LOCK_INIT();
1181 
1182  if ((__kmp_user_lock_kind == lk_tas) &&
1183  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1184  lck = (kmp_user_lock_p)crit;
1185  }
1186 #if KMP_USE_FUTEX
1187  else if ((__kmp_user_lock_kind == lk_futex) &&
1188  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1189  lck = (kmp_user_lock_p)crit;
1190  }
1191 #endif
1192  else { // ticket, queuing or drdpa
1193  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1194  }
1195 
1196  if (__kmp_env_consistency_check)
1197  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1198 
1199 // since the critical directive binds to all threads, not just the current
1200 // team, we have to check this even if we are in a serialized team.
1201 // also, even if we are the uber thread, we still have to acquire the lock,
1202 // as we have to contend with sibling threads.
1203 
1204 #if USE_ITT_BUILD
1205  __kmp_itt_critical_acquiring(lck);
1206 #endif /* USE_ITT_BUILD */
1207 #if OMPT_SUPPORT && OMPT_OPTIONAL
1208  OMPT_STORE_RETURN_ADDRESS(global_tid);
1209  void *codeptr_ra = NULL;
1210  if (ompt_enabled.enabled) {
1211  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1212  /* OMPT state update */
1213  prev_state = ti.state;
1214  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1215  ti.state = ompt_state_wait_critical;
1216 
1217  /* OMPT event callback */
1218  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1219  if (ompt_enabled.ompt_callback_mutex_acquire) {
1220  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1221  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1222  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1223  }
1224  }
1225 #endif
1226  // The value of 'crit' can be used as the critical_id of the critical
1227  // section directive.
1228  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1229 
1230 #if USE_ITT_BUILD
1231  __kmp_itt_critical_acquired(lck);
1232 #endif /* USE_ITT_BUILD */
1233 #if OMPT_SUPPORT && OMPT_OPTIONAL
1234  if (ompt_enabled.enabled) {
1235  /* OMPT state update */
1236  ti.state = prev_state;
1237  ti.wait_id = 0;
1238 
1239  /* OMPT event callback */
1240  if (ompt_enabled.ompt_callback_mutex_acquired) {
1241  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1242  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1243  }
1244  }
1245 #endif
1246  KMP_POP_PARTITIONED_TIMER();
1247 
1248  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1249  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1250 #endif // KMP_USE_DYNAMIC_LOCK
1251 }
1252 
1253 #if KMP_USE_DYNAMIC_LOCK
1254 
1255 // Converts the given hint to an internal lock implementation
1256 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1257 #if KMP_USE_TSX
1258 #define KMP_TSX_LOCK(seq) lockseq_##seq
1259 #else
1260 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1261 #endif
1262 
1263 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1264 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1265 #else
1266 #define KMP_CPUINFO_RTM 0
1267 #endif
1268 
1269  // Hints that do not require further logic
1270  if (hint & kmp_lock_hint_hle)
1271  return KMP_TSX_LOCK(hle);
1272  if (hint & kmp_lock_hint_rtm)
1273  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1274  if (hint & kmp_lock_hint_adaptive)
1275  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1276 
1277  // Rule out conflicting hints first by returning the default lock
1278  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1279  return __kmp_user_lock_seq;
1280  if ((hint & omp_lock_hint_speculative) &&
1281  (hint & omp_lock_hint_nonspeculative))
1282  return __kmp_user_lock_seq;
1283 
1284  // Do not even consider speculation when it appears to be contended
1285  if (hint & omp_lock_hint_contended)
1286  return lockseq_queuing;
1287 
1288  // Uncontended lock without speculation
1289  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1290  return lockseq_tas;
1291 
1292  // HLE lock for speculation
1293  if (hint & omp_lock_hint_speculative)
1294  return KMP_TSX_LOCK(hle);
1295 
1296  return __kmp_user_lock_seq;
1297 }
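// Illustrative example (editor's sketch): hints reach this mapping through
// the OpenMP lock-hint APIs and the hint clause, e.g.
//
//   omp_lock_t l;
//   omp_init_lock_with_hint(&l, omp_lock_hint_uncontended);  // -> lockseq_tas
//
//   #pragma omp critical (name) hint(omp_lock_hint_contended) // -> lockseq_queuing
//
// conflicting combinations (contended|uncontended or
// speculative|nonspeculative) fall back to __kmp_user_lock_seq, and the TSX
// lock kinds are chosen only when the CPU reports RTM support.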
1298 
1299 #if OMPT_SUPPORT && OMPT_OPTIONAL
1300 #if KMP_USE_DYNAMIC_LOCK
1301 static kmp_mutex_impl_t
1302 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1303  if (user_lock) {
1304  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1305  case 0:
1306  break;
1307 #if KMP_USE_FUTEX
1308  case locktag_futex:
1309  return kmp_mutex_impl_queuing;
1310 #endif
1311  case locktag_tas:
1312  return kmp_mutex_impl_spin;
1313 #if KMP_USE_TSX
1314  case locktag_hle:
1315  return kmp_mutex_impl_speculative;
1316 #endif
1317  default:
1318  return kmp_mutex_impl_none;
1319  }
1320  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1321  }
1322  KMP_ASSERT(ilock);
1323  switch (ilock->type) {
1324 #if KMP_USE_TSX
1325  case locktag_adaptive:
1326  case locktag_rtm:
1327  return kmp_mutex_impl_speculative;
1328 #endif
1329  case locktag_nested_tas:
1330  return kmp_mutex_impl_spin;
1331 #if KMP_USE_FUTEX
1332  case locktag_nested_futex:
1333 #endif
1334  case locktag_ticket:
1335  case locktag_queuing:
1336  case locktag_drdpa:
1337  case locktag_nested_ticket:
1338  case locktag_nested_queuing:
1339  case locktag_nested_drdpa:
1340  return kmp_mutex_impl_queuing;
1341  default:
1342  return kmp_mutex_impl_none;
1343  }
1344 }
1345 #else
1346 // For locks without dynamic binding
1347 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1348  switch (__kmp_user_lock_kind) {
1349  case lk_tas:
1350  return kmp_mutex_impl_spin;
1351 #if KMP_USE_FUTEX
1352  case lk_futex:
1353 #endif
1354  case lk_ticket:
1355  case lk_queuing:
1356  case lk_drdpa:
1357  return kmp_mutex_impl_queuing;
1358 #if KMP_USE_TSX
1359  case lk_hle:
1360  case lk_rtm:
1361  case lk_adaptive:
1362  return kmp_mutex_impl_speculative;
1363 #endif
1364  default:
1365  return kmp_mutex_impl_none;
1366  }
1367 }
1368 #endif // KMP_USE_DYNAMIC_LOCK
1369 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1370 
1384 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1385  kmp_critical_name *crit, uint32_t hint) {
1386  KMP_COUNT_BLOCK(OMP_CRITICAL);
1387  kmp_user_lock_p lck;
1388 #if OMPT_SUPPORT && OMPT_OPTIONAL
1389  ompt_state_t prev_state = ompt_state_undefined;
1390  ompt_thread_info_t ti;
1391  // This is the case if called from __kmpc_critical:
1392  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1393  if (!codeptr)
1394  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1395 #endif
1396 
1397  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1398 
1399  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1400  // Check if it is initialized.
1401  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1402  if (*lk == 0) {
1403  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1404  if (KMP_IS_D_LOCK(lckseq)) {
1405  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1406  KMP_GET_D_TAG(lckseq));
1407  } else {
1408  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1409  }
1410  }
1411  // Branch for accessing the actual lock object and set operation. This
1412  // branching is inevitable since this lock initialization does not follow the
1413  // normal dispatch path (lock table is not used).
1414  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1415  lck = (kmp_user_lock_p)lk;
1416  if (__kmp_env_consistency_check) {
1417  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1418  __kmp_map_hint_to_lock(hint));
1419  }
1420 #if USE_ITT_BUILD
1421  __kmp_itt_critical_acquiring(lck);
1422 #endif
1423 #if OMPT_SUPPORT && OMPT_OPTIONAL
1424  if (ompt_enabled.enabled) {
1425  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1426  /* OMPT state update */
1427  prev_state = ti.state;
1428  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1429  ti.state = ompt_state_wait_critical;
1430 
1431  /* OMPT event callback */
1432  if (ompt_enabled.ompt_callback_mutex_acquire) {
1433  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1434  ompt_mutex_critical, (unsigned int)hint,
1435  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1436  codeptr);
1437  }
1438  }
1439 #endif
1440 #if KMP_USE_INLINED_TAS
1441  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1442  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1443  } else
1444 #elif KMP_USE_INLINED_FUTEX
1445  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1446  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1447  } else
1448 #endif
1449  {
1450  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1451  }
1452  } else {
1453  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1454  lck = ilk->lock;
1455  if (__kmp_env_consistency_check) {
1456  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1457  __kmp_map_hint_to_lock(hint));
1458  }
1459 #if USE_ITT_BUILD
1460  __kmp_itt_critical_acquiring(lck);
1461 #endif
1462 #if OMPT_SUPPORT && OMPT_OPTIONAL
1463  if (ompt_enabled.enabled) {
1464  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1465  /* OMPT state update */
1466  prev_state = ti.state;
1467  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1468  ti.state = ompt_state_wait_critical;
1469 
1470  /* OMPT event callback */
1471  if (ompt_enabled.ompt_callback_mutex_acquire) {
1472  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1473  ompt_mutex_critical, (unsigned int)hint,
1474  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1475  codeptr);
1476  }
1477  }
1478 #endif
1479  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1480  }
1481  KMP_POP_PARTITIONED_TIMER();
1482 
1483 #if USE_ITT_BUILD
1484  __kmp_itt_critical_acquired(lck);
1485 #endif /* USE_ITT_BUILD */
1486 #if OMPT_SUPPORT && OMPT_OPTIONAL
1487  if (ompt_enabled.enabled) {
1488  /* OMPT state update */
1489  ti.state = prev_state;
1490  ti.wait_id = 0;
1491 
1492  /* OMPT event callback */
1493  if (ompt_enabled.ompt_callback_mutex_acquired) {
1494  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1495  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1496  }
1497  }
1498 #endif
1499 
1500  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1501  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1502 } // __kmpc_critical_with_hint
1503 
1504 #endif // KMP_USE_DYNAMIC_LOCK
1505 
1515 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1516  kmp_critical_name *crit) {
1517  kmp_user_lock_p lck;
1518 
1519  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1520 
1521 #if KMP_USE_DYNAMIC_LOCK
1522  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1523  lck = (kmp_user_lock_p)crit;
1524  KMP_ASSERT(lck != NULL);
1525  if (__kmp_env_consistency_check) {
1526  __kmp_pop_sync(global_tid, ct_critical, loc);
1527  }
1528 #if USE_ITT_BUILD
1529  __kmp_itt_critical_releasing(lck);
1530 #endif
1531 #if KMP_USE_INLINED_TAS
1532  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1533  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1534  } else
1535 #elif KMP_USE_INLINED_FUTEX
1536  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1537  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1538  } else
1539 #endif
1540  {
1541  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1542  }
1543  } else {
1544  kmp_indirect_lock_t *ilk =
1545  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1546  KMP_ASSERT(ilk != NULL);
1547  lck = ilk->lock;
1548  if (__kmp_env_consistency_check) {
1549  __kmp_pop_sync(global_tid, ct_critical, loc);
1550  }
1551 #if USE_ITT_BUILD
1552  __kmp_itt_critical_releasing(lck);
1553 #endif
1554  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1555  }
1556 
1557 #else // KMP_USE_DYNAMIC_LOCK
1558 
1559  if ((__kmp_user_lock_kind == lk_tas) &&
1560  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1561  lck = (kmp_user_lock_p)crit;
1562  }
1563 #if KMP_USE_FUTEX
1564  else if ((__kmp_user_lock_kind == lk_futex) &&
1565  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1566  lck = (kmp_user_lock_p)crit;
1567  }
1568 #endif
1569  else { // ticket, queuing or drdpa
1570  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1571  }
1572 
1573  KMP_ASSERT(lck != NULL);
1574 
1575  if (__kmp_env_consistency_check)
1576  __kmp_pop_sync(global_tid, ct_critical, loc);
1577 
1578 #if USE_ITT_BUILD
1579  __kmp_itt_critical_releasing(lck);
1580 #endif /* USE_ITT_BUILD */
1581  // The value of 'crit' can be used as the critical_id of the critical
1582  // section directive.
1583  __kmp_release_user_lock_with_checks(lck, global_tid);
1584 
1585 #endif // KMP_USE_DYNAMIC_LOCK
1586 
1587 #if OMPT_SUPPORT && OMPT_OPTIONAL
1588  /* OMPT release event triggers after lock is released; place here to trigger
1589  * for all #if branches */
1590  OMPT_STORE_RETURN_ADDRESS(global_tid);
1591  if (ompt_enabled.ompt_callback_mutex_released) {
1592  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1593  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1594  OMPT_LOAD_RETURN_ADDRESS(0));
1595  }
1596 #endif
1597 
1598  KMP_POP_PARTITIONED_TIMER();
1599  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1600 }
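// Illustrative example (editor's sketch): a named critical section
//
//   #pragma omp critical (name)
//   { body(); }
//
// is lowered with a compiler-emitted, zero-initialized kmp_critical_name
// object that the runtime lazily converts into a lock on first use:
//
//   static kmp_critical_name crit_name = {0};  // one object per name
//   ...
//   __kmpc_critical(&loc, gtid, &crit_name);
//   body();
//   __kmpc_end_critical(&loc, gtid, &crit_name);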
1601 
1611 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1612  int status;
1613 
1614  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1615 
1616  if (!TCR_4(__kmp_init_parallel))
1617  __kmp_parallel_initialize();
1618 
1619  __kmp_resume_if_soft_paused();
1620 
1621  if (__kmp_env_consistency_check)
1622  __kmp_check_barrier(global_tid, ct_barrier, loc);
1623 
1624 #if OMPT_SUPPORT
1625  ompt_frame_t *ompt_frame;
1626  if (ompt_enabled.enabled) {
1627  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1628  if (ompt_frame->enter_frame.ptr == NULL)
1629  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1630  OMPT_STORE_RETURN_ADDRESS(global_tid);
1631  }
1632 #endif
1633 #if USE_ITT_NOTIFY
1634  __kmp_threads[global_tid]->th.th_ident = loc;
1635 #endif
1636  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1637 #if OMPT_SUPPORT && OMPT_OPTIONAL
1638  if (ompt_enabled.enabled) {
1639  ompt_frame->enter_frame = ompt_data_none;
1640  }
1641 #endif
1642 
1643  return (status != 0) ? 0 : 1;
1644 }
1645 
1655 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1656  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1657 
1658  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1659 }
1660 
1671 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1672  kmp_int32 ret;
1673 
1674  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1675 
1676  if (!TCR_4(__kmp_init_parallel))
1677  __kmp_parallel_initialize();
1678 
1679  __kmp_resume_if_soft_paused();
1680 
1681  if (__kmp_env_consistency_check) {
1682  if (loc == 0) {
1683  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1684  }
1685  __kmp_check_barrier(global_tid, ct_barrier, loc);
1686  }
1687 
1688 #if OMPT_SUPPORT
1689  ompt_frame_t *ompt_frame;
1690  if (ompt_enabled.enabled) {
1691  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1692  if (ompt_frame->enter_frame.ptr == NULL)
1693  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1694  OMPT_STORE_RETURN_ADDRESS(global_tid);
1695  }
1696 #endif
1697 #if USE_ITT_NOTIFY
1698  __kmp_threads[global_tid]->th.th_ident = loc;
1699 #endif
1700  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1701 #if OMPT_SUPPORT && OMPT_OPTIONAL
1702  if (ompt_enabled.enabled) {
1703  ompt_frame->enter_frame = ompt_data_none;
1704  }
1705 #endif
1706 
1707  ret = __kmpc_master(loc, global_tid);
1708 
1709  if (__kmp_env_consistency_check) {
1710  /* there is no __kmpc_end_master call, so the (stats) */
1711  /* actions of __kmpc_end_master are done here */
1712 
1713  if (global_tid < 0) {
1714  KMP_WARNING(ThreadIdentInvalid);
1715  }
1716  if (ret) {
1717  /* only one thread should do the pop since only */
1718  /* one did the push (see __kmpc_master()) */
1719 
1720  __kmp_pop_sync(global_tid, ct_master, loc);
1721  }
1722  }
1723 
1724  return (ret);
1725 }
1726 
1727 /* The BARRIER for a SINGLE process section is always explicit */
1739 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1740  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1741 
1742  if (rc) {
1743  // We are going to execute the single statement, so we should count it.
1744  KMP_COUNT_BLOCK(OMP_SINGLE);
1745  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1746  }
1747 
1748 #if OMPT_SUPPORT && OMPT_OPTIONAL
1749  kmp_info_t *this_thr = __kmp_threads[global_tid];
1750  kmp_team_t *team = this_thr->th.th_team;
1751  int tid = __kmp_tid_from_gtid(global_tid);
1752 
1753  if (ompt_enabled.enabled) {
1754  if (rc) {
1755  if (ompt_enabled.ompt_callback_work) {
1756  ompt_callbacks.ompt_callback(ompt_callback_work)(
1757  ompt_work_single_executor, ompt_scope_begin,
1758  &(team->t.ompt_team_info.parallel_data),
1759  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1760  1, OMPT_GET_RETURN_ADDRESS(0));
1761  }
1762  } else {
1763  if (ompt_enabled.ompt_callback_work) {
1764  ompt_callbacks.ompt_callback(ompt_callback_work)(
1765  ompt_work_single_other, ompt_scope_begin,
1766  &(team->t.ompt_team_info.parallel_data),
1767  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1768  1, OMPT_GET_RETURN_ADDRESS(0));
1769  ompt_callbacks.ompt_callback(ompt_callback_work)(
1770  ompt_work_single_other, ompt_scope_end,
1771  &(team->t.ompt_team_info.parallel_data),
1772  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1773  1, OMPT_GET_RETURN_ADDRESS(0));
1774  }
1775  }
1776  }
1777 #endif
1778 
1779  return rc;
1780 }
1781 
1791 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1792  __kmp_exit_single(global_tid);
1793  KMP_POP_PARTITIONED_TIMER();
1794 
1795 #if OMPT_SUPPORT && OMPT_OPTIONAL
1796  kmp_info_t *this_thr = __kmp_threads[global_tid];
1797  kmp_team_t *team = this_thr->th.th_team;
1798  int tid = __kmp_tid_from_gtid(global_tid);
1799 
1800  if (ompt_enabled.ompt_callback_work) {
1801  ompt_callbacks.ompt_callback(ompt_callback_work)(
1802  ompt_work_single_executor, ompt_scope_end,
1803  &(team->t.ompt_team_info.parallel_data),
1804  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1805  OMPT_GET_RETURN_ADDRESS(0));
1806  }
1807 #endif
1808 }
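// Illustrative example (editor's sketch): "#pragma omp single" without a
// nowait clause lowers to
//
//   if (__kmpc_single(&loc, gtid)) {
//     single_body();   // executed by exactly one thread of the team
//     __kmpc_end_single(&loc, gtid);
//   }
//   __kmpc_barrier(&loc, gtid);   // implicit barrier unless nowait is given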
1809 
1817 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1818  KMP_POP_PARTITIONED_TIMER();
1819  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1820 
1821 #if OMPT_SUPPORT && OMPT_OPTIONAL
1822  if (ompt_enabled.ompt_callback_work) {
1823  ompt_work_t ompt_work_type = ompt_work_loop;
1824  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1825  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1826  // Determine workshare type
1827  if (loc != NULL) {
1828  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1829  ompt_work_type = ompt_work_loop;
1830  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1831  ompt_work_type = ompt_work_sections;
1832  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1833  ompt_work_type = ompt_work_distribute;
1834  } else {
1835  // use default set above.
1836  // a warning about this case is provided in __kmpc_for_static_init
1837  }
1838  KMP_DEBUG_ASSERT(ompt_work_type);
1839  }
1840  ompt_callbacks.ompt_callback(ompt_callback_work)(
1841  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1842  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1843  }
1844 #endif
1845  if (__kmp_env_consistency_check)
1846  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1847 }
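// Illustrative example (editor's sketch, argument details are an assumption
// based on the kmp_sched.cpp interface): this call closes a statically
// scheduled loop opened with one of the __kmpc_for_static_init_* entry
// points, roughly:
//
//   kmp_int32 lower = 0, upper = N - 1, stride = 1, last = 0;
//   __kmpc_for_static_init_4(&loc, gtid, 34 /* kmp_sch_static */, &last,
//                            &lower, &upper, &stride, 1, 1);
//   for (kmp_int32 i = lower; i <= upper; ++i)
//     loop_body(i);
//   __kmpc_for_static_fini(&loc, gtid);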
1848 
1849 // User routines which take C-style arguments (call by value)
1850 // different from the Fortran equivalent routines
1851 
1852 void ompc_set_num_threads(int arg) {
1853  // !!!!! TODO: check the per-task binding
1854  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1855 }
1856 
1857 void ompc_set_dynamic(int flag) {
1858  kmp_info_t *thread;
1859 
1860  /* For the thread-private implementation of the internal controls */
1861  thread = __kmp_entry_thread();
1862 
1863  __kmp_save_internal_controls(thread);
1864 
1865  set__dynamic(thread, flag ? TRUE : FALSE);
1866 }
1867 
1868 void ompc_set_nested(int flag) {
1869  kmp_info_t *thread;
1870 
1871  /* For the thread-private internal controls implementation */
1872  thread = __kmp_entry_thread();
1873 
1874  __kmp_save_internal_controls(thread);
1875 
1876  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1877 }
1878 
1879 void ompc_set_max_active_levels(int max_active_levels) {
1880  /* TO DO */
1881  /* we want per-task implementation of this internal control */
1882 
1883  /* For the per-thread internal controls implementation */
1884  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1885 }
1886 
1887 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1888  // !!!!! TODO: check the per-task binding
1889  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1890 }
1891 
1892 int ompc_get_ancestor_thread_num(int level) {
1893  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1894 }
1895 
1896 int ompc_get_team_size(int level) {
1897  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1898 }
1899 
1900 /* OpenMP 5.0 Affinity Format API */
1901 
1902 void ompc_set_affinity_format(char const *format) {
1903  if (!__kmp_init_serial) {
1904  __kmp_serial_initialize();
1905  }
1906  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1907  format, KMP_STRLEN(format) + 1);
1908 }
1909 
1910 size_t ompc_get_affinity_format(char *buffer, size_t size) {
1911  size_t format_size;
1912  if (!__kmp_init_serial) {
1913  __kmp_serial_initialize();
1914  }
1915  format_size = KMP_STRLEN(__kmp_affinity_format);
1916  if (buffer && size) {
1917  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1918  format_size + 1);
1919  }
1920  return format_size;
1921 }
1922 
1923 void ompc_display_affinity(char const *format) {
1924  int gtid;
1925  if (!TCR_4(__kmp_init_middle)) {
1926  __kmp_middle_initialize();
1927  }
1928  gtid = __kmp_get_gtid();
1929  __kmp_aux_display_affinity(gtid, format);
1930 }
1931 
1932 size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1933  char const *format) {
1934  int gtid;
1935  size_t num_required;
1936  kmp_str_buf_t capture_buf;
1937  if (!TCR_4(__kmp_init_middle)) {
1938  __kmp_middle_initialize();
1939  }
1940  gtid = __kmp_get_gtid();
1941  __kmp_str_buf_init(&capture_buf);
1942  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1943  if (buffer && buf_size) {
1944  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1945  capture_buf.used + 1);
1946  }
1947  __kmp_str_buf_free(&capture_buf);
1948  return num_required;
1949 }
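// Illustrative usage sketch (editorial; assumes the standard OpenMP 5.0
// omp_capture_affinity() entry forwards to ompc_capture_affinity() above):
// query the required size first, then capture into a buffer of that size.
//
//   size_t need = omp_capture_affinity(NULL, 0, NULL); // NULL format: use the
//                                                      // affinity-format ICV
//   char *buf = (char *)malloc(need + 1);
//   if (buf) {
//     omp_capture_affinity(buf, need + 1, NULL);
//     /* ... log or inspect buf ... */
//     free(buf);
//   }
//
// As in the routine above, the return value is the number of characters needed
// regardless of buf_size, and the copied output is truncated to fit the buffer.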
1950 
1951 void kmpc_set_stacksize(int arg) {
1952  // __kmp_aux_set_stacksize initializes the library if needed
1953  __kmp_aux_set_stacksize(arg);
1954 }
1955 
1956 void kmpc_set_stacksize_s(size_t arg) {
1957  // __kmp_aux_set_stacksize initializes the library if needed
1958  __kmp_aux_set_stacksize(arg);
1959 }
1960 
1961 void kmpc_set_blocktime(int arg) {
1962  int gtid, tid;
1963  kmp_info_t *thread;
1964 
1965  gtid = __kmp_entry_gtid();
1966  tid = __kmp_tid_from_gtid(gtid);
1967  thread = __kmp_thread_from_gtid(gtid);
1968 
1969  __kmp_aux_set_blocktime(arg, thread, tid);
1970 }
1971 
1972 void kmpc_set_library(int arg) {
1973  // __kmp_user_set_library initializes the library if needed
1974  __kmp_user_set_library((enum library_type)arg);
1975 }
1976 
1977 void kmpc_set_defaults(char const *str) {
1978  // __kmp_aux_set_defaults initializes the library if needed
1979  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1980 }
1981 
1982 void kmpc_set_disp_num_buffers(int arg) {
1983  // ignore after initialization because some teams have already
1984  // allocated dispatch buffers
1985  if (__kmp_init_serial == 0 && arg > 0)
1986  __kmp_dispatch_num_buffers = arg;
1987 }
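// Illustrative usage sketch (editorial): the kmpc_set_* routines above back the
// kmp_* extension API declared in omp.h (binding assumed here), e.g.
//
//   kmp_set_blocktime(0);                  // idle workers sleep immediately
//   kmp_set_stacksize_s(4 * 1024 * 1024);  // 4 MiB worker stacks
//   kmp_set_defaults("KMP_BLOCKTIME=0");   // env-style defaults string
//   kmp_set_disp_num_buffers(16);
//
// Note that kmpc_set_disp_num_buffers() is honored only before serial
// initialization, since teams may already own dispatch buffers afterwards.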
1988 
1989 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1990 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1991  return -1;
1992 #else
1993  if (!TCR_4(__kmp_init_middle)) {
1994  __kmp_middle_initialize();
1995  }
1996  return __kmp_aux_set_affinity_mask_proc(proc, mask);
1997 #endif
1998 }
1999 
2000 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2001 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2002  return -1;
2003 #else
2004  if (!TCR_4(__kmp_init_middle)) {
2005  __kmp_middle_initialize();
2006  }
2007  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2008 #endif
2009 }
2010 
2011 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2012 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2013  return -1;
2014 #else
2015  if (!TCR_4(__kmp_init_middle)) {
2016  __kmp_middle_initialize();
2017  }
2018  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2019 #endif
2020 }
2021 
2022 /* -------------------------------------------------------------------------- */
2067 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2068  void *cpy_data, void (*cpy_func)(void *, void *),
2069  kmp_int32 didit) {
2070  void **data_ptr;
2071 
2072  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2073 
2074  KMP_MB();
2075 
2076  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2077 
2078  if (__kmp_env_consistency_check) {
2079  if (loc == 0) {
2080  KMP_WARNING(ConstructIdentInvalid);
2081  }
2082  }
2083 
2084  // ToDo: Optimize the following two barriers into some kind of split barrier
2085 
2086  if (didit)
2087  *data_ptr = cpy_data;
2088 
2089 #if OMPT_SUPPORT
2090  ompt_frame_t *ompt_frame;
2091  if (ompt_enabled.enabled) {
2092  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2093  if (ompt_frame->enter_frame.ptr == NULL)
2094  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2095  OMPT_STORE_RETURN_ADDRESS(gtid);
2096  }
2097 #endif
2098 /* This barrier is not a barrier region boundary */
2099 #if USE_ITT_NOTIFY
2100  __kmp_threads[gtid]->th.th_ident = loc;
2101 #endif
2102  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2103 
2104  if (!didit)
2105  (*cpy_func)(cpy_data, *data_ptr);
2106 
2107 // Consider next barrier a user-visible barrier for barrier region boundaries
2108 // Nesting checks are already handled by the single construct checks
2109 
2110 #if OMPT_SUPPORT
2111  if (ompt_enabled.enabled) {
2112  OMPT_STORE_RETURN_ADDRESS(gtid);
2113  }
2114 #endif
2115 #if USE_ITT_NOTIFY
2116  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2117 // tasks can overwrite the location)
2118 #endif
2119  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2120 #if OMPT_SUPPORT && OMPT_OPTIONAL
2121  if (ompt_enabled.enabled) {
2122  ompt_frame->enter_frame = ompt_data_none;
2123  }
2124 #endif
2125 }
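// Compiler code-generation sketch for 'single copyprivate(x)' (editorial,
// illustrative only; copy_x and x are hypothetical names):
//
//   kmp_int32 didit = 0;
//   if (__kmpc_single(loc, gtid)) { // nonzero only on the executing thread
//     /* body of the single construct; writes x */
//     didit = 1;
//     __kmpc_end_single(loc, gtid);
//   }
//   __kmpc_copyprivate(loc, gtid, sizeof(x), &x, copy_x, didit);
//
// Every thread passes the address of its own copy of x; the thread with
// didit == 1 publishes its pointer, the first barrier makes it visible, the
// other threads run copy_x(their copy, published copy), and the second barrier
// keeps the source alive until all copies have completed.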
2126 
2127 /* -------------------------------------------------------------------------- */
2128 
2129 #define INIT_LOCK __kmp_init_user_lock_with_checks
2130 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2131 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2132 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2133 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2134 #define ACQUIRE_NESTED_LOCK_TIMED \
2135  __kmp_acquire_nested_user_lock_with_checks_timed
2136 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2137 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2138 #define TEST_LOCK __kmp_test_user_lock_with_checks
2139 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2140 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2141 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2142 
2143 // TODO: Make check abort messages use location info & pass it into
2144 // with_checks routines
2145 
2146 #if KMP_USE_DYNAMIC_LOCK
2147 
2148 // internal lock initializer
2149 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2150  kmp_dyna_lockseq_t seq) {
2151  if (KMP_IS_D_LOCK(seq)) {
2152  KMP_INIT_D_LOCK(lock, seq);
2153 #if USE_ITT_BUILD
2154  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2155 #endif
2156  } else {
2157  KMP_INIT_I_LOCK(lock, seq);
2158 #if USE_ITT_BUILD
2159  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2160  __kmp_itt_lock_creating(ilk->lock, loc);
2161 #endif
2162  }
2163 }
2164 
2165 // internal nest lock initializer
2166 static __forceinline void
2167 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2168  kmp_dyna_lockseq_t seq) {
2169 #if KMP_USE_TSX
2170  // Don't have nested lock implementation for speculative locks
2171  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2172  seq = __kmp_user_lock_seq;
2173 #endif
2174  switch (seq) {
2175  case lockseq_tas:
2176  seq = lockseq_nested_tas;
2177  break;
2178 #if KMP_USE_FUTEX
2179  case lockseq_futex:
2180  seq = lockseq_nested_futex;
2181  break;
2182 #endif
2183  case lockseq_ticket:
2184  seq = lockseq_nested_ticket;
2185  break;
2186  case lockseq_queuing:
2187  seq = lockseq_nested_queuing;
2188  break;
2189  case lockseq_drdpa:
2190  seq = lockseq_nested_drdpa;
2191  break;
2192  default:
2193  seq = lockseq_nested_queuing;
2194  }
2195  KMP_INIT_I_LOCK(lock, seq);
2196 #if USE_ITT_BUILD
2197  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2198  __kmp_itt_lock_creating(ilk->lock, loc);
2199 #endif
2200 }
2201 
2202 /* initialize the lock with a hint */
2203 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2204  uintptr_t hint) {
2205  KMP_DEBUG_ASSERT(__kmp_init_serial);
2206  if (__kmp_env_consistency_check && user_lock == NULL) {
2207  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2208  }
2209 
2210  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2211 
2212 #if OMPT_SUPPORT && OMPT_OPTIONAL
2213  // This is the case, if called from omp_init_lock_with_hint:
2214  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2215  if (!codeptr)
2216  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2217  if (ompt_enabled.ompt_callback_lock_init) {
2218  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2219  ompt_mutex_lock, (omp_lock_hint_t)hint,
2220  __ompt_get_mutex_impl_type(user_lock),
2221  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2222  }
2223 #endif
2224 }
2225 
2226 /* initialize the lock with a hint */
2227 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2228  void **user_lock, uintptr_t hint) {
2229  KMP_DEBUG_ASSERT(__kmp_init_serial);
2230  if (__kmp_env_consistency_check && user_lock == NULL) {
2231  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2232  }
2233 
2234  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2235 
2236 #if OMPT_SUPPORT && OMPT_OPTIONAL
2237  // This is the case, if called from omp_init_lock_with_hint:
2238  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2239  if (!codeptr)
2240  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2241  if (ompt_enabled.ompt_callback_lock_init) {
2242  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2243  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2244  __ompt_get_mutex_impl_type(user_lock),
2245  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2246  }
2247 #endif
2248 }
2249 
2250 #endif // KMP_USE_DYNAMIC_LOCK
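// Illustrative user-level sketch (editorial): omp_init_lock_with_hint() is
// assumed to reach __kmpc_init_lock_with_hint() above, which maps the hint to
// a lock sequence via __kmp_map_hint_to_lock(). For example:
//
//   omp_lock_t l;
//   omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
//   omp_set_lock(&l);
//   /* ... protected work ... */
//   omp_unset_lock(&l);
//   omp_destroy_lock(&l);
//
// If the requested hint cannot be honored (e.g. a speculative lock without TSX
// support), the mapping is assumed to fall back to a regular lock kind.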
2251 
2252 /* initialize the lock */
2253 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2254 #if KMP_USE_DYNAMIC_LOCK
2255 
2256  KMP_DEBUG_ASSERT(__kmp_init_serial);
2257  if (__kmp_env_consistency_check && user_lock == NULL) {
2258  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2259  }
2260  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2261 
2262 #if OMPT_SUPPORT && OMPT_OPTIONAL
2263  // This is the case, if called from omp_init_lock_with_hint:
2264  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2265  if (!codeptr)
2266  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2267  if (ompt_enabled.ompt_callback_lock_init) {
2268  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2269  ompt_mutex_lock, omp_lock_hint_none,
2270  __ompt_get_mutex_impl_type(user_lock),
2271  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2272  }
2273 #endif
2274 
2275 #else // KMP_USE_DYNAMIC_LOCK
2276 
2277  static char const *const func = "omp_init_lock";
2278  kmp_user_lock_p lck;
2279  KMP_DEBUG_ASSERT(__kmp_init_serial);
2280 
2281  if (__kmp_env_consistency_check) {
2282  if (user_lock == NULL) {
2283  KMP_FATAL(LockIsUninitialized, func);
2284  }
2285  }
2286 
2287  KMP_CHECK_USER_LOCK_INIT();
2288 
2289  if ((__kmp_user_lock_kind == lk_tas) &&
2290  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2291  lck = (kmp_user_lock_p)user_lock;
2292  }
2293 #if KMP_USE_FUTEX
2294  else if ((__kmp_user_lock_kind == lk_futex) &&
2295  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2296  lck = (kmp_user_lock_p)user_lock;
2297  }
2298 #endif
2299  else {
2300  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2301  }
2302  INIT_LOCK(lck);
2303  __kmp_set_user_lock_location(lck, loc);
2304 
2305 #if OMPT_SUPPORT && OMPT_OPTIONAL
2306  // This is the case, if called from omp_init_lock_with_hint:
2307  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2308  if (!codeptr)
2309  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2310  if (ompt_enabled.ompt_callback_lock_init) {
2311  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2312  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2313  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2314  }
2315 #endif
2316 
2317 #if USE_ITT_BUILD
2318  __kmp_itt_lock_creating(lck);
2319 #endif /* USE_ITT_BUILD */
2320 
2321 #endif // KMP_USE_DYNAMIC_LOCK
2322 } // __kmpc_init_lock
2323 
2324 /* initialize the lock */
2325 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2326 #if KMP_USE_DYNAMIC_LOCK
2327 
2328  KMP_DEBUG_ASSERT(__kmp_init_serial);
2329  if (__kmp_env_consistency_check && user_lock == NULL) {
2330  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2331  }
2332  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2333 
2334 #if OMPT_SUPPORT && OMPT_OPTIONAL
2335  // This is the case, if called from omp_init_lock_with_hint:
2336  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2337  if (!codeptr)
2338  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2339  if (ompt_enabled.ompt_callback_lock_init) {
2340  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2341  ompt_mutex_nest_lock, omp_lock_hint_none,
2342  __ompt_get_mutex_impl_type(user_lock),
2343  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2344  }
2345 #endif
2346 
2347 #else // KMP_USE_DYNAMIC_LOCK
2348 
2349  static char const *const func = "omp_init_nest_lock";
2350  kmp_user_lock_p lck;
2351  KMP_DEBUG_ASSERT(__kmp_init_serial);
2352 
2353  if (__kmp_env_consistency_check) {
2354  if (user_lock == NULL) {
2355  KMP_FATAL(LockIsUninitialized, func);
2356  }
2357  }
2358 
2359  KMP_CHECK_USER_LOCK_INIT();
2360 
2361  if ((__kmp_user_lock_kind == lk_tas) &&
2362  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2363  OMP_NEST_LOCK_T_SIZE)) {
2364  lck = (kmp_user_lock_p)user_lock;
2365  }
2366 #if KMP_USE_FUTEX
2367  else if ((__kmp_user_lock_kind == lk_futex) &&
2368  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2369  OMP_NEST_LOCK_T_SIZE)) {
2370  lck = (kmp_user_lock_p)user_lock;
2371  }
2372 #endif
2373  else {
2374  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2375  }
2376 
2377  INIT_NESTED_LOCK(lck);
2378  __kmp_set_user_lock_location(lck, loc);
2379 
2380 #if OMPT_SUPPORT && OMPT_OPTIONAL
2381  // This is the case, if called from omp_init_lock_with_hint:
2382  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2383  if (!codeptr)
2384  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2385  if (ompt_enabled.ompt_callback_lock_init) {
2386  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2387  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2388  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2389  }
2390 #endif
2391 
2392 #if USE_ITT_BUILD
2393  __kmp_itt_lock_creating(lck);
2394 #endif /* USE_ITT_BUILD */
2395 
2396 #endif // KMP_USE_DYNAMIC_LOCK
2397 } // __kmpc_init_nest_lock
2398 
2399 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2400 #if KMP_USE_DYNAMIC_LOCK
2401 
2402 #if USE_ITT_BUILD
2403  kmp_user_lock_p lck;
2404  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2405  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2406  } else {
2407  lck = (kmp_user_lock_p)user_lock;
2408  }
2409  __kmp_itt_lock_destroyed(lck);
2410 #endif
2411 #if OMPT_SUPPORT && OMPT_OPTIONAL
2412  // This is the case, if called from omp_init_lock_with_hint:
2413  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2414  if (!codeptr)
2415  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2416  if (ompt_enabled.ompt_callback_lock_destroy) {
2417  kmp_user_lock_p lck;
2418  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2419  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2420  } else {
2421  lck = (kmp_user_lock_p)user_lock;
2422  }
2423  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2424  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2425  }
2426 #endif
2427  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2428 #else
2429  kmp_user_lock_p lck;
2430 
2431  if ((__kmp_user_lock_kind == lk_tas) &&
2432  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2433  lck = (kmp_user_lock_p)user_lock;
2434  }
2435 #if KMP_USE_FUTEX
2436  else if ((__kmp_user_lock_kind == lk_futex) &&
2437  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2438  lck = (kmp_user_lock_p)user_lock;
2439  }
2440 #endif
2441  else {
2442  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2443  }
2444 
2445 #if OMPT_SUPPORT && OMPT_OPTIONAL
2446  // This is the case, if called from omp_init_lock_with_hint:
2447  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2448  if (!codeptr)
2449  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2450  if (ompt_enabled.ompt_callback_lock_destroy) {
2451  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2452  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2453  }
2454 #endif
2455 
2456 #if USE_ITT_BUILD
2457  __kmp_itt_lock_destroyed(lck);
2458 #endif /* USE_ITT_BUILD */
2459  DESTROY_LOCK(lck);
2460 
2461  if ((__kmp_user_lock_kind == lk_tas) &&
2462  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2463  ;
2464  }
2465 #if KMP_USE_FUTEX
2466  else if ((__kmp_user_lock_kind == lk_futex) &&
2467  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2468  ;
2469  }
2470 #endif
2471  else {
2472  __kmp_user_lock_free(user_lock, gtid, lck);
2473  }
2474 #endif // KMP_USE_DYNAMIC_LOCK
2475 } // __kmpc_destroy_lock
2476 
2477 /* destroy the lock */
2478 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2479 #if KMP_USE_DYNAMIC_LOCK
2480 
2481 #if USE_ITT_BUILD
2482  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2483  __kmp_itt_lock_destroyed(ilk->lock);
2484 #endif
2485 #if OMPT_SUPPORT && OMPT_OPTIONAL
2486  // This is the case, if called from omp_init_lock_with_hint:
2487  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2488  if (!codeptr)
2489  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2490  if (ompt_enabled.ompt_callback_lock_destroy) {
2491  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2492  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2493  }
2494 #endif
2495  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2496 
2497 #else // KMP_USE_DYNAMIC_LOCK
2498 
2499  kmp_user_lock_p lck;
2500 
2501  if ((__kmp_user_lock_kind == lk_tas) &&
2502  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2503  OMP_NEST_LOCK_T_SIZE)) {
2504  lck = (kmp_user_lock_p)user_lock;
2505  }
2506 #if KMP_USE_FUTEX
2507  else if ((__kmp_user_lock_kind == lk_futex) &&
2508  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2509  OMP_NEST_LOCK_T_SIZE)) {
2510  lck = (kmp_user_lock_p)user_lock;
2511  }
2512 #endif
2513  else {
2514  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2515  }
2516 
2517 #if OMPT_SUPPORT && OMPT_OPTIONAL
2518  // This is the case, if called from omp_init_lock_with_hint:
2519  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2520  if (!codeptr)
2521  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2522  if (ompt_enabled.ompt_callback_lock_destroy) {
2523  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2524  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2525  }
2526 #endif
2527 
2528 #if USE_ITT_BUILD
2529  __kmp_itt_lock_destroyed(lck);
2530 #endif /* USE_ITT_BUILD */
2531 
2532  DESTROY_NESTED_LOCK(lck);
2533 
2534  if ((__kmp_user_lock_kind == lk_tas) &&
2535  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2536  OMP_NEST_LOCK_T_SIZE)) {
2537  ;
2538  }
2539 #if KMP_USE_FUTEX
2540  else if ((__kmp_user_lock_kind == lk_futex) &&
2541  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2542  OMP_NEST_LOCK_T_SIZE)) {
2543  ;
2544  }
2545 #endif
2546  else {
2547  __kmp_user_lock_free(user_lock, gtid, lck);
2548  }
2549 #endif // KMP_USE_DYNAMIC_LOCK
2550 } // __kmpc_destroy_nest_lock
2551 
2552 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2553  KMP_COUNT_BLOCK(OMP_set_lock);
2554 #if KMP_USE_DYNAMIC_LOCK
2555  int tag = KMP_EXTRACT_D_TAG(user_lock);
2556 #if USE_ITT_BUILD
2557  __kmp_itt_lock_acquiring(
2558  (kmp_user_lock_p)
2559  user_lock); // itt function will get to the right lock object.
2560 #endif
2561 #if OMPT_SUPPORT && OMPT_OPTIONAL
2562  // This is the case, if called from omp_init_lock_with_hint:
2563  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2564  if (!codeptr)
2565  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2566  if (ompt_enabled.ompt_callback_mutex_acquire) {
2567  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2568  ompt_mutex_lock, omp_lock_hint_none,
2569  __ompt_get_mutex_impl_type(user_lock),
2570  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2571  }
2572 #endif
2573 #if KMP_USE_INLINED_TAS
2574  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2575  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2576  } else
2577 #elif KMP_USE_INLINED_FUTEX
2578  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2579  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2580  } else
2581 #endif
2582  {
2583  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2584  }
2585 #if USE_ITT_BUILD
2586  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2587 #endif
2588 #if OMPT_SUPPORT && OMPT_OPTIONAL
2589  if (ompt_enabled.ompt_callback_mutex_acquired) {
2590  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2591  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2592  }
2593 #endif
2594 
2595 #else // KMP_USE_DYNAMIC_LOCK
2596 
2597  kmp_user_lock_p lck;
2598 
2599  if ((__kmp_user_lock_kind == lk_tas) &&
2600  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2601  lck = (kmp_user_lock_p)user_lock;
2602  }
2603 #if KMP_USE_FUTEX
2604  else if ((__kmp_user_lock_kind == lk_futex) &&
2605  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2606  lck = (kmp_user_lock_p)user_lock;
2607  }
2608 #endif
2609  else {
2610  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2611  }
2612 
2613 #if USE_ITT_BUILD
2614  __kmp_itt_lock_acquiring(lck);
2615 #endif /* USE_ITT_BUILD */
2616 #if OMPT_SUPPORT && OMPT_OPTIONAL
2617  // This is the case, if called from omp_init_lock_with_hint:
2618  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2619  if (!codeptr)
2620  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2621  if (ompt_enabled.ompt_callback_mutex_acquire) {
2622  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2623  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2624  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2625  }
2626 #endif
2627 
2628  ACQUIRE_LOCK(lck, gtid);
2629 
2630 #if USE_ITT_BUILD
2631  __kmp_itt_lock_acquired(lck);
2632 #endif /* USE_ITT_BUILD */
2633 
2634 #if OMPT_SUPPORT && OMPT_OPTIONAL
2635  if (ompt_enabled.ompt_callback_mutex_acquired) {
2636  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2637  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2638  }
2639 #endif
2640 
2641 #endif // KMP_USE_DYNAMIC_LOCK
2642 }
2643 
2644 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2645 #if KMP_USE_DYNAMIC_LOCK
2646 
2647 #if USE_ITT_BUILD
2648  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2649 #endif
2650 #if OMPT_SUPPORT && OMPT_OPTIONAL
2651  // This is the case, if called from omp_init_lock_with_hint:
2652  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2653  if (!codeptr)
2654  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2655  if (ompt_enabled.enabled) {
2656  if (ompt_enabled.ompt_callback_mutex_acquire) {
2657  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2658  ompt_mutex_nest_lock, omp_lock_hint_none,
2659  __ompt_get_mutex_impl_type(user_lock),
2660  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2661  }
2662  }
2663 #endif
2664  int acquire_status =
2665  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2666  (void) acquire_status;
2667 #if USE_ITT_BUILD
2668  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2669 #endif
2670 
2671 #if OMPT_SUPPORT && OMPT_OPTIONAL
2672  if (ompt_enabled.enabled) {
2673  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2674  if (ompt_enabled.ompt_callback_mutex_acquired) {
2675  // lock_first
2676  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2677  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2678  codeptr);
2679  }
2680  } else {
2681  if (ompt_enabled.ompt_callback_nest_lock) {
2682  // lock_next
2683  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2684  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2685  }
2686  }
2687  }
2688 #endif
2689 
2690 #else // KMP_USE_DYNAMIC_LOCK
2691  int acquire_status;
2692  kmp_user_lock_p lck;
2693 
2694  if ((__kmp_user_lock_kind == lk_tas) &&
2695  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2696  OMP_NEST_LOCK_T_SIZE)) {
2697  lck = (kmp_user_lock_p)user_lock;
2698  }
2699 #if KMP_USE_FUTEX
2700  else if ((__kmp_user_lock_kind == lk_futex) &&
2701  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2702  OMP_NEST_LOCK_T_SIZE)) {
2703  lck = (kmp_user_lock_p)user_lock;
2704  }
2705 #endif
2706  else {
2707  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2708  }
2709 
2710 #if USE_ITT_BUILD
2711  __kmp_itt_lock_acquiring(lck);
2712 #endif /* USE_ITT_BUILD */
2713 #if OMPT_SUPPORT && OMPT_OPTIONAL
2714  // This is the case, if called from omp_init_lock_with_hint:
2715  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2716  if (!codeptr)
2717  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2718  if (ompt_enabled.enabled) {
2719  if (ompt_enabled.ompt_callback_mutex_acquire) {
2720  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2721  ompt_mutex_nest_lock, omp_lock_hint_none,
2722  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2723  codeptr);
2724  }
2725  }
2726 #endif
2727 
2728  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2729 
2730 #if USE_ITT_BUILD
2731  __kmp_itt_lock_acquired(lck);
2732 #endif /* USE_ITT_BUILD */
2733 
2734 #if OMPT_SUPPORT && OMPT_OPTIONAL
2735  if (ompt_enabled.enabled) {
2736  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2737  if (ompt_enabled.ompt_callback_mutex_acquired) {
2738  // lock_first
2739  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2740  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2741  }
2742  } else {
2743  if (ompt_enabled.ompt_callback_nest_lock) {
2744  // lock_next
2745  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2746  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2747  }
2748  }
2749  }
2750 #endif
2751 
2752 #endif // KMP_USE_DYNAMIC_LOCK
2753 }
2754 
2755 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2756 #if KMP_USE_DYNAMIC_LOCK
2757 
2758  int tag = KMP_EXTRACT_D_TAG(user_lock);
2759 #if USE_ITT_BUILD
2760  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2761 #endif
2762 #if KMP_USE_INLINED_TAS
2763  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2764  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2765  } else
2766 #elif KMP_USE_INLINED_FUTEX
2767  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2768  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2769  } else
2770 #endif
2771  {
2772  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2773  }
2774 
2775 #if OMPT_SUPPORT && OMPT_OPTIONAL
2776  // This is the case, if called from omp_init_lock_with_hint:
2777  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2778  if (!codeptr)
2779  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2780  if (ompt_enabled.ompt_callback_mutex_released) {
2781  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2782  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2783  }
2784 #endif
2785 
2786 #else // KMP_USE_DYNAMIC_LOCK
2787 
2788  kmp_user_lock_p lck;
2789 
2790  /* Can't use serial interval since not block structured */
2791  /* release the lock */
2792 
2793  if ((__kmp_user_lock_kind == lk_tas) &&
2794  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2795 #if KMP_OS_LINUX && \
2796  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2797 // "fast" path implemented to fix customer performance issue
2798 #if USE_ITT_BUILD
2799  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2800 #endif /* USE_ITT_BUILD */
2801  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2802  KMP_MB();
2803 
2804 #if OMPT_SUPPORT && OMPT_OPTIONAL
2805  // This is the case, if called from omp_init_lock_with_hint:
2806  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2807  if (!codeptr)
2808  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2809  if (ompt_enabled.ompt_callback_mutex_released) {
2810  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2811  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2812  }
2813 #endif
2814 
2815  return;
2816 #else
2817  lck = (kmp_user_lock_p)user_lock;
2818 #endif
2819  }
2820 #if KMP_USE_FUTEX
2821  else if ((__kmp_user_lock_kind == lk_futex) &&
2822  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2823  lck = (kmp_user_lock_p)user_lock;
2824  }
2825 #endif
2826  else {
2827  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2828  }
2829 
2830 #if USE_ITT_BUILD
2831  __kmp_itt_lock_releasing(lck);
2832 #endif /* USE_ITT_BUILD */
2833 
2834  RELEASE_LOCK(lck, gtid);
2835 
2836 #if OMPT_SUPPORT && OMPT_OPTIONAL
2837  // This is the case, if called from omp_init_lock_with_hint:
2838  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2839  if (!codeptr)
2840  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2841  if (ompt_enabled.ompt_callback_mutex_released) {
2842  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2843  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2844  }
2845 #endif
2846 
2847 #endif // KMP_USE_DYNAMIC_LOCK
2848 }
2849 
2850 /* release the lock */
2851 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2852 #if KMP_USE_DYNAMIC_LOCK
2853 
2854 #if USE_ITT_BUILD
2855  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2856 #endif
2857  int release_status =
2858  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2859  (void) release_status;
2860 
2861 #if OMPT_SUPPORT && OMPT_OPTIONAL
2862  // This is the case, if called from omp_init_lock_with_hint:
2863  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2864  if (!codeptr)
2865  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2866  if (ompt_enabled.enabled) {
2867  if (release_status == KMP_LOCK_RELEASED) {
2868  if (ompt_enabled.ompt_callback_mutex_released) {
2869  // release_lock_last
2870  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2871  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2872  codeptr);
2873  }
2874  } else if (ompt_enabled.ompt_callback_nest_lock) {
2875  // release_lock_prev
2876  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2877  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2878  }
2879  }
2880 #endif
2881 
2882 #else // KMP_USE_DYNAMIC_LOCK
2883 
2884  kmp_user_lock_p lck;
2885 
2886  /* Can't use serial interval since not block structured */
2887 
2888  if ((__kmp_user_lock_kind == lk_tas) &&
2889  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2890  OMP_NEST_LOCK_T_SIZE)) {
2891 #if KMP_OS_LINUX && \
2892  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2893  // "fast" path implemented to fix customer performance issue
2894  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2895 #if USE_ITT_BUILD
2896  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2897 #endif /* USE_ITT_BUILD */
2898 
2899 #if OMPT_SUPPORT && OMPT_OPTIONAL
2900  int release_status = KMP_LOCK_STILL_HELD;
2901 #endif
2902 
2903  if (--(tl->lk.depth_locked) == 0) {
2904  TCW_4(tl->lk.poll, 0);
2905 #if OMPT_SUPPORT && OMPT_OPTIONAL
2906  release_status = KMP_LOCK_RELEASED;
2907 #endif
2908  }
2909  KMP_MB();
2910 
2911 #if OMPT_SUPPORT && OMPT_OPTIONAL
2912  // This is the case, if called from omp_init_lock_with_hint:
2913  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2914  if (!codeptr)
2915  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2916  if (ompt_enabled.enabled) {
2917  if (release_status == KMP_LOCK_RELEASED) {
2918  if (ompt_enabled.ompt_callback_mutex_released) {
2919  // release_lock_last
2920  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2921  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2922  }
2923  } else if (ompt_enabled.ompt_callback_nest_lock) {
2924  // release_lock_previous
2925  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2926  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2927  }
2928  }
2929 #endif
2930 
2931  return;
2932 #else
2933  lck = (kmp_user_lock_p)user_lock;
2934 #endif
2935  }
2936 #if KMP_USE_FUTEX
2937  else if ((__kmp_user_lock_kind == lk_futex) &&
2938  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2939  OMP_NEST_LOCK_T_SIZE)) {
2940  lck = (kmp_user_lock_p)user_lock;
2941  }
2942 #endif
2943  else {
2944  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2945  }
2946 
2947 #if USE_ITT_BUILD
2948  __kmp_itt_lock_releasing(lck);
2949 #endif /* USE_ITT_BUILD */
2950 
2951  int release_status;
2952  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2953 #if OMPT_SUPPORT && OMPT_OPTIONAL
2954  // This is the case, if called from omp_init_lock_with_hint:
2955  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2956  if (!codeptr)
2957  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2958  if (ompt_enabled.enabled) {
2959  if (release_status == KMP_LOCK_RELEASED) {
2960  if (ompt_enabled.ompt_callback_mutex_released) {
2961  // release_lock_last
2962  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2963  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2964  }
2965  } else if (ompt_enabled.ompt_callback_nest_lock) {
2966  // release_lock_previous
2967  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2968  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2969  }
2970  }
2971 #endif
2972 
2973 #endif // KMP_USE_DYNAMIC_LOCK
2974 }
2975 
2976 /* try to acquire the lock */
2977 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2978  KMP_COUNT_BLOCK(OMP_test_lock);
2979 
2980 #if KMP_USE_DYNAMIC_LOCK
2981  int rc;
2982  int tag = KMP_EXTRACT_D_TAG(user_lock);
2983 #if USE_ITT_BUILD
2984  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2985 #endif
2986 #if OMPT_SUPPORT && OMPT_OPTIONAL
2987  // This is the case, if called from omp_init_lock_with_hint:
2988  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2989  if (!codeptr)
2990  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2991  if (ompt_enabled.ompt_callback_mutex_acquire) {
2992  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2993  ompt_mutex_lock, omp_lock_hint_none,
2994  __ompt_get_mutex_impl_type(user_lock),
2995  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2996  }
2997 #endif
2998 #if KMP_USE_INLINED_TAS
2999  if (tag == locktag_tas && !__kmp_env_consistency_check) {
3000  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3001  } else
3002 #elif KMP_USE_INLINED_FUTEX
3003  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3004  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3005  } else
3006 #endif
3007  {
3008  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3009  }
3010  if (rc) {
3011 #if USE_ITT_BUILD
3012  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3013 #endif
3014 #if OMPT_SUPPORT && OMPT_OPTIONAL
3015  if (ompt_enabled.ompt_callback_mutex_acquired) {
3016  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3017  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3018  }
3019 #endif
3020  return FTN_TRUE;
3021  } else {
3022 #if USE_ITT_BUILD
3023  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3024 #endif
3025  return FTN_FALSE;
3026  }
3027 
3028 #else // KMP_USE_DYNAMIC_LOCK
3029 
3030  kmp_user_lock_p lck;
3031  int rc;
3032 
3033  if ((__kmp_user_lock_kind == lk_tas) &&
3034  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3035  lck = (kmp_user_lock_p)user_lock;
3036  }
3037 #if KMP_USE_FUTEX
3038  else if ((__kmp_user_lock_kind == lk_futex) &&
3039  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3040  lck = (kmp_user_lock_p)user_lock;
3041  }
3042 #endif
3043  else {
3044  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3045  }
3046 
3047 #if USE_ITT_BUILD
3048  __kmp_itt_lock_acquiring(lck);
3049 #endif /* USE_ITT_BUILD */
3050 #if OMPT_SUPPORT && OMPT_OPTIONAL
3051  // This is the case, if called from omp_init_lock_with_hint:
3052  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3053  if (!codeptr)
3054  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3055  if (ompt_enabled.ompt_callback_mutex_acquire) {
3056  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3057  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3058  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3059  }
3060 #endif
3061 
3062  rc = TEST_LOCK(lck, gtid);
3063 #if USE_ITT_BUILD
3064  if (rc) {
3065  __kmp_itt_lock_acquired(lck);
3066  } else {
3067  __kmp_itt_lock_cancelled(lck);
3068  }
3069 #endif /* USE_ITT_BUILD */
3070 #if OMPT_SUPPORT && OMPT_OPTIONAL
3071  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3072  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3073  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3074  }
3075 #endif
3076 
3077  return (rc ? FTN_TRUE : FTN_FALSE);
3078 
3079 /* Can't use serial interval since not block structured */
3080 
3081 #endif // KMP_USE_DYNAMIC_LOCK
3082 }
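// Illustrative user-level sketch (editorial): omp_test_lock() is the
// non-blocking counterpart of omp_set_lock() and is assumed to map onto
// __kmpc_test_lock() above, which returns nonzero only if the lock was taken.
//
//   if (omp_test_lock(&l)) {
//     /* ... lock acquired: do the protected work ... */
//     omp_unset_lock(&l);
//   } else {
//     /* ... lock busy: do something else and retry later ... */
//   }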
3083 
3084 /* try to acquire the lock */
3085 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3086 #if KMP_USE_DYNAMIC_LOCK
3087  int rc;
3088 #if USE_ITT_BUILD
3089  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3090 #endif
3091 #if OMPT_SUPPORT && OMPT_OPTIONAL
3092  // This is the case, if called from omp_init_lock_with_hint:
3093  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3094  if (!codeptr)
3095  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3096  if (ompt_enabled.ompt_callback_mutex_acquire) {
3097  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3098  ompt_mutex_nest_lock, omp_lock_hint_none,
3099  __ompt_get_mutex_impl_type(user_lock),
3100  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3101  }
3102 #endif
3103  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3104 #if USE_ITT_BUILD
3105  if (rc) {
3106  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3107  } else {
3108  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3109  }
3110 #endif
3111 #if OMPT_SUPPORT && OMPT_OPTIONAL
3112  if (ompt_enabled.enabled && rc) {
3113  if (rc == 1) {
3114  if (ompt_enabled.ompt_callback_mutex_acquired) {
3115  // lock_first
3116  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3117  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3118  codeptr);
3119  }
3120  } else {
3121  if (ompt_enabled.ompt_callback_nest_lock) {
3122  // lock_next
3123  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3124  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3125  }
3126  }
3127  }
3128 #endif
3129  return rc;
3130 
3131 #else // KMP_USE_DYNAMIC_LOCK
3132 
3133  kmp_user_lock_p lck;
3134  int rc;
3135 
3136  if ((__kmp_user_lock_kind == lk_tas) &&
3137  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3138  OMP_NEST_LOCK_T_SIZE)) {
3139  lck = (kmp_user_lock_p)user_lock;
3140  }
3141 #if KMP_USE_FUTEX
3142  else if ((__kmp_user_lock_kind == lk_futex) &&
3143  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3144  OMP_NEST_LOCK_T_SIZE)) {
3145  lck = (kmp_user_lock_p)user_lock;
3146  }
3147 #endif
3148  else {
3149  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3150  }
3151 
3152 #if USE_ITT_BUILD
3153  __kmp_itt_lock_acquiring(lck);
3154 #endif /* USE_ITT_BUILD */
3155 
3156 #if OMPT_SUPPORT && OMPT_OPTIONAL
3157  // This is the case, if called from omp_init_lock_with_hint:
3158  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3159  if (!codeptr)
3160  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3161  if (ompt_enabled.enabled &&
3162  ompt_enabled.ompt_callback_mutex_acquire) {
3163  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3164  ompt_mutex_nest_lock, omp_lock_hint_none,
3165  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3166  codeptr);
3167  }
3168 #endif
3169 
3170  rc = TEST_NESTED_LOCK(lck, gtid);
3171 #if USE_ITT_BUILD
3172  if (rc) {
3173  __kmp_itt_lock_acquired(lck);
3174  } else {
3175  __kmp_itt_lock_cancelled(lck);
3176  }
3177 #endif /* USE_ITT_BUILD */
3178 #if OMPT_SUPPORT && OMPT_OPTIONAL
3179  if (ompt_enabled.enabled && rc) {
3180  if (rc == 1) {
3181  if (ompt_enabled.ompt_callback_mutex_acquired) {
3182  // lock_first
3183  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3184  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3185  }
3186  } else {
3187  if (ompt_enabled.ompt_callback_nest_lock) {
3188  // lock_next
3189  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3190  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3191  }
3192  }
3193  }
3194 #endif
3195  return rc;
3196 
3197 /* Can't use serial interval since not block structured */
3198 
3199 #endif // KMP_USE_DYNAMIC_LOCK
3200 }
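// Illustrative user-level sketch (editorial): for nestable locks the test
// routine returns the new nesting count on success (rc == 1 is the "lock_first"
// case handled above) and 0 on failure.
//
//   omp_nest_lock_t nl;
//   omp_init_nest_lock(&nl);
//   if (omp_test_nest_lock(&nl)) { // count == 1: first acquisition
//     omp_set_nest_lock(&nl);      // re-acquired by the owner: count == 2
//     omp_unset_nest_lock(&nl);
//     omp_unset_nest_lock(&nl);    // lock is released when the count hits 0
//   }
//   omp_destroy_nest_lock(&nl);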
3201 
3202 // Interface to fast scalable reduce methods routines
3203 
3204 // Keep the selected method in a thread-local structure for cross-function
3205 // use: it is read again in the __kmpc_end_reduce* functions.
3206 // An alternative would be to re-determine the method once more inside the
3207 // __kmpc_end_reduce* functions (which would require a new prototype).
3208 // AT: which solution is better?
3209 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3210  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3211 
3212 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3213  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3214 
3215 // description of the packed_reduction_method variable: look at the macros in
3216 // kmp.h
3217 
3218 // used in a critical section reduce block
3219 static __forceinline void
3220 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3221  kmp_critical_name *crit) {
3222 
3223  // this lock was visible to a customer and to the threading profile tool as a
3224  // serial overhead span (although it's used for an internal purpose only)
3225  // why was it visible in the previous implementation?
3226  // should we keep it visible in the new reduce block?
3227  kmp_user_lock_p lck;
3228 
3229 #if KMP_USE_DYNAMIC_LOCK
3230 
3231  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3232  // Check if it is initialized.
3233  if (*lk == 0) {
3234  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3235  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3236  KMP_GET_D_TAG(__kmp_user_lock_seq));
3237  } else {
3238  __kmp_init_indirect_csptr(crit, loc, global_tid,
3239  KMP_GET_I_TAG(__kmp_user_lock_seq));
3240  }
3241  }
3242  // Branch for accessing the actual lock object and set operation. This
3243  // branching is inevitable since this lock initialization does not follow the
3244  // normal dispatch path (lock table is not used).
3245  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3246  lck = (kmp_user_lock_p)lk;
3247  KMP_DEBUG_ASSERT(lck != NULL);
3248  if (__kmp_env_consistency_check) {
3249  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3250  }
3251  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3252  } else {
3253  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3254  lck = ilk->lock;
3255  KMP_DEBUG_ASSERT(lck != NULL);
3256  if (__kmp_env_consistency_check) {
3257  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3258  }
3259  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3260  }
3261 
3262 #else // KMP_USE_DYNAMIC_LOCK
3263 
3264  // We know that the fast reduction code is only emitted by Intel compilers
3265  // with 32 byte critical sections. If there isn't enough space, then we
3266  // have to use a pointer.
3267  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3268  lck = (kmp_user_lock_p)crit;
3269  } else {
3270  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3271  }
3272  KMP_DEBUG_ASSERT(lck != NULL);
3273 
3274  if (__kmp_env_consistency_check)
3275  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3276 
3277  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3278 
3279 #endif // KMP_USE_DYNAMIC_LOCK
3280 }
3281 
3282 // used in a critical section reduce block
3283 static __forceinline void
3284 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3285  kmp_critical_name *crit) {
3286 
3287  kmp_user_lock_p lck;
3288 
3289 #if KMP_USE_DYNAMIC_LOCK
3290 
3291  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3292  lck = (kmp_user_lock_p)crit;
3293  if (__kmp_env_consistency_check)
3294  __kmp_pop_sync(global_tid, ct_critical, loc);
3295  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3296  } else {
3297  kmp_indirect_lock_t *ilk =
3298  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3299  if (__kmp_env_consistency_check)
3300  __kmp_pop_sync(global_tid, ct_critical, loc);
3301  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3302  }
3303 
3304 #else // KMP_USE_DYNAMIC_LOCK
3305 
3306  // We know that the fast reduction code is only emitted by Intel compilers
3307  // with 32 byte critical sections. If there isn't enough space, then we have
3308  // to use a pointer.
3309  if (__kmp_base_user_lock_size > 32) {
3310  lck = *((kmp_user_lock_p *)crit);
3311  KMP_ASSERT(lck != NULL);
3312  } else {
3313  lck = (kmp_user_lock_p)crit;
3314  }
3315 
3316  if (__kmp_env_consistency_check)
3317  __kmp_pop_sync(global_tid, ct_critical, loc);
3318 
3319  __kmp_release_user_lock_with_checks(lck, global_tid);
3320 
3321 #endif // KMP_USE_DYNAMIC_LOCK
3322 } // __kmp_end_critical_section_reduce_block
3323 
3324 static __forceinline int
3325 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3326  int *task_state) {
3327  kmp_team_t *team;
3328 
3329  // Check whether we are inside the teams construct.
3330  if (th->th.th_teams_microtask) {
3331  *team_p = team = th->th.th_team;
3332  if (team->t.t_level == th->th.th_teams_level) {
3333  // This is reduction at teams construct.
3334  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3335  // Let's swap teams temporarily for the reduction.
3336  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3337  th->th.th_team = team->t.t_parent;
3338  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3339  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3340  *task_state = th->th.th_task_state;
3341  th->th.th_task_state = 0;
3342 
3343  return 1;
3344  }
3345  }
3346  return 0;
3347 }
3348 
3349 static __forceinline void
3350 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3351  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3352  th->th.th_info.ds.ds_tid = 0;
3353  th->th.th_team = team;
3354  th->th.th_team_nproc = team->t.t_nproc;
3355  th->th.th_task_team = team->t.t_task_team[task_state];
3356  th->th.th_task_state = task_state;
3357 }
3358 
3359 /* 2.a.i. Reduce Block without a terminating barrier */
3375 kmp_int32
3376 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3377  size_t reduce_size, void *reduce_data,
3378  void (*reduce_func)(void *lhs_data, void *rhs_data),
3379  kmp_critical_name *lck) {
3380 
3381  KMP_COUNT_BLOCK(REDUCE_nowait);
3382  int retval = 0;
3383  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3384  kmp_info_t *th;
3385  kmp_team_t *team;
3386  int teams_swapped = 0, task_state;
3387  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3388 
3389  // why do we need this initialization here at all?
3390  // A reduction clause cannot be used as a stand-alone directive.
3391 
3392  // do not call __kmp_serial_initialize(); it will be called by
3393  // __kmp_parallel_initialize() if needed
3394  // possible false-positive race detection by the thread checker ???
3395  if (!TCR_4(__kmp_init_parallel))
3396  __kmp_parallel_initialize();
3397 
3398  __kmp_resume_if_soft_paused();
3399 
3400 // check correctness of reduce block nesting
3401 #if KMP_USE_DYNAMIC_LOCK
3402  if (__kmp_env_consistency_check)
3403  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3404 #else
3405  if (__kmp_env_consistency_check)
3406  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3407 #endif
3408 
3409  th = __kmp_thread_from_gtid(global_tid);
3410  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3411 
3412  // the packed_reduction_method value will be reused by the __kmpc_end_reduce*
3413  // functions, so the value has to be kept in a variable.
3414  // The variable should be either a construct-specific or a thread-specific
3415  // property, not a team-specific property
3416  // (a thread can reach the next reduce block on the next construct, and the
3417  // reduce method may differ on that construct).
3418  // An ident_t "loc" parameter could be used as a construct-specific property
3419  // (but what if loc == 0?)
3420  // (if both construct-specific and team-specific variables were shared,
3421  // unnecessary extra syncs would be needed).
3422  // A thread-specific variable is better with respect to the two issues above
3423  // (next construct and extra syncs);
3424  // a thread-specific "th_local.packed_reduction_method" variable is used now.
3425  // Each thread executes the 'determine' and 'set' lines (having a single
3426  // thread do it would only add unnecessary extra syncs).
3427 
3428  packed_reduction_method = __kmp_determine_reduction_method(
3429  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3430  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3431 
3432  if (packed_reduction_method == critical_reduce_block) {
3433 
3434  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3435  retval = 1;
3436 
3437  } else if (packed_reduction_method == empty_reduce_block) {
3438 
3439  // usage: if team size == 1, no synchronization is required ( Intel
3440  // platforms only )
3441  retval = 1;
3442 
3443  } else if (packed_reduction_method == atomic_reduce_block) {
3444 
3445  retval = 2;
3446 
3447  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3448  // won't be called by the code gen)
3449  // (it's not quite good, because the checking block has been closed by
3450  // this 'pop',
3451  // but atomic operation has not been executed yet, will be executed
3452  // slightly later, literally on next instruction)
3453  if (__kmp_env_consistency_check)
3454  __kmp_pop_sync(global_tid, ct_reduce, loc);
3455 
3456  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3457  tree_reduce_block)) {
3458 
3459 // AT: performance issue: there is a real barrier here
3460 // AT: (if the master is slow, other threads are blocked here waiting for the
3461 // master to arrive and release them)
3462 // AT: (this is not what a customer expects when specifying the NOWAIT clause)
3463 // AT: (specifying NOWAIT won't improve performance, which will be
3464 // confusing to a customer)
3465 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3466 // might go faster and be more in line with the sense of NOWAIT
3467 // AT: TODO: run the EPCC benchmark and compare times
3468 
3469 // this barrier should be invisible to a customer and to the threading profile
3470 // tool (it's neither a terminating barrier nor customer's code, it's
3471 // used for an internal purpose)
3472 #if OMPT_SUPPORT
3473  // JP: can this barrier potentially lead to task scheduling?
3474  // JP: as long as there is a barrier in the implementation, OMPT should and
3475  // will provide the barrier events,
3476  // so we set up the necessary frame/return addresses.
3477  ompt_frame_t *ompt_frame;
3478  if (ompt_enabled.enabled) {
3479  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3480  if (ompt_frame->enter_frame.ptr == NULL)
3481  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3482  OMPT_STORE_RETURN_ADDRESS(global_tid);
3483  }
3484 #endif
3485 #if USE_ITT_NOTIFY
3486  __kmp_threads[global_tid]->th.th_ident = loc;
3487 #endif
3488  retval =
3489  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3490  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3491  retval = (retval != 0) ? (0) : (1);
3492 #if OMPT_SUPPORT && OMPT_OPTIONAL
3493  if (ompt_enabled.enabled) {
3494  ompt_frame->enter_frame = ompt_data_none;
3495  }
3496 #endif
3497 
3498  // all other workers except the master should do this pop here
3499  // (none of the other workers will get to __kmpc_end_reduce_nowait())
3500  if (__kmp_env_consistency_check) {
3501  if (retval == 0) {
3502  __kmp_pop_sync(global_tid, ct_reduce, loc);
3503  }
3504  }
3505 
3506  } else {
3507 
3508  // should never reach this block
3509  KMP_ASSERT(0); // "unexpected method"
3510  }
3511  if (teams_swapped) {
3512  __kmp_restore_swapped_teams(th, team, task_state);
3513  }
3514  KA_TRACE(
3515  10,
3516  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3517  global_tid, packed_reduction_method, retval));
3518 
3519  return retval;
3520 }
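// Compiler code-generation sketch for a nowait reduction (editorial,
// illustrative only; s, s_priv, reduce_func and crit_name are hypothetical):
// the return value selects how the partial results are combined.
//
//   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(s), &s_priv,
//                                reduce_func, &crit_name)) {
//   case 1: // critical/empty/tree-master path: combine, then close the region
//     s += s_priv;
//     __kmpc_end_reduce_nowait(loc, gtid, &crit_name);
//     break;
//   case 2: // atomic path: combine with an atomic update; no end call
//     /* atomic */ s += s_priv;
//     break;
//   default: // 0: tree-barrier worker; reduce_func already folded s_priv in
//     break;
//   }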
3521 
3530 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3531  kmp_critical_name *lck) {
3532 
3533  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3534 
3535  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3536 
3537  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3538 
3539  if (packed_reduction_method == critical_reduce_block) {
3540 
3541  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3542 
3543  } else if (packed_reduction_method == empty_reduce_block) {
3544 
3545  // usage: if team size == 1, no synchronization is required ( on Intel
3546  // platforms only )
3547 
3548  } else if (packed_reduction_method == atomic_reduce_block) {
3549 
3550  // neither master nor other workers should get here
3551  // (code gen does not generate this call in case 2: atomic reduce block)
3552  // actually it would be better to remove this else-if entirely;
3553  // after removal this value would be caught by the 'else' branch and assert
3554 
3555  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3556  tree_reduce_block)) {
3557 
3558  // only master gets here
3559 
3560  } else {
3561 
3562  // should never reach this block
3563  KMP_ASSERT(0); // "unexpected method"
3564  }
3565 
3566  if (__kmp_env_consistency_check)
3567  __kmp_pop_sync(global_tid, ct_reduce, loc);
3568 
3569  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3570  global_tid, packed_reduction_method));
3571 
3572  return;
3573 }
3574 
3575 /* 2.a.ii. Reduce Block with a terminating barrier */
3576 
3592 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3593  size_t reduce_size, void *reduce_data,
3594  void (*reduce_func)(void *lhs_data, void *rhs_data),
3595  kmp_critical_name *lck) {
3596  KMP_COUNT_BLOCK(REDUCE_wait);
3597  int retval = 0;
3598  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3599  kmp_info_t *th;
3600  kmp_team_t *team;
3601  int teams_swapped = 0, task_state;
3602 
3603  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3604 
3605  // why do we need this initialization here at all?
3606  // A reduction clause cannot be a stand-alone directive.
3607 
3608  // do not call __kmp_serial_initialize(); it will be called by
3609  // __kmp_parallel_initialize() if needed
3610  // possible false-positive race detection by the thread checker ???
3611  if (!TCR_4(__kmp_init_parallel))
3612  __kmp_parallel_initialize();
3613 
3614  __kmp_resume_if_soft_paused();
3615 
3616 // check correctness of reduce block nesting
3617 #if KMP_USE_DYNAMIC_LOCK
3618  if (__kmp_env_consistency_check)
3619  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3620 #else
3621  if (__kmp_env_consistency_check)
3622  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3623 #endif
3624 
3625  th = __kmp_thread_from_gtid(global_tid);
3626  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3627 
3628  packed_reduction_method = __kmp_determine_reduction_method(
3629  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3630  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3631 
3632  if (packed_reduction_method == critical_reduce_block) {
3633 
3634  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3635  retval = 1;
3636 
3637  } else if (packed_reduction_method == empty_reduce_block) {
3638 
3639  // usage: if team size == 1, no synchronization is required ( Intel
3640  // platforms only )
3641  retval = 1;
3642 
3643  } else if (packed_reduction_method == atomic_reduce_block) {
3644 
3645  retval = 2;
3646 
3647  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3648  tree_reduce_block)) {
3649 
3650 // case tree_reduce_block:
3651 // this barrier should be visible to a customer and to the threading profile
3652 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3653 #if OMPT_SUPPORT
3654  ompt_frame_t *ompt_frame;
3655  if (ompt_enabled.enabled) {
3656  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3657  if (ompt_frame->enter_frame.ptr == NULL)
3658  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3659  OMPT_STORE_RETURN_ADDRESS(global_tid);
3660  }
3661 #endif
3662 #if USE_ITT_NOTIFY
3663  __kmp_threads[global_tid]->th.th_ident =
3664  loc; // needed for correct notification of frames
3665 #endif
3666  retval =
3667  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3668  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3669  retval = (retval != 0) ? (0) : (1);
3670 #if OMPT_SUPPORT && OMPT_OPTIONAL
3671  if (ompt_enabled.enabled) {
3672  ompt_frame->enter_frame = ompt_data_none;
3673  }
3674 #endif
3675 
3676  // all workers except the master should do this pop here
3677  // (none of the workers except the master will enter __kmpc_end_reduce())
3678  if (__kmp_env_consistency_check) {
3679  if (retval == 0) { // 0: all other workers; 1: master
3680  __kmp_pop_sync(global_tid, ct_reduce, loc);
3681  }
3682  }
3683 
3684  } else {
3685 
3686  // should never reach this block
3687  KMP_ASSERT(0); // "unexpected method"
3688  }
3689  if (teams_swapped) {
3690  __kmp_restore_swapped_teams(th, team, task_state);
3691  }
3692 
3693  KA_TRACE(10,
3694  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3695  global_tid, packed_reduction_method, retval));
3696  return retval;
3697 }
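/* A minimal usage sketch (not part of the runtime) of how a compiler might
   lower a blocking "reduction(+:sum)" onto this entry point. The callback
   name, the private copy sum_priv and the lock variable are hypothetical;
   only the meaning of the return codes follows from the code above
   (1: caller combines the values and calls __kmpc_end_reduce, 2: caller
   should use atomic updates, 0: nothing to do for this thread).

   static kmp_critical_name crit = {0};
   static void reduce_sum(void *lhs, void *rhs) {
     *(int *)lhs += *(int *)rhs; // combine one private copy into another
   }
   ...
   int ret = __kmpc_reduce(&loc, gtid, 1, sizeof(int), &sum_priv, reduce_sum,
                           &crit);
   if (ret == 1) {
     sum += sum_priv;                      // combine into the shared variable
     __kmpc_end_reduce(&loc, gtid, &crit); // end critical / release workers
   } else if (ret == 2) {
     // atomic path: update the shared variable with an atomic operation
     // (how the terminating barrier is then reached depends on the lowering)
   }
*/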
3698 
3709 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3710  kmp_critical_name *lck) {
3711 
3712  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3713  kmp_info_t *th;
3714  kmp_team_t *team;
3715  int teams_swapped = 0, task_state;
3716 
3717  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3718 
3719  th = __kmp_thread_from_gtid(global_tid);
3720  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3721 
3722  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3723 
3724  // this barrier should be visible to a customer and to the threading profile
3725  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3726 
3727  if (packed_reduction_method == critical_reduce_block) {
3728  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3729 
3730 // TODO: implicit barrier: should be exposed
3731 #if OMPT_SUPPORT
3732  ompt_frame_t *ompt_frame;
3733  if (ompt_enabled.enabled) {
3734  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3735  if (ompt_frame->enter_frame.ptr == NULL)
3736  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3737  OMPT_STORE_RETURN_ADDRESS(global_tid);
3738  }
3739 #endif
3740 #if USE_ITT_NOTIFY
3741  __kmp_threads[global_tid]->th.th_ident = loc;
3742 #endif
3743  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3744 #if OMPT_SUPPORT && OMPT_OPTIONAL
3745  if (ompt_enabled.enabled) {
3746  ompt_frame->enter_frame = ompt_data_none;
3747  }
3748 #endif
3749 
3750  } else if (packed_reduction_method == empty_reduce_block) {
3751 
3752 // usage: if team size==1, no synchronization is required (Intel platforms only)
3753 
3754 // TODO: implicit barrier: should be exposed
3755 #if OMPT_SUPPORT
3756  ompt_frame_t *ompt_frame;
3757  if (ompt_enabled.enabled) {
3758  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3759  if (ompt_frame->enter_frame.ptr == NULL)
3760  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3761  OMPT_STORE_RETURN_ADDRESS(global_tid);
3762  }
3763 #endif
3764 #if USE_ITT_NOTIFY
3765  __kmp_threads[global_tid]->th.th_ident = loc;
3766 #endif
3767  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3768 #if OMPT_SUPPORT && OMPT_OPTIONAL
3769  if (ompt_enabled.enabled) {
3770  ompt_frame->enter_frame = ompt_data_none;
3771  }
3772 #endif
3773 
3774  } else if (packed_reduction_method == atomic_reduce_block) {
3775 
3776 #if OMPT_SUPPORT
3777  ompt_frame_t *ompt_frame;
3778  if (ompt_enabled.enabled) {
3779  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3780  if (ompt_frame->enter_frame.ptr == NULL)
3781  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3782  OMPT_STORE_RETURN_ADDRESS(global_tid);
3783  }
3784 #endif
3785 // TODO: implicit barrier: should be exposed
3786 #if USE_ITT_NOTIFY
3787  __kmp_threads[global_tid]->th.th_ident = loc;
3788 #endif
3789  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3790 #if OMPT_SUPPORT && OMPT_OPTIONAL
3791  if (ompt_enabled.enabled) {
3792  ompt_frame->enter_frame = ompt_data_none;
3793  }
3794 #endif
3795 
3796  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3797  tree_reduce_block)) {
3798 
3799  // only master executes here (master releases all other workers)
3800  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3801  global_tid);
3802 
3803  } else {
3804 
3805  // should never reach this block
3806  KMP_ASSERT(0); // "unexpected method"
3807  }
3808  if (teams_swapped) {
3809  __kmp_restore_swapped_teams(th, team, task_state);
3810  }
3811 
3812  if (__kmp_env_consistency_check)
3813  __kmp_pop_sync(global_tid, ct_reduce, loc);
3814 
3815  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3816  global_tid, packed_reduction_method));
3817 
3818  return;
3819 }
3820 
3821 #undef __KMP_GET_REDUCTION_METHOD
3822 #undef __KMP_SET_REDUCTION_METHOD
3823 
3824 /* end of interface to fast scalable reduce routines */
3825 
3826 kmp_uint64 __kmpc_get_taskid() {
3827 
3828  kmp_int32 gtid;
3829  kmp_info_t *thread;
3830 
3831  gtid = __kmp_get_gtid();
3832  if (gtid < 0) {
3833  return 0;
3834  }
3835  thread = __kmp_thread_from_gtid(gtid);
3836  return thread->th.th_current_task->td_task_id;
3837 
3838 } // __kmpc_get_taskid
3839 
3840 kmp_uint64 __kmpc_get_parent_taskid() {
3841 
3842  kmp_int32 gtid;
3843  kmp_info_t *thread;
3844  kmp_taskdata_t *parent_task;
3845 
3846  gtid = __kmp_get_gtid();
3847  if (gtid < 0) {
3848  return 0;
3849  }
3850  thread = __kmp_thread_from_gtid(gtid);
3851  parent_task = thread->th.th_current_task->td_parent;
3852  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3853 
3854 } // __kmpc_get_parent_taskid
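/* A minimal usage sketch (not part of the runtime) of the two task-id helpers
   above, e.g. from a debugging hook; the printf format assumes kmp_uint64 fits
   in unsigned long long:

   #include <stdio.h>
   void debug_print_task_ids(void) {
     printf("task %llu (parent %llu)\n",
            (unsigned long long)__kmpc_get_taskid(),
            (unsigned long long)__kmpc_get_parent_taskid());
   }
*/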
3855 
3867 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3868  const struct kmp_dim *dims) {
3869  int j, idx;
3870  kmp_int64 last, trace_count;
3871  kmp_info_t *th = __kmp_threads[gtid];
3872  kmp_team_t *team = th->th.th_team;
3873  kmp_uint32 *flags;
3874  kmp_disp_t *pr_buf = th->th.th_dispatch;
3875  dispatch_shared_info_t *sh_buf;
3876 
3877  KA_TRACE(
3878  20,
3879  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3880  gtid, num_dims, !team->t.t_serialized));
3881  KMP_DEBUG_ASSERT(dims != NULL);
3882  KMP_DEBUG_ASSERT(num_dims > 0);
3883 
3884  if (team->t.t_serialized) {
3885  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3886  return; // no dependencies if team is serialized
3887  }
3888  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3889  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3890  // the next loop
3891  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3892 
3893  // Save bounds info into allocated private buffer
3894  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3895  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3896  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3897  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3898  pr_buf->th_doacross_info[0] =
3899  (kmp_int64)num_dims; // first element is number of dimensions
3900  // Also save the address of num_done so it can be accessed later without
3901  // knowing the buffer index
3902  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3903  pr_buf->th_doacross_info[2] = dims[0].lo;
3904  pr_buf->th_doacross_info[3] = dims[0].up;
3905  pr_buf->th_doacross_info[4] = dims[0].st;
3906  last = 5;
3907  for (j = 1; j < num_dims; ++j) {
3908  kmp_int64
3909  range_length; // range of dimension j, kept for all dimensions but dims[0]
3910  if (dims[j].st == 1) { // most common case
3911  // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3912  range_length = dims[j].up - dims[j].lo + 1;
3913  } else {
3914  if (dims[j].st > 0) {
3915  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3916  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3917  } else { // negative increment
3918  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3919  range_length =
3920  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3921  }
3922  }
3923  pr_buf->th_doacross_info[last++] = range_length;
3924  pr_buf->th_doacross_info[last++] = dims[j].lo;
3925  pr_buf->th_doacross_info[last++] = dims[j].up;
3926  pr_buf->th_doacross_info[last++] = dims[j].st;
3927  }
3928 
3929  // Compute total trip count.
3930  // Start with range of dims[0] which we don't need to keep in the buffer.
3931  if (dims[0].st == 1) { // most common case
3932  trace_count = dims[0].up - dims[0].lo + 1;
3933  } else if (dims[0].st > 0) {
3934  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3935  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3936  } else { // negative increment
3937  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3938  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3939  }
3940  for (j = 1; j < num_dims; ++j) {
3941  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3942  }
3943  KMP_DEBUG_ASSERT(trace_count > 0);
3944 
3945  // Check whether the shared buffer is still occupied by a previous loop
3946  // (the one that used buffer index idx - __kmp_dispatch_num_buffers)
3947  if (idx != sh_buf->doacross_buf_idx) {
3948  // Shared buffer is occupied, wait for it to be free
3949  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3950  __kmp_eq_4, NULL);
3951  }
3952 #if KMP_32_BIT_ARCH
3953  // Check if we are the first thread. After the CAS the first thread gets 0,
3954  // others get 1 if initialization is in progress, allocated pointer otherwise.
3955  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3956  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3957  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3958 #else
3959  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3960  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3961 #endif
3962  if (flags == NULL) {
3963  // we are the first thread, allocate the array of flags
3964  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3965  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3966  KMP_MB();
3967  sh_buf->doacross_flags = flags;
3968  } else if (flags == (kmp_uint32 *)1) {
3969 #if KMP_32_BIT_ARCH
3970  // initialization is still in progress, need to wait
3971  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3972 #else
3973  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3974 #endif
3975  KMP_YIELD(TRUE);
3976  KMP_MB();
3977  } else {
3978  KMP_MB();
3979  }
3980  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3981  pr_buf->th_doacross_flags =
3982  sh_buf->doacross_flags; // save private copy in order to not
3983  // touch shared buffer on each iteration
3984  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
3985 }
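/* A minimal sketch (not part of the runtime) of the initialization a compiler
   could emit for "#pragma omp for ordered(2)" over i in [0,N) and j in [0,M)
   with unit strides; N, M, loc and gtid are assumptions. Note that .up is the
   inclusive upper bound, as the range computation above shows:

   struct kmp_dim dims[2];
   dims[0].lo = 0; dims[0].up = N - 1; dims[0].st = 1;
   dims[1].lo = 0; dims[1].up = M - 1; dims[1].st = 1;
   __kmpc_doacross_init(&loc, gtid, 2, dims);
*/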
3986 
3987 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
3988  kmp_int32 shft, num_dims, i;
3989  kmp_uint32 flag;
3990  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3991  kmp_info_t *th = __kmp_threads[gtid];
3992  kmp_team_t *team = th->th.th_team;
3993  kmp_disp_t *pr_buf;
3994  kmp_int64 lo, up, st;
3995 
3996  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3997  if (team->t.t_serialized) {
3998  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
3999  return; // no dependencies if team is serialized
4000  }
4001 
4002  // calculate sequential iteration number and check out-of-bounds condition
4003  pr_buf = th->th.th_dispatch;
4004  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4005  num_dims = pr_buf->th_doacross_info[0];
4006  lo = pr_buf->th_doacross_info[2];
4007  up = pr_buf->th_doacross_info[3];
4008  st = pr_buf->th_doacross_info[4];
4009  if (st == 1) { // most common case
4010  if (vec[0] < lo || vec[0] > up) {
4011  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4012  "bounds [%lld,%lld]\n",
4013  gtid, vec[0], lo, up));
4014  return;
4015  }
4016  iter_number = vec[0] - lo;
4017  } else if (st > 0) {
4018  if (vec[0] < lo || vec[0] > up) {
4019  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4020  "bounds [%lld,%lld]\n",
4021  gtid, vec[0], lo, up));
4022  return;
4023  }
4024  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4025  } else { // negative increment
4026  if (vec[0] > lo || vec[0] < up) {
4027  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4028  "bounds [%lld,%lld]\n",
4029  gtid, vec[0], lo, up));
4030  return;
4031  }
4032  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4033  }
4034  for (i = 1; i < num_dims; ++i) {
4035  kmp_int64 iter, ln;
4036  kmp_int32 j = i * 4;
4037  ln = pr_buf->th_doacross_info[j + 1];
4038  lo = pr_buf->th_doacross_info[j + 2];
4039  up = pr_buf->th_doacross_info[j + 3];
4040  st = pr_buf->th_doacross_info[j + 4];
4041  if (st == 1) {
4042  if (vec[i] < lo || vec[i] > up) {
4043  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4044  "bounds [%lld,%lld]\n",
4045  gtid, vec[i], lo, up));
4046  return;
4047  }
4048  iter = vec[i] - lo;
4049  } else if (st > 0) {
4050  if (vec[i] < lo || vec[i] > up) {
4051  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4052  "bounds [%lld,%lld]\n",
4053  gtid, vec[i], lo, up));
4054  return;
4055  }
4056  iter = (kmp_uint64)(vec[i] - lo) / st;
4057  } else { // st < 0
4058  if (vec[i] > lo || vec[i] < up) {
4059  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4060  "bounds [%lld,%lld]\n",
4061  gtid, vec[i], lo, up));
4062  return;
4063  }
4064  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4065  }
4066  iter_number = iter + ln * iter_number;
4067  }
4068  shft = iter_number % 32; // use 32-bit granularity
4069  iter_number >>= 5; // divided by 32
4070  flag = 1 << shft;
4071  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4072  KMP_YIELD(TRUE);
4073  }
4074  KMP_MB();
4075  KA_TRACE(20,
4076  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4077  gtid, (iter_number << 5) + shft));
4078 }
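/* Worked example of the flag indexing above (an illustration, not runtime
   code): for collapsed iteration number 70, shft = 70 % 32 = 6, the word
   index becomes 70 >> 5 = 2, and the thread spins until bit 6 (flag 0x40) of
   th_doacross_flags[2] has been set by the matching __kmpc_doacross_post(). */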
4079 
4080 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4081  kmp_int32 shft, num_dims, i;
4082  kmp_uint32 flag;
4083  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4084  kmp_info_t *th = __kmp_threads[gtid];
4085  kmp_team_t *team = th->th.th_team;
4086  kmp_disp_t *pr_buf;
4087  kmp_int64 lo, st;
4088 
4089  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4090  if (team->t.t_serialized) {
4091  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4092  return; // no dependencies if team is serialized
4093  }
4094 
4095  // calculate sequential iteration number (same as in "wait" but no
4096  // out-of-bounds checks)
4097  pr_buf = th->th.th_dispatch;
4098  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4099  num_dims = pr_buf->th_doacross_info[0];
4100  lo = pr_buf->th_doacross_info[2];
4101  st = pr_buf->th_doacross_info[4];
4102  if (st == 1) { // most common case
4103  iter_number = vec[0] - lo;
4104  } else if (st > 0) {
4105  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4106  } else { // negative increment
4107  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4108  }
4109  for (i = 1; i < num_dims; ++i) {
4110  kmp_int64 iter, ln;
4111  kmp_int32 j = i * 4;
4112  ln = pr_buf->th_doacross_info[j + 1];
4113  lo = pr_buf->th_doacross_info[j + 2];
4114  st = pr_buf->th_doacross_info[j + 4];
4115  if (st == 1) {
4116  iter = vec[i] - lo;
4117  } else if (st > 0) {
4118  iter = (kmp_uint64)(vec[i] - lo) / st;
4119  } else { // st < 0
4120  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4121  }
4122  iter_number = iter + ln * iter_number;
4123  }
4124  shft = iter_number % 32; // use 32-bit granularity
4125  iter_number >>= 5; // divided by 32
4126  flag = 1 << shft;
4127  KMP_MB();
4128  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4129  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4130  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4131  (iter_number << 5) + shft));
4132 }
4133 
4134 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4135  kmp_int32 num_done;
4136  kmp_info_t *th = __kmp_threads[gtid];
4137  kmp_team_t *team = th->th.th_team;
4138  kmp_disp_t *pr_buf = th->th.th_dispatch;
4139 
4140  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4141  if (team->t.t_serialized) {
4142  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4143  return; // nothing to do
4144  }
4145  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4146  if (num_done == th->th.th_team_nproc) {
4147  // we are the last thread, need to free shared resources
4148  int idx = pr_buf->th_doacross_buf_idx - 1;
4149  dispatch_shared_info_t *sh_buf =
4150  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4151  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4152  (kmp_int64)&sh_buf->doacross_num_done);
4153  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4154  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4155  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4156  sh_buf->doacross_flags = NULL;
4157  sh_buf->doacross_num_done = 0;
4158  sh_buf->doacross_buf_idx +=
4159  __kmp_dispatch_num_buffers; // free buffer for future re-use
4160  }
4161  // free private resources (the buffer index must persist across loops)
4162  pr_buf->th_doacross_flags = NULL;
4163  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4164  pr_buf->th_doacross_info = NULL;
4165  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4166 }
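/* A minimal sketch (not part of the runtime) of the call sequence a compiler
   could emit for a one-dimensional doacross loop, "#pragma omp for ordered(1)"
   with "depend(sink: i-1)" and "depend(source)"; dims, the chunk bounds lb/ub,
   loc and gtid are assumptions. An out-of-range sink (e.g. i-1 when i == 0)
   is simply ignored by __kmpc_doacross_wait(), as its bounds checks show:

   __kmpc_doacross_init(&loc, gtid, 1, dims);   // dims[0] = {0, N - 1, 1}
   for (kmp_int64 i = lb; i <= ub; ++i) {       // iterations of this thread
     kmp_int64 vec = i - 1;
     __kmpc_doacross_wait(&loc, gtid, &vec);    // depend(sink: i-1)
     // ... loop body ...
     vec = i;
     __kmpc_doacross_post(&loc, gtid, &vec);    // depend(source)
   }
   __kmpc_doacross_fini(&loc, gtid);
*/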
4167 
4168 /* omp_alloc/omp_free only defined for C/C++, not for Fortran */
4169 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4170  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
4171 }
4172 
4173 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4174  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4175 }
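/* A minimal usage sketch (not part of the runtime) of the C/C++ allocator
   entry points defined above, using the predefined OpenMP 5.0 allocator
   omp_default_mem_alloc:

   double *buf = (double *)omp_alloc(1024 * sizeof(double),
                                     omp_default_mem_alloc);
   if (buf != NULL) {
     // ... use buf ...
     omp_free(buf, omp_default_mem_alloc);
   }
*/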
4176 
4177 int __kmpc_get_target_offload(void) {
4178  if (!__kmp_init_serial) {
4179  __kmp_serial_initialize();
4180  }
4181  return __kmp_target_offload;
4182 }
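/* A minimal sketch (not part of the runtime); the returned value reflects the
   OMP_TARGET_OFFLOAD setting, and the tgt_mandatory constant is assumed from
   the kmp_target_offload_kind enum in kmp.h:

   if (__kmpc_get_target_offload() == tgt_mandatory) {
     // offload failures must be treated as fatal errors
   }
*/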
4183 
4184 int __kmpc_pause_resource(kmp_pause_status_t level) {
4185  if (!__kmp_init_serial) {
4186  return 1; // Can't pause if runtime is not initialized
4187  }
4188  return __kmp_pause_resource(level);
4189 }
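/* A minimal sketch (not part of the runtime) of a pause request; the
   kmp_soft_paused level is assumed from kmp_pause_status_t in kmp.h, and a
   non-zero return means the request was not honored (for example because the
   runtime has not been initialized yet, as the check above shows):

   if (__kmpc_pause_resource(kmp_soft_paused) != 0) {
     // pausing failed; threads and other resources remain active
   }
*/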