//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_BARRIER_H
#define KMP_BARRIER_H

#include "kmp.h"
#include "kmp_i18n.h"
#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
#include <xmmintrin.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
#elif KMP_HAVE_ALIGNED_ALLOC
// C11 aligned_alloc requires the size to be a multiple of the alignment,
// so round the request up first.
#define KMP_ALIGN_UP(val, alignment)                                           \
  (((val) + (alignment)-1) / (alignment) * (alignment))
#define KMP_ALIGNED_ALLOCATE(size, alignment)                                  \
  aligned_alloc(alignment, KMP_ALIGN_UP(size, alignment))
#define KMP_ALIGNED_FREE(ptr) free(ptr)
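// Worked example (editor's illustration): the integer round-up above maps a
// request of 100 bytes at 64-byte alignment to the next multiple of 64:
//   KMP_ALIGN_UP(100, 64) == (100 + 63) / 64 * 64 == 128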
#elif KMP_HAVE_POSIX_MEMALIGN
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  void *ptr = NULL;
  int n = posix_memalign(&ptr, alignment, size);
  if (n != 0) {
    // Allocation failed; release anything that was handed back and signal
    // failure to the caller with NULL.
    if (ptr)
      free(ptr);
    return nullptr;
  }
  return ptr;
}
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE__ALIGNED_MALLOC
#include <malloc.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
#else
#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
#endif
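// Usage sketch (editor's illustration, not part of the runtime API): whichever
// branch is selected above, the two macros must be used as a matched pair,
// since mixing, e.g., _mm_malloc with plain free() is undefined behavior:
//   void *p = KMP_ALIGNED_ALLOCATE(128, 4 * CACHE_LINE);
//   if (p) { /* ... use the four-cache-line-aligned block ... */ }
//   KMP_ALIGNED_FREE(p);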
// Use four cache lines: MLC tends to prefetch the next or previous cache line
// creating a possible fake conflict between cores, so this is the only way to
// guarantee that no such prefetch can happen.
#ifndef KMP_FOURLINE_ALIGN_CACHE
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif

#define KMP_OPTIMIZE_FOR_REDUCTIONS 0
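// Editor's note (illustrative): KMP_ALIGN(4 * CACHE_LINE) expands to the
// compiler's alignment attribute, roughly
//   __attribute__((aligned(4 * CACHE_LINE)))  // GCC/Clang flavor
// so each member marked with it starts on its own four-cache-line boundary;
// the single-member wrapper structs below supply the trailing padding.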
class distributedBarrier {
  struct flags_s {
    kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
  };

  struct go_s {
    std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
  };

  struct iter_s {
    kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
  };

  struct sleep_s {
    std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
  };
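  // Editor's note (sketch): since a struct's size is rounded up to a multiple
  // of its alignment, each wrapper above occupies whole groups of four cache
  // lines, so consecutive elements of the flags/go/iter/sleep arrays below
  // can never share a prefetch-paired line:
  //   static_assert(sizeof(go_s) % (4 * CACHE_LINE) == 0, "one slot per group");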
  void init(size_t nthr);
  void resize(size_t nthr);
  void computeGo(size_t n);
  void computeVarsForN(size_t n);
public:
  enum {
    MAX_ITERS = 3,
    MAX_GOS = 8,
    IDEAL_GOS = 4,
    IDEAL_CONTENTION = 16,
  };

  flags_s *flags[MAX_ITERS];
  go_s *go;
  iter_s *iter;
  sleep_s *sleep;
  size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
  size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
  // number of go signals each requiring one write per iteration
  size_t KMP_ALIGN_CACHE num_gos;
  // number of groups of gos
  size_t KMP_ALIGN_CACHE num_groups;
  // threads per go signal
  size_t KMP_ALIGN_CACHE threads_per_go;
  // set if threads_per_go is fixed and should not be recomputed
  bool KMP_ALIGN_CACHE fix_threads_per_go;
  // threads per group
  size_t KMP_ALIGN_CACHE threads_per_group;
  // number of go signals in a group
  size_t KMP_ALIGN_CACHE gos_per_group;
  void *team_icvs;
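  // Editor's illustration (hypothetical numbers, not computed by this header):
  // when the counts divide evenly, the fields above are related by
  //   threads_per_group == threads_per_go * gos_per_group
  //   num_gos           == num_groups * gos_per_group
  // e.g. 128 threads with threads_per_go == 16 could use num_gos == 8 go
  // signals split into num_groups == 2 groups of gos_per_group == 4.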
  distributedBarrier() = delete;
  ~distributedBarrier() = delete;

  // Used instead of constructor to create aligned data
  static distributedBarrier *allocate(int nThreads) {
    distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
        sizeof(distributedBarrier), 4 * CACHE_LINE);
    if (!d) {
      KMP_FATAL(MemoryAllocFailed);
    }
    d->num_threads = 0;
    d->max_threads = 0;
    for (int i = 0; i < MAX_ITERS; ++i)
      d->flags[i] = NULL;
    d->go = NULL;
    d->iter = NULL;
    d->sleep = NULL;
    d->team_icvs = NULL;
    d->fix_threads_per_go = false;
    // calculate gos and groups ONCE on base size
    d->computeGo(nThreads);
    d->init(nThreads);
    return d;
  }
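  // Usage sketch (editor's illustration; the real call sites live elsewhere
  // in the runtime):
  //   distributedBarrier *b = distributedBarrier::allocate(nproc);
  //   /* ... threads synchronize through the barrier ... */
  //   distributedBarrier::deallocate(b);
  // allocate() must be paired with deallocate() rather than delete, because
  // the storage comes from KMP_ALIGNED_ALLOCATE and the destructor is deleted.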
  static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }

  void update_num_threads(size_t nthr) { init(nthr); }

  bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
  size_t get_num_threads() { return num_threads; }
  kmp_uint64 go_release();
  void go_reset();
};
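// Growth sketch (editor's illustration): when a team grows past the arrays'
// current capacity, callers are expected to check capacity first, e.g.
//   if (b->need_resize(new_nthr)) { /* arrays must grow */ }
//   b->update_num_threads(new_nthr); // re-runs init() for the new count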
#endif // KMP_BARRIER_H