openmp/libomptarget/deviceRTLs/common/state-queuei.h

   1 //===------- state-queuei.h - OpenMP GPU State Queue ------------- CUDA -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file contains the implementation of a queue to hand out OpenMP state
  10 // objects to teams of one or more kernels.
  11 //
  12 // Reference:
  13 // Thomas R.W. Scogland and Wu-chun Feng. 2015.
  14 // Design and Evaluation of Scalable Concurrent Queues for Many-Core
  15 // Architectures. International Conference on Performance Engineering.
  16 //
  17 //===----------------------------------------------------------------------===//
  18
  19 #include "state-queue.h"
  20
  21 template <typename ElementType, uint32_t SIZE>
  22 INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ENQUEUE_TICKET() {
  23   return __kmpc_atomic_add((unsigned int *)&tail, 1u);
  24 }
  25
  26 template <typename ElementType, uint32_t SIZE>
  27 INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::DEQUEUE_TICKET() {
  28   return __kmpc_atomic_add((unsigned int *)&head, 1u);
  29 }
  30
  31 template <typename ElementType, uint32_t SIZE>
  32 INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ID(uint32_t ticket) {
  33   return (ticket / SIZE) * 2;
  34 }
  35
  36 template <typename ElementType, uint32_t SIZE>
  37 INLINE bool omptarget_nvptx_Queue<ElementType, SIZE>::IsServing(uint32_t slot,
  38                                                                 uint32_t id) {
  39   return __kmpc_atomic_add((unsigned int *)&ids[slot], 0u) == id;
  40 }
  41
  42 template <typename ElementType, uint32_t SIZE>
  43 INLINE void
  44 omptarget_nvptx_Queue<ElementType, SIZE>::PushElement(uint32_t slot,
  45                                                       ElementType *element) {
  46   __kmpc_atomic_exchange((unsigned long long *)&elementQueue[slot],
  47                          (unsigned long long)element);
  48 }
  49
  50 template <typename ElementType, uint32_t SIZE>
  51 INLINE ElementType *
  52 omptarget_nvptx_Queue<ElementType, SIZE>::PopElement(uint32_t slot) {
  53   return (ElementType *)__kmpc_atomic_add(
  54       (unsigned long long *)&elementQueue[slot], (unsigned long long)0);
  55 }
  56
  57 template <typename ElementType, uint32_t SIZE>
  58 INLINE void omptarget_nvptx_Queue<ElementType, SIZE>::DoneServing(uint32_t slot,
  59                                                                   uint32_t id) {
  60   __kmpc_atomic_exchange((unsigned int *)&ids[slot], (id + 1) % MAX_ID);
  61 }
  62
  63 template <typename ElementType, uint32_t SIZE>
  64 INLINE void
  65 omptarget_nvptx_Queue<ElementType, SIZE>::Enqueue(ElementType *element) {
  66   uint32_t ticket = ENQUEUE_TICKET();
  67   uint32_t slot = ticket % SIZE;
  68   uint32_t id = ID(ticket) + 1;
  69   while (!IsServing(slot, id))
  70     ;
  71   PushElement(slot, element);
  72   DoneServing(slot, id);
  73 }
  74
  75 template <typename ElementType, uint32_t SIZE>
  76 INLINE ElementType *omptarget_nvptx_Queue<ElementType, SIZE>::Dequeue() {
  77   uint32_t ticket = DEQUEUE_TICKET();
  78   uint32_t slot = ticket % SIZE;
  79   uint32_t id = ID(ticket);
  80   while (!IsServing(slot, id))
  81     ;
  82   ElementType *element = PopElement(slot);
  83   // This is to populate the queue because of the lack of GPU constructors.
  84   if (element == 0)
  85     element = &elements[slot];
  86   DoneServing(slot, id);
  87   return element;
  88 }