//===------- state-queuei.h - OpenMP GPU State Queue ------------- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of a queue to hand out OpenMP state
// objects to teams of one or more kernels.
//
// Reference:
// Thomas R.W. Scogland and Wu-chun Feng. 2015.
// Design and Evaluation of Scalable Concurrent Queues for Many-Core
// Architectures. International Conference on Performance Engineering.
//
//===----------------------------------------------------------------------===//
19 #include "state-queue.h"
/// Draw the next enqueue ticket.
///
/// Atomically adds 1 to `tail` through __kmpc_atomic_add and returns whatever
/// that call returns.
/// NOTE(review): presumably the helper is a fetch-and-add that yields the
/// value held before the increment -- confirm against its definition.
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ENQUEUE_TICKET() {
  return __kmpc_atomic_add((unsigned int *)&tail, 1u);
}
/// Draw the next dequeue ticket.
///
/// Atomically adds 1 to `head` through __kmpc_atomic_add and returns whatever
/// that call returns.
/// NOTE(review): presumably the helper is a fetch-and-add that yields the
/// value held before the increment -- confirm against its definition.
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::DEQUEUE_TICKET() {
  return __kmpc_atomic_add((unsigned int *)&head, 1u);
}
/// Map a ticket to the base id of its pass over the slots.
///
/// \param ticket  Ticket number as handed out by one of the *_TICKET helpers.
/// \returns       (ticket / SIZE) * 2, i.e. always an even number: the integer
///                division counts how many full passes of SIZE tickets precede
///                this one, and each pass consumes two consecutive ids.
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ID(uint32_t ticket) {
  return (ticket / SIZE) * 2;
}
/// Check whether `slot` is currently at turn `id`.
///
/// The slot's id is read by atomically adding 0 to ids[slot]; adding zero
/// leaves the stored value unchanged, so the call only serves to fetch it
/// through the atomic helper.
///
/// \param slot  Index into `ids`.
/// \param id    Turn number the caller is waiting for.
/// \returns     true when the value returned by the atomic add equals `id`.
template <typename ElementType, uint32_t SIZE>
INLINE bool omptarget_nvptx_Queue<ElementType, SIZE>::IsServing(uint32_t slot,
                                                                uint32_t id) {
  return __kmpc_atomic_add((unsigned int *)&ids[slot], 0u) == id;
}
/// Store `element` into elementQueue[slot].
///
/// The pointer is written with __kmpc_atomic_exchange after being
/// reinterpreted as an unsigned long long; the value returned by the exchange
/// is discarded.
///
/// \param slot     Index into `elementQueue`.
/// \param element  Pointer to store in that slot.
template <typename ElementType, uint32_t SIZE>
INLINE void
omptarget_nvptx_Queue<ElementType, SIZE>::PushElement(uint32_t slot,
                                                      ElementType *element) {
  __kmpc_atomic_exchange((unsigned long long *)&elementQueue[slot],
                         (unsigned long long)element);
}
/// Fetch the pointer held in elementQueue[slot].
///
/// The slot is read by atomically adding 0 to it (viewed as an unsigned long
/// long), so the stored value is left unchanged; the result of that call is
/// cast back to an element pointer.
///
/// \param slot  Index into `elementQueue`.
/// \returns     The value returned by the atomic add, as an ElementType *.
template <typename ElementType, uint32_t SIZE>
INLINE ElementType *
omptarget_nvptx_Queue<ElementType, SIZE>::PopElement(uint32_t slot) {
  return (ElementType *)__kmpc_atomic_add(
      (unsigned long long *)&elementQueue[slot], (unsigned long long)0);
}
/// Advance `slot` to the turn following `id`.
///
/// Writes (id + 1) % MAX_ID into ids[slot] with __kmpc_atomic_exchange; the
/// modulo wraps the id back to 0 once it reaches MAX_ID. The value returned
/// by the exchange is discarded.
///
/// \param slot  Index into `ids`.
/// \param id    Turn number that has just been completed.
template <typename ElementType, uint32_t SIZE>
INLINE void omptarget_nvptx_Queue<ElementType, SIZE>::DoneServing(uint32_t slot,
                                                                  uint32_t id) {
  __kmpc_atomic_exchange((unsigned int *)&ids[slot], (id + 1) % MAX_ID);
}
/// Place `element` into the queue.
///
/// Draws an enqueue ticket, derives the slot (ticket % SIZE) and the turn id
/// (ID(ticket) + 1, i.e. the odd id of that pass), busy-waits until the slot
/// reports that id, stores the element, and finally advances the slot's id.
///
/// \param element  Pointer to hand to the queue.
template <typename ElementType, uint32_t SIZE>
INLINE void
omptarget_nvptx_Queue<ElementType, SIZE>::Enqueue(ElementType *element) {
  uint32_t ticket = ENQUEUE_TICKET();
  uint32_t slot = ticket % SIZE;
  uint32_t id = ID(ticket) + 1;
  // Spin until it is this ticket's turn on the slot. The loop body is
  // intentionally empty: the push must happen exactly once, after the wait.
  while (!IsServing(slot, id))
    ;
  PushElement(slot, element);
  DoneServing(slot, id);
}
75 template <typename ElementType
, uint32_t SIZE
>
76 INLINE ElementType
*omptarget_nvptx_Queue
<ElementType
, SIZE
>::Dequeue() {
77 uint32_t ticket
= DEQUEUE_TICKET();
78 uint32_t slot
= ticket
% SIZE
;
79 uint32_t id
= ID(ticket
);
80 while (!IsServing(slot
, id
))
82 ElementType
*element
= PopElement(slot
);
83 // This is to populate the queue because of the lack of GPU constructors.
85 element
= &elements
[slot
];
86 DoneServing(slot
, id
);