Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / libc / src / time / gpu / nanosleep.cpp
bloba0c735502ff58924bb20ab433fd666637c311b12
1 //===-- GPU implementation of the nanosleep function ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "src/time/nanosleep.h"
11 #include "time_utils.h"
13 namespace LIBC_NAMESPACE {
// Number of nanoseconds in one second (the name notwithstanding). Dividing it
// by the clock frequency in Hz gives the length of one clock tick in
// nanoseconds, which is how the rest of this file uses it.
constexpr uint64_t TICKS_PER_NS = 1'000'000'000UL;
17 LLVM_LIBC_FUNCTION(int, nanosleep,
18 (const struct timespec *req, struct timespec *rem)) {
19 if (!GPU_CLOCKS_PER_SEC || !req)
20 return -1;
22 uint64_t nsecs = req->tv_nsec + req->tv_sec * TICKS_PER_NS;
24 uint64_t start = gpu::fixed_frequency_clock();
25 #if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
26 uint64_t end = start + nsecs / (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
27 uint64_t cur = gpu::fixed_frequency_clock();
28 // The NVPTX architecture supports sleeping and guaruntees the actual time
29 // slept will be somewhere between zero and twice the requested amount. Here
30 // we will sleep again if we undershot the time.
31 while (cur < end) {
32 LIBC_INLINE_ASM("nanosleep.u32 %0;" ::"r"(nsecs));
33 cur = gpu::fixed_frequency_clock();
34 nsecs -= nsecs > cur - start ? cur - start : 0;
36 #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
37 uint64_t end = start + nsecs / (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
38 uint64_t cur = gpu::fixed_frequency_clock();
39 // The AMDGPU architecture does not provide a sleep implementation with a
40 // known delay so we simply repeatedly sleep with a large value of ~960 clock
41 // cycles and check until we've passed the time using the known frequency.
42 __builtin_amdgcn_s_sleep(2);
43 while (cur < end) {
44 __builtin_amdgcn_s_sleep(15);
45 cur = gpu::fixed_frequency_clock();
47 #else
48 // Sleeping is not supported.
49 if (rem) {
50 rem->tv_sec = req->tv_sec;
51 rem->tv_nsec = req->tv_nsec;
53 return -1;
54 #endif
55 uint64_t stop = gpu::fixed_frequency_clock();
57 // Check to make sure we slept for at least the desired duration and set the
58 // remaining time if not.
59 uint64_t elapsed = (stop - start) * (TICKS_PER_NS / GPU_CLOCKS_PER_SEC);
60 if (elapsed < nsecs) {
61 if (rem) {
62 rem->tv_sec = (nsecs - elapsed) / TICKS_PER_NS;
63 rem->tv_nsec = (nsecs - elapsed) % TICKS_PER_NS;
65 return -1;
68 return 0;
71 } // namespace LIBC_NAMESPACE