clang/test/CodeGenCUDA/link-device-bitcode.cu

   1 // Test for linking with CUDA's libdevice as outlined in
   2 // http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice
   3 //
   4 // REQUIRES: nvptx-registered-target
   5 //
   6 // Prepare bitcode file to link with
   7 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
   8 // RUN:    -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll
   9 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
  10 // RUN:    -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll
  11 //
  12 // Make sure function in device-code gets linked in and internalized.
  13 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
  14 // RUN:    -mlink-builtin-bitcode %t.bc  -emit-llvm \
  15 // RUN:    -disable-llvm-passes -o - %s \
  16 // RUN:    | FileCheck %s -check-prefix CHECK-IR
  17
  18 // Make sure we can link two bitcode files.
  19 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
  20 // RUN:    -mlink-builtin-bitcode %t.bc -mlink-builtin-bitcode %t-2.bc \
  21 // RUN:    -emit-llvm -disable-llvm-passes -o - %s \
  22 // RUN:    | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2
  23 //
  24 // Make sure function in device-code gets linked but is not internalized
  25 // when linked with -mlink-bitcode-file instead of -mlink-builtin-bitcode.
  26 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
  27 // RUN:    -mlink-bitcode-file %t.bc -emit-llvm \
  28 // RUN:    -disable-llvm-passes -o - %s \
  29 // RUN:    | FileCheck %s -check-prefix CHECK-IR-NLD
  30 //
  31 // Make sure NVVMReflect pass is enabled in NVPTX back-end.
  32 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
  33 // RUN:    -mlink-builtin-bitcode %t.bc -S -o /dev/null %s \
  34 // RUN:    -mllvm -debug-pass=Structure 2>&1 \
  35 // RUN:    | FileCheck %s -check-prefix CHECK-REFLECT
  36
  37 #include "Inputs/cuda.h"
  38
  39 __device__ float device_mul_or_add(float a, float b); // declaration only; the IR checks at the end of this file expect a definition of it to be present after linking
  40 extern "C" __device__ double __nv_sin(double x); // declaration only, C linkage; the two-bitcode run expects an internal definition of it
  41 extern "C" __device__ double __nv_exp(double x); // declaration only, C linkage; the two-bitcode run expects an internal definition of it
  42
  43 // CHECK-IR-LABEL: define{{.*}} void @_Z26should_not_be_internalizedPf(
  44 // CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(
  45 __device__ void should_not_be_internalized(float *data) {}
     // Empty-bodied device function defined directly in this file (not in the
     // linked bitcode); the IR label directive above matches its definition.
     // NOTE(review): no RUN line in this file enables the CHECK-PTX prefix, so
     // the PTX label directive above appears to be unchecked -- confirm.
  46
  47 // Make sure the kernel itself has not been internalized.
  48 // CHECK-IR-LABEL: define{{.*}} void @_Z6kernelPfS_
  49 // CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(
  50 __global__ __attribute__((used)) void kernel(float *out, float *in) {
     // Test kernel that references every function the checks in this file look
     // for. device_mul_or_add() is only declared in this file, so its body has
     // to come from the bitcode linked in by the RUN lines.
  51   *out = device_mul_or_add(in[0], in[1]);
     // __nv_sin()/__nv_exp() are likewise declared but not defined here; the
     // double result is accumulated into the float that out points to.
  52   *out += __nv_exp(__nv_sin(*out));
     // Call the device function that is defined in this file.
  53   should_not_be_internalized(out);
  54 }
  55
  56 // Make sure device_mul_or_add() is present in IR, is internal and
  57 // calls __nvvm_reflect().
  58 // CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
  59 // CHECK-IR-NLD-LABEL: define{{.*}} float @_Z17device_mul_or_addff(
  60 // CHECK-IR: call i32 @__nvvm_reflect
  61 // CHECK-IR: ret float
  62
  63 // Make sure we've linked in and internalized only needed functions
  64 // from the second bitcode file.
  65 // CHECK-IR-2-LABEL: define internal double @__nv_sin
  66 // CHECK-IR-2-LABEL: define internal double @__nv_exp
  67 // CHECK-IR-2-NOT: double @__unused
  68
  69 // Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
  70 // CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1