clang/test/Driver/fat_archive_nvptx.cpp

   1 // REQUIRES: x86-registered-target
   2 // REQUIRES: nvptx-registered-target
   3
   4 // The steps to create a fat archive are given at the end of the file.
   5
   6 // Given a FatArchive, clang-offload-bundler should be called to create a
   7 // device specific archive, which should be passed to clang-nvlink-wrapper.
   8 // RUN: %clang -O2 -### -fopenmp -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda %s -L%S/Inputs/openmp_static_device_link -lFatArchive 2>&1 | FileCheck %s
   9 // CHECK: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "[[GPU:sm_[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.s]]" "-x" "c++"{{.*}}.cpp
  10 // CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-nvptx64-nvidia-cuda-[[GPU]]" "-output=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
  11 // CHECK: clang-nvlink-wrapper{{.*}}"-o" "{{.*}}.out" "-arch" "[[GPU]]" "{{.*}}[[DEVICESPECIFICARCHIVE]]"
  12 // RUN: not %clang -fopenmp -fno-openmp-new-driver -fopenmp-targets=nvptx64-nvidia-cuda %s %S/Inputs/openmp_static_device_link/empty.o --libomptarget-nvptx-bc-path=%S/Inputs/openmp_static_device_link/lib.bc 2>&1 | FileCheck %s --check-prefix=EMPTY
  13 // EMPTY-NOT: Could not open input file
  14
  15 #ifndef HEADER
  16 #define HEADER
  17
  18 #define N 10
  19
  20 #pragma omp declare target
  21 // Provided by the fat archive (libFatArchive.a).
  22 extern "C" void func_present(float *in, float *out, unsigned n);
  23
  24 #ifdef MISSING
  25 // No definition for this one exists in the fat archive.
  26 extern "C" void func_missing(float *in, float *out, unsigned n);
  27 #endif
  28
  29 #pragma omp end declare target
  30
  31 int main() {
  32   // Input and output buffers passed to the routines from the fat archive.
  33   float in[N], out[N];
  34   float sum = 0;
  35   unsigned i;
  36 #pragma omp parallel for
  37   for (i = 0; i < N; ++i)
  38     in[i] = i;
  39
  40   func_present(in, out, N); // Sets out[i] = in[i] * 0
  41
  42 #ifdef MISSING
  43   func_missing(in, out, N); // Not in the archive: should produce an error
  44 #endif
  45
  46 #pragma omp parallel for reduction(+ : sum)
  47   for (i = 0; i < N; ++i)
  48     sum += out[i];
  49
  50   // The accumulated sum becomes the exit status.
  51   if (sum == 0)
  52     return 0;
  53   return sum;
  54 }
  55
  56 #endif
  57
  58 /***********************************************
  59    Steps to create Fat Archive (libFatArchive.a)
  60 ************************************************
  61 ***************** File: func_1.c ***************
  62 void func_present(float* in, float* out, unsigned n){
  63   unsigned i;
  64   #pragma omp target teams distribute parallel for map(to: in[0:n]) map(from: out[0:n])
  65   for(i=0; i<n; ++i){
  66     out[i] = in[i] * 0;
  67   }
  68 }
  69 *************************************************
  70 1. Compile source file(s) to generate object file(s)
  71     clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_1.c -o func_1_gfx906.o
  72     clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_1.c -o func_1_gfx908.o
  73     clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_2.c -o func_2_gfx906.o
  74     clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_2.c -o func_2_gfx908.o
  75     clang -O2 -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -c func_1.c -o func_1_nvptx.o
  76     clang -O2 -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -c func_2.c -o func_2_nvptx.o
  77
  78 2. Create a fat archive by combining all the object file(s)
  79     llvm-ar cr libFatArchive.a func_1_gfx906.o func_1_gfx908.o func_2_gfx906.o func_2_gfx908.o func_1_nvptx.o func_2_nvptx.o
  80 ************************************************/