// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target
// REQUIRES: amdgpu-registered-target
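
// This test inspects the registration module that clang-linker-wrapper
// synthesizes (via --print-wrapped-module) when linking host objects that
// carry embedded offloading images for OpenMP, CUDA, and HIP.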
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o

// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
// RUN: -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run -r --host-triple=x86_64-unknown-linux-gnu \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP-ELF,OPENMP-REL
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-COFF
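
// On ELF targets the offload entry table is delimited by the linker-provided
// __start_/__stop_ symbols for the omp_offloading_entries section, with a
// zero-sized dummy array keeping the section alive so those symbols resolve
// even when no entries are present. On COFF the bounds are emitted directly as
// weak_odr arrays in the $OA/$OZ suffixed sections, which the linker sorts so
// they bracket the entry table. With -r (a relocatable link) the embedded
// device image goes to the .llvm.offloading.relocatable section instead of
// .llvm.offloading.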
// OPENMP-ELF: @__start_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// OPENMP-ELF-NEXT: @__stop_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// OPENMP-ELF-NEXT: @__dummy.omp_offloading_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries"

// OPENMP-COFF: @__start_omp_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries$OA"
// OPENMP-COFF-NEXT: @__stop_omp_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries$OZ"

// OPENMP-REL: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}", section ".llvm.offloading.relocatable", align 8

// OPENMP: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}", section ".llvm.offloading", align 8
// OPENMP-NEXT: @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { ptr getelementptr ([[[BEGIN:[0-9]+]] x i8], ptr @.omp_offloading.device_image, i64 0, i64 144), ptr getelementptr ([[[END:[0-9]+]] x i8], ptr @.omp_offloading.device_image, i64 0, i64 144), ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }]
// OPENMP-NEXT: @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }
// OPENMP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.omp_offloading.descriptor_reg, ptr null }]
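
// The wrapper emits a global constructor at priority 101 that hands the
// descriptor to the offloading runtime with __tgt_register_lib and queues the
// matching __tgt_unregister_lib call through atexit.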
// OPENMP: define internal void @.omp_offloading.descriptor_reg() section ".text.startup" {
// OPENMP-NEXT: entry:
// OPENMP-NEXT: call void @__tgt_register_lib(ptr @.omp_offloading.descriptor)
// OPENMP-NEXT: %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg)
// OPENMP-NEXT: ret void
// OPENMP-NEXT: }

// OPENMP: define internal void @.omp_offloading.descriptor_unreg() section ".text.startup" {
// OPENMP-NEXT: entry:
// OPENMP-NEXT: call void @__tgt_unregister_lib(ptr @.omp_offloading.descriptor)
// OPENMP-NEXT: ret void
// OPENMP-NEXT: }
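
// The same image wrapped as a CUDA offload (kind=cuda) is instead lowered to a
// fatbinary wrapper that is registered through the CUDA runtime entry points.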
// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
// RUN: -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run -r --host-triple=x86_64-unknown-linux-gnu \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-COFF

// CUDA-ELF: @__start_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// CUDA-ELF-NEXT: @__stop_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// CUDA-ELF-NEXT: @__dummy.cuda_offloading_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries"

// CUDA-COFF: @__start_cuda_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries$OA"
// CUDA-COFF-NEXT: @__stop_cuda_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries$OZ"
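
// The device image is emitted as @.fatbin_image in the .nv_fatbin section and
// described by a %fatbin_wrapper struct (magic, version, image pointer) that
// __cudaRegisterFatBinary consumes; the returned handle is cached in
// @.cuda.binary_handle.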
// CUDA: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".nv_fatbin"
// CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8
// CUDA-NEXT: @.cuda.binary_handle = internal global ptr null

// CUDA: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }]
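
// The constructor registers the fatbinary, records the handle, registers the
// individual globals, finalizes registration with __cudaRegisterFatBinaryEnd,
// and schedules @.cuda.fatbin_unreg via atexit.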
// CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper)
// CUDA-NEXT: store ptr %0, ptr @.cuda.binary_handle, align 8
// CUDA-NEXT: call void @.cuda.globals_reg(ptr %0)
// CUDA-NEXT: call void @__cudaRegisterFatBinaryEnd(ptr %0)
// CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg)
// CUDA-NEXT: ret void
// CUDA-NEXT: }

// CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8
// CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0)
// CUDA-NEXT: ret void
// CUDA-NEXT: }
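
// @.cuda.globals_reg walks every %struct.__tgt_offload_entry between the
// __start_/__stop_ symbols, loading each entry's address, name, size, and flag
// bits (the kind in the low three bits, plus extern/constant/normalized bits).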
// CUDA: define internal void @.cuda.globals_reg(ptr %0) section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %1 = icmp ne ptr @__start_cuda_offloading_entries, @__stop_cuda_offloading_entries
// CUDA-NEXT: br i1 %1, label %while.entry, label %while.end

// CUDA: while.entry:
// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %12, %if.end ]
// CUDA-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0
// CUDA-NEXT: %addr = load ptr, ptr %2, align 8
// CUDA-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1
// CUDA-NEXT: %name = load ptr, ptr %3, align 8
// CUDA-NEXT: %4 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 2
// CUDA-NEXT: %size = load i64, ptr %4, align 4
// CUDA-NEXT: %5 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 3
// CUDA-NEXT: %flags = load i32, ptr %5, align 4
// CUDA-NEXT: %6 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 4
// CUDA-NEXT: %textype = load i32, ptr %6, align 4
// CUDA-NEXT: %type = and i32 %flags, 7
// CUDA-NEXT: %7 = and i32 %flags, 8
// CUDA-NEXT: %extern = lshr i32 %7, 3
// CUDA-NEXT: %8 = and i32 %flags, 16
// CUDA-NEXT: %constant = lshr i32 %8, 4
// CUDA-NEXT: %9 = and i32 %flags, 32
// CUDA-NEXT: %normalized = lshr i32 %9, 5
// CUDA-NEXT: %10 = icmp eq i64 %size, 0
// CUDA-NEXT: br i1 %10, label %if.then, label %if.else
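
// Entries with a size of zero are kernels and are registered with
// __cudaRegisterFunction; nonzero-size entries dispatch on the kind bits, with
// only global variables registered here (managed, surface, and texture entries
// fall through unregistered on the CUDA path).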
// CUDA: if.then:
// CUDA-NEXT: %11 = call i32 @__cudaRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
// CUDA-NEXT: br label %if.end

// CUDA: if.else:
// CUDA-NEXT: switch i32 %type, label %if.end [
// CUDA-NEXT: i32 0, label %sw.global
// CUDA-NEXT: i32 1, label %sw.managed
// CUDA-NEXT: i32 2, label %sw.surface
// CUDA-NEXT: i32 3, label %sw.texture
// CUDA-NEXT: ]

// CUDA: sw.global:
// CUDA-NEXT: call void @__cudaRegisterVar(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %extern, i64 %size, i32 %constant, i32 0)
// CUDA-NEXT: br label %if.end

// CUDA: sw.managed:
// CUDA-NEXT: br label %if.end

// CUDA: sw.surface:
// CUDA-NEXT: br label %if.end

// CUDA: sw.texture:
// CUDA-NEXT: br label %if.end

// CUDA: if.end:
// CUDA-NEXT: %12 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
// CUDA-NEXT: %13 = icmp eq ptr %12, @__stop_cuda_offloading_entries
// CUDA-NEXT: br i1 %13, label %while.end, label %while.entry

// CUDA: while.end:
// CUDA-NEXT: ret void
// CUDA-NEXT: }
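
// The HIP flavor (kind=hip, targeting amdgcn) mirrors the CUDA wrapping but
// uses the hip runtime entry points and additionally registers surfaces and
// textures.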
// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
// RUN: -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu -r \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF

// HIP-ELF: @__start_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// HIP-ELF-NEXT: @__stop_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// HIP-ELF-NEXT: @__dummy.hip_offloading_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries"

// HIP-COFF: @__start_hip_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries$OA"
// HIP-COFF-NEXT: @__stop_hip_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries$OZ"

// HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
// HIP-NEXT: @.hip.binary_handle = internal global ptr null

// HIP: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
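
// Registration follows the CUDA scheme via __hipRegisterFatBinary, except that
// the HIP path does not emit a FatBinaryEnd call.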
// HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
// HIP-NEXT: entry:
// HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
// HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
// HIP-NEXT: call void @.hip.globals_reg(ptr %0)
// HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
// HIP-NEXT: ret void
// HIP-NEXT: }

// HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
// HIP-NEXT: entry:
// HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
// HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
// HIP-NEXT: ret void
// HIP-NEXT: }
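
// @.hip.globals_reg walks the hip_offloading_entries table exactly like the
// CUDA variant, but surfaces and textures are also registered, through
// __hipRegisterSurface and __hipRegisterTexture.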
// HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" {
// HIP-NEXT: entry:
// HIP-NEXT: %1 = icmp ne ptr @__start_hip_offloading_entries, @__stop_hip_offloading_entries
// HIP-NEXT: br i1 %1, label %while.entry, label %while.end

// HIP: while.entry:
// HIP-NEXT: %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %12, %if.end ]
// HIP-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 0
// HIP-NEXT: %addr = load ptr, ptr %2, align 8
// HIP-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 1
// HIP-NEXT: %name = load ptr, ptr %3, align 8
// HIP-NEXT: %4 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 2
// HIP-NEXT: %size = load i64, ptr %4, align 4
// HIP-NEXT: %5 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 3
// HIP-NEXT: %flags = load i32, ptr %5, align 4
// HIP-NEXT: %6 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 0, i32 4
// HIP-NEXT: %textype = load i32, ptr %6, align 4
// HIP-NEXT: %type = and i32 %flags, 7
// HIP-NEXT: %7 = and i32 %flags, 8
// HIP-NEXT: %extern = lshr i32 %7, 3
// HIP-NEXT: %8 = and i32 %flags, 16
// HIP-NEXT: %constant = lshr i32 %8, 4
// HIP-NEXT: %9 = and i32 %flags, 32
// HIP-NEXT: %normalized = lshr i32 %9, 5
// HIP-NEXT: %10 = icmp eq i64 %size, 0
// HIP-NEXT: br i1 %10, label %if.then, label %if.else

// HIP: if.then:
// HIP-NEXT: %11 = call i32 @__hipRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
// HIP-NEXT: br label %if.end

// HIP: if.else:
// HIP-NEXT: switch i32 %type, label %if.end [
// HIP-NEXT: i32 0, label %sw.global
// HIP-NEXT: i32 1, label %sw.managed
// HIP-NEXT: i32 2, label %sw.surface
// HIP-NEXT: i32 3, label %sw.texture
// HIP-NEXT: ]

// HIP: sw.global:
// HIP-NEXT: call void @__hipRegisterVar(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %extern, i64 %size, i32 %constant, i32 0)
// HIP-NEXT: br label %if.end

// HIP: sw.managed:
// HIP-NEXT: br label %if.end

// HIP: sw.surface:
// HIP-NEXT: call void @__hipRegisterSurface(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %textype, i32 %extern)
// HIP-NEXT: br label %if.end

// HIP: sw.texture:
// HIP-NEXT: call void @__hipRegisterTexture(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %textype, i32 %normalized, i32 %extern)
// HIP-NEXT: br label %if.end

// HIP: if.end:
// HIP-NEXT: %12 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
// HIP-NEXT: %13 = icmp eq ptr %12, @__stop_hip_offloading_entries
// HIP-NEXT: br i1 %13, label %while.end, label %while.entry

// HIP: while.end:
// HIP-NEXT: ret void
// HIP-NEXT: }