llvm-project: clang/test/Driver/linker-wrapper-image.c
// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target
// REQUIRES: amdgpu-registered-target
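
// Checks the registration module that clang-linker-wrapper synthesizes around an
// embedded offloading image for the OpenMP, CUDA, and HIP offloading runtimes.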
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o

// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
// RUN:   -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN:   --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=OPENMP
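
// The OpenMP wrapping only needs the linker-provided __start/__stop bounds of the
// omp_offloading_entries section: the wrapper emits the embedded device image, a
// __tgt_bin_desc descriptor, and a constructor/destructor pair that pass that
// descriptor to __tgt_register_lib and __tgt_unregister_lib.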
// OPENMP: @__start_omp_offloading_entries = external hidden constant %__tgt_offload_entry
// OPENMP-NEXT: @__stop_omp_offloading_entries = external hidden constant %__tgt_offload_entry
// OPENMP-NEXT: @__dummy.omp_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries"
// OPENMP-NEXT: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}"
// OPENMP-NEXT: @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { ptr @.omp_offloading.device_image, ptr getelementptr inbounds ([[[SIZE]] x i8], ptr @.omp_offloading.device_image, i64 1, i64 0), ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }]
// OPENMP-NEXT: @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }
// OPENMP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.omp_offloading.descriptor_reg, ptr null }]
// OPENMP-NEXT: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.omp_offloading.descriptor_unreg, ptr null }]

// OPENMP: define internal void @.omp_offloading.descriptor_reg() section ".text.startup" {
// OPENMP-NEXT: entry:
// OPENMP-NEXT: call void @__tgt_register_lib(ptr @.omp_offloading.descriptor)
// OPENMP-NEXT: ret void
// OPENMP-NEXT: }

// OPENMP: define internal void @.omp_offloading.descriptor_unreg() section ".text.startup" {
// OPENMP-NEXT: entry:
// OPENMP-NEXT: call void @__tgt_unregister_lib(ptr @.omp_offloading.descriptor)
// OPENMP-NEXT: ret void
// OPENMP-NEXT: }

// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
// RUN:   -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN:   --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA
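
// For CUDA the image is wrapped in a %fatbin_wrapper struct carrying the fatbin magic
// number and placed in .nvFatBinSegment; the handle returned by __cudaRegisterFatBinary
// is cached in @.cuda.binary_handle so it can be unregistered at program exit.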
// CUDA: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".nv_fatbin"
// CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8
// CUDA-NEXT: @__dummy.cuda_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries"
// CUDA-NEXT: @.cuda.binary_handle = internal global ptr null
// CUDA-NEXT: @__start_cuda_offloading_entries = external hidden constant [0 x %__tgt_offload_entry]
// CUDA-NEXT: @__stop_cuda_offloading_entries = external hidden constant [0 x %__tgt_offload_entry]
// CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.cuda.fatbin_reg, ptr null }]
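
// The constructor registers the fatbinary, hands the handle to the globals registration
// helper, finalizes with __cudaRegisterFatBinaryEnd, and queues the unregistration
// function through atexit instead of llvm.global_dtors.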
// CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper)
// CUDA-NEXT: store ptr %0, ptr @.cuda.binary_handle, align 8
// CUDA-NEXT: call void @.cuda.globals_reg(ptr %0)
// CUDA-NEXT: call void @__cudaRegisterFatBinaryEnd(ptr %0)
// CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg)
// CUDA-NEXT: ret void
// CUDA-NEXT: }

// CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8
// CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0)
// CUDA-NEXT: ret void
// CUDA-NEXT: }
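
// The globals registration helper walks the offload entry table between the
// __start/__stop symbols, calling __cudaRegisterFunction for kernel entries
// (size == 0) and otherwise dispatching on the entry flags to handle global,
// managed, surface, and texture variables.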
// CUDA: define internal void @.cuda.globals_reg(ptr %0) section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: br i1 icmp ne (ptr @__start_cuda_offloading_entries, ptr @__stop_cuda_offloading_entries), label %while.entry, label %while.end

// CUDA: while.entry:
// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %7, %if.end ]
// CUDA-NEXT: %1 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 0
// CUDA-NEXT: %addr = load ptr, ptr %1, align 8
// CUDA-NEXT: %2 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 1
// CUDA-NEXT: %name = load ptr, ptr %2, align 8
// CUDA-NEXT: %3 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 2
// CUDA-NEXT: %size = load i64, ptr %3, align 4
// CUDA-NEXT: %4 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 3
// CUDA-NEXT: %flag = load i32, ptr %4, align 4
// CUDA-NEXT: %5 = icmp eq i64 %size, 0
// CUDA-NEXT: br i1 %5, label %if.then, label %if.else

// CUDA: if.then:
// CUDA-NEXT: %6 = call i32 @__cudaRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
// CUDA-NEXT: br label %if.end

// CUDA: if.else:
// CUDA-NEXT: switch i32 %flag, label %if.end [
// CUDA-NEXT: i32 0, label %sw.global
// CUDA-NEXT: i32 1, label %sw.managed
// CUDA-NEXT: i32 2, label %sw.surface
// CUDA-NEXT: i32 3, label %sw.texture
// CUDA-NEXT: ]

// CUDA: sw.global:
// CUDA-NEXT: call void @__cudaRegisterVar(ptr %0, ptr %addr, ptr %name, ptr %name, i32 0, i64 %size, i32 0, i32 0)
// CUDA-NEXT: br label %if.end

// CUDA: sw.managed:
// CUDA-NEXT: br label %if.end

// CUDA: sw.surface:
// CUDA-NEXT: br label %if.end

// CUDA: sw.texture:
// CUDA-NEXT: br label %if.end

// CUDA: if.end:
// CUDA-NEXT: %7 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 1
// CUDA-NEXT: %8 = icmp eq ptr %7, @__stop_cuda_offloading_entries
// CUDA-NEXT: br i1 %8, label %while.end, label %while.entry

// CUDA: while.end:
// CUDA-NEXT: ret void
// CUDA-NEXT: }

// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
// RUN:   -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN:   --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP
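
// The HIP wrapping mirrors the CUDA path, using the __hip* runtime entry points and the
// .hip_fatbin/.hipFatBinSegment sections; there is no RegisterFatBinaryEnd counterpart.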
// HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
// HIP-NEXT: @__dummy.hip_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries"
// HIP-NEXT: @.hip.binary_handle = internal global ptr null
// HIP-NEXT: @__start_hip_offloading_entries = external hidden constant [0 x %__tgt_offload_entry]
// HIP-NEXT: @__stop_hip_offloading_entries = external hidden constant [0 x %__tgt_offload_entry]
// HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.hip.fatbin_reg, ptr null }]
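
// As with CUDA, registration runs from a global constructor and unregistration is
// deferred to an atexit handler that reads the cached binary handle.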
// HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
// HIP-NEXT: entry:
// HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
// HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
// HIP-NEXT: call void @.hip.globals_reg(ptr %0)
// HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
// HIP-NEXT: ret void
// HIP-NEXT: }

// HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
// HIP-NEXT: entry:
// HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
// HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
// HIP-NEXT: ret void
// HIP-NEXT: }
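
// The globals registration helper performs the same entry-table walk as the CUDA
// version, using __hipRegisterFunction for kernels and __hipRegisterVar for plain
// global variables.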
// HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" {
// HIP-NEXT: entry:
// HIP-NEXT: br i1 icmp ne (ptr @__start_hip_offloading_entries, ptr @__stop_hip_offloading_entries), label %while.entry, label %while.end

// HIP: while.entry:
// HIP-NEXT: %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %7, %if.end ]
// HIP-NEXT: %1 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 0
// HIP-NEXT: %addr = load ptr, ptr %1, align 8
// HIP-NEXT: %2 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 1
// HIP-NEXT: %name = load ptr, ptr %2, align 8
// HIP-NEXT: %3 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 2
// HIP-NEXT: %size = load i64, ptr %3, align 4
// HIP-NEXT: %4 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 3
// HIP-NEXT: %flag = load i32, ptr %4, align 4
// HIP-NEXT: %5 = icmp eq i64 %size, 0
// HIP-NEXT: br i1 %5, label %if.then, label %if.else

// HIP: if.then:
// HIP-NEXT: %6 = call i32 @__hipRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
// HIP-NEXT: br label %if.end

// HIP: if.else:
// HIP-NEXT: switch i32 %flag, label %if.end [
// HIP-NEXT: i32 0, label %sw.global
// HIP-NEXT: i32 1, label %sw.managed
// HIP-NEXT: i32 2, label %sw.surface
// HIP-NEXT: i32 3, label %sw.texture
// HIP-NEXT: ]

// HIP: sw.global:
// HIP-NEXT: call void @__hipRegisterVar(ptr %0, ptr %addr, ptr %name, ptr %name, i32 0, i64 %size, i32 0, i32 0)
// HIP-NEXT: br label %if.end

// HIP: sw.managed:
// HIP-NEXT: br label %if.end

// HIP: sw.surface:
// HIP-NEXT: br label %if.end

// HIP: sw.texture:
// HIP-NEXT: br label %if.end

// HIP: if.end:
// HIP-NEXT: %7 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 1
// HIP-NEXT: %8 = icmp eq ptr %7, @__stop_hip_offloading_entries
// HIP-NEXT: br i1 %8, label %while.end, label %while.entry

// HIP: while.end:
// HIP-NEXT: ret void
// HIP-NEXT: }