1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs --version 3
2 ; RUN: opt -S -mtriple=nvptx64-- -nvptx-lower-ctor-dtor < %s | FileCheck %s
3 ; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor < %s | FileCheck %s
4 ; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor \
5 ; RUN: -nvptx-lower-global-ctor-dtor-id=unique_id < %s | FileCheck %s --check-prefix=GLOBAL
6 ; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor \
7 ; RUN: -nvptx-emit-init-fini-kernel=false < %s | FileCheck %s --check-prefix=KERNEL
9 ; Make sure we get the same result if we run multiple times
10 ; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s
11 ; RUN: llc -nvptx-lower-global-ctor-dtor -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY
13 @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
14 @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
17 ; CHECK-NOT: @llvm.global_ctors
18 ; CHECK-NOT: @llvm.global_dtors
20 ; CHECK: @__init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
21 ; CHECK: @__fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
22 ; CHECK: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_[[HASH]]_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_[[HASH]]_1 to ptr)], section "llvm.metadata"
23 ; CHECK: @__fini_array_start = weak protected addrspace(1) global ptr null
24 ; CHECK: @__fini_array_end = weak protected addrspace(1) global ptr null
26 ; GLOBAL: @__init_array_object_foo_unique_id_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
27 ; GLOBAL: @__fini_array_object_bar_unique_id_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
28 ; GLOBAL: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_unique_id_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_unique_id_1 to ptr)], section "llvm.metadata"
29 ; GLOBAL: @__fini_array_start = weak protected addrspace(1) global ptr null
30 ; GLOBAL: @__fini_array_end = weak protected addrspace(1) global ptr null
32 ; KERNEL: @__init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
33 ; KERNEL: @__fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
35 ; VISIBILITY: .visible .const .align 8 .u64 __init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = foo;
36 ; VISIBILITY: .visible .const .align 8 .u64 __fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = bar;
38 define internal void @foo() {
42 define internal void @bar() {
46 ; CHECK-LABEL: define weak_odr void @"nvptx$device$init"() {
48 ; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_start, align 8
49 ; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_end, align 8
50 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ne ptr addrspace(1) [[BEGIN]], [[STOP]]
51 ; CHECK-NEXT: br i1 [[TMP0]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
53 ; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[BEGIN]], [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
54 ; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
55 ; CHECK-NEXT: call void [[CALLBACK]]()
56 ; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
57 ; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], [[STOP]]
58 ; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
60 ; CHECK-NEXT: ret void
63 ; CHECK-LABEL: define weak_odr void @"nvptx$device$fini"() {
65 ; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_start, align 8
66 ; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_end, align 8
67 ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[BEGIN]] to i64
68 ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[STOP]] to i64
69 ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[TMP0]]
70 ; CHECK-NEXT: [[OFFSET:%.*]] = ashr exact i64 [[TMP2]], 3
71 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr addrspace(1) [[BEGIN]], i64 [[OFFSET]]
72 ; CHECK-NEXT: [[START:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[TMP3]], i64 -1
73 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt ptr addrspace(1) [[START]], [[BEGIN]]
74 ; CHECK-NEXT: br i1 [[TMP4]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
76 ; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[START]], [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
77 ; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
78 ; CHECK-NEXT: call void [[CALLBACK]]()
79 ; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
80 ; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], [[BEGIN]]
81 ; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
83 ; CHECK-NEXT: ret void
85 ; CHECK: [[META0:![0-9]+]] = !{ptr @"nvptx$device$init", !"kernel", i32 1}
86 ; CHECK: [[META1:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidx", i32 1}
87 ; CHECK: [[META2:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidy", i32 1}
88 ; CHECK: [[META3:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidz", i32 1}
89 ; CHECK: [[META4:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxclusterrank", i32 1}
90 ; CHECK: [[META5:![0-9]+]] = !{ptr @"nvptx$device$fini", !"kernel", i32 1}
91 ; CHECK: [[META6:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidx", i32 1}
92 ; CHECK: [[META7:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidy", i32 1}
93 ; CHECK: [[META8:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidz", i32 1}
94 ; CHECK: [[META9:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxclusterrank", i32 1}