1 ; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -pass-remarks-analysis=openmp-opt -disable-output < %s 2>&1 | FileCheck %s
2 target triple = "nvptx64"
4 ; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:11:1: Generic-mode kernel is executed with a customized state machine that requires a fallback.
5 ; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:13:5: Call may contain unknown parallel regions. Use `__attribute__((assume("omp_no_parallelism")))` to override.
6 ; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:15:5: Call may contain unknown parallel regions. Use `__attribute__((assume("omp_no_parallelism")))` to override.
7 ; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:20:1: Rewriting generic-mode kernel with a customized state machine.
10 ;; void unknown(void);
12 ;; #pragma omp parallel
18 ;; void test_fallback(void) {
19 ;; #pragma omp target teams
27 ;; void no_openmp(void) __attribute__((assume("omp_no_openmp")));
28 ;; void test_no_fallback(void) {
29 ;; #pragma omp target teams
34 ;; no_openmp(); // make it non-spmd
38 %struct.ident_t = type { i32, i32, i32, i32, i8* }
40 @0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
41 @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @0, i32 0, i32 0) }, align 8
42 @2 = private unnamed_addr constant [82 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_fallback;11;1;;\00", align 1
43 @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([82 x i8], [82 x i8]* @2, i32 0, i32 0) }, align 8
44 @4 = private unnamed_addr constant [114 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1
45 @5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([114 x i8], [114 x i8]* @4, i32 0, i32 0) }, align 8
46 @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1
47 @6 = private unnamed_addr constant [116 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1
48 @7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([116 x i8], [116 x i8]* @6, i32 0, i32 0) }, align 8
49 @8 = private unnamed_addr constant [85 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_no_fallback;20;1;;\00", align 1
50 @9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([85 x i8], [85 x i8]* @8, i32 0, i32 0) }, align 8
51 @10 = private unnamed_addr constant [117 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1
52 @11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([117 x i8], [117 x i8]* @10, i32 0, i32 0) }, align 8
53 @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1
54 @12 = private unnamed_addr constant [73 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;known;4;1;;\00", align 1
55 @13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([73 x i8], [73 x i8]* @12, i32 0, i32 0) }, align 8
56 @G = external global i32
57 @llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, i8* @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"
59 ; Function Attrs: convergent norecurse nounwind
60 define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
62 %captured_vars_addrs.i.i = alloca [0 x i8*], align 8
63 %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) #3, !dbg !18
64 %exec_user_code = icmp eq i32 %0, -1, !dbg !18
65 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
67 common.ret: ; preds = %entry, %user_code.entry
70 user_code.entry: ; preds = %entry
71 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @3) #3
72 call void @unknown() #6, !dbg !20
73 %2 = bitcast [0 x i8*]* %captured_vars_addrs.i.i to i8*
74 call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
75 %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
76 %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i.i, i64 0, i64 0, !dbg !23
77 call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23
78 call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26
79 call void @unknown() #6, !dbg !27
80 call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i1 false, i1 true) #3, !dbg !28
84 declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr
86 ; Function Attrs: convergent
87 declare void @unknown() local_unnamed_addr #1
89 ; Function Attrs: nounwind
90 define hidden void @known() local_unnamed_addr #2 !dbg !29 {
92 %captured_vars_addrs = alloca [0 x i8*], align 8
93 %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @13)
94 %1 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0, !dbg !30
95 call void @__kmpc_parallel_51(%struct.ident_t* nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** nonnull %1, i64 0) #3, !dbg !30
99 ; Function Attrs: nounwind
100 declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
102 declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr
104 ; Function Attrs: norecurse nounwind
105 define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
107 %captured_vars_addrs.i2.i = alloca [0 x i8*], align 8
108 %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i1 false, i1 true, i1 true) #3, !dbg !33
109 %exec_user_code = icmp eq i32 %0, -1, !dbg !33
110 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
112 common.ret: ; preds = %entry, %user_code.entry
115 user_code.entry: ; preds = %entry
116 %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @9) #3
117 %2 = bitcast [0 x i8*]* %captured_vars_addrs.i2.i to i8*
118 call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
119 %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
120 %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i2.i, i64 0, i64 0, !dbg !35
121 call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !35
122 call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !39
123 call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
124 %5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
125 call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %5, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !40
126 call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !42
127 call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
128 %6 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
129 call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %6, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !43
130 call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45
131 call void @no_openmp()
132 call void @no_parallelism()
133 call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i1 false, i1 true) #3, !dbg !46
137 ; Function Attrs: convergent norecurse nounwind
138 define internal void @__omp_outlined__2(i32* noalias nocapture nofree readnone %.global_tid., i32* noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 {
140 call void @unknown() #6, !dbg !48
144 ; Function Attrs: convergent norecurse nounwind
145 define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 !dbg !50 {
147 %global_args = alloca i8**, align 8
148 call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3, !dbg !51
149 call void @unknown() #6, !dbg !52
153 declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr
155 declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr
157 ; Function Attrs: argmemonly nofree nosync nounwind willreturn
158 declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5
160 ; Function Attrs: argmemonly nofree nosync nounwind willreturn
161 declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5
163 declare void @no_openmp() #7
164 declare void @no_parallelism() #8
166 attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
167 attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
168 attributes #2 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
169 attributes #3 = { nounwind }
170 attributes #4 = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
171 attributes #5 = { argmemonly nofree nosync nounwind willreturn }
172 attributes #6 = { convergent nounwind }
173 attributes #7 = { "llvm.assume"="omp_no_openmp" }
174 attributes #8 = { "llvm.assume"="omp_no_parallelism" }
177 !omp_offload.info = !{!3, !4}
178 !nvvm.annotations = !{!5, !6}
179 !llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
182 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: DebugDirectivesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
183 !1 = !DIFile(filename: "custom_state_machines_remarks.c", directory: "/data/src/llvm-project")
185 !3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1}
186 !4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0}
187 !5 = !{void ()* @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1}
188 !6 = !{void ()* @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1}
189 !7 = !{i32 7, !"Dwarf Version", i32 2}
190 !8 = !{i32 2, !"Debug Info Version", i32 3}
191 !9 = !{i32 1, !"wchar_size", i32 4}
192 !10 = !{i32 7, !"openmp", i32 50}
193 !11 = !{i32 7, !"openmp-device", i32 50}
194 !12 = !{i32 7, !"PIC Level", i32 2}
195 !13 = !{i32 7, !"frame-pointer", i32 2}
196 !14 = !{!"clang version 13.0.0"}
197 !15 = distinct !DISubprogram(name: "__omp_offloading_2a_d80d3d_test_fallback_l11", scope: !16, file: !16, line: 11, type: !17, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
198 !16 = !DIFile(filename: "llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c", directory: "/data/src/llvm-project")
199 !17 = !DISubroutineType(types: !2)
200 !18 = !DILocation(line: 11, column: 1, scope: !15)
201 !19 = !DILocation(line: 0, scope: !15)
202 !20 = !DILocation(line: 13, column: 5, scope: !21, inlinedAt: !22)
203 !21 = distinct !DISubprogram(name: "__omp_outlined__", scope: !16, file: !16, line: 11, type: !17, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
204 !22 = distinct !DILocation(line: 11, column: 1, scope: !15)
205 !23 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !25)
206 !24 = distinct !DISubprogram(name: "known", scope: !16, file: !16, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
207 !25 = distinct !DILocation(line: 14, column: 5, scope: !21, inlinedAt: !22)
208 !26 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !25)
209 !27 = !DILocation(line: 15, column: 5, scope: !21, inlinedAt: !22)
210 !28 = !DILocation(line: 11, column: 25, scope: !15)
211 !29 = distinct !DISubprogram(name: "known", scope: !16, file: !16, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
212 !30 = !DILocation(line: 4, column: 1, scope: !29)
213 !31 = !DILocation(line: 8, column: 1, scope: !29)
214 !32 = distinct !DISubprogram(name: "__omp_offloading_2a_d80d3d_test_no_fallback_l20", scope: !16, file: !16, line: 20, type: !17, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
215 !33 = !DILocation(line: 20, column: 1, scope: !32)
216 !34 = !DILocation(line: 0, scope: !32)
217 !35 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !36)
218 !36 = distinct !DILocation(line: 22, column: 5, scope: !37, inlinedAt: !38)
219 !37 = distinct !DISubprogram(name: "__omp_outlined__1", scope: !16, file: !16, line: 20, type: !17, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
220 !38 = distinct !DILocation(line: 20, column: 1, scope: !32)
221 !39 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !36)
222 !40 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !41)
223 !41 = distinct !DILocation(line: 23, column: 5, scope: !37, inlinedAt: !38)
224 !42 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !41)
225 !43 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !44)
226 !44 = distinct !DILocation(line: 24, column: 5, scope: !37, inlinedAt: !38)
227 !45 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !44)
228 !46 = !DILocation(line: 20, column: 25, scope: !32)
229 !47 = distinct !DISubprogram(name: "__omp_outlined__2", scope: !16, file: !16, line: 4, type: !17, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
230 !48 = !DILocation(line: 6, column: 5, scope: !47)
231 !49 = !DILocation(line: 7, column: 3, scope: !47)
232 !50 = distinct !DISubprogram(linkageName: "__omp_outlined__2_wrapper", scope: !16, file: !16, line: 4, type: !17, scopeLine: 4, flags: DIFlagArtificial, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
233 !51 = !DILocation(line: 4, column: 1, scope: !50)
234 !52 = !DILocation(line: 6, column: 5, scope: !47, inlinedAt: !53)
235 !53 = distinct !DILocation(line: 4, column: 1, scope: !50)