1 ; RUN: llc < %s -march=nvptx -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=CHECK,CHECK_PTX32 %s
2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=CHECK,CHECK_PTX64 %s
3 ; RUN: %if ptxas-11.0 && ! ptxas-12.0 %{ llc < %s -march=nvptx -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
4 ; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
6 declare void @llvm.nvvm.cp.async.wait.group(i32)
8 ; CHECK-LABEL: asyncwaitgroup
9 define void @asyncwaitgroup() {
10 ; CHECK: cp.async.wait_group 8;
11 tail call void @llvm.nvvm.cp.async.wait.group(i32 8)
12 ; CHECK: cp.async.wait_group 0;
13 tail call void @llvm.nvvm.cp.async.wait.group(i32 0)
14 ; CHECK: cp.async.wait_group 16;
15 tail call void @llvm.nvvm.cp.async.wait.group(i32 16)
19 declare void @llvm.nvvm.cp.async.wait.all()
21 ; CHECK-LABEL: asyncwaitall
22 define void @asyncwaitall() {
23 ; CHECK: cp.async.wait_all
24 tail call void @llvm.nvvm.cp.async.wait.all()
28 declare void @llvm.nvvm.cp.async.commit.group()
30 ; CHECK-LABEL: asynccommitgroup
31 define void @asynccommitgroup() {
32 ; CHECK: cp.async.commit_group
33 tail call void @llvm.nvvm.cp.async.commit.group()
37 declare void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %a)
38 declare void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %a)
39 declare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %a)
40 declare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %a)
42 ; CHECK-LABEL: asyncmbarrier
43 define void @asyncmbarrier(ptr %a) {
44 ; The distinction between PTX32/PTX64 here is only to capture pointer register type
45 ; in R to be used in subsequent tests.
46 ; CHECK_PTX32: cp.async.mbarrier.arrive.b64 [%[[R:r]]{{[0-9]+}}];
47 ; CHECK_PTX64: cp.async.mbarrier.arrive.b64 [%[[R:rd]]{{[0-9]+}}];
48 tail call void @llvm.nvvm.cp.async.mbarrier.arrive(ptr %a)
52 ; CHECK-LABEL: asyncmbarriershared
53 define void @asyncmbarriershared(ptr addrspace(3) %a) {
54 ; CHECK: cp.async.mbarrier.arrive.shared.b64 [%[[R]]{{[0-9]+}}];
55 tail call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(ptr addrspace(3) %a)
59 ; CHECK-LABEL: asyncmbarriernoinc
60 define void @asyncmbarriernoinc(ptr %a) {
61 ; CHECK_PTX64: cp.async.mbarrier.arrive.noinc.b64 [%[[R]]{{[0-9]+}}];
62 tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(ptr %a)
66 ; CHECK-LABEL: asyncmbarriernoincshared
67 define void @asyncmbarriernoincshared(ptr addrspace(3) %a) {
68 ; CHECK: cp.async.mbarrier.arrive.noinc.shared.b64 [%[[R]]{{[0-9]+}}];
69 tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(ptr addrspace(3) %a)
73 declare void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %a, ptr addrspace(1) %b)
74 declare void @llvm.nvvm.cp.async.ca.shared.global.4.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
76 ; CHECK-LABEL: asynccasharedglobal4i8
77 define void @asynccasharedglobal4i8(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
78 ; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 4;
79 ; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 4, %r{{[0-9]+}};
80 ; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 4, 1;
81 tail call void @llvm.nvvm.cp.async.ca.shared.global.4(ptr addrspace(3) %a, ptr addrspace(1) %b)
82 tail call void @llvm.nvvm.cp.async.ca.shared.global.4.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
83 tail call void @llvm.nvvm.cp.async.ca.shared.global.4.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 1)
87 declare void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %a, ptr addrspace(1) %b)
88 declare void @llvm.nvvm.cp.async.ca.shared.global.8.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
90 ; CHECK-LABEL: asynccasharedglobal8i8
91 define void @asynccasharedglobal8i8(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
92 ; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 8;
93 ; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 8, %r{{[0-9]+}};
94 ; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 8, 1;
95 tail call void @llvm.nvvm.cp.async.ca.shared.global.8(ptr addrspace(3) %a, ptr addrspace(1) %b)
96 tail call void @llvm.nvvm.cp.async.ca.shared.global.8.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
97 tail call void @llvm.nvvm.cp.async.ca.shared.global.8.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 1)
101 declare void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
102 declare void @llvm.nvvm.cp.async.ca.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
104 ; CHECK-LABEL: asynccasharedglobal16i8
105 define void @asynccasharedglobal16i8(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
106 ; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16;
107 ; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16, %r{{[0-9]+}};
108 ; CHECK: cp.async.ca.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16, 1;
109 tail call void @llvm.nvvm.cp.async.ca.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
110 tail call void @llvm.nvvm.cp.async.ca.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
111 tail call void @llvm.nvvm.cp.async.ca.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 1)
115 declare void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
116 declare void @llvm.nvvm.cp.async.cg.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
118 ; CHECK-LABEL: asynccgsharedglobal16i8
119 define void @asynccgsharedglobal16i8(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c) {
120 ; CHECK: cp.async.cg.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16;
121 ; CHECK: cp.async.cg.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16, %r{{[0-9]+}};
122 ; CHECK: cp.async.cg.shared.global [%[[R]]{{[0-9]+}}], [%[[R]]{{[0-9]+}}], 16, 1;
123 tail call void @llvm.nvvm.cp.async.cg.shared.global.16(ptr addrspace(3) %a, ptr addrspace(1) %b)
124 tail call void @llvm.nvvm.cp.async.cg.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 %c)
125 tail call void @llvm.nvvm.cp.async.cg.shared.global.16.s(ptr addrspace(3) %a, ptr addrspace(1) %b, i32 1)