1 ; RUN: llc < %s -march=nvptx -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=ALL,CHECK_PTX32 %s
2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck -check-prefixes=ALL,CHECK_PTX64 %s
4 declare void @llvm.nvvm.cp.async.wait.group(i32)
; Calls llvm.nvvm.cp.async.wait.group three times with the constant operands
; 8, 0 and 16. Each call is expected to show up as a cp.async.wait_group
; instruction carrying the same immediate, in the same order as the calls.
; The directives use the ALL prefix, which both RUN lines enable.
6 ; ALL-LABEL: asyncwaitgroup
7 define void @asyncwaitgroup() {
8 ; ALL: cp.async.wait_group 8;
9 tail call void @llvm.nvvm.cp.async.wait.group(i32 8)
10 ; ALL: cp.async.wait_group 0;
11 tail call void @llvm.nvvm.cp.async.wait.group(i32 0)
12 ; ALL: cp.async.wait_group 16;
13 tail call void @llvm.nvvm.cp.async.wait.group(i32 16)
17 declare void @llvm.nvvm.cp.async.wait.all()
; A single call to llvm.nvvm.cp.async.wait.all (no operands) is expected to
; produce a cp.async.wait_all instruction. Checked under the ALL prefix, which
; both RUN lines enable.
19 ; ALL-LABEL: asyncwaitall
20 define void @asyncwaitall() {
21 ; ALL: cp.async.wait_all
22 tail call void @llvm.nvvm.cp.async.wait.all()
26 declare void @llvm.nvvm.cp.async.commit.group()
; A single call to llvm.nvvm.cp.async.commit.group (no operands) is expected to
; produce a cp.async.commit_group instruction. Checked under the ALL prefix,
; which both RUN lines enable.
28 ; ALL-LABEL: asynccommitgroup
29 define void @asynccommitgroup() {
30 ; ALL: cp.async.commit_group
31 tail call void @llvm.nvvm.cp.async.commit.group()
35 declare void @llvm.nvvm.cp.async.mbarrier.arrive(i64* %a)
36 declare void @llvm.nvvm.cp.async.mbarrier.arrive.shared(i64 addrspace(3)* %a)
37 declare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(i64* %a)
38 declare void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(i64 addrspace(3)* %a)
; llvm.nvvm.cp.async.mbarrier.arrive on an i64* argument is expected to produce
; cp.async.mbarrier.arrive.b64 with a bracketed register operand: a %r register
; under the CHECK_PTX32 prefix and a %rd register under the CHECK_PTX64 prefix.
; The label directive uses the ALL prefix so that it is active in both RUN
; lines; FileCheck ignores directives written with a prefix no RUN line enables.
40 ; ALL-LABEL: asyncmbarrier
41 define void @asyncmbarrier(i64* %a) {
42 ; CHECK_PTX32: cp.async.mbarrier.arrive.b64 [%r{{[0-9]+}}];
43 ; CHECK_PTX64: cp.async.mbarrier.arrive.b64 [%rd{{[0-9]+}}];
44 tail call void @llvm.nvvm.cp.async.mbarrier.arrive(i64* %a)
; llvm.nvvm.cp.async.mbarrier.arrive.shared on an i64 addrspace(3)* argument is
; expected to produce cp.async.mbarrier.arrive.shared.b64 with a bracketed
; register operand: %r under the CHECK_PTX32 prefix, %rd under CHECK_PTX64.
; The label directive uses the ALL prefix so that it is active in both RUN
; lines; FileCheck ignores directives written with a prefix no RUN line enables.
48 ; ALL-LABEL: asyncmbarriershared
49 define void @asyncmbarriershared(i64 addrspace(3)* %a) {
50 ; CHECK_PTX32: cp.async.mbarrier.arrive.shared.b64 [%r{{[0-9]+}}];
51 ; CHECK_PTX64: cp.async.mbarrier.arrive.shared.b64 [%rd{{[0-9]+}}];
52 tail call void @llvm.nvvm.cp.async.mbarrier.arrive.shared(i64 addrspace(3)* %a)
; llvm.nvvm.cp.async.mbarrier.arrive.noinc on an i64* argument is expected to
; produce cp.async.mbarrier.arrive.noinc.b64 with a bracketed register operand:
; %r under the CHECK_PTX32 prefix, %rd under CHECK_PTX64.
; The label directive uses the ALL prefix so that it is active in both RUN
; lines; FileCheck ignores directives written with a prefix no RUN line enables.
56 ; ALL-LABEL: asyncmbarriernoinc
57 define void @asyncmbarriernoinc(i64* %a) {
58 ; CHECK_PTX32: cp.async.mbarrier.arrive.noinc.b64 [%r{{[0-9]+}}];
59 ; CHECK_PTX64: cp.async.mbarrier.arrive.noinc.b64 [%rd{{[0-9]+}}];
60 tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc(i64* %a)
; llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared on an i64 addrspace(3)*
; argument is expected to produce cp.async.mbarrier.arrive.noinc.shared.b64
; with a bracketed register operand: %r under the CHECK_PTX32 prefix, %rd under
; CHECK_PTX64.
; The label directive uses the ALL prefix so that it is active in both RUN
; lines; FileCheck ignores directives written with a prefix no RUN line enables.
64 ; ALL-LABEL: asyncmbarriernoincshared
65 define void @asyncmbarriernoincshared(i64 addrspace(3)* %a) {
66 ; CHECK_PTX32: cp.async.mbarrier.arrive.noinc.shared.b64 [%r{{[0-9]+}}];
67 ; CHECK_PTX64: cp.async.mbarrier.arrive.noinc.shared.b64 [%rd{{[0-9]+}}];
68 tail call void @llvm.nvvm.cp.async.mbarrier.arrive.noinc.shared(i64 addrspace(3)* %a)
72 declare void @llvm.nvvm.cp.async.ca.shared.global.4(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
; llvm.nvvm.cp.async.ca.shared.global.4, called with an i8 addrspace(3)* and an
; i8 addrspace(1)* argument, is expected to produce cp.async.ca.shared.global
; with two bracketed register operands followed by the immediate 4. The
; registers are %r under the CHECK_PTX32 prefix and %rd under CHECK_PTX64.
; The label directive uses the ALL prefix so that it is active in both RUN
; lines; FileCheck ignores directives written with a prefix no RUN line enables.
74 ; ALL-LABEL: asynccasharedglobal4i8
75 define void @asynccasharedglobal4i8(i8 addrspace(3)* %a, i8 addrspace(1)* %b) {
76 ; CHECK_PTX32: cp.async.ca.shared.global [%r{{[0-9]+}}], [%r{{[0-9]+}}], 4;
77 ; CHECK_PTX64: cp.async.ca.shared.global [%rd{{[0-9]+}}], [%rd{{[0-9]+}}], 4;
78 tail call void @llvm.nvvm.cp.async.ca.shared.global.4(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
82 declare void @llvm.nvvm.cp.async.ca.shared.global.8(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
; llvm.nvvm.cp.async.ca.shared.global.8, called with an i8 addrspace(3)* and an
; i8 addrspace(1)* argument, is expected to produce cp.async.ca.shared.global
; with two bracketed register operands followed by the immediate 8. The
; registers are %r under the CHECK_PTX32 prefix and %rd under CHECK_PTX64.
; The label directive uses the ALL prefix so that it is active in both RUN
; lines; FileCheck ignores directives written with a prefix no RUN line enables.
84 ; ALL-LABEL: asynccasharedglobal8i8
85 define void @asynccasharedglobal8i8(i8 addrspace(3)* %a, i8 addrspace(1)* %b) {
86 ; CHECK_PTX32: cp.async.ca.shared.global [%r{{[0-9]+}}], [%r{{[0-9]+}}], 8;
87 ; CHECK_PTX64: cp.async.ca.shared.global [%rd{{[0-9]+}}], [%rd{{[0-9]+}}], 8;
88 tail call void @llvm.nvvm.cp.async.ca.shared.global.8(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
92 declare void @llvm.nvvm.cp.async.ca.shared.global.16(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
; llvm.nvvm.cp.async.ca.shared.global.16, called with an i8 addrspace(3)* and an
; i8 addrspace(1)* argument, is expected to produce cp.async.ca.shared.global
; with two bracketed register operands followed by the immediate 16. The
; registers are %r under the CHECK_PTX32 prefix and %rd under CHECK_PTX64.
; The label directive uses the ALL prefix so that it is active in both RUN
; lines; FileCheck ignores directives written with a prefix no RUN line enables.
94 ; ALL-LABEL: asynccasharedglobal16i8
95 define void @asynccasharedglobal16i8(i8 addrspace(3)* %a, i8 addrspace(1)* %b) {
96 ; CHECK_PTX32: cp.async.ca.shared.global [%r{{[0-9]+}}], [%r{{[0-9]+}}], 16;
97 ; CHECK_PTX64: cp.async.ca.shared.global [%rd{{[0-9]+}}], [%rd{{[0-9]+}}], 16;
98 tail call void @llvm.nvvm.cp.async.ca.shared.global.16(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
102 declare void @llvm.nvvm.cp.async.cg.shared.global.16(i8 addrspace(3)* %a, i8 addrspace(1)* %b)
; llvm.nvvm.cp.async.cg.shared.global.16, called with an i8 addrspace(3)* and an
; i8 addrspace(1)* argument, is expected to produce cp.async.cg.shared.global
; with two bracketed register operands followed by the immediate 16. The
; registers are %r under the CHECK_PTX32 prefix and %rd under CHECK_PTX64.
; The label directive uses the ALL prefix so that it is active in both RUN
; lines; FileCheck ignores directives written with a prefix no RUN line enables.
104 ; ALL-LABEL: asynccgsharedglobal16i8
105 define void @asynccgsharedglobal16i8(i8 addrspace(3)* %a, i8 addrspace(1)* %b) {
106 ; CHECK_PTX32: cp.async.cg.shared.global [%r{{[0-9]+}}], [%r{{[0-9]+}}], 16;
107 ; CHECK_PTX64: cp.async.cg.shared.global [%rd{{[0-9]+}}], [%rd{{[0-9]+}}], 16;
108 tail call void @llvm.nvvm.cp.async.cg.shared.global.16(i8 addrspace(3)* %a, i8 addrspace(1)* %b)