1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc --global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=GFX11
3 ; RUN: llc --global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=GFX10
; Chain-call targets used by the tests in this file. Both take a <3 x i32>
; inreg argument followed by a { i32, ptr addrspace(5), i32, i32 } struct;
; they differ in name and calling convention (amdgpu_cs_chain vs.
; amdgpu_cs_chain_preserve).
5 declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })
6 declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })
; The chain intrinsic under test, declared here as variadic and noreturn with
; the concrete operand types used by the call sites in this file.
7 declare void @llvm.amdgcn.cs.chain(ptr, i32, <3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }, i32, ...) noreturn
; Chains from an amdgpu_cs_chain function to @callee through
; llvm.amdgcn.cs.chain, forwarding the incoming %sgpr and %vgpr arguments
; unchanged, with i32 -1 and i32 0 as the remaining intrinsic operands.
; Expected output of the IRTranslator, per the checks below:
;   - %sgpr is read from and written back to $sgpr0-$sgpr2;
;   - the four fields of %vgpr are read from and written back to
;     $vgpr8-$vgpr11;
;   - the call becomes SI_CS_CHAIN_TC_W32 with those registers listed as
;     implicit operands.
; The gfx1030 checks additionally expect $sgpr48_sgpr49_sgpr50_sgpr51 to be
; copied and listed as an implicit operand of the chain instruction; the
; gfx1100 checks do not.
; NOTE(review): $sgpr48-$sgpr51 is presumably the scratch resource
; descriptor - confirm against the AMDGPU call lowering.
9 define amdgpu_cs_chain void @chain_call(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) {
10 ; GFX11-LABEL: name: chain_call
11 ; GFX11: bb.1 (%ir-block.0):
12 ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11
14 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
15 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
16 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
17 ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
18 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr8
19 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $vgpr9
20 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr10
21 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr11
22 ; GFX11-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee
23 ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
24 ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
25 ; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee
26 ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
27 ; GFX11-NEXT: $sgpr0 = COPY [[UV]](s32)
28 ; GFX11-NEXT: $sgpr1 = COPY [[UV1]](s32)
29 ; GFX11-NEXT: $sgpr2 = COPY [[UV2]](s32)
30 ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32)
31 ; GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5)
32 ; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32)
33 ; GFX11-NEXT: $vgpr11 = COPY [[COPY6]](s32)
34 ; GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[GV1]](p0), @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
36 ; GFX10-LABEL: name: chain_call
37 ; GFX10: bb.1 (%ir-block.0):
38 ; GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11
40 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
41 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
42 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
43 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
44 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr8
45 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $vgpr9
46 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr10
47 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr11
48 ; GFX10-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee
49 ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
50 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
51 ; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee
52 ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
53 ; GFX10-NEXT: $sgpr0 = COPY [[UV]](s32)
54 ; GFX10-NEXT: $sgpr1 = COPY [[UV1]](s32)
55 ; GFX10-NEXT: $sgpr2 = COPY [[UV2]](s32)
56 ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32)
57 ; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5)
58 ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32)
59 ; GFX10-NEXT: $vgpr11 = COPY [[COPY6]](s32)
60 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51
61 ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]](<4 x s32>)
62 ; GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[GV1]](p0), @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
63 call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee, i32 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0)
; Chains from an amdgpu_cs_chain function to @callee_preserve through
; llvm.amdgcn.cs.chain, forwarding the incoming %sgpr and %vgpr arguments
; unchanged, with i32 -1 and i32 0 as the remaining intrinsic operands.
; Expected output of the IRTranslator, per the checks below:
;   - %sgpr is read from and written back to $sgpr0-$sgpr2;
;   - the four fields of %vgpr are read from and written back to
;     $vgpr8-$vgpr11;
;   - the call becomes SI_CS_CHAIN_TC_W32 targeting @callee_preserve, with
;     those registers listed as implicit operands.
; The gfx1030 checks additionally expect $sgpr48_sgpr49_sgpr50_sgpr51 to be
; copied and listed as an implicit operand of the chain instruction; the
; gfx1100 checks do not.
; NOTE(review): $sgpr48-$sgpr51 is presumably the scratch resource
; descriptor - confirm against the AMDGPU call lowering.
67 define amdgpu_cs_chain void @chain_preserve_call(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) {
68 ; GFX11-LABEL: name: chain_preserve_call
69 ; GFX11: bb.1 (%ir-block.0):
70 ; GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11
72 ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
73 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
74 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
75 ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
76 ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr8
77 ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $vgpr9
78 ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr10
79 ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr11
80 ; GFX11-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee_preserve
81 ; GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
82 ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
83 ; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve
84 ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
85 ; GFX11-NEXT: $sgpr0 = COPY [[UV]](s32)
86 ; GFX11-NEXT: $sgpr1 = COPY [[UV1]](s32)
87 ; GFX11-NEXT: $sgpr2 = COPY [[UV2]](s32)
88 ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32)
89 ; GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5)
90 ; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32)
91 ; GFX11-NEXT: $vgpr11 = COPY [[COPY6]](s32)
92 ; GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[GV1]](p0), @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11
94 ; GFX10-LABEL: name: chain_preserve_call
95 ; GFX10: bb.1 (%ir-block.0):
96 ; GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9, $vgpr10, $vgpr11
98 ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
99 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
100 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2
101 ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
102 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr8
103 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $vgpr9
104 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr10
105 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr11
106 ; GFX10-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @callee_preserve
107 ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
108 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
109 ; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve
110 ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>)
111 ; GFX10-NEXT: $sgpr0 = COPY [[UV]](s32)
112 ; GFX10-NEXT: $sgpr1 = COPY [[UV1]](s32)
113 ; GFX10-NEXT: $sgpr2 = COPY [[UV2]](s32)
114 ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32)
115 ; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5)
116 ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32)
117 ; GFX10-NEXT: $vgpr11 = COPY [[COPY6]](s32)
118 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr48_sgpr49_sgpr50_sgpr51
119 ; GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]](<4 x s32>)
120 ; GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[GV1]](p0), @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51
121 call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr @callee_preserve, i32 -1, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 0)