[MachineScheduler] Fix physreg dependencies of ExitSU (#123541)
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / no-fold-accvgpr-read.mir
blob49c0aaf9fb39020957362f2853d47a3b8dc33b49
1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2 # RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -run-pass si-fold-operands %s -o - | FileCheck %s
3 # RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx942 -start-before=si-fold-operands -stop-after=register-coalescer %s -o - | FileCheck %s --check-prefixes=COALESCE
4 # RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -start-before=si-fold-operands -stop-after=register-coalescer %s -o - | FileCheck %s --check-prefixes=GFX908-COALESCE
6 ...
7 ---
8 name:            test
9 tracksRegLiveness: true
10 body:             |
11   ; CHECK-LABEL: name: test
12   ; CHECK: bb.0:
13   ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
14   ; CHECK-NEXT:   liveins: $sgpr4_sgpr5
15   ; CHECK-NEXT: {{  $}}
16   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
17   ; CHECK-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
18   ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
19   ; CHECK-NEXT:   S_BITCMP0_B32 killed [[S_LOAD_DWORD_IMM]], 0, implicit-def $scc
20   ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit $scc
21   ; CHECK-NEXT: {{  $}}
22   ; CHECK-NEXT: bb.1:
23   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
24   ; CHECK-NEXT: {{  $}}
25   ; CHECK-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
26   ; CHECK-NEXT:   S_BRANCH %bb.3
27   ; CHECK-NEXT: {{  $}}
28   ; CHECK-NEXT: bb.2:
29   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
30   ; CHECK-NEXT: {{  $}}
31   ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
32   ; CHECK-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
33   ; CHECK-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
34   ; CHECK-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
35   ; CHECK-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_4:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
36   ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_4]], %subreg.sub3
37   ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
38   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
39   ; CHECK-NEXT:   [[V_MFMA_F32_16X16X16F16_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], 0, 0, 0, 0, implicit $mode, implicit $exec
40   ; CHECK-NEXT:   [[V_MFMA_F32_16X16X16F16_e64_1:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], killed [[V_MFMA_F32_16X16X16F16_e64_]], 0, 0, 0, implicit $mode, implicit $exec
41   ; CHECK-NEXT:   [[V_MFMA_F32_16X16X16F16_e64_2:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], killed [[V_MFMA_F32_16X16X16F16_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
42   ; CHECK-NEXT:   [[V_MFMA_F32_16X16X16F16_e64_3:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], killed [[V_MFMA_F32_16X16X16F16_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
43   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_MFMA_F32_16X16X16F16_e64_3]].sub0
44   ; CHECK-NEXT: {{  $}}
45   ; CHECK-NEXT: bb.3:
46   ; CHECK-NEXT:   [[PHI:%[0-9]+]]:agpr_32 = PHI [[V_ACCVGPR_WRITE_B32_e64_]], %bb.1, [[V_MFMA_F32_16X16X16F16_e64_3]].sub0, %bb.2
47   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[PHI]]
48   ; CHECK-NEXT:   [[V_CVT_F16_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[COPY3]], 0, 0, implicit $mode, implicit $exec
49   ; CHECK-NEXT:   [[V_PACK_B32_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, killed [[V_CVT_F16_F32_e64_]], 0, 0, 0, 0, implicit $mode, implicit $exec
50   ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
51   ; CHECK-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_PACK_B32_F16_e64_]], %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1
52   ; CHECK-NEXT:   [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3
53   ; CHECK-NEXT:   BUFFER_STORE_DWORDX2_OFFSET_exact [[REG_SEQUENCE2]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) null`, align 1, addrspace 8)
54   ; CHECK-NEXT:   S_ENDPGM 0
55   ;
56   ; COALESCE-LABEL: name: test
57   ; COALESCE: bb.0:
58   ; COALESCE-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
59   ; COALESCE-NEXT:   liveins: $sgpr4_sgpr5
60   ; COALESCE-NEXT: {{  $}}
61   ; COALESCE-NEXT:   [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
62   ; COALESCE-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
63   ; COALESCE-NEXT:   undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 0
64   ; COALESCE-NEXT:   S_BITCMP0_B32 [[S_LOAD_DWORD_IMM]], 0, implicit-def $scc
65   ; COALESCE-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
66   ; COALESCE-NEXT: {{  $}}
67   ; COALESCE-NEXT: bb.1:
68   ; COALESCE-NEXT:   successors: %bb.3(0x80000000)
69   ; COALESCE-NEXT: {{  $}}
70   ; COALESCE-NEXT:   undef [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub0:areg_128_align2 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
71   ; COALESCE-NEXT:   S_BRANCH %bb.3
72   ; COALESCE-NEXT: {{  $}}
73   ; COALESCE-NEXT: bb.2:
74   ; COALESCE-NEXT:   successors: %bb.3(0x80000000)
75   ; COALESCE-NEXT: {{  $}}
76   ; COALESCE-NEXT:   [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
77   ; COALESCE-NEXT:   [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B32_]].sub0_sub1
78   ; COALESCE-NEXT:   [[V_MFMA_F32_16X16X16F16_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], 0, 0, 0, 0, implicit $mode, implicit $exec
79   ; COALESCE-NEXT:   [[V_MFMA_F32_16X16X16F16_e64_1:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[V_MFMA_F32_16X16X16F16_e64_]], 0, 0, 0, implicit $mode, implicit $exec
80   ; COALESCE-NEXT:   [[V_MFMA_F32_16X16X16F16_e64_2:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[V_MFMA_F32_16X16X16F16_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
81   ; COALESCE-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[V_MFMA_F32_16X16X16F16_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
82   ; COALESCE-NEXT: {{  $}}
83   ; COALESCE-NEXT: bb.3:
84   ; COALESCE-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_ACCVGPR_WRITE_B32_e64_]].sub0
85   ; COALESCE-NEXT:   [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[COPY2]], implicit $mode, implicit $exec
86   ; COALESCE-NEXT:   undef [[V_PACK_B32_F16_e64_:%[0-9]+]].sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, [[V_CVT_F16_F32_e32_]], 0, 0, 0, 0, implicit $mode, implicit $exec
87   ; COALESCE-NEXT:   [[V_PACK_B32_F16_e64_:%[0-9]+]].sub1:vreg_64_align2 = V_MOV_B32_e32 0, implicit $exec
88   ; COALESCE-NEXT:   [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
89   ; COALESCE-NEXT:   [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_128 = COPY [[S_MOV_B32_]].sub0
90   ; COALESCE-NEXT:   [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_128 = COPY [[S_MOV_B32_]].sub0
91   ; COALESCE-NEXT:   BUFFER_STORE_DWORDX2_OFFSET_exact [[V_PACK_B32_F16_e64_]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) null`, align 1, addrspace 8)
92   ; COALESCE-NEXT:   S_ENDPGM 0
93   ;
94   ; GFX908-COALESCE-LABEL: name: test
95   ; GFX908-COALESCE: bb.0:
96   ; GFX908-COALESCE-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
97   ; GFX908-COALESCE-NEXT:   liveins: $sgpr4_sgpr5
98   ; GFX908-COALESCE-NEXT: {{  $}}
99   ; GFX908-COALESCE-NEXT:   [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
100   ; GFX908-COALESCE-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
101   ; GFX908-COALESCE-NEXT:   undef [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_128 = S_MOV_B32 0
102   ; GFX908-COALESCE-NEXT:   S_BITCMP0_B32 [[S_LOAD_DWORD_IMM]], 0, implicit-def $scc
103   ; GFX908-COALESCE-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
104   ; GFX908-COALESCE-NEXT: {{  $}}
105   ; GFX908-COALESCE-NEXT: bb.1:
106   ; GFX908-COALESCE-NEXT:   successors: %bb.3(0x80000000)
107   ; GFX908-COALESCE-NEXT: {{  $}}
108   ; GFX908-COALESCE-NEXT:   undef [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]].sub0:areg_128_align2 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
109   ; GFX908-COALESCE-NEXT:   S_BRANCH %bb.3
110   ; GFX908-COALESCE-NEXT: {{  $}}
111   ; GFX908-COALESCE-NEXT: bb.2:
112   ; GFX908-COALESCE-NEXT:   successors: %bb.3(0x80000000)
113   ; GFX908-COALESCE-NEXT: {{  $}}
114   ; GFX908-COALESCE-NEXT:   undef [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]].sub0:areg_128_align2 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
115   ; GFX908-COALESCE-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]].sub1:areg_128_align2 = COPY [[V_ACCVGPR_WRITE_B32_e64_1]].sub0
116   ; GFX908-COALESCE-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]].sub2:areg_128_align2 = COPY [[V_ACCVGPR_WRITE_B32_e64_1]].sub0
117   ; GFX908-COALESCE-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]].sub3:areg_128_align2 = COPY [[V_ACCVGPR_WRITE_B32_e64_1]].sub0
118   ; GFX908-COALESCE-NEXT:   [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
119   ; GFX908-COALESCE-NEXT:   [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY [[S_MOV_B32_]].sub0_sub1
120   ; GFX908-COALESCE-NEXT:   [[V_MFMA_F32_16X16X16F16_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[V_ACCVGPR_WRITE_B32_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
121   ; GFX908-COALESCE-NEXT:   [[V_MFMA_F32_16X16X16F16_e64_1:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[V_MFMA_F32_16X16X16F16_e64_]], 0, 0, 0, implicit $mode, implicit $exec
122   ; GFX908-COALESCE-NEXT:   [[V_MFMA_F32_16X16X16F16_e64_2:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[V_MFMA_F32_16X16X16F16_e64_1]], 0, 0, 0, implicit $mode, implicit $exec
123   ; GFX908-COALESCE-NEXT:   [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 [[COPY1]], [[COPY1]], [[V_MFMA_F32_16X16X16F16_e64_2]], 0, 0, 0, implicit $mode, implicit $exec
124   ; GFX908-COALESCE-NEXT: {{  $}}
125   ; GFX908-COALESCE-NEXT: bb.3:
126   ; GFX908-COALESCE-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_ACCVGPR_WRITE_B32_e64_]].sub0
127   ; GFX908-COALESCE-NEXT:   [[V_CVT_F16_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[COPY2]], implicit $mode, implicit $exec
128   ; GFX908-COALESCE-NEXT:   undef [[V_PACK_B32_F16_e64_:%[0-9]+]].sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, [[V_CVT_F16_F32_e32_]], 0, 0, 0, 0, implicit $mode, implicit $exec
129   ; GFX908-COALESCE-NEXT:   [[V_PACK_B32_F16_e64_:%[0-9]+]].sub1:vreg_64_align2 = V_MOV_B32_e32 0, implicit $exec
130   ; GFX908-COALESCE-NEXT:   [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub0
131   ; GFX908-COALESCE-NEXT:   [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_128 = COPY [[S_MOV_B32_]].sub0
132   ; GFX908-COALESCE-NEXT:   [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_128 = COPY [[S_MOV_B32_]].sub0
133   ; GFX908-COALESCE-NEXT:   BUFFER_STORE_DWORDX2_OFFSET_exact [[V_PACK_B32_F16_e64_]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) null`, align 1, addrspace 8)
134   ; GFX908-COALESCE-NEXT:   S_ENDPGM 0
135   bb.0:
136     successors: %bb.2, %bb.1
137     liveins: $sgpr4_sgpr5
139     %0:sgpr_64(p4) = COPY $sgpr4_sgpr5
140     %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0(p4), 0, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4)
141     %2:sgpr_32 = S_MOV_B32 0
142     S_BITCMP0_B32 killed %1, 0, implicit-def $scc
143     S_CBRANCH_SCC0 %bb.2, implicit $scc
145   bb.1:
146     successors: %bb.3
148     %3:sgpr_32 = COPY %2
149     %4:vgpr_32 = COPY %3, implicit $exec
150     S_BRANCH %bb.3
152   bb.2:
153     successors: %bb.3
155     %5:sgpr_32 = S_MOV_B32 0
156     %6:vgpr_32 = COPY %5
157     %7:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %6, implicit $exec
158     %8:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %6, implicit $exec
159     %9:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %6, implicit $exec
160     %10:agpr_32 = V_ACCVGPR_WRITE_B32_e64 %6, implicit $exec
161     %11:areg_128_align2 = REG_SEQUENCE %7, %subreg.sub0, %8, %subreg.sub1, %9, %subreg.sub2, %10, %subreg.sub3
162     %12:sreg_64 = REG_SEQUENCE %5, %subreg.sub0, %5, %subreg.sub1
163     %13:vreg_64_align2 = COPY %12
164     %14:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %13, %13, killed %11, 0, 0, 0, implicit $mode, implicit $exec
165     %15:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %13, %13, killed %14, 0, 0, 0, implicit $mode, implicit $exec
166     %16:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %13, %13, killed %15, 0, 0, 0, implicit $mode, implicit $exec
167     %17:areg_128_align2 = V_MFMA_F32_16X16X16F16_e64 %13, %13, killed %16, 0, 0, 0, implicit $mode, implicit $exec
168     %18:vgpr_32 = COPY %17.sub0
169     %19:vgpr_32 = COPY %18
171   bb.3:
172     %20:vgpr_32 = PHI %4, %bb.1, %19, %bb.2
173     %21:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, %20, 0, 0, implicit $mode, implicit $exec
174     %22:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, killed %21, 0, %2, 0, 0, implicit $mode, implicit $exec
175     %23:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
176     %24:vreg_64_align2 = REG_SEQUENCE %22, %subreg.sub0, killed %23, %subreg.sub1
177     %25:sgpr_128 = REG_SEQUENCE %2, %subreg.sub0, %2, %subreg.sub1, %2, %subreg.sub2, %2, %subreg.sub3
178     %26:vreg_64_align2 = COPY %24
179     BUFFER_STORE_DWORDX2_OFFSET_exact killed %26, killed %25, %2, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) null`, align 1, addrspace 8)
180     S_ENDPGM 0