1 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
2 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s
4 # GCN-LABEL: name: diffoporder_add
5 # GCN: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0
6 # GCN: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
11 %0:sgpr_64 = COPY $sgpr0_sgpr1
12 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
13 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
14 %4:sreg_32_xm0 = COPY $sgpr101
15 %5:sreg_32_xm0 = S_MOV_B32 0
16 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
18 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
19 %6:vreg_64 = COPY $vgpr0_vgpr1
20 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
21 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
22 %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
23 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
24 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
25 %12:sgpr_32 = COPY %1.sub1
27 %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
28 %16:vgpr_32 = COPY %12
29 %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
30 %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
31 %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
32 %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
33 %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
34 %25:sgpr_32 = S_MOV_B32 4096
35 %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %25, %21, 0, implicit $exec
36 %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
37 %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
38 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
39 %32:sgpr_32 = S_MOV_B32 6144
40 %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
41 %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
42 %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
43 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
47 # GCN-LABEL: name: LowestInMiddle
48 # GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 6400
49 # GFX11: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11200
51 # GCN: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
52 # GCN: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
53 # GCN: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
54 # GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 1600, 0
55 # GFX11: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -3200, 0
57 # GFX11: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 6400
58 # GFX11: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
59 # GFX11: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
60 # GFX11: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
61 # GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
62 # GFX11: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
64 # GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 11200
65 # GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
66 # GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
67 # GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
68 # GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
69 # GFX11: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
74 %0:sgpr_64 = COPY $sgpr0_sgpr1
75 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
76 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
77 %4:sreg_32_xm0 = COPY $sgpr101
78 %5:sreg_32_xm0 = S_MOV_B32 0
79 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
81 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
82 %6:vreg_64 = COPY $vgpr0_vgpr1
83 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
84 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
85 %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
86 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
87 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
88 %12:sgpr_32 = COPY %1.sub1
90 %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
91 %16:vgpr_32 = COPY %12
92 %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
93 %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
94 %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
95 %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
96 %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
97 %25:sgpr_32 = S_MOV_B32 8000
98 %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
99 %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
100 %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
101 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
102 %32:sgpr_32 = S_MOV_B32 6400
103 %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
104 %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
105 %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
106 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
107 %39:sgpr_32 = S_MOV_B32 11200
108 %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
109 %42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
110 %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
111 %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
115 # GCN-LABEL: name: NegativeDistance
116 # GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
117 # GFX11: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240
118 # GCN: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
119 # GCN: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
120 # GCN: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
121 # GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
122 # GFX11: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -4096, 0
123 # GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
124 # GFX11: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
125 # GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 10240
126 # GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
127 # GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
128 # GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
129 # GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0
130 # GFX11: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
132 name: NegativeDistance
135 %0:sgpr_64 = COPY $sgpr0_sgpr1
136 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
137 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
138 %4:sreg_32_xm0 = COPY $sgpr101
139 %5:sreg_32_xm0 = S_MOV_B32 0
140 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
142 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
143 %6:vreg_64 = COPY $vgpr0_vgpr1
144 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
145 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
146 %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
147 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
148 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
149 %12:sgpr_32 = COPY %1.sub1
150 %13:vgpr_32 = COPY %5
151 %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
152 %16:vgpr_32 = COPY %12
153 %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
154 %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
155 %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
156 %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
157 %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
158 %25:sgpr_32 = S_MOV_B32 6144
159 %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
160 %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
161 %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
162 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
163 %32:sgpr_32 = S_MOV_B32 8192
164 %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
165 %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
166 %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
167 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
168 %39:sgpr_32 = S_MOV_B32 10240
169 %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
170 %42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
171 %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
172 %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
176 # Tests for a successful compilation.
180 %0:sgpr_64 = COPY $sgpr0_sgpr1
181 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
182 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
183 %4:sreg_32_xm0 = COPY $sgpr101
184 %5:sreg_32_xm0 = S_MOV_B32 0
185 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
187 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
188 %6:vreg_64 = COPY $vgpr0_vgpr1
189 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
190 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
191 %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
192 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
193 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
194 %12:sgpr_32 = COPY %1.sub1
195 %13:vgpr_32 = COPY %5
196 %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
197 %16:vgpr_32 = COPY %12
198 %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
199 %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
200 %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
201 %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
202 %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
204 %25:sgpr_32 = S_MOV_B32 6144
205 %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
206 %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
207 %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
208 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
212 # GCN-LABEL: name: diffoporder_add_store
213 # GCN: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0
214 # GCN: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0
216 name: diffoporder_add_store
220 %0:vreg_64 = COPY $vgpr0_vgpr1
222 %1:sgpr_32 = S_MOV_B32 4000
223 %2:vgpr_32, %3:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
224 %4:vgpr_32, dead %5:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
225 %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
226 GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec
228 %8:sgpr_32 = S_MOV_B32 3000
229 %9:vgpr_32, %10:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
230 %11:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
231 %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
232 GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec