1 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
2 # RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 # GCN-LABEL: name: diffoporder_add
5 # GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0
6 # GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
8 # GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
9 # GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
; Two GLOBAL_LOAD_DWORDX2 from one 64-bit base (%21 low, %23 high) plus 4096 and plus 6144.
14 %0:sgpr_64 = COPY $sgpr0_sgpr1
15 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
16 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
17 %4:sreg_32_xm0 = COPY $sgpr101
18 %5:sreg_32_xm0 = S_MOV_B32 0
19 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
21 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
22 %6:vreg_64 = COPY $vgpr0_vgpr1
23 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
24 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
25 %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
26 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
27 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
28 %12:sgpr_32 = COPY %1.sub1
; %13 (a copy of the zero held in %5) is the second operand of the carry add that produces %17.
29 %13:vgpr_32 = COPY %5
30 %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
31 %16:vgpr_32 = COPY %12
32 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
33 %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
34 %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
35 %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
36 %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
; Load 1: base + 4096, with the constant as the FIRST operand of the low add (%25, %21).
37 %25:sgpr_32 = S_MOV_B32 4096
38 %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %25, %21, 0, implicit $exec
39 %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
40 %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
41 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
; Load 2: base + 6144, with the constant as the SECOND operand of the low add (%21, %32).
42 %32:sgpr_32 = S_MOV_B32 6144
43 %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
44 %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
45 %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
46 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
50 # GCN-LABEL: name: LowestInMiddle
51 # GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11200
52 # GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
53 # GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
54 # GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
55 # GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -3200, 0
57 # GFX9: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 6400
58 # GFX9: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
59 # GFX9: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
60 # GFX9: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
61 # GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
62 # GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
64 # GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
65 # GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
66 # GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
; Three GLOBAL_LOAD_DWORDX2 from one 64-bit base (%21 low, %23 high) plus 8000, 6400 and 11200;
; the smallest constant belongs to the middle load.
72 %0:sgpr_64 = COPY $sgpr0_sgpr1
73 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
74 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
75 %4:sreg_32_xm0 = COPY $sgpr101
76 %5:sreg_32_xm0 = S_MOV_B32 0
77 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
79 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
80 %6:vreg_64 = COPY $vgpr0_vgpr1
81 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
82 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
83 %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
84 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
85 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
86 %12:sgpr_32 = COPY %1.sub1
; %13 (a copy of the zero held in %5) is the second operand of the carry add that produces %17.
87 %13:vgpr_32 = COPY %5
88 %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
89 %16:vgpr_32 = COPY %12
90 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
91 %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
92 %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
93 %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
94 %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
; Load 1: base + 8000.
95 %25:sgpr_32 = S_MOV_B32 8000
96 %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
97 %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
98 %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
99 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
; Load 2: base + 6400 (the lowest of the three constants).
100 %32:sgpr_32 = S_MOV_B32 6400
101 %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
102 %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
103 %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
104 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
; Load 3: base + 11200 (the highest of the three constants).
105 %39:sgpr_32 = S_MOV_B32 11200
106 %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
107 %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
108 %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
109 %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
113 # GCN-LABEL: name: NegativeDistance
114 # GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240
115 # GFX9: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
116 # GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
117 # GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_4]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
118 # GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -4096, 0
119 # GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
120 # GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
122 # GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
123 # GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
124 # GFX8: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
# Three GLOBAL_LOAD_DWORDX2 from one 64-bit base plus 6144, 8192 and 10240 (ascending, 2048 apart).
126 name: NegativeDistance
129 %0:sgpr_64 = COPY $sgpr0_sgpr1
130 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
131 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
132 %4:sreg_32_xm0 = COPY $sgpr101
133 %5:sreg_32_xm0 = S_MOV_B32 0
134 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
136 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
137 %6:vreg_64 = COPY $vgpr0_vgpr1
138 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
139 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
140 %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
141 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
142 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
143 %12:sgpr_32 = COPY %1.sub1
144 %13:vgpr_32 = COPY %5
145 %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
146 %16:vgpr_32 = COPY %12
147 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
148 %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
149 %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
; %21 (low) and %23 (high) form the common base that all three loads add a constant to.
150 %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
151 %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
; Load 1: base + 6144.
152 %25:sgpr_32 = S_MOV_B32 6144
153 %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
154 %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
155 %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
156 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
; Load 2: base + 8192.
157 %32:sgpr_32 = S_MOV_B32 8192
158 %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
159 %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
160 %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
161 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
; Load 3: base + 10240.
162 %39:sgpr_32 = S_MOV_B32 10240
163 %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
164 %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
165 %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
166 %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
170 # Tests for a successful compilation: the following function has no check lines and only needs to get through the pass without crashing.
; One GLOBAL_LOAD_DWORDX2 at base + 6144; the carry add for the high half takes the immediate
; 4294967295 rather than the 0 used by the preceding functions.
174 %0:sgpr_64 = COPY $sgpr0_sgpr1
175 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
176 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
177 %4:sreg_32_xm0 = COPY $sgpr101
178 %5:sreg_32_xm0 = S_MOV_B32 0
179 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
181 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
182 %6:vreg_64 = COPY $vgpr0_vgpr1
183 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
184 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
185 %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
186 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
187 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
188 %12:sgpr_32 = COPY %1.sub1
189 %13:vgpr_32 = COPY %5
190 %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
191 %16:vgpr_32 = COPY %12
192 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
193 %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
194 %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
195 %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
196 %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
; Low half adds 6144; the high half adds 4294967295 plus the carry.
198 %25:sgpr_32 = S_MOV_B32 6144
199 %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
200 %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
201 %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
202 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
206 # GCN-LABEL: name: diffoporder_add_store
207 # GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
208 # GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
210 # GFX8: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 0, 0
211 # GFX8: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0
# Two GLOBAL_STORE_DWORD to %0 + 4000 and %0 + 3000 (constants 1000 apart).
213 name: diffoporder_add_store
217 %0:vreg_64 = COPY $vgpr0_vgpr1
; First address: %0 + 4000, as a low add with carry-out followed by a carry add; stores %0.sub0.
219 %1:sgpr_32 = S_MOV_B32 4000
220 %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
221 %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
222 %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
223 GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec
; Second address: %0 + 3000; stores %0.sub1.
225 %8:sgpr_32 = S_MOV_B32 3000
226 %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
227 %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
228 %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
229 GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec
233 # GCN-LABEL: name: diffoporder_add_flat_load
234 # GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 1000, 0,
235 # GFX9: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
237 # GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
238 # GFX8: FLAT_LOAD_DWORD %{{[0-9]+}}, 0, 0,
# Two FLAT_LOAD_DWORD from %0 + 4000 and %0 + 3000 (constants 1000 apart).
240 name: diffoporder_add_flat_load
244 %0:vreg_64 = COPY $vgpr0_vgpr1
; First address: %0 + 4000, as a low add with carry-out followed by a carry add.
246 %1:sgpr_32 = S_MOV_B32 4000
247 %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
248 %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
249 %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
250 %14:vgpr_32 = FLAT_LOAD_DWORD %6, 0, 0, implicit $exec, implicit $flat_scr
; Second address: %0 + 3000.
252 %8:sgpr_32 = S_MOV_B32 3000
253 %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
254 %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
255 %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
256 %15:vgpr_32 = FLAT_LOAD_DWORD %13, 0, 0, implicit $exec, implicit $flat_scr
260 # GCN-LABEL: name: diffoporder_add_flat_store
261 # GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
262 # GFX9: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
264 # GFX8: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub0, 0, 0,
265 # GFX8: FLAT_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
# Two FLAT_STORE_DWORD to %0 + 4000 and %0 + 3000 (constants 1000 apart).
267 name: diffoporder_add_flat_store
271 %0:vreg_64 = COPY $vgpr0_vgpr1
; First address: %0 + 4000, as a low add with carry-out followed by a carry add; stores %0.sub0.
273 %1:sgpr_32 = S_MOV_B32 4000
274 %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
275 %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
276 %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
277 FLAT_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
; Second address: %0 + 3000; stores %0.sub1.
279 %8:sgpr_32 = S_MOV_B32 3000
280 %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
281 %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
282 %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
283 FLAT_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec, implicit $flat_scr
287 # GCN-LABEL: name: diffoporder_add_global_atomic_cmpswap
288 # GFX9: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 1000, 0,
289 # GFX9: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
291 # GFX8: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
292 # GFX8: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
# Two GLOBAL_ATOMIC_CMPSWAP (no returned value) at %0 + 4000 and %0 + 3000 (constants 1000 apart).
294 name: diffoporder_add_global_atomic_cmpswap
298 %0:vreg_64 = COPY $vgpr0_vgpr1
; First address: %0 + 4000, as a low add with carry-out followed by a carry add; data operand is the full %0.
300 %1:sgpr_32 = S_MOV_B32 4000
301 %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
302 %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
303 %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
304 GLOBAL_ATOMIC_CMPSWAP %6:vreg_64, %0:vreg_64, 0, 0, implicit $exec
; Second address: %0 + 3000.
306 %8:sgpr_32 = S_MOV_B32 3000
307 %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
308 %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
309 %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
310 GLOBAL_ATOMIC_CMPSWAP %13:vreg_64, %0:vreg_64, 0, 0, implicit $exec
314 # GCN-LABEL: name: diffoporder_add_flat_atomic_cmpswap
315 # GFX9: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 1000, 0,
316 # GFX9: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
318 # GFX8: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
319 # GFX8: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
# Two FLAT_ATOMIC_CMPSWAP (no returned value) at %0 + 4000 and %0 + 3000 (constants 1000 apart).
321 name: diffoporder_add_flat_atomic_cmpswap
325 %0:vreg_64 = COPY $vgpr0_vgpr1
; First address: %0 + 4000, as a low add with carry-out followed by a carry add; data operand is the full %0.
327 %1:sgpr_32 = S_MOV_B32 4000
328 %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
329 %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
330 %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
331 FLAT_ATOMIC_CMPSWAP %6:vreg_64, %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr
; Second address: %0 + 3000.
334 %8:sgpr_32 = S_MOV_B32 3000
335 %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
336 %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
337 %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
338 FLAT_ATOMIC_CMPSWAP %13:vreg_64, %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr
342 # GCN-LABEL: name: diffoporder_add_global_atomic_add
343 # GFX9: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 1000, 0,
344 # GFX9: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
346 # GFX8: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
347 # GFX8: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
# Two GLOBAL_ATOMIC_ADD (no returned value) at %0 + 4000 and %0 + 3000 (constants 1000 apart).
349 name: diffoporder_add_global_atomic_add
353 %0:vreg_64 = COPY $vgpr0_vgpr1
; First address: %0 + 4000, as a low add with carry-out followed by a carry add; data operand is %0.sub0.
355 %1:sgpr_32 = S_MOV_B32 4000
356 %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
357 %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
358 %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
359 GLOBAL_ATOMIC_ADD %6:vreg_64, %0.sub0, 0, 0, implicit $exec
; Second address: %0 + 3000; same data operand.
361 %8:sgpr_32 = S_MOV_B32 3000
362 %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
363 %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
364 %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
365 GLOBAL_ATOMIC_ADD %13:vreg_64, %0.sub0, 0, 0, implicit $exec
369 # GCN-LABEL: name: diffoporder_add_flat_atomic_add
370 # GFX9: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 1000, 0,
371 # GFX9: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
373 # GFX8: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
374 # GFX8: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
# Two FLAT_ATOMIC_ADD (no returned value) at %0 + 4000 and %0 + 3000 (constants 1000 apart).
376 name: diffoporder_add_flat_atomic_add
380 %0:vreg_64 = COPY $vgpr0_vgpr1
; First address: %0 + 4000, as a low add with carry-out followed by a carry add; data operand is %0.sub0.
382 %1:sgpr_32 = S_MOV_B32 4000
383 %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
384 %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
385 %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
386 FLAT_ATOMIC_ADD %6:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
; Second address: %0 + 3000; same data operand.
389 %8:sgpr_32 = S_MOV_B32 3000
390 %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
391 %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
392 %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
393 FLAT_ATOMIC_ADD %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
397 # GCN-LABEL: name: diffoporder_add_global_atomic_add_rtn
398 # GFX9: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 1000, 0,
399 # GFX9: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
401 # GFX8: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
402 # GFX8: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
# Two GLOBAL_ATOMIC_ADD_RTN (results in %14 and %15) at %0 + 4000 and %0 + 3000 (constants 1000 apart).
404 name: diffoporder_add_global_atomic_add_rtn
408 %0:vreg_64 = COPY $vgpr0_vgpr1
; First address: %0 + 4000, as a low add with carry-out followed by a carry add; data operand is %0.sub0.
410 %1:sgpr_32 = S_MOV_B32 4000
411 %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
412 %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
413 %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
414 %14:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %6:vreg_64, %0.sub0, 0, 0, implicit $exec
; Second address: %0 + 3000; same data operand.
416 %8:sgpr_32 = S_MOV_B32 3000
417 %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
418 %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
419 %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
420 %15:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec
424 # GCN-LABEL: name: diffoporder_add_flat_atomic_add_rtn
425 # GFX9: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 1000, 0,
426 # GFX9: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
428 # GFX8: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
429 # GFX8: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
# Two FLAT_ATOMIC_ADD_RTN (results in %14 and %15) at %0 + 4000 and %0 + 3000 (constants 1000 apart).
431 name: diffoporder_add_flat_atomic_add_rtn
435 %0:vreg_64 = COPY $vgpr0_vgpr1
; First address: %0 + 4000, as a low add with carry-out followed by a carry add; data operand is %0.sub0.
437 %1:sgpr_32 = S_MOV_B32 4000
438 %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
439 %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
440 %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
441 %14:vgpr_32 = FLAT_ATOMIC_ADD_RTN %6:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
; Second address: %0 + 3000; same data operand.
444 %8:sgpr_32 = S_MOV_B32 3000
445 %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
446 %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
447 %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
448 %15:vgpr_32 = FLAT_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
452 # GCN-LABEL: name: negative_offset_nullptr
453 # GCN: V_ADD_CO_U32_e64 -1, 0, 0
454 # GCN: V_ADDC_U32_e64 -1, %{{[0-9]+}}, %{{[0-9]+}}, 0
# A single FLAT_LOAD_UBYTE whose 64-bit address is formed by adding -1 to each half:
# the low half adds -1 to the literal 0, the high half adds -1 (plus carry) to the high word of $src_private_base.
456 name: negative_offset_nullptr
459 %0:sreg_64 = S_MOV_B64 $src_private_base
460 %1:sreg_32 = S_MOV_B32 0
; %2 pairs a zero low word with the high word of %0.
461 %2:sreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0.sub1, %subreg.sub1
; Low half: -1 + 0, carry-out in %4.
462 %3:vgpr_32, %4:sreg_64_xexec = V_ADD_CO_U32_e64 -1, 0, 0, implicit $exec
463 %5:vgpr_32 = COPY %2.sub1
; High half: -1 + %5 + carry from %4.
464 %6:vgpr_32, %7:sreg_64 = V_ADDC_U32_e64 -1, %5, %4, 0, implicit $exec
465 %8:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %6, %subreg.sub1
466 %9:vgpr_32 = FLAT_LOAD_UBYTE %8, 0, 0, implicit $exec, implicit $flat_scr
; The loaded value is kept live by passing it to S_ENDPGM as an implicit use.
467 S_ENDPGM 0, implicit %9