1 # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s
3 # GFX9-LABEL: name: diffoporder_add
5 # GFX9: %{{[0-9]+}}:vreg_64_align2 = REG_SEQUENCE
8 # GFX9-NEXT: V_ADD_CO_U32
9 # GFX9-NEXT: V_ADDC_U32
10 # GFX9-NEXT: [[PTR0:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE
11 # GFX9-NEXT: %{{[0-9]+}}:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[PTR0]], -2048, 0
12 # GFX9: %{{[0-9]+}}:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[PTR0]], 0, 0
; Build a 64-bit base address from the kernarg pointer (%1) plus a masked,
; thread-dependent low word (%11), then form two addresses that differ only
; by a constant (+4096 vs +6144, distance 2048) and load through both.
; The two V_ADD_CO_U32 feeding the loads use *swapped* operand order
; (sgpr-first vs vgpr-first) — that is the "diffoporder" being tested: the
; pass must still recognize the common base and (per the CHECK lines above)
; fold the loads to one REG_SEQUENCE base with -2048 / 0 immediate offsets.
17 %0:sgpr_64 = COPY $sgpr0_sgpr1
18 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
19 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
20 %4:sreg_32_xm0 = COPY $sgpr101
21 %5:sreg_32_xm0 = S_MOV_B32 0
22 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
24 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
25 %6:vreg_64_align2 = COPY $vgpr0_vgpr1
26 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
27 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
28 %9:vreg_64_align2 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
29 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
30 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
31 %12:sgpr_32 = COPY %1.sub1
; NOTE(review): %13 (carry-in high-word operand below) is defined on a line
; elided from this excerpt — presumably a VGPR copy of %5; confirm in full file.
33 %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
34 %16:vgpr_32 = COPY %12
35 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
36 %19:vreg_64_align2 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
37 %20:vreg_64_align2 = V_LSHLREV_B64_e64 3, %9, implicit $exec
; Common 64-bit base: (%21, %23).
38 %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
39 %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
; First load: base + 4096, with the constant as the FIRST add operand.
40 %25:sgpr_32 = S_MOV_B32 4096
41 %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %25, %21, 0, implicit $exec
42 %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
43 %30:vreg_64_align2 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
44 %31:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
; Second load: base + 6144, with the constant as the SECOND add operand.
45 %32:sgpr_32 = S_MOV_B32 6144
46 %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
47 %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
48 %37:vreg_64_align2 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
49 %38:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
53 # GFX9-LABEL: name: LowestInMiddle
54 # GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11200
55 # GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
56 # GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
57 # GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
58 # GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -3200, 0
60 # GFX9: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 6400
61 # GFX9: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
62 # GFX9: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
63 # GFX9: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
64 # GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
65 # GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
; Same base-address setup as diffoporder_add, then THREE loads at constant
; distances +8000, +6400, +11200 from the shared base (%21, %23). The lowest
; offset (6400) sits in the *middle* of program order — the case the test name
; refers to. Per the CHECK lines above, the pass anchors a new base at 11200
; and rewrites the +8000 load as offset -3200 from it, while the +6400 load
; keeps its own base with offset 0.
70 %0:sgpr_64 = COPY $sgpr0_sgpr1
71 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
72 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
73 %4:sreg_32_xm0 = COPY $sgpr101
74 %5:sreg_32_xm0 = S_MOV_B32 0
75 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
77 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
78 %6:vreg_64_align2 = COPY $vgpr0_vgpr1
79 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
80 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
81 %9:vreg_64_align2 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
82 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
83 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
84 %12:sgpr_32 = COPY %1.sub1
; NOTE(review): %13 used below is defined on a line elided from this excerpt.
86 %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
87 %16:vgpr_32 = COPY %12
88 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
89 %19:vreg_64_align2 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
90 %20:vreg_64_align2 = V_LSHLREV_B64_e64 3, %9, implicit $exec
; Shared 64-bit base: (%21, %23).
91 %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
92 %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
; Load 1: base + 8000 (highest-but-one offset, first in program order).
93 %25:sgpr_32 = S_MOV_B32 8000
94 %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
95 %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
96 %30:vreg_64_align2 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
97 %31:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
; Load 2: base + 6400 (lowest offset, middle of program order).
98 %32:sgpr_32 = S_MOV_B32 6400
99 %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
100 %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
101 %37:vreg_64_align2 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
102 %38:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
; Load 3: base + 11200 (highest offset, last in program order).
103 %39:sgpr_32 = S_MOV_B32 11200
104 %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
105 %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
106 %44:vreg_64_align2 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
107 %45:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
111 # GFX9-LABEL: name: NegativeDistance
112 # GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240
113 # GFX9: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
114 # GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
115 # GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_4]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
116 # GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -4096, 0
117 # GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
118 # GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
# Three loads at +6144, +8192, +10240 from a common base. Per the CHECK lines
# above, the pass anchors the new base at the HIGHEST offset (10240) so the
# other two loads get negative immediate offsets (-4096, -2048) — the
# "negative distance" case.
120 name: NegativeDistance
123 %0:sgpr_64 = COPY $sgpr0_sgpr1
124 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
125 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
; NOTE(review): this copies $sgpr10, while the sibling tests copy $sgpr101 —
; harmless here (%4 is unused in this excerpt), but confirm it is intentional.
126 %4:sreg_32_xm0 = COPY $sgpr10
127 %5:sreg_32_xm0 = S_MOV_B32 0
128 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
130 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
131 %6:vreg_64_align2 = COPY $vgpr0_vgpr1
132 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
133 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
134 %9:vreg_64_align2 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
135 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
136 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
137 %12:sgpr_32 = COPY %1.sub1
138 %13:vgpr_32 = COPY %5
139 %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
140 %16:vgpr_32 = COPY %12
141 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
142 %19:vreg_64_align2 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
143 %20:vreg_64_align2 = V_LSHLREV_B64_e64 3, %9, implicit $exec
; Shared 64-bit base: (%21, %23).
144 %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
145 %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
; Load 1: base + 6144.
146 %25:sgpr_32 = S_MOV_B32 6144
147 %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
148 %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
149 %30:vreg_64_align2 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
150 %31:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
; Load 2: base + 8192.
151 %32:sgpr_32 = S_MOV_B32 8192
152 %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
153 %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
154 %37:vreg_64_align2 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
155 %38:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
; Load 3: base + 10240 (the anchor per the CHECK lines).
156 %39:sgpr_32 = S_MOV_B32 10240
157 %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
158 %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
159 %44:vreg_64_align2 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
160 %45:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
164 # Tests for a successful compilation.
; Same setup as the tests above, but the single load's address adds 6144 to
; the low word while adding 4294967295 (0xFFFFFFFF, i.e. -1) into the high
; word via the V_ADDC operand — an effective 64-bit offset of 6144 - 2^32.
; Per the comment above this function, the test only requires that this
; compiles successfully; no CHECK lines constrain the output here.
; NOTE(review): this function's name:/body: header lines are elided from
; this excerpt.
168 %0:sgpr_64 = COPY $sgpr0_sgpr1
169 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
170 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
171 %4:sreg_32_xm0 = COPY $sgpr101
172 %5:sreg_32_xm0 = S_MOV_B32 0
173 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
175 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
176 %6:vreg_64_align2 = COPY $vgpr0_vgpr1
177 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
178 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
179 %9:vreg_64_align2 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
180 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
181 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
182 %12:sgpr_32 = COPY %1.sub1
183 %13:vgpr_32 = COPY %5
184 %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
185 %16:vgpr_32 = COPY %12
186 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
187 %19:vreg_64_align2 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
188 %20:vreg_64_align2 = V_LSHLREV_B64_e64 3, %9, implicit $exec
189 %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
190 %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
192 %25:sgpr_32 = S_MOV_B32 6144
193 %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
; High-word addend -1 (4294967295) makes the combined 64-bit offset negative.
194 %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
195 %30:vreg_64_align2 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
196 %31:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
200 # GFX9-LABEL: name: diffoporder_add_store
201 # GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
202 # GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
# Store variant of diffoporder_add: two GLOBAL_STORE_DWORDs at base+4000 and
# base+3000 (distance 1000). Per the CHECK lines above, the pass rewrites them
# to share a base, with immediate offsets 1000 and 0.
204 name: diffoporder_add_store
208 %0:vreg_64_align2 = COPY $vgpr0_vgpr1
; Store 1: (%0) + 4000.
210 %1:sgpr_32 = S_MOV_B32 4000
211 %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
212 %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
213 %6:vreg_64_align2 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
214 GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec
; Store 2: (%0) + 3000 — the lower offset, second in program order.
216 %8:sgpr_32 = S_MOV_B32 3000
217 %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
218 %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
219 %13:vreg_64_align2 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
220 GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec