1 # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s
# Test diffoporder_add: two 64-bit global loads whose addresses are the same
# 64-bit base (%21 low half, %23 high half) plus the constants 4096 and 6144.
# The low-half add for the first load takes the constant as its FIRST source
# operand (%25, %21); the add for the second load takes it as the SECOND
# source operand (%21, %32).
# The check lines below expect the two loads to come out with immediate
# offsets -2048 and 0 (4096 - 6144 = -2048).
# NOTE(review): %13 is used by the first V_ADDC_U32_e64 but its definition is
# not among the lines visible here (NegativeDistance defines it as COPY %5);
# confirm it is present in the full file.
3 # GFX9-LABEL: name: diffoporder_add
4 # GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0
5 # GFX9: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0
10 %0:sgpr_64 = COPY $sgpr0_sgpr1
11 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
12 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
13 %4:sreg_32_xm0 = COPY $sgpr101
14 %5:sreg_32_xm0 = S_MOV_B32 0
15 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
17 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
18 %6:vreg_64 = COPY $vgpr0_vgpr1
19 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
20 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
21 %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
22 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
23 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
24 %12:sgpr_32 = COPY %1.sub1
26 %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
27 %16:vgpr_32 = COPY %12
28 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
29 %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
30 %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
31 %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
32 %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
33 %25:sgpr_32 = S_MOV_B32 4096
34 %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %25, %21, 0, implicit $exec
35 %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
36 %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
37 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
38 %32:sgpr_32 = S_MOV_B32 6144
39 %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
40 %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
41 %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
42 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
# Test LowestInMiddle: three 64-bit global loads from the same base
# (%21 low half, %23 high half) plus the constants 8000, 6400 and 11200, in
# that order, so the smallest constant belongs to the middle load.
# The check lines below expect:
#   - the first load to use the base built from 11200 with immediate offset
#     -3200 (8000 - 11200 = -3200);
#   - the middle load to keep its own base built from 6400, with offset 0;
#   - the last load to use the 11200 base with offset 0.
# NOTE(review): 6400 - 11200 = -4800; presumably that distance does not fit
# the load's immediate offset field, which would be why the middle load is
# expected to stay unchanged -- confirm against the pass.
# NOTE(review): the checked S_MOV_B32 11200 has class sreg_32 while the input
# instruction below declares sgpr_32; presumably the pass materializes a new
# constant register -- confirm.
# NOTE(review): %13 is used by the first V_ADDC_U32_e64 but its definition is
# not among the lines visible here (NegativeDistance defines it as COPY %5);
# confirm it is present in the full file.
46 # GFX9-LABEL: name: LowestInMiddle
47 # GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11200
48 # GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
49 # GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
50 # GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
51 # GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -3200, 0
53 # GFX9: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 6400
54 # GFX9: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
55 # GFX9: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
56 # GFX9: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
57 # GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
58 # GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
63 %0:sgpr_64 = COPY $sgpr0_sgpr1
64 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
65 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
66 %4:sreg_32_xm0 = COPY $sgpr101
67 %5:sreg_32_xm0 = S_MOV_B32 0
68 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
70 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
71 %6:vreg_64 = COPY $vgpr0_vgpr1
72 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
73 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
74 %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
75 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
76 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
77 %12:sgpr_32 = COPY %1.sub1
79 %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
80 %16:vgpr_32 = COPY %12
81 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
82 %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
83 %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
84 %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
85 %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
86 %25:sgpr_32 = S_MOV_B32 8000
87 %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
88 %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
89 %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
90 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
91 %32:sgpr_32 = S_MOV_B32 6400
92 %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
93 %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
94 %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
95 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
96 %39:sgpr_32 = S_MOV_B32 11200
97 %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
98 %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
99 %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
100 %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
# Test NegativeDistance: three 64-bit global loads from the same base
# (%21 low half, %23 high half) plus the constants 6144, 8192 and 10240, in
# increasing order.
# The check lines below expect all three loads to share the single base built
# from 10240, with immediate offsets -4096, -2048 and 0
# (6144 - 10240 = -4096, 8192 - 10240 = -2048), i.e. the first two loads are
# addressed at a negative distance from the shared base.
# NOTE(review): the checked S_MOV_B32 10240 has class sreg_32 while the input
# instruction below declares sgpr_32; presumably the pass materializes a new
# constant register -- confirm.
104 # GFX9-LABEL: name: NegativeDistance
105 # GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240
106 # GFX9: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
107 # GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
108 # GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_4]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
109 # GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -4096, 0
110 # GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
111 # GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
113 name: NegativeDistance
116 %0:sgpr_64 = COPY $sgpr0_sgpr1
117 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
118 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
119 %4:sreg_32_xm0 = COPY $sgpr101
120 %5:sreg_32_xm0 = S_MOV_B32 0
121 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
123 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
124 %6:vreg_64 = COPY $vgpr0_vgpr1
125 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
126 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
127 %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
128 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
129 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
130 %12:sgpr_32 = COPY %1.sub1
131 %13:vgpr_32 = COPY %5
132 %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
133 %16:vgpr_32 = COPY %12
134 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
135 %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
136 %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
137 %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
138 %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
139 %25:sgpr_32 = S_MOV_B32 6144
140 %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
141 %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
142 %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
143 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
144 %32:sgpr_32 = S_MOV_B32 8192
145 %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
146 %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
147 %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
148 %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
149 %39:sgpr_32 = S_MOV_B32 10240
150 %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
151 %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
152 %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
153 %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
157 # Tests for a successful compilation.
# No output patterns are attached to this function in the lines visible here,
# so it only has to get through the pass (and the machine verifier enabled on
# the run line) without failing.
# Unlike the other functions in this file, the high-half add of the address
# below uses 4294967295 as its second source operand instead of 0, and there
# is only a single load.
# NOTE(review): presumably this shape once triggered an assertion in the
# pass -- confirm against the commit that added the test.
161 %0:sgpr_64 = COPY $sgpr0_sgpr1
162 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
163 %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
164 %4:sreg_32_xm0 = COPY $sgpr101
165 %5:sreg_32_xm0 = S_MOV_B32 0
166 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
168 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
169 %6:vreg_64 = COPY $vgpr0_vgpr1
170 %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
171 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
172 %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
173 %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
174 %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
175 %12:sgpr_32 = COPY %1.sub1
176 %13:vgpr_32 = COPY %5
177 %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
178 %16:vgpr_32 = COPY %12
179 %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
180 %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
181 %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
182 %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
183 %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
185 %25:sgpr_32 = S_MOV_B32 6144
186 %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
187 %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
188 %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
189 %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
# Test diffoporder_add_store: store counterpart of the load tests. Two dword
# stores go to the 64-bit base in %0 plus the constants 4000 and 3000; the
# first stores %0.sub0 and the second stores %0.sub1.
# The check lines below expect the first store to carry immediate offset 1000
# (4000 - 3000 = 1000) and the second to keep offset 0.
# Here the carry-out of each low-half add feeds the high-half add without a
# `killed` flag, unlike the load tests in this file.
193 # GFX9-LABEL: name: diffoporder_add_store
194 # GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
195 # GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,
197 name: diffoporder_add_store
201 %0:vreg_64 = COPY $vgpr0_vgpr1
203 %1:sgpr_32 = S_MOV_B32 4000
204 %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
205 %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
206 %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
207 GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec
209 %8:sgpr_32 = S_MOV_B32 3000
210 %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
211 %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
212 %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
213 GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec