1 # RUN: llc -mtriple=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
2 # RUN: llc -mtriple=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
3 # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI
4 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,GFX9
7 define amdgpu_kernel void @smrd_imm(ptr addrspace(4) %const0) { ret void }
8 define amdgpu_kernel void @smrd_wide() { ret void }
9 define amdgpu_kernel void @constant_address_positive() { ret void }
10 define amdgpu_kernel void @smrd_sgpr() { ret void }
11 define amdgpu_kernel void @smrd_sgpr_imm() { ret void }
20 # GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
23 # SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
24 # VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0
26 # Max immediate offset for SI
27 # SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
28 # VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0
30 # Immediate overflow for SI
31 # SI: [[K1024:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
32 # SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0
33 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0
34 # VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0
36 # Max immediate offset for VI
37 # SI: [[K1048572:%[0-9]+]]:sreg_32 = S_MOV_B32 1048572
38 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143
39 # VI: S_LOAD_DWORD_IMM [[PTR]], 1048572
42 # Immediate overflow for VI
43 # SIVI: [[K1048576:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
44 # SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0
45 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0
47 # Max immediate offset for CI
48 # SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
49 # SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3
50 # SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
51 # SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
52 # SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
53 # SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
54 # SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
55 # SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
56 # SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
57 # SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
58 # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
59 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0
61 # Immediate overflow for CI
62 # GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
63 # GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4
64 # GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
65 # GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
66 # GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
67 # GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
68 # GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
69 # GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
70 # GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
71 # GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
72 # GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
74 # Max 32-bit byte offset
75 # SIVI: [[K4294967292:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
76 # SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0
77 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0
79 # Overflow 32-bit byte offset
80 # SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
81 # SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1
82 # SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
83 # SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
84 # SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
85 # SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
86 # SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
87 # SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
88 # SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
89 # SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
90 # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
91 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0
94 # GCN: [[AS0:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
95 # GCN: $sgpr0_sgpr1 = COPY [[AS0]]
96 # GCN: [[AS1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
97 # GCN: $sgpr0_sgpr1 = COPY [[AS1]]
98 # GCN: [[AS4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
99 # GCN: $sgpr0_sgpr1 = COPY [[AS4]]
103 liveins: $sgpr0_sgpr1
105 %0:sgpr(p4) = COPY $sgpr0_sgpr1
107 %1:sgpr(s64) = G_CONSTANT i64 4
108 %2:sgpr(p4) = G_PTR_ADD %0, %1
109 %3:sgpr(s32) = G_LOAD %2 :: (load (s32) from %ir.const0, addrspace 4)
112 %4:sgpr(s64) = G_CONSTANT i64 1020
113 %5:sgpr(p4) = G_PTR_ADD %0, %4
114 %6:sgpr(s32) = G_LOAD %5 :: (load (s32) from %ir.const0, addrspace 4)
117 %7:sgpr(s64) = G_CONSTANT i64 1024
118 %8:sgpr(p4) = G_PTR_ADD %0, %7
119 %9:sgpr(s32) = G_LOAD %8 :: (load (s32) from %ir.const0, addrspace 4)
122 %10:sgpr(s64) = G_CONSTANT i64 1048572
123 %11:sgpr(p4) = G_PTR_ADD %0, %10
124 %12:sgpr(s32) = G_LOAD %11 :: (load (s32) from %ir.const0, addrspace 4)
127 %13:sgpr(s64) = G_CONSTANT i64 1048576
128 %14:sgpr(p4) = G_PTR_ADD %0, %13
129 %15:sgpr(s32) = G_LOAD %14 :: (load (s32) from %ir.const0, addrspace 4)
132 %16:sgpr(s64) = G_CONSTANT i64 17179869180
133 %17:sgpr(p4) = G_PTR_ADD %0, %16
134 %18:sgpr(s32) = G_LOAD %17 :: (load (s32) from %ir.const0, addrspace 4)
137 %19:sgpr(s64) = G_CONSTANT i64 17179869184
138 %20:sgpr(p4) = G_PTR_ADD %0, %19
139 %21:sgpr(s32) = G_LOAD %20 :: (load (s32) from %ir.const0, addrspace 4)
142 %22:sgpr(s64) = G_CONSTANT i64 4294967292
143 %23:sgpr(p4) = G_PTR_ADD %0, %22
144 %24:sgpr(s32) = G_LOAD %23 :: (load (s32) from %ir.const0, addrspace 4)
147 %25:sgpr(s64) = G_CONSTANT i64 4294967296
148 %26:sgpr(p4) = G_PTR_ADD %0, %25
149 %27:sgpr(s32) = G_LOAD %26 :: (load (s32) from %ir.const0, addrspace 4)
152 %28:sgpr(p0) = G_LOAD %0 :: (load (p0) from %ir.const0, addrspace 4)
153 $sgpr0_sgpr1 = COPY %28
155 %29:sgpr(p1) = G_LOAD %0 :: (load (p1) from %ir.const0, addrspace 4)
156 $sgpr0_sgpr1 = COPY %29
158 %30:sgpr(p4) = G_LOAD %0 :: (load (p4) from %ir.const0, addrspace 4)
159 $sgpr0_sgpr1 = COPY %30
166 regBankSelected: true
170 liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
171 %0:sgpr(p4) = COPY $sgpr0_sgpr1
172 %1:sgpr(p1) = COPY $sgpr2_sgpr3
; NOTE(review): none of the RUN lines visible in this file list CHECK in -check-prefixes, so the CHECK directives below look inactive; confirm before relying on them.
174 ; CHECK: [[CONSTANT_PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
175 ; CHECK: [[GLOBAL_PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
176 ; CHECK: s_load_dwordx8 [[CONSTANT_PTR]]
177 %2:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4)
178 $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2
180 ; CHECK: s_load_dwordx16 [[CONSTANT_PTR]]
181 %3:sgpr(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), addrspace 4)
182 $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %3
184 ; CHECK: s_load_dwordx8 [[GLOBAL_PTR]]
185 %4:sgpr(<8 x s32>) = G_LOAD %1 :: (load (<8 x s32>), addrspace 1)
186 $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4
188 ; CHECK: s_load_dwordx16 [[GLOBAL_PTR]]
189 %5:sgpr(<16 x s32>) = G_LOAD %1 :: (load (<16 x s32>), addrspace 1)
190 $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %5
194 # Test a load at a constant offset from a constant base address
195 # GCN-LABEL: name: constant_address_positive{{$}}
196 # GCN: %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 44
198 # VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load (s32), addrspace 4)
199 # SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load (s32), addrspace 4)
203 name: constant_address_positive
205 regBankSelected: true
209 liveins: $sgpr0_sgpr1, $vgpr2_vgpr3
210 %0:sgpr(p4) = G_CONSTANT i64 44
211 %1:sgpr(s64) = G_CONSTANT i64 64
212 %2:sgpr(p4) = G_PTR_ADD %0, %1
213 %3:sgpr(s32) = G_LOAD %2 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
214 S_ENDPGM 0, implicit %3
219 # Test a load with a register offset.
220 # GCN-LABEL: name: smrd_sgpr{{$}}
221 # SICI: S_LOAD_DWORD_SGPR %0, %1, 0
222 # VI: S_LOAD_DWORD_SGPR %0, %1, 0
223 # GFX9: S_LOAD_DWORD_SGPR_IMM %0, %1, 0, 0
227 regBankSelected: true
231 liveins: $sgpr0_sgpr1, $sgpr2
232 %0:sgpr(p4) = COPY $sgpr0_sgpr1
233 %1:sgpr(s32) = COPY $sgpr2
234 %2:sgpr(s64) = G_ZEXT %1:sgpr(s32)
235 %4:sgpr(p4) = G_PTR_ADD %0, %2
236 %5:sgpr(s32) = G_LOAD %4 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
237 S_ENDPGM 0, implicit %5
242 # Test a load with a (register + immediate) offset.
243 # GCN-LABEL: name: smrd_sgpr_imm{{$}}
244 # GFX9-DAG: %[[BASE:.*]]:sreg_64 = COPY $sgpr0_sgpr1
245 # GFX9-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2
246 # GFX9: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 16,
250 regBankSelected: true
254 liveins: $sgpr0_sgpr1, $sgpr2
255 %0:sgpr(p4) = COPY $sgpr0_sgpr1
256 %1:sgpr(s32) = COPY $sgpr2
257 %2:sgpr(s64) = G_ZEXT %1:sgpr(s32)
258 %4:sgpr(p4) = G_PTR_ADD %0, %2
259 %5:sgpr(s64) = G_CONSTANT i64 16
260 %6:sgpr(p4) = G_PTR_ADD %4, %5
261 %7:sgpr(s32) = G_LOAD %6 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
262 S_ENDPGM 0, implicit %7