1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
3 # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
4 # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
8 name: load_local_v4s32_align16
11 tracksRegLiveness: true
17 ; GFX7-LABEL: name: load_local_v4s32_align16
18 ; GFX7: liveins: $vgpr0
20 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
21 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
22 ; GFX7-NEXT: [[DS_READ_B128_:%[0-9]+]]:vreg_128 = DS_READ_B128 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), addrspace 3)
23 ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_]]
24 ; GFX9-LABEL: name: load_local_v4s32_align16
25 ; GFX9: liveins: $vgpr0
27 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
28 ; GFX9-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3)
29 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]]
30 ; GFX10-LABEL: name: load_local_v4s32_align16
31 ; GFX10: liveins: $vgpr0
33 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
34 ; GFX10-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3)
35 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]]
36 %0:vgpr(p3) = COPY $vgpr0
37 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 3)
38 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
44 name: load_local_v4s32_align_8
47 tracksRegLiveness: true
53 ; GFX7-LABEL: name: load_local_v4s32_align_8
54 ; GFX7: liveins: $vgpr0
56 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
57 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
58 ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
59 ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]]
60 ; GFX9-LABEL: name: load_local_v4s32_align_8
61 ; GFX9: liveins: $vgpr0
63 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
64 ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
65 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
66 ; GFX10-LABEL: name: load_local_v4s32_align_8
67 ; GFX10: liveins: $vgpr0
69 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
70 ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
71 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
72 %0:vgpr(p3) = COPY $vgpr0
73 %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3)
74 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
80 name: load_local_v4s32_align_8_offset_160
83 tracksRegLiveness: true
89 ; GFX7-LABEL: name: load_local_v4s32_align_8_offset_160
90 ; GFX7: liveins: $vgpr0
92 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
93 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
94 ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 50, 51, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
95 ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]]
96 ; GFX9-LABEL: name: load_local_v4s32_align_8_offset_160
97 ; GFX9: liveins: $vgpr0
99 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
100 ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
101 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
102 ; GFX10-LABEL: name: load_local_v4s32_align_8_offset_160
103 ; GFX10: liveins: $vgpr0
105 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
106 ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
107 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
108 %0:vgpr(p3) = COPY $vgpr0
109 %1:vgpr(s32) = G_CONSTANT i32 400
110 %2:vgpr(p3) = G_PTR_ADD %0, %1
111 %3:vgpr(<4 x s32>) = G_LOAD %2 :: (load (<4 x s32>), align 8, addrspace 3)
112 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
118 name: load_local_v4s32_align_8_offset_320
120 regBankSelected: true
121 tracksRegLiveness: true
127 ; GFX7-LABEL: name: load_local_v4s32_align_8_offset_320
128 ; GFX7: liveins: $vgpr0
130 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
131 ; GFX7-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec
132 ; GFX7-NEXT: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
133 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
134 ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 %2, 0, 1, 0, implicit $m0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
135 ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]]
136 ; GFX9-LABEL: name: load_local_v4s32_align_8_offset_320
137 ; GFX9: liveins: $vgpr0
139 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
140 ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec
141 ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
142 ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
143 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
144 ; GFX10-LABEL: name: load_local_v4s32_align_8_offset_320
145 ; GFX10: liveins: $vgpr0
147 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
148 ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec
149 ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
150 ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
151 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
152 %0:vgpr(p3) = COPY $vgpr0
153 %1:vgpr(s32) = G_CONSTANT i32 4000
154 %2:vgpr(p3) = G_PTR_ADD %0, %1
155 %3:vgpr(<4 x s32>) = G_LOAD %2 :: (load (<4 x s32>), align 8, addrspace 3)
156 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3
162 name: load_local_v2s64
164 regBankSelected: true
165 tracksRegLiveness: true
171 ; GFX7-LABEL: name: load_local_v2s64
172 ; GFX7: liveins: $vgpr0
174 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
175 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
176 ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3)
177 ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]]
178 ; GFX9-LABEL: name: load_local_v2s64
179 ; GFX9: liveins: $vgpr0
181 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
182 ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3)
183 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
184 ; GFX10-LABEL: name: load_local_v2s64
185 ; GFX10: liveins: $vgpr0
187 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
188 ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3)
189 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
190 %0:vgpr(p3) = COPY $vgpr0
191 %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 3)
192 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
198 name: load_local_v2p1
200 regBankSelected: true
201 tracksRegLiveness: true
207 ; GFX7-LABEL: name: load_local_v2p1
208 ; GFX7: liveins: $vgpr0
210 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
211 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
212 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3)
213 ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
214 ; GFX9-LABEL: name: load_local_v2p1
215 ; GFX9: liveins: $vgpr0
217 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
218 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3)
219 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
220 ; GFX10-LABEL: name: load_local_v2p1
221 ; GFX10: liveins: $vgpr0
223 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
224 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3)
225 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
226 %0:vgpr(p3) = COPY $vgpr0
227 %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 3)
228 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
234 name: load_local_s128
236 regBankSelected: true
237 tracksRegLiveness: true
243 ; GFX7-LABEL: name: load_local_s128
244 ; GFX7: liveins: $vgpr0
246 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
247 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
248 ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3)
249 ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
250 ; GFX9-LABEL: name: load_local_s128
251 ; GFX9: liveins: $vgpr0
253 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
254 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3)
255 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
256 ; GFX10-LABEL: name: load_local_s128
257 ; GFX10: liveins: $vgpr0
259 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
260 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3)
261 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
262 %0:vgpr(p3) = COPY $vgpr0
263 %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3)
264 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
270 name: load_local_v8s16
272 regBankSelected: true
273 tracksRegLiveness: true
279 ; GFX7-LABEL: name: load_local_v8s16
280 ; GFX7: liveins: $vgpr0
282 ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
283 ; GFX7-NEXT: $m0 = S_MOV_B32 -1
284 ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3)
285 ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]]
286 ; GFX9-LABEL: name: load_local_v8s16
287 ; GFX9: liveins: $vgpr0
289 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
290 ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3)
291 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
292 ; GFX10-LABEL: name: load_local_v8s16
293 ; GFX10: liveins: $vgpr0
295 ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
296 ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3)
297 ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
298 %0:vgpr(p3) = COPY $vgpr0
299 %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 3)
300 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1