1 # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
2 # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI
3 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
4 # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9
; Empty kernel stubs: each body is only `ret void`. Their names match the
; `name:` labels checked further down in this file.
; NOTE(review): presumably each MIR function below binds to the stub of the
; same name - confirm against the full file.
7 define amdgpu_kernel void @div_fmas() { ret void }
8 define amdgpu_kernel void @s_getreg() { ret void }
9 define amdgpu_kernel void @s_setreg() { ret void }
10 define amdgpu_kernel void @vmem_gt_8dw_store() { ret void }
11 define amdgpu_kernel void @readwrite_lane() { ret void }
12 define amdgpu_kernel void @rfe() { ret void }
13 define amdgpu_kernel void @s_mov_fed_b32() { ret void }
14 define amdgpu_kernel void @s_movrel() { ret void }
15 define amdgpu_kernel void @v_interp() { ret void }
16 define amdgpu_kernel void @dpp() { ret void }
; Kernel that carries debug info: it stores the pointer argument %A into an
; alloca in addrspace(5) and describes that slot with llvm.dbg.declare, using
; variable !5 and location !12.
; NOTE(review): the end of this definition is not visible in this excerpt.
18 define amdgpu_kernel void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) {
20 %A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
21 store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
22 call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !5, metadata !11), !dbg !12
; Declaration of the debug intrinsic called above.
26 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Module flags and debug-info metadata. !5 (local variable "A", argument 1)
; and !12 (line 1, column 30) are the nodes referenced by the
; llvm.dbg.declare call and by the DBG_VALUE instruction in the MIR body of
; mov_fed_hazard_crash_on_dbg_value.
; NOTE(review): some nodes referenced here (for example !2, !8, !11) are not
; visible in this excerpt.
29 !llvm.module.flags = !{!3, !4}
31 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 268929)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
32 !1 = !DIFile(filename: "test01.cl", directory: "/dev/null")
34 !3 = !{i32 2, !"Dwarf Version", i32 2}
35 !4 = !{i32 2, !"Debug Info Version", i32 3}
36 !5 = !DILocalVariable(name: "A", arg: 1, scope: !6, file: !1, line: 1, type: !9)
37 !6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
38 !7 = !DISubroutineType(types: !8)
40 !9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 32)
41 !10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
43 !12 = !DILocation(line: 1, column: 30, scope: !6)
# div_fmas: every V_DIV_FMAS_F32 below reads $vcc implicitly. Where the
# preceding instruction is visible, it is a $vcc writer placed directly before
# the V_DIV_FMAS_F32: V_CMP_EQ_I32_e32 (implicit def), V_CMP_EQ_I32_e64
# (explicit def) and V_DIV_SCALE_F32 (second result).
# NOTE(review): the expected wait-state checks are not all visible in this
# excerpt.
47 # GCN-LABEL: name: div_fmas
71 # GCN: V_DIV_SCALE_F32
82 $vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec
86 implicit $vcc = V_CMP_EQ_I32_e32 $vgpr1, $vgpr2, implicit $exec
87 $vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec
91 $vcc = V_CMP_EQ_I32_e64 $vgpr1, $vgpr2, implicit $exec
92 $vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec
96 $vgpr4, $vcc = V_DIV_SCALE_F32 $vgpr1, $vgpr1, $vgpr3, implicit $exec
97 $vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec
# s_getreg: S_GETREG_B32 placed after an S_SETREG_B32 or S_SETREG_IMM32_B32.
# The last pair uses different trailing immediates (0 for the write, 1 for the
# read); the other visible pairs use 1 for both.
# NOTE(review): presumably the trailing immediate selects the hardware
# register, making the last pair the non-conflicting case - confirm against
# the full set of checks.
104 # GCN-LABEL: name: s_getreg
113 # GCN: S_SETREG_IMM32
131 S_SETREG_B32 $sgpr0, 1
132 $sgpr1 = S_GETREG_B32 1
136 S_SETREG_IMM32_B32 0, 1
137 $sgpr1 = S_GETREG_B32 1
141 S_SETREG_B32 $sgpr0, 1
143 $sgpr2 = S_GETREG_B32 1
147 S_SETREG_B32 $sgpr0, 0
148 $sgpr1 = S_GETREG_B32 1
# s_setreg: back-to-back S_SETREG_B32 pairs. The trailing immediates of the
# three pairs are 1/1, 64/128 and 1/0.
# NOTE(review): the expected output checks for this function are not visible
# in this excerpt.
154 # GCN-LABEL: name: s_setreg
176 S_SETREG_B32 $sgpr0, 1
177 S_SETREG_B32 $sgpr1, 1
181 S_SETREG_B32 $sgpr0, 64
182 S_SETREG_B32 $sgpr1, 128
186 S_SETREG_B32 $sgpr0, 1
187 S_SETREG_B32 $sgpr1, 0
# vmem_gt_8dw_store: each buffer/flat store or atomic below is followed by a
# V_MOV_B32_e32 that overwrites $vgpr3, and $vgpr3 is covered by a register
# operand of the preceding memory instruction. The checks list the memory
# instructions in the same order as the body, each followed by a check for
# V_MOV_B32.
# Two of the buffer stores pass $sgpr4 as their third operand where the others
# pass 0.
# NOTE(review): presumably that operand is the scalar offset - confirm. Some
# check lines between the visible ones are missing from this excerpt.
193 # GCN-LABEL: name: vmem_gt_8dw_store
196 # GCN: BUFFER_STORE_DWORD_OFFSET
197 # GCN-NEXT: V_MOV_B32
198 # GCN: BUFFER_STORE_DWORDX3_OFFSET
200 # GCN-NEXT: V_MOV_B32
201 # GCN: BUFFER_STORE_DWORDX4_OFFSET
202 # GCN-NEXT: V_MOV_B32
203 # GCN: BUFFER_STORE_DWORDX4_OFFSET
205 # GCN-NEXT: V_MOV_B32
206 # GCN: BUFFER_STORE_FORMAT_XYZ_OFFSET
208 # GCN-NEXT: V_MOV_B32
209 # GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET
211 # GCN-NEXT: V_MOV_B32
214 # GCN: FLAT_STORE_DWORDX2
215 # GCN-NEXT: V_MOV_B32
216 # GCN: FLAT_STORE_DWORDX3
218 # GCN-NEXT: V_MOV_B32
219 # GCN: FLAT_STORE_DWORDX4
221 # GCN-NEXT: V_MOV_B32
222 # GCN: FLAT_ATOMIC_CMPSWAP_X2
224 # GCN-NEXT: V_MOV_B32
225 # GCN: FLAT_ATOMIC_FCMPSWAP_X2
229 name: vmem_gt_8dw_store
233 BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
234 $vgpr3 = V_MOV_B32_e32 0, implicit $exec
235 BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
236 $vgpr3 = V_MOV_B32_e32 0, implicit $exec
237 BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
238 $vgpr3 = V_MOV_B32_e32 0, implicit $exec
239 BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
240 $vgpr3 = V_MOV_B32_e32 0, implicit $exec
241 BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
242 $vgpr3 = V_MOV_B32_e32 0, implicit $exec
243 BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
244 $vgpr3 = V_MOV_B32_e32 0, implicit $exec
245 BUFFER_ATOMIC_CMPSWAP_X2_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec
246 $vgpr3 = V_MOV_B32_e32 0, implicit $exec
250 FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
251 $vgpr3 = V_MOV_B32_e32 0, implicit $exec
252 FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
253 $vgpr3 = V_MOV_B32_e32 0, implicit $exec
254 FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
255 $vgpr3 = V_MOV_B32_e32 0, implicit $exec
256 FLAT_ATOMIC_CMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
257 $vgpr3 = V_MOV_B32_e32 0, implicit $exec
258 FLAT_ATOMIC_FCMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
259 $vgpr3 = V_MOV_B32_e32 0, implicit $exec
# readwrite_lane: V_READLANE_B32 / V_WRITELANE_B32 whose scalar operand
# ($sgpr0 or $vcc_lo) is defined by the immediately preceding vector add:
# V_ADD_I32_e64 writes $sgpr0_sgpr1 and V_ADD_I32_e32 writes $vcc implicitly.
# The final case also sets $m0 with S_MOV_B32 before the add and passes $m0 to
# V_WRITELANE_B32.
# NOTE(review): the expected wait-state checks between the visible check lines
# are missing from this excerpt.
267 # GCN-LABEL: name: readwrite_lane
275 # GCN: V_READLANE_B32
283 # GCN: V_WRITELANE_B32
291 # GCN: V_READLANE_B32
299 # GCN: V_WRITELANE_B32
305 $vgpr0,$sgpr0_sgpr1 = V_ADD_I32_e64 $vgpr1, $vgpr2, implicit $vcc, 0, implicit $exec
306 $sgpr4 = V_READLANE_B32 $vgpr4, $sgpr0
310 $vgpr0,$sgpr0_sgpr1 = V_ADD_I32_e64 $vgpr1, $vgpr2, implicit $vcc, 0, implicit $exec
311 $vgpr4 = V_WRITELANE_B32 $sgpr0, $sgpr0, $vgpr4
315 $vgpr0,implicit $vcc = V_ADD_I32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
316 $sgpr4 = V_READLANE_B32 $vgpr4, $vcc_lo
320 $m0 = S_MOV_B32 $sgpr4
321 $vgpr0,implicit $vcc = V_ADD_I32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
322 $vgpr4 = V_WRITELANE_B32 $m0, $vcc_lo, $vgpr4
# rfe: S_RFE_B64 placed directly after an S_SETREG_B32, once with trailing
# immediate 3 and once with 0.
# NOTE(review): the check lines that precede each visible S_RFE_B64 check are
# missing from this excerpt, so the expected padding cannot be stated here.
330 # GCN-LABEL: name: rfe
335 # GCN-NEXT: S_RFE_B64
339 # GCN-NEXT: S_RFE_B64
345 S_SETREG_B32 $sgpr0, 3
346 S_RFE_B64 $sgpr2_sgpr3
350 S_SETREG_B32 $sgpr0, 0
351 S_RFE_B64 $sgpr2_sgpr3
# s_mov_fed_b32: S_MOV_FED_B32 writes $sgpr0 and the next instruction reads it,
# first through a scalar S_MOV_B32 and then through a vector V_MOV_B32_e32.
# NOTE(review): the check lines that precede the visible ones are missing from
# this excerpt.
359 # GCN-LABEL: name: s_mov_fed_b32
364 # GCN-NEXT: S_MOV_B32
369 # GCN-NEXT: V_MOV_B32
374 $sgpr0 = S_MOV_FED_B32 $sgpr0
375 $sgpr0 = S_MOV_B32 $sgpr0
379 $sgpr0 = S_MOV_FED_B32 $sgpr0
380 $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec
# s_movrel: the four relative-move variants (S_MOVRELS/S_MOVRELD, 32- and
# 64-bit), each reading $m0 implicitly.
# NOTE(review): the instructions that precede each move (presumably the $m0
# writers) are not visible in this excerpt - confirm against the full file.
388 # GCN-LABEL: name: s_movrel
393 # GCN-NEXT: S_MOVRELS_B32
398 # GCN-NEXT: S_MOVRELS_B64
403 # GCN-NEXT: S_MOVRELD_B32
408 # GCN-NEXT: S_MOVRELD_B64
415 $sgpr0 = S_MOVRELS_B32 $sgpr0, implicit $m0
420 $sgpr0_sgpr1 = S_MOVRELS_B64 $sgpr0_sgpr1, implicit $m0
425 $sgpr0 = S_MOVRELD_B32 $sgpr0, implicit $m0
430 $sgpr0_sgpr1 = S_MOVRELD_B64 $sgpr0_sgpr1, implicit $m0
# v_interp: the four interpolation instructions (V_INTERP_P1_F32,
# V_INTERP_P2_F32, V_INTERP_P1_F32_16bank, V_INTERP_MOV_F32), each reading $m0
# implicitly.
# NOTE(review): the instructions that precede each one (presumably the $m0
# writers) are not visible in this excerpt - confirm against the full file.
437 # GCN-LABEL: name: v_interp
442 # GCN-NEXT: V_INTERP_P1_F32
447 # GCN-NEXT: V_INTERP_P2_F32
452 # GCN-NEXT: V_INTERP_P1_F32_16bank
457 # GCN-NEXT: V_INTERP_MOV_F32
464 $vgpr0 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $m0, implicit $exec
469 $vgpr0 = V_INTERP_P2_F32 $vgpr0, $vgpr1, 0, 0, implicit $m0, implicit $exec
474 $vgpr0 = V_INTERP_P1_F32_16bank $vgpr0, 0, 0, implicit $m0, implicit $exec
479 $vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $m0, implicit $exec
# dpp: V_MOV_B32_dpp placed directly after (a) a V_MOV_B32_e32 that writes the
# $vgpr0 it reads, and (b) a V_CMPX_EQ_I32_e32 that writes $exec. The visible
# output checks use the VI prefix, which the run lines enable only for the
# fiji and gfx900 invocations.
# NOTE(review): the check lines between the visible ones are missing from this
# excerpt.
486 # GCN-LABEL: name: dpp
492 # VI-NEXT: V_MOV_B32_dpp
495 # VI: V_CMPX_EQ_I32_e32
501 # VI-NEXT: V_MOV_B32_dpp
507 $vgpr0 = V_MOV_B32_e32 0, implicit $exec
508 $vgpr1 = V_MOV_B32_dpp $vgpr1, $vgpr0, 0, 15, 15, 0, implicit $exec
512 implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
513 $vgpr3 = V_MOV_B32_dpp $vgpr3, $vgpr0, 0, 15, 15, 0, implicit $exec
# mov_fed_hazard_crash_on_dbg_value: machine function with full frame and
# live-in descriptions whose body contains a DBG_VALUE (referencing metadata
# !5, !11 and !12) among ordinary scalar, vector and buffer-store instructions.
# NOTE(review): judging by the name, this is a regression input for a hazard
# recognizer crash on DBG_VALUE - confirm against the commit history. No check
# lines for this function are visible in this excerpt.
517 name: mov_fed_hazard_crash_on_dbg_value
519 exposesReturnsTwice: false
521 regBankSelected: false
523 tracksRegLiveness: true
525 - { reg: '$sgpr4_sgpr5' }
526 - { reg: '$sgpr6_sgpr7' }
528 - { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
530 isFrameAddressTaken: false
531 isReturnAddressTaken: false
540 hasOpaqueSPAdjustment: false
542 hasMustTailInVarArgFunc: false
544 - { id: 0, name: A.addr, offset: 0, size: 8, alignment: 8, local-offset: 0 }
545 - { id: 1, offset: 8, size: 4, alignment: 4 }
548 liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9, $sgpr0_sgpr1_sgpr2_sgpr3
550 $flat_scr_lo = S_ADD_U32 $sgpr6, $sgpr9, implicit-def $scc
551 $flat_scr_hi = S_ADDC_U32 $sgpr7, 0, implicit-def $scc, implicit $scc
552 DBG_VALUE $noreg, 2, !5, !11, debug-location !12
553 $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
554 dead $sgpr6_sgpr7 = KILL $sgpr4_sgpr5
555 $sgpr8 = S_MOV_B32 $sgpr5
556 $vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
557 BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4)
558 $sgpr8 = S_MOV_B32 $sgpr4, implicit killed $sgpr4_sgpr5
559 $vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
560 BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr)