1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s
5 define amdgpu_kernel void @single-wave-phase-2b(ptr addrspace(3) noalias %in0, ptr addrspace(3) noalias %in1, ptr addrspace(3) noalias %in2, ptr addrspace(3) noalias %in3, ptr addrspace(3) noalias %in4, ptr addrspace(3) noalias %in5, ptr addrspace(3) noalias %in6, ptr addrspace(3) noalias %in7, ptr addrspace(3) noalias %in8, ptr addrspace(3) noalias %in9, ptr addrspace(3) noalias %in10, ptr addrspace(3) noalias %in11, ptr addrspace(7) noalias %in12, ptr addrspace(7) noalias %in13, ptr addrspace(7) noalias %in14, ptr addrspace(7) noalias %in15, ptr addrspace(7) noalias %in16, ptr addrspace(7) noalias %in17, ptr addrspace(7) noalias %in18, ptr addrspace(7) noalias %in19, ptr addrspace(7) noalias %in20, ptr addrspace(7) noalias %in21, ptr addrspace(7) noalias %in22, ptr addrspace(7) noalias %in23, ptr addrspace(7) noalias %in24, ptr addrspace(7) noalias %in25, ptr addrspace(7) noalias %in26, ptr addrspace(7) noalias %in27, ptr addrspace(7) noalias %in28, ptr addrspace(7) noalias %in29) #0 { ret void }
7 attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
15 name: single-wave-phase-2b
16 tracksRegLiveness: true
20 ; GCN-LABEL: name: single-wave-phase-2b
22 ; GCN-NEXT: successors: %bb.1(0x80000000)
24 ; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = IMPLICIT_DEF
25 ; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = IMPLICIT_DEF
26 ; GCN-NEXT: [[DEF2:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
27 ; GCN-NEXT: [[DEF3:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
28 ; GCN-NEXT: [[DEF4:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
29 ; GCN-NEXT: [[DEF5:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
30 ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
31 ; GCN-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
32 ; GCN-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
33 ; GCN-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
34 ; GCN-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
35 ; GCN-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
36 ; GCN-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
37 ; GCN-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
38 ; GCN-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
39 ; GCN-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
40 ; GCN-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
41 ; GCN-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
42 ; GCN-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
43 ; GCN-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
44 ; GCN-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
45 ; GCN-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
46 ; GCN-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
47 ; GCN-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
48 ; GCN-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
49 ; GCN-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
50 ; GCN-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
51 ; GCN-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
52 ; GCN-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
53 ; GCN-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
54 ; GCN-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
55 ; GCN-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
56 ; GCN-NEXT: [[DEF32:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
57 ; GCN-NEXT: [[DEF33:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
58 ; GCN-NEXT: [[DEF34:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
59 ; GCN-NEXT: [[DEF35:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
60 ; GCN-NEXT: [[DEF36:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
61 ; GCN-NEXT: [[DEF37:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
62 ; GCN-NEXT: [[DEF38:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
63 ; GCN-NEXT: [[DEF39:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
64 ; GCN-NEXT: [[DEF40:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
65 ; GCN-NEXT: [[DEF41:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
66 ; GCN-NEXT: [[DEF42:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
67 ; GCN-NEXT: [[DEF43:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
68 ; GCN-NEXT: [[DEF44:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
69 ; GCN-NEXT: [[DEF45:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
70 ; GCN-NEXT: [[DEF46:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
71 ; GCN-NEXT: [[DEF47:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
72 ; GCN-NEXT: [[DEF48:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
73 ; GCN-NEXT: [[DEF49:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
76 ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
78 ; GCN-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
79 ; GCN-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 0, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
80 ; GCN-NEXT: [[DS_READ_B128_gfx9_2:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
81 ; GCN-NEXT: [[DS_READ_B128_gfx9_3:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 2064, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
82 ; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub0_sub1, [[DS_READ_B128_gfx9_1]].sub0_sub1, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
83 ; GCN-NEXT: [[DS_READ_B128_gfx9_4:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 2080, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
84 ; GCN-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF8]], implicit $exec
85 ; GCN-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF9]], implicit $exec
86 ; GCN-NEXT: dead [[V_MFMA_F32_32X32X8F16_mac_e64_:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub2_sub3, [[DS_READ_B128_gfx9_1]].sub2_sub3, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
87 ; GCN-NEXT: [[DS_READ_B128_gfx9_5:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 3120, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
88 ; GCN-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF10]], implicit $exec
89 ; GCN-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF11]], implicit $exec
90 ; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub0_sub1, [[DS_READ_B128_gfx9_3]].sub0_sub1, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
91 ; GCN-NEXT: [[DS_READ_B128_gfx9_6:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 4128, 0, implicit $exec :: (load (s128) from %ir.in6, !alias.scope !0, addrspace 3)
92 ; GCN-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF12]], implicit $exec
93 ; GCN-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF13]], implicit $exec
94 ; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub2_sub3, [[DS_READ_B128_gfx9_3]].sub2_sub3, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
95 ; GCN-NEXT: [[DS_READ_B128_gfx9_7:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 6192, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 3)
96 ; GCN-NEXT: [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF14]], implicit $exec
97 ; GCN-NEXT: [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF15]], implicit $exec
98 ; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_4]].sub0_sub1, [[DS_READ_B128_gfx9_6]].sub0_sub1, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
99 ; GCN-NEXT: [[DS_READ_B128_gfx9_8:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 1024, 0, implicit $exec :: (load (s128) from %ir.in8, !alias.scope !0, addrspace 3)
100 ; GCN-NEXT: [[V_ADD_U32_e32_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF16]], implicit $exec
101 ; GCN-NEXT: [[V_ADD_U32_e32_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF17]], implicit $exec
102 ; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_4]].sub2_sub3, [[DS_READ_B128_gfx9_6]].sub2_sub3, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
103 ; GCN-NEXT: [[DS_READ_B128_gfx9_9:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 3088, 0, implicit $exec :: (load (s128) from %ir.in9, !alias.scope !0, addrspace 3)
104 ; GCN-NEXT: [[V_ADD_U32_e32_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF18]], implicit $exec
105 ; GCN-NEXT: [[V_ADD_U32_e32_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF19]], implicit $exec
106 ; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_5]].sub0_sub1, [[DS_READ_B128_gfx9_7]].sub0_sub1, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
107 ; GCN-NEXT: [[DS_READ_B128_gfx9_10:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 5152, 0, implicit $exec :: (load (s128) from %ir.in10, !alias.scope !0, addrspace 3)
108 ; GCN-NEXT: [[V_ADD_U32_e32_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF20]], implicit $exec
109 ; GCN-NEXT: [[V_ADD_U32_e32_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF21]], implicit $exec
110 ; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_5]].sub2_sub3, [[DS_READ_B128_gfx9_7]].sub2_sub3, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
111 ; GCN-NEXT: [[DS_READ_B128_gfx9_11:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 7216, 0, implicit $exec :: (load (s128) from %ir.in11, !alias.scope !0, addrspace 3)
112 ; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub0_sub1, [[DS_READ_B128_gfx9_8]].sub0_sub1, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
113 ; GCN-NEXT: undef [[V_PERM_B32_e64_:%[0-9]+]].sub1:vreg_128_align2 = V_PERM_B32_e64 [[DEF28]], [[DEF29]], [[DEF44]], implicit $exec
114 ; GCN-NEXT: [[V_PERM_B32_e64_:%[0-9]+]].sub0:vreg_128_align2 = V_PERM_B32_e64 [[DEF30]], [[DEF31]], [[DEF44]], implicit $exec
115 ; GCN-NEXT: [[V_PERM_B32_e64_:%[0-9]+]].sub3:vreg_128_align2 = V_PERM_B32_e64 [[DEF24]], [[DEF25]], [[DEF44]], implicit $exec
116 ; GCN-NEXT: [[V_PERM_B32_e64_:%[0-9]+]].sub2:vreg_128_align2 = V_PERM_B32_e64 [[DEF26]], [[DEF27]], [[DEF44]], implicit $exec
117 ; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF40]], [[V_PERM_B32_e64_]], 0, 0, implicit $exec :: (store (s128) into %ir.in0, !alias.scope !0, addrspace 3)
118 ; GCN-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in12, !alias.scope !0, addrspace 7)
119 ; GCN-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_1]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in13, !alias.scope !0, addrspace 7)
120 ; GCN-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_2]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in14, !alias.scope !0, addrspace 7)
121 ; GCN-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_3]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in15, !alias.scope !0, addrspace 7)
122 ; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub2_sub3, [[DS_READ_B128_gfx9_8]].sub2_sub3, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
123 ; GCN-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_4]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in16, !alias.scope !0, addrspace 7)
124 ; GCN-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_5]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in17, !alias.scope !0, addrspace 7)
125 ; GCN-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_6]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in18, !alias.scope !0, addrspace 7)
126 ; GCN-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_7]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in19, !alias.scope !0, addrspace 7)
127 ; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub0_sub1, [[DS_READ_B128_gfx9_9]].sub0_sub1, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
128 ; GCN-NEXT: undef [[V_PERM_B32_e64_1:%[0-9]+]].sub1:vreg_128_align2 = V_PERM_B32_e64 [[DEF36]], [[DEF37]], [[DEF44]], implicit $exec
129 ; GCN-NEXT: [[V_PERM_B32_e64_1:%[0-9]+]].sub0:vreg_128_align2 = V_PERM_B32_e64 [[DEF38]], [[DEF39]], [[DEF44]], implicit $exec
130 ; GCN-NEXT: [[V_PERM_B32_e64_1:%[0-9]+]].sub3:vreg_128_align2 = V_PERM_B32_e64 [[DEF32]], [[DEF33]], [[DEF44]], implicit $exec
131 ; GCN-NEXT: [[V_PERM_B32_e64_1:%[0-9]+]].sub2:vreg_128_align2 = V_PERM_B32_e64 [[DEF34]], [[DEF35]], [[DEF44]], implicit $exec
132 ; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF40]], [[V_PERM_B32_e64_1]], 1040, 0, implicit $exec :: (store (s128) into %ir.in1, !alias.scope !0, addrspace 3)
133 ; GCN-NEXT: [[DEF32:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_8]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in20, !alias.scope !0, addrspace 7)
134 ; GCN-NEXT: [[DEF33:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_9]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in21, !alias.scope !0, addrspace 7)
135 ; GCN-NEXT: [[DEF34:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_10]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in22, !alias.scope !0, addrspace 7)
136 ; GCN-NEXT: [[DEF35:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_11]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in23, !alias.scope !0, addrspace 7)
137 ; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub2_sub3, [[DS_READ_B128_gfx9_9]].sub2_sub3, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
138 ; GCN-NEXT: [[DEF36:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_12]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in24, !alias.scope !0, addrspace 7)
139 ; GCN-NEXT: [[DEF37:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_13]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in25, !alias.scope !0, addrspace 7)
140 ; GCN-NEXT: [[V_ADD_U32_e32_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF22]], implicit $exec
141 ; GCN-NEXT: [[V_ADD_U32_e32_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF23]], implicit $exec
142 ; GCN-NEXT: [[DEF38:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_14]], [[DEF47]], 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in26, !alias.scope !0, addrspace 7)
143 ; GCN-NEXT: [[DEF39:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[V_ADD_U32_e32_15]], [[DEF47]], 0, 0, 0, 0, implicit $exec
144 ; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_4]].sub0_sub1, [[DS_READ_B128_gfx9_10]].sub0_sub1, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
145 ; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF41]], [[DEF2]], 0, 0, implicit $exec :: (store (s128) into %ir.in2, !alias.scope !0, addrspace 3)
146 ; GCN-NEXT: [[V_ADD_U32_e32_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -16, [[DEF45]], implicit $exec
147 ; GCN-NEXT: [[V_ADD_U32_e32_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -16, [[DEF46]], implicit $exec
148 ; GCN-NEXT: [[DEF2:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[V_ADD_U32_e32_16]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in26, !alias.scope !0, addrspace 7)
149 ; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_4]].sub2_sub3, [[DS_READ_B128_gfx9_10]].sub2_sub3, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
150 ; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF41]], [[DEF3]], 2064, 0, implicit $exec :: (store (s128) into %ir.in3, !alias.scope !0, addrspace 3)
151 ; GCN-NEXT: [[DEF3:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[DEF45]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in27, !alias.scope !0, addrspace 7)
152 ; GCN-NEXT: [[DEF45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[DEF45]], implicit $exec
153 ; GCN-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF23]], implicit $exec
154 ; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_5]].sub0_sub1, [[DS_READ_B128_gfx9_11]].sub0_sub1, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
155 ; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF41]], [[DEF4]], 2080, 0, implicit $exec :: (store (s128) into %ir.in4, !alias.scope !0, addrspace 3)
156 ; GCN-NEXT: [[DEF4:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[DEF46]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in28, !alias.scope !0, addrspace 7)
157 ; GCN-NEXT: [[DEF46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[DEF46]], implicit $exec
158 ; GCN-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF22]], implicit $exec
159 ; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_5]].sub2_sub3, [[DS_READ_B128_gfx9_11]].sub2_sub3, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
160 ; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF41]], [[DEF5]], 16, 0, implicit $exec :: (store (s128) into %ir.in5, !alias.scope !0, addrspace 3)
161 ; GCN-NEXT: [[DEF5:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[V_ADD_U32_e32_17]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in29, !alias.scope !0, addrspace 7)
162 ; GCN-NEXT: IGLP_OPT 1
163 ; GCN-NEXT: [[DEF49:%[0-9]+]]:sreg_32 = nsw S_ADD_I32 [[DEF49]], -1, implicit-def dead $scc
164 ; GCN-NEXT: S_CMP_LG_U32 [[DEF49]], 0, implicit-def $scc
165 ; GCN-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF9]], implicit $exec
166 ; GCN-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF8]], implicit $exec
167 ; GCN-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF13]], implicit $exec
168 ; GCN-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF12]], implicit $exec
169 ; GCN-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF11]], implicit $exec
170 ; GCN-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF10]], implicit $exec
171 ; GCN-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF15]], implicit $exec
172 ; GCN-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF14]], implicit $exec
173 ; GCN-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF17]], implicit $exec
174 ; GCN-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF16]], implicit $exec
175 ; GCN-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF19]], implicit $exec
176 ; GCN-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF18]], implicit $exec
177 ; GCN-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF21]], implicit $exec
178 ; GCN-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF20]], implicit $exec
179 ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
182 ; GCN-NEXT: S_ENDPGM 0
184 %761:areg_512_align2 = IMPLICIT_DEF
185 %814:areg_512_align2 = IMPLICIT_DEF
186 %1757:av_128_align2 = IMPLICIT_DEF
187 %1755:av_128_align2 = IMPLICIT_DEF
188 %1754:av_128_align2 = IMPLICIT_DEF
189 %1756:av_128_align2 = IMPLICIT_DEF
190 %37:vgpr_32 = IMPLICIT_DEF
191 %38:vgpr_32 = IMPLICIT_DEF
192 %1736:vgpr_32 = IMPLICIT_DEF
193 %1737:vgpr_32 = IMPLICIT_DEF
194 %1738:vgpr_32 = IMPLICIT_DEF
195 %1739:vgpr_32 = IMPLICIT_DEF
196 %1740:vgpr_32 = IMPLICIT_DEF
197 %1741:vgpr_32 = IMPLICIT_DEF
198 %1742:vgpr_32 = IMPLICIT_DEF
199 %1743:vgpr_32 = IMPLICIT_DEF
200 %1744:vgpr_32 = IMPLICIT_DEF
201 %1745:vgpr_32 = IMPLICIT_DEF
202 %1746:vgpr_32 = IMPLICIT_DEF
203 %1747:vgpr_32 = IMPLICIT_DEF
204 %1748:vgpr_32 = IMPLICIT_DEF
205 %1749:vgpr_32 = IMPLICIT_DEF
206 %1750:vgpr_32 = IMPLICIT_DEF
207 %1751:vgpr_32 = IMPLICIT_DEF
208 %1766:vgpr_32 = IMPLICIT_DEF
209 %1767:vgpr_32 = IMPLICIT_DEF
210 %1768:vgpr_32 = IMPLICIT_DEF
211 %1769:vgpr_32 = IMPLICIT_DEF
212 %1770:vgpr_32 = IMPLICIT_DEF
213 %1771:vgpr_32 = IMPLICIT_DEF
214 %1772:vgpr_32 = IMPLICIT_DEF
215 %1773:vgpr_32 = IMPLICIT_DEF
216 %1758:vgpr_32 = IMPLICIT_DEF
217 %1759:vgpr_32 = IMPLICIT_DEF
218 %1760:vgpr_32 = IMPLICIT_DEF
219 %1761:vgpr_32 = IMPLICIT_DEF
220 %1762:vgpr_32 = IMPLICIT_DEF
221 %1763:vgpr_32 = IMPLICIT_DEF
222 %1764:vgpr_32 = IMPLICIT_DEF
223 %1765:vgpr_32 = IMPLICIT_DEF
224 %14:vgpr_32 = IMPLICIT_DEF
225 %31:vgpr_32 = IMPLICIT_DEF
226 %41:vgpr_32 = IMPLICIT_DEF
227 %43:sreg_32 = IMPLICIT_DEF
228 %535:sreg_32 = IMPLICIT_DEF
229 %1752:vgpr_32 = IMPLICIT_DEF
230 %1753:vgpr_32 = IMPLICIT_DEF
231 %450:sgpr_128 = IMPLICIT_DEF
232 %518:sgpr_128 = IMPLICIT_DEF
233 %1735:sreg_32 = IMPLICIT_DEF
237 %683:av_128_align2 = DS_READ_B128_gfx9 %37:vgpr_32, 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
238 %688:av_128_align2 = DS_READ_B128_gfx9 %37:vgpr_32, 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
239 %693:av_128_align2 = DS_READ_B128_gfx9 %37:vgpr_32, 2080, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
240 %698:av_128_align2 = DS_READ_B128_gfx9 %37:vgpr_32, 3120, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
241 %703:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 0, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
242 %708:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 2064, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
243 %713:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 4128, 0, implicit $exec :: (load (s128) from %ir.in6, !alias.scope !0, addrspace 3)
244 %718:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 6192, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 3)
245 %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %683.sub0_sub1:av_128_align2, %703.sub0_sub1:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
246 %762:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %683.sub2_sub3:av_128_align2, %703.sub2_sub3:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
247 %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %688.sub0_sub1:av_128_align2, %708.sub0_sub1:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
248 %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %688.sub2_sub3:av_128_align2, %708.sub2_sub3:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
249 %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %693.sub0_sub1:av_128_align2, %713.sub0_sub1:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
250 %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %693.sub2_sub3:av_128_align2, %713.sub2_sub3:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
251 %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %698.sub0_sub1:av_128_align2, %718.sub0_sub1:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
252 %761:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %698.sub2_sub3:av_128_align2, %718.sub2_sub3:av_128_align2, %761:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
253 %764:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 1024, 0, implicit $exec :: (load (s128) from %ir.in8, !alias.scope !0, addrspace 3)
254 %769:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 3088, 0, implicit $exec :: (load (s128) from %ir.in9, !alias.scope !0, addrspace 3)
255 %774:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 5152, 0, implicit $exec :: (load (s128) from %ir.in10, !alias.scope !0, addrspace 3)
256 %779:av_128_align2 = DS_READ_B128_gfx9 %38:vgpr_32, 7216, 0, implicit $exec :: (load (s128) from %ir.in11, !alias.scope !0, addrspace 3)
257 %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %683.sub0_sub1:av_128_align2, %764.sub0_sub1:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
258 %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %683.sub2_sub3:av_128_align2, %764.sub2_sub3:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
259 %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %688.sub0_sub1:av_128_align2, %769.sub0_sub1:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
260 %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %688.sub2_sub3:av_128_align2, %769.sub2_sub3:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
261 %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %693.sub0_sub1:av_128_align2, %774.sub0_sub1:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
262 %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %693.sub2_sub3:av_128_align2, %774.sub2_sub3:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
263 %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %698.sub0_sub1:av_128_align2, %779.sub0_sub1:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
264 %814:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %698.sub2_sub3:av_128_align2, %779.sub2_sub3:av_128_align2, %814:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
265 undef %1599.sub3:vreg_128_align2 = V_PERM_B32_e64 %1766:vgpr_32, %1767:vgpr_32, %535:sreg_32, implicit $exec
266 %1599.sub2:vreg_128_align2 = V_PERM_B32_e64 %1768:vgpr_32, %1769:vgpr_32, %535:sreg_32, implicit $exec
267 %1599.sub1:vreg_128_align2 = V_PERM_B32_e64 %1770:vgpr_32, %1771:vgpr_32, %535:sreg_32, implicit $exec
268 %1599.sub0:vreg_128_align2 = V_PERM_B32_e64 %1772:vgpr_32, %1773:vgpr_32, %535:sreg_32, implicit $exec
269 undef %1579.sub3:vreg_128_align2 = V_PERM_B32_e64 %1758:vgpr_32, %1759:vgpr_32, %535:sreg_32, implicit $exec
270 %1579.sub2:vreg_128_align2 = V_PERM_B32_e64 %1760:vgpr_32, %1761:vgpr_32, %535:sreg_32, implicit $exec
271 %1579.sub1:vreg_128_align2 = V_PERM_B32_e64 %1762:vgpr_32, %1763:vgpr_32, %535:sreg_32, implicit $exec
272 %1579.sub0:vreg_128_align2 = V_PERM_B32_e64 %1764:vgpr_32, %1765:vgpr_32, %535:sreg_32, implicit $exec
273 DS_WRITE_B128_gfx9 %14:vgpr_32, %1599:vreg_128_align2, 0, 0, implicit $exec :: (store (s128) into %ir.in0, !alias.scope !0, addrspace 3)
274 DS_WRITE_B128_gfx9 %14:vgpr_32, %1579:vreg_128_align2, 1040, 0, implicit $exec :: (store (s128) into %ir.in1, !alias.scope !0, addrspace 3)
275 %830:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1736:vgpr_32, implicit $exec
276 %1773:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %830:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in12, !alias.scope !0, addrspace 7)
277 %833:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1737:vgpr_32, implicit $exec
278 %1772:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %833:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in13, !alias.scope !0, addrspace 7)
279 %835:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1738:vgpr_32, implicit $exec
280 %1771:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %835:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in14, !alias.scope !0, addrspace 7)
281 %837:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1739:vgpr_32, implicit $exec
282 %1770:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %837:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in15, !alias.scope !0, addrspace 7)
283 %839:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1740:vgpr_32, implicit $exec
284 %1769:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %839:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in16, !alias.scope !0, addrspace 7)
285 %841:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1741:vgpr_32, implicit $exec
286 %1768:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %841:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in17, !alias.scope !0, addrspace 7)
287 %843:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1742:vgpr_32, implicit $exec
288 %1767:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %843:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in18, !alias.scope !0, addrspace 7)
289 %845:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1743:vgpr_32, implicit $exec
290 %1766:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %845:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in19, !alias.scope !0, addrspace 7)
291 %847:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1744:vgpr_32, implicit $exec
292 %1758:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %847:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in20, !alias.scope !0, addrspace 7)
293 %849:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1745:vgpr_32, implicit $exec
294 %1759:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %849:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in21, !alias.scope !0, addrspace 7)
295 %851:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1746:vgpr_32, implicit $exec
296 %1760:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %851:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in22, !alias.scope !0, addrspace 7)
297 %853:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1747:vgpr_32, implicit $exec
298 %1761:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %853:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in23, !alias.scope !0, addrspace 7)
299 %855:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1748:vgpr_32, implicit $exec
300 %1762:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %855:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in24, !alias.scope !0, addrspace 7)
301 %857:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1749:vgpr_32, implicit $exec
302 %1763:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %857:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in25, !alias.scope !0, addrspace 7)
303 %859:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1750:vgpr_32, implicit $exec
304 %1764:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %859:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s16) from %ir.in26, !alias.scope !0, addrspace 7)
305 %861:vgpr_32 = V_ADD_U32_e32 %41:vgpr_32, %1751:vgpr_32, implicit $exec
306 %1765:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN %861:vgpr_32, %450:sgpr_128, 0, 0, 0, 0, implicit $exec
307 DS_WRITE_B128_gfx9 %31:vgpr_32, %1757:av_128_align2, 0, 0, implicit $exec :: (store (s128) into %ir.in2, !alias.scope !0, addrspace 3)
308 DS_WRITE_B128_gfx9 %31:vgpr_32, %1755:av_128_align2, 2064, 0, implicit $exec :: (store (s128) into %ir.in3, !alias.scope !0, addrspace 3)
309 DS_WRITE_B128_gfx9 %31:vgpr_32, %1754:av_128_align2, 2080, 0, implicit $exec :: (store (s128) into %ir.in4, !alias.scope !0, addrspace 3)
310 DS_WRITE_B128_gfx9 %31:vgpr_32, %1756:av_128_align2, 16, 0, implicit $exec :: (store (s128) into %ir.in5, !alias.scope !0, addrspace 3)
311 %864:vgpr_32 = V_ADD_U32_e32 -16, %1752:vgpr_32, implicit $exec
312 %1757:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %864:vgpr_32, %518:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in26, !alias.scope !0, addrspace 7)
313 %1755:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %1752:vgpr_32, %518:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in27, !alias.scope !0, addrspace 7)
314 %1754:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %1753:vgpr_32, %518:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in28, !alias.scope !0, addrspace 7)
315 %865:vgpr_32 = V_ADD_U32_e32 -16, %1753:vgpr_32, implicit $exec
316 %1756:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %865:vgpr_32, %518:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in29, !alias.scope !0, addrspace 7)
317 %1753:vgpr_32 = V_ADD_U32_e32 128, %1753:vgpr_32, implicit $exec
318 %1752:vgpr_32 = V_ADD_U32_e32 128, %1752:vgpr_32, implicit $exec
319 %1751:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1751:vgpr_32, implicit $exec
320 %1750:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1750:vgpr_32, implicit $exec
321 %1749:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1749:vgpr_32, implicit $exec
322 %1748:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1748:vgpr_32, implicit $exec
323 %1747:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1747:vgpr_32, implicit $exec
324 %1746:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1746:vgpr_32, implicit $exec
325 %1745:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1745:vgpr_32, implicit $exec
326 %1744:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1744:vgpr_32, implicit $exec
327 %1743:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1743:vgpr_32, implicit $exec
328 %1742:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1742:vgpr_32, implicit $exec
329 %1741:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1741:vgpr_32, implicit $exec
330 %1740:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1740:vgpr_32, implicit $exec
331 %1739:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1739:vgpr_32, implicit $exec
332 %1738:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1738:vgpr_32, implicit $exec
333 %1737:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1737:vgpr_32, implicit $exec
334 %1736:vgpr_32 = V_ADD_U32_e32 %43:sreg_32, %1736:vgpr_32, implicit $exec
335 %1735:sreg_32 = nsw S_ADD_I32 %1735:sreg_32, -1, implicit-def dead $scc
336 S_CMP_LG_U32 %1735:sreg_32, 0, implicit-def $scc
337 S_CBRANCH_SCC1 %bb.1, implicit killed $scc