1 # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s
3 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-4-generic --amdhsa-code-object-version=6 -start-before=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s
; Placeholder IR definition of the kernel under test; its IR body is only a bare return.
; NOTE(review): the RUN lines start llc at the machine scheduler, so the scheduled
; instructions presumably come from a machine-level body outside this view - confirm.
; The autogenerated FileCheck assertions that follow pin the assembly emitted for it.
6 define amdgpu_kernel void @largeInterleave() #0 { ret void }
7 ; GCN-LABEL: largeInterleave:
9 ; GCN-NEXT: ; implicit-def: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
10 ; GCN-NEXT: ; implicit-def: $vgpr0
11 ; GCN-NEXT: ; implicit-def: $vgpr2
12 ; GCN-NEXT: ; implicit-def: $vgpr1
13 ; GCN-NEXT: ; implicit-def: $vgpr8
14 ; GCN-NEXT: ; implicit-def: $vgpr94
15 ; GCN-NEXT: ; implicit-def: $vgpr76_vgpr77_vgpr78_vgpr79
16 ; GCN-NEXT: ; implicit-def: $vgpr106
17 ; GCN-NEXT: ; implicit-def: $vgpr132
18 ; GCN-NEXT: ; implicit-def: $vgpr133
19 ; GCN-NEXT: ; implicit-def: $vgpr139
20 ; GCN-NEXT: ; implicit-def: $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127
21 ; GCN-NEXT: ; iglp_opt mask(0x00000002)
22 ; GCN-NEXT: ; implicit-def: $sgpr0
23 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; GCN-NEXT: v_readfirstlane_b32 s7, v0
25 ; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9_sgpr10_sgpr11
26 ; GCN-NEXT: ; kill: killed $sgpr8_sgpr9_sgpr10_sgpr11
27 ; GCN-NEXT: ; implicit-def: $sgpr5
29 ; GCN-NEXT: v_lshl_add_u32 v0, s7, 4, v2
30 ; GCN-NEXT: v_mul_lo_u32 v0, v0, s6
31 ; GCN-NEXT: v_add_lshl_u32 v92, v0, v1, 1
32 ; GCN-NEXT: v_add_u32_e32 v93, s0, v92
33 ; GCN-NEXT: buffer_load_dwordx4 v[0:3], v92, s[8:11], 0 offen sc0 sc1
34 ; GCN-NEXT: s_waitcnt vmcnt(0)
35 ; GCN-NEXT: buffer_inv sc0 sc1
36 ; GCN-NEXT: buffer_load_dwordx4 v[4:7], v93, s[8:11], 0 offen sc0 sc1
37 ; GCN-NEXT: s_waitcnt vmcnt(0)
38 ; GCN-NEXT: buffer_inv sc0 sc1
39 ; GCN-NEXT: s_lshl_b32 s0, s7, 7
40 ; GCN-NEXT: v_add_lshl_u32 v95, v8, s0, 1
41 ; GCN-NEXT: v_add_u32_e32 v8, 64, v93
42 ; GCN-NEXT: ; kill: killed $vgpr8
43 ; GCN-NEXT: ; implicit-def: $sgpr0_sgpr1_sgpr2_sgpr3
44 ; GCN-NEXT: ; kill: killed $vgpr92
45 ; GCN-NEXT: ; implicit-def: $sgpr6
46 ; GCN-NEXT: buffer_wbl2 sc0 sc1
47 ; GCN-NEXT: ds_write_b128 v95, v[0:3]
48 ; GCN-NEXT: buffer_wbl2 sc0 sc1
49 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
50 ; GCN-NEXT: ds_write_b128 v95, v[4:7] offset:1024
51 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
52 ; GCN-NEXT: buffer_load_dwordx4 v[64:67], v92, s[8:11], 0 offen offset:64 sc0 sc1
53 ; GCN-NEXT: s_waitcnt vmcnt(0)
54 ; GCN-NEXT: buffer_inv sc0 sc1
55 ; GCN-NEXT: buffer_load_dwordx4 v[68:71], v8, s[8:11], 0 offen sc0 sc1
56 ; GCN-NEXT: s_waitcnt vmcnt(0)
57 ; GCN-NEXT: buffer_inv sc0 sc1
58 ; GCN-NEXT: ;;#ASMSTART
59 ; GCN-NEXT: s_waitcnt vmcnt(8)
61 ; GCN-NEXT: ds_read_b128 v[72:75], v94
62 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
63 ; GCN-NEXT: buffer_inv sc0 sc1
64 ; GCN-NEXT: ds_read_b128 v[80:83], v94 offset:512
65 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
66 ; GCN-NEXT: buffer_inv sc0 sc1
67 ; GCN-NEXT: ds_read_b128 v[84:87], v94 offset:1024
68 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
69 ; GCN-NEXT: buffer_inv sc0 sc1
70 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[72:73], v[76:77], 0
71 ; GCN-NEXT: ds_read_b128 v[88:91], v94 offset:1536
72 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
73 ; GCN-NEXT: buffer_inv sc0 sc1
74 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[74:75], v[78:79], v[48:63]
75 ; GCN-NEXT: ds_read_b128 v[72:75], v106
76 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
77 ; GCN-NEXT: buffer_inv sc0 sc1
78 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[80:81], v[76:77], 0
79 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[84:85], v[76:77], 0
80 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[88:89], v[76:77], 0
81 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[82:83], v[78:79], v[32:47]
82 ; GCN-NEXT: ds_read_b128 v[80:83], v106 offset:512
83 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
84 ; GCN-NEXT: buffer_inv sc0 sc1
85 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[86:87], v[78:79], v[16:31]
86 ; GCN-NEXT: ds_read_b128 v[84:87], v106 offset:1024
87 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
88 ; GCN-NEXT: buffer_inv sc0 sc1
89 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[90:91], v[78:79], v[0:15]
90 ; GCN-NEXT: ; implicit-def: $vgpr76_vgpr77_vgpr78_vgpr79
91 ; GCN-NEXT: ds_read_b128 v[88:91], v106 offset:1536
92 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
93 ; GCN-NEXT: buffer_inv sc0 sc1
94 ; GCN-NEXT: ;;#ASMSTART
95 ; GCN-NEXT: s_waitcnt vmcnt(8)
97 ; GCN-NEXT: buffer_wbl2 sc0 sc1
98 ; GCN-NEXT: ds_write_b128 v95, v[64:67]
99 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[72:73], v[76:77], v[48:63]
100 ; GCN-NEXT: v_add_u32_e32 v72, 0x80, v93
101 ; GCN-NEXT: buffer_wbl2 sc0 sc1
102 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
103 ; GCN-NEXT: ds_write_b128 v95, v[68:71] offset:1024
104 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
105 ; GCN-NEXT: buffer_load_dwordx4 v[64:67], v92, s[8:11], 0 offen offset:128 sc0 sc1
106 ; GCN-NEXT: s_waitcnt vmcnt(0)
107 ; GCN-NEXT: buffer_inv sc0 sc1
108 ; GCN-NEXT: buffer_load_dwordx4 v[68:71], v72, s[8:11], 0 offen sc0 sc1
109 ; GCN-NEXT: s_waitcnt vmcnt(0)
110 ; GCN-NEXT: buffer_inv sc0 sc1
111 ; GCN-NEXT: ;;#ASMSTART
112 ; GCN-NEXT: s_waitcnt vmcnt(8)
113 ; GCN-NEXT: ;;#ASMEND
114 ; GCN-NEXT: ; kill: killed $vgpr72
115 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[74:75], v[78:79], v[48:63]
116 ; GCN-NEXT: ds_read_b128 v[72:75], v94
117 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
118 ; GCN-NEXT: buffer_inv sc0 sc1
119 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[80:81], v[76:77], v[32:47]
120 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[84:85], v[76:77], v[16:31]
121 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[88:89], v[76:77], v[0:15]
122 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[82:83], v[78:79], v[32:47]
123 ; GCN-NEXT: ds_read_b128 v[80:83], v94 offset:512
124 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
125 ; GCN-NEXT: buffer_inv sc0 sc1
126 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[86:87], v[78:79], v[16:31]
127 ; GCN-NEXT: ds_read_b128 v[84:87], v94 offset:1024
128 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
129 ; GCN-NEXT: buffer_inv sc0 sc1
130 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[90:91], v[78:79], v[0:15]
131 ; GCN-NEXT: ; implicit-def: $vgpr76_vgpr77_vgpr78_vgpr79
132 ; GCN-NEXT: ds_read_b128 v[88:91], v94 offset:1536
133 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
134 ; GCN-NEXT: buffer_inv sc0 sc1
135 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[72:73], v[76:77], v[48:63]
136 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[74:75], v[78:79], v[48:63]
137 ; GCN-NEXT: ds_read_b128 v[72:75], v106
138 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
139 ; GCN-NEXT: buffer_inv sc0 sc1
140 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[80:81], v[76:77], v[32:47]
141 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[84:85], v[76:77], v[16:31]
142 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[88:89], v[76:77], v[0:15]
143 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[82:83], v[78:79], v[32:47]
144 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[86:87], v[78:79], v[16:31]
145 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[90:91], v[78:79], v[0:15]
146 ; GCN-NEXT: ; implicit-def: $vgpr76_vgpr77_vgpr78_vgpr79
147 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[72:73], v[76:77], v[48:63]
148 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[74:75], v[78:79], v[48:63]
149 ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:512
150 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
151 ; GCN-NEXT: buffer_inv sc0 sc1
152 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[72:73], v[76:77], v[32:47]
153 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[74:75], v[78:79], v[32:47]
154 ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:1024
155 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
156 ; GCN-NEXT: buffer_inv sc0 sc1
157 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[72:73], v[76:77], v[16:31]
158 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[74:75], v[78:79], v[16:31]
159 ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:1536
160 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
161 ; GCN-NEXT: buffer_inv sc0 sc1
162 ; GCN-NEXT: ;;#ASMSTART
163 ; GCN-NEXT: s_waitcnt vmcnt(8)
164 ; GCN-NEXT: ;;#ASMEND
165 ; GCN-NEXT: buffer_wbl2 sc0 sc1
166 ; GCN-NEXT: ds_write_b128 v95, v[64:67]
167 ; GCN-NEXT: buffer_wbl2 sc0 sc1
168 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
169 ; GCN-NEXT: ds_write_b128 v95, v[68:71] offset:1024
170 ; GCN-NEXT: ; implicit-def: $vgpr64
171 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[72:73], v[76:77], v[0:15]
172 ; GCN-NEXT: v_add_u32_e32 v72, 0xc0, v93
173 ; GCN-NEXT: ; implicit-def: $vgpr73
174 ; GCN-NEXT: v_add_u32_e32 v76, v132, v64
175 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
176 ; GCN-NEXT: buffer_load_dwordx4 v[64:67], v92, s[8:11], 0 offen offset:192 sc0 sc1
177 ; GCN-NEXT: s_waitcnt vmcnt(0)
178 ; GCN-NEXT: buffer_inv sc0 sc1
179 ; GCN-NEXT: buffer_load_dwordx4 v[68:71], v72, s[8:11], 0 offen sc0 sc1
180 ; GCN-NEXT: s_waitcnt vmcnt(0)
181 ; GCN-NEXT: buffer_inv sc0 sc1
182 ; GCN-NEXT: ; kill: killed $vgpr72
183 ; GCN-NEXT: v_add_u32_e32 v72, v132, v73
184 ; GCN-NEXT: buffer_load_dwordx2 v[98:99], v76, s[0:3], 0 offen sc0 sc1
185 ; GCN-NEXT: s_waitcnt vmcnt(0)
186 ; GCN-NEXT: buffer_inv sc0 sc1
187 ; GCN-NEXT: buffer_load_dwordx2 v[102:103], v72, s[0:3], 0 offen sc0 sc1
188 ; GCN-NEXT: s_waitcnt vmcnt(0)
189 ; GCN-NEXT: buffer_inv sc0 sc1
190 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[74:75], v[78:79], v[0:15]
191 ; GCN-NEXT: ; implicit-def: $vgpr74
192 ; GCN-NEXT: v_add_u32_e32 v72, v132, v74
193 ; GCN-NEXT: ; implicit-def: $vgpr75
194 ; GCN-NEXT: buffer_load_dwordx2 v[100:101], v72, s[0:3], 0 offen sc0 sc1
195 ; GCN-NEXT: s_waitcnt vmcnt(0)
196 ; GCN-NEXT: buffer_inv sc0 sc1
197 ; GCN-NEXT: v_add_u32_e32 v72, v132, v75
198 ; GCN-NEXT: buffer_load_dwordx2 v[104:105], v72, s[0:3], 0 offen sc0 sc1
199 ; GCN-NEXT: s_waitcnt vmcnt(0)
200 ; GCN-NEXT: buffer_inv sc0 sc1
201 ; GCN-NEXT: ;;#ASMSTART
202 ; GCN-NEXT: s_waitcnt vmcnt(8)
203 ; GCN-NEXT: ;;#ASMEND
204 ; GCN-NEXT: ds_read_b128 v[72:75], v94
205 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
206 ; GCN-NEXT: buffer_inv sc0 sc1
207 ; GCN-NEXT: ; kill: killed $vgpr76
208 ; GCN-NEXT: ; implicit-def: $vgpr76_vgpr77_vgpr78_vgpr79
209 ; GCN-NEXT: ; implicit-def: $sgpr8
210 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[72:73], v[76:77], v[48:63]
211 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[74:75], v[78:79], v[48:63]
212 ; GCN-NEXT: ds_read_b128 v[72:75], v94 offset:512
213 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
214 ; GCN-NEXT: buffer_inv sc0 sc1
215 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[72:73], v[76:77], v[32:47]
216 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[74:75], v[78:79], v[32:47]
217 ; GCN-NEXT: ds_read_b128 v[72:75], v94 offset:1024
218 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
219 ; GCN-NEXT: buffer_inv sc0 sc1
220 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[72:73], v[76:77], v[16:31]
221 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[74:75], v[78:79], v[16:31]
222 ; GCN-NEXT: ds_read_b128 v[72:75], v94 offset:1536
223 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
224 ; GCN-NEXT: buffer_inv sc0 sc1
225 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[72:73], v[76:77], v[0:15]
226 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[74:75], v[78:79], v[0:15]
227 ; GCN-NEXT: ds_read_b128 v[72:75], v106
228 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
229 ; GCN-NEXT: buffer_inv sc0 sc1
230 ; GCN-NEXT: ; implicit-def: $vgpr76_vgpr77_vgpr78_vgpr79
231 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[72:73], v[76:77], v[48:63]
232 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[74:75], v[78:79], v[48:63]
233 ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:512
234 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
235 ; GCN-NEXT: buffer_inv sc0 sc1
236 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[72:73], v[76:77], v[32:47]
237 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[74:75], v[78:79], v[32:47]
238 ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:1024
239 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
240 ; GCN-NEXT: buffer_inv sc0 sc1
241 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[72:73], v[76:77], v[16:31]
242 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[74:75], v[78:79], v[16:31]
243 ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:1536
244 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
245 ; GCN-NEXT: buffer_inv sc0 sc1
246 ; GCN-NEXT: ;;#ASMSTART
247 ; GCN-NEXT: s_waitcnt vmcnt(8)
248 ; GCN-NEXT: ;;#ASMEND
249 ; GCN-NEXT: buffer_wbl2 sc0 sc1
250 ; GCN-NEXT: ds_write_b128 v95, v[64:67]
251 ; GCN-NEXT: buffer_wbl2 sc0 sc1
252 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
253 ; GCN-NEXT: ds_write_b128 v95, v[68:71] offset:1024
254 ; GCN-NEXT: ;;#ASMSTART
255 ; GCN-NEXT: s_waitcnt vmcnt(8)
256 ; GCN-NEXT: ;;#ASMEND
257 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
258 ; GCN-NEXT: ds_read_b128 v[64:67], v94
259 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
260 ; GCN-NEXT: buffer_inv sc0 sc1
261 ; GCN-NEXT: ds_read_b128 v[90:93], v94 offset:512
262 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
263 ; GCN-NEXT: buffer_inv sc0 sc1
264 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[72:73], v[76:77], v[0:15]
265 ; GCN-NEXT: ; implicit-def: $vgpr68_vgpr69_vgpr70_vgpr71
266 ; GCN-NEXT: ds_read_b128 v[84:87], v94 offset:1024
267 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
268 ; GCN-NEXT: buffer_inv sc0 sc1
269 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[64:65], v[68:69], v[48:63]
270 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[74:75], v[78:79], v[0:15]
271 ; GCN-NEXT: ds_read_b128 v[76:79], v94 offset:1536
272 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
273 ; GCN-NEXT: buffer_inv sc0 sc1
274 ; GCN-NEXT: ds_read_b128 v[94:97], v106
275 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
276 ; GCN-NEXT: buffer_inv sc0 sc1
277 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[66:67], v[70:71], v[48:63]
278 ; GCN-NEXT: ; implicit-def: $vgpr64_vgpr65_vgpr66_vgpr67
279 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[90:91], v[68:69], v[32:47]
280 ; GCN-NEXT: ds_read_b128 v[88:91], v106 offset:512
281 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
282 ; GCN-NEXT: buffer_inv sc0 sc1
283 ; GCN-NEXT: ds_read_b128 v[80:83], v106 offset:1024
284 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
285 ; GCN-NEXT: buffer_inv sc0 sc1
286 ; GCN-NEXT: ds_read_b128 v[72:75], v106 offset:1536
287 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
288 ; GCN-NEXT: buffer_inv sc0 sc1
289 ; GCN-NEXT: ;;#ASMSTART
290 ; GCN-NEXT: s_waitcnt vmcnt(8)
291 ; GCN-NEXT: ;;#ASMEND
292 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[94:95], v[64:65], v[48:63]
293 ; GCN-NEXT: v_perm_b32 v94, v102, v98, s5
294 ; GCN-NEXT: v_perm_b32 v98, v102, v98, s8
295 ; GCN-NEXT: v_perm_b32 v102, v103, v99, s5
296 ; GCN-NEXT: v_perm_b32 v95, v104, v100, s5
297 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[92:93], v[70:71], v[32:47]
298 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[84:85], v[68:69], v[16:31]
299 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[48:63], v[96:97], v[66:67], v[48:63]
300 ; GCN-NEXT: v_perm_b32 v96, v103, v99, s8
301 ; GCN-NEXT: v_perm_b32 v99, v104, v100, s8
302 ; GCN-NEXT: v_perm_b32 v103, v105, v101, s5
303 ; GCN-NEXT: v_perm_b32 v97, v105, v101, s8
304 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[88:89], v[64:65], v[32:47]
306 ; GCN-NEXT: v_mul_f32_e32 v100, s4, v48
307 ; GCN-NEXT: v_mul_f32_e32 v101, s4, v49
308 ; GCN-NEXT: v_max3_f32 v92, v100, s6, v101
309 ; GCN-NEXT: v_mul_f32_e32 v93, s4, v50
310 ; GCN-NEXT: v_mul_f32_e32 v100, s4, v51
311 ; GCN-NEXT: v_max3_f32 v92, v92, v93, v100
312 ; GCN-NEXT: v_mul_f32_e32 v93, s4, v52
313 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[86:87], v[70:71], v[16:31]
314 ; GCN-NEXT: v_mul_f32_e32 v100, s4, v53
315 ; GCN-NEXT: v_max3_f32 v92, v92, v93, v100
316 ; GCN-NEXT: v_mul_f32_e32 v84, s4, v54
317 ; GCN-NEXT: v_mul_f32_e32 v85, s4, v55
318 ; GCN-NEXT: v_max3_f32 v84, v92, v84, v85
319 ; GCN-NEXT: v_mul_f32_e32 v85, s4, v56
320 ; GCN-NEXT: v_mul_f32_e32 v92, s4, v57
321 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[76:77], v[68:69], v[0:15]
322 ; GCN-NEXT: v_max3_f32 v84, v84, v85, v92
323 ; GCN-NEXT: v_mul_f32_e32 v85, s4, v58
324 ; GCN-NEXT: v_mul_f32_e32 v88, s4, v59
325 ; GCN-NEXT: v_max3_f32 v84, v84, v85, v88
326 ; GCN-NEXT: v_mul_f32_e32 v85, s4, v60
327 ; GCN-NEXT: v_mul_f32_e32 v88, s4, v61
328 ; GCN-NEXT: v_max3_f32 v84, v84, v85, v88
329 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[32:47], v[90:91], v[66:67], v[32:47]
330 ; GCN-NEXT: v_mul_f32_e32 v85, s4, v62
331 ; GCN-NEXT: v_mul_f32_e32 v88, s4, v63
332 ; GCN-NEXT: v_max3_f32 v84, v84, v85, v88
333 ; GCN-NEXT: ; implicit-def: $sgpr6
334 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[80:81], v[64:65], v[16:31]
336 ; GCN-NEXT: v_mul_f32_e32 v85, s4, v32
337 ; GCN-NEXT: v_mul_f32_e32 v88, s4, v33
338 ; GCN-NEXT: v_max3_f32 v84, v84, v85, v88
339 ; GCN-NEXT: v_mul_f32_e32 v85, s4, v34
340 ; GCN-NEXT: v_mul_f32_e32 v88, s4, v35
341 ; GCN-NEXT: v_max3_f32 v84, v84, v85, v88
342 ; GCN-NEXT: v_mul_f32_e32 v85, s4, v36
343 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[78:79], v[70:71], v[0:15]
344 ; GCN-NEXT: v_mul_f32_e32 v86, s4, v37
345 ; GCN-NEXT: v_max3_f32 v84, v84, v85, v86
346 ; GCN-NEXT: v_mul_f32_e32 v85, s4, v38
347 ; GCN-NEXT: v_mul_f32_e32 v86, s4, v39
348 ; GCN-NEXT: v_max3_f32 v84, v84, v85, v86
349 ; GCN-NEXT: v_mul_f32_e32 v85, s4, v40
350 ; GCN-NEXT: v_mul_f32_e32 v80, s4, v41
351 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[16:31], v[82:83], v[66:67], v[16:31]
352 ; GCN-NEXT: v_max3_f32 v80, v84, v85, v80
353 ; GCN-NEXT: v_mul_f32_e32 v81, s4, v42
354 ; GCN-NEXT: v_mul_f32_e32 v84, s4, v43
355 ; GCN-NEXT: v_max3_f32 v80, v80, v81, v84
356 ; GCN-NEXT: v_mul_f32_e32 v81, s4, v44
357 ; GCN-NEXT: v_mul_f32_e32 v84, s4, v45
358 ; GCN-NEXT: v_max3_f32 v80, v80, v81, v84
359 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[72:73], v[64:65], v[0:15]
360 ; GCN-NEXT: v_mul_f32_e32 v81, s4, v46
361 ; GCN-NEXT: v_mul_f32_e32 v82, s4, v47
362 ; GCN-NEXT: v_max3_f32 v80, v80, v81, v82
363 ; GCN-NEXT: v_mul_f32_e32 v81, s4, v16
364 ; GCN-NEXT: v_mul_f32_e32 v82, s4, v17
365 ; GCN-NEXT: v_max3_f32 v80, v80, v81, v82
366 ; GCN-NEXT: v_mul_f32_e32 v68, s4, v18
367 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[74:75], v[66:67], v[0:15]
368 ; GCN-NEXT: v_mul_f32_e32 v69, s4, v19
369 ; GCN-NEXT: v_max3_f32 v68, v80, v68, v69
370 ; GCN-NEXT: v_mul_f32_e32 v69, s4, v20
371 ; GCN-NEXT: v_mul_f32_e32 v76, s4, v21
372 ; GCN-NEXT: v_max3_f32 v68, v68, v69, v76
373 ; GCN-NEXT: v_mul_f32_e32 v69, s4, v22
374 ; GCN-NEXT: v_mul_f32_e32 v70, s4, v23
375 ; GCN-NEXT: v_max3_f32 v68, v68, v69, v70
376 ; GCN-NEXT: v_mul_f32_e32 v69, s4, v24
377 ; GCN-NEXT: v_mul_f32_e32 v70, s4, v25
378 ; GCN-NEXT: v_max3_f32 v68, v68, v69, v70
379 ; GCN-NEXT: v_mul_f32_e32 v69, s4, v26
380 ; GCN-NEXT: v_mul_f32_e32 v70, s4, v27
381 ; GCN-NEXT: v_max3_f32 v64, v68, v69, v70
382 ; GCN-NEXT: v_mul_f32_e32 v65, s4, v28
383 ; GCN-NEXT: v_mul_f32_e32 v68, s4, v29
384 ; GCN-NEXT: v_max3_f32 v64, v64, v65, v68
385 ; GCN-NEXT: v_mul_f32_e32 v65, s4, v30
386 ; GCN-NEXT: v_mul_f32_e32 v68, s4, v31
387 ; GCN-NEXT: v_max3_f32 v64, v64, v65, v68
388 ; GCN-NEXT: v_mul_f32_e32 v65, s4, v0
389 ; GCN-NEXT: v_mul_f32_e32 v66, s4, v1
390 ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66
391 ; GCN-NEXT: v_mul_f32_e32 v65, s4, v2
392 ; GCN-NEXT: v_mul_f32_e32 v66, s4, v3
393 ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66
394 ; GCN-NEXT: v_mul_f32_e32 v65, s4, v4
395 ; GCN-NEXT: v_mul_f32_e32 v66, s4, v5
396 ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66
397 ; GCN-NEXT: v_mul_f32_e32 v65, s4, v6
398 ; GCN-NEXT: v_mul_f32_e32 v66, s4, v7
399 ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66
400 ; GCN-NEXT: v_mul_f32_e32 v65, s4, v8
401 ; GCN-NEXT: v_mul_f32_e32 v66, s4, v9
402 ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66
403 ; GCN-NEXT: v_mul_f32_e32 v65, s4, v10
404 ; GCN-NEXT: v_mul_f32_e32 v66, s4, v11
405 ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66
406 ; GCN-NEXT: v_mul_f32_e32 v65, s4, v12
407 ; GCN-NEXT: v_mul_f32_e32 v66, s4, v13
408 ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66
409 ; GCN-NEXT: v_mul_f32_e32 v65, s4, v14
410 ; GCN-NEXT: v_mul_f32_e32 v66, s4, v15
411 ; GCN-NEXT: v_max3_f32 v64, v64, v65, v66
412 ; GCN-NEXT: ; implicit-def: $vgpr65
413 ; GCN-NEXT: ; implicit-def: $vgpr66
414 ; GCN-NEXT: ; implicit-def: $vgpr68
415 ; GCN-NEXT: ; implicit-def: $vgpr67
416 ; GCN-NEXT: v_add_u32_e32 v65, s7, v65
417 ; GCN-NEXT: v_and_b32_e32 v65, 0x1fffffff, v65
418 ; GCN-NEXT: v_mul_lo_u32 v65, v65, s6
419 ; GCN-NEXT: v_add_lshl_u32 v135, v66, v65, 1
420 ; GCN-NEXT: ds_bpermute_b32 v65, v133, v64
421 ; GCN-NEXT: ; implicit-def: $vgpr66
422 ; GCN-NEXT: v_lshl_add_u32 v136, v66, 1, v135
423 ; GCN-NEXT: ; implicit-def: $vgpr66
424 ; GCN-NEXT: v_lshl_add_u32 v137, v66, 1, v136
425 ; GCN-NEXT: ; implicit-def: $vgpr66
426 ; GCN-NEXT: ; implicit-def: $sgpr6_sgpr7
427 ; GCN-NEXT: v_lshl_add_u32 v138, v66, 1, v137
428 ; GCN-NEXT: buffer_wbl2 sc0 sc1
429 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
430 ; GCN-NEXT: ds_write_b64 v135, v[94:95]
431 ; GCN-NEXT: v_max_f32_e32 v65, v65, v65
432 ; GCN-NEXT: v_max_f32_e32 v64, v64, v65
433 ; GCN-NEXT: ds_bpermute_b32 v65, v133, v64
434 ; GCN-NEXT: buffer_wbl2 sc0 sc1
435 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
436 ; GCN-NEXT: ds_write_b64 v136, v[98:99]
437 ; GCN-NEXT: buffer_wbl2 sc0 sc1
438 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
439 ; GCN-NEXT: ds_write_b64 v137, v[102:103]
440 ; GCN-NEXT: buffer_wbl2 sc0 sc1
441 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
442 ; GCN-NEXT: ds_write_b64 v138, v[96:97]
443 ; GCN-NEXT: v_add_u32_e32 v68, v132, v68
444 ; GCN-NEXT: v_cndmask_b32_e64 v64, v65, v64, s[6:7]
445 ; GCN-NEXT: v_max_f32_e32 v64, v64, v64
446 ; GCN-NEXT: ; implicit-def: $vgpr65
447 ; GCN-NEXT: v_max_f32_e32 v66, v65, v65
448 ; GCN-NEXT: v_max_f32_e32 v134, v66, v64
449 ; GCN-NEXT: ; implicit-def: $vgpr64
450 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
451 ; GCN-NEXT: buffer_load_dwordx2 v[156:157], v68, s[0:3], 0 offen sc0 sc1
452 ; GCN-NEXT: s_waitcnt vmcnt(0)
453 ; GCN-NEXT: buffer_inv sc0 sc1
454 ; GCN-NEXT: v_add_u32_e32 v64, v132, v64
455 ; GCN-NEXT: buffer_load_dwordx2 v[158:159], v64, s[0:3], 0 offen sc0 sc1
456 ; GCN-NEXT: s_waitcnt vmcnt(0)
457 ; GCN-NEXT: buffer_inv sc0 sc1
458 ; GCN-NEXT: ; implicit-def: $vgpr66
459 ; GCN-NEXT: v_add_u32_e32 v64, v132, v66
460 ; GCN-NEXT: buffer_load_dwordx2 v[128:129], v64, s[0:3], 0 offen sc0 sc1
461 ; GCN-NEXT: s_waitcnt vmcnt(0)
462 ; GCN-NEXT: buffer_inv sc0 sc1
463 ; GCN-NEXT: v_add_u32_e32 v64, v132, v67
464 ; GCN-NEXT: buffer_load_dwordx2 v[130:131], v64, s[0:3], 0 offen sc0 sc1
465 ; GCN-NEXT: s_waitcnt vmcnt(0)
466 ; GCN-NEXT: buffer_inv sc0 sc1
467 ; GCN-NEXT: v_fma_f32 v57, s4, v57, -v134
468 ; GCN-NEXT: v_fma_f32 v48, s4, v48, -v134
469 ; GCN-NEXT: v_fma_f32 v96, s4, v58, -v134
470 ; GCN-NEXT: v_mul_f32_e32 v57, 0x3fb8aa3b, v57
471 ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v48
472 ; GCN-NEXT: v_fma_f32 v64, s4, v49, -v134
473 ; GCN-NEXT: v_exp_f32_e32 v163, v57
474 ; GCN-NEXT: v_mul_f32_e32 v57, 0x3fb8aa3b, v96
475 ; GCN-NEXT: v_fma_f32 v66, s4, v50, -v134
476 ; GCN-NEXT: v_exp_f32_e32 v164, v57
477 ; GCN-NEXT: v_exp_f32_e32 v49, v48
478 ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v64
479 ; GCN-NEXT: v_fma_f32 v67, s4, v51, -v134
480 ; GCN-NEXT: v_exp_f32_e32 v50, v48
481 ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v66
482 ; GCN-NEXT: v_fma_f32 v68, s4, v52, -v134
483 ; GCN-NEXT: v_exp_f32_e32 v51, v48
484 ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v67
485 ; GCN-NEXT: v_fma_f32 v69, s4, v53, -v134
486 ; GCN-NEXT: v_exp_f32_e32 v52, v48
487 ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v68
488 ; GCN-NEXT: ;;#ASMSTART
489 ; GCN-NEXT: s_waitcnt vmcnt(8)
490 ; GCN-NEXT: ;;#ASMEND
491 ; GCN-NEXT: v_fma_f32 v70, s4, v54, -v134
492 ; GCN-NEXT: v_exp_f32_e32 v53, v48
493 ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v69
494 ; GCN-NEXT: v_fma_f32 v71, s4, v55, -v134
495 ; GCN-NEXT: ds_read_b128 v[140:143], v139
496 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
497 ; GCN-NEXT: buffer_inv sc0 sc1
498 ; GCN-NEXT: v_exp_f32_e32 v54, v48
499 ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v70
500 ; GCN-NEXT: v_exp_f32_e32 v55, v48
501 ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v71
502 ; GCN-NEXT: ds_read_b128 v[144:147], v139 offset:576
503 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
504 ; GCN-NEXT: buffer_inv sc0 sc1
505 ; GCN-NEXT: v_fma_f32 v66, s4, v56, -v134
506 ; GCN-NEXT: v_exp_f32_e32 v56, v48
507 ; GCN-NEXT: v_sub_f32_e32 v48, v65, v134
508 ; GCN-NEXT: v_cvt_f16_f32_e32 v64, v49
509 ; GCN-NEXT: v_cvt_f16_f32_e32 v67, v50
510 ; GCN-NEXT: v_cvt_f16_f32_e32 v68, v51
511 ; GCN-NEXT: v_cvt_f16_f32_e32 v58, v52
512 ; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v48
513 ; GCN-NEXT: ds_read_b128 v[148:151], v139 offset:1152
514 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
515 ; GCN-NEXT: buffer_inv sc0 sc1
516 ; GCN-NEXT: v_exp_f32_e32 v48, v48
517 ; GCN-NEXT: v_pack_b32_f16 v161, v68, v58
518 ; GCN-NEXT: v_pack_b32_f16 v160, v64, v67
519 ; GCN-NEXT: v_mul_f32_e32 v58, 0x3fb8aa3b, v66
520 ; GCN-NEXT: ; implicit-def: $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79
521 ; GCN-NEXT: ds_read_b128 v[152:155], v139 offset:1728
522 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
523 ; GCN-NEXT: buffer_inv sc0 sc1
524 ; GCN-NEXT: v_fma_f32 v162, s4, v61, -v134
525 ; GCN-NEXT: v_cvt_f16_f32_e32 v61, v55
526 ; GCN-NEXT: v_cvt_f16_f32_e32 v57, v56
527 ; GCN-NEXT: v_pk_mul_f32 v[64:65], v[64:65], v[48:49] op_sel_hi:[1,0]
528 ; GCN-NEXT: v_pk_mul_f32 v[66:67], v[66:67], v[48:49] op_sel_hi:[1,0]
529 ; GCN-NEXT: v_pk_mul_f32 v[68:69], v[68:69], v[48:49] op_sel_hi:[1,0]
530 ; GCN-NEXT: v_pk_mul_f32 v[70:71], v[70:71], v[48:49] op_sel_hi:[1,0]
531 ; GCN-NEXT: v_pk_mul_f32 v[72:73], v[72:73], v[48:49] op_sel_hi:[1,0]
532 ; GCN-NEXT: v_pk_mul_f32 v[74:75], v[74:75], v[48:49] op_sel_hi:[1,0]
533 ; GCN-NEXT: v_pk_mul_f32 v[76:77], v[76:77], v[48:49] op_sel_hi:[1,0]
534 ; GCN-NEXT: v_pk_mul_f32 v[78:79], v[78:79], v[48:49] op_sel_hi:[1,0]
535 ; GCN-NEXT: ; implicit-def: $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95
536 ; GCN-NEXT: v_fma_f32 v59, s4, v59, -v134
537 ; GCN-NEXT: v_pk_mul_f32 v[80:81], v[80:81], v[48:49] op_sel_hi:[1,0]
538 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[140:141], v[160:161], v[64:79]
539 ; GCN-NEXT: v_pk_mul_f32 v[82:83], v[82:83], v[48:49] op_sel_hi:[1,0]
540 ; GCN-NEXT: v_pk_mul_f32 v[84:85], v[84:85], v[48:49] op_sel_hi:[1,0]
541 ; GCN-NEXT: v_pk_mul_f32 v[86:87], v[86:87], v[48:49] op_sel_hi:[1,0]
542 ; GCN-NEXT: v_pk_mul_f32 v[88:89], v[88:89], v[48:49] op_sel_hi:[1,0]
543 ; GCN-NEXT: v_pk_mul_f32 v[90:91], v[90:91], v[48:49] op_sel_hi:[1,0]
544 ; GCN-NEXT: v_pk_mul_f32 v[92:93], v[92:93], v[48:49] op_sel_hi:[1,0]
545 ; GCN-NEXT: v_pk_mul_f32 v[94:95], v[94:95], v[48:49] op_sel_hi:[1,0]
546 ; GCN-NEXT: ; implicit-def: $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111
547 ; GCN-NEXT: v_exp_f32_e32 v58, v58
548 ; GCN-NEXT: v_pk_mul_f32 v[96:97], v[96:97], v[48:49] op_sel_hi:[1,0]
549 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[144:145], v[160:161], v[80:95]
550 ; GCN-NEXT: v_pk_mul_f32 v[98:99], v[98:99], v[48:49] op_sel_hi:[1,0]
551 ; GCN-NEXT: v_pk_mul_f32 v[100:101], v[100:101], v[48:49] op_sel_hi:[1,0]
552 ; GCN-NEXT: v_pk_mul_f32 v[102:103], v[102:103], v[48:49] op_sel_hi:[1,0]
553 ; GCN-NEXT: v_pk_mul_f32 v[104:105], v[104:105], v[48:49] op_sel_hi:[1,0]
554 ; GCN-NEXT: v_pk_mul_f32 v[106:107], v[106:107], v[48:49] op_sel_hi:[1,0]
555 ; GCN-NEXT: v_pk_mul_f32 v[108:109], v[108:109], v[48:49] op_sel_hi:[1,0]
556 ; GCN-NEXT: v_pk_mul_f32 v[110:111], v[110:111], v[48:49] op_sel_hi:[1,0]
557 ; GCN-NEXT: v_pack_b32_f16 v145, v61, v57
558 ; GCN-NEXT: v_mul_f32_e32 v57, 0x3fb8aa3b, v59
559 ; GCN-NEXT: v_cvt_f16_f32_e32 v140, v53
560 ; GCN-NEXT: v_cvt_f16_f32_e32 v141, v54
561 ; GCN-NEXT: v_exp_f32_e32 v59, v57
562 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[148:149], v[160:161], v[96:111]
563 ; GCN-NEXT: v_fma_f32 v60, s4, v60, -v134
564 ; GCN-NEXT: v_pk_mul_f32 v[112:113], v[112:113], v[48:49] op_sel_hi:[1,0]
565 ; GCN-NEXT: v_pk_mul_f32 v[114:115], v[114:115], v[48:49] op_sel_hi:[1,0]
566 ; GCN-NEXT: v_pk_mul_f32 v[116:117], v[116:117], v[48:49] op_sel_hi:[1,0]
567 ; GCN-NEXT: v_pk_mul_f32 v[118:119], v[118:119], v[48:49] op_sel_hi:[1,0]
568 ; GCN-NEXT: v_pk_mul_f32 v[120:121], v[120:121], v[48:49] op_sel_hi:[1,0]
569 ; GCN-NEXT: v_pk_mul_f32 v[122:123], v[122:123], v[48:49] op_sel_hi:[1,0]
570 ; GCN-NEXT: v_pk_mul_f32 v[124:125], v[124:125], v[48:49] op_sel_hi:[1,0]
571 ; GCN-NEXT: v_pk_mul_f32 v[126:127], v[126:127], v[48:49] op_sel_hi:[1,0]
572 ; GCN-NEXT: v_fma_f32 v148, s4, v62, -v134
573 ; GCN-NEXT: v_pack_b32_f16 v144, v140, v141
574 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[152:153], v[160:161], v[112:127]
575 ; GCN-NEXT: v_fma_f32 v152, s4, v63, -v134
576 ; GCN-NEXT: v_mul_f32_e32 v149, 0x3fb8aa3b, v60
577 ; GCN-NEXT: ; implicit-def: $vgpr57
578 ; GCN-NEXT: ds_read_b128 v[60:63], v57
579 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
580 ; GCN-NEXT: buffer_inv sc0 sc1
581 ; GCN-NEXT: v_exp_f32_e32 v160, v149
582 ; GCN-NEXT: v_fma_f32 v161, s4, v33, -v134
583 ; GCN-NEXT: v_mul_f32_e32 v33, 0x3fb8aa3b, v148
584 ; GCN-NEXT: v_cvt_f16_f32_e32 v153, v58
585 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[142:143], v[144:145], v[64:79]
586 ; GCN-NEXT: v_fma_f32 v32, s4, v32, -v134
587 ; GCN-NEXT: ds_read_b128 v[140:143], v57 offset:576
588 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
589 ; GCN-NEXT: buffer_inv sc0 sc1
590 ; GCN-NEXT: v_fma_f32 v40, s4, v40, -v134
591 ; GCN-NEXT: v_fma_f32 v44, s4, v44, -v134
592 ; GCN-NEXT: v_fma_f32 v16, s4, v16, -v134
593 ; GCN-NEXT: v_fma_f32 v166, s4, v20, -v134
594 ; GCN-NEXT: v_fma_f32 v24, s4, v24, -v134
595 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[146:147], v[144:145], v[80:95]
596 ; GCN-NEXT: v_mul_f32_e32 v146, 0x3fb8aa3b, v162
597 ; GCN-NEXT: v_cvt_f16_f32_e32 v147, v163
598 ; GCN-NEXT: v_exp_f32_e32 v162, v146
599 ; GCN-NEXT: v_cvt_f16_f32_e32 v146, v164
600 ; GCN-NEXT: v_fma_f32 v28, s4, v28, -v134
601 ; GCN-NEXT: v_pack_b32_f16 v148, v153, v147
602 ; GCN-NEXT: v_fma_f32 v0, s4, v0, -v134
603 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[150:151], v[144:145], v[96:111]
604 ; GCN-NEXT: v_exp_f32_e32 v151, v33
605 ; GCN-NEXT: v_cvt_f16_f32_e32 v33, v59
606 ; GCN-NEXT: v_fma_f32 v150, s4, v34, -v134
607 ; GCN-NEXT: v_fma_f32 v8, s4, v8, -v134
608 ; GCN-NEXT: v_fma_f32 v12, s4, v12, -v134
609 ; GCN-NEXT: v_pack_b32_f16 v149, v146, v33
610 ; GCN-NEXT: v_mul_f32_e32 v33, 0x3fb8aa3b, v152
611 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[154:155], v[144:145], v[112:127]
612 ; GCN-NEXT: v_fma_f32 v152, s4, v35, -v134
613 ; GCN-NEXT: v_exp_f32_e32 v153, v33
614 ; GCN-NEXT: v_fma_f32 v155, s4, v36, -v134
615 ; GCN-NEXT: v_perm_b32 v36, v158, v156, s5
616 ; GCN-NEXT: v_cvt_f16_f32_e32 v154, v160
617 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[60:61], v[148:149], v[64:79]
618 ; GCN-NEXT: v_mul_f32_e32 v60, 0x3fb8aa3b, v32
619 ; GCN-NEXT: ds_read_b128 v[32:35], v57 offset:1152
620 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
621 ; GCN-NEXT: buffer_inv sc0 sc1
622 ; GCN-NEXT: ds_read_b128 v[144:147], v57 offset:1728
623 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
624 ; GCN-NEXT: buffer_inv sc0 sc1
625 ; GCN-NEXT: v_mul_f32_e32 v61, 0x3fb8aa3b, v161
626 ; GCN-NEXT: v_exp_f32_e32 v165, v60
627 ; GCN-NEXT: v_perm_b32 v60, v158, v156, s8
628 ; GCN-NEXT: v_fma_f32 v158, s4, v37, -v134
629 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[140:141], v[148:149], v[80:95]
630 ; GCN-NEXT: v_exp_f32_e32 v161, v61
631 ; GCN-NEXT: v_perm_b32 v140, v159, v157, s8
632 ; GCN-NEXT: v_perm_b32 v37, v130, v128, s5
633 ; GCN-NEXT: v_perm_b32 v61, v130, v128, s8
634 ; GCN-NEXT: v_perm_b32 v141, v131, v129, s8
635 ; GCN-NEXT: ;;#ASMSTART
636 ; GCN-NEXT: s_waitcnt vmcnt(8)
637 ; GCN-NEXT: ;;#ASMEND
638 ; GCN-NEXT: buffer_wbl2 sc0 sc1
639 ; GCN-NEXT: ds_write_b64 v135, v[36:37]
640 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[32:33], v[148:149], v[96:111]
641 ; GCN-NEXT: v_perm_b32 v32, v159, v157, s5
642 ; GCN-NEXT: v_mul_f32_e32 v33, 0x3fb8aa3b, v150
643 ; GCN-NEXT: v_cvt_f16_f32_e32 v150, v151
644 ; GCN-NEXT: v_fma_f32 v157, s4, v38, -v134
645 ; GCN-NEXT: v_cvt_f16_f32_e32 v38, v153
646 ; GCN-NEXT: v_exp_f32_e32 v159, v33
647 ; GCN-NEXT: v_perm_b32 v33, v131, v129, s5
648 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[144:145], v[148:149], v[112:127]
649 ; GCN-NEXT: v_pack_b32_f16 v129, v150, v38
650 ; GCN-NEXT: v_mul_f32_e32 v38, 0x3fb8aa3b, v152
651 ; GCN-NEXT: v_exp_f32_e32 v152, v38
652 ; GCN-NEXT: buffer_wbl2 sc0 sc1
653 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
654 ; GCN-NEXT: ds_write_b64 v136, v[60:61]
655 ; GCN-NEXT: buffer_wbl2 sc0 sc1
656 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
657 ; GCN-NEXT: ds_write_b64 v137, v[32:33]
658 ; GCN-NEXT: ; implicit-def: $vgpr33
659 ; GCN-NEXT: ; implicit-def: $vgpr38
660 ; GCN-NEXT: buffer_wbl2 sc0 sc1
661 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
662 ; GCN-NEXT: ds_write_b64 v138, v[140:141]
663 ; GCN-NEXT: v_add_u32_e32 v38, v132, v38
664 ; GCN-NEXT: v_add_u32_e32 v33, v132, v33
665 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
666 ; GCN-NEXT: buffer_load_dwordx2 v[130:131], v38, s[0:3], 0 offen sc0 sc1
667 ; GCN-NEXT: s_waitcnt vmcnt(0)
668 ; GCN-NEXT: buffer_inv sc0 sc1
669 ; GCN-NEXT: buffer_load_dwordx2 v[140:141], v33, s[0:3], 0 offen sc0 sc1
670 ; GCN-NEXT: s_waitcnt vmcnt(0)
671 ; GCN-NEXT: buffer_inv sc0 sc1
672 ; GCN-NEXT: ; implicit-def: $vgpr36
673 ; GCN-NEXT: v_add_u32_e32 v33, v132, v36
674 ; GCN-NEXT: ; implicit-def: $vgpr37
675 ; GCN-NEXT: buffer_load_dwordx2 v[144:145], v33, s[0:3], 0 offen sc0 sc1
676 ; GCN-NEXT: s_waitcnt vmcnt(0)
677 ; GCN-NEXT: buffer_inv sc0 sc1
678 ; GCN-NEXT: v_add_u32_e32 v33, v132, v37
679 ; GCN-NEXT: buffer_load_dwordx2 v[148:149], v33, s[0:3], 0 offen sc0 sc1
680 ; GCN-NEXT: s_waitcnt vmcnt(0)
681 ; GCN-NEXT: buffer_inv sc0 sc1
682 ; GCN-NEXT: v_cvt_f16_f32_e32 v156, v162
683 ; GCN-NEXT: v_mul_f32_e32 v32, 0x3fb8aa3b, v155
684 ; GCN-NEXT: ;;#ASMSTART
685 ; GCN-NEXT: s_waitcnt vmcnt(8)
686 ; GCN-NEXT: ;;#ASMEND
687 ; GCN-NEXT: v_cvt_f16_f32_e32 v33, v165
688 ; GCN-NEXT: v_pack_b32_f16 v128, v154, v156
689 ; GCN-NEXT: v_fma_f32 v150, s4, v39, -v134
690 ; GCN-NEXT: ds_read_b128 v[36:39], v139
691 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
692 ; GCN-NEXT: buffer_inv sc0 sc1
693 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[62:63], v[128:129], v[64:79]
694 ; GCN-NEXT: v_exp_f32_e32 v154, v32
695 ; GCN-NEXT: v_mul_f32_e32 v32, 0x3fb8aa3b, v158
696 ; GCN-NEXT: ds_read_b128 v[60:63], v139 offset:576
697 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
698 ; GCN-NEXT: buffer_inv sc0 sc1
699 ; GCN-NEXT: v_fma_f32 v156, s4, v42, -v134
700 ; GCN-NEXT: v_perm_b32 v20, v140, v130, s5
701 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[142:143], v[128:129], v[80:95]
702 ; GCN-NEXT: v_exp_f32_e32 v155, v32
703 ; GCN-NEXT: v_mul_f32_e32 v32, 0x3fb8aa3b, v157
704 ; GCN-NEXT: v_cvt_f16_f32_e32 v142, v161
705 ; GCN-NEXT: v_fma_f32 v143, s4, v41, -v134
706 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[34:35], v[128:129], v[96:111]
707 ; GCN-NEXT: v_cvt_f16_f32_e32 v34, v159
708 ; GCN-NEXT: v_exp_f32_e32 v157, v32
709 ; GCN-NEXT: v_cvt_f16_f32_e32 v32, v152
710 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[146:147], v[128:129], v[112:127]
711 ; GCN-NEXT: v_pack_b32_f16 v129, v34, v32
712 ; GCN-NEXT: v_mul_f32_e32 v32, 0x3fb8aa3b, v150
713 ; GCN-NEXT: v_pack_b32_f16 v128, v33, v142
714 ; GCN-NEXT: v_exp_f32_e32 v146, v32
715 ; GCN-NEXT: ds_read_b128 v[32:35], v139 offset:1152
716 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
717 ; GCN-NEXT: buffer_inv sc0 sc1
718 ; GCN-NEXT: v_fma_f32 v142, s4, v43, -v134
719 ; GCN-NEXT: v_fma_f32 v150, s4, v46, -v134
720 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[36:37], v[128:129], v[64:79]
721 ; GCN-NEXT: v_mul_f32_e32 v36, 0x3fb8aa3b, v40
722 ; GCN-NEXT: ds_read_b128 v[40:43], v139 offset:1728
723 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
724 ; GCN-NEXT: buffer_inv sc0 sc1
725 ; GCN-NEXT: v_exp_f32_e32 v147, v36
726 ; GCN-NEXT: v_mul_f32_e32 v36, 0x3fb8aa3b, v143
727 ; GCN-NEXT: v_cvt_f16_f32_e32 v37, v154
728 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[60:61], v[128:129], v[80:95]
729 ; GCN-NEXT: v_exp_f32_e32 v143, v36
730 ; GCN-NEXT: v_cvt_f16_f32_e32 v60, v155
731 ; GCN-NEXT: v_mul_f32_e32 v36, 0x3fb8aa3b, v142
732 ; GCN-NEXT: v_fma_f32 v61, s4, v45, -v134
733 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[32:33], v[128:129], v[96:111]
734 ; GCN-NEXT: v_mul_f32_e32 v32, 0x3fb8aa3b, v156
735 ; GCN-NEXT: v_cvt_f16_f32_e32 v33, v157
736 ; GCN-NEXT: v_exp_f32_e32 v156, v32
737 ; GCN-NEXT: v_cvt_f16_f32_e32 v32, v146
738 ; GCN-NEXT: v_pack_b32_f16 v33, v33, v32
739 ; GCN-NEXT: v_pack_b32_f16 v32, v37, v60
740 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[40:41], v[128:129], v[112:127]
741 ; GCN-NEXT: v_exp_f32_e32 v129, v36
742 ; GCN-NEXT: v_mul_f32_e32 v40, 0x3fb8aa3b, v44
743 ; GCN-NEXT: v_cvt_f16_f32_e32 v60, v147
744 ; GCN-NEXT: v_fma_f32 v128, s4, v47, -v134
745 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[38:39], v[32:33], v[64:79]
746 ; GCN-NEXT: ds_read_b128 v[36:39], v57
747 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
748 ; GCN-NEXT: buffer_inv sc0 sc1
749 ; GCN-NEXT: v_exp_f32_e32 v142, v40
750 ; GCN-NEXT: v_mul_f32_e32 v40, 0x3fb8aa3b, v61
751 ; GCN-NEXT: v_cvt_f16_f32_e32 v61, v143
752 ; GCN-NEXT: ds_read_b128 v[44:47], v57 offset:576
753 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
754 ; GCN-NEXT: buffer_inv sc0 sc1
755 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[62:63], v[32:33], v[80:95]
756 ; GCN-NEXT: v_fma_f32 v62, s4, v17, -v134
757 ; GCN-NEXT: v_mul_f32_e32 v17, 0x3fb8aa3b, v150
758 ; GCN-NEXT: v_exp_f32_e32 v63, v40
759 ; GCN-NEXT: v_pack_b32_f16 v40, v60, v61
760 ; GCN-NEXT: v_fma_f32 v150, s4, v18, -v134
761 ; GCN-NEXT: v_fma_f32 v60, s4, v19, -v134
762 ; GCN-NEXT: v_cvt_f16_f32_e32 v61, v142
763 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[34:35], v[32:33], v[96:111]
764 ; GCN-NEXT: v_cvt_f16_f32_e32 v34, v156
765 ; GCN-NEXT: v_exp_f32_e32 v158, v17
766 ; GCN-NEXT: v_cvt_f16_f32_e32 v17, v129
767 ; GCN-NEXT: v_pack_b32_f16 v41, v34, v17
768 ; GCN-NEXT: v_mul_f32_e32 v17, 0x3fb8aa3b, v128
769 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[42:43], v[32:33], v[112:127]
770 ; GCN-NEXT: v_exp_f32_e32 v128, v17
771 ; GCN-NEXT: v_perm_b32 v42, v141, v131, s8
772 ; GCN-NEXT: v_perm_b32 v43, v149, v145, s8
773 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[36:37], v[40:41], v[64:79]
774 ; GCN-NEXT: v_mul_f32_e32 v36, 0x3fb8aa3b, v16
775 ; GCN-NEXT: ds_read_b128 v[16:19], v57 offset:1152
776 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
777 ; GCN-NEXT: buffer_inv sc0 sc1
778 ; GCN-NEXT: ds_read_b128 v[32:35], v57 offset:1728
779 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
780 ; GCN-NEXT: buffer_inv sc0 sc1
781 ; GCN-NEXT: v_mul_f32_e32 v37, 0x3fb8aa3b, v62
782 ; GCN-NEXT: v_exp_f32_e32 v167, v36
783 ; GCN-NEXT: v_perm_b32 v36, v140, v130, s8
784 ; GCN-NEXT: v_fma_f32 v62, s4, v21, -v134
785 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[44:45], v[40:41], v[80:95]
786 ; GCN-NEXT: v_exp_f32_e32 v130, v37
787 ; GCN-NEXT: v_cvt_f16_f32_e32 v45, v158
788 ; GCN-NEXT: v_perm_b32 v21, v148, v144, s5
789 ; GCN-NEXT: v_perm_b32 v37, v148, v144, s8
790 ; GCN-NEXT: v_cvt_f16_f32_e32 v44, v63
791 ; GCN-NEXT: ;;#ASMSTART
792 ; GCN-NEXT: s_waitcnt vmcnt(8)
793 ; GCN-NEXT: ;;#ASMEND
794 ; GCN-NEXT: buffer_wbl2 sc0 sc1
795 ; GCN-NEXT: ds_write_b64 v135, v[20:21]
796 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[16:17], v[40:41], v[96:111]
797 ; GCN-NEXT: v_perm_b32 v16, v141, v131, s5
798 ; GCN-NEXT: v_fma_f32 v131, s4, v22, -v134
799 ; GCN-NEXT: v_cvt_f16_f32_e32 v22, v128
800 ; GCN-NEXT: v_mul_f32_e32 v17, 0x3fb8aa3b, v150
801 ; GCN-NEXT: v_exp_f32_e32 v140, v17
802 ; GCN-NEXT: v_perm_b32 v17, v149, v145, s5
803 ; GCN-NEXT: buffer_wbl2 sc0 sc1
804 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
805 ; GCN-NEXT: ds_write_b64 v136, v[36:37]
806 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[32:33], v[40:41], v[112:127]
807 ; GCN-NEXT: v_pack_b32_f16 v33, v45, v22
808 ; GCN-NEXT: v_mul_f32_e32 v22, 0x3fb8aa3b, v60
809 ; GCN-NEXT: v_exp_f32_e32 v144, v22
810 ; GCN-NEXT: buffer_wbl2 sc0 sc1
811 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
812 ; GCN-NEXT: ds_write_b64 v137, v[16:17]
813 ; GCN-NEXT: ; implicit-def: $vgpr17
814 ; GCN-NEXT: ; implicit-def: $vgpr22
815 ; GCN-NEXT: buffer_wbl2 sc0 sc1
816 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
817 ; GCN-NEXT: ds_write_b64 v138, v[42:43]
818 ; GCN-NEXT: v_add_u32_e32 v22, v132, v22
819 ; GCN-NEXT: v_add_u32_e32 v17, v132, v17
820 ; GCN-NEXT: ; implicit-def: $vgpr20
821 ; GCN-NEXT: ; implicit-def: $vgpr21
822 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
823 ; GCN-NEXT: buffer_load_dwordx2 v[40:41], v22, s[0:3], 0 offen sc0 sc1
824 ; GCN-NEXT: s_waitcnt vmcnt(0)
825 ; GCN-NEXT: buffer_inv sc0 sc1
826 ; GCN-NEXT: buffer_load_dwordx2 v[42:43], v17, s[0:3], 0 offen sc0 sc1
827 ; GCN-NEXT: s_waitcnt vmcnt(0)
828 ; GCN-NEXT: buffer_inv sc0 sc1
829 ; GCN-NEXT: v_add_u32_e32 v20, v132, v20
830 ; GCN-NEXT: v_add_u32_e32 v21, v132, v21
831 ; GCN-NEXT: v_pack_b32_f16 v32, v61, v44
832 ; GCN-NEXT: buffer_load_dwordx2 v[44:45], v20, s[0:3], 0 offen sc0 sc1
833 ; GCN-NEXT: s_waitcnt vmcnt(0)
834 ; GCN-NEXT: buffer_inv sc0 sc1
835 ; GCN-NEXT: buffer_load_dwordx2 v[60:61], v21, s[0:3], 0 offen sc0 sc1
836 ; GCN-NEXT: s_waitcnt vmcnt(0)
837 ; GCN-NEXT: buffer_inv sc0 sc1
838 ; GCN-NEXT: v_mul_f32_e32 v16, 0x3fb8aa3b, v166
839 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[38:39], v[32:33], v[64:79]
840 ; GCN-NEXT: v_exp_f32_e32 v132, v16
841 ; GCN-NEXT: v_mul_f32_e32 v16, 0x3fb8aa3b, v62
842 ; GCN-NEXT: ;;#ASMSTART
843 ; GCN-NEXT: s_waitcnt vmcnt(8)
844 ; GCN-NEXT: ;;#ASMEND
845 ; GCN-NEXT: v_cvt_f16_f32_e32 v17, v167
846 ; GCN-NEXT: v_fma_f32 v141, s4, v23, -v134
847 ; GCN-NEXT: ds_read_b128 v[20:23], v139
848 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
849 ; GCN-NEXT: buffer_inv sc0 sc1
850 ; GCN-NEXT: ds_read_b128 v[36:39], v139 offset:576
851 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
852 ; GCN-NEXT: buffer_inv sc0 sc1
853 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[46:47], v[32:33], v[80:95]
854 ; GCN-NEXT: v_exp_f32_e32 v62, v16
855 ; GCN-NEXT: v_mul_f32_e32 v16, 0x3fb8aa3b, v131
856 ; GCN-NEXT: v_cvt_f16_f32_e32 v46, v130
857 ; GCN-NEXT: v_fma_f32 v47, s4, v25, -v134
858 ; GCN-NEXT: v_fma_f32 v131, s4, v26, -v134
859 ; GCN-NEXT: v_fma_f32 v149, s4, v4, -v134
860 ; GCN-NEXT: ; implicit-def: $sgpr0
861 ; GCN-NEXT: v_perm_b32 v4, v42, v40, s5
862 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[18:19], v[32:33], v[96:111]
863 ; GCN-NEXT: v_cvt_f16_f32_e32 v18, v140
864 ; GCN-NEXT: v_exp_f32_e32 v145, v16
865 ; GCN-NEXT: v_cvt_f16_f32_e32 v16, v144
866 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[34:35], v[32:33], v[112:127]
867 ; GCN-NEXT: v_pack_b32_f16 v33, v18, v16
868 ; GCN-NEXT: v_mul_f32_e32 v16, 0x3fb8aa3b, v141
869 ; GCN-NEXT: v_pack_b32_f16 v32, v17, v46
870 ; GCN-NEXT: v_exp_f32_e32 v35, v16
871 ; GCN-NEXT: ds_read_b128 v[16:19], v139 offset:1152
872 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
873 ; GCN-NEXT: buffer_inv sc0 sc1
874 ; GCN-NEXT: v_fma_f32 v34, s4, v27, -v134
875 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[20:21], v[32:33], v[64:79]
876 ; GCN-NEXT: v_mul_f32_e32 v20, 0x3fb8aa3b, v24
877 ; GCN-NEXT: ds_read_b128 v[24:27], v139 offset:1728
878 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
879 ; GCN-NEXT: buffer_inv sc0 sc1
880 ; GCN-NEXT: v_exp_f32_e32 v46, v20
881 ; GCN-NEXT: v_mul_f32_e32 v20, 0x3fb8aa3b, v47
882 ; GCN-NEXT: v_cvt_f16_f32_e32 v21, v132
883 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[36:37], v[32:33], v[80:95]
884 ; GCN-NEXT: v_exp_f32_e32 v47, v20
885 ; GCN-NEXT: v_cvt_f16_f32_e32 v36, v62
886 ; GCN-NEXT: v_mul_f32_e32 v20, 0x3fb8aa3b, v34
887 ; GCN-NEXT: v_fma_f32 v37, s4, v29, -v134
888 ; GCN-NEXT: v_cvt_f16_f32_e32 v34, v46
889 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[16:17], v[32:33], v[96:111]
890 ; GCN-NEXT: v_mul_f32_e32 v16, 0x3fb8aa3b, v131
891 ; GCN-NEXT: v_cvt_f16_f32_e32 v17, v145
892 ; GCN-NEXT: v_exp_f32_e32 v141, v16
893 ; GCN-NEXT: v_cvt_f16_f32_e32 v16, v35
894 ; GCN-NEXT: v_fma_f32 v131, s4, v30, -v134
895 ; GCN-NEXT: v_pack_b32_f16 v17, v17, v16
896 ; GCN-NEXT: v_pack_b32_f16 v16, v21, v36
897 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[24:25], v[32:33], v[112:127]
898 ; GCN-NEXT: v_exp_f32_e32 v33, v20
899 ; GCN-NEXT: v_mul_f32_e32 v24, 0x3fb8aa3b, v28
900 ; GCN-NEXT: v_fma_f32 v32, s4, v31, -v134
901 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[22:23], v[16:17], v[64:79]
902 ; GCN-NEXT: ds_read_b128 v[20:23], v57
903 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
904 ; GCN-NEXT: buffer_inv sc0 sc1
905 ; GCN-NEXT: v_exp_f32_e32 v36, v24
906 ; GCN-NEXT: v_mul_f32_e32 v24, 0x3fb8aa3b, v37
907 ; GCN-NEXT: v_cvt_f16_f32_e32 v37, v47
908 ; GCN-NEXT: ds_read_b128 v[28:31], v57 offset:576
909 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
910 ; GCN-NEXT: buffer_inv sc0 sc1
911 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[38:39], v[16:17], v[80:95]
912 ; GCN-NEXT: v_fma_f32 v38, s4, v1, -v134
913 ; GCN-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v131
914 ; GCN-NEXT: v_exp_f32_e32 v39, v24
915 ; GCN-NEXT: v_pack_b32_f16 v24, v34, v37
916 ; GCN-NEXT: v_fma_f32 v131, s4, v2, -v134
917 ; GCN-NEXT: v_cvt_f16_f32_e32 v37, v36
918 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[18:19], v[16:17], v[96:111]
919 ; GCN-NEXT: v_cvt_f16_f32_e32 v18, v141
920 ; GCN-NEXT: v_exp_f32_e32 v148, v1
921 ; GCN-NEXT: v_cvt_f16_f32_e32 v1, v33
922 ; GCN-NEXT: v_pack_b32_f16 v25, v18, v1
923 ; GCN-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v32
924 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[26:27], v[16:17], v[112:127]
925 ; GCN-NEXT: v_fma_f32 v32, s4, v3, -v134
926 ; GCN-NEXT: v_exp_f32_e32 v34, v1
927 ; GCN-NEXT: v_perm_b32 v26, v43, v41, s8
928 ; GCN-NEXT: v_perm_b32 v27, v61, v45, s8
929 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[20:21], v[24:25], v[64:79]
930 ; GCN-NEXT: v_mul_f32_e32 v20, 0x3fb8aa3b, v0
931 ; GCN-NEXT: ds_read_b128 v[0:3], v57 offset:1152
932 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
933 ; GCN-NEXT: buffer_inv sc0 sc1
934 ; GCN-NEXT: ds_read_b128 v[16:19], v57 offset:1728
935 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
936 ; GCN-NEXT: buffer_inv sc0 sc1
937 ; GCN-NEXT: v_mul_f32_e32 v21, 0x3fb8aa3b, v38
938 ; GCN-NEXT: v_exp_f32_e32 v150, v20
939 ; GCN-NEXT: v_perm_b32 v20, v42, v40, s8
940 ; GCN-NEXT: v_cvt_f16_f32_e32 v40, v148
941 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[28:29], v[24:25], v[80:95]
942 ; GCN-NEXT: v_exp_f32_e32 v38, v21
943 ; GCN-NEXT: v_cvt_f16_f32_e32 v28, v39
944 ; GCN-NEXT: v_fma_f32 v29, s4, v5, -v134
945 ; GCN-NEXT: v_perm_b32 v5, v60, v44, s5
946 ; GCN-NEXT: v_perm_b32 v21, v60, v44, s8
947 ; GCN-NEXT: ;;#ASMSTART
948 ; GCN-NEXT: s_waitcnt vmcnt(8)
949 ; GCN-NEXT: ;;#ASMEND
950 ; GCN-NEXT: buffer_wbl2 sc0 sc1
951 ; GCN-NEXT: ds_write_b64 v135, v[4:5]
952 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[0:1], v[24:25], v[96:111]
953 ; GCN-NEXT: v_perm_b32 v0, v43, v41, s5
954 ; GCN-NEXT: v_fma_f32 v41, s4, v6, -v134
955 ; GCN-NEXT: v_cvt_f16_f32_e32 v6, v34
956 ; GCN-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v131
957 ; GCN-NEXT: v_exp_f32_e32 v42, v1
958 ; GCN-NEXT: v_perm_b32 v1, v61, v45, s5
959 ; GCN-NEXT: buffer_wbl2 sc0 sc1
960 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
961 ; GCN-NEXT: ds_write_b64 v136, v[20:21]
962 ; GCN-NEXT: buffer_wbl2 sc0 sc1
963 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
964 ; GCN-NEXT: ds_write_b64 v137, v[0:1]
965 ; GCN-NEXT: buffer_wbl2 sc0 sc1
966 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
967 ; GCN-NEXT: ds_write_b64 v138, v[26:27]
968 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[16:17], v[24:25], v[112:127]
969 ; GCN-NEXT: v_pack_b32_f16 v17, v40, v6
970 ; GCN-NEXT: v_mul_f32_e32 v6, 0x3fb8aa3b, v32
971 ; GCN-NEXT: ;;#ASMSTART
972 ; GCN-NEXT: s_waitcnt vmcnt(8)
973 ; GCN-NEXT: ;;#ASMEND
974 ; GCN-NEXT: v_pack_b32_f16 v16, v37, v28
975 ; GCN-NEXT: v_fma_f32 v24, s4, v7, -v134
976 ; GCN-NEXT: v_exp_f32_e32 v25, v6
977 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
978 ; GCN-NEXT: ds_read_b128 v[4:7], v139
979 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
980 ; GCN-NEXT: buffer_inv sc0 sc1
981 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[22:23], v[16:17], v[64:79]
982 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v149
983 ; GCN-NEXT: v_exp_f32_e32 v26, v0
984 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v29
985 ; GCN-NEXT: v_cvt_f16_f32_e32 v1, v150
986 ; GCN-NEXT: v_cvt_f16_f32_e32 v27, v38
987 ; GCN-NEXT: ds_read_b128 v[20:23], v139 offset:576
988 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
989 ; GCN-NEXT: buffer_inv sc0 sc1
990 ; GCN-NEXT: v_fma_f32 v28, s4, v9, -v134
991 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[30:31], v[16:17], v[80:95]
992 ; GCN-NEXT: v_exp_f32_e32 v29, v0
993 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v41
994 ; GCN-NEXT: v_fma_f32 v30, s4, v10, -v134
995 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[2:3], v[16:17], v[96:111]
996 ; GCN-NEXT: v_cvt_f16_f32_e32 v2, v42
997 ; GCN-NEXT: v_exp_f32_e32 v31, v0
998 ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v25
999 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[18:19], v[16:17], v[112:127]
1000 ; GCN-NEXT: v_pack_b32_f16 v17, v2, v0
1001 ; GCN-NEXT: v_pack_b32_f16 v16, v1, v27
1002 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v24
1003 ; GCN-NEXT: v_fma_f32 v18, s4, v11, -v134
1004 ; GCN-NEXT: v_exp_f32_e32 v19, v0
1005 ; GCN-NEXT: ds_read_b128 v[0:3], v139 offset:1152
1006 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1007 ; GCN-NEXT: buffer_inv sc0 sc1
1008 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[4:5], v[16:17], v[64:79]
1009 ; GCN-NEXT: v_mul_f32_e32 v4, 0x3fb8aa3b, v8
1010 ; GCN-NEXT: ds_read_b128 v[8:11], v139 offset:1728
1011 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1012 ; GCN-NEXT: buffer_inv sc0 sc1
1013 ; GCN-NEXT: v_exp_f32_e32 v24, v4
1014 ; GCN-NEXT: v_mul_f32_e32 v4, 0x3fb8aa3b, v28
1015 ; GCN-NEXT: v_cvt_f16_f32_e32 v5, v26
1016 ; GCN-NEXT: v_exp_f32_e32 v27, v4
1017 ; GCN-NEXT: v_mul_f32_e32 v4, 0x3fb8aa3b, v18
1018 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[20:21], v[16:17], v[80:95]
1019 ; GCN-NEXT: v_cvt_f16_f32_e32 v20, v29
1020 ; GCN-NEXT: v_fma_f32 v21, s4, v13, -v134
1021 ; GCN-NEXT: v_fma_f32 v28, s4, v14, -v134
1022 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[0:1], v[16:17], v[96:111]
1023 ; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v30
1024 ; GCN-NEXT: v_cvt_f16_f32_e32 v1, v31
1025 ; GCN-NEXT: v_exp_f32_e32 v30, v0
1026 ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v19
1027 ; GCN-NEXT: v_pack_b32_f16 v1, v1, v0
1028 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[8:9], v[16:17], v[112:127]
1029 ; GCN-NEXT: v_exp_f32_e32 v16, v4
1030 ; GCN-NEXT: v_pack_b32_f16 v0, v5, v20
1031 ; GCN-NEXT: v_mul_f32_e32 v9, 0x3fb8aa3b, v12
1032 ; GCN-NEXT: v_exp_f32_e32 v18, v9
1033 ; GCN-NEXT: v_mul_f32_e32 v9, 0x3fb8aa3b, v21
1034 ; GCN-NEXT: v_exp_f32_e32 v21, v9
1035 ; GCN-NEXT: v_fma_f32 v8, s4, v15, -v134
1036 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[6:7], v[0:1], v[64:79]
1037 ; GCN-NEXT: ds_read_b128 v[4:7], v57
1038 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1039 ; GCN-NEXT: buffer_inv sc0 sc1
1040 ; GCN-NEXT: ds_read_b128 v[12:15], v57 offset:576
1041 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1042 ; GCN-NEXT: buffer_inv sc0 sc1
1043 ; GCN-NEXT: v_cvt_f16_f32_e32 v17, v24
1044 ; GCN-NEXT: v_cvt_f16_f32_e32 v20, v27
1045 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[22:23], v[0:1], v[80:95]
1046 ; GCN-NEXT: v_cvt_f16_f32_e32 v22, v21
1047 ; GCN-NEXT: v_cvt_f16_f32_e32 v23, v18
1048 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[2:3], v[0:1], v[96:111]
1049 ; GCN-NEXT: v_cvt_f16_f32_e32 v3, v30
1050 ; GCN-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v28
1051 ; GCN-NEXT: v_exp_f32_e32 v2, v2
1052 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[10:11], v[0:1], v[112:127]
1053 ; GCN-NEXT: v_cvt_f16_f32_e32 v0, v16
1054 ; GCN-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v8
1055 ; GCN-NEXT: v_exp_f32_e32 v10, v1
1056 ; GCN-NEXT: v_pack_b32_f16 v8, v17, v20
1057 ; GCN-NEXT: v_pack_b32_f16 v9, v3, v0
1058 ; GCN-NEXT: v_add_f32_e32 v3, 0, v49
1059 ; GCN-NEXT: v_add_f32_e32 v3, v50, v3
1060 ; GCN-NEXT: v_add_f32_e32 v3, v51, v3
1061 ; GCN-NEXT: v_add_f32_e32 v3, v52, v3
1062 ; GCN-NEXT: v_add_f32_e32 v3, v53, v3
1063 ; GCN-NEXT: v_add_f32_e32 v3, v54, v3
1064 ; GCN-NEXT: v_add_f32_e32 v3, v55, v3
1065 ; GCN-NEXT: v_add_f32_e32 v3, v56, v3
1066 ; GCN-NEXT: v_add_f32_e32 v3, v58, v3
1067 ; GCN-NEXT: v_add_f32_e32 v3, v163, v3
1068 ; GCN-NEXT: v_add_f32_e32 v3, v164, v3
1069 ; GCN-NEXT: v_add_f32_e32 v3, v59, v3
1070 ; GCN-NEXT: v_add_f32_e32 v3, v160, v3
1071 ; GCN-NEXT: v_add_f32_e32 v3, v162, v3
1072 ; GCN-NEXT: v_add_f32_e32 v3, v151, v3
1073 ; GCN-NEXT: v_add_f32_e32 v3, v153, v3
1074 ; GCN-NEXT: v_add_f32_e32 v3, v165, v3
1075 ; GCN-NEXT: v_add_f32_e32 v3, v161, v3
1076 ; GCN-NEXT: v_add_f32_e32 v3, v159, v3
1077 ; GCN-NEXT: v_add_f32_e32 v3, v152, v3
1078 ; GCN-NEXT: v_add_f32_e32 v3, v154, v3
1079 ; GCN-NEXT: v_add_f32_e32 v3, v155, v3
1080 ; GCN-NEXT: v_add_f32_e32 v3, v157, v3
1081 ; GCN-NEXT: v_add_f32_e32 v3, v146, v3
1082 ; GCN-NEXT: v_add_f32_e32 v3, v147, v3
1083 ; GCN-NEXT: v_add_f32_e32 v3, v143, v3
1084 ; GCN-NEXT: v_add_f32_e32 v3, v156, v3
1085 ; GCN-NEXT: v_add_f32_e32 v3, v129, v3
1086 ; GCN-NEXT: v_add_f32_e32 v3, v142, v3
1087 ; GCN-NEXT: v_add_f32_e32 v3, v63, v3
1088 ; GCN-NEXT: v_add_f32_e32 v3, v158, v3
1089 ; GCN-NEXT: v_add_f32_e32 v3, v128, v3
1090 ; GCN-NEXT: v_add_f32_e32 v3, v167, v3
1091 ; GCN-NEXT: v_add_f32_e32 v3, v130, v3
1092 ; GCN-NEXT: v_add_f32_e32 v3, v140, v3
1093 ; GCN-NEXT: v_add_f32_e32 v3, v144, v3
1094 ; GCN-NEXT: v_add_f32_e32 v3, v132, v3
1095 ; GCN-NEXT: v_add_f32_e32 v3, v62, v3
1096 ; GCN-NEXT: v_add_f32_e32 v3, v145, v3
1097 ; GCN-NEXT: v_add_f32_e32 v3, v35, v3
1098 ; GCN-NEXT: v_add_f32_e32 v3, v46, v3
1099 ; GCN-NEXT: v_add_f32_e32 v3, v47, v3
1100 ; GCN-NEXT: v_add_f32_e32 v3, v141, v3
1101 ; GCN-NEXT: v_add_f32_e32 v3, v33, v3
1102 ; GCN-NEXT: v_add_f32_e32 v3, v36, v3
1103 ; GCN-NEXT: v_add_f32_e32 v3, v39, v3
1104 ; GCN-NEXT: v_add_f32_e32 v3, v148, v3
1105 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[12:13], v[8:9], v[80:95]
1106 ; GCN-NEXT: v_add_f32_e32 v3, v34, v3
1107 ; GCN-NEXT: v_add_f32_e32 v3, v150, v3
1108 ; GCN-NEXT: v_cvt_f16_f32_e32 v1, v10
1109 ; GCN-NEXT: v_cvt_f16_f32_e32 v11, v2
1110 ; GCN-NEXT: v_add_f32_e32 v3, v38, v3
1111 ; GCN-NEXT: v_add_f32_e32 v3, v42, v3
1112 ; GCN-NEXT: v_add_f32_e32 v3, v25, v3
1113 ; GCN-NEXT: v_add_f32_e32 v3, v26, v3
1114 ; GCN-NEXT: v_pack_b32_f16 v1, v11, v1
1115 ; GCN-NEXT: v_pack_b32_f16 v0, v23, v22
1116 ; GCN-NEXT: v_add_f32_e32 v3, v29, v3
1117 ; GCN-NEXT: v_add_f32_e32 v3, v31, v3
1118 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[14:15], v[0:1], v[80:95]
1119 ; GCN-NEXT: v_add_f32_e32 v3, v19, v3
1120 ; GCN-NEXT: v_add_f32_e32 v3, v24, v3
1121 ; GCN-NEXT: v_add_f32_e32 v3, v27, v3
1122 ; GCN-NEXT: v_add_f32_e32 v3, v30, v3
1123 ; GCN-NEXT: v_add_f32_e32 v3, v16, v3
1124 ; GCN-NEXT: v_add_f32_e32 v3, v18, v3
1125 ; GCN-NEXT: v_add_f32_e32 v3, v21, v3
1126 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[4:5], v[8:9], v[64:79]
1127 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[6:7], v[0:1], v[64:79]
1128 ; GCN-NEXT: v_add_f32_e32 v0, v2, v3
1129 ; GCN-NEXT: v_add_f32_e32 v4, v10, v0
1130 ; GCN-NEXT: ds_bpermute_b32 v5, v133, v4
1131 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1132 ; GCN-NEXT: ds_read_b128 v[0:3], v57 offset:1152
1133 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1134 ; GCN-NEXT: buffer_inv sc0 sc1
1135 ; GCN-NEXT: v_add_f32_e32 v2, v4, v5
1136 ; GCN-NEXT: ds_bpermute_b32 v3, v133, v2
1137 ; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[0:1], v[8:9], v[96:111]
1138 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1139 ; GCN-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[6:7]
1140 ; GCN-NEXT: ; implicit-def: $vgpr4
1141 ; GCN-NEXT: v_fmac_f32_e32 v0, v4, v48
1142 ; GCN-NEXT: ds_read_b128 v[0:3], v57 offset:1728
1143 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
1144 ; GCN-NEXT: buffer_inv sc0 sc1
1145 ; GCN-NEXT: ;;#ASMSTART
1146 ; GCN-NEXT: s_waitcnt vmcnt(8)
1147 ; GCN-NEXT: ;;#ASMEND
1148 ; GCN-NEXT: s_endpgm
1150 attributes #0 = {"amdgpu-flat-work-group-size"="256,256"}
1156 name: largeInterleave
1157 tracksRegLiveness: true
1158 machineFunctionInfo:
1159 stackPtrOffsetReg: '$sgpr32'
1163 liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2, $sgpr3, $sgpr4
1164 %11:vgpr_32 = IMPLICIT_DEF
1165 %1:sgpr_512 = IMPLICIT_DEF
1166 %16:vgpr_32 = IMPLICIT_DEF
1167 %443:sgpr_128 = IMPLICIT_DEF
1168 %18:sreg_32 = IMPLICIT_DEF
1169 %25:vgpr_32 = IMPLICIT_DEF
1170 %23:vgpr_32 = IMPLICIT_DEF
1171 %391:vreg_128_align2 = IMPLICIT_DEF
1172 %24:vgpr_32 = IMPLICIT_DEF
1173 %392:vreg_128_align2 = IMPLICIT_DEF
1174 %401:vreg_128_align2 = IMPLICIT_DEF
1175 %406:vreg_128_align2 = IMPLICIT_DEF
1176 %48:vgpr_32 = IMPLICIT_DEF
1177 %473:sgpr_128 = IMPLICIT_DEF
1178 %411:vreg_128_align2 = IMPLICIT_DEF
1179 %416:vreg_128_align2 = IMPLICIT_DEF
1180 %421:vreg_128_align2 = IMPLICIT_DEF
1181 %426:vreg_128_align2 = IMPLICIT_DEF
1182 %1114:sgpr_32 = IMPLICIT_DEF
1183 %39:vgpr_32 = IMPLICIT_DEF
1184 %484:sreg_64_xexec = IMPLICIT_DEF
1185 %3346:vgpr_32 = IMPLICIT_DEF
1186 %1422:sreg_32 = IMPLICIT_DEF
1187 %1424:sreg_32 = IMPLICIT_DEF
1188 %15:vgpr_32 = IMPLICIT_DEF
1189 %494:sreg_32 = IMPLICIT_DEF
1190 %47:vgpr_32 = IMPLICIT_DEF
1191 %41:vgpr_32 = IMPLICIT_DEF
1192 %42:vgpr_32 = IMPLICIT_DEF
1193 %43:vgpr_32 = IMPLICIT_DEF
1194 %44:vgpr_32 = IMPLICIT_DEF
1195 %45:vgpr_32 = IMPLICIT_DEF
1196 %50:sreg_32 = IMPLICIT_DEF
1197 %3347:vgpr_32 = IMPLICIT_DEF
1198 %3329:vgpr_32 = IMPLICIT_DEF
1199 %3330:vgpr_32 = IMPLICIT_DEF
1200 %3331:vgpr_32 = IMPLICIT_DEF
1201 %3332:vgpr_32 = IMPLICIT_DEF
1202 %3333:vgpr_32 = IMPLICIT_DEF
1203 %2986:vreg_512_align2 = IMPLICIT_DEF
1204 %3038:vreg_512_align2 = IMPLICIT_DEF
1205 %2980:vreg_512_align2 = IMPLICIT_DEF
1206 %3003:vreg_512_align2 = IMPLICIT_DEF
1207 %3334:vgpr_32 = IMPLICIT_DEF
1208 %3335:vgpr_32 = IMPLICIT_DEF
1209 %3336:vgpr_32 = IMPLICIT_DEF
1210 %3337:vgpr_32 = IMPLICIT_DEF
1211 %3338:vgpr_32 = IMPLICIT_DEF
1212 %3339:vgpr_32 = IMPLICIT_DEF
1213 %3345:vgpr_32 = IMPLICIT_DEF
1214 %3340:vgpr_32 = IMPLICIT_DEF
1215 %3341:vgpr_32 = IMPLICIT_DEF
1216 %3342:vgpr_32 = IMPLICIT_DEF
1217 %3343:vgpr_32 = IMPLICIT_DEF
1218 %3344:vgpr_32 = IMPLICIT_DEF
1219 %84:vgpr_32 = COPY %3347
1220 %86:vgpr_32 = COPY %3347:vgpr_32
1222 %593:sreg_32 = V_READFIRSTLANE_B32 %11:vgpr_32, implicit $exec
1223 %595:vgpr_32 = V_LSHL_ADD_U32_e64 %593:sreg_32, 4, %3329:vgpr_32, implicit $exec
1224 %597:vgpr_32 = nsw V_MUL_LO_U32_e64 %595:vgpr_32, %1.sub6:sgpr_512, implicit $exec
1225 %599:vgpr_32 = V_ADD_LSHL_U32_e64 %597:vgpr_32, %16:vgpr_32, 1, implicit $exec
1226 %601:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %599:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec
1227 %602:vgpr_32 = V_ADD_U32_e32 %18:sreg_32, %599:vgpr_32, implicit $exec
1228 %603:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %602:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec
1229 %605:sreg_32 = S_LSHL_B32 %593:sreg_32, 7, implicit-def dead $scc
1230 %606:vgpr_32 = V_ADD_LSHL_U32_e64 %25:vgpr_32, %605:sreg_32, 1, implicit $exec
1231 DS_WRITE_B128_gfx9 %606:vgpr_32, %601:vreg_128_align2, 0, 0, implicit $exec
1232 DS_WRITE_B128_gfx9 %606:vgpr_32, %603:vreg_128_align2, 1024, 0, implicit $exec
1233 %608:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %599:vgpr_32, %443:sgpr_128, 0, 64, 0, 0, implicit $exec
1234 %610:vgpr_32 = V_ADD_U32_e32 64, %602:vgpr_32, implicit $exec
1235 %611:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %610:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec
1236 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1237 %612:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 0, 0, implicit $exec
1238 early-clobber %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %612.sub0_sub1:vreg_128_align2, %391.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec
1239 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %612.sub2_sub3:vreg_128_align2, %391.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1240 %626:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 512, 0, implicit $exec
1241 early-clobber %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %626.sub0_sub1:vreg_128_align2, %391.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec
1242 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %626.sub2_sub3:vreg_128_align2, %391.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1243 %638:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1024, 0, implicit $exec
1244 early-clobber %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %638.sub0_sub1:vreg_128_align2, %391.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec
1245 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %638.sub2_sub3:vreg_128_align2, %391.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1246 %650:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1536, 0, implicit $exec
1247 early-clobber %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_vgprcd_e64 %650.sub0_sub1:vreg_128_align2, %391.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec
1248 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %650.sub2_sub3:vreg_128_align2, %391.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1249 %662:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec
1250 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %662.sub0_sub1:vreg_128_align2, %392.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1251 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %662.sub2_sub3:vreg_128_align2, %392.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1252 %673:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 512, 0, implicit $exec
1253 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %673.sub0_sub1:vreg_128_align2, %392.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1254 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %673.sub2_sub3:vreg_128_align2, %392.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1255 %684:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1024, 0, implicit $exec
1256 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %684.sub0_sub1:vreg_128_align2, %392.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1257 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %684.sub2_sub3:vreg_128_align2, %392.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1258 %695:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1536, 0, implicit $exec
1259 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %695.sub0_sub1:vreg_128_align2, %392.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1260 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %695.sub2_sub3:vreg_128_align2, %392.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1261 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1262 DS_WRITE_B128_gfx9 %606:vgpr_32, %608:vreg_128_align2, 0, 0, implicit $exec
1263 DS_WRITE_B128_gfx9 %606:vgpr_32, %611:vreg_128_align2, 1024, 0, implicit $exec
1264 %706:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %599:vgpr_32, %443:sgpr_128, 0, 128, 0, 0, implicit $exec
1265 %708:vgpr_32 = V_ADD_U32_e32 128, %602:vgpr_32, implicit $exec
1266 %709:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %708:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec
1267 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1268 %710:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 0, 0, implicit $exec
1269 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %710.sub0_sub1:vreg_128_align2, %401.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1270 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %710.sub2_sub3:vreg_128_align2, %401.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1271 %721:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 512, 0, implicit $exec
1272 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %721.sub0_sub1:vreg_128_align2, %401.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1273 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %721.sub2_sub3:vreg_128_align2, %401.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1274 %732:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1024, 0, implicit $exec
1275 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %732.sub0_sub1:vreg_128_align2, %401.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1276 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %732.sub2_sub3:vreg_128_align2, %401.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1277 %743:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1536, 0, implicit $exec
1278 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %743.sub0_sub1:vreg_128_align2, %401.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1279 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %743.sub2_sub3:vreg_128_align2, %401.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1280 %754:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec
1281 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %754.sub0_sub1:vreg_128_align2, %406.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1282 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %754.sub2_sub3:vreg_128_align2, %406.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1283 %765:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 512, 0, implicit $exec
1284 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %765.sub0_sub1:vreg_128_align2, %406.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1285 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %765.sub2_sub3:vreg_128_align2, %406.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1286 %776:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1024, 0, implicit $exec
1287 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %776.sub0_sub1:vreg_128_align2, %406.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1288 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %776.sub2_sub3:vreg_128_align2, %406.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1289 %787:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1536, 0, implicit $exec
1290 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %787.sub0_sub1:vreg_128_align2, %406.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1291 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %787.sub2_sub3:vreg_128_align2, %406.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1292 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1293 DS_WRITE_B128_gfx9 %606:vgpr_32, %706:vreg_128_align2, 0, 0, implicit $exec
1294 DS_WRITE_B128_gfx9 %606:vgpr_32, %709:vreg_128_align2, 1024, 0, implicit $exec
1295 %798:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %599:vgpr_32, %443:sgpr_128, 0, 192, 0, 0, implicit $exec
1296 %800:vgpr_32 = V_ADD_U32_e32 192, %602:vgpr_32, implicit $exec
1297 %801:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %800:vgpr_32, %443:sgpr_128, 0, 0, 0, 0, implicit $exec
1298 %802:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3330:vgpr_32, implicit $exec
1299 %803:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %802:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1300 %804:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3331:vgpr_32, implicit $exec
1301 %805:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %804:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1302 %806:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3332:vgpr_32, implicit $exec
1303 %807:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %806:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1304 %808:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3333:vgpr_32, implicit $exec
1305 %809:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %808:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1306 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1307 %810:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 0, 0, implicit $exec
1308 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %810.sub0_sub1:vreg_128_align2, %411.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1309 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %810.sub2_sub3:vreg_128_align2, %411.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1310 %821:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 512, 0, implicit $exec
1311 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %821.sub0_sub1:vreg_128_align2, %411.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1312 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %821.sub2_sub3:vreg_128_align2, %411.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1313 %832:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1024, 0, implicit $exec
1314 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %832.sub0_sub1:vreg_128_align2, %411.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1315 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %832.sub2_sub3:vreg_128_align2, %411.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1316 %843:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1536, 0, implicit $exec
1317 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %843.sub0_sub1:vreg_128_align2, %411.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1318 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %843.sub2_sub3:vreg_128_align2, %411.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1319 %854:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec
1320 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %854.sub0_sub1:vreg_128_align2, %416.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1321 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %854.sub2_sub3:vreg_128_align2, %416.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1322 %865:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 512, 0, implicit $exec
1323 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %865.sub0_sub1:vreg_128_align2, %416.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1324 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %865.sub2_sub3:vreg_128_align2, %416.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1325 %876:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1024, 0, implicit $exec
1326 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %876.sub0_sub1:vreg_128_align2, %416.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1327 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %876.sub2_sub3:vreg_128_align2, %416.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1328 %887:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1536, 0, implicit $exec
1329 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %887.sub0_sub1:vreg_128_align2, %416.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1330 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %887.sub2_sub3:vreg_128_align2, %416.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1331 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1332 DS_WRITE_B128_gfx9 %606:vgpr_32, %798:vreg_128_align2, 0, 0, implicit $exec
1333 DS_WRITE_B128_gfx9 %606:vgpr_32, %801:vreg_128_align2, 1024, 0, implicit $exec
1334 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1335 %898:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 0, 0, implicit $exec
1336 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %898.sub0_sub1:vreg_128_align2, %421.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1337 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %898.sub2_sub3:vreg_128_align2, %421.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1338 %909:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 512, 0, implicit $exec
1339 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %909.sub0_sub1:vreg_128_align2, %421.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1340 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %909.sub2_sub3:vreg_128_align2, %421.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1341 %920:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1024, 0, implicit $exec
1342 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %920.sub0_sub1:vreg_128_align2, %421.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1343 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %920.sub2_sub3:vreg_128_align2, %421.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1344 %931:vreg_128_align2 = DS_READ_B128_gfx9 %23:vgpr_32, 1536, 0, implicit $exec
1345 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %931.sub0_sub1:vreg_128_align2, %421.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1346 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %931.sub2_sub3:vreg_128_align2, %421.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1347 %942:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 0, 0, implicit $exec
1348 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %942.sub0_sub1:vreg_128_align2, %426.sub0_sub1:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1349 %668:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %942.sub2_sub3:vreg_128_align2, %426.sub2_sub3:vreg_128_align2, %668:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1350 %969:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 512, 0, implicit $exec
1351 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %969.sub0_sub1:vreg_128_align2, %426.sub0_sub1:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1352 %679:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %969.sub2_sub3:vreg_128_align2, %426.sub2_sub3:vreg_128_align2, %679:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1353 %996:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1024, 0, implicit $exec
1354 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %996.sub0_sub1:vreg_128_align2, %426.sub0_sub1:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1355 %690:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %996.sub2_sub3:vreg_128_align2, %426.sub2_sub3:vreg_128_align2, %690:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1356 %1023:vreg_128_align2 = DS_READ_B128_gfx9 %24:vgpr_32, 1536, 0, implicit $exec
1357 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1023.sub0_sub1:vreg_128_align2, %426.sub0_sub1:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1358 %701:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1023.sub2_sub3:vreg_128_align2, %426.sub2_sub3:vreg_128_align2, %701:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1359 %1050:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub0:vreg_512_align2, implicit $mode, implicit $exec
1360 %1051:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub1:vreg_512_align2, implicit $mode, implicit $exec
1361 %1052:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub2:vreg_512_align2, implicit $mode, implicit $exec
1362 %1053:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub3:vreg_512_align2, implicit $mode, implicit $exec
1363 %1054:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub4:vreg_512_align2, implicit $mode, implicit $exec
1364 %1055:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub5:vreg_512_align2, implicit $mode, implicit $exec
1365 %1056:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub6:vreg_512_align2, implicit $mode, implicit $exec
1366 %1057:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub7:vreg_512_align2, implicit $mode, implicit $exec
1367 %1058:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub8:vreg_512_align2, implicit $mode, implicit $exec
1368 %1059:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub9:vreg_512_align2, implicit $mode, implicit $exec
1369 %1060:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub10:vreg_512_align2, implicit $mode, implicit $exec
1370 %1061:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub11:vreg_512_align2, implicit $mode, implicit $exec
1371 %1062:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub12:vreg_512_align2, implicit $mode, implicit $exec
1372 %1063:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub13:vreg_512_align2, implicit $mode, implicit $exec
1373 %1064:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub14:vreg_512_align2, implicit $mode, implicit $exec
1374 %1065:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %668.sub15:vreg_512_align2, implicit $mode, implicit $exec
1375 %1066:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub0:vreg_512_align2, implicit $mode, implicit $exec
1376 %1067:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub1:vreg_512_align2, implicit $mode, implicit $exec
1377 %1068:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub2:vreg_512_align2, implicit $mode, implicit $exec
1378 %1069:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub3:vreg_512_align2, implicit $mode, implicit $exec
1379 %1070:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub4:vreg_512_align2, implicit $mode, implicit $exec
1380 %1071:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub5:vreg_512_align2, implicit $mode, implicit $exec
1381 %1072:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub6:vreg_512_align2, implicit $mode, implicit $exec
1382 %1073:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub7:vreg_512_align2, implicit $mode, implicit $exec
1383 %1074:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub8:vreg_512_align2, implicit $mode, implicit $exec
1384 %1075:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub9:vreg_512_align2, implicit $mode, implicit $exec
1385 %1076:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub10:vreg_512_align2, implicit $mode, implicit $exec
1386 %1077:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub11:vreg_512_align2, implicit $mode, implicit $exec
1387 %1078:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub12:vreg_512_align2, implicit $mode, implicit $exec
1388 %1079:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub13:vreg_512_align2, implicit $mode, implicit $exec
1389 %1080:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub14:vreg_512_align2, implicit $mode, implicit $exec
1390 %1081:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %679.sub15:vreg_512_align2, implicit $mode, implicit $exec
1391 %1082:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub0:vreg_512_align2, implicit $mode, implicit $exec
1392 %1083:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub1:vreg_512_align2, implicit $mode, implicit $exec
1393 %1084:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub2:vreg_512_align2, implicit $mode, implicit $exec
1394 %1085:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub3:vreg_512_align2, implicit $mode, implicit $exec
1395 %1086:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub4:vreg_512_align2, implicit $mode, implicit $exec
1396 %1087:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub5:vreg_512_align2, implicit $mode, implicit $exec
1397 %1088:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub6:vreg_512_align2, implicit $mode, implicit $exec
1398 %1089:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub7:vreg_512_align2, implicit $mode, implicit $exec
1399 %1090:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub8:vreg_512_align2, implicit $mode, implicit $exec
1400 %1091:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub9:vreg_512_align2, implicit $mode, implicit $exec
1401 %1092:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub10:vreg_512_align2, implicit $mode, implicit $exec
1402 %1093:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub11:vreg_512_align2, implicit $mode, implicit $exec
1403 %1094:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub12:vreg_512_align2, implicit $mode, implicit $exec
1404 %1095:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub13:vreg_512_align2, implicit $mode, implicit $exec
1405 %1096:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub14:vreg_512_align2, implicit $mode, implicit $exec
1406 %1097:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %690.sub15:vreg_512_align2, implicit $mode, implicit $exec
1407 %1098:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub0:vreg_512_align2, implicit $mode, implicit $exec
1408 %1099:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub1:vreg_512_align2, implicit $mode, implicit $exec
1409 %1100:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub2:vreg_512_align2, implicit $mode, implicit $exec
1410 %1101:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub3:vreg_512_align2, implicit $mode, implicit $exec
1411 %1102:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub4:vreg_512_align2, implicit $mode, implicit $exec
1412 %1103:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub5:vreg_512_align2, implicit $mode, implicit $exec
1413 %1104:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub6:vreg_512_align2, implicit $mode, implicit $exec
1414 %1105:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub7:vreg_512_align2, implicit $mode, implicit $exec
1415 %1106:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub8:vreg_512_align2, implicit $mode, implicit $exec
1416 %1107:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub9:vreg_512_align2, implicit $mode, implicit $exec
1417 %1108:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub10:vreg_512_align2, implicit $mode, implicit $exec
1418 %1109:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub11:vreg_512_align2, implicit $mode, implicit $exec
1419 %1110:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub12:vreg_512_align2, implicit $mode, implicit $exec
1420 %1111:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub13:vreg_512_align2, implicit $mode, implicit $exec
1421 %1112:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub14:vreg_512_align2, implicit $mode, implicit $exec
1422 %1113:vgpr_32 = contract nofpexcept V_MUL_F32_e32 %1.sub4:sgpr_512, %701.sub15:vreg_512_align2, implicit $mode, implicit $exec
1423 %1115:vgpr_32 = V_MAX3_F32_e64 0, %1050:vgpr_32, 0, %1114:sgpr_32, 0, %1051:vgpr_32, 0, 0, implicit $mode, implicit $exec
1424 %1116:vgpr_32 = V_MAX3_F32_e64 0, %1115:vgpr_32, 0, %1052:vgpr_32, 0, %1053:vgpr_32, 0, 0, implicit $mode, implicit $exec
1425 %1117:vgpr_32 = V_MAX3_F32_e64 0, %1116:vgpr_32, 0, %1054:vgpr_32, 0, %1055:vgpr_32, 0, 0, implicit $mode, implicit $exec
1426 %1118:vgpr_32 = V_MAX3_F32_e64 0, %1117:vgpr_32, 0, %1056:vgpr_32, 0, %1057:vgpr_32, 0, 0, implicit $mode, implicit $exec
1427 %1119:vgpr_32 = V_MAX3_F32_e64 0, %1118:vgpr_32, 0, %1058:vgpr_32, 0, %1059:vgpr_32, 0, 0, implicit $mode, implicit $exec
1428 %1120:vgpr_32 = V_MAX3_F32_e64 0, %1119:vgpr_32, 0, %1060:vgpr_32, 0, %1061:vgpr_32, 0, 0, implicit $mode, implicit $exec
1429 %1121:vgpr_32 = V_MAX3_F32_e64 0, %1120:vgpr_32, 0, %1062:vgpr_32, 0, %1063:vgpr_32, 0, 0, implicit $mode, implicit $exec
1430 %1122:vgpr_32 = V_MAX3_F32_e64 0, %1121:vgpr_32, 0, %1064:vgpr_32, 0, %1065:vgpr_32, 0, 0, implicit $mode, implicit $exec
1431 %1123:vgpr_32 = V_MAX3_F32_e64 0, %1122:vgpr_32, 0, %1066:vgpr_32, 0, %1067:vgpr_32, 0, 0, implicit $mode, implicit $exec
1432 %1124:vgpr_32 = V_MAX3_F32_e64 0, %1123:vgpr_32, 0, %1068:vgpr_32, 0, %1069:vgpr_32, 0, 0, implicit $mode, implicit $exec
1433 %1125:vgpr_32 = V_MAX3_F32_e64 0, %1124:vgpr_32, 0, %1070:vgpr_32, 0, %1071:vgpr_32, 0, 0, implicit $mode, implicit $exec
1434 %1126:vgpr_32 = V_MAX3_F32_e64 0, %1125:vgpr_32, 0, %1072:vgpr_32, 0, %1073:vgpr_32, 0, 0, implicit $mode, implicit $exec
1435 %1127:vgpr_32 = V_MAX3_F32_e64 0, %1126:vgpr_32, 0, %1074:vgpr_32, 0, %1075:vgpr_32, 0, 0, implicit $mode, implicit $exec
1436 %1128:vgpr_32 = V_MAX3_F32_e64 0, %1127:vgpr_32, 0, %1076:vgpr_32, 0, %1077:vgpr_32, 0, 0, implicit $mode, implicit $exec
1437 %1129:vgpr_32 = V_MAX3_F32_e64 0, %1128:vgpr_32, 0, %1078:vgpr_32, 0, %1079:vgpr_32, 0, 0, implicit $mode, implicit $exec
1438 %1130:vgpr_32 = V_MAX3_F32_e64 0, %1129:vgpr_32, 0, %1080:vgpr_32, 0, %1081:vgpr_32, 0, 0, implicit $mode, implicit $exec
1439 %1131:vgpr_32 = V_MAX3_F32_e64 0, %1130:vgpr_32, 0, %1082:vgpr_32, 0, %1083:vgpr_32, 0, 0, implicit $mode, implicit $exec
1440 %1132:vgpr_32 = V_MAX3_F32_e64 0, %1131:vgpr_32, 0, %1084:vgpr_32, 0, %1085:vgpr_32, 0, 0, implicit $mode, implicit $exec
1441 %1133:vgpr_32 = V_MAX3_F32_e64 0, %1132:vgpr_32, 0, %1086:vgpr_32, 0, %1087:vgpr_32, 0, 0, implicit $mode, implicit $exec
1442 %1134:vgpr_32 = V_MAX3_F32_e64 0, %1133:vgpr_32, 0, %1088:vgpr_32, 0, %1089:vgpr_32, 0, 0, implicit $mode, implicit $exec
1443 %1135:vgpr_32 = V_MAX3_F32_e64 0, %1134:vgpr_32, 0, %1090:vgpr_32, 0, %1091:vgpr_32, 0, 0, implicit $mode, implicit $exec
1444 %1136:vgpr_32 = V_MAX3_F32_e64 0, %1135:vgpr_32, 0, %1092:vgpr_32, 0, %1093:vgpr_32, 0, 0, implicit $mode, implicit $exec
1445 %1137:vgpr_32 = V_MAX3_F32_e64 0, %1136:vgpr_32, 0, %1094:vgpr_32, 0, %1095:vgpr_32, 0, 0, implicit $mode, implicit $exec
1446 %1138:vgpr_32 = V_MAX3_F32_e64 0, %1137:vgpr_32, 0, %1096:vgpr_32, 0, %1097:vgpr_32, 0, 0, implicit $mode, implicit $exec
1447 %1139:vgpr_32 = V_MAX3_F32_e64 0, %1138:vgpr_32, 0, %1098:vgpr_32, 0, %1099:vgpr_32, 0, 0, implicit $mode, implicit $exec
1448 %1140:vgpr_32 = V_MAX3_F32_e64 0, %1139:vgpr_32, 0, %1100:vgpr_32, 0, %1101:vgpr_32, 0, 0, implicit $mode, implicit $exec
1449 %1141:vgpr_32 = V_MAX3_F32_e64 0, %1140:vgpr_32, 0, %1102:vgpr_32, 0, %1103:vgpr_32, 0, 0, implicit $mode, implicit $exec
1450 %1142:vgpr_32 = V_MAX3_F32_e64 0, %1141:vgpr_32, 0, %1104:vgpr_32, 0, %1105:vgpr_32, 0, 0, implicit $mode, implicit $exec
1451 %1143:vgpr_32 = V_MAX3_F32_e64 0, %1142:vgpr_32, 0, %1106:vgpr_32, 0, %1107:vgpr_32, 0, 0, implicit $mode, implicit $exec
1452 %1144:vgpr_32 = V_MAX3_F32_e64 0, %1143:vgpr_32, 0, %1108:vgpr_32, 0, %1109:vgpr_32, 0, 0, implicit $mode, implicit $exec
1453 %1145:vgpr_32 = V_MAX3_F32_e64 0, %1144:vgpr_32, 0, %1110:vgpr_32, 0, %1111:vgpr_32, 0, 0, implicit $mode, implicit $exec
1454 %1146:vgpr_32 = V_MAX3_F32_e64 0, %1145:vgpr_32, 0, %1112:vgpr_32, 0, %1113:vgpr_32, 0, 0, implicit $mode, implicit $exec
1455 %1147:vgpr_32 = DS_BPERMUTE_B32 %39:vgpr_32, %1146:vgpr_32, 0, implicit $exec
1456 %1148:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %1147:vgpr_32, %1147:vgpr_32, implicit $mode, implicit $exec
1457 %1149:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %1146:vgpr_32, %1148:vgpr_32, implicit $mode, implicit $exec
1458 %1150:vgpr_32 = DS_BPERMUTE_B32 %39:vgpr_32, %1149:vgpr_32, 0, implicit $exec
1459 %1151:vgpr_32 = V_CNDMASK_B32_e64 0, %1150:vgpr_32, 0, %1149:vgpr_32, %484:sreg_64_xexec, implicit $exec
1460 %1153:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %1151:vgpr_32, %1151:vgpr_32, implicit $mode, implicit $exec
1461 %1154:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %3346:vgpr_32, %3346:vgpr_32, implicit $mode, implicit $exec
1462 %151:vgpr_32 = contract nofpexcept V_MAX_F32_e32 %1154:vgpr_32, %1153:vgpr_32, implicit $mode, implicit $exec
1463 %1155:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub0:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1464 %1157:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1155:vgpr_32, implicit $mode, implicit $exec
1465 %1158:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1157:vgpr_32, implicit $mode, implicit $exec
1466 %1159:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub1:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1467 %1160:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1159:vgpr_32, implicit $mode, implicit $exec
1468 %1161:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1160:vgpr_32, implicit $mode, implicit $exec
1469 %1162:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub2:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1470 %1163:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1162:vgpr_32, implicit $mode, implicit $exec
1471 %1164:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1163:vgpr_32, implicit $mode, implicit $exec
1472 %1165:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub3:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1473 %1166:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1165:vgpr_32, implicit $mode, implicit $exec
1474 %1167:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1166:vgpr_32, implicit $mode, implicit $exec
1475 %1168:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub4:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1476 %1169:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1168:vgpr_32, implicit $mode, implicit $exec
1477 %1170:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1169:vgpr_32, implicit $mode, implicit $exec
1478 %1171:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub5:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1479 %1172:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1171:vgpr_32, implicit $mode, implicit $exec
1480 %1173:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1172:vgpr_32, implicit $mode, implicit $exec
1481 %1174:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub6:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1482 %1175:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1174:vgpr_32, implicit $mode, implicit $exec
1483 %1176:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1175:vgpr_32, implicit $mode, implicit $exec
1484 %1177:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub7:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1485 %1178:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1177:vgpr_32, implicit $mode, implicit $exec
1486 %1179:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1178:vgpr_32, implicit $mode, implicit $exec
1487 %1180:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub8:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1488 %1181:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1180:vgpr_32, implicit $mode, implicit $exec
1489 %1182:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1181:vgpr_32, implicit $mode, implicit $exec
1490 %1183:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub9:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1491 %1184:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1183:vgpr_32, implicit $mode, implicit $exec
1492 %1185:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1184:vgpr_32, implicit $mode, implicit $exec
1493 %1186:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub10:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1494 %1187:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1186:vgpr_32, implicit $mode, implicit $exec
1495 %1188:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1187:vgpr_32, implicit $mode, implicit $exec
1496 %1189:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub11:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1497 %1190:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1189:vgpr_32, implicit $mode, implicit $exec
1498 %1191:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1190:vgpr_32, implicit $mode, implicit $exec
1499 %1192:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub12:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1500 %1193:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1192:vgpr_32, implicit $mode, implicit $exec
1501 %1194:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1193:vgpr_32, implicit $mode, implicit $exec
1502 %1195:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub13:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1503 %1196:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1195:vgpr_32, implicit $mode, implicit $exec
1504 %1197:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1196:vgpr_32, implicit $mode, implicit $exec
1505 %1198:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub14:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1506 %1199:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1198:vgpr_32, implicit $mode, implicit $exec
1507 %1200:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1199:vgpr_32, implicit $mode, implicit $exec
1508 %1201:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %668.sub15:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1509 %1202:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1201:vgpr_32, implicit $mode, implicit $exec
1510 %1203:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1202:vgpr_32, implicit $mode, implicit $exec
1511 %1204:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub0:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1512 %1205:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1204:vgpr_32, implicit $mode, implicit $exec
1513 %1206:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1205:vgpr_32, implicit $mode, implicit $exec
1514 %1207:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub1:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1515 %1208:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1207:vgpr_32, implicit $mode, implicit $exec
1516 %1209:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1208:vgpr_32, implicit $mode, implicit $exec
1517 %1210:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub2:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1518 %1211:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1210:vgpr_32, implicit $mode, implicit $exec
1519 %1212:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1211:vgpr_32, implicit $mode, implicit $exec
1520 %1213:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub3:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1521 %1214:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1213:vgpr_32, implicit $mode, implicit $exec
1522 %1215:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1214:vgpr_32, implicit $mode, implicit $exec
1523 %1216:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub4:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1524 %1217:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1216:vgpr_32, implicit $mode, implicit $exec
1525 %1218:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1217:vgpr_32, implicit $mode, implicit $exec
1526 %1219:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub5:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1527 %1220:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1219:vgpr_32, implicit $mode, implicit $exec
1528 %1221:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1220:vgpr_32, implicit $mode, implicit $exec
1529 %1222:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub6:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1530 %1223:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1222:vgpr_32, implicit $mode, implicit $exec
1531 %1224:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1223:vgpr_32, implicit $mode, implicit $exec
1532 %1225:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub7:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1533 %1226:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1225:vgpr_32, implicit $mode, implicit $exec
1534 %1227:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1226:vgpr_32, implicit $mode, implicit $exec
1535 %1228:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub8:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1536 %1229:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1228:vgpr_32, implicit $mode, implicit $exec
1537 %1230:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1229:vgpr_32, implicit $mode, implicit $exec
1538 %1231:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub9:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1539 %1232:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1231:vgpr_32, implicit $mode, implicit $exec
1540 %1233:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1232:vgpr_32, implicit $mode, implicit $exec
1541 %1234:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub10:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1542 %1235:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1234:vgpr_32, implicit $mode, implicit $exec
1543 %1236:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1235:vgpr_32, implicit $mode, implicit $exec
1544 %1237:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub11:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1545 %1238:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1237:vgpr_32, implicit $mode, implicit $exec
1546 %1239:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1238:vgpr_32, implicit $mode, implicit $exec
1547 %1240:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub12:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1548 %1241:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1240:vgpr_32, implicit $mode, implicit $exec
1549 %1242:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1241:vgpr_32, implicit $mode, implicit $exec
1550 %1243:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub13:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1551 %1244:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1243:vgpr_32, implicit $mode, implicit $exec
1552 %1245:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1244:vgpr_32, implicit $mode, implicit $exec
1553 %1246:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub14:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1554 %1247:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1246:vgpr_32, implicit $mode, implicit $exec
1555 %1248:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1247:vgpr_32, implicit $mode, implicit $exec
1556 %1249:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %679.sub15:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1557 %1250:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1249:vgpr_32, implicit $mode, implicit $exec
1558 %1251:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1250:vgpr_32, implicit $mode, implicit $exec
1559 %1252:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub0:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1560 %1253:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1252:vgpr_32, implicit $mode, implicit $exec
1561 %1254:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1253:vgpr_32, implicit $mode, implicit $exec
1562 %1255:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub1:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1563 %1256:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1255:vgpr_32, implicit $mode, implicit $exec
1564 %1257:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1256:vgpr_32, implicit $mode, implicit $exec
1565 %1258:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub2:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1566 %1259:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1258:vgpr_32, implicit $mode, implicit $exec
1567 %1260:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1259:vgpr_32, implicit $mode, implicit $exec
1568 %1261:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub3:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1569 %1262:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1261:vgpr_32, implicit $mode, implicit $exec
1570 %1263:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1262:vgpr_32, implicit $mode, implicit $exec
1571 %1264:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub4:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1572 %1265:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1264:vgpr_32, implicit $mode, implicit $exec
1573 %1266:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1265:vgpr_32, implicit $mode, implicit $exec
1574 %1267:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub5:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1575 %1268:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1267:vgpr_32, implicit $mode, implicit $exec
1576 %1269:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1268:vgpr_32, implicit $mode, implicit $exec
1577 %1270:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub6:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1578 %1271:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1270:vgpr_32, implicit $mode, implicit $exec
1579 %1272:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1271:vgpr_32, implicit $mode, implicit $exec
1580 %1273:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub7:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1581 %1274:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1273:vgpr_32, implicit $mode, implicit $exec
1582 %1275:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1274:vgpr_32, implicit $mode, implicit $exec
1583 %1276:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub8:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1584 %1277:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1276:vgpr_32, implicit $mode, implicit $exec
1585 %1278:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1277:vgpr_32, implicit $mode, implicit $exec
1586 %1279:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub9:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1587 %1280:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1279:vgpr_32, implicit $mode, implicit $exec
1588 %1281:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1280:vgpr_32, implicit $mode, implicit $exec
1589 %1282:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub10:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1590 %1283:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1282:vgpr_32, implicit $mode, implicit $exec
1591 %1284:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1283:vgpr_32, implicit $mode, implicit $exec
1592 %1285:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub11:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1593 %1286:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1285:vgpr_32, implicit $mode, implicit $exec
1594 %1287:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1286:vgpr_32, implicit $mode, implicit $exec
1595 %1288:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub12:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1596 %1289:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1288:vgpr_32, implicit $mode, implicit $exec
1597 %1290:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1289:vgpr_32, implicit $mode, implicit $exec
1598 %1291:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub13:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1599 %1292:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1291:vgpr_32, implicit $mode, implicit $exec
1600 %1293:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1292:vgpr_32, implicit $mode, implicit $exec
1601 %1294:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub14:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1602 %1295:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1294:vgpr_32, implicit $mode, implicit $exec
1603 %1296:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1295:vgpr_32, implicit $mode, implicit $exec
1604 %1297:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %690.sub15:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1605 %1298:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1297:vgpr_32, implicit $mode, implicit $exec
1606 %1299:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1298:vgpr_32, implicit $mode, implicit $exec
1607 %1300:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub0:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1608 %1301:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1300:vgpr_32, implicit $mode, implicit $exec
1609 %1302:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1301:vgpr_32, implicit $mode, implicit $exec
1610 %1303:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub1:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1611 %1304:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1303:vgpr_32, implicit $mode, implicit $exec
1612 %1305:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1304:vgpr_32, implicit $mode, implicit $exec
1613 %1306:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub2:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1614 %1307:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1306:vgpr_32, implicit $mode, implicit $exec
1615 %1308:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1307:vgpr_32, implicit $mode, implicit $exec
1616 %1309:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub3:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1617 %1310:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1309:vgpr_32, implicit $mode, implicit $exec
1618 %1311:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1310:vgpr_32, implicit $mode, implicit $exec
1619 %1312:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub4:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1620 %1313:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1312:vgpr_32, implicit $mode, implicit $exec
1621 %1314:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1313:vgpr_32, implicit $mode, implicit $exec
1622 %1315:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub5:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1623 %1316:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1315:vgpr_32, implicit $mode, implicit $exec
1624 %1317:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1316:vgpr_32, implicit $mode, implicit $exec
1625 %1318:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub6:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1626 %1319:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1318:vgpr_32, implicit $mode, implicit $exec
1627 %1320:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1319:vgpr_32, implicit $mode, implicit $exec
1628 %1321:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub7:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1629 %1322:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1321:vgpr_32, implicit $mode, implicit $exec
1630 %1323:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1322:vgpr_32, implicit $mode, implicit $exec
1631 %1324:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub8:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1632 %1325:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1324:vgpr_32, implicit $mode, implicit $exec
1633 %1326:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1325:vgpr_32, implicit $mode, implicit $exec
1634 %1327:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub9:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1635 %1328:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1327:vgpr_32, implicit $mode, implicit $exec
1636 %1329:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1328:vgpr_32, implicit $mode, implicit $exec
1637 %1330:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub10:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1638 %1331:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1330:vgpr_32, implicit $mode, implicit $exec
1639 %1332:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1331:vgpr_32, implicit $mode, implicit $exec
1640 %1333:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub11:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1641 %1334:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1333:vgpr_32, implicit $mode, implicit $exec
1642 %1335:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1334:vgpr_32, implicit $mode, implicit $exec
1643 %1336:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub12:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1644 %1337:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1336:vgpr_32, implicit $mode, implicit $exec
1645 %1338:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1337:vgpr_32, implicit $mode, implicit $exec
1646 %1339:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub13:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1647 %1340:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1339:vgpr_32, implicit $mode, implicit $exec
1648 %1341:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1340:vgpr_32, implicit $mode, implicit $exec
1649 %1342:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub14:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1650 %1343:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1342:vgpr_32, implicit $mode, implicit $exec
1651 %1344:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1343:vgpr_32, implicit $mode, implicit $exec
1652 %1345:vgpr_32 = contract nofpexcept V_FMA_F32_e64 0, %1.sub4:sgpr_512, 0, %701.sub15:vreg_512_align2, 1, %151:vgpr_32, 0, 0, implicit $mode, implicit $exec
1653 %1346:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1345:vgpr_32, implicit $mode, implicit $exec
1654 %1347:vgpr_32 = afn nofpexcept V_EXP_F32_e32 %1346:vgpr_32, implicit $mode, implicit $exec
1655 %1348:vgpr_32 = contract nofpexcept V_ADD_F32_e32 0, %1158:vgpr_32, implicit $mode, implicit $exec
1656 %1349:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1161:vgpr_32, %1348:vgpr_32, implicit $mode, implicit $exec
1657 %1350:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1164:vgpr_32, %1349:vgpr_32, implicit $mode, implicit $exec
1658 %1351:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1167:vgpr_32, %1350:vgpr_32, implicit $mode, implicit $exec
1659 %1352:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1170:vgpr_32, %1351:vgpr_32, implicit $mode, implicit $exec
1660 %1353:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1173:vgpr_32, %1352:vgpr_32, implicit $mode, implicit $exec
1661 %1354:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1176:vgpr_32, %1353:vgpr_32, implicit $mode, implicit $exec
1662 %1355:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1179:vgpr_32, %1354:vgpr_32, implicit $mode, implicit $exec
1663 %1356:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1182:vgpr_32, %1355:vgpr_32, implicit $mode, implicit $exec
1664 %1357:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1185:vgpr_32, %1356:vgpr_32, implicit $mode, implicit $exec
1665 %1358:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1188:vgpr_32, %1357:vgpr_32, implicit $mode, implicit $exec
1666 %1359:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1191:vgpr_32, %1358:vgpr_32, implicit $mode, implicit $exec
1667 %1360:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1194:vgpr_32, %1359:vgpr_32, implicit $mode, implicit $exec
1668 %1361:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1197:vgpr_32, %1360:vgpr_32, implicit $mode, implicit $exec
1669 %1362:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1200:vgpr_32, %1361:vgpr_32, implicit $mode, implicit $exec
1670 %1363:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1203:vgpr_32, %1362:vgpr_32, implicit $mode, implicit $exec
1671 %1364:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1206:vgpr_32, %1363:vgpr_32, implicit $mode, implicit $exec
1672 %1365:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1209:vgpr_32, %1364:vgpr_32, implicit $mode, implicit $exec
1673 %1366:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1212:vgpr_32, %1365:vgpr_32, implicit $mode, implicit $exec
1674 %1367:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1215:vgpr_32, %1366:vgpr_32, implicit $mode, implicit $exec
1675 %1368:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1218:vgpr_32, %1367:vgpr_32, implicit $mode, implicit $exec
1676 %1369:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1221:vgpr_32, %1368:vgpr_32, implicit $mode, implicit $exec
1677 %1370:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1224:vgpr_32, %1369:vgpr_32, implicit $mode, implicit $exec
1678 %1371:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1227:vgpr_32, %1370:vgpr_32, implicit $mode, implicit $exec
1679 %1372:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1230:vgpr_32, %1371:vgpr_32, implicit $mode, implicit $exec
1680 %1373:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1233:vgpr_32, %1372:vgpr_32, implicit $mode, implicit $exec
1681 %1374:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1236:vgpr_32, %1373:vgpr_32, implicit $mode, implicit $exec
1682 %1375:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1239:vgpr_32, %1374:vgpr_32, implicit $mode, implicit $exec
1683 %1376:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1242:vgpr_32, %1375:vgpr_32, implicit $mode, implicit $exec
1684 %1377:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1245:vgpr_32, %1376:vgpr_32, implicit $mode, implicit $exec
1685 %1378:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1248:vgpr_32, %1377:vgpr_32, implicit $mode, implicit $exec
1686 %1379:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1251:vgpr_32, %1378:vgpr_32, implicit $mode, implicit $exec
1687 %1380:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1254:vgpr_32, %1379:vgpr_32, implicit $mode, implicit $exec
1688 %1381:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1257:vgpr_32, %1380:vgpr_32, implicit $mode, implicit $exec
1689 %1382:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1260:vgpr_32, %1381:vgpr_32, implicit $mode, implicit $exec
1690 %1383:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1263:vgpr_32, %1382:vgpr_32, implicit $mode, implicit $exec
1691 %1384:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1266:vgpr_32, %1383:vgpr_32, implicit $mode, implicit $exec
1692 %1385:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1269:vgpr_32, %1384:vgpr_32, implicit $mode, implicit $exec
1693 %1386:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1272:vgpr_32, %1385:vgpr_32, implicit $mode, implicit $exec
1694 %1387:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1275:vgpr_32, %1386:vgpr_32, implicit $mode, implicit $exec
1695 %1388:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1278:vgpr_32, %1387:vgpr_32, implicit $mode, implicit $exec
1696 %1389:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1281:vgpr_32, %1388:vgpr_32, implicit $mode, implicit $exec
1697 %1390:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1284:vgpr_32, %1389:vgpr_32, implicit $mode, implicit $exec
1698 %1391:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1287:vgpr_32, %1390:vgpr_32, implicit $mode, implicit $exec
1699 %1392:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1290:vgpr_32, %1391:vgpr_32, implicit $mode, implicit $exec
1700 %1393:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1293:vgpr_32, %1392:vgpr_32, implicit $mode, implicit $exec
1701 %1394:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1296:vgpr_32, %1393:vgpr_32, implicit $mode, implicit $exec
1702 %1395:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1299:vgpr_32, %1394:vgpr_32, implicit $mode, implicit $exec
1703 %1396:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1302:vgpr_32, %1395:vgpr_32, implicit $mode, implicit $exec
1704 %1397:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1305:vgpr_32, %1396:vgpr_32, implicit $mode, implicit $exec
1705 %1398:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1308:vgpr_32, %1397:vgpr_32, implicit $mode, implicit $exec
1706 %1399:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1311:vgpr_32, %1398:vgpr_32, implicit $mode, implicit $exec
1707 %1400:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1314:vgpr_32, %1399:vgpr_32, implicit $mode, implicit $exec
1708 %1401:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1317:vgpr_32, %1400:vgpr_32, implicit $mode, implicit $exec
1709 %1402:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1320:vgpr_32, %1401:vgpr_32, implicit $mode, implicit $exec
1710 %1403:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1323:vgpr_32, %1402:vgpr_32, implicit $mode, implicit $exec
1711 %1404:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1326:vgpr_32, %1403:vgpr_32, implicit $mode, implicit $exec
1712 %1405:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1329:vgpr_32, %1404:vgpr_32, implicit $mode, implicit $exec
1713 %1406:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1332:vgpr_32, %1405:vgpr_32, implicit $mode, implicit $exec
1714 %1407:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1335:vgpr_32, %1406:vgpr_32, implicit $mode, implicit $exec
1715 %1408:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1338:vgpr_32, %1407:vgpr_32, implicit $mode, implicit $exec
1716 %1409:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1341:vgpr_32, %1408:vgpr_32, implicit $mode, implicit $exec
1717 %1410:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1344:vgpr_32, %1409:vgpr_32, implicit $mode, implicit $exec
1718 %1411:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1347:vgpr_32, %1410:vgpr_32, implicit $mode, implicit $exec
1719 %1412:vgpr_32 = DS_BPERMUTE_B32 %39:vgpr_32, %1411:vgpr_32, 0, implicit $exec
1720 %1413:vgpr_32 = contract nofpexcept V_ADD_F32_e32 %1411:vgpr_32, %1412:vgpr_32, implicit $mode, implicit $exec
1721 %1414:vgpr_32 = DS_BPERMUTE_B32 %39:vgpr_32, %1413:vgpr_32, 0, implicit $exec
1722 %3347:vgpr_32 = V_CNDMASK_B32_e64 0, %1414:vgpr_32, 0, %1413:vgpr_32, %484:sreg_64_xexec, implicit $exec
1723 %1417:vgpr_32 = contract nofpexcept V_SUB_F32_e32 %3346:vgpr_32, %151:vgpr_32, implicit $mode, implicit $exec
1724 %1418:vgpr_32 = afn nofpexcept V_MUL_F32_e32 1069066811, %1417:vgpr_32, implicit $mode, implicit $exec
1725 undef %1455.sub0:vreg_64_align2 = afn nofpexcept V_EXP_F32_e32 %1418:vgpr_32, implicit $mode, implicit $exec
1726 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1727 undef %3037.sub0:vreg_64_align2 = V_PERM_B32_e64 %805.sub0:vreg_64_align2, %803.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec
1728 undef %3021.sub0:vreg_64_align2 = V_PERM_B32_e64 %805.sub0:vreg_64_align2, %803.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec
1729 %3037.sub1:vreg_64_align2 = V_PERM_B32_e64 %809.sub0:vreg_64_align2, %807.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec
1730 %3021.sub1:vreg_64_align2 = V_PERM_B32_e64 %809.sub0:vreg_64_align2, %807.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec
1731 undef %3005.sub0:vreg_64_align2 = V_PERM_B32_e64 %805.sub1:vreg_64_align2, %803.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec
1732 undef %2978.sub0:vreg_64_align2 = V_PERM_B32_e64 %805.sub1:vreg_64_align2, %803.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec
1733 %3005.sub1:vreg_64_align2 = V_PERM_B32_e64 %809.sub1:vreg_64_align2, %807.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec
1734 %2978.sub1:vreg_64_align2 = V_PERM_B32_e64 %809.sub1:vreg_64_align2, %807.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec
1735 %1442:vgpr_32 = V_ADD_U32_e32 %593:sreg_32, %15:vgpr_32, implicit $exec
1736 %1444:vgpr_32 = V_AND_B32_e32 536870911, %1442:vgpr_32, implicit $exec
1737 %1446:vgpr_32 = nsw V_MUL_LO_U32_e64 %1444:vgpr_32, %494:sreg_32, implicit $exec
1738 %1447:vgpr_32 = V_ADD_LSHL_U32_e64 %47:vgpr_32, %1446:vgpr_32, 1, implicit $exec
1739 DS_WRITE_B64_gfx9 %1447:vgpr_32, %3037:vreg_64_align2, 0, 0, implicit $exec
1740 %1449:vgpr_32 = V_LSHL_ADD_U32_e64 %41:vgpr_32, 1, %1447:vgpr_32, implicit $exec
1741 DS_WRITE_B64_gfx9 %1449:vgpr_32, %3021:vreg_64_align2, 0, 0, implicit $exec
1742 %1451:vgpr_32 = V_LSHL_ADD_U32_e64 %42:vgpr_32, 1, %1449:vgpr_32, implicit $exec
1743 DS_WRITE_B64_gfx9 %1451:vgpr_32, %3005:vreg_64_align2, 0, 0, implicit $exec
1744 %1453:vgpr_32 = V_LSHL_ADD_U32_e64 %43:vgpr_32, 1, %1451:vgpr_32, implicit $exec
1745 DS_WRITE_B64_gfx9 %1453:vgpr_32, %2978:vreg_64_align2, 0, 0, implicit $exec
1746 %3347:vgpr_32 = contract nofpexcept V_FMAC_F32_e32 %86:vgpr_32, %1455.sub0:vreg_64_align2, %3347:vgpr_32, implicit $mode, implicit $exec
1747 %2986.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub0_sub1:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1748 %2986.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub2_sub3:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1749 %2986.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub4_sub5:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1750 %2986.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub6_sub7:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1751 %2986.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub8_sub9:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1752 %2986.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub10_sub11:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1753 %2986.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub12_sub13:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1754 %2986.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2986.sub14_sub15:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1755 %3038.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub0_sub1:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1756 %3038.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub2_sub3:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1757 %3038.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub4_sub5:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1758 %3038.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub6_sub7:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1759 %3038.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub8_sub9:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1760 %3038.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub10_sub11:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1761 %3038.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub12_sub13:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1762 %3038.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3038.sub14_sub15:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1763 %2980.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub0_sub1:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1764 %2980.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub2_sub3:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1765 %2980.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub4_sub5:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1766 %2980.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub6_sub7:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1767 %2980.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub8_sub9:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1768 %2980.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub10_sub11:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1769 %2980.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub12_sub13:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1770 %2980.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %2980.sub14_sub15:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1771 %3003.sub0_sub1:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub0_sub1:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1772 %3003.sub2_sub3:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub2_sub3:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1773 %3003.sub4_sub5:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub4_sub5:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1774 %3003.sub6_sub7:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub6_sub7:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1775 %3003.sub8_sub9:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub8_sub9:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1776 %3003.sub10_sub11:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub10_sub11:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1777 %3003.sub12_sub13:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub12_sub13:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1778 %3003.sub14_sub15:vreg_512_align2 = contract nofpexcept V_PK_MUL_F32 8, %3003.sub14_sub15:vreg_512_align2, 0, %1455:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
1779 %1554:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1158:vgpr_32, implicit $mode, implicit $exec
1780 %1555:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1161:vgpr_32, implicit $mode, implicit $exec
1781 %1556:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1164:vgpr_32, implicit $mode, implicit $exec
1782 %1557:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1170:vgpr_32, implicit $mode, implicit $exec
1783 %1558:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1173:vgpr_32, implicit $mode, implicit $exec
1784 %1559:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1176:vgpr_32, implicit $mode, implicit $exec
1785 %1560:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1182:vgpr_32, implicit $mode, implicit $exec
1786 %1561:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1185:vgpr_32, implicit $mode, implicit $exec
1787 %1562:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1188:vgpr_32, implicit $mode, implicit $exec
1788 %1563:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1194:vgpr_32, implicit $mode, implicit $exec
1789 %1564:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1197:vgpr_32, implicit $mode, implicit $exec
1790 %1565:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1200:vgpr_32, implicit $mode, implicit $exec
1791 %1566:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1206:vgpr_32, implicit $mode, implicit $exec
1792 %1567:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1209:vgpr_32, implicit $mode, implicit $exec
1793 %1568:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1212:vgpr_32, implicit $mode, implicit $exec
1794 %1569:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1218:vgpr_32, implicit $mode, implicit $exec
1795 %1570:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1221:vgpr_32, implicit $mode, implicit $exec
1796 %1571:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1224:vgpr_32, implicit $mode, implicit $exec
1797 %1572:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1230:vgpr_32, implicit $mode, implicit $exec
1798 %1573:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1233:vgpr_32, implicit $mode, implicit $exec
1799 %1574:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1236:vgpr_32, implicit $mode, implicit $exec
1800 %1575:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1242:vgpr_32, implicit $mode, implicit $exec
1801 %1576:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1245:vgpr_32, implicit $mode, implicit $exec
1802 %1577:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1248:vgpr_32, implicit $mode, implicit $exec
1803 %1578:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1254:vgpr_32, implicit $mode, implicit $exec
1804 %1579:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1257:vgpr_32, implicit $mode, implicit $exec
1805 %1580:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1260:vgpr_32, implicit $mode, implicit $exec
1806 %1581:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1266:vgpr_32, implicit $mode, implicit $exec
1807 %1582:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1269:vgpr_32, implicit $mode, implicit $exec
1808 %1583:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1272:vgpr_32, implicit $mode, implicit $exec
1809 %1584:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1278:vgpr_32, implicit $mode, implicit $exec
1810 %1585:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1281:vgpr_32, implicit $mode, implicit $exec
1811 %1586:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1284:vgpr_32, implicit $mode, implicit $exec
1812 %1587:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1290:vgpr_32, implicit $mode, implicit $exec
1813 %1588:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1293:vgpr_32, implicit $mode, implicit $exec
1814 %1589:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1296:vgpr_32, implicit $mode, implicit $exec
1815 %1590:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3345:vgpr_32, implicit $exec
1816 %1591:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1590:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1817 %1592:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3334:vgpr_32, implicit $exec
1818 %1593:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1592:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1819 %1594:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3335:vgpr_32, implicit $exec
1820 %1595:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1594:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1821 %1596:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3336:vgpr_32, implicit $exec
1822 %1597:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1596:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1823 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1824 %1598:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 0, 0, implicit $exec
1825 %1605:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 576, 0, implicit $exec
1826 %1612:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1152, 0, implicit $exec
1827 %1619:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1728, 0, implicit $exec
1828 %1626:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 0, 0, implicit $exec
1829 %1633:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 576, 0, implicit $exec
1830 %1640:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1152, 0, implicit $exec
1831 %1647:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1728, 0, implicit $exec
1832 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1833 undef %3161.sub0:vreg_64_align2 = V_PERM_B32_e64 %1593.sub0:vreg_64_align2, %1591.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec
1834 undef %3145.sub0:vreg_64_align2 = V_PERM_B32_e64 %1593.sub0:vreg_64_align2, %1591.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec
1835 %3161.sub1:vreg_64_align2 = V_PERM_B32_e64 %1597.sub0:vreg_64_align2, %1595.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec
1836 %3145.sub1:vreg_64_align2 = V_PERM_B32_e64 %1597.sub0:vreg_64_align2, %1595.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec
1837 undef %3129.sub0:vreg_64_align2 = V_PERM_B32_e64 %1593.sub1:vreg_64_align2, %1591.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec
1838 undef %3113.sub0:vreg_64_align2 = V_PERM_B32_e64 %1593.sub1:vreg_64_align2, %1591.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec
1839 %3129.sub1:vreg_64_align2 = V_PERM_B32_e64 %1597.sub1:vreg_64_align2, %1595.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec
1840 %3113.sub1:vreg_64_align2 = V_PERM_B32_e64 %1597.sub1:vreg_64_align2, %1595.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec
1841 DS_WRITE_B64_gfx9 %1447:vgpr_32, %3161:vreg_64_align2, 0, 0, implicit $exec
1842 DS_WRITE_B64_gfx9 %1449:vgpr_32, %3145:vreg_64_align2, 0, 0, implicit $exec
1843 DS_WRITE_B64_gfx9 %1451:vgpr_32, %3129:vreg_64_align2, 0, 0, implicit $exec
1844 DS_WRITE_B64_gfx9 %1453:vgpr_32, %3113:vreg_64_align2, 0, 0, implicit $exec
1845 %1678:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3344:vgpr_32, implicit $exec
1846 %1679:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1678:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1847 %1680:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3337:vgpr_32, implicit $exec
1848 %1681:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1680:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1849 %1682:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3338:vgpr_32, implicit $exec
1850 %1683:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1682:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1851 %1684:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3339:vgpr_32, implicit $exec
1852 %1685:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1684:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1853 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1854 %1686:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 0, 0, implicit $exec
1855 %1693:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 576, 0, implicit $exec
1856 %1700:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1152, 0, implicit $exec
1857 %1707:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1728, 0, implicit $exec
1858 %1714:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 0, 0, implicit $exec
1859 %1721:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 576, 0, implicit $exec
1860 %1728:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1152, 0, implicit $exec
1861 %1735:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1728, 0, implicit $exec
1862 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1863 undef %3062.sub0:vreg_64_align2 = V_PERM_B32_e64 %1681.sub0:vreg_64_align2, %1679.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec
1864 undef %3046.sub0:vreg_64_align2 = V_PERM_B32_e64 %1681.sub0:vreg_64_align2, %1679.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec
1865 %3062.sub1:vreg_64_align2 = V_PERM_B32_e64 %1685.sub0:vreg_64_align2, %1683.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec
1866 %3046.sub1:vreg_64_align2 = V_PERM_B32_e64 %1685.sub0:vreg_64_align2, %1683.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec
1867 undef %3029.sub0:vreg_64_align2 = V_PERM_B32_e64 %1681.sub1:vreg_64_align2, %1679.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec
1868 undef %3013.sub0:vreg_64_align2 = V_PERM_B32_e64 %1681.sub1:vreg_64_align2, %1679.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec
1869 %3029.sub1:vreg_64_align2 = V_PERM_B32_e64 %1685.sub1:vreg_64_align2, %1683.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec
1870 %3013.sub1:vreg_64_align2 = V_PERM_B32_e64 %1685.sub1:vreg_64_align2, %1683.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec
1871 DS_WRITE_B64_gfx9 %1447:vgpr_32, %3062:vreg_64_align2, 0, 0, implicit $exec
1872 DS_WRITE_B64_gfx9 %1449:vgpr_32, %3046:vreg_64_align2, 0, 0, implicit $exec
1873 DS_WRITE_B64_gfx9 %1451:vgpr_32, %3029:vreg_64_align2, 0, 0, implicit $exec
1874 DS_WRITE_B64_gfx9 %1453:vgpr_32, %3013:vreg_64_align2, 0, 0, implicit $exec
1875 %1766:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3343:vgpr_32, implicit $exec
1876 %1767:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1766:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1877 %1768:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3340:vgpr_32, implicit $exec
1878 %1769:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1768:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1879 %1770:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3341:vgpr_32, implicit $exec
1880 %1771:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1770:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1881 %1772:vgpr_32 = V_ADD_U32_e32 %48:vgpr_32, %3342:vgpr_32, implicit $exec
1882 %1773:vreg_64_align2 = BUFFER_LOAD_DWORDX2_OFFEN %1772:vgpr_32, %473:sgpr_128, 0, 0, 0, 0, implicit $exec
1883 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1884 %1774:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 0, 0, implicit $exec
1885 %1781:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 576, 0, implicit $exec
1886 %1788:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1152, 0, implicit $exec
1887 %1795:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1728, 0, implicit $exec
1888 %1802:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 0, 0, implicit $exec
1889 %1809:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 576, 0, implicit $exec
1890 %1816:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1152, 0, implicit $exec
1891 %1823:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1728, 0, implicit $exec
1892 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
1893 undef %3185.sub0:vreg_64_align2 = V_PERM_B32_e64 %1769.sub0:vreg_64_align2, %1767.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec
1894 undef %3169.sub0:vreg_64_align2 = V_PERM_B32_e64 %1769.sub0:vreg_64_align2, %1767.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec
1895 %3185.sub1:vreg_64_align2 = V_PERM_B32_e64 %1773.sub0:vreg_64_align2, %1771.sub0:vreg_64_align2, %1422:sreg_32, implicit $exec
1896 %3169.sub1:vreg_64_align2 = V_PERM_B32_e64 %1773.sub0:vreg_64_align2, %1771.sub0:vreg_64_align2, %1424:sreg_32, implicit $exec
1897 undef %3153.sub0:vreg_64_align2 = V_PERM_B32_e64 %1769.sub1:vreg_64_align2, %1767.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec
1898 undef %3137.sub0:vreg_64_align2 = V_PERM_B32_e64 %1769.sub1:vreg_64_align2, %1767.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec
1899 %3153.sub1:vreg_64_align2 = V_PERM_B32_e64 %1773.sub1:vreg_64_align2, %1771.sub1:vreg_64_align2, %1422:sreg_32, implicit $exec
1900 %3137.sub1:vreg_64_align2 = V_PERM_B32_e64 %1773.sub1:vreg_64_align2, %1771.sub1:vreg_64_align2, %1424:sreg_32, implicit $exec
1901 DS_WRITE_B64_gfx9 %1447:vgpr_32, %3185:vreg_64_align2, 0, 0, implicit $exec
1902 DS_WRITE_B64_gfx9 %1449:vgpr_32, %3169:vreg_64_align2, 0, 0, implicit $exec
1903 DS_WRITE_B64_gfx9 %1451:vgpr_32, %3153:vreg_64_align2, 0, 0, implicit $exec
1904 DS_WRITE_B64_gfx9 %1453:vgpr_32, %3137:vreg_64_align2, 0, 0, implicit $exec
1905 %1854:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1167:vgpr_32, implicit $mode, implicit $exec
1906 %1855:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1179:vgpr_32, implicit $mode, implicit $exec
1907 %1856:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1191:vgpr_32, implicit $mode, implicit $exec
1908 %1857:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1203:vgpr_32, implicit $mode, implicit $exec
1909 %1858:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1215:vgpr_32, implicit $mode, implicit $exec
1910 %1859:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1227:vgpr_32, implicit $mode, implicit $exec
1911 %1860:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1239:vgpr_32, implicit $mode, implicit $exec
1912 %1861:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1251:vgpr_32, implicit $mode, implicit $exec
1913 %1862:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1263:vgpr_32, implicit $mode, implicit $exec
1914 %1863:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1275:vgpr_32, implicit $mode, implicit $exec
1915 %1864:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1287:vgpr_32, implicit $mode, implicit $exec
1916 %1865:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1299:vgpr_32, implicit $mode, implicit $exec
1917 undef %3121.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1556:vgpr_32, 0, %1854:vgpr_32, 0, 0, implicit $mode, implicit $exec
1918 %3121.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1554:vgpr_32, 0, %1555:vgpr_32, 0, 0, implicit $mode, implicit $exec
1919 undef %3105.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1559:vgpr_32, 0, %1855:vgpr_32, 0, 0, implicit $mode, implicit $exec
1920 %3105.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1557:vgpr_32, 0, %1558:vgpr_32, 0, 0, implicit $mode, implicit $exec
1921 undef %3089.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1562:vgpr_32, 0, %1856:vgpr_32, 0, 0, implicit $mode, implicit $exec
1922 %3089.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1560:vgpr_32, 0, %1561:vgpr_32, 0, 0, implicit $mode, implicit $exec
1923 undef %3073.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1565:vgpr_32, 0, %1857:vgpr_32, 0, 0, implicit $mode, implicit $exec
1924 %3073.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1563:vgpr_32, 0, %1564:vgpr_32, 0, 0, implicit $mode, implicit $exec
1925 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1598.sub0_sub1:vreg_128_align2, %3121:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1926 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1598.sub2_sub3:vreg_128_align2, %3105:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1927 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1605.sub0_sub1:vreg_128_align2, %3121:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1928 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1605.sub2_sub3:vreg_128_align2, %3105:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1929 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1612.sub0_sub1:vreg_128_align2, %3121:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1930 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1612.sub2_sub3:vreg_128_align2, %3105:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1931 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1619.sub0_sub1:vreg_128_align2, %3121:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1932 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1619.sub2_sub3:vreg_128_align2, %3105:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1933 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1626.sub0_sub1:vreg_128_align2, %3089:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1934 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1626.sub2_sub3:vreg_128_align2, %3073:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1935 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1633.sub0_sub1:vreg_128_align2, %3089:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1936 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1633.sub2_sub3:vreg_128_align2, %3073:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1937 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1640.sub0_sub1:vreg_128_align2, %3089:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1938 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1640.sub2_sub3:vreg_128_align2, %3073:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1939 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1647.sub0_sub1:vreg_128_align2, %3089:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1940 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1647.sub2_sub3:vreg_128_align2, %3073:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1941 undef %2993.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1568:vgpr_32, 0, %1858:vgpr_32, 0, 0, implicit $mode, implicit $exec
1942 %2993.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1566:vgpr_32, 0, %1567:vgpr_32, 0, 0, implicit $mode, implicit $exec
1943 undef %3195.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1571:vgpr_32, 0, %1859:vgpr_32, 0, 0, implicit $mode, implicit $exec
1944 %3195.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1569:vgpr_32, 0, %1570:vgpr_32, 0, 0, implicit $mode, implicit $exec
1945 undef %3178.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1574:vgpr_32, 0, %1860:vgpr_32, 0, 0, implicit $mode, implicit $exec
1946 %3178.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1572:vgpr_32, 0, %1573:vgpr_32, 0, 0, implicit $mode, implicit $exec
1947 undef %3162.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1577:vgpr_32, 0, %1861:vgpr_32, 0, 0, implicit $mode, implicit $exec
1948 %3162.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1575:vgpr_32, 0, %1576:vgpr_32, 0, 0, implicit $mode, implicit $exec
1949 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1686.sub0_sub1:vreg_128_align2, %2993:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1950 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1686.sub2_sub3:vreg_128_align2, %3195:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1951 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1693.sub0_sub1:vreg_128_align2, %2993:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1952 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1693.sub2_sub3:vreg_128_align2, %3195:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1953 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1700.sub0_sub1:vreg_128_align2, %2993:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1954 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1700.sub2_sub3:vreg_128_align2, %3195:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1955 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1707.sub0_sub1:vreg_128_align2, %2993:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1956 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1707.sub2_sub3:vreg_128_align2, %3195:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1957 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1714.sub0_sub1:vreg_128_align2, %3178:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1958 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1714.sub2_sub3:vreg_128_align2, %3162:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1959 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1721.sub0_sub1:vreg_128_align2, %3178:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1960 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1721.sub2_sub3:vreg_128_align2, %3162:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1961 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1728.sub0_sub1:vreg_128_align2, %3178:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1962 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1728.sub2_sub3:vreg_128_align2, %3162:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1963 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1735.sub0_sub1:vreg_128_align2, %3178:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1964 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1735.sub2_sub3:vreg_128_align2, %3162:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1965 undef %3146.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1580:vgpr_32, 0, %1862:vgpr_32, 0, 0, implicit $mode, implicit $exec
1966 %3146.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1578:vgpr_32, 0, %1579:vgpr_32, 0, 0, implicit $mode, implicit $exec
1967 undef %3130.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1583:vgpr_32, 0, %1863:vgpr_32, 0, 0, implicit $mode, implicit $exec
1968 %3130.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1581:vgpr_32, 0, %1582:vgpr_32, 0, 0, implicit $mode, implicit $exec
1969 undef %3114.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1586:vgpr_32, 0, %1864:vgpr_32, 0, 0, implicit $mode, implicit $exec
1970 %3114.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1584:vgpr_32, 0, %1585:vgpr_32, 0, 0, implicit $mode, implicit $exec
1971 undef %3098.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1589:vgpr_32, 0, %1865:vgpr_32, 0, 0, implicit $mode, implicit $exec
1972 %3098.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %1587:vgpr_32, 0, %1588:vgpr_32, 0, 0, implicit $mode, implicit $exec
1973 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1774.sub0_sub1:vreg_128_align2, %3146:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1974 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1774.sub2_sub3:vreg_128_align2, %3130:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1975 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1781.sub0_sub1:vreg_128_align2, %3146:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1976 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1781.sub2_sub3:vreg_128_align2, %3130:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1977 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1788.sub0_sub1:vreg_128_align2, %3146:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1978 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1788.sub2_sub3:vreg_128_align2, %3130:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1979 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1795.sub0_sub1:vreg_128_align2, %3146:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1980 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1795.sub2_sub3:vreg_128_align2, %3130:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1981 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1802.sub0_sub1:vreg_128_align2, %3114:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1982 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1802.sub2_sub3:vreg_128_align2, %3098:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1983 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1809.sub0_sub1:vreg_128_align2, %3114:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1984 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1809.sub2_sub3:vreg_128_align2, %3098:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1985 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1816.sub0_sub1:vreg_128_align2, %3114:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1986 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1816.sub2_sub3:vreg_128_align2, %3098:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1987 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1823.sub0_sub1:vreg_128_align2, %3114:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1988 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %1823.sub2_sub3:vreg_128_align2, %3098:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
1989 %2054:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1347:vgpr_32, implicit $mode, implicit $exec
1990 %2055:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1341:vgpr_32, implicit $mode, implicit $exec
1991 %2056:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1335:vgpr_32, implicit $mode, implicit $exec
1992 %2057:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1329:vgpr_32, implicit $mode, implicit $exec
1993 %2058:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1323:vgpr_32, implicit $mode, implicit $exec
1994 %2059:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1317:vgpr_32, implicit $mode, implicit $exec
1995 %2060:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1311:vgpr_32, implicit $mode, implicit $exec
1996 %2061:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1305:vgpr_32, implicit $mode, implicit $exec
1997 %2062:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1344:vgpr_32, implicit $mode, implicit $exec
1998 %2063:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1338:vgpr_32, implicit $mode, implicit $exec
1999 %2064:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1332:vgpr_32, implicit $mode, implicit $exec
2000 %2065:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1326:vgpr_32, implicit $mode, implicit $exec
2001 %2066:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1320:vgpr_32, implicit $mode, implicit $exec
2002 %2067:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1314:vgpr_32, implicit $mode, implicit $exec
2003 %2068:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1308:vgpr_32, implicit $mode, implicit $exec
2004 %2069:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1302:vgpr_32, implicit $mode, implicit $exec
2005 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
2006 undef %3082.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2068:vgpr_32, 0, %2060:vgpr_32, 0, 0, implicit $mode, implicit $exec
2007 %3082.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2069:vgpr_32, 0, %2061:vgpr_32, 0, 0, implicit $mode, implicit $exec
2008 undef %3066.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2066:vgpr_32, 0, %2058:vgpr_32, 0, 0, implicit $mode, implicit $exec
2009 %3066.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2067:vgpr_32, 0, %2059:vgpr_32, 0, 0, implicit $mode, implicit $exec
2010 undef %3050.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2064:vgpr_32, 0, %2056:vgpr_32, 0, 0, implicit $mode, implicit $exec
2011 %3050.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2065:vgpr_32, 0, %2057:vgpr_32, 0, 0, implicit $mode, implicit $exec
2012 undef %3033.sub1:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2062:vgpr_32, 0, %2054:vgpr_32, 0, 0, implicit $mode, implicit $exec
2013 %3033.sub0:vreg_64_align2 = nofpexcept V_PACK_B32_F16_e64 0, %2063:vgpr_32, 0, %2055:vgpr_32, 0, 0, implicit $mode, implicit $exec
; Eight DS_READ_B128_gfx9 loads, each immediately followed by two
; V_MFMA_F32_32X32X8F16 instructions that consume the loaded value's
; sub0_sub1 and sub2_sub3 halves as the first source operand.
; Every MFMA names the same vreg_512 as its destination and its third source;
; the four such registers (%2986, %3038, %2980, %3003) are each updated by
; four MFMAs in this stretch.
;
; Loads addressed by %44 (offsets 0, 576, 1152, 1728): the sub0_sub1 half is
; paired with %3082 and the sub2_sub3 half with %3066.
2014 %2082:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 0, 0, implicit $exec
2015 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2082.sub0_sub1:vreg_128_align2, %3082:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2016 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2082.sub2_sub3:vreg_128_align2, %3066:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2017 %2095:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 576, 0, implicit $exec
2018 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2095.sub0_sub1:vreg_128_align2, %3082:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2019 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2095.sub2_sub3:vreg_128_align2, %3066:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2020 %2108:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1152, 0, implicit $exec
2021 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2108.sub0_sub1:vreg_128_align2, %3082:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2022 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2108.sub2_sub3:vreg_128_align2, %3066:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2023 %2121:vreg_128_align2 = DS_READ_B128_gfx9 %44:vgpr_32, 1728, 0, implicit $exec
2024 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2121.sub0_sub1:vreg_128_align2, %3082:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2025 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2121.sub2_sub3:vreg_128_align2, %3066:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
; Loads addressed by %45 (same four offsets): the sub0_sub1 half is paired
; with %3050 and the sub2_sub3 half with %3033, updating the same four
; vreg_512 registers in the same order as above.
2026 %2134:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 0, 0, implicit $exec
2027 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2134.sub0_sub1:vreg_128_align2, %3050:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2028 %2986:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2134.sub2_sub3:vreg_128_align2, %3033:vreg_64_align2, %2986:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2029 %2146:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 576, 0, implicit $exec
2030 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2146.sub0_sub1:vreg_128_align2, %3050:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2031 %3038:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2146.sub2_sub3:vreg_128_align2, %3033:vreg_64_align2, %3038:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2032 %2158:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1152, 0, implicit $exec
2033 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2158.sub0_sub1:vreg_128_align2, %3050:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2034 %2980:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2158.sub2_sub3:vreg_128_align2, %3033:vreg_64_align2, %2980:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2035 %2170:vreg_128_align2 = DS_READ_B128_gfx9 %45:vgpr_32, 1728, 0, implicit $exec
2036 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2170.sub0_sub1:vreg_128_align2, %3050:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
2037 %3003:vreg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_vgprcd_e64 %2170.sub2_sub3:vreg_128_align2, %3033:vreg_64_align2, %3003:vreg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
; Inline asm "s_waitcnt vmcnt($0)" with the immediate operand 8 supplied for
; $0. Its flag word marks it sideeffect / mayload / maystore / isconvergent.
2038 INLINEASM &"s_waitcnt vmcnt($0)", 57 /* sideeffect mayload maystore isconvergent attdialect */, 13 /* imm */, 8, !0
; In-place updates of seventeen vgpr_32 registers: %3345 down to %3330 each
; have %50 added (sixteen adds with no register shared between them other
; than %50), then %3329 has the constant 128 added with the `nuw` flag.
2039 %3345:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3345:vgpr_32, implicit $exec
2040 %3344:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3344:vgpr_32, implicit $exec
2041 %3343:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3343:vgpr_32, implicit $exec
2042 %3342:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3342:vgpr_32, implicit $exec
2043 %3341:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3341:vgpr_32, implicit $exec
2044 %3340:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3340:vgpr_32, implicit $exec
2045 %3339:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3339:vgpr_32, implicit $exec
2046 %3338:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3338:vgpr_32, implicit $exec
2047 %3337:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3337:vgpr_32, implicit $exec
2048 %3336:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3336:vgpr_32, implicit $exec
2049 %3335:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3335:vgpr_32, implicit $exec
2050 %3334:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3334:vgpr_32, implicit $exec
2051 %3333:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3333:vgpr_32, implicit $exec
2052 %3332:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3332:vgpr_32, implicit $exec
2053 %3331:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3331:vgpr_32, implicit $exec
2054 %3330:vgpr_32 = V_ADD_U32_e32 %50:sreg_32, %3330:vgpr_32, implicit $exec
2055 %3329:vgpr_32 = nuw V_ADD_U32_e32 128, %3329:vgpr_32, implicit $exec