1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
; Test case: fptrunc of a single float to bfloat, stored through an
; addrspace(1) pointer, compiled with -mcpu=gfx90a.
; The expected code rounds in integer arithmetic: bit 16 of the input
; (v_bfe_u32) plus 0x7fff is added to the raw float bits (v_add3_u32) and the
; upper 16 bits of the result are stored (global_store_short_d16_hi).
; When the input compares unordered with itself (v_cmp_u_f32), the input with
; bit 22 (0x400000) set is selected instead of the rounded sum.
; The two leading v_mov instructions copy the pointer from v[1:2] to v[2:3],
; which is the register pair the store uses as its address.
; NOTE(review): the assertions are autogenerated (see the file header);
; regenerate them with the update script instead of editing them by hand.
4 define void @scalar(float %num, ptr addrspace(1) %p) {
6 ; CHECK: ; %bb.0: ; %entry
7 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8 ; CHECK-NEXT: v_mov_b32_e32 v3, v2
9 ; CHECK-NEXT: v_mov_b32_e32 v2, v1
10 ; CHECK-NEXT: v_bfe_u32 v1, v0, 16, 1
11 ; CHECK-NEXT: s_movk_i32 s4, 0x7fff
12 ; CHECK-NEXT: v_add3_u32 v1, v1, v0, s4
13 ; CHECK-NEXT: v_or_b32_e32 v4, 0x400000, v0
14 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
15 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
16 ; CHECK-NEXT: global_store_short_d16_hi v[2:3], v0, off
17 ; CHECK-NEXT: s_waitcnt vmcnt(0)
18 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Input IR: the conversion and the store whose lowering is asserted above.
20 %conv = fptrunc float %num to bfloat
21 store bfloat %conv, ptr addrspace(1) %p, align 8
; Test case: fptrunc <2 x float> to <2 x bfloat>, stored through an
; addrspace(1) pointer.
; Each element (v0, v1) goes through the same bfe / add3 0x7fff / or 0x400000 /
; cmp_u / cndmask sequence as the scalar case. The two results are then
; combined into one dword by v_perm_b32 with selector 0x7060302 and written
; with a single global_store_dword.
; NOTE(review): selector 0x7060302 presumably picks the upper 16-bit half of
; each source register; confirm against the ISA reference.
; s4 first holds the rounding constant 0x7fff and is reused for the permute
; selector once both elements have been rounded.
25 define void @v2(<2 x float> %num, ptr addrspace(1) %p) {
27 ; CHECK: ; %bb.0: ; %entry
28 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29 ; CHECK-NEXT: v_bfe_u32 v4, v0, 16, 1
30 ; CHECK-NEXT: s_movk_i32 s4, 0x7fff
31 ; CHECK-NEXT: v_add3_u32 v4, v4, v0, s4
32 ; CHECK-NEXT: v_or_b32_e32 v5, 0x400000, v0
33 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
34 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
35 ; CHECK-NEXT: v_bfe_u32 v4, v1, 16, 1
36 ; CHECK-NEXT: v_add3_u32 v4, v4, v1, s4
37 ; CHECK-NEXT: v_or_b32_e32 v5, 0x400000, v1
38 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
39 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v5, vcc
40 ; CHECK-NEXT: s_mov_b32 s4, 0x7060302
41 ; CHECK-NEXT: v_perm_b32 v0, v1, v0, s4
42 ; CHECK-NEXT: global_store_dword v[2:3], v0, off
43 ; CHECK-NEXT: s_waitcnt vmcnt(0)
44 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Input IR: the conversion and the store whose lowering is asserted above.
46 %conv = fptrunc <2 x float> %num to <2 x bfloat>
47 store <2 x bfloat> %conv, ptr addrspace(1) %p, align 8
; Test case: fptrunc <3 x float> to <3 x bfloat>, stored through an
; addrspace(1) pointer.
; The pointer is first copied from v[3:4] to v[4:5]. Elements 0 and 1 are
; rounded and combined by v_perm_b32 into one dword that is stored at offset 0.
; Element 2 is rounded on its own and its upper 16 bits are stored at offset 4
; with global_store_short_d16_hi.
; s4 holds the rounding constant 0x7fff and s5 the permute selector 0x7060302.
51 define void @v3(<3 x float> %num, ptr addrspace(1) %p) {
53 ; CHECK: ; %bb.0: ; %entry
54 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; CHECK-NEXT: v_mov_b32_e32 v5, v4
56 ; CHECK-NEXT: v_mov_b32_e32 v4, v3
57 ; CHECK-NEXT: v_bfe_u32 v3, v0, 16, 1
58 ; CHECK-NEXT: s_movk_i32 s4, 0x7fff
59 ; CHECK-NEXT: v_add3_u32 v3, v3, v0, s4
60 ; CHECK-NEXT: v_or_b32_e32 v6, 0x400000, v0
61 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
62 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc
63 ; CHECK-NEXT: v_bfe_u32 v3, v1, 16, 1
64 ; CHECK-NEXT: v_add3_u32 v3, v3, v1, s4
65 ; CHECK-NEXT: v_or_b32_e32 v6, 0x400000, v1
66 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
67 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc
68 ; CHECK-NEXT: s_mov_b32 s5, 0x7060302
69 ; CHECK-NEXT: v_perm_b32 v0, v1, v0, s5
70 ; CHECK-NEXT: v_bfe_u32 v1, v2, 16, 1
71 ; CHECK-NEXT: v_add3_u32 v1, v1, v2, s4
72 ; CHECK-NEXT: v_or_b32_e32 v3, 0x400000, v2
73 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
74 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
75 ; CHECK-NEXT: global_store_short_d16_hi v[4:5], v1, off offset:4
76 ; CHECK-NEXT: global_store_dword v[4:5], v0, off
77 ; CHECK-NEXT: s_waitcnt vmcnt(0)
78 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Input IR: the conversion and the store whose lowering is asserted above.
80 %conv = fptrunc <3 x float> %num to <3 x bfloat>
81 store <3 x bfloat> %conv, ptr addrspace(1) %p, align 8
; Test case: fptrunc <4 x float> to <4 x bfloat>, stored through an
; addrspace(1) pointer held in v[4:5].
; Elements are rounded pairwise (v2/v3 first, then v0/v1); each pair is
; combined by v_perm_b32 into one dword (v3 and v2 respectively), and the
; resulting register pair v[2:3] is written with one global_store_dwordx2.
; s4 holds the rounding constant 0x7fff and s5 the permute selector 0x7060302.
85 define void @v4(<4 x float> %num, ptr addrspace(1) %p) {
87 ; CHECK: ; %bb.0: ; %entry
88 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89 ; CHECK-NEXT: v_bfe_u32 v6, v2, 16, 1
90 ; CHECK-NEXT: s_movk_i32 s4, 0x7fff
91 ; CHECK-NEXT: v_add3_u32 v6, v6, v2, s4
92 ; CHECK-NEXT: v_or_b32_e32 v7, 0x400000, v2
93 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
94 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v7, vcc
95 ; CHECK-NEXT: v_bfe_u32 v6, v3, 16, 1
96 ; CHECK-NEXT: v_add3_u32 v6, v6, v3, s4
97 ; CHECK-NEXT: v_or_b32_e32 v7, 0x400000, v3
98 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
99 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v7, vcc
100 ; CHECK-NEXT: s_mov_b32 s5, 0x7060302
101 ; CHECK-NEXT: v_perm_b32 v3, v3, v2, s5
102 ; CHECK-NEXT: v_bfe_u32 v2, v0, 16, 1
103 ; CHECK-NEXT: v_add3_u32 v2, v2, v0, s4
104 ; CHECK-NEXT: v_or_b32_e32 v6, 0x400000, v0
105 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
106 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc
107 ; CHECK-NEXT: v_bfe_u32 v2, v1, 16, 1
108 ; CHECK-NEXT: v_add3_u32 v2, v2, v1, s4
109 ; CHECK-NEXT: v_or_b32_e32 v6, 0x400000, v1
110 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
111 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v6, vcc
112 ; CHECK-NEXT: v_perm_b32 v2, v1, v0, s5
113 ; CHECK-NEXT: global_store_dwordx2 v[4:5], v[2:3], off
114 ; CHECK-NEXT: s_waitcnt vmcnt(0)
115 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Input IR: the conversion and the store whose lowering is asserted above.
117 %conv = fptrunc <4 x float> %num to <4 x bfloat>
118 store <4 x bfloat> %conv, ptr addrspace(1) %p, align 8
; Test case: fptrunc <8 x float> to <8 x bfloat>, stored through an
; addrspace(1) pointer held in v[8:9].
; The eight inputs v0..v7 are rounded pairwise, highest pair first; every pair
; is combined by v_perm_b32 into one dword, filling v7, v6, v5 and v4 in that
; order. The four dwords v[4:7] are written with one global_store_dwordx4.
; s4 holds the rounding constant 0x7fff and s5 the permute selector 0x7060302.
122 define void @v8(<8 x float> %num, ptr addrspace(1) %p) {
124 ; CHECK: ; %bb.0: ; %entry
125 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; CHECK-NEXT: v_bfe_u32 v10, v6, 16, 1
127 ; CHECK-NEXT: s_movk_i32 s4, 0x7fff
128 ; CHECK-NEXT: v_add3_u32 v10, v10, v6, s4
129 ; CHECK-NEXT: v_or_b32_e32 v11, 0x400000, v6
130 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
131 ; CHECK-NEXT: v_cndmask_b32_e32 v6, v10, v11, vcc
132 ; CHECK-NEXT: v_bfe_u32 v10, v7, 16, 1
133 ; CHECK-NEXT: v_add3_u32 v10, v10, v7, s4
134 ; CHECK-NEXT: v_or_b32_e32 v11, 0x400000, v7
135 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
136 ; CHECK-NEXT: v_cndmask_b32_e32 v7, v10, v11, vcc
137 ; CHECK-NEXT: s_mov_b32 s5, 0x7060302
138 ; CHECK-NEXT: v_perm_b32 v7, v7, v6, s5
139 ; CHECK-NEXT: v_bfe_u32 v6, v4, 16, 1
140 ; CHECK-NEXT: v_add3_u32 v6, v6, v4, s4
141 ; CHECK-NEXT: v_or_b32_e32 v10, 0x400000, v4
142 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
143 ; CHECK-NEXT: v_cndmask_b32_e32 v4, v6, v10, vcc
144 ; CHECK-NEXT: v_bfe_u32 v6, v5, 16, 1
145 ; CHECK-NEXT: v_add3_u32 v6, v6, v5, s4
146 ; CHECK-NEXT: v_or_b32_e32 v10, 0x400000, v5
147 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
148 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v10, vcc
149 ; CHECK-NEXT: v_perm_b32 v6, v5, v4, s5
150 ; CHECK-NEXT: v_bfe_u32 v4, v2, 16, 1
151 ; CHECK-NEXT: v_add3_u32 v4, v4, v2, s4
152 ; CHECK-NEXT: v_or_b32_e32 v5, 0x400000, v2
153 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
154 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
155 ; CHECK-NEXT: v_bfe_u32 v4, v3, 16, 1
156 ; CHECK-NEXT: v_add3_u32 v4, v4, v3, s4
157 ; CHECK-NEXT: v_or_b32_e32 v5, 0x400000, v3
158 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
159 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
160 ; CHECK-NEXT: v_perm_b32 v5, v3, v2, s5
161 ; CHECK-NEXT: v_bfe_u32 v2, v0, 16, 1
162 ; CHECK-NEXT: v_add3_u32 v2, v2, v0, s4
163 ; CHECK-NEXT: v_or_b32_e32 v3, 0x400000, v0
164 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
165 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
166 ; CHECK-NEXT: v_bfe_u32 v2, v1, 16, 1
167 ; CHECK-NEXT: v_add3_u32 v2, v2, v1, s4
168 ; CHECK-NEXT: v_or_b32_e32 v3, 0x400000, v1
169 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
170 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
171 ; CHECK-NEXT: v_perm_b32 v4, v1, v0, s5
172 ; CHECK-NEXT: global_store_dwordx4 v[8:9], v[4:7], off
173 ; CHECK-NEXT: s_waitcnt vmcnt(0)
174 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Input IR: the conversion and the store whose lowering is asserted above.
176 %conv = fptrunc <8 x float> %num to <8 x bfloat>
177 store <8 x bfloat> %conv, ptr addrspace(1) %p, align 8
; Test case: fptrunc <16 x float> to <16 x bfloat>, stored through an
; addrspace(1) pointer held in v[16:17].
; Inputs v0..v7 are rounded pairwise and combined by v_perm_b32 into v[4:7];
; inputs v8..v15 are handled the same way and end up in v[0:3].
; The result is written with two global_store_dwordx4 instructions: v[0:3] at
; offset 16 and v[4:7] at offset 0.
; s4 holds the rounding constant 0x7fff and s5 the permute selector 0x7060302.
181 define void @v16(<16 x float> %num, ptr addrspace(1) %p) {
183 ; CHECK: ; %bb.0: ; %entry
184 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185 ; CHECK-NEXT: v_bfe_u32 v18, v6, 16, 1
186 ; CHECK-NEXT: s_movk_i32 s4, 0x7fff
187 ; CHECK-NEXT: v_add3_u32 v18, v18, v6, s4
188 ; CHECK-NEXT: v_or_b32_e32 v19, 0x400000, v6
189 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
190 ; CHECK-NEXT: v_cndmask_b32_e32 v6, v18, v19, vcc
191 ; CHECK-NEXT: v_bfe_u32 v18, v7, 16, 1
192 ; CHECK-NEXT: v_add3_u32 v18, v18, v7, s4
193 ; CHECK-NEXT: v_or_b32_e32 v19, 0x400000, v7
194 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
195 ; CHECK-NEXT: v_cndmask_b32_e32 v7, v18, v19, vcc
196 ; CHECK-NEXT: s_mov_b32 s5, 0x7060302
197 ; CHECK-NEXT: v_perm_b32 v7, v7, v6, s5
198 ; CHECK-NEXT: v_bfe_u32 v6, v4, 16, 1
199 ; CHECK-NEXT: v_add3_u32 v6, v6, v4, s4
200 ; CHECK-NEXT: v_or_b32_e32 v18, 0x400000, v4
201 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
202 ; CHECK-NEXT: v_cndmask_b32_e32 v4, v6, v18, vcc
203 ; CHECK-NEXT: v_bfe_u32 v6, v5, 16, 1
204 ; CHECK-NEXT: v_add3_u32 v6, v6, v5, s4
205 ; CHECK-NEXT: v_or_b32_e32 v18, 0x400000, v5
206 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
207 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v18, vcc
208 ; CHECK-NEXT: v_perm_b32 v6, v5, v4, s5
209 ; CHECK-NEXT: v_bfe_u32 v4, v2, 16, 1
210 ; CHECK-NEXT: v_add3_u32 v4, v4, v2, s4
211 ; CHECK-NEXT: v_or_b32_e32 v5, 0x400000, v2
212 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
213 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
214 ; CHECK-NEXT: v_bfe_u32 v4, v3, 16, 1
215 ; CHECK-NEXT: v_add3_u32 v4, v4, v3, s4
216 ; CHECK-NEXT: v_or_b32_e32 v5, 0x400000, v3
217 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
218 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
219 ; CHECK-NEXT: v_perm_b32 v5, v3, v2, s5
220 ; CHECK-NEXT: v_bfe_u32 v2, v0, 16, 1
221 ; CHECK-NEXT: v_add3_u32 v2, v2, v0, s4
222 ; CHECK-NEXT: v_or_b32_e32 v3, 0x400000, v0
223 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
224 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
225 ; CHECK-NEXT: v_bfe_u32 v2, v1, 16, 1
226 ; CHECK-NEXT: v_add3_u32 v2, v2, v1, s4
227 ; CHECK-NEXT: v_or_b32_e32 v3, 0x400000, v1
228 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
229 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
230 ; CHECK-NEXT: v_perm_b32 v4, v1, v0, s5
231 ; CHECK-NEXT: v_bfe_u32 v0, v14, 16, 1
232 ; CHECK-NEXT: v_add3_u32 v0, v0, v14, s4
233 ; CHECK-NEXT: v_or_b32_e32 v1, 0x400000, v14
234 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v14, v14
235 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
236 ; CHECK-NEXT: v_bfe_u32 v1, v15, 16, 1
237 ; CHECK-NEXT: v_add3_u32 v1, v1, v15, s4
238 ; CHECK-NEXT: v_or_b32_e32 v2, 0x400000, v15
239 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v15, v15
240 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
241 ; CHECK-NEXT: v_perm_b32 v3, v1, v0, s5
242 ; CHECK-NEXT: v_bfe_u32 v0, v12, 16, 1
243 ; CHECK-NEXT: v_add3_u32 v0, v0, v12, s4
244 ; CHECK-NEXT: v_or_b32_e32 v1, 0x400000, v12
245 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
246 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
247 ; CHECK-NEXT: v_bfe_u32 v1, v13, 16, 1
248 ; CHECK-NEXT: v_add3_u32 v1, v1, v13, s4
249 ; CHECK-NEXT: v_or_b32_e32 v2, 0x400000, v13
250 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v13, v13
251 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
252 ; CHECK-NEXT: v_perm_b32 v2, v1, v0, s5
253 ; CHECK-NEXT: v_bfe_u32 v0, v10, 16, 1
254 ; CHECK-NEXT: v_add3_u32 v0, v0, v10, s4
255 ; CHECK-NEXT: v_or_b32_e32 v1, 0x400000, v10
256 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
257 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
258 ; CHECK-NEXT: v_bfe_u32 v1, v11, 16, 1
259 ; CHECK-NEXT: v_add3_u32 v1, v1, v11, s4
260 ; CHECK-NEXT: v_or_b32_e32 v10, 0x400000, v11
261 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
262 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc
263 ; CHECK-NEXT: v_perm_b32 v1, v1, v0, s5
264 ; CHECK-NEXT: v_bfe_u32 v0, v8, 16, 1
265 ; CHECK-NEXT: v_add3_u32 v0, v0, v8, s4
266 ; CHECK-NEXT: v_or_b32_e32 v10, 0x400000, v8
267 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
268 ; CHECK-NEXT: v_bfe_u32 v8, v9, 16, 1
269 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
270 ; CHECK-NEXT: v_add3_u32 v8, v8, v9, s4
271 ; CHECK-NEXT: v_or_b32_e32 v10, 0x400000, v9
272 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
273 ; CHECK-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
274 ; CHECK-NEXT: v_perm_b32 v0, v8, v0, s5
275 ; CHECK-NEXT: global_store_dwordx4 v[16:17], v[0:3], off offset:16
276 ; CHECK-NEXT: global_store_dwordx4 v[16:17], v[4:7], off
277 ; CHECK-NEXT: s_waitcnt vmcnt(0)
278 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Input IR: the conversion and the store whose lowering is asserted above.
280 %conv = fptrunc <16 x float> %num to <16 x bfloat>
281 store <16 x bfloat> %conv, ptr addrspace(1) %p, align 8
; Test case: fptrunc <32 x float> to <32 x bfloat>, stored through an
; addrspace(1) pointer.
; Three dwords are loaded from the stack at s32 offsets 0, 4 and 8 into v31,
; v32 and v33: v31 is rounded as the partner of v30, and v[32:33] is the
; address used by the stores.
; The s_waitcnt vmcnt(0) in the middle of the sequence sits immediately before
; the first use of the loaded v31.
; The 32 inputs are rounded pairwise and combined by v_perm_b32 into sixteen
; dwords, written with four global_store_dwordx4 instructions: v[12:15] at
; offset 48, v[8:11] at offset 32, v[0:3] at offset 16 and v[4:7] at offset 0.
; s4 holds the rounding constant 0x7fff and s5 the permute selector 0x7060302.
285 define void @v32(<32 x float> %num, ptr addrspace(1) %p) {
287 ; CHECK: ; %bb.0: ; %entry
288 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289 ; CHECK-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
290 ; CHECK-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4
291 ; CHECK-NEXT: buffer_load_dword v31, off, s[0:3], s32
292 ; CHECK-NEXT: v_bfe_u32 v34, v6, 16, 1
293 ; CHECK-NEXT: s_movk_i32 s4, 0x7fff
294 ; CHECK-NEXT: v_add3_u32 v34, v34, v6, s4
295 ; CHECK-NEXT: v_or_b32_e32 v35, 0x400000, v6
296 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v6, v6
297 ; CHECK-NEXT: v_cndmask_b32_e32 v6, v34, v35, vcc
298 ; CHECK-NEXT: v_bfe_u32 v34, v7, 16, 1
299 ; CHECK-NEXT: v_add3_u32 v34, v34, v7, s4
300 ; CHECK-NEXT: v_or_b32_e32 v35, 0x400000, v7
301 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v7, v7
302 ; CHECK-NEXT: v_cndmask_b32_e32 v7, v34, v35, vcc
303 ; CHECK-NEXT: s_mov_b32 s5, 0x7060302
304 ; CHECK-NEXT: v_perm_b32 v7, v7, v6, s5
305 ; CHECK-NEXT: v_bfe_u32 v6, v4, 16, 1
306 ; CHECK-NEXT: v_add3_u32 v6, v6, v4, s4
307 ; CHECK-NEXT: v_or_b32_e32 v34, 0x400000, v4
308 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v4, v4
309 ; CHECK-NEXT: v_cndmask_b32_e32 v4, v6, v34, vcc
310 ; CHECK-NEXT: v_bfe_u32 v6, v5, 16, 1
311 ; CHECK-NEXT: v_add3_u32 v6, v6, v5, s4
312 ; CHECK-NEXT: v_or_b32_e32 v34, 0x400000, v5
313 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v5, v5
314 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v34, vcc
315 ; CHECK-NEXT: v_perm_b32 v6, v5, v4, s5
316 ; CHECK-NEXT: v_bfe_u32 v4, v2, 16, 1
317 ; CHECK-NEXT: v_add3_u32 v4, v4, v2, s4
318 ; CHECK-NEXT: v_or_b32_e32 v5, 0x400000, v2
319 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v2, v2
320 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc
321 ; CHECK-NEXT: v_bfe_u32 v4, v3, 16, 1
322 ; CHECK-NEXT: v_add3_u32 v4, v4, v3, s4
323 ; CHECK-NEXT: v_or_b32_e32 v5, 0x400000, v3
324 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v3, v3
325 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc
326 ; CHECK-NEXT: v_perm_b32 v5, v3, v2, s5
327 ; CHECK-NEXT: v_bfe_u32 v2, v0, 16, 1
328 ; CHECK-NEXT: v_add3_u32 v2, v2, v0, s4
329 ; CHECK-NEXT: v_or_b32_e32 v3, 0x400000, v0
330 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
331 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
332 ; CHECK-NEXT: v_bfe_u32 v2, v1, 16, 1
333 ; CHECK-NEXT: v_add3_u32 v2, v2, v1, s4
334 ; CHECK-NEXT: v_or_b32_e32 v3, 0x400000, v1
335 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v1, v1
336 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc
337 ; CHECK-NEXT: v_perm_b32 v4, v1, v0, s5
338 ; CHECK-NEXT: v_bfe_u32 v0, v14, 16, 1
339 ; CHECK-NEXT: v_add3_u32 v0, v0, v14, s4
340 ; CHECK-NEXT: v_or_b32_e32 v1, 0x400000, v14
341 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v14, v14
342 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
343 ; CHECK-NEXT: v_bfe_u32 v1, v15, 16, 1
344 ; CHECK-NEXT: v_add3_u32 v1, v1, v15, s4
345 ; CHECK-NEXT: v_or_b32_e32 v2, 0x400000, v15
346 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v15, v15
347 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
348 ; CHECK-NEXT: v_perm_b32 v3, v1, v0, s5
349 ; CHECK-NEXT: v_bfe_u32 v0, v12, 16, 1
350 ; CHECK-NEXT: v_add3_u32 v0, v0, v12, s4
351 ; CHECK-NEXT: v_or_b32_e32 v1, 0x400000, v12
352 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v12, v12
353 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
354 ; CHECK-NEXT: v_bfe_u32 v1, v13, 16, 1
355 ; CHECK-NEXT: v_add3_u32 v1, v1, v13, s4
356 ; CHECK-NEXT: v_or_b32_e32 v2, 0x400000, v13
357 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v13, v13
358 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
359 ; CHECK-NEXT: v_perm_b32 v2, v1, v0, s5
360 ; CHECK-NEXT: v_bfe_u32 v0, v10, 16, 1
361 ; CHECK-NEXT: v_add3_u32 v0, v0, v10, s4
362 ; CHECK-NEXT: v_or_b32_e32 v1, 0x400000, v10
363 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v10, v10
364 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
365 ; CHECK-NEXT: v_bfe_u32 v1, v11, 16, 1
366 ; CHECK-NEXT: v_add3_u32 v1, v1, v11, s4
367 ; CHECK-NEXT: v_or_b32_e32 v10, 0x400000, v11
368 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v11, v11
369 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc
370 ; CHECK-NEXT: v_perm_b32 v1, v1, v0, s5
371 ; CHECK-NEXT: v_bfe_u32 v0, v8, 16, 1
372 ; CHECK-NEXT: v_add3_u32 v0, v0, v8, s4
373 ; CHECK-NEXT: v_or_b32_e32 v10, 0x400000, v8
374 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v8, v8
375 ; CHECK-NEXT: v_bfe_u32 v8, v9, 16, 1
376 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
377 ; CHECK-NEXT: v_add3_u32 v8, v8, v9, s4
378 ; CHECK-NEXT: v_or_b32_e32 v10, 0x400000, v9
379 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v9, v9
380 ; CHECK-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
381 ; CHECK-NEXT: v_perm_b32 v0, v8, v0, s5
382 ; CHECK-NEXT: v_bfe_u32 v8, v22, 16, 1
383 ; CHECK-NEXT: v_add3_u32 v8, v8, v22, s4
384 ; CHECK-NEXT: v_or_b32_e32 v9, 0x400000, v22
385 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v22, v22
386 ; CHECK-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
387 ; CHECK-NEXT: v_bfe_u32 v9, v23, 16, 1
388 ; CHECK-NEXT: v_add3_u32 v9, v9, v23, s4
389 ; CHECK-NEXT: v_or_b32_e32 v10, 0x400000, v23
390 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v23, v23
391 ; CHECK-NEXT: v_cndmask_b32_e32 v9, v9, v10, vcc
392 ; CHECK-NEXT: v_perm_b32 v11, v9, v8, s5
393 ; CHECK-NEXT: v_bfe_u32 v8, v20, 16, 1
394 ; CHECK-NEXT: v_add3_u32 v8, v8, v20, s4
395 ; CHECK-NEXT: v_or_b32_e32 v9, 0x400000, v20
396 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v20, v20
397 ; CHECK-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
398 ; CHECK-NEXT: v_bfe_u32 v9, v21, 16, 1
399 ; CHECK-NEXT: v_add3_u32 v9, v9, v21, s4
400 ; CHECK-NEXT: v_or_b32_e32 v10, 0x400000, v21
401 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v21, v21
402 ; CHECK-NEXT: v_cndmask_b32_e32 v9, v9, v10, vcc
403 ; CHECK-NEXT: v_perm_b32 v10, v9, v8, s5
404 ; CHECK-NEXT: v_bfe_u32 v8, v18, 16, 1
405 ; CHECK-NEXT: v_add3_u32 v8, v8, v18, s4
406 ; CHECK-NEXT: v_or_b32_e32 v9, 0x400000, v18
407 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v18, v18
408 ; CHECK-NEXT: v_cndmask_b32_e32 v8, v8, v9, vcc
409 ; CHECK-NEXT: v_bfe_u32 v9, v19, 16, 1
410 ; CHECK-NEXT: v_add3_u32 v9, v9, v19, s4
411 ; CHECK-NEXT: v_or_b32_e32 v12, 0x400000, v19
412 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v19, v19
413 ; CHECK-NEXT: v_cndmask_b32_e32 v9, v9, v12, vcc
414 ; CHECK-NEXT: v_perm_b32 v9, v9, v8, s5
415 ; CHECK-NEXT: v_bfe_u32 v8, v16, 16, 1
416 ; CHECK-NEXT: v_add3_u32 v8, v8, v16, s4
417 ; CHECK-NEXT: v_or_b32_e32 v12, 0x400000, v16
418 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v16, v16
419 ; CHECK-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc
420 ; CHECK-NEXT: v_bfe_u32 v12, v17, 16, 1
421 ; CHECK-NEXT: v_add3_u32 v12, v12, v17, s4
422 ; CHECK-NEXT: v_or_b32_e32 v13, 0x400000, v17
423 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v17, v17
424 ; CHECK-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
425 ; CHECK-NEXT: v_perm_b32 v8, v12, v8, s5
426 ; CHECK-NEXT: v_bfe_u32 v12, v30, 16, 1
427 ; CHECK-NEXT: v_add3_u32 v12, v12, v30, s4
428 ; CHECK-NEXT: v_or_b32_e32 v13, 0x400000, v30
429 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v30, v30
430 ; CHECK-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
431 ; CHECK-NEXT: s_waitcnt vmcnt(0)
432 ; CHECK-NEXT: v_bfe_u32 v13, v31, 16, 1
433 ; CHECK-NEXT: v_add3_u32 v13, v13, v31, s4
434 ; CHECK-NEXT: v_or_b32_e32 v14, 0x400000, v31
435 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v31, v31
436 ; CHECK-NEXT: v_cndmask_b32_e32 v13, v13, v14, vcc
437 ; CHECK-NEXT: v_perm_b32 v15, v13, v12, s5
438 ; CHECK-NEXT: v_bfe_u32 v12, v28, 16, 1
439 ; CHECK-NEXT: v_add3_u32 v12, v12, v28, s4
440 ; CHECK-NEXT: v_or_b32_e32 v13, 0x400000, v28
441 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v28, v28
442 ; CHECK-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
443 ; CHECK-NEXT: v_bfe_u32 v13, v29, 16, 1
444 ; CHECK-NEXT: v_add3_u32 v13, v13, v29, s4
445 ; CHECK-NEXT: v_or_b32_e32 v14, 0x400000, v29
446 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v29, v29
447 ; CHECK-NEXT: v_cndmask_b32_e32 v13, v13, v14, vcc
448 ; CHECK-NEXT: v_perm_b32 v14, v13, v12, s5
449 ; CHECK-NEXT: v_bfe_u32 v12, v26, 16, 1
450 ; CHECK-NEXT: v_add3_u32 v12, v12, v26, s4
451 ; CHECK-NEXT: v_or_b32_e32 v13, 0x400000, v26
452 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v26, v26
453 ; CHECK-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
454 ; CHECK-NEXT: v_bfe_u32 v13, v27, 16, 1
455 ; CHECK-NEXT: v_add3_u32 v13, v13, v27, s4
456 ; CHECK-NEXT: v_or_b32_e32 v16, 0x400000, v27
457 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v27, v27
458 ; CHECK-NEXT: v_cndmask_b32_e32 v13, v13, v16, vcc
459 ; CHECK-NEXT: v_perm_b32 v13, v13, v12, s5
460 ; CHECK-NEXT: v_bfe_u32 v12, v24, 16, 1
461 ; CHECK-NEXT: v_add3_u32 v12, v12, v24, s4
462 ; CHECK-NEXT: v_or_b32_e32 v16, 0x400000, v24
463 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v24, v24
464 ; CHECK-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
465 ; CHECK-NEXT: v_bfe_u32 v16, v25, 16, 1
466 ; CHECK-NEXT: v_add3_u32 v16, v16, v25, s4
467 ; CHECK-NEXT: v_or_b32_e32 v17, 0x400000, v25
468 ; CHECK-NEXT: v_cmp_u_f32_e32 vcc, v25, v25
469 ; CHECK-NEXT: v_cndmask_b32_e32 v16, v16, v17, vcc
470 ; CHECK-NEXT: v_perm_b32 v12, v16, v12, s5
471 ; CHECK-NEXT: global_store_dwordx4 v[32:33], v[12:15], off offset:48
472 ; CHECK-NEXT: global_store_dwordx4 v[32:33], v[8:11], off offset:32
473 ; CHECK-NEXT: global_store_dwordx4 v[32:33], v[0:3], off offset:16
474 ; CHECK-NEXT: global_store_dwordx4 v[32:33], v[4:7], off
475 ; CHECK-NEXT: s_waitcnt vmcnt(0)
476 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; Input IR: the conversion and the store whose lowering is asserted above.
478 %conv = fptrunc <32 x float> %num to <32 x bfloat>
479 store <32 x bfloat> %conv, ptr addrspace(1) %p, align 8