1 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX9-MUBUF %s
2 ; RxN: llc -mtriple=amdgcn -mcpu=gfx906 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9 %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX803,NO-D16-HI %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-promote-alloca -mattr=+enable-flat-scratch -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX9-FLATSCR %s
; Stores element 1 of %arg viewed as <2 x i16> (its upper 16 bits) through a
; global (addrspace(1)) pointer.
; Expected: the gfx900 runs emit a single global_store_short_d16_hi; the fiji
; run shifts v2 right by 16 and then uses a plain flat_store_short.
6 ; GCN-LABEL: {{^}}store_global_hi_v2i16:
9 ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
11 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
12 ; GFX803-NEXT: flat_store_short v[0:1], v2
15 ; GCN-NEXT: s_setpc_b64
16 define void @store_global_hi_v2i16(ptr addrspace(1) %out, i32 %arg) #0 {
18 ; FIXME: ABI for pre-gfx9
19 %value = bitcast i32 %arg to <2 x i16>
20 %hi = extractelement <2 x i16> %value, i32 1
21 store i16 %hi, ptr addrspace(1) %out
; Same as the <2 x i16> global case, but the 32-bit argument is viewed as
; <2 x half> and element 1 is stored as a half.
; Expected: identical instruction selection (d16_hi store on the gfx900 runs,
; shift + flat_store_short on the fiji run).
25 ; GCN-LABEL: {{^}}store_global_hi_v2f16:
28 ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
30 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
31 ; GFX803-NEXT: flat_store_short v[0:1], v2
34 ; GCN-NEXT: s_setpc_b64
35 define void @store_global_hi_v2f16(ptr addrspace(1) %out, i32 %arg) #0 {
37 ; FIXME: ABI for pre-gfx9
38 %value = bitcast i32 %arg to <2 x half>
39 %hi = extractelement <2 x half> %value, i32 1
40 store half %hi, ptr addrspace(1) %out
; The upper half is produced with scalar ops (lshr by 16, then trunc to i16)
; instead of a vector extract, and stored to a global (addrspace(1)) pointer.
; Expected: the gfx900 runs still select global_store_short_d16_hi; the fiji
; run keeps the explicit shift followed by flat_store_short.
44 ; GCN-LABEL: {{^}}store_global_hi_i32_shift:
47 ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
49 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
50 ; GFX803-NEXT: flat_store_short v[0:1], v2
53 ; GCN-NEXT: s_setpc_b64
54 define void @store_global_hi_i32_shift(ptr addrspace(1) %out, i32 %value) #0 {
56 %hi32 = lshr i32 %value, 16
57 %hi = trunc i32 %hi32 to i16
58 store i16 %hi, ptr addrspace(1) %out
; Extracts element 1 of the <2 x i16> view of %arg, truncates it to i8 and
; stores that byte through a global (addrspace(1)) pointer.
; Expected: global_store_byte_d16_hi on the gfx900 runs; shift right by 16
; followed by flat_store_byte on the fiji run.
62 ; GCN-LABEL: {{^}}store_global_hi_v2i16_i8:
65 ; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off
67 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
68 ; GFX803-NEXT: flat_store_byte v[0:1], v2
71 ; GCN-NEXT: s_setpc_b64
72 define void @store_global_hi_v2i16_i8(ptr addrspace(1) %out, i32 %arg) #0 {
74 %value = bitcast i32 %arg to <2 x i16>
75 %hi = extractelement <2 x i16> %value, i32 1
76 %trunc = trunc i16 %hi to i8
77 store i8 %trunc, ptr addrspace(1) %out
; Byte store of (%value >> 16) truncated to i8, written with scalar lshr/trunc
; rather than a vector extract, to a global (addrspace(1)) pointer.
; Expected: global_store_byte_d16_hi on the gfx900 runs; shift followed by
; flat_store_byte on the fiji run.
81 ; GCN-LABEL: {{^}}store_global_hi_i8_shift:
84 ; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off
86 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
87 ; GFX803-NEXT: flat_store_byte v[0:1], v2
90 ; GCN-NEXT: s_setpc_b64
91 define void @store_global_hi_i8_shift(ptr addrspace(1) %out, i32 %value) #0 {
93 %hi32 = lshr i32 %value, 16
94 %hi = trunc i32 %hi32 to i8
95 store i8 %hi, ptr addrspace(1) %out
; High-half i16 store to a global pointer advanced by 2047 i16 elements,
; i.e. a byte offset of 4094.
; Expected: the gfx900 runs fold the displacement into the instruction
; (offset:4094); the fiji run materialises the address with a 64-bit add
; (v_add_u32 / v_addc_u32) and stores with no offset field.
99 ; GCN-LABEL: {{^}}store_global_hi_v2i16_max_offset:
101 ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094
103 ; GFX803-DAG: v_add_u32_e32
104 ; GFX803-DAG: v_addc_u32_e32
105 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
106 ; GFX803: flat_store_short v[0:1], v2{{$}}
108 ; GCN-NEXT: s_waitcnt
109 ; GCN-NEXT: s_setpc_b64
110 define void @store_global_hi_v2i16_max_offset(ptr addrspace(1) %out, i32 %arg) #0 {
112 ; FIXME: ABI for pre-gfx9
113 %value = bitcast i32 %arg to <2 x i16>
114 %hi = extractelement <2 x i16> %value, i32 1
115 %gep = getelementptr inbounds i16, ptr addrspace(1) %out, i64 2047
116 store i16 %hi, ptr addrspace(1) %gep
; High-half i16 store to a global pointer moved back by 2048 i16 elements,
; i.e. a byte offset of -4096.
; Expected: the gfx900 runs fold the negative displacement (offset:-4096);
; the fiji run computes the address with a 64-bit add and stores without an
; offset field.
120 ; GCN-LABEL: {{^}}store_global_hi_v2i16_min_offset:
122 ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}
124 ; GFX803-DAG: v_add_u32_e32
125 ; GFX803-DAG: v_addc_u32_e32
126 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
127 ; GFX803: flat_store_short v[0:1], v{{[0-9]$}}
129 ; GCN-NEXT: s_waitcnt
130 ; GCN-NEXT: s_setpc_b64
131 define void @store_global_hi_v2i16_min_offset(ptr addrspace(1) %out, i32 %arg) #0 {
133 %value = bitcast i32 %arg to <2 x i16>
134 %hi = extractelement <2 x i16> %value, i32 1
135 %gep = getelementptr inbounds i16, ptr addrspace(1) %out, i64 -2048
136 store i16 %hi, ptr addrspace(1) %gep
; Byte store of the truncated high half to a global pointer advanced by
; 4095 bytes.
; Expected: the gfx900 runs fold the displacement (offset:4095); the fiji run
; computes the address with a 64-bit add and uses flat_store_byte without an
; offset field.
140 ; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_max_offset:
142 ; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095
144 ; GFX803-DAG: v_add_u32_e32
145 ; GFX803-DAG: v_addc_u32_e32
146 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
147 ; GFX803: flat_store_byte v[0:1], v{{[0-9]$}}
149 ; GCN-NEXT: s_waitcnt
150 ; GCN-NEXT: s_setpc_b64
151 define void @store_global_hi_v2i16_i8_max_offset(ptr addrspace(1) %out, i32 %arg) #0 {
153 %value = bitcast i32 %arg to <2 x i16>
154 %hi = extractelement <2 x i16> %value, i32 1
155 %trunc = trunc i16 %hi to i8
156 %gep = getelementptr inbounds i8, ptr addrspace(1) %out, i64 4095
157 store i8 %trunc, ptr addrspace(1) %gep
; Byte store of the truncated high half to a global pointer moved back by
; 4095 bytes.
; Expected: the gfx900 runs fold the negative displacement (offset:-4095);
; the fiji run computes the address with a 64-bit add and uses
; flat_store_byte without an offset field.
161 ; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_min_offset:
163 ; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095
165 ; GFX803-DAG: v_add_u32_e32
166 ; GFX803-DAG: v_addc_u32_e32
167 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
168 ; GFX803: flat_store_byte v[0:1], v{{[0-9]$}}
170 ; GCN-NEXT: s_waitcnt
171 ; GCN-NEXT: s_setpc_b64
172 define void @store_global_hi_v2i16_i8_min_offset(ptr addrspace(1) %out, i32 %arg) #0 {
174 %value = bitcast i32 %arg to <2 x i16>
175 %hi = extractelement <2 x i16> %value, i32 1
176 %trunc = trunc i16 %hi to i8
177 %gep = getelementptr inbounds i8, ptr addrspace(1) %out, i64 -4095
178 store i8 %trunc, ptr addrspace(1) %gep
; Stores element 1 of the <2 x i16> view of %arg through a flat pointer
; (plain `ptr`, no address-space qualifier).
; Expected: flat_store_short_d16_hi with no offset on the gfx900 runs; shift
; right by 16 followed by flat_store_short on the fiji run.
182 ; GCN-LABEL: {{^}}store_flat_hi_v2i16:
185 ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
187 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
188 ; NO-D16-HI-NEXT: flat_store_short v[0:1], v2
190 ; GCN-NEXT: s_waitcnt
191 ; GCN-NEXT: s_setpc_b64
192 define void @store_flat_hi_v2i16(ptr %out, i32 %arg) #0 {
194 %value = bitcast i32 %arg to <2 x i16>
195 %hi = extractelement <2 x i16> %value, i32 1
196 store i16 %hi, ptr %out
; Flat-pointer variant where %arg is viewed as <2 x half> and element 1 is
; stored as a half.
; Expected: flat_store_short_d16_hi on the gfx900 runs; shift + flat_store_short
; on the fiji run.
200 ; GCN-LABEL: {{^}}store_flat_hi_v2f16:
203 ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
205 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
206 ; NO-D16-HI-NEXT: flat_store_short v[0:1], v2
208 ; GCN-NEXT: s_waitcnt
209 ; GCN-NEXT: s_setpc_b64
210 define void @store_flat_hi_v2f16(ptr %out, i32 %arg) #0 {
212 %value = bitcast i32 %arg to <2 x half>
213 %hi = extractelement <2 x half> %value, i32 1
214 store half %hi, ptr %out
; Flat-pointer store of (%value >> 16) truncated to i16, expressed with scalar
; lshr/trunc instead of a vector extract.
; Expected: flat_store_short_d16_hi on the gfx900 runs; shift + flat_store_short
; on the fiji run.
218 ; GCN-LABEL: {{^}}store_flat_hi_i32_shift:
221 ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
223 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
224 ; NO-D16-HI-NEXT: flat_store_short v[0:1], v2
226 ; GCN-NEXT: s_waitcnt
227 ; GCN-NEXT: s_setpc_b64
228 define void @store_flat_hi_i32_shift(ptr %out, i32 %value) #0 {
230 %hi32 = lshr i32 %value, 16
231 %hi = trunc i32 %hi32 to i16
232 store i16 %hi, ptr %out
; Flat-pointer byte store: element 1 of the <2 x i16> view of %arg is
; truncated to i8 before being stored.
; Expected: flat_store_byte_d16_hi on the gfx900 runs; shift + flat_store_byte
; on the fiji run.
236 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8:
239 ; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
241 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
242 ; NO-D16-HI-NEXT: flat_store_byte v[0:1], v2
244 ; GCN-NEXT: s_waitcnt
245 ; GCN-NEXT: s_setpc_b64
246 define void @store_flat_hi_v2i16_i8(ptr %out, i32 %arg) #0 {
248 %value = bitcast i32 %arg to <2 x i16>
249 %hi = extractelement <2 x i16> %value, i32 1
250 %trunc = trunc i16 %hi to i8
251 store i8 %trunc, ptr %out
; Flat-pointer byte store of (%value >> 16) truncated to i8, using scalar
; lshr/trunc.
; Expected: flat_store_byte_d16_hi on the gfx900 runs; shift + flat_store_byte
; on the fiji run.
255 ; GCN-LABEL: {{^}}store_flat_hi_i8_shift:
258 ; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
260 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
261 ; NO-D16-HI-NEXT: flat_store_byte v[0:1], v2
263 ; GCN-NEXT: s_waitcnt
264 ; GCN-NEXT: s_setpc_b64
265 define void @store_flat_hi_i8_shift(ptr %out, i32 %value) #0 {
267 %hi32 = lshr i32 %value, 16
268 %hi = trunc i32 %hi32 to i8
269 store i8 %hi, ptr %out
; High-half i16 store through a flat pointer advanced by 2047 i16 elements
; (byte offset 4094).
; Expected: the gfx900 runs fold the displacement (offset:4094); the fiji run
; computes the address with a 64-bit add and stores without an offset field.
273 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_max_offset:
275 ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}
277 ; GFX803-DAG: v_add_u32_e32
278 ; GFX803-DAG: v_addc_u32_e32
279 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
280 ; GFX803: flat_store_short v[0:1], v2{{$}}
282 ; GCN-NEXT: s_waitcnt
283 ; GCN-NEXT: s_setpc_b64
284 define void @store_flat_hi_v2i16_max_offset(ptr %out, i32 %arg) #0 {
286 %value = bitcast i32 %arg to <2 x i16>
287 %hi = extractelement <2 x i16> %value, i32 1
288 %gep = getelementptr inbounds i16, ptr %out, i64 2047
289 store i16 %hi, ptr %gep
; High-half i16 store through a flat pointer moved back by 1023 i16 elements
; (byte offset -2046, whose low 32 bits are 0xfffff802).
; Expected: the negative displacement is not folded on any run. The gfx900
; runs add 0xfffff802 / -1 to the 64-bit address and then emit
; flat_store_short_d16_hi with no offset; the fiji run also uses an explicit
; 64-bit add before a plain flat_store_short.
293 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset:
295 ; GFX803: v_add{{(_co)?}}_{{i|u}}32_e32
296 ; GFX803: v_addc_u32_e32
298 ; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff802, v
299 ; GFX9-DAG: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, v
301 ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
302 ; GFX803: flat_store_short v[0:1], v2{{$}}
303 ; GCN-NEXT: s_waitcnt
304 ; GCN-NEXT: s_setpc_b64
305 define void @store_flat_hi_v2i16_neg_offset(ptr %out, i32 %arg) #0 {
307 %value = bitcast i32 %arg to <2 x i16>
308 %hi = extractelement <2 x i16> %value, i32 1
309 %gep = getelementptr inbounds i16, ptr %out, i64 -1023
310 store i16 %hi, ptr %gep
; Byte store of the truncated high half through a flat pointer advanced by
; 4095 bytes.
; Expected: the gfx900 runs fold the displacement (offset:4095); the fiji run
; computes the address with a 64-bit add and uses flat_store_byte without an
; offset field.
314 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_max_offset:
316 ; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}
318 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
319 ; GFX803-DAG: v_add_u32_e32
320 ; GFX803-DAG: v_addc_u32_e32
321 ; GFX803: flat_store_byte v[0:1], v2{{$}}
323 ; GCN-NEXT: s_waitcnt
324 ; GCN-NEXT: s_setpc_b64
325 define void @store_flat_hi_v2i16_i8_max_offset(ptr %out, i32 %arg) #0 {
327 %value = bitcast i32 %arg to <2 x i16>
328 %hi = extractelement <2 x i16> %value, i32 1
329 %trunc = trunc i16 %hi to i8
330 %gep = getelementptr inbounds i8, ptr %out, i64 4095
331 store i8 %trunc, ptr %gep
; Byte store of the truncated high half through a flat pointer moved back by
; 4095 bytes (low 32 bits of the displacement are 0xfffff001).
; Expected: the negative displacement is not folded. The gfx900 runs add
; 0xfffff001 / -1 to the 64-bit address and then emit flat_store_byte_d16_hi
; with no offset; the fiji run uses an explicit 64-bit add, the shift by 16,
; and a plain flat_store_byte.
335 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset:
338 ; GFX803-DAG: v_add_u32_e32
339 ; GFX803-DAG: v_addc_u32_e32
341 ; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff001, v
342 ; GFX9-DAG: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, v{{[0-9]+}}, vcc
344 ; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
346 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
347 ; GFX803: flat_store_byte v[0:1], v2{{$}}
349 ; GCN-NEXT: s_waitcnt
350 ; GCN-NEXT: s_setpc_b64
351 define void @store_flat_hi_v2i16_i8_neg_offset(ptr %out, i32 %arg) #0 {
353 %value = bitcast i32 %arg to <2 x i16>
354 %hi = extractelement <2 x i16> %value, i32 1
355 %trunc = trunc i16 %hi to i8
356 %gep = getelementptr inbounds i8, ptr %out, i64 -4095
357 store i8 %trunc, ptr %gep
; Stores element 1 of the <2 x i16> view of %arg through a private
; (addrspace(5)) pointer passed in v0.
; Expected: the default gfx900 run emits buffer_store_short_d16_hi ... offen;
; the gfx900 run with +enable-flat-scratch emits scratch_store_short_d16_hi;
; the fiji run shifts v1 right by 16 and uses a plain buffer_store_short.
361 ; GCN-LABEL: {{^}}store_private_hi_v2i16:
364 ; GFX9-MUBUF-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], 0 offen{{$}}
365 ; GFX9-FLATSCR-NEXT: scratch_store_short_d16_hi v0, v1, off
367 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
368 ; NO-D16-HI: buffer_store_short v1, v0, s[0:3], 0 offen{{$}}
370 ; GCN-NEXT: s_waitcnt
371 ; GCN-NEXT: s_setpc_b64
372 define void @store_private_hi_v2i16(ptr addrspace(5) %out, i32 %arg) #0 {
374 ; FIXME: ABI for pre-gfx9
375 %value = bitcast i32 %arg to <2 x i16>
376 %hi = extractelement <2 x i16> %value, i32 1
377 store i16 %hi, ptr addrspace(5) %out
; Private (addrspace(5)) variant where %arg is viewed as <2 x half> and
; element 1 is stored as a half.
; Expected: same selection as the <2 x i16> private case: a d16_hi buffer or
; scratch store on the gfx900 runs, shift + buffer_store_short on the fiji run.
381 ; GCN-LABEL: {{^}}store_private_hi_v2f16:
384 ; GFX9-MUBUF-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], 0 offen{{$}}
385 ; GFX9-FLATSCR-NEXT: scratch_store_short_d16_hi v0, v1, off{{$}}
387 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
388 ; NO-D16-HI: buffer_store_short v1, v0, s[0:3], 0 offen{{$}}
390 ; GCN-NEXT: s_waitcnt
391 ; GCN-NEXT: s_setpc_b64
392 define void @store_private_hi_v2f16(ptr addrspace(5) %out, i32 %arg) #0 {
394 ; FIXME: ABI for pre-gfx9
395 %value = bitcast i32 %arg to <2 x half>
396 %hi = extractelement <2 x half> %value, i32 1
397 store half %hi, ptr addrspace(5) %out
; Private (addrspace(5)) store of (%value >> 16) truncated to i16, written
; with scalar lshr/trunc.
; Expected: a d16_hi buffer/scratch short store on the gfx900 runs; on the
; fiji run the shift and buffer_store_short must be consecutive.
401 ; GCN-LABEL: {{^}}store_private_hi_i32_shift:
404 ; GFX9-MUBUF-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], 0 offen{{$}}
405 ; GFX9-FLATSCR-NEXT: scratch_store_short_d16_hi v0, v1, off{{$}}
407 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
408 ; NO-D16-HI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen{{$}}
410 ; GCN-NEXT: s_waitcnt
411 ; GCN-NEXT: s_setpc_b64
412 define void @store_private_hi_i32_shift(ptr addrspace(5) %out, i32 %value) #0 {
414 %hi32 = lshr i32 %value, 16
415 %hi = trunc i32 %hi32 to i16
416 store i16 %hi, ptr addrspace(5) %out
; Private (addrspace(5)) byte store: element 1 of the <2 x i16> view of %arg
; is truncated to i8 before the store.
; Expected: buffer_store_byte_d16_hi or scratch_store_byte_d16_hi on the
; gfx900 runs; shift + buffer_store_byte on the fiji run.
420 ; GCN-LABEL: {{^}}store_private_hi_v2i16_i8:
423 ; GFX9-MUBUF-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen{{$}}
424 ; GFX9-FLATSCR-NEXT: scratch_store_byte_d16_hi v0, v1, off{{$}}
426 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
427 ; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen{{$}}
429 ; GCN-NEXT: s_waitcnt
430 ; GCN-NEXT: s_setpc_b64
431 define void @store_private_hi_v2i16_i8(ptr addrspace(5) %out, i32 %arg) #0 {
433 %value = bitcast i32 %arg to <2 x i16>
434 %hi = extractelement <2 x i16> %value, i32 1
435 %trunc = trunc i16 %hi to i8
436 store i8 %trunc, ptr addrspace(5) %out
; Private (addrspace(5)) byte store of (%value >> 16) truncated to i8, using
; scalar lshr/trunc.
; Expected: buffer_store_byte_d16_hi or scratch_store_byte_d16_hi on the
; gfx900 runs; shift + buffer_store_byte on the fiji run.
440 ; GCN-LABEL: {{^}}store_private_hi_i8_shift:
443 ; GFX9-MUBUF-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen{{$}}
444 ; GFX9-FLATSCR-NEXT: scratch_store_byte_d16_hi v0, v1, off{{$}}
446 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
447 ; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen{{$}}
449 ; GCN-NEXT: s_waitcnt
450 ; GCN-NEXT: s_setpc_b64
451 define void @store_private_hi_i8_shift(ptr addrspace(5) %out, i32 %value) #0 {
453 %hi32 = lshr i32 %value, 16
454 %hi = trunc i32 %hi32 to i8
455 store i8 %hi, ptr addrspace(5) %out
; High-half i16 store relative to a byval(i16) private argument, 2047 i16
; elements (4094 bytes) past it. With %out passed byval, %arg arrives in v0.
; Expected: every run addresses the slot as s32 plus an immediate offset:4094
; with no VGPR address; only the fiji run needs the extra shift by 16.
459 ; GCN-LABEL: {{^}}store_private_hi_v2i16_max_offset:
461 ; GFX9-MUBUF: buffer_store_short_d16_hi v0, off, s[0:3], s32 offset:4094{{$}}
462 ; GFX9-FLATSCR: scratch_store_short_d16_hi off, v0, s32 offset:4094{{$}}
464 ; NO-D16-HI: v_lshrrev_b32_e32 v0, 16, v0
465 ; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], s32 offset:4094{{$}}
467 ; GCN-NEXT: s_waitcnt
468 ; GCN-NEXT: s_setpc_b64
469 define void @store_private_hi_v2i16_max_offset(ptr addrspace(5) byval(i16) %out, i32 %arg) #0 {
471 %value = bitcast i32 %arg to <2 x i16>
472 %hi = extractelement <2 x i16> %value, i32 1
473 %gep = getelementptr inbounds i16, ptr addrspace(5) %out, i64 2047
474 store i16 %hi, ptr addrspace(5) %gep
; Volatile high-half i16 store to the null private (addrspace(5)) pointer, so
; there is neither a VGPR address nor an immediate offset.
; Expected: the default gfx900 run emits buffer_store_short_d16_hi with "off"
; and soffset 0; the flat-scratch run first moves 0 into an SGPR and uses it
; as the scratch address; the fiji run shifts and uses buffer_store_short.
480 ; GCN-LABEL: {{^}}store_private_hi_v2i16_nooff:
483 ; GFX9-MUBUF-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], 0{{$}}
484 ; GFX9-FLATSCR-NEXT: s_mov_b32 [[SOFF:s[0-9]+]], 0
485 ; GFX9-FLATSCR-NEXT: scratch_store_short_d16_hi off, v0, [[SOFF]]{{$}}
487 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
488 ; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], 0{{$}}
490 ; GCN-NEXT: s_waitcnt
491 ; GCN-NEXT: s_setpc_b64
492 define void @store_private_hi_v2i16_nooff(i32 %arg) #0 {
494 ; FIXME: ABI for pre-gfx9
495 %value = bitcast i32 %arg to <2 x i16>
496 %hi = extractelement <2 x i16> %value, i32 1
497 store volatile i16 %hi, ptr addrspace(5) null
; Volatile byte store of the truncated high half to the null private
; (addrspace(5)) pointer.
; Expected: buffer_store_byte_d16_hi with "off" and soffset 0 on the default
; gfx900 run; s_mov_b32 0 followed by scratch_store_byte_d16_hi on the
; flat-scratch run; shift + buffer_store_byte on the fiji run.
502 ; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_nooff:
505 ; GFX9-MUBUF-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], 0{{$}}
506 ; GFX9-FLATSCR-NEXT: s_mov_b32 [[SOFF:s[0-9]+]], 0
507 ; GFX9-FLATSCR-NEXT: scratch_store_byte_d16_hi off, v0, [[SOFF]]{{$}}
509 ; NO-D16-HI: v_lshrrev_b32_e32 v0, 16, v0
510 ; NO-D16-HI: buffer_store_byte v0, off, s[0:3], 0{{$}}
512 ; GCN-NEXT: s_waitcnt
513 ; GCN-NEXT: s_setpc_b64
514 define void @store_private_hi_v2i16_i8_nooff(i32 %arg) #0 {
516 %value = bitcast i32 %arg to <2 x i16>
517 %hi = extractelement <2 x i16> %value, i32 1
518 %trunc = trunc i16 %hi to i8
519 store volatile i8 %trunc, ptr addrspace(5) null
; Stores element 1 of the <2 x i16> view of %arg through a local
; (addrspace(3)) pointer.
; Expected: ds_write_b16_d16_hi on the gfx900 runs; shift right by 16
; followed by a plain ds_write_b16 on the fiji run.
523 ; GCN-LABEL: {{^}}store_local_hi_v2i16:
526 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
528 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
529 ; NO-D16-HI: ds_write_b16 v0, v1
531 ; GCN-NEXT: s_waitcnt
532 ; GCN-NEXT: s_setpc_b64
533 define void @store_local_hi_v2i16(ptr addrspace(3) %out, i32 %arg) #0 {
535 ; FIXME: ABI for pre-gfx9
536 %value = bitcast i32 %arg to <2 x i16>
537 %hi = extractelement <2 x i16> %value, i32 1
538 store i16 %hi, ptr addrspace(3) %out
; Local (addrspace(3)) variant where %arg is viewed as <2 x half> and
; element 1 is stored as a half.
; Expected: ds_write_b16_d16_hi on the gfx900 runs; shift + ds_write_b16 on
; the fiji run.
542 ; GCN-LABEL: {{^}}store_local_hi_v2f16:
545 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
547 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
548 ; NO-D16-HI: ds_write_b16 v0, v1
550 ; GCN-NEXT: s_waitcnt
551 ; GCN-NEXT: s_setpc_b64
552 define void @store_local_hi_v2f16(ptr addrspace(3) %out, i32 %arg) #0 {
554 ; FIXME: ABI for pre-gfx9
555 %value = bitcast i32 %arg to <2 x half>
556 %hi = extractelement <2 x half> %value, i32 1
557 store half %hi, ptr addrspace(3) %out
; Local (addrspace(3)) store of (%value >> 16) truncated to i16, written with
; scalar lshr/trunc.
; Expected: ds_write_b16_d16_hi on the gfx900 runs; shift + ds_write_b16 on
; the fiji run.
561 ; GCN-LABEL: {{^}}store_local_hi_i32_shift:
564 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
566 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
567 ; NO-D16-HI: ds_write_b16 v0, v1
569 ; GCN-NEXT: s_waitcnt
570 ; GCN-NEXT: s_setpc_b64
571 define void @store_local_hi_i32_shift(ptr addrspace(3) %out, i32 %value) #0 {
573 %hi32 = lshr i32 %value, 16
574 %hi = trunc i32 %hi32 to i16
575 store i16 %hi, ptr addrspace(3) %out
; Local (addrspace(3)) byte store: element 1 of the <2 x i16> view of %arg is
; truncated to i8 before the store.
; Expected: ds_write_b8_d16_hi on the gfx900 runs; shift + ds_write_b8 on the
; fiji run.
579 ; GCN-LABEL: {{^}}store_local_hi_v2i16_i8:
582 ; GFX9-NEXT: ds_write_b8_d16_hi v0, v1{{$}}
584 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
585 ; NO-D16-HI: ds_write_b8 v0, v1
587 ; GCN-NEXT: s_waitcnt
588 ; GCN-NEXT: s_setpc_b64
589 define void @store_local_hi_v2i16_i8(ptr addrspace(3) %out, i32 %arg) #0 {
591 %value = bitcast i32 %arg to <2 x i16>
592 %hi = extractelement <2 x i16> %value, i32 1
593 %trunc = trunc i16 %hi to i8
594 store i8 %trunc, ptr addrspace(3) %out
; High-half i16 store through a local (addrspace(3)) pointer advanced by
; 32767 i16 elements (byte offset 65534).
; Expected: all runs fold the displacement into the DS instruction
; (offset:65534); only the fiji run needs the extra shift by 16.
598 ; GCN-LABEL: {{^}}store_local_hi_v2i16_max_offset:
600 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:65534{{$}}
602 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
603 ; NO-D16-HI: ds_write_b16 v0, v1 offset:65534{{$}}
605 ; GCN-NEXT: s_waitcnt
606 ; GCN-NEXT: s_setpc_b64
607 define void @store_local_hi_v2i16_max_offset(ptr addrspace(3) %out, i32 %arg) #0 {
609 ; FIXME: ABI for pre-gfx9
610 %value = bitcast i32 %arg to <2 x i16>
611 %hi = extractelement <2 x i16> %value, i32 1
612 %gep = getelementptr inbounds i16, ptr addrspace(3) %out, i64 32767
613 store i16 %hi, ptr addrspace(3) %gep
; High-half i16 store into element 2027 of a stack array ([4096 x i16]
; alloca), preceded by a volatile i32 store through the unrelated private
; pointer %obj0.
; Expected (gfx900 runs only; no fiji checks here): a dword store, a wait on
; vmcnt(0), then the d16_hi short store addressed as s32 + offset:4058.
; NOTE(review): element 2027 is at byte 4054 inside %obj1, so the expected
; 4058 presumably includes a 4-byte frame placement of %obj1 - confirm.
617 ; GCN-LABEL: {{^}}store_private_hi_v2i16_to_offset:
619 ; GFX9-MUBUF: buffer_store_dword
620 ; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
621 ; GFX9-MUBUF-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s32 offset:4058
622 ; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
623 ; GFX9-FLATSCR: scratch_store_dword
624 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
625 ; GFX9-FLATSCR-NEXT: scratch_store_short_d16_hi off, v0, s32 offset:4058
626 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
627 define void @store_private_hi_v2i16_to_offset(i32 %arg, ptr addrspace(5) %obj0) #0 {
629 %obj1 = alloca [4096 x i16], align 2, addrspace(5)
630 store volatile i32 123, ptr addrspace(5) %obj0
631 %value = bitcast i32 %arg to <2 x i16>
632 %hi = extractelement <2 x i16> %value, i32 1
633 %gep = getelementptr inbounds [4096 x i16], ptr addrspace(5) %obj1, i32 0, i32 2027
634 store i16 %hi, ptr addrspace(5) %gep
; Byte store of the truncated high half into element 4055 of a stack array
; ([4096 x i8] alloca), preceded by a volatile i32 store through the
; unrelated private pointer %obj0.
; Expected (gfx900 runs only; no fiji checks here): a dword store, a wait on
; vmcnt(0), then the d16_hi byte store addressed as s32 + offset:4059.
; NOTE(review): element 4055 is at byte 4055 inside %obj1, so the expected
; 4059 presumably includes a 4-byte frame placement of %obj1 - confirm.
638 ; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_to_offset:
640 ; GFX9-MUBUF: buffer_store_dword
641 ; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0)
642 ; GFX9-MUBUF-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s32 offset:4059
643 ; GFX9-FLATSCR: scratch_store_dword
644 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
645 ; GFX9-FLATSCR-NEXT: scratch_store_byte_d16_hi off, v0, s32 offset:4059
646 ; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
647 define void @store_private_hi_v2i16_i8_to_offset(i32 %arg, ptr addrspace(5) %obj0) #0 {
649 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
650 store volatile i32 123, ptr addrspace(5) %obj0
651 %value = bitcast i32 %arg to <2 x i16>
652 %hi = extractelement <2 x i16> %value, i32 1
653 %gep = getelementptr inbounds [4096 x i8], ptr addrspace(5) %obj1, i32 0, i32 4055
654 %trunc = trunc i16 %hi to i8
655 store i8 %trunc, ptr addrspace(5) %gep
659 attributes #0 = { nounwind }