; Two llc configurations are exercised, both with -amdgpu-sroa=0 and
; promote-alloca disabled. The gfx900 output is matched with the GCN and GFX9
; check prefixes; the fiji output is matched with the GCN and VI check prefixes.
1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
2 ; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
; Stores element 1 of %arg viewed as <2 x i16> (its upper 16 bits) through an
; addrspace(1) pointer. The gfx900 run expects a single
; global_store_short_d16_hi reading v2 directly; the fiji run expects v2 to be
; shifted right by 16 and then written with flat_store_short.
4 ; GCN-LABEL: {{^}}store_global_hi_v2i16:
7 ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
9 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
10 ; VI-NEXT: flat_store_short v[0:1], v2
13 ; GCN-NEXT: s_setpc_b64
14 define void @store_global_hi_v2i16(i16 addrspace(1)* %out, i32 %arg) #0 {
16 ; FIXME: ABI for pre-gfx9
17 %value = bitcast i32 %arg to <2 x i16>
18 %hi = extractelement <2 x i16> %value, i32 1
19 store i16 %hi, i16 addrspace(1)* %out
; Half-precision variant: %arg is bitcast to <2 x half> and element 1 is stored
; through an addrspace(1) pointer. The expected instructions are the same as
; for the integer case: global_store_short_d16_hi on the gfx900 run, shift by
; 16 plus flat_store_short on the fiji run.
23 ; GCN-LABEL: {{^}}store_global_hi_v2f16:
26 ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
28 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
29 ; VI-NEXT: flat_store_short v[0:1], v2
32 ; GCN-NEXT: s_setpc_b64
33 define void @store_global_hi_v2f16(half addrspace(1)* %out, i32 %arg) #0 {
35 ; FIXME: ABI for pre-gfx9
36 %value = bitcast i32 %arg to <2 x half>
37 %hi = extractelement <2 x half> %value, i32 1
38 store half %hi, half addrspace(1)* %out
; Expresses the high-half extraction as scalar IR (lshr by 16, then trunc to
; i16) instead of a vector extract, and stores through an addrspace(1) pointer.
; The gfx900 run expects the shift to be absorbed into
; global_store_short_d16_hi; the fiji run keeps an explicit shift before
; flat_store_short.
42 ; GCN-LABEL: {{^}}store_global_hi_i32_shift:
45 ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
47 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
48 ; VI-NEXT: flat_store_short v[0:1], v2
51 ; GCN-NEXT: s_setpc_b64
52 define void @store_global_hi_i32_shift(i16 addrspace(1)* %out, i32 %value) #0 {
54 %hi32 = lshr i32 %value, 16
55 %hi = trunc i32 %hi32 to i16
56 store i16 %hi, i16 addrspace(1)* %out
; Byte-sized variant: element 1 of the <2 x i16> view of %arg is truncated to
; i8 and stored through an addrspace(1) pointer. The gfx900 run expects
; global_store_byte_d16_hi on v2; the fiji run expects a shift right by 16
; followed by flat_store_byte.
60 ; GCN-LABEL: {{^}}store_global_hi_v2i16_i8:
63 ; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off
65 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
66 ; VI-NEXT: flat_store_byte v[0:1], v2
69 ; GCN-NEXT: s_setpc_b64
70 define void @store_global_hi_v2i16_i8(i8 addrspace(1)* %out, i32 %arg) #0 {
72 %value = bitcast i32 %arg to <2 x i16>
73 %hi = extractelement <2 x i16> %value, i32 1
74 %trunc = trunc i16 %hi to i8
75 store i8 %trunc, i8 addrspace(1)* %out
; Scalar form of the byte store: %value is shifted right by 16, truncated to
; i8 and stored through an addrspace(1) pointer. The gfx900 run expects
; global_store_byte_d16_hi; the fiji run expects an explicit shift followed by
; flat_store_byte.
79 ; GCN-LABEL: {{^}}store_global_hi_i8_shift:
82 ; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off
84 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
85 ; VI-NEXT: flat_store_byte v[0:1], v2
88 ; GCN-NEXT: s_setpc_b64
89 define void @store_global_hi_i8_shift(i8 addrspace(1)* %out, i32 %value) #0 {
91 %hi32 = lshr i32 %value, 16
92 %hi = trunc i32 %hi32 to i8
93 store i8 %hi, i8 addrspace(1)* %out
; High-half i16 store through an addrspace(1) pointer advanced by a constant
; positive GEP. The gfx900 run expects the displacement folded into the store
; as an immediate offset of 4094; the fiji run expects a v_add_u32_e32 /
; v_addc_u32_e32 pair and a flat_store_short with no offset operand.
97 ; GCN-LABEL: {{^}}store_global_hi_v2i16_max_offset:
99 ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094
101 ; VI-DAG: v_add_u32_e32
102 ; VI-DAG: v_addc_u32_e32
103 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
105 ; VI: flat_store_short v[0:1], v2{{$}}
106 ; GCN-NEXT: s_waitcnt
107 ; GCN-NEXT: s_setpc_b64
108 define void @store_global_hi_v2i16_max_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
110 ; FIXME: ABI for pre-gfx9
111 %value = bitcast i32 %arg to <2 x i16>
112 %hi = extractelement <2 x i16> %value, i32 1
; 2047 i16 elements = 4094 bytes, the offset expected on the gfx900 run.
113 %gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 2047
114 store i16 %hi, i16 addrspace(1)* %gep
; High-half i16 store through an addrspace(1) pointer moved backwards by a
; constant GEP. The gfx900 run expects the displacement folded into the store
; as an immediate offset of -4096; the fiji run expects a v_add_u32_e32 /
; v_addc_u32_e32 pair and a flat_store_short whose data operand is a
; single-digit VGPR at end of line.
118 ; GCN-LABEL: {{^}}store_global_hi_v2i16_min_offset:
120 ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}
122 ; VI-DAG: v_add_u32_e32
123 ; VI-DAG: v_addc_u32_e32
124 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
126 ; VI: flat_store_short v[0:1], v{{[0-9]$}}
127 ; GCN-NEXT: s_waitcnt
128 ; GCN-NEXT: s_setpc_b64
129 define void @store_global_hi_v2i16_min_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
131 %value = bitcast i32 %arg to <2 x i16>
132 %hi = extractelement <2 x i16> %value, i32 1
; -2048 i16 elements = -4096 bytes, the offset expected on the gfx900 run.
133 %gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 -2048
134 store i16 %hi, i16 addrspace(1)* %gep
; Byte store of the truncated high half through an addrspace(1) pointer plus a
; constant 4095-byte GEP. The gfx900 run expects global_store_byte_d16_hi with
; an immediate offset of 4095; the fiji run expects a v_add_u32_e32 /
; v_addc_u32_e32 pair and a flat_store_byte with no offset operand.
138 ; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_max_offset:
140 ; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095
142 ; VI-DAG: v_add_u32_e32
143 ; VI-DAG: v_addc_u32_e32
144 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
145 ; VI: flat_store_byte v[0:1], v{{[0-9]$}}
147 ; GCN-NEXT: s_waitcnt
148 ; GCN-NEXT: s_setpc_b64
149 define void @store_global_hi_v2i16_i8_max_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
151 %value = bitcast i32 %arg to <2 x i16>
152 %hi = extractelement <2 x i16> %value, i32 1
153 %trunc = trunc i16 %hi to i8
154 %gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 4095
155 store i8 %trunc, i8 addrspace(1)* %gep
; Byte store of the truncated high half through an addrspace(1) pointer moved
; back by 4095 bytes. The gfx900 run expects global_store_byte_d16_hi with an
; immediate offset of -4095; the fiji run expects a v_add_u32_e32 /
; v_addc_u32_e32 pair and a flat_store_byte with no offset operand.
; NOTE(review): the i16 min_offset test uses a -4096 byte displacement while
; this one uses -4095 - confirm whether -4096 was intended here.
159 ; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_min_offset:
161 ; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095
163 ; VI-DAG: v_add_u32_e32
164 ; VI-DAG: v_addc_u32_e32
165 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
167 ; VI: flat_store_byte v[0:1], v{{[0-9]$}}
168 ; GCN-NEXT: s_waitcnt
169 ; GCN-NEXT: s_setpc_b64
170 define void @store_global_hi_v2i16_i8_min_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
172 %value = bitcast i32 %arg to <2 x i16>
173 %hi = extractelement <2 x i16> %value, i32 1
174 %trunc = trunc i16 %hi to i8
175 %gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 -4095
176 store i8 %trunc, i8 addrspace(1)* %gep
; Stores element 1 of %arg viewed as <2 x i16> through an addrspace(4) pointer.
; The gfx900 run expects flat_store_short_d16_hi on v2 with no trailing
; operands; the fiji run expects a shift right by 16 followed by
; flat_store_short.
180 ; GCN-LABEL: {{^}}store_flat_hi_v2i16:
183 ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
185 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
186 ; VI-NEXT: flat_store_short v[0:1], v2
188 ; GCN-NEXT: s_waitcnt
189 ; GCN-NEXT: s_setpc_b64
190 define void @store_flat_hi_v2i16(i16 addrspace(4)* %out, i32 %arg) #0 {
192 %value = bitcast i32 %arg to <2 x i16>
193 %hi = extractelement <2 x i16> %value, i32 1
194 store i16 %hi, i16 addrspace(4)* %out
; Half-precision variant of the addrspace(4) store: element 1 of the
; <2 x half> view of %arg is written. The gfx900 run expects
; flat_store_short_d16_hi; the fiji run expects a shift right by 16 followed
; by flat_store_short.
198 ; GCN-LABEL: {{^}}store_flat_hi_v2f16:
201 ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
203 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
204 ; VI-NEXT: flat_store_short v[0:1], v2
206 ; GCN-NEXT: s_waitcnt
207 ; GCN-NEXT: s_setpc_b64
208 define void @store_flat_hi_v2f16(half addrspace(4)* %out, i32 %arg) #0 {
210 %value = bitcast i32 %arg to <2 x half>
211 %hi = extractelement <2 x half> %value, i32 1
212 store half %hi, half addrspace(4)* %out
; Scalar form of the addrspace(4) i16 store: %value is shifted right by 16 and
; truncated to i16. The gfx900 run expects flat_store_short_d16_hi with no
; separate shift; the fiji run expects an explicit shift followed by
; flat_store_short.
216 ; GCN-LABEL: {{^}}store_flat_hi_i32_shift:
219 ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
221 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
222 ; VI-NEXT: flat_store_short v[0:1], v2
224 ; GCN-NEXT: s_waitcnt
225 ; GCN-NEXT: s_setpc_b64
226 define void @store_flat_hi_i32_shift(i16 addrspace(4)* %out, i32 %value) #0 {
228 %hi32 = lshr i32 %value, 16
229 %hi = trunc i32 %hi32 to i16
230 store i16 %hi, i16 addrspace(4)* %out
; Byte-sized addrspace(4) store: element 1 of the <2 x i16> view of %arg is
; truncated to i8 before being written. The gfx900 run expects
; flat_store_byte_d16_hi; the fiji run expects a shift right by 16 followed by
; flat_store_byte.
234 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8:
237 ; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
239 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
240 ; VI-NEXT: flat_store_byte v[0:1], v2
242 ; GCN-NEXT: s_waitcnt
243 ; GCN-NEXT: s_setpc_b64
244 define void @store_flat_hi_v2i16_i8(i8 addrspace(4)* %out, i32 %arg) #0 {
246 %value = bitcast i32 %arg to <2 x i16>
247 %hi = extractelement <2 x i16> %value, i32 1
248 %trunc = trunc i16 %hi to i8
249 store i8 %trunc, i8 addrspace(4)* %out
; Scalar form of the addrspace(4) byte store: %value is shifted right by 16
; and truncated to i8. The gfx900 run expects flat_store_byte_d16_hi; the fiji
; run expects an explicit shift followed by flat_store_byte.
253 ; GCN-LABEL: {{^}}store_flat_hi_i8_shift:
256 ; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
258 ; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
259 ; VI-NEXT: flat_store_byte v[0:1], v2
261 ; GCN-NEXT: s_waitcnt
262 ; GCN-NEXT: s_setpc_b64
263 define void @store_flat_hi_i8_shift(i8 addrspace(4)* %out, i32 %value) #0 {
265 %hi32 = lshr i32 %value, 16
266 %hi = trunc i32 %hi32 to i8
267 store i8 %hi, i8 addrspace(4)* %out
; High-half i16 store through an addrspace(4) pointer advanced by a constant
; positive GEP. The gfx900 run expects flat_store_short_d16_hi with an
; immediate offset of 4094; the fiji run expects a v_add_u32_e32 /
; v_addc_u32_e32 pair and a flat_store_short with no offset operand.
271 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_max_offset:
273 ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}
275 ; VI-DAG: v_add_u32_e32
276 ; VI-DAG: v_addc_u32_e32
277 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
278 ; VI: flat_store_short v[0:1], v2{{$}}
279 ; GCN-NEXT: s_waitcnt
280 ; GCN-NEXT: s_setpc_b64
281 define void @store_flat_hi_v2i16_max_offset(i16 addrspace(4)* %out, i32 %arg) #0 {
283 %value = bitcast i32 %arg to <2 x i16>
284 %hi = extractelement <2 x i16> %value, i32 1
; 2047 i16 elements = 4094 bytes, the offset expected on the gfx900 run.
285 %gep = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 2047
286 store i16 %hi, i16 addrspace(4)* %gep
; High-half i16 store through an addrspace(4) pointer moved backwards by a
; constant GEP. Both runs expect an explicit 32-bit add (any of the
; v_add/v_add_co, i32/u32 spellings) and a store with no offset operand; the
; gfx900 run additionally expects v_addc_co_u32_e32 immediately before its
; flat_store_short_d16_hi.
290 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset:
292 ; GCN: v_add{{(_co)?}}_{{i|u}}32_e32
294 ; GFX9: v_addc_co_u32_e32
296 ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
297 ; VI: flat_store_short v[0:1], v2{{$}}
298 ; GCN-NEXT: s_waitcnt
299 ; GCN-NEXT: s_setpc_b64
300 define void @store_flat_hi_v2i16_neg_offset(i16 addrspace(4)* %out, i32 %arg) #0 {
302 %value = bitcast i32 %arg to <2 x i16>
303 %hi = extractelement <2 x i16> %value, i32 1
; -1023 i16 elements = -2046 bytes; the checks expect this to be added to the
; address rather than encoded on the store.
304 %gep = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 -1023
305 store i16 %hi, i16 addrspace(4)* %gep
; Byte store of the truncated high half through an addrspace(4) pointer plus a
; constant 4095-byte GEP. The gfx900 run expects flat_store_byte_d16_hi with
; an immediate offset of 4095; the fiji run expects a shift, a v_add_u32_e32 /
; v_addc_u32_e32 pair (in any order) and a flat_store_byte with no offset.
309 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_max_offset:
311 ; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}
313 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
314 ; VI-DAG: v_add_u32_e32
315 ; VI-DAG: v_addc_u32_e32
316 ; VI: flat_store_byte v[0:1], v2{{$}}
317 ; GCN-NEXT: s_waitcnt
318 ; GCN-NEXT: s_setpc_b64
319 define void @store_flat_hi_v2i16_i8_max_offset(i8 addrspace(4)* %out, i32 %arg) #0 {
321 %value = bitcast i32 %arg to <2 x i16>
322 %hi = extractelement <2 x i16> %value, i32 1
323 %trunc = trunc i16 %hi to i8
324 %gep = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 4095
325 store i8 %trunc, i8 addrspace(4)* %gep
; Byte store of the truncated high half through an addrspace(4) pointer moved
; back by 4095 bytes. Both runs expect an explicit add / add-with-carry pair
; and a store with no offset operand: flat_store_byte_d16_hi on the gfx900
; run, shift by 16 plus flat_store_byte on the fiji run.
329 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset:
331 ; GCN-DAG: v_add{{(_co)?}}_{{i|u}}32_e32
332 ; VI-DAG: v_addc_u32_e32
333 ; GFX9-DAG: v_addc_co_u32_e32
335 ; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
336 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
337 ; VI: flat_store_byte v[0:1], v2{{$}}
338 ; GCN-NEXT: s_waitcnt
339 ; GCN-NEXT: s_setpc_b64
340 define void @store_flat_hi_v2i16_i8_neg_offset(i8 addrspace(4)* %out, i32 %arg) #0 {
342 %value = bitcast i32 %arg to <2 x i16>
343 %hi = extractelement <2 x i16> %value, i32 1
344 %trunc = trunc i16 %hi to i8
345 %gep = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 -4095
346 store i8 %trunc, i8 addrspace(4)* %gep
; Stores element 1 of %arg viewed as <2 x i16> through a pointer with no
; address-space qualifier (the "private" case, going by the test name). The
; gfx900 run expects buffer_store_short_d16_hi with data in v1 and the address
; in v0 (offen); the fiji run expects v1 shifted right by 16 and a plain
; buffer_store_short.
350 ; GCN-LABEL: {{^}}store_private_hi_v2i16:
353 ; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}
355 ; VI: v_lshrrev_b32_e32 v1, 16, v1
356 ; VI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}
358 ; GCN-NEXT: s_waitcnt
359 ; GCN-NEXT: s_setpc_b64
360 define void @store_private_hi_v2i16(i16* %out, i32 %arg) #0 {
362 ; FIXME: ABI for pre-gfx9
363 %value = bitcast i32 %arg to <2 x i16>
364 %hi = extractelement <2 x i16> %value, i32 1
365 store i16 %hi, i16* %out
; Half-precision variant of the unqualified-pointer store: element 1 of the
; <2 x half> view of %arg is written. The gfx900 run expects
; buffer_store_short_d16_hi; the fiji run expects a shift right by 16 followed
; by buffer_store_short.
369 ; GCN-LABEL: {{^}}store_private_hi_v2f16:
372 ; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}
374 ; VI: v_lshrrev_b32_e32 v1, 16, v1
375 ; VI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}
377 ; GCN-NEXT: s_waitcnt
378 ; GCN-NEXT: s_setpc_b64
379 define void @store_private_hi_v2f16(half* %out, i32 %arg) #0 {
381 ; FIXME: ABI for pre-gfx9
382 %value = bitcast i32 %arg to <2 x half>
383 %hi = extractelement <2 x half> %value, i32 1
384 store half %hi, half* %out
; Scalar form of the unqualified-pointer i16 store: %value is shifted right by
; 16 and truncated to i16. The gfx900 run expects buffer_store_short_d16_hi
; with no separate shift; the fiji run expects an explicit shift followed by
; buffer_store_short.
388 ; GCN-LABEL: {{^}}store_private_hi_i32_shift:
391 ; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}
393 ; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
394 ; VI-NEXT: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}
396 ; GCN-NEXT: s_waitcnt
397 ; GCN-NEXT: s_setpc_b64
398 define void @store_private_hi_i32_shift(i16* %out, i32 %value) #0 {
400 %hi32 = lshr i32 %value, 16
401 %hi = trunc i32 %hi32 to i16
402 store i16 %hi, i16* %out
; Byte-sized unqualified-pointer store: element 1 of the <2 x i16> view of
; %arg is truncated to i8 before being written. The gfx900 run expects
; buffer_store_byte_d16_hi; the fiji run expects a shift right by 16 followed
; by buffer_store_byte.
406 ; GCN-LABEL: {{^}}store_private_hi_v2i16_i8:
409 ; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}
411 ; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
412 ; VI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}
414 ; GCN-NEXT: s_waitcnt
415 ; GCN-NEXT: s_setpc_b64
416 define void @store_private_hi_v2i16_i8(i8* %out, i32 %arg) #0 {
418 %value = bitcast i32 %arg to <2 x i16>
419 %hi = extractelement <2 x i16> %value, i32 1
420 %trunc = trunc i16 %hi to i8
421 store i8 %trunc, i8* %out
; Scalar form of the unqualified-pointer byte store: %value is shifted right
; by 16 and truncated to i8. The gfx900 run expects buffer_store_byte_d16_hi;
; the fiji run expects an explicit shift followed by buffer_store_byte.
425 ; GCN-LABEL: {{^}}store_private_hi_i8_shift:
428 ; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}
430 ; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
431 ; VI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}
433 ; GCN-NEXT: s_waitcnt
434 ; GCN-NEXT: s_setpc_b64
435 define void @store_private_hi_i8_shift(i8* %out, i32 %value) #0 {
437 %hi32 = lshr i32 %value, 16
438 %hi = trunc i32 %hi32 to i8
439 store i8 %hi, i8* %out
; High-half i16 store into a byval i16 argument at a constant GEP. Both runs
; expect a buffer store with no VGPR address (off), s5 as the scalar offset
; and an immediate offset of 4094: buffer_store_short_d16_hi on the gfx900
; run, shift by 16 plus buffer_store_short on the fiji run.
443 ; GCN-LABEL: {{^}}store_private_hi_v2i16_max_offset:
445 ; GFX9: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094{{$}}
447 ; VI: v_lshrrev_b32_e32 v0, 16, v0
448 ; VI-NEXT: buffer_store_short v0, off, s[0:3], s5 offset:4094{{$}}
450 ; GCN-NEXT: s_waitcnt
451 ; GCN-NEXT: s_setpc_b64
452 define void @store_private_hi_v2i16_max_offset(i16* byval %out, i32 %arg) #0 {
454 %value = bitcast i32 %arg to <2 x i16>
455 %hi = extractelement <2 x i16> %value, i32 1
; 2045 i16 elements = 4090 bytes; the expected offset is 4094, so the byval
; object presumably starts 4 bytes above s5 - TODO confirm.
456 %gep = getelementptr inbounds i16, i16* %out, i64 2045
457 store i16 %hi, i16* %gep
; Volatile high-half i16 store to a null unqualified pointer. Both runs expect
; a buffer store with no VGPR address (off) and no immediate offset:
; buffer_store_short_d16_hi on the gfx900 run, shift by 16 plus
; buffer_store_short on the fiji run.
463 ; GCN-LABEL: {{^}}store_private_hi_v2i16_nooff:
466 ; GFX9-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s4{{$}}
468 ; VI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
469 ; VI-NEXT: buffer_store_short v0, off, s[0:3], s4{{$}}
471 ; GCN-NEXT: s_waitcnt
472 ; GCN-NEXT: s_setpc_b64
473 define void @store_private_hi_v2i16_nooff(i32 %arg) #0 {
475 ; FIXME: ABI for pre-gfx9
476 %value = bitcast i32 %arg to <2 x i16>
477 %hi = extractelement <2 x i16> %value, i32 1
478 store volatile i16 %hi, i16* null
; Volatile byte store of the truncated high half to a null unqualified
; pointer. Both runs expect a buffer store with no VGPR address (off) and no
; immediate offset: buffer_store_byte_d16_hi on the gfx900 run, shift by 16
; plus buffer_store_byte on the fiji run.
483 ; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_nooff:
486 ; GFX9-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s4{{$}}
488 ; VI: v_lshrrev_b32_e32 v0, 16, v0
489 ; VI: buffer_store_byte v0, off, s[0:3], s4{{$}}
491 ; GCN-NEXT: s_waitcnt
492 ; GCN-NEXT: s_setpc_b64
493 define void @store_private_hi_v2i16_i8_nooff(i32 %arg) #0 {
495 %value = bitcast i32 %arg to <2 x i16>
496 %hi = extractelement <2 x i16> %value, i32 1
497 %trunc = trunc i16 %hi to i8
498 store volatile i8 %trunc, i8* null
; Stores element 1 of %arg viewed as <2 x i16> through an addrspace(3)
; pointer. The gfx900 run expects ds_write_b16_d16_hi with the address in v0
; and data in v1; the fiji run expects v1 shifted right by 16 and then
; ds_write_b16.
502 ; GCN-LABEL: {{^}}store_local_hi_v2i16:
505 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
507 ; VI: v_lshrrev_b32_e32 v1, 16, v1
508 ; VI: ds_write_b16 v0, v1
510 ; GCN-NEXT: s_waitcnt
511 ; GCN-NEXT: s_setpc_b64
512 define void @store_local_hi_v2i16(i16 addrspace(3)* %out, i32 %arg) #0 {
514 ; FIXME: ABI for pre-gfx9
515 %value = bitcast i32 %arg to <2 x i16>
516 %hi = extractelement <2 x i16> %value, i32 1
517 store i16 %hi, i16 addrspace(3)* %out
; Half-precision variant of the addrspace(3) store: element 1 of the
; <2 x half> view of %arg is written. The gfx900 run expects
; ds_write_b16_d16_hi; the fiji run expects a shift right by 16 followed by
; ds_write_b16.
521 ; GCN-LABEL: {{^}}store_local_hi_v2f16:
524 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
526 ; VI: v_lshrrev_b32_e32 v1, 16, v1
527 ; VI: ds_write_b16 v0, v1
529 ; GCN-NEXT: s_waitcnt
530 ; GCN-NEXT: s_setpc_b64
531 define void @store_local_hi_v2f16(half addrspace(3)* %out, i32 %arg) #0 {
533 ; FIXME: ABI for pre-gfx9
534 %value = bitcast i32 %arg to <2 x half>
535 %hi = extractelement <2 x half> %value, i32 1
536 store half %hi, half addrspace(3)* %out
; Scalar form of the addrspace(3) i16 store: %value is shifted right by 16 and
; truncated to i16. The gfx900 run expects ds_write_b16_d16_hi with no
; separate shift; the fiji run expects an explicit shift followed by
; ds_write_b16.
540 ; GCN-LABEL: {{^}}store_local_hi_i32_shift:
543 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
545 ; VI: v_lshrrev_b32_e32 v1, 16, v1
546 ; VI: ds_write_b16 v0, v1
548 ; GCN-NEXT: s_waitcnt
549 ; GCN-NEXT: s_setpc_b64
550 define void @store_local_hi_i32_shift(i16 addrspace(3)* %out, i32 %value) #0 {
552 %hi32 = lshr i32 %value, 16
553 %hi = trunc i32 %hi32 to i16
554 store i16 %hi, i16 addrspace(3)* %out
; Byte-sized addrspace(3) store: element 1 of the <2 x i16> view of %arg is
; truncated to i8 before being written. The gfx900 run expects
; ds_write_b8_d16_hi; the fiji run expects a shift right by 16 followed by
; ds_write_b8.
558 ; GCN-LABEL: {{^}}store_local_hi_v2i16_i8:
561 ; GFX9-NEXT: ds_write_b8_d16_hi v0, v1{{$}}
563 ; VI: v_lshrrev_b32_e32 v1, 16, v1
564 ; VI: ds_write_b8 v0, v1
566 ; GCN-NEXT: s_waitcnt
567 ; GCN-NEXT: s_setpc_b64
568 define void @store_local_hi_v2i16_i8(i8 addrspace(3)* %out, i32 %arg) #0 {
570 %value = bitcast i32 %arg to <2 x i16>
571 %hi = extractelement <2 x i16> %value, i32 1
572 %trunc = trunc i16 %hi to i8
573 store i8 %trunc, i8 addrspace(3)* %out
; High-half i16 store through an addrspace(3) pointer advanced by a constant
; GEP. Both runs expect the displacement encoded on the DS write as an
; immediate offset of 65534: ds_write_b16_d16_hi on the gfx900 run, shift by
; 16 plus ds_write_b16 on the fiji run.
577 ; GCN-LABEL: {{^}}store_local_hi_v2i16_max_offset:
579 ; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:65534{{$}}
581 ; VI: v_lshrrev_b32_e32 v1, 16, v1
582 ; VI: ds_write_b16 v0, v1 offset:65534{{$}}
584 ; GCN-NEXT: s_waitcnt
585 ; GCN-NEXT: s_setpc_b64
586 define void @store_local_hi_v2i16_max_offset(i16 addrspace(3)* %out, i32 %arg) #0 {
588 ; FIXME: ABI for pre-gfx9
589 %value = bitcast i32 %arg to <2 x i16>
590 %hi = extractelement <2 x i16> %value, i32 1
; 32767 i16 elements = 65534 bytes, the offset expected on both runs.
591 %gep = getelementptr inbounds i16, i16 addrspace(3)* %out, i64 32767
592 store i16 %hi, i16 addrspace(3)* %gep
; High-half i16 store into a stack array that follows another alloca. A
; volatile dword store to %obj0 comes first; the visible checks (gfx900 only)
; expect that buffer_store_dword to be immediately followed by
; buffer_store_short_d16_hi with no VGPR address and an immediate offset of
; 4094.
596 ; GCN-LABEL: {{^}}store_private_hi_v2i16_to_offset:
598 ; GFX9: buffer_store_dword
599 ; GFX9-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s5 offset:4094
600 define void @store_private_hi_v2i16_to_offset(i32 %arg) #0 {
602 %obj0 = alloca [10 x i32], align 4
603 %obj1 = alloca [4096 x i16], align 2
604 %bc = bitcast [10 x i32]* %obj0 to i32*
605 store volatile i32 123, i32* %bc
606 %value = bitcast i32 %arg to <2 x i16>
607 %hi = extractelement <2 x i16> %value, i32 1
; Element 2025 is 4050 bytes into %obj1; the expected offset is 4094, so %obj1
; presumably starts 44 bytes above s5 (40 bytes of %obj0 plus 4) - TODO
; confirm.
608 %gep = getelementptr inbounds [4096 x i16], [4096 x i16]* %obj1, i32 0, i32 2025
609 store i16 %hi, i16* %gep
; Byte variant of the stack-array store: a volatile dword store to %obj0 comes
; first, then the truncated high half of %arg is written into %obj1. The
; visible checks (gfx900 only) expect buffer_store_dword immediately followed
; by buffer_store_byte_d16_hi with no VGPR address and an immediate offset of
; 4095.
613 ; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_to_offset:
615 ; GFX9: buffer_store_dword
616 ; GFX9-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s5 offset:4095
617 define void @store_private_hi_v2i16_i8_to_offset(i32 %arg) #0 {
619 %obj0 = alloca [10 x i32], align 4
620 %obj1 = alloca [4096 x i8], align 2
621 %bc = bitcast [10 x i32]* %obj0 to i32*
622 store volatile i32 123, i32* %bc
623 %value = bitcast i32 %arg to <2 x i16>
624 %hi = extractelement <2 x i16> %value, i32 1
; Element 4051 is 4051 bytes into %obj1; the expected offset is 4095, so %obj1
; presumably starts 44 bytes above s5 (40 bytes of %obj0 plus 4) - TODO
; confirm.
625 %gep = getelementptr inbounds [4096 x i8], [4096 x i8]* %obj1, i32 0, i32 4051
626 %trunc = trunc i16 %hi to i8
627 store i8 %trunc, i8* %gep
; Attribute group #0, referenced by the function definitions in this file;
; it contains only nounwind.
631 attributes #0 = { nounwind }