1 ; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX900,GFX9 %s
2 ; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX906,GFX9,NO-D16-HI %s
3 ; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX803,NO-D16-HI %s
; Stores element 1 (the high 16 bits) of %arg, viewed as <2 x i16>, through an
; addrspace(1) pointer.
; On gfx900 the checks expect a single global_store_short_d16_hi. On gfx906
; (built with +sram-ecc) and fiji they expect v2 to be shifted right by 16
; first, followed by global_store_short (gfx906) or flat_store_short (fiji).
5 ; GCN-LABEL: {{^}}store_global_hi_v2i16:
8 ; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off
10 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
11 ; GFX803-NEXT: flat_store_short v[0:1], v2
12 ; GFX906-NEXT: global_store_short v[0:1], v2, off
15 ; GCN-NEXT: s_setpc_b64
16 define void @store_global_hi_v2i16(i16 addrspace(1)* %out, i32 %arg) #0 {
18 ; FIXME: ABI for pre-gfx9
19 %value = bitcast i32 %arg to <2 x i16>
20 %hi = extractelement <2 x i16> %value, i32 1
21 store i16 %hi, i16 addrspace(1)* %out
; Half-precision variant: stores element 1 of %arg, viewed as <2 x half>,
; through an addrspace(1) pointer.
; The expected instructions are the same as for the integer case: a single
; global_store_short_d16_hi on gfx900, and a 16-bit right shift of v2 followed
; by global_store_short (gfx906) or flat_store_short (fiji) elsewhere.
25 ; GCN-LABEL: {{^}}store_global_hi_v2f16:
28 ; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off
30 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
31 ; GFX803-NEXT: flat_store_short v[0:1], v2
32 ; GFX906-NEXT: global_store_short v[0:1], v2, off
35 ; GCN-NEXT: s_setpc_b64
36 define void @store_global_hi_v2f16(half addrspace(1)* %out, i32 %arg) #0 {
38 ; FIXME: ABI for pre-gfx9
39 %value = bitcast i32 %arg to <2 x half>
40 %hi = extractelement <2 x half> %value, i32 1
41 store half %hi, half addrspace(1)* %out
; Same store as the vector-extract form, but the high half is produced with an
; explicit lshr by 16 followed by a trunc to i16, then stored through an
; addrspace(1) pointer.
; gfx900 is expected to fold the shift into global_store_short_d16_hi; gfx906
; and fiji are expected to keep the v_lshrrev_b32 and use a plain short store.
45 ; GCN-LABEL: {{^}}store_global_hi_i32_shift:
48 ; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off
50 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
51 ; GFX803-NEXT: flat_store_short v[0:1], v2
52 ; GFX906-NEXT: global_store_short v[0:1], v2, off
55 ; GCN-NEXT: s_setpc_b64
56 define void @store_global_hi_i32_shift(i16 addrspace(1)* %out, i32 %value) #0 {
58 %hi32 = lshr i32 %value, 16
59 %hi = trunc i32 %hi32 to i16
60 store i16 %hi, i16 addrspace(1)* %out
; Byte store of the high half: element 1 of %arg (as <2 x i16>) is truncated to
; i8 and stored through an i8 addrspace(1) pointer.
; gfx900 is expected to use global_store_byte_d16_hi directly; gfx906 and fiji
; are expected to shift v2 right by 16 and then use global_store_byte (gfx906)
; or flat_store_byte (fiji).
64 ; GCN-LABEL: {{^}}store_global_hi_v2i16_i8:
67 ; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off
69 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
70 ; GFX803-NEXT: flat_store_byte v[0:1], v2
71 ; GFX906-NEXT: global_store_byte v[0:1], v2, off
74 ; GCN-NEXT: s_setpc_b64
75 define void @store_global_hi_v2i16_i8(i8 addrspace(1)* %out, i32 %arg) #0 {
77 %value = bitcast i32 %arg to <2 x i16>
78 %hi = extractelement <2 x i16> %value, i32 1
79 %trunc = trunc i16 %hi to i8
80 store i8 %trunc, i8 addrspace(1)* %out
; Byte store of bits 16..23 of %value, written as lshr by 16 plus trunc to i8,
; through an i8 addrspace(1) pointer.
; gfx900 is expected to select global_store_byte_d16_hi; gfx906 and fiji are
; expected to emit the shift followed by a plain byte store.
84 ; GCN-LABEL: {{^}}store_global_hi_i8_shift:
87 ; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off
89 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
90 ; GFX803-NEXT: flat_store_byte v[0:1], v2
91 ; GFX906-NEXT: global_store_byte v[0:1], v2, off
94 ; GCN-NEXT: s_setpc_b64
95 define void @store_global_hi_i8_shift(i8 addrspace(1)* %out, i32 %value) #0 {
97 %hi32 = lshr i32 %value, 16
98 %hi = trunc i32 %hi32 to i8
99 store i8 %hi, i8 addrspace(1)* %out
; High-half i16 store through an addrspace(1) pointer advanced by 2047 i16
; elements (4094 bytes).
; gfx900 is expected to fold the displacement into the d16_hi store as
; offset:4094. fiji is expected to compute the address with v_add_u32 /
; v_addc_u32 and store with no immediate offset. gfx906 is expected to shift
; and then use global_store_short.
; NOTE(review): the gfx906 store pattern has no end-of-line anchor, so it does
; not pin whether an immediate offset is emitted - confirm against llc output.
103 ; GCN-LABEL: {{^}}store_global_hi_v2i16_max_offset:
105 ; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094
107 ; GFX803-DAG: v_add_u32_e32
108 ; GFX803-DAG: v_addc_u32_e32
109 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
110 ; GFX803: flat_store_short v[0:1], v2{{$}}
112 ; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
113 ; GFX906-NEXT: global_store_short v[0:1], v2, off
115 ; GCN-NEXT: s_waitcnt
116 ; GCN-NEXT: s_setpc_b64
117 define void @store_global_hi_v2i16_max_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
119 ; FIXME: ABI for pre-gfx9
120 %value = bitcast i32 %arg to <2 x i16>
121 %hi = extractelement <2 x i16> %value, i32 1
122 %gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 2047
123 store i16 %hi, i16 addrspace(1)* %gep
; High-half i16 store through an addrspace(1) pointer moved back by 2048 i16
; elements (-4096 bytes).
; gfx900 is expected to encode the displacement as offset:-4096 on the d16_hi
; store. fiji is expected to do the address arithmetic with v_add_u32 /
; v_addc_u32 and store without an offset (data register is any single-digit
; VGPR). gfx906 is expected to shift and then use global_store_short.
; NOTE(review): the gfx906 store pattern is not anchored at end of line, so an
; immediate offset on that instruction is neither required nor rejected.
127 ; GCN-LABEL: {{^}}store_global_hi_v2i16_min_offset:
129 ; GFX900-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}
131 ; GFX803-DAG: v_add_u32_e32
132 ; GFX803-DAG: v_addc_u32_e32
133 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
134 ; GFX803: flat_store_short v[0:1], v{{[0-9]$}}
136 ; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
137 ; GFX906-NEXT: global_store_short v[0:1], v2, off
139 ; GCN-NEXT: s_waitcnt
140 ; GCN-NEXT: s_setpc_b64
141 define void @store_global_hi_v2i16_min_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
143 %value = bitcast i32 %arg to <2 x i16>
144 %hi = extractelement <2 x i16> %value, i32 1
145 %gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 -2048
146 store i16 %hi, i16 addrspace(1)* %gep
; Byte store of the truncated high half through an i8 addrspace(1) pointer
; advanced by 4095 bytes.
; gfx900 is expected to emit global_store_byte_d16_hi with offset:4095. fiji is
; expected to add the displacement with v_add_u32 / v_addc_u32 and use
; flat_store_byte with no offset. gfx906 is expected to shift and then use
; global_store_byte.
; NOTE(review): the gfx906 store pattern is unanchored, so it does not check
; whether the 4095 displacement ends up as an immediate offset.
150 ; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_max_offset:
152 ; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095
154 ; GFX803-DAG: v_add_u32_e32
155 ; GFX803-DAG: v_addc_u32_e32
156 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
157 ; GFX803: flat_store_byte v[0:1], v{{[0-9]$}}
159 ; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
160 ; GFX906-NEXT: global_store_byte v[0:1], v2, off
162 ; GCN-NEXT: s_waitcnt
163 ; GCN-NEXT: s_setpc_b64
164 define void @store_global_hi_v2i16_i8_max_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
166 %value = bitcast i32 %arg to <2 x i16>
167 %hi = extractelement <2 x i16> %value, i32 1
168 %trunc = trunc i16 %hi to i8
169 %gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 4095
170 store i8 %trunc, i8 addrspace(1)* %gep
; Byte store of the truncated high half through an i8 addrspace(1) pointer
; moved back by 4095 bytes.
; gfx900 is expected to emit global_store_byte_d16_hi with offset:-4095. fiji
; is expected to fold the displacement into the address with v_add_u32 /
; v_addc_u32 and store with no offset. gfx906 is expected to shift and then use
; global_store_byte.
; NOTE(review): the gfx906 store pattern is unanchored, so any trailing offset
; operand on that instruction is not verified.
174 ; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_min_offset:
176 ; GFX900-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095
178 ; GFX803-DAG: v_add_u32_e32
179 ; GFX803-DAG: v_addc_u32_e32
180 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
181 ; GFX803: flat_store_byte v[0:1], v{{[0-9]$}}
183 ; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
184 ; GFX906-NEXT: global_store_byte v[0:1], v2, off
186 ; GCN-NEXT: s_waitcnt
187 ; GCN-NEXT: s_setpc_b64
188 define void @store_global_hi_v2i16_i8_min_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
190 %value = bitcast i32 %arg to <2 x i16>
191 %hi = extractelement <2 x i16> %value, i32 1
192 %trunc = trunc i16 %hi to i8
193 %gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 -4095
194 store i8 %trunc, i8 addrspace(1)* %gep
; Stores element 1 (the high 16 bits) of %arg, viewed as <2 x i16>, through a
; pointer with no address-space qualifier (the flat_* instructions are
; expected for it).
; gfx900 is expected to emit flat_store_short_d16_hi with no offset; gfx906 and
; fiji are expected to shift v2 right by 16 and then emit flat_store_short.
198 ; GCN-LABEL: {{^}}store_flat_hi_v2i16:
201 ; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
203 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
204 ; NO-D16-HI-NEXT: flat_store_short v[0:1], v2
206 ; GCN-NEXT: s_waitcnt
207 ; GCN-NEXT: s_setpc_b64
208 define void @store_flat_hi_v2i16(i16* %out, i32 %arg) #0 {
210 %value = bitcast i32 %arg to <2 x i16>
211 %hi = extractelement <2 x i16> %value, i32 1
212 store i16 %hi, i16* %out
; Half-precision variant of the flat high-half store: element 1 of %arg, viewed
; as <2 x half>, is stored through an unqualified half pointer.
; gfx900 is expected to emit flat_store_short_d16_hi; gfx906 and fiji are
; expected to emit a 16-bit right shift of v2 followed by flat_store_short.
216 ; GCN-LABEL: {{^}}store_flat_hi_v2f16:
219 ; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
221 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
222 ; NO-D16-HI-NEXT: flat_store_short v[0:1], v2
224 ; GCN-NEXT: s_waitcnt
225 ; GCN-NEXT: s_setpc_b64
226 define void @store_flat_hi_v2f16(half* %out, i32 %arg) #0 {
228 %value = bitcast i32 %arg to <2 x half>
229 %hi = extractelement <2 x half> %value, i32 1
230 store half %hi, half* %out
; Flat high-half store where the value is formed by lshr 16 + trunc to i16
; instead of a vector extract.
; gfx900 is expected to absorb the shift into flat_store_short_d16_hi; gfx906
; and fiji are expected to keep v_lshrrev_b32 and use flat_store_short.
234 ; GCN-LABEL: {{^}}store_flat_hi_i32_shift:
237 ; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
239 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
240 ; NO-D16-HI-NEXT: flat_store_short v[0:1], v2
242 ; GCN-NEXT: s_waitcnt
243 ; GCN-NEXT: s_setpc_b64
244 define void @store_flat_hi_i32_shift(i16* %out, i32 %value) #0 {
246 %hi32 = lshr i32 %value, 16
247 %hi = trunc i32 %hi32 to i16
248 store i16 %hi, i16* %out
; Flat byte store of the high half: element 1 of %arg (as <2 x i16>) is
; truncated to i8 and stored through an unqualified i8 pointer.
; gfx900 is expected to emit flat_store_byte_d16_hi; gfx906 and fiji are
; expected to shift v2 right by 16 and then emit flat_store_byte.
252 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8:
255 ; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
257 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
258 ; NO-D16-HI-NEXT: flat_store_byte v[0:1], v2
260 ; GCN-NEXT: s_waitcnt
261 ; GCN-NEXT: s_setpc_b64
262 define void @store_flat_hi_v2i16_i8(i8* %out, i32 %arg) #0 {
264 %value = bitcast i32 %arg to <2 x i16>
265 %hi = extractelement <2 x i16> %value, i32 1
266 %trunc = trunc i16 %hi to i8
267 store i8 %trunc, i8* %out
; Flat byte store of bits 16..23 of %value, written as lshr 16 + trunc to i8.
; gfx900 is expected to select flat_store_byte_d16_hi; gfx906 and fiji are
; expected to emit the shift followed by flat_store_byte.
271 ; GCN-LABEL: {{^}}store_flat_hi_i8_shift:
274 ; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
276 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
277 ; NO-D16-HI-NEXT: flat_store_byte v[0:1], v2
279 ; GCN-NEXT: s_waitcnt
280 ; GCN-NEXT: s_setpc_b64
281 define void @store_flat_hi_i8_shift(i8* %out, i32 %value) #0 {
283 %hi32 = lshr i32 %value, 16
284 %hi = trunc i32 %hi32 to i8
285 store i8 %hi, i8* %out
; Flat high-half i16 store through a pointer advanced by 2047 i16 elements
; (4094 bytes).
; gfx900 is expected to emit flat_store_short_d16_hi with offset:4094. gfx906
; is expected to shift and then emit flat_store_short with the same offset.
; fiji is expected to add the displacement with v_add_u32 / v_addc_u32 and
; store with no immediate offset.
289 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_max_offset:
291 ; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}
293 ; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
294 ; GFX906-NEXT: flat_store_short v[0:1], v2 offset:4094
296 ; GFX803-DAG: v_add_u32_e32
297 ; GFX803-DAG: v_addc_u32_e32
298 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
299 ; GFX803: flat_store_short v[0:1], v2{{$}}
301 ; GCN-NEXT: s_waitcnt
302 ; GCN-NEXT: s_setpc_b64
303 define void @store_flat_hi_v2i16_max_offset(i16* %out, i32 %arg) #0 {
305 %value = bitcast i32 %arg to <2 x i16>
306 %hi = extractelement <2 x i16> %value, i32 1
307 %gep = getelementptr inbounds i16, i16* %out, i64 2047
308 store i16 %hi, i16* %gep
; Flat high-half i16 store through a pointer moved back by 1023 i16 elements
; (-2046 bytes).
; Both gfx9 targets are expected to split the displacement: a 64-bit add of
; -4096 (v_add_co_u32 with 0xfffff000, v_addc_co_u32 with -1) plus an immediate
; offset:2050 on the store, since -4096 + 2050 = -2046. gfx900 uses the d16_hi
; store; gfx906 shifts first and uses flat_store_short.
; fiji is expected to do the whole adjustment with an add / addc pair and store
; with no immediate offset.
312 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset:
314 ; GFX803: v_add{{(_co)?}}_{{i|u}}32_e32
315 ; GFX803: v_addc_u32_e32
317 ; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v
318 ; GFX9-DAG: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, v
320 ; GFX906-DAG: v_lshrrev_b32_e32
321 ; GFX906: flat_store_short v[0:1], v2 offset:2050{{$}}
323 ; GFX900-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:2050{{$}}
324 ; GFX803: flat_store_short v[0:1], v2{{$}}
325 ; GCN-NEXT: s_waitcnt
326 ; GCN-NEXT: s_setpc_b64
327 define void @store_flat_hi_v2i16_neg_offset(i16* %out, i32 %arg) #0 {
329 %value = bitcast i32 %arg to <2 x i16>
330 %hi = extractelement <2 x i16> %value, i32 1
331 %gep = getelementptr inbounds i16, i16* %out, i64 -1023
332 store i16 %hi, i16* %gep
; Flat byte store of the truncated high half through an i8 pointer advanced by
; 4095 bytes.
; gfx900 is expected to emit flat_store_byte_d16_hi with offset:4095. gfx906 is
; expected to shift and then emit flat_store_byte with offset:4095. fiji is
; expected to add the displacement with v_add_u32 / v_addc_u32 and store with
; no immediate offset.
336 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_max_offset:
338 ; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}
340 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
341 ; GFX803-DAG: v_add_u32_e32
342 ; GFX803-DAG: v_addc_u32_e32
343 ; GFX803: flat_store_byte v[0:1], v2{{$}}
345 ; GFX906-NEXT: v_lshrrev_b32_e32 v2, 16, v2
346 ; GFX906-NEXT: flat_store_byte v[0:1], v2 offset:4095{{$}}
348 ; GCN-NEXT: s_waitcnt
349 ; GCN-NEXT: s_setpc_b64
350 define void @store_flat_hi_v2i16_i8_max_offset(i8* %out, i32 %arg) #0 {
352 %value = bitcast i32 %arg to <2 x i16>
353 %hi = extractelement <2 x i16> %value, i32 1
354 %trunc = trunc i16 %hi to i8
355 %gep = getelementptr inbounds i8, i8* %out, i64 4095
356 store i8 %trunc, i8* %gep
; Flat byte store of the truncated high half through an i8 pointer moved back
; by 4095 bytes.
; Both gfx9 targets are expected to add -4096 to the 64-bit address
; (v_add_co_u32 with 0xfffff000, v_addc_co_u32 with -1) and carry the remaining
; +1 as offset:1 on the store, since -4096 + 1 = -4095. gfx900 uses the d16_hi
; byte store; gfx906 shifts v2 first and uses flat_store_byte.
; fiji is expected to use v_add_u32 / v_addc_u32 plus the shift, then
; flat_store_byte with no immediate offset.
360 ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset:
363 ; GFX803-DAG: v_add_u32_e32
364 ; GFX803-DAG: v_addc_u32_e32
366 ; GFX9-DAG: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 0xfffff000, v
367 ; GFX9-DAG: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, -1, v{{[0-9]+}}, vcc
369 ; GFX900-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:1{{$}}
371 ; GFX906-DAG: v_lshrrev_b32_e32 v2, 16, v2
372 ; GFX906: flat_store_byte v[0:1], v2 offset:1{{$}}
374 ; GFX803-DAG: v_lshrrev_b32_e32 v2, 16, v2
375 ; GFX803: flat_store_byte v[0:1], v2{{$}}
377 ; GCN-NEXT: s_waitcnt
378 ; GCN-NEXT: s_setpc_b64
379 define void @store_flat_hi_v2i16_i8_neg_offset(i8* %out, i32 %arg) #0 {
381 %value = bitcast i32 %arg to <2 x i16>
382 %hi = extractelement <2 x i16> %value, i32 1
383 %trunc = trunc i16 %hi to i8
384 %gep = getelementptr inbounds i8, i8* %out, i64 -4095
385 store i8 %trunc, i8* %gep
; Stores element 1 (the high 16 bits) of %arg, viewed as <2 x i16>, through an
; addrspace(5) pointer.
; gfx900 is expected to emit buffer_store_short_d16_hi in offen mode (address
; in v0, data in v1, s33 as the scalar offset). gfx906 and fiji are expected to
; shift v1 right by 16 and use buffer_store_short; those two patterns are not
; -NEXT checks, so other instructions may precede them.
389 ; GCN-LABEL: {{^}}store_private_hi_v2i16:
392 ; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s33 offen{{$}}
394 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
395 ; NO-D16-HI: buffer_store_short v1, v0, s[0:3], s33 offen{{$}}
397 ; GCN-NEXT: s_waitcnt
398 ; GCN-NEXT: s_setpc_b64
399 define void @store_private_hi_v2i16(i16 addrspace(5)* %out, i32 %arg) #0 {
401 ; FIXME: ABI for pre-gfx9
402 %value = bitcast i32 %arg to <2 x i16>
403 %hi = extractelement <2 x i16> %value, i32 1
404 store i16 %hi, i16 addrspace(5)* %out
; Half-precision variant of the addrspace(5) high-half store: element 1 of
; %arg, viewed as <2 x half>, is stored through a half addrspace(5) pointer.
; gfx900 is expected to emit buffer_store_short_d16_hi (offen, data in v1);
; gfx906 and fiji are expected to shift v1 right by 16 and then emit
; buffer_store_short, checked without the -NEXT adjacency requirement.
408 ; GCN-LABEL: {{^}}store_private_hi_v2f16:
411 ; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s33 offen{{$}}
413 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
414 ; NO-D16-HI: buffer_store_short v1, v0, s[0:3], s33 offen{{$}}
416 ; GCN-NEXT: s_waitcnt
417 ; GCN-NEXT: s_setpc_b64
418 define void @store_private_hi_v2f16(half addrspace(5)* %out, i32 %arg) #0 {
420 ; FIXME: ABI for pre-gfx9
421 %value = bitcast i32 %arg to <2 x half>
422 %hi = extractelement <2 x half> %value, i32 1
423 store half %hi, half addrspace(5)* %out
; addrspace(5) high-half store where the value comes from lshr 16 + trunc to
; i16 instead of a vector extract.
; gfx900 is expected to fold the shift into buffer_store_short_d16_hi; gfx906
; and fiji are expected to emit v_lshrrev_b32 on v1 immediately followed by
; buffer_store_short.
427 ; GCN-LABEL: {{^}}store_private_hi_i32_shift:
430 ; GFX900-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s33 offen{{$}}
432 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
433 ; NO-D16-HI-NEXT: buffer_store_short v1, v0, s[0:3], s33 offen{{$}}
435 ; GCN-NEXT: s_waitcnt
436 ; GCN-NEXT: s_setpc_b64
437 define void @store_private_hi_i32_shift(i16 addrspace(5)* %out, i32 %value) #0 {
439 %hi32 = lshr i32 %value, 16
440 %hi = trunc i32 %hi32 to i16
441 store i16 %hi, i16 addrspace(5)* %out
; addrspace(5) byte store of the high half: element 1 of %arg (as <2 x i16>) is
; truncated to i8 and stored through an i8 addrspace(5) pointer.
; gfx900 is expected to emit buffer_store_byte_d16_hi (offen, data in v1);
; gfx906 and fiji are expected to shift v1 right by 16 and then emit
; buffer_store_byte.
445 ; GCN-LABEL: {{^}}store_private_hi_v2i16_i8:
448 ; GFX900-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s33 offen{{$}}
450 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
451 ; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], s33 offen{{$}}
453 ; GCN-NEXT: s_waitcnt
454 ; GCN-NEXT: s_setpc_b64
455 define void @store_private_hi_v2i16_i8(i8 addrspace(5)* %out, i32 %arg) #0 {
457 %value = bitcast i32 %arg to <2 x i16>
458 %hi = extractelement <2 x i16> %value, i32 1
459 %trunc = trunc i16 %hi to i8
460 store i8 %trunc, i8 addrspace(5)* %out
; addrspace(5) byte store of bits 16..23 of %value, written as lshr 16 + trunc
; to i8.
; gfx900 is expected to select buffer_store_byte_d16_hi; gfx906 and fiji are
; expected to emit the shift of v1 followed by buffer_store_byte.
464 ; GCN-LABEL: {{^}}store_private_hi_i8_shift:
467 ; GFX900-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s33 offen{{$}}
469 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
470 ; NO-D16-HI-NEXT: buffer_store_byte v1, v0, s[0:3], s33 offen{{$}}
472 ; GCN-NEXT: s_waitcnt
473 ; GCN-NEXT: s_setpc_b64
474 define void @store_private_hi_i8_shift(i8 addrspace(5)* %out, i32 %value) #0 {
476 %hi32 = lshr i32 %value, 16
477 %hi = trunc i32 %hi32 to i8
478 store i8 %hi, i8 addrspace(5)* %out
; High-half i16 store to element 2047 (byte 4094) of a byval addrspace(5)
; argument.
; Every target is expected to address the slot as s32 plus immediate
; offset:4094 with no VGPR address ("off"), with the data in v0. gfx900 uses
; buffer_store_short_d16_hi; gfx906 and fiji shift v0 right by 16 and use
; buffer_store_short.
482 ; GCN-LABEL: {{^}}store_private_hi_v2i16_max_offset:
484 ; GFX900: buffer_store_short_d16_hi v0, off, s[0:3], s32 offset:4094{{$}}
486 ; NO-D16-HI: v_lshrrev_b32_e32 v0, 16, v0
487 ; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], s32 offset:4094{{$}}
489 ; GCN-NEXT: s_waitcnt
490 ; GCN-NEXT: s_setpc_b64
491 define void @store_private_hi_v2i16_max_offset(i16 addrspace(5)* byval %out, i32 %arg) #0 {
493 %value = bitcast i32 %arg to <2 x i16>
494 %hi = extractelement <2 x i16> %value, i32 1
495 %gep = getelementptr inbounds i16, i16 addrspace(5)* %out, i64 2047
496 store i16 %hi, i16 addrspace(5)* %gep
; Volatile high-half i16 store to the null addrspace(5) pointer, so there is no
; address operand to materialise.
; The expected store uses "off" (no VGPR address) with s33 and no immediate
; offset, with the data in v0. gfx900 uses buffer_store_short_d16_hi; gfx906
; and fiji shift v0 right by 16 and use buffer_store_short.
502 ; GCN-LABEL: {{^}}store_private_hi_v2i16_nooff:
505 ; GFX900-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s33{{$}}
507 ; NO-D16-HI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
508 ; NO-D16-HI-NEXT: buffer_store_short v0, off, s[0:3], s33{{$}}
510 ; GCN-NEXT: s_waitcnt
511 ; GCN-NEXT: s_setpc_b64
512 define void @store_private_hi_v2i16_nooff(i32 %arg) #0 {
514 ; FIXME: ABI for pre-gfx9
515 %value = bitcast i32 %arg to <2 x i16>
516 %hi = extractelement <2 x i16> %value, i32 1
517 store volatile i16 %hi, i16 addrspace(5)* null
; Volatile byte store of the truncated high half to the null addrspace(5)
; pointer.
; The expected store uses "off" with s33 and no immediate offset, data in v0.
; gfx900 uses buffer_store_byte_d16_hi; gfx906 and fiji shift v0 right by 16
; and use buffer_store_byte (checked without the -NEXT adjacency requirement).
522 ; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_nooff:
525 ; GFX900-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s33{{$}}
527 ; NO-D16-HI: v_lshrrev_b32_e32 v0, 16, v0
528 ; NO-D16-HI: buffer_store_byte v0, off, s[0:3], s33{{$}}
530 ; GCN-NEXT: s_waitcnt
531 ; GCN-NEXT: s_setpc_b64
532 define void @store_private_hi_v2i16_i8_nooff(i32 %arg) #0 {
534 %value = bitcast i32 %arg to <2 x i16>
535 %hi = extractelement <2 x i16> %value, i32 1
536 %trunc = trunc i16 %hi to i8
537 store volatile i8 %trunc, i8 addrspace(5)* null
; Stores element 1 (the high 16 bits) of %arg, viewed as <2 x i16>, through an
; addrspace(3) pointer (ds_write instructions are expected for it).
; gfx900 is expected to emit ds_write_b16_d16_hi with the address in v0 and the
; data in v1; gfx906 and fiji are expected to shift v1 right by 16 and then
; emit ds_write_b16.
541 ; GCN-LABEL: {{^}}store_local_hi_v2i16:
544 ; GFX900-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
546 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
547 ; NO-D16-HI: ds_write_b16 v0, v1
549 ; GCN-NEXT: s_waitcnt
550 ; GCN-NEXT: s_setpc_b64
551 define void @store_local_hi_v2i16(i16 addrspace(3)* %out, i32 %arg) #0 {
553 ; FIXME: ABI for pre-gfx9
554 %value = bitcast i32 %arg to <2 x i16>
555 %hi = extractelement <2 x i16> %value, i32 1
556 store i16 %hi, i16 addrspace(3)* %out
; Half-precision variant of the addrspace(3) high-half store: element 1 of
; %arg, viewed as <2 x half>, is stored through a half addrspace(3) pointer.
; gfx900 is expected to emit ds_write_b16_d16_hi; gfx906 and fiji are expected
; to shift v1 right by 16 and then emit ds_write_b16.
560 ; GCN-LABEL: {{^}}store_local_hi_v2f16:
563 ; GFX900-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
565 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
566 ; NO-D16-HI: ds_write_b16 v0, v1
568 ; GCN-NEXT: s_waitcnt
569 ; GCN-NEXT: s_setpc_b64
570 define void @store_local_hi_v2f16(half addrspace(3)* %out, i32 %arg) #0 {
572 ; FIXME: ABI for pre-gfx9
573 %value = bitcast i32 %arg to <2 x half>
574 %hi = extractelement <2 x half> %value, i32 1
575 store half %hi, half addrspace(3)* %out
; addrspace(3) high-half store where the value comes from lshr 16 + trunc to
; i16 instead of a vector extract.
; gfx900 is expected to fold the shift into ds_write_b16_d16_hi; gfx906 and
; fiji are expected to keep v_lshrrev_b32 on v1 and emit ds_write_b16.
579 ; GCN-LABEL: {{^}}store_local_hi_i32_shift:
582 ; GFX900-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
584 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
585 ; NO-D16-HI: ds_write_b16 v0, v1
587 ; GCN-NEXT: s_waitcnt
588 ; GCN-NEXT: s_setpc_b64
589 define void @store_local_hi_i32_shift(i16 addrspace(3)* %out, i32 %value) #0 {
591 %hi32 = lshr i32 %value, 16
592 %hi = trunc i32 %hi32 to i16
593 store i16 %hi, i16 addrspace(3)* %out
; addrspace(3) byte store of the high half: element 1 of %arg (as <2 x i16>) is
; truncated to i8 and stored through an i8 addrspace(3) pointer.
; gfx900 is expected to emit ds_write_b8_d16_hi; gfx906 and fiji are expected
; to shift v1 right by 16 and then emit ds_write_b8.
597 ; GCN-LABEL: {{^}}store_local_hi_v2i16_i8:
600 ; GFX900-NEXT: ds_write_b8_d16_hi v0, v1{{$}}
602 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
603 ; NO-D16-HI: ds_write_b8 v0, v1
605 ; GCN-NEXT: s_waitcnt
606 ; GCN-NEXT: s_setpc_b64
607 define void @store_local_hi_v2i16_i8(i8 addrspace(3)* %out, i32 %arg) #0 {
609 %value = bitcast i32 %arg to <2 x i16>
610 %hi = extractelement <2 x i16> %value, i32 1
611 %trunc = trunc i16 %hi to i8
612 store i8 %trunc, i8 addrspace(3)* %out
; addrspace(3) high-half i16 store through a pointer advanced by 32767 i16
; elements (65534 bytes).
; All targets are expected to fold the displacement into the instruction as
; offset:65534. gfx900 uses ds_write_b16_d16_hi; gfx906 and fiji shift v1 right
; by 16 and use ds_write_b16.
616 ; GCN-LABEL: {{^}}store_local_hi_v2i16_max_offset:
618 ; GFX900-NEXT: ds_write_b16_d16_hi v0, v1 offset:65534{{$}}
620 ; NO-D16-HI: v_lshrrev_b32_e32 v1, 16, v1
621 ; NO-D16-HI: ds_write_b16 v0, v1 offset:65534{{$}}
623 ; GCN-NEXT: s_waitcnt
624 ; GCN-NEXT: s_setpc_b64
625 define void @store_local_hi_v2i16_max_offset(i16 addrspace(3)* %out, i32 %arg) #0 {
627 ; FIXME: ABI for pre-gfx9
628 %value = bitcast i32 %arg to <2 x i16>
629 %hi = extractelement <2 x i16> %value, i32 1
630 %gep = getelementptr inbounds i16, i16 addrspace(3)* %out, i64 32767
631 store i16 %hi, i16 addrspace(3)* %gep
; High-half i16 store into a stack object that follows another stack object.
; %obj0 is a 40-byte [10 x i32] that receives a volatile store of 123; the high
; half of %arg is stored to element 2027 of the [4096 x i16] %obj1.
; Only gfx900 output is checked in the visible lines: a buffer_store_dword
; immediately followed by buffer_store_short_d16_hi at s32 offset:4094.
; NOTE(review): 4094 = 40 + 2 * 2027, which presumably reflects %obj1 being
; laid out directly after %obj0 - frame layout is decided by llc, so confirm.
635 ; GCN-LABEL: {{^}}store_private_hi_v2i16_to_offset:
637 ; GFX900: buffer_store_dword
638 ; GFX900-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s32 offset:4094
639 define void @store_private_hi_v2i16_to_offset(i32 %arg) #0 {
641 %obj0 = alloca [10 x i32], align 4, addrspace(5)
642 %obj1 = alloca [4096 x i16], align 2, addrspace(5)
643 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
644 store volatile i32 123, i32 addrspace(5)* %bc
645 %value = bitcast i32 %arg to <2 x i16>
646 %hi = extractelement <2 x i16> %value, i32 1
647 %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2027
648 store i16 %hi, i16 addrspace(5)* %gep
; Byte variant of the stack-object offset test. %obj0 is a 40-byte [10 x i32]
; that receives a volatile store of 123; the high half of %arg, truncated to
; i8, is stored to element 4055 of the [4096 x i8] %obj1.
; Only gfx900 output is checked in the visible lines: a buffer_store_dword
; immediately followed by buffer_store_byte_d16_hi at s32 offset:4095.
; NOTE(review): 4095 = 40 + 4055, which presumably reflects %obj1 being laid
; out directly after %obj0 - frame layout is decided by llc, so confirm.
652 ; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_to_offset:
654 ; GFX900: buffer_store_dword
655 ; GFX900-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s32 offset:4095
656 define void @store_private_hi_v2i16_i8_to_offset(i32 %arg) #0 {
658 %obj0 = alloca [10 x i32], align 4, addrspace(5)
659 %obj1 = alloca [4096 x i8], align 2, addrspace(5)
660 %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
661 store volatile i32 123, i32 addrspace(5)* %bc
662 %value = bitcast i32 %arg to <2 x i16>
663 %hi = extractelement <2 x i16> %value, i32 1
664 %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4055
665 %trunc = trunc i16 %hi to i8
666 store i8 %trunc, i8 addrspace(5)* %gep
670 attributes #0 = { nounwind }