; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=SDAG %s
; Exercises vector operations on buffer resource pointers (ptr addrspace(8)):
; a <2 x ptr addrspace(8)> vector is loaded from %somewhere, both elements are
; extracted, the first is reinterpreted as <4 x i32> and stored through the
; second, and the element-swapped vector is stored one vector past %somewhere.
; Both instruction selectors are checked.
define amdgpu_kernel void @buffer_ptr_vector_ops(ptr addrspace(1) %somewhere) {
; GISEL-LABEL: buffer_ptr_vector_ops:
; GISEL: ; %bb.0: ; %main_body
; GISEL-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x24
; GISEL-NEXT: v_mov_b32_e32 v8, 0
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: v_mov_b32_e32 v0, s0
; GISEL-NEXT: v_mov_b32_e32 v4, s4
; GISEL-NEXT: v_mov_b32_e32 v1, s1
; GISEL-NEXT: v_mov_b32_e32 v2, s2
; GISEL-NEXT: v_mov_b32_e32 v3, s3
; GISEL-NEXT: v_mov_b32_e32 v5, s5
; GISEL-NEXT: v_mov_b32_e32 v6, s6
; GISEL-NEXT: v_mov_b32_e32 v7, s7
; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GISEL-NEXT: global_store_dwordx4 v8, v[4:7], s[8:9] offset:32
; GISEL-NEXT: global_store_dwordx4 v8, v[0:3], s[8:9] offset:48
; GISEL-NEXT: s_endpgm
;
; SDAG-LABEL: buffer_ptr_vector_ops:
; SDAG: ; %bb.0: ; %main_body
; SDAG-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x24
; SDAG-NEXT: v_mov_b32_e32 v8, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-NEXT: v_mov_b32_e32 v0, s0
; SDAG-NEXT: v_mov_b32_e32 v1, s1
; SDAG-NEXT: v_mov_b32_e32 v2, s2
; SDAG-NEXT: v_mov_b32_e32 v3, s3
; SDAG-NEXT: v_mov_b32_e32 v4, s4
; SDAG-NEXT: v_mov_b32_e32 v5, s5
; SDAG-NEXT: v_mov_b32_e32 v6, s6
; SDAG-NEXT: v_mov_b32_e32 v7, s7
; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[8:9] offset:48
; SDAG-NEXT: global_store_dwordx4 v8, v[4:7], s[8:9] offset:32
; SDAG-NEXT: s_endpgm
main_body:
  ; Load both buffer resources as a single vector and split out each element.
  %buffers = load <2 x ptr addrspace(8)>, ptr addrspace(1) %somewhere
  %buf1 = extractelement <2 x ptr addrspace(8)> %buffers, i32 0
  %buf2 = extractelement <2 x ptr addrspace(8)> %buffers, i32 1

  ; Reinterpret the first resource's 128 bits as <4 x i32> and store that
  ; value through the second resource.
  %buf1.int = ptrtoint ptr addrspace(8) %buf1 to i128
  %buf1.vec = bitcast i128 %buf1.int to <4 x i32>
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %buf1.vec, ptr addrspace(8) %buf2, i32 0, i32 0, i32 0)

  ; Swap the two elements. The mask only selects lanes of the first operand,
  ; so the second operand is a poison placeholder.
  %shuffled = shufflevector <2 x ptr addrspace(8)> %buffers, <2 x ptr addrspace(8)> poison, <2 x i32> <i32 1, i32 0>

  ; Write the swapped vector to the slot immediately following %somewhere.
  %somewhere.next = getelementptr <2 x ptr addrspace(8)>, ptr addrspace(1) %somewhere, i64 1
  store <2 x ptr addrspace(8)> %shuffled, ptr addrspace(1) %somewhere.next
  ret void
}
; Aggregate pairing a buffer resource pointer (field 0) with a 32-bit integer
; (field 1).
%fat_buffer_struct = type { ptr addrspace(8), i32 }
; Exercises a struct that contains a buffer resource pointer, passed as a
; kernel argument: the fields are extracted, the i32 field is stored through
; the buffer (serving as both the data and the offset), and the whole struct
; is stored to %dest indexed by that same i32.
; Both instruction selectors are checked.
define amdgpu_kernel void @buffer_structs(%fat_buffer_struct %arg, ptr addrspace(1) %dest) {
; GISEL-LABEL: buffer_structs:
; GISEL: ; %bb.0: ; %main_body
; GISEL-NEXT: s_load_dword s0, s[2:3], 0x34
; GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GISEL-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
; GISEL-NEXT: v_mov_b32_e32 v5, 0
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: s_ashr_i32 s1, s0, 31
; GISEL-NEXT: v_mov_b32_e32 v4, s0
; GISEL-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
; GISEL-NEXT: s_add_u32 s0, s8, s0
; GISEL-NEXT: v_mov_b32_e32 v0, s4
; GISEL-NEXT: s_addc_u32 s1, s9, s1
; GISEL-NEXT: v_mov_b32_e32 v1, s5
; GISEL-NEXT: v_mov_b32_e32 v2, s6
; GISEL-NEXT: v_mov_b32_e32 v3, s7
; GISEL-NEXT: buffer_store_dword v4, v4, s[4:7], 0 offen
; GISEL-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1]
; GISEL-NEXT: global_store_dword v5, v4, s[0:1] offset:16
; GISEL-NEXT: s_endpgm
;
; SDAG-LABEL: buffer_structs:
; SDAG: ; %bb.0: ; %main_body
; SDAG-NEXT: s_load_dword s0, s[2:3], 0x34
; SDAG-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; SDAG-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x44
; SDAG-NEXT: v_mov_b32_e32 v4, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-NEXT: s_ashr_i32 s1, s0, 31
; SDAG-NEXT: v_mov_b32_e32 v0, s0
; SDAG-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
; SDAG-NEXT: s_add_u32 s0, s8, s0
; SDAG-NEXT: s_addc_u32 s1, s9, s1
; SDAG-NEXT: buffer_store_dword v0, v0, s[4:7], 0 offen
; SDAG-NEXT: global_store_dword v4, v0, s[0:1] offset:16
; SDAG-NEXT: v_mov_b32_e32 v0, s4
; SDAG-NEXT: v_mov_b32_e32 v1, s5
; SDAG-NEXT: v_mov_b32_e32 v2, s6
; SDAG-NEXT: v_mov_b32_e32 v3, s7
; SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; SDAG-NEXT: s_endpgm
main_body:
  ; Split the argument into its buffer resource and its i32 field.
  %buffer = extractvalue %fat_buffer_struct %arg, 0
  %offset = extractvalue %fat_buffer_struct %arg, 1

  ; Store the i32 through the buffer, using it as both the data and the offset.
  call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %offset, ptr addrspace(8) %buffer, i32 %offset, i32 0, i32 0)

  ; Confirm the allocation size (GEP stride) of this struct is 32 bytes: the
  ; checks above scale the index with a shift left by 5 before adding it to
  ; the base address.
  %dest.next = getelementptr %fat_buffer_struct, ptr addrspace(1) %dest, i32 %offset
  store %fat_buffer_struct %arg, ptr addrspace(1) %dest.next
  ret void
}
; Raw pointer-buffer store intrinsics called by the kernels in this file,
; listed in order of first use. Each takes the value to store, the buffer
; resource, two i32 operands, and a final i32 that must be an immediate.
declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32 immarg)