1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -global-isel=0 | FileCheck %s -check-prefix=GFX12-SDAG
3 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -global-isel=1 | FileCheck %s -check-prefix=GFX12-GISEL
; Declarations of the buffer atomic fadd intrinsics called by the tests in this
; file: struct and raw variants, each for <2 x half> and <2 x bfloat>.
; The struct declarations mark their final i32 operand as immarg; the raw
; declarations carry no immarg marker. The raw v2bf16 declaration also names its
; first two parameters (%val, %rsrc), unlike the other three.
5 declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
6 declare <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32, i32 immarg)
7 declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32)
8 declare <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32, i32, i32)
; Raw buffer atomic fadd on <2 x half>, declared to return void, so the atomic's
; result is not returned. The call passes the immediate 92 as its third operand
; (the %voffset parameter is not used by the call); both check blocks expect
; `off` and `offset:92` on the emitted instruction, followed by s_endpgm.
; NOTE(review): the GFX12-GISEL lines expect buffer_atomic_pk_add_bf16 for this
; <2 x half> operation, while the GFX12-SDAG lines expect
; buffer_atomic_pk_add_f16. The assertions are autogenerated, so this presumably
; records what GlobalISel emitted - confirm whether that selection is intended.
10 define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret_offset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
11 ; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_noret_offset:
12 ; GFX12-SDAG: ; %bb.0:
13 ; GFX12-SDAG-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92
14 ; GFX12-SDAG-NEXT: s_endpgm
16 ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret_offset:
17 ; GFX12-GISEL: ; %bb.0:
18 ; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, off, s[0:3], s4 offset:92
19 ; GFX12-GISEL-NEXT: s_endpgm
20 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 %soffset, i32 0)
; Raw buffer atomic fadd on <2 x half>, declared to return void, so the atomic's
; result is not returned. The call passes %voffset and %soffset through to the
; intrinsic; both check blocks expect v1 with the `offen` modifier and s4 on the
; emitted instruction, followed by s_endpgm.
; NOTE(review): the GFX12-GISEL lines expect buffer_atomic_pk_add_bf16 for this
; <2 x half> operation, while the GFX12-SDAG lines expect
; buffer_atomic_pk_add_f16. The assertions are autogenerated, so this presumably
; records what GlobalISel emitted - confirm whether that selection is intended.
24 define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
25 ; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_noret:
26 ; GFX12-SDAG: ; %bb.0:
27 ; GFX12-SDAG-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen
28 ; GFX12-SDAG-NEXT: s_endpgm
30 ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret:
31 ; GFX12-GISEL: ; %bb.0:
32 ; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
33 ; GFX12-GISEL-NEXT: s_endpgm
34 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
; Raw buffer atomic fadd on <2 x half> in a function declared to return
; <2 x half>. The call passes the immediate 92 as its third operand (the
; %voffset parameter is not used by the call). Both check blocks expect
; `off`, `offset:92` and th:TH_ATOMIC_RETURN on the emitted instruction, then
; s_wait_loadcnt 0x0 before the shader part epilog.
; The return instruction is outside this view; presumably it returns %ret.
; NOTE(review): the GFX12-GISEL lines expect buffer_atomic_pk_add_bf16 for this
; <2 x half> operation, while the GFX12-SDAG lines expect
; buffer_atomic_pk_add_f16. The assertions are autogenerated, so this presumably
; records what GlobalISel emitted - confirm whether that selection is intended.
38 define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret_offset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
39 ; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_ret_offset:
40 ; GFX12-SDAG: ; %bb.0:
41 ; GFX12-SDAG-NEXT: buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN
42 ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
43 ; GFX12-SDAG-NEXT: ; return to shader part epilog
45 ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret_offset:
46 ; GFX12-GISEL: ; %bb.0:
47 ; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN
48 ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
49 ; GFX12-GISEL-NEXT: ; return to shader part epilog
50 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 %soffset, i32 0)
; Raw buffer atomic fadd on <2 x half> in a function declared to return
; <2 x half>. The call passes %voffset and %soffset through to the intrinsic.
; Both check blocks expect v1 with `offen` plus th:TH_ATOMIC_RETURN on the
; emitted instruction, then s_wait_loadcnt 0x0 before the shader part epilog.
; The return instruction is outside this view; presumably it returns %ret.
; NOTE(review): the GFX12-GISEL lines expect buffer_atomic_pk_add_bf16 for this
; <2 x half> operation, while the GFX12-SDAG lines expect
; buffer_atomic_pk_add_f16. The assertions are autogenerated, so this presumably
; records what GlobalISel emitted - confirm whether that selection is intended.
54 define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
55 ; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_ret:
56 ; GFX12-SDAG: ; %bb.0:
57 ; GFX12-SDAG-NEXT: buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
58 ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
59 ; GFX12-SDAG-NEXT: ; return to shader part epilog
61 ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret:
62 ; GFX12-GISEL: ; %bb.0:
63 ; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
64 ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
65 ; GFX12-GISEL-NEXT: ; return to shader part epilog
66 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
; Struct buffer atomic fadd on <2 x half> in a function declared to return
; float. The call passes %vindex, %voffset and %soffset through to the
; intrinsic, and the <2 x half> result is bitcast to float (%r).
; Both check blocks expect the v[1:2] register pair with `idxen offen` plus
; th:TH_ATOMIC_RETURN, then s_wait_loadcnt 0x0 before the shader part epilog.
; The return instruction is outside this view; presumably it returns %r.
; NOTE(review): the GFX12-GISEL lines expect buffer_atomic_pk_add_bf16 for this
; <2 x half> operation, while the GFX12-SDAG lines expect
; buffer_atomic_pk_add_f16. The assertions are autogenerated, so this presumably
; records what GlobalISel emitted - confirm whether that selection is intended.
70 define amdgpu_ps float @struct_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
71 ; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2f16_ret:
72 ; GFX12-SDAG: ; %bb.0:
73 ; GFX12-SDAG-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
74 ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
75 ; GFX12-SDAG-NEXT: ; return to shader part epilog
77 ; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_ret:
78 ; GFX12-GISEL: ; %bb.0:
79 ; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
80 ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
81 ; GFX12-GISEL-NEXT: ; return to shader part epilog
82 %orig = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
83 %r = bitcast <2 x half> %orig to float
; Struct buffer atomic fadd on <2 x half>, declared to return void, so the
; atomic's result is not returned. The call passes %vindex, %voffset and
; %soffset through to the intrinsic; both check blocks expect the v[1:2]
; register pair with `idxen offen` and no th: modifier, followed by s_endpgm.
; NOTE(review): the GFX12-GISEL lines expect buffer_atomic_pk_add_bf16 for this
; <2 x half> operation, while the GFX12-SDAG lines expect
; buffer_atomic_pk_add_f16. The assertions are autogenerated, so this presumably
; records what GlobalISel emitted - confirm whether that selection is intended.
87 define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
88 ; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2f16_noret:
89 ; GFX12-SDAG: ; %bb.0:
90 ; GFX12-SDAG-NEXT: buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen
91 ; GFX12-SDAG-NEXT: s_endpgm
93 ; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_noret:
94 ; GFX12-GISEL: ; %bb.0:
95 ; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen
96 ; GFX12-GISEL-NEXT: s_endpgm
97 %orig = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
; Struct buffer atomic fadd on <2 x bfloat> whose result is consumed: the
; returned value %orig is stored to `ptr null`, which keeps the returning form
; of the atomic alive. Both check blocks are identical apart from their prefix:
; buffer_atomic_pk_add_bf16 with `idxen offen th:TH_ATOMIC_RETURN`, v1/v2 set
; to 0 for the store address, s_wait_loadcnt 0x0 before flat_store_b32 of v0,
; then v0 set to 1.0 and s_wait_dscnt 0x0 before the shader part epilog.
; The return instruction is outside this view; the 1.0 loaded into v0 suggests
; the function returns the constant 1.0 - TODO confirm against the full file.
101 define amdgpu_ps float @struct_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
102 ; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_ret:
103 ; GFX12-SDAG: ; %bb.0:
104 ; GFX12-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
105 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, 0
106 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
107 ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
108 ; GFX12-SDAG-NEXT: flat_store_b32 v[1:2], v0
109 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
110 ; GFX12-SDAG-NEXT: s_wait_dscnt 0x0
111 ; GFX12-SDAG-NEXT: ; return to shader part epilog
113 ; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_ret:
114 ; GFX12-GISEL: ; %bb.0:
115 ; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
116 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
117 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, 0
118 ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
119 ; GFX12-GISEL-NEXT: flat_store_b32 v[1:2], v0
120 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
121 ; GFX12-GISEL-NEXT: s_wait_dscnt 0x0
122 ; GFX12-GISEL-NEXT: ; return to shader part epilog
123 %orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
124 store <2 x bfloat> %orig, ptr null
; Struct buffer atomic fadd on <2 x bfloat>, declared to return void, so the
; atomic's result is not returned. The call passes %vindex, %voffset and
; %soffset through to the intrinsic. Both check blocks expect the same output:
; buffer_atomic_pk_add_bf16 with the v[1:2] register pair, `idxen offen` and no
; th: modifier, followed by s_endpgm.
128 define amdgpu_ps void @struct_buffer_atomic_add_v2bf16_noret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
129 ; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_noret:
130 ; GFX12-SDAG: ; %bb.0:
131 ; GFX12-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen
132 ; GFX12-SDAG-NEXT: s_endpgm
134 ; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_noret:
135 ; GFX12-GISEL: ; %bb.0:
136 ; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen
137 ; GFX12-GISEL-NEXT: s_endpgm
138 %orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
; Raw buffer atomic fadd on <2 x bfloat>, declared to return void, so the
; atomic's result is not returned. The call passes %voffset and %soffset
; through to the intrinsic. Both check blocks expect the same output:
; buffer_atomic_pk_add_bf16 with v1, `offen` and no th: modifier, followed by
; s_endpgm.
142 define amdgpu_ps void @raw_buffer_atomic_add_v2bf16(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
143 ; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2bf16:
144 ; GFX12-SDAG: ; %bb.0:
145 ; GFX12-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
146 ; GFX12-SDAG-NEXT: s_endpgm
148 ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2bf16:
149 ; GFX12-GISEL: ; %bb.0:
150 ; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
151 ; GFX12-GISEL-NEXT: s_endpgm
152 %ret = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
; Raw buffer atomic fadd on <2 x bfloat> whose result is consumed: the returned
; value %orig is stored to `ptr null`, which keeps the returning form of the
; atomic alive. Both check blocks are identical apart from their prefix:
; buffer_atomic_pk_add_bf16 with v1, `offen` and th:TH_ATOMIC_RETURN, v1/v2 set
; to 0 for the store address, s_wait_loadcnt 0x0 before flat_store_b32 of v0,
; then v0 set to 1.0 and s_wait_dscnt 0x0 before the shader part epilog.
; The return instruction is outside this view; the 1.0 loaded into v0 suggests
; the function returns the constant 1.0 - TODO confirm against the full file.
156 define amdgpu_ps float @raw_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
157 ; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2bf16_ret:
158 ; GFX12-SDAG: ; %bb.0:
159 ; GFX12-SDAG-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
160 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, 0
161 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
162 ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
163 ; GFX12-SDAG-NEXT: flat_store_b32 v[1:2], v0
164 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
165 ; GFX12-SDAG-NEXT: s_wait_dscnt 0x0
166 ; GFX12-SDAG-NEXT: ; return to shader part epilog
168 ; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2bf16_ret:
169 ; GFX12-GISEL: ; %bb.0:
170 ; GFX12-GISEL-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
171 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
172 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, 0
173 ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
174 ; GFX12-GISEL-NEXT: flat_store_b32 v[1:2], v0
175 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
176 ; GFX12-GISEL-NEXT: s_wait_dscnt 0x0
177 ; GFX12-GISEL-NEXT: ; return to shader part epilog
178 %orig = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
179 store <2 x bfloat> %orig, ptr null