1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=SI %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=VI %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+real-true16,-flat-for-global -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX11-SDAG %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16,-flat-for-global -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX11-GISEL %s
6 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-real-true16,-flat-for-global -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX11-FAKE16-SDAG %s
7 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-real-true16,-flat-for-global -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX11-FAKE16-GISEL %s
; fadd_f16 - scalar half-precision add of two volatile global loads; the sum
; is stored through %r.
;
; What the assertions below pin down for each target:
;   * tahiti         - both operands are converted to f32, added with
;                      v_add_f32, and the sum is converted back to f16.
;   * fiji           - a single v_add_f16 on full 32-bit VGPRs.
;   * gfx1100 with +real-true16 - the second operand is copied into v0.h and
;                      the add works on the 16-bit halves v0.l / v0.h.
;   * gfx1100 with -real-true16 - a single v_add_f16 on full 32-bit VGPRs.
;
; Only prefixes that a RUN line passes to FileCheck carry assertions here.
9 define amdgpu_kernel void @fadd_f16(
11 ; SI: ; %bb.0: ; %entry
12 ; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
13 ; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xd
14 ; SI-NEXT: s_mov_b32 s11, 0xf000
15 ; SI-NEXT: s_mov_b32 s10, -1
16 ; SI-NEXT: s_mov_b32 s2, s10
17 ; SI-NEXT: s_waitcnt lgkmcnt(0)
18 ; SI-NEXT: s_mov_b32 s8, s4
19 ; SI-NEXT: s_mov_b32 s9, s5
20 ; SI-NEXT: s_mov_b32 s4, s6
21 ; SI-NEXT: s_mov_b32 s5, s7
22 ; SI-NEXT: s_mov_b32 s6, s10
23 ; SI-NEXT: s_mov_b32 s7, s11
24 ; SI-NEXT: s_mov_b32 s3, s11
25 ; SI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 glc
26 ; SI-NEXT: s_waitcnt vmcnt(0)
27 ; SI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 glc
28 ; SI-NEXT: s_waitcnt vmcnt(0)
29 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
30 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
31 ; SI-NEXT: v_add_f32_e32 v0, v0, v1
32 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
33 ; SI-NEXT: buffer_store_short v0, off, s[8:11], 0
37 ; VI: ; %bb.0: ; %entry
38 ; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
39 ; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
40 ; VI-NEXT: s_mov_b32 s11, 0xf000
41 ; VI-NEXT: s_mov_b32 s10, -1
42 ; VI-NEXT: s_mov_b32 s2, s10
43 ; VI-NEXT: s_waitcnt lgkmcnt(0)
44 ; VI-NEXT: s_mov_b32 s8, s4
45 ; VI-NEXT: s_mov_b32 s9, s5
46 ; VI-NEXT: s_mov_b32 s4, s6
47 ; VI-NEXT: s_mov_b32 s5, s7
48 ; VI-NEXT: s_mov_b32 s6, s10
49 ; VI-NEXT: s_mov_b32 s7, s11
50 ; VI-NEXT: s_mov_b32 s3, s11
51 ; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 glc
52 ; VI-NEXT: s_waitcnt vmcnt(0)
53 ; VI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 glc
54 ; VI-NEXT: s_waitcnt vmcnt(0)
55 ; VI-NEXT: v_add_f16_e32 v0, v0, v1
56 ; VI-NEXT: buffer_store_short v0, off, s[8:11], 0
59 ; GFX11-SDAG-LABEL: fadd_f16:
60 ; GFX11-SDAG: ; %bb.0: ; %entry
61 ; GFX11-SDAG-NEXT: s_clause 0x1
62 ; GFX11-SDAG-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
63 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
64 ; GFX11-SDAG-NEXT: s_mov_b32 s11, 0x31016000
65 ; GFX11-SDAG-NEXT: s_mov_b32 s10, -1
66 ; GFX11-SDAG-NEXT: s_mov_b32 s3, s11
67 ; GFX11-SDAG-NEXT: s_mov_b32 s2, s10
68 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
69 ; GFX11-SDAG-NEXT: s_mov_b32 s8, s4
70 ; GFX11-SDAG-NEXT: s_mov_b32 s9, s5
71 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s6
72 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s7
73 ; GFX11-SDAG-NEXT: s_mov_b32 s6, s10
74 ; GFX11-SDAG-NEXT: s_mov_b32 s7, s11
75 ; GFX11-SDAG-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc
76 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
77 ; GFX11-SDAG-NEXT: buffer_load_u16 v1, off, s[0:3], 0 glc dlc
78 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
79 ; GFX11-SDAG-NEXT: v_mov_b16_e32 v0.h, v1.l
80 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
81 ; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
82 ; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[8:11], 0
83 ; GFX11-SDAG-NEXT: s_nop 0
84 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
85 ; GFX11-SDAG-NEXT: s_endpgm
87 ; GFX11-GISEL-LABEL: fadd_f16:
88 ; GFX11-GISEL: ; %bb.0: ; %entry
89 ; GFX11-GISEL-NEXT: s_clause 0x1
90 ; GFX11-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
91 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
92 ; GFX11-GISEL-NEXT: s_mov_b32 s10, -1
93 ; GFX11-GISEL-NEXT: s_mov_b32 s11, 0x31016000
94 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
95 ; GFX11-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
96 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
97 ; GFX11-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
98 ; GFX11-GISEL-NEXT: s_mov_b64 s[6:7], s[10:11]
99 ; GFX11-GISEL-NEXT: buffer_load_u16 v0, off, s[8:11], 0 glc dlc
100 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
101 ; GFX11-GISEL-NEXT: buffer_load_u16 v1, off, s[0:3], 0 glc dlc
102 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
103 ; GFX11-GISEL-NEXT: v_mov_b16_e32 v0.h, v1.l
104 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
105 ; GFX11-GISEL-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
106 ; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[4:7], 0
107 ; GFX11-GISEL-NEXT: s_nop 0
108 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
109 ; GFX11-GISEL-NEXT: s_endpgm
111 ; GFX11-FAKE16-SDAG-LABEL: fadd_f16:
112 ; GFX11-FAKE16-SDAG: ; %bb.0: ; %entry
113 ; GFX11-FAKE16-SDAG-NEXT: s_clause 0x1
114 ; GFX11-FAKE16-SDAG-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
115 ; GFX11-FAKE16-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
116 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s11, 0x31016000
117 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s10, -1
118 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s3, s11
119 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s2, s10
120 ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt lgkmcnt(0)
121 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s8, s4
122 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s9, s5
123 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s4, s6
124 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s5, s7
125 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s6, s10
126 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s7, s11
127 ; GFX11-FAKE16-SDAG-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc
128 ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0)
129 ; GFX11-FAKE16-SDAG-NEXT: buffer_load_u16 v1, off, s[0:3], 0 glc dlc
130 ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0)
131 ; GFX11-FAKE16-SDAG-NEXT: v_add_f16_e32 v0, v0, v1
132 ; GFX11-FAKE16-SDAG-NEXT: buffer_store_b16 v0, off, s[8:11], 0
133 ; GFX11-FAKE16-SDAG-NEXT: s_nop 0
134 ; GFX11-FAKE16-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
135 ; GFX11-FAKE16-SDAG-NEXT: s_endpgm
137 ; GFX11-FAKE16-GISEL-LABEL: fadd_f16:
138 ; GFX11-FAKE16-GISEL: ; %bb.0: ; %entry
139 ; GFX11-FAKE16-GISEL-NEXT: s_clause 0x1
140 ; GFX11-FAKE16-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
141 ; GFX11-FAKE16-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
142 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s10, -1
143 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s11, 0x31016000
144 ; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
145 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
146 ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt lgkmcnt(0)
147 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
148 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b64 s[6:7], s[10:11]
149 ; GFX11-FAKE16-GISEL-NEXT: buffer_load_u16 v0, off, s[8:11], 0 glc dlc
150 ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0)
151 ; GFX11-FAKE16-GISEL-NEXT: buffer_load_u16 v1, off, s[0:3], 0 glc dlc
152 ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0)
153 ; GFX11-FAKE16-GISEL-NEXT: v_add_f16_e32 v0, v0, v1
154 ; GFX11-FAKE16-GISEL-NEXT: buffer_store_b16 v0, off, s[4:7], 0
155 ; GFX11-FAKE16-GISEL-NEXT: s_nop 0
156 ; GFX11-FAKE16-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
157 ; GFX11-FAKE16-GISEL-NEXT: s_endpgm
187 ptr addrspace(1) %b) {
; Both loads are volatile; in the assertions above each load is followed by
; its own s_waitcnt vmcnt(0).
189 %a.val = load volatile half, ptr addrspace(1) %a
190 %b.val = load volatile half, ptr addrspace(1) %b
191 %r.val = fadd half %a.val, %b.val
192 store half %r.val, ptr addrspace(1) %r
; fadd_f16_imm_a - half-precision add with the constant 1.0 as the first
; operand and a value loaded from %b as the second; the sum is stored
; through %r.
;
; What the assertions below pin down for each target:
;   * tahiti         - the loaded value is converted to f32, 1.0 is added as
;                      an inline operand of v_add_f32, then converted back.
;   * fiji           - v_add_f16 with 1.0 as an inline operand.
;   * gfx1100 with +real-true16 - the constant is first written to v0.h as
;                      the bit pattern 0x3c00, then v0.l and v0.h are added.
;   * gfx1100 with -real-true16 - v_add_f16 with 1.0 as an inline operand.
;
; Only prefixes that a RUN line passes to FileCheck carry assertions here.
196 define amdgpu_kernel void @fadd_f16_imm_a(
197 ; SI-LABEL: fadd_f16_imm_a:
198 ; SI: ; %bb.0: ; %entry
199 ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
200 ; SI-NEXT: s_mov_b32 s7, 0xf000
201 ; SI-NEXT: s_mov_b32 s6, -1
202 ; SI-NEXT: s_waitcnt lgkmcnt(0)
203 ; SI-NEXT: s_mov_b32 s4, s0
204 ; SI-NEXT: s_mov_b32 s5, s1
205 ; SI-NEXT: s_mov_b32 s0, s2
206 ; SI-NEXT: s_mov_b32 s1, s3
207 ; SI-NEXT: s_mov_b32 s2, s6
208 ; SI-NEXT: s_mov_b32 s3, s7
209 ; SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0
210 ; SI-NEXT: s_waitcnt vmcnt(0)
211 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
212 ; SI-NEXT: v_add_f32_e32 v0, 1.0, v0
213 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
214 ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
217 ; VI-LABEL: fadd_f16_imm_a:
218 ; VI: ; %bb.0: ; %entry
219 ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
220 ; VI-NEXT: s_mov_b32 s7, 0xf000
221 ; VI-NEXT: s_mov_b32 s6, -1
222 ; VI-NEXT: s_waitcnt lgkmcnt(0)
223 ; VI-NEXT: s_mov_b32 s4, s0
224 ; VI-NEXT: s_mov_b32 s5, s1
225 ; VI-NEXT: s_mov_b32 s0, s2
226 ; VI-NEXT: s_mov_b32 s1, s3
227 ; VI-NEXT: s_mov_b32 s2, s6
228 ; VI-NEXT: s_mov_b32 s3, s7
229 ; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0
230 ; VI-NEXT: s_waitcnt vmcnt(0)
231 ; VI-NEXT: v_add_f16_e32 v0, 1.0, v0
232 ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
235 ; GFX11-SDAG-LABEL: fadd_f16_imm_a:
236 ; GFX11-SDAG: ; %bb.0: ; %entry
237 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
238 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
239 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
240 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
241 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
242 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
243 ; GFX11-SDAG-NEXT: s_mov_b32 s0, s2
244 ; GFX11-SDAG-NEXT: s_mov_b32 s1, s3
245 ; GFX11-SDAG-NEXT: s_mov_b32 s2, s6
246 ; GFX11-SDAG-NEXT: s_mov_b32 s3, s7
247 ; GFX11-SDAG-NEXT: buffer_load_u16 v0, off, s[0:3], 0
248 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
249 ; GFX11-SDAG-NEXT: v_mov_b16_e32 v0.h, 0x3c00
250 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
251 ; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
252 ; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
253 ; GFX11-SDAG-NEXT: s_nop 0
254 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
255 ; GFX11-SDAG-NEXT: s_endpgm
257 ; GFX11-GISEL-LABEL: fadd_f16_imm_a:
258 ; GFX11-GISEL: ; %bb.0: ; %entry
259 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
260 ; GFX11-GISEL-NEXT: s_mov_b32 s6, -1
261 ; GFX11-GISEL-NEXT: s_mov_b32 s7, 0x31016000
262 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
263 ; GFX11-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
264 ; GFX11-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
265 ; GFX11-GISEL-NEXT: buffer_load_u16 v0, off, s[4:7], 0
266 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
267 ; GFX11-GISEL-NEXT: v_mov_b16_e32 v0.h, 0x3c00
268 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
269 ; GFX11-GISEL-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
270 ; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
271 ; GFX11-GISEL-NEXT: s_nop 0
272 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
273 ; GFX11-GISEL-NEXT: s_endpgm
275 ; GFX11-FAKE16-SDAG-LABEL: fadd_f16_imm_a:
276 ; GFX11-FAKE16-SDAG: ; %bb.0: ; %entry
277 ; GFX11-FAKE16-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
278 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s7, 0x31016000
279 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s6, -1
280 ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt lgkmcnt(0)
281 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s4, s0
282 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s5, s1
283 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s0, s2
284 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s1, s3
285 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s2, s6
286 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s3, s7
287 ; GFX11-FAKE16-SDAG-NEXT: buffer_load_u16 v0, off, s[0:3], 0
288 ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0)
289 ; GFX11-FAKE16-SDAG-NEXT: v_add_f16_e32 v0, 1.0, v0
290 ; GFX11-FAKE16-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
291 ; GFX11-FAKE16-SDAG-NEXT: s_nop 0
292 ; GFX11-FAKE16-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
293 ; GFX11-FAKE16-SDAG-NEXT: s_endpgm
295 ; GFX11-FAKE16-GISEL-LABEL: fadd_f16_imm_a:
296 ; GFX11-FAKE16-GISEL: ; %bb.0: ; %entry
297 ; GFX11-FAKE16-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
298 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s6, -1
299 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s7, 0x31016000
300 ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt lgkmcnt(0)
301 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
302 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
303 ; GFX11-FAKE16-GISEL-NEXT: buffer_load_u16 v0, off, s[4:7], 0
304 ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0)
305 ; GFX11-FAKE16-GISEL-NEXT: v_add_f16_e32 v0, 1.0, v0
306 ; GFX11-FAKE16-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
307 ; GFX11-FAKE16-GISEL-NEXT: s_nop 0
308 ; GFX11-FAKE16-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
309 ; GFX11-FAKE16-GISEL-NEXT: s_endpgm
332 ptr addrspace(1) %b) {
; The constant is the left-hand operand of the fadd in the IR.
334 %b.val = load half, ptr addrspace(1) %b
335 %r.val = fadd half 1.0, %b.val
336 store half %r.val, ptr addrspace(1) %r
; fadd_f16_imm_b - half-precision add of a value loaded from %a (first
; operand) and the constant 2.0 (second operand); the sum is stored
; through %r.
;
; What the assertions below pin down for each target:
;   * tahiti         - the loaded value is converted to f32, then v_add_f32
;                      is emitted with 2.0 as its first source operand, and
;                      the sum is converted back to f16.
;   * fiji           - v_add_f16 with 2.0 as its first source operand.
;   * gfx1100 with +real-true16 - the constant is first written to v0.h as
;                      the bit pattern 0x4000, then v0.l and v0.h are added.
;   * gfx1100 with -real-true16 - v_add_f16 with 2.0 as its first source
;                      operand.
;
; Only prefixes that a RUN line passes to FileCheck carry assertions here.
340 define amdgpu_kernel void @fadd_f16_imm_b(
341 ; SI-LABEL: fadd_f16_imm_b:
342 ; SI: ; %bb.0: ; %entry
343 ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
344 ; SI-NEXT: s_mov_b32 s7, 0xf000
345 ; SI-NEXT: s_mov_b32 s6, -1
346 ; SI-NEXT: s_waitcnt lgkmcnt(0)
347 ; SI-NEXT: s_mov_b32 s4, s0
348 ; SI-NEXT: s_mov_b32 s5, s1
349 ; SI-NEXT: s_mov_b32 s0, s2
350 ; SI-NEXT: s_mov_b32 s1, s3
351 ; SI-NEXT: s_mov_b32 s2, s6
352 ; SI-NEXT: s_mov_b32 s3, s7
353 ; SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0
354 ; SI-NEXT: s_waitcnt vmcnt(0)
355 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
356 ; SI-NEXT: v_add_f32_e32 v0, 2.0, v0
357 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
358 ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
361 ; VI-LABEL: fadd_f16_imm_b:
362 ; VI: ; %bb.0: ; %entry
363 ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
364 ; VI-NEXT: s_mov_b32 s7, 0xf000
365 ; VI-NEXT: s_mov_b32 s6, -1
366 ; VI-NEXT: s_waitcnt lgkmcnt(0)
367 ; VI-NEXT: s_mov_b32 s4, s0
368 ; VI-NEXT: s_mov_b32 s5, s1
369 ; VI-NEXT: s_mov_b32 s0, s2
370 ; VI-NEXT: s_mov_b32 s1, s3
371 ; VI-NEXT: s_mov_b32 s2, s6
372 ; VI-NEXT: s_mov_b32 s3, s7
373 ; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0
374 ; VI-NEXT: s_waitcnt vmcnt(0)
375 ; VI-NEXT: v_add_f16_e32 v0, 2.0, v0
376 ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
379 ; GFX11-SDAG-LABEL: fadd_f16_imm_b:
380 ; GFX11-SDAG: ; %bb.0: ; %entry
381 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
382 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
383 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
384 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
385 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
386 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
387 ; GFX11-SDAG-NEXT: s_mov_b32 s0, s2
388 ; GFX11-SDAG-NEXT: s_mov_b32 s1, s3
389 ; GFX11-SDAG-NEXT: s_mov_b32 s2, s6
390 ; GFX11-SDAG-NEXT: s_mov_b32 s3, s7
391 ; GFX11-SDAG-NEXT: buffer_load_u16 v0, off, s[0:3], 0
392 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
393 ; GFX11-SDAG-NEXT: v_mov_b16_e32 v0.h, 0x4000
394 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
395 ; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
396 ; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
397 ; GFX11-SDAG-NEXT: s_nop 0
398 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
399 ; GFX11-SDAG-NEXT: s_endpgm
401 ; GFX11-GISEL-LABEL: fadd_f16_imm_b:
402 ; GFX11-GISEL: ; %bb.0: ; %entry
403 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
404 ; GFX11-GISEL-NEXT: s_mov_b32 s6, -1
405 ; GFX11-GISEL-NEXT: s_mov_b32 s7, 0x31016000
406 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
407 ; GFX11-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
408 ; GFX11-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
409 ; GFX11-GISEL-NEXT: buffer_load_u16 v0, off, s[4:7], 0
410 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
411 ; GFX11-GISEL-NEXT: v_mov_b16_e32 v0.h, 0x4000
412 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
413 ; GFX11-GISEL-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
414 ; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
415 ; GFX11-GISEL-NEXT: s_nop 0
416 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
417 ; GFX11-GISEL-NEXT: s_endpgm
419 ; GFX11-FAKE16-SDAG-LABEL: fadd_f16_imm_b:
420 ; GFX11-FAKE16-SDAG: ; %bb.0: ; %entry
421 ; GFX11-FAKE16-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
422 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s7, 0x31016000
423 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s6, -1
424 ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt lgkmcnt(0)
425 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s4, s0
426 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s5, s1
427 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s0, s2
428 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s1, s3
429 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s2, s6
430 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s3, s7
431 ; GFX11-FAKE16-SDAG-NEXT: buffer_load_u16 v0, off, s[0:3], 0
432 ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0)
433 ; GFX11-FAKE16-SDAG-NEXT: v_add_f16_e32 v0, 2.0, v0
434 ; GFX11-FAKE16-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
435 ; GFX11-FAKE16-SDAG-NEXT: s_nop 0
436 ; GFX11-FAKE16-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
437 ; GFX11-FAKE16-SDAG-NEXT: s_endpgm
439 ; GFX11-FAKE16-GISEL-LABEL: fadd_f16_imm_b:
440 ; GFX11-FAKE16-GISEL: ; %bb.0: ; %entry
441 ; GFX11-FAKE16-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
442 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s6, -1
443 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s7, 0x31016000
444 ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt lgkmcnt(0)
445 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3]
446 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
447 ; GFX11-FAKE16-GISEL-NEXT: buffer_load_u16 v0, off, s[4:7], 0
448 ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0)
449 ; GFX11-FAKE16-GISEL-NEXT: v_add_f16_e32 v0, 2.0, v0
450 ; GFX11-FAKE16-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
451 ; GFX11-FAKE16-GISEL-NEXT: s_nop 0
452 ; GFX11-FAKE16-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
453 ; GFX11-FAKE16-GISEL-NEXT: s_endpgm
476 ptr addrspace(1) %a) {
; The constant is the right-hand operand of the fadd in the IR.
478 %a.val = load half, ptr addrspace(1) %a
479 %r.val = fadd half %a.val, 2.0
480 store half %r.val, ptr addrspace(1) %r
; fadd_v2f16 - <2 x half> add. Each work-item indexes %a and %b by its
; workitem.id.x, loads one <2 x half> from each, adds them, and stores the
; result through %r (the store address is not indexed).
;
; What the assertions below pin down for each target:
;   * tahiti         - each 32-bit load is split into low/high halves with a
;                      16-bit right shift, every half is converted to f32,
;                      the halves are added pairwise with v_add_f32 and
;                      converted back, then repacked with a left shift and
;                      v_or_b32.
;   * fiji           - v_add_f16_sdwa produces the high half, v_add_f16 the
;                      low half, and v_or_b32 merges them.
;   * gfx1100 (all four configurations) - a single v_pk_add_f16.
;
; Only prefixes that a RUN line passes to FileCheck carry assertions here.
484 define amdgpu_kernel void @fadd_v2f16(
485 ; SI-LABEL: fadd_v2f16:
486 ; SI: ; %bb.0: ; %entry
487 ; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
488 ; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xd
489 ; SI-NEXT: s_mov_b32 s11, 0xf000
490 ; SI-NEXT: s_mov_b32 s14, 0
491 ; SI-NEXT: s_mov_b32 s15, s11
492 ; SI-NEXT: s_waitcnt lgkmcnt(0)
493 ; SI-NEXT: s_mov_b64 s[12:13], s[6:7]
494 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
495 ; SI-NEXT: v_mov_b32_e32 v1, 0
496 ; SI-NEXT: s_mov_b64 s[2:3], s[14:15]
497 ; SI-NEXT: buffer_load_dword v2, v[0:1], s[12:15], 0 addr64
498 ; SI-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
499 ; SI-NEXT: s_mov_b32 s10, -1
500 ; SI-NEXT: s_mov_b32 s8, s4
501 ; SI-NEXT: s_mov_b32 s9, s5
502 ; SI-NEXT: s_waitcnt vmcnt(1)
503 ; SI-NEXT: v_cvt_f32_f16_e32 v3, v2
504 ; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
505 ; SI-NEXT: s_waitcnt vmcnt(0)
506 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v0
507 ; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
508 ; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
509 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
510 ; SI-NEXT: v_add_f32_e32 v1, v3, v1
511 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
512 ; SI-NEXT: v_add_f32_e32 v0, v2, v0
513 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
514 ; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
515 ; SI-NEXT: v_or_b32_e32 v0, v1, v0
516 ; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0
519 ; VI-LABEL: fadd_v2f16:
520 ; VI: ; %bb.0: ; %entry
521 ; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
522 ; VI-NEXT: s_load_dwordx2 s[8:9], s[2:3], 0x34
523 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
524 ; VI-NEXT: s_mov_b32 s3, 0xf000
525 ; VI-NEXT: s_mov_b32 s2, -1
526 ; VI-NEXT: s_waitcnt lgkmcnt(0)
527 ; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2
528 ; VI-NEXT: v_mov_b32_e32 v1, s7
529 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
530 ; VI-NEXT: v_add_u32_e32 v2, vcc, s8, v2
531 ; VI-NEXT: v_mov_b32_e32 v3, s9
532 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
533 ; VI-NEXT: flat_load_dword v0, v[0:1]
534 ; VI-NEXT: flat_load_dword v1, v[2:3]
535 ; VI-NEXT: s_mov_b32 s0, s4
536 ; VI-NEXT: s_mov_b32 s1, s5
537 ; VI-NEXT: s_waitcnt vmcnt(0)
538 ; VI-NEXT: v_add_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
539 ; VI-NEXT: v_add_f16_e32 v0, v0, v1
540 ; VI-NEXT: v_or_b32_e32 v0, v0, v2
541 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
544 ; GFX11-SDAG-LABEL: fadd_v2f16:
545 ; GFX11-SDAG: ; %bb.0: ; %entry
546 ; GFX11-SDAG-NEXT: s_clause 0x1
547 ; GFX11-SDAG-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
548 ; GFX11-SDAG-NEXT: s_load_b64 s[8:9], s[2:3], 0x34
549 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
550 ; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000
551 ; GFX11-SDAG-NEXT: s_mov_b32 s2, -1
552 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
553 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
554 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
555 ; GFX11-SDAG-NEXT: s_clause 0x1
556 ; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[6:7]
557 ; GFX11-SDAG-NEXT: global_load_b32 v0, v0, s[8:9]
558 ; GFX11-SDAG-NEXT: s_mov_b32 s0, s4
559 ; GFX11-SDAG-NEXT: s_mov_b32 s1, s5
560 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
561 ; GFX11-SDAG-NEXT: v_pk_add_f16 v0, v1, v0
562 ; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[0:3], 0
563 ; GFX11-SDAG-NEXT: s_nop 0
564 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
565 ; GFX11-SDAG-NEXT: s_endpgm
567 ; GFX11-GISEL-LABEL: fadd_v2f16:
568 ; GFX11-GISEL: ; %bb.0: ; %entry
569 ; GFX11-GISEL-NEXT: s_clause 0x1
570 ; GFX11-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
571 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
572 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
573 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
574 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
575 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
576 ; GFX11-GISEL-NEXT: s_clause 0x1
577 ; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[6:7]
578 ; GFX11-GISEL-NEXT: global_load_b32 v0, v0, s[0:1]
579 ; GFX11-GISEL-NEXT: s_mov_b32 s6, -1
580 ; GFX11-GISEL-NEXT: s_mov_b32 s7, 0x31016000
581 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
582 ; GFX11-GISEL-NEXT: v_pk_add_f16 v0, v1, v0
583 ; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[4:7], 0
584 ; GFX11-GISEL-NEXT: s_nop 0
585 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
586 ; GFX11-GISEL-NEXT: s_endpgm
588 ; GFX11-FAKE16-SDAG-LABEL: fadd_v2f16:
589 ; GFX11-FAKE16-SDAG: ; %bb.0: ; %entry
590 ; GFX11-FAKE16-SDAG-NEXT: s_clause 0x1
591 ; GFX11-FAKE16-SDAG-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
592 ; GFX11-FAKE16-SDAG-NEXT: s_load_b64 s[8:9], s[2:3], 0x34
593 ; GFX11-FAKE16-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
594 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s3, 0x31016000
595 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s2, -1
596 ; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
597 ; GFX11-FAKE16-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
598 ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt lgkmcnt(0)
599 ; GFX11-FAKE16-SDAG-NEXT: s_clause 0x1
600 ; GFX11-FAKE16-SDAG-NEXT: global_load_b32 v1, v0, s[6:7]
601 ; GFX11-FAKE16-SDAG-NEXT: global_load_b32 v0, v0, s[8:9]
602 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s0, s4
603 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s1, s5
604 ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0)
605 ; GFX11-FAKE16-SDAG-NEXT: v_pk_add_f16 v0, v1, v0
606 ; GFX11-FAKE16-SDAG-NEXT: buffer_store_b32 v0, off, s[0:3], 0
607 ; GFX11-FAKE16-SDAG-NEXT: s_nop 0
608 ; GFX11-FAKE16-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
609 ; GFX11-FAKE16-SDAG-NEXT: s_endpgm
611 ; GFX11-FAKE16-GISEL-LABEL: fadd_v2f16:
612 ; GFX11-FAKE16-GISEL: ; %bb.0: ; %entry
613 ; GFX11-FAKE16-GISEL-NEXT: s_clause 0x1
614 ; GFX11-FAKE16-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
615 ; GFX11-FAKE16-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
616 ; GFX11-FAKE16-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
617 ; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
618 ; GFX11-FAKE16-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
619 ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt lgkmcnt(0)
620 ; GFX11-FAKE16-GISEL-NEXT: s_clause 0x1
621 ; GFX11-FAKE16-GISEL-NEXT: global_load_b32 v1, v0, s[6:7]
622 ; GFX11-FAKE16-GISEL-NEXT: global_load_b32 v0, v0, s[0:1]
623 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s6, -1
624 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s7, 0x31016000
625 ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0)
626 ; GFX11-FAKE16-GISEL-NEXT: v_pk_add_f16 v0, v1, v0
627 ; GFX11-FAKE16-GISEL-NEXT: buffer_store_b32 v0, off, s[4:7], 0
628 ; GFX11-FAKE16-GISEL-NEXT: s_nop 0
629 ; GFX11-FAKE16-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
630 ; GFX11-FAKE16-GISEL-NEXT: s_endpgm
653 ptr addrspace(1) %b) {
; Per-work-item element addresses: both inputs are indexed by the x id.
655 %tid = call i32 @llvm.amdgcn.workitem.id.x()
656 %gep.a = getelementptr inbounds <2 x half>, ptr addrspace(1) %a, i32 %tid
657 %gep.b = getelementptr inbounds <2 x half>, ptr addrspace(1) %b, i32 %tid
658 %a.val = load <2 x half>, ptr addrspace(1) %gep.a
659 %b.val = load <2 x half>, ptr addrspace(1) %gep.b
660 %r.val = fadd <2 x half> %a.val, %b.val
; The result goes to %r itself, not to a per-work-item slot.
661 store <2 x half> %r.val, ptr addrspace(1) %r
665 define amdgpu_kernel void @fadd_v2f16_imm_a(
666 ; SI-LABEL: fadd_v2f16_imm_a:
667 ; SI: ; %bb.0: ; %entry
668 ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
669 ; SI-NEXT: s_mov_b32 s7, 0xf000
670 ; SI-NEXT: s_mov_b32 s10, 0
671 ; SI-NEXT: s_mov_b32 s11, s7
672 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
673 ; SI-NEXT: s_waitcnt lgkmcnt(0)
674 ; SI-NEXT: s_mov_b64 s[8:9], s[2:3]
675 ; SI-NEXT: v_mov_b32_e32 v1, 0
676 ; SI-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
677 ; SI-NEXT: s_mov_b32 s6, -1
678 ; SI-NEXT: s_mov_b32 s4, s0
679 ; SI-NEXT: s_mov_b32 s5, s1
680 ; SI-NEXT: s_waitcnt vmcnt(0)
681 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v0
682 ; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
683 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
684 ; SI-NEXT: v_add_f32_e32 v1, 1.0, v1
685 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
686 ; SI-NEXT: v_add_f32_e32 v0, 2.0, v0
687 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
688 ; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
689 ; SI-NEXT: v_or_b32_e32 v0, v1, v0
690 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
693 ; VI-LABEL: fadd_v2f16_imm_a:
694 ; VI: ; %bb.0: ; %entry
695 ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
696 ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
697 ; VI-NEXT: s_mov_b32 s7, 0xf000
698 ; VI-NEXT: s_mov_b32 s6, -1
699 ; VI-NEXT: s_waitcnt lgkmcnt(0)
700 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
701 ; VI-NEXT: v_mov_b32_e32 v1, s3
702 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
703 ; VI-NEXT: flat_load_dword v0, v[0:1]
704 ; VI-NEXT: v_mov_b32_e32 v1, 0x4000
705 ; VI-NEXT: s_mov_b32 s4, s0
706 ; VI-NEXT: s_mov_b32 s5, s1
707 ; VI-NEXT: s_waitcnt vmcnt(0)
708 ; VI-NEXT: v_add_f16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
709 ; VI-NEXT: v_add_f16_e32 v0, 1.0, v0
710 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
711 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
714 ; GFX11-SDAG-LABEL: fadd_v2f16_imm_a:
715 ; GFX11-SDAG: ; %bb.0: ; %entry
716 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
717 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
718 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
719 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
720 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
721 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
722 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
723 ; GFX11-SDAG-NEXT: global_load_b32 v0, v0, s[2:3]
724 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
725 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
726 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
727 ; GFX11-SDAG-NEXT: v_pk_add_f16 v0, 0x40003c00, v0
728 ; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
729 ; GFX11-SDAG-NEXT: s_nop 0
730 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
731 ; GFX11-SDAG-NEXT: s_endpgm
733 ; GFX11-GISEL-LABEL: fadd_v2f16_imm_a:
734 ; GFX11-GISEL: ; %bb.0: ; %entry
735 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
736 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
737 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
738 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
739 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
740 ; GFX11-GISEL-NEXT: global_load_b32 v0, v0, s[2:3]
741 ; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
742 ; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
743 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
744 ; GFX11-GISEL-NEXT: v_pk_add_f16 v0, 0x40003c00, v0
745 ; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
746 ; GFX11-GISEL-NEXT: s_nop 0
747 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
748 ; GFX11-GISEL-NEXT: s_endpgm
750 ; GFX11-FAKE16-SDAG-LABEL: fadd_v2f16_imm_a:
751 ; GFX11-FAKE16-SDAG: ; %bb.0: ; %entry
752 ; GFX11-FAKE16-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
753 ; GFX11-FAKE16-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
754 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s7, 0x31016000
755 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s6, -1
756 ; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
757 ; GFX11-FAKE16-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
758 ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt lgkmcnt(0)
759 ; GFX11-FAKE16-SDAG-NEXT: global_load_b32 v0, v0, s[2:3]
760 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s4, s0
761 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s5, s1
762 ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0)
763 ; GFX11-FAKE16-SDAG-NEXT: v_pk_add_f16 v0, 0x40003c00, v0
764 ; GFX11-FAKE16-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
765 ; GFX11-FAKE16-SDAG-NEXT: s_nop 0
766 ; GFX11-FAKE16-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
767 ; GFX11-FAKE16-SDAG-NEXT: s_endpgm
769 ; GFX11-FAKE16-GISEL-LABEL: fadd_v2f16_imm_a:
770 ; GFX11-FAKE16-GISEL: ; %bb.0: ; %entry
771 ; GFX11-FAKE16-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
772 ; GFX11-FAKE16-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
773 ; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
774 ; GFX11-FAKE16-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
775 ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt lgkmcnt(0)
776 ; GFX11-FAKE16-GISEL-NEXT: global_load_b32 v0, v0, s[2:3]
777 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s2, -1
778 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s3, 0x31016000
779 ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0)
780 ; GFX11-FAKE16-GISEL-NEXT: v_pk_add_f16 v0, 0x40003c00, v0
781 ; GFX11-FAKE16-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
782 ; GFX11-FAKE16-GISEL-NEXT: s_nop 0
783 ; GFX11-FAKE16-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
784 ; GFX11-FAKE16-GISEL-NEXT: s_endpgm
785 ; GFX11-LABEL: fadd_v2f16_imm_a:
786 ; GFX11: ; %bb.0: ; %entry
787 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
788 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
789 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000
790 ; GFX11-NEXT: s_mov_b32 s6, -1
791 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
792 ; GFX11-NEXT: global_load_b32 v0, v0, s[2:3]
793 ; GFX11-NEXT: s_mov_b32 s4, s0
794 ; GFX11-NEXT: s_mov_b32 s5, s1
795 ; GFX11-NEXT: s_waitcnt vmcnt(0)
796 ; GFX11-NEXT: v_pk_add_f16 v0, 0x40003c00, v0
797 ; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
798 ; GFX11-NEXT: s_nop 0
799 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
800 ; GFX11-NEXT: s_endpgm
802 ptr addrspace(1) %b) {
804 %tid = call i32 @llvm.amdgcn.workitem.id.x()
805 %gep.b = getelementptr inbounds <2 x half>, ptr addrspace(1) %b, i32 %tid
806 %b.val = load <2 x half>, ptr addrspace(1) %gep.b
807 %r.val = fadd <2 x half> <half 1.0, half 2.0>, %b.val
808 store <2 x half> %r.val, ptr addrspace(1) %r
; Packed-half fadd with the constant vector as the right-hand operand.
; Each work-item loads the <2 x half> element of %a selected by
; llvm.amdgcn.workitem.id.x, adds <half 2.0, half 1.0> to it and stores the
; sum through %r.
; Element 0 (2.0, 0x4000) occupies the low 16 bits and element 1 (1.0, 0x3c00)
; the high 16 bits, which is why the gfx1100 checks expect the packed literal
; 0x3c004000 on v_pk_add_f16. The tahiti checks expect each half to be widened
; to f32, added and narrowed again; the fiji checks expect one v_add_f16 per
; half, with the high half handled through SDWA.
; The check lines are autogenerated (see the NOTE at the top of the file) and
; exist only for prefixes that a RUN line enables.
812 define amdgpu_kernel void @fadd_v2f16_imm_b(
813 ; SI-LABEL: fadd_v2f16_imm_b:
814 ; SI: ; %bb.0: ; %entry
815 ; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
816 ; SI-NEXT: s_mov_b32 s7, 0xf000
817 ; SI-NEXT: s_mov_b32 s10, 0
818 ; SI-NEXT: s_mov_b32 s11, s7
819 ; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
820 ; SI-NEXT: s_waitcnt lgkmcnt(0)
821 ; SI-NEXT: s_mov_b64 s[8:9], s[2:3]
822 ; SI-NEXT: v_mov_b32_e32 v1, 0
823 ; SI-NEXT: buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
824 ; SI-NEXT: s_mov_b32 s6, -1
825 ; SI-NEXT: s_mov_b32 s4, s0
826 ; SI-NEXT: s_mov_b32 s5, s1
827 ; SI-NEXT: s_waitcnt vmcnt(0)
828 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v0
829 ; SI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
830 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
831 ; SI-NEXT: v_add_f32_e32 v1, 2.0, v1
832 ; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
833 ; SI-NEXT: v_add_f32_e32 v0, 1.0, v0
834 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
835 ; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
836 ; SI-NEXT: v_or_b32_e32 v0, v1, v0
837 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
840 ; VI-LABEL: fadd_v2f16_imm_b:
841 ; VI: ; %bb.0: ; %entry
842 ; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
843 ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
844 ; VI-NEXT: s_mov_b32 s7, 0xf000
845 ; VI-NEXT: s_mov_b32 s6, -1
846 ; VI-NEXT: s_waitcnt lgkmcnt(0)
847 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
848 ; VI-NEXT: v_mov_b32_e32 v1, s3
849 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
850 ; VI-NEXT: flat_load_dword v0, v[0:1]
851 ; VI-NEXT: v_mov_b32_e32 v1, 0x3c00
852 ; VI-NEXT: s_mov_b32 s4, s0
853 ; VI-NEXT: s_mov_b32 s5, s1
854 ; VI-NEXT: s_waitcnt vmcnt(0)
855 ; VI-NEXT: v_add_f16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
856 ; VI-NEXT: v_add_f16_e32 v0, 2.0, v0
857 ; VI-NEXT: v_or_b32_e32 v0, v0, v1
858 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
861 ; GFX11-SDAG-LABEL: fadd_v2f16_imm_b:
862 ; GFX11-SDAG: ; %bb.0: ; %entry
863 ; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
864 ; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
865 ; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000
866 ; GFX11-SDAG-NEXT: s_mov_b32 s6, -1
867 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
868 ; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
869 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
870 ; GFX11-SDAG-NEXT: global_load_b32 v0, v0, s[2:3]
871 ; GFX11-SDAG-NEXT: s_mov_b32 s4, s0
872 ; GFX11-SDAG-NEXT: s_mov_b32 s5, s1
873 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
874 ; GFX11-SDAG-NEXT: v_pk_add_f16 v0, 0x3c004000, v0
875 ; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
876 ; GFX11-SDAG-NEXT: s_nop 0
877 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
878 ; GFX11-SDAG-NEXT: s_endpgm
880 ; GFX11-GISEL-LABEL: fadd_v2f16_imm_b:
881 ; GFX11-GISEL: ; %bb.0: ; %entry
882 ; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
883 ; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
884 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
885 ; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
886 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
887 ; GFX11-GISEL-NEXT: global_load_b32 v0, v0, s[2:3]
888 ; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
889 ; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
890 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
891 ; GFX11-GISEL-NEXT: v_pk_add_f16 v0, 0x3c004000, v0
892 ; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
893 ; GFX11-GISEL-NEXT: s_nop 0
894 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
895 ; GFX11-GISEL-NEXT: s_endpgm
897 ; GFX11-FAKE16-SDAG-LABEL: fadd_v2f16_imm_b:
898 ; GFX11-FAKE16-SDAG: ; %bb.0: ; %entry
899 ; GFX11-FAKE16-SDAG-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
900 ; GFX11-FAKE16-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
901 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s7, 0x31016000
902 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s6, -1
903 ; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
904 ; GFX11-FAKE16-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
905 ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt lgkmcnt(0)
906 ; GFX11-FAKE16-SDAG-NEXT: global_load_b32 v0, v0, s[2:3]
907 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s4, s0
908 ; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s5, s1
909 ; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0)
910 ; GFX11-FAKE16-SDAG-NEXT: v_pk_add_f16 v0, 0x3c004000, v0
911 ; GFX11-FAKE16-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0
912 ; GFX11-FAKE16-SDAG-NEXT: s_nop 0
913 ; GFX11-FAKE16-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
914 ; GFX11-FAKE16-SDAG-NEXT: s_endpgm
916 ; GFX11-FAKE16-GISEL-LABEL: fadd_v2f16_imm_b:
917 ; GFX11-FAKE16-GISEL: ; %bb.0: ; %entry
918 ; GFX11-FAKE16-GISEL-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
919 ; GFX11-FAKE16-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
920 ; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
921 ; GFX11-FAKE16-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
922 ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt lgkmcnt(0)
923 ; GFX11-FAKE16-GISEL-NEXT: global_load_b32 v0, v0, s[2:3]
924 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s2, -1
925 ; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s3, 0x31016000
926 ; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0)
927 ; GFX11-FAKE16-GISEL-NEXT: v_pk_add_f16 v0, 0x3c004000, v0
928 ; GFX11-FAKE16-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
929 ; GFX11-FAKE16-GISEL-NEXT: s_nop 0
930 ; GFX11-FAKE16-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
931 ; GFX11-FAKE16-GISEL-NEXT: s_endpgm
949 ptr addrspace(1) %a) {
951 %tid = call i32 @llvm.amdgcn.workitem.id.x()
952 %gep.a = getelementptr inbounds <2 x half>, ptr addrspace(1) %a, i32 %tid
953 %a.val = load <2 x half>, ptr addrspace(1) %gep.a
954 %r.val = fadd <2 x half> %a.val, <half 2.0, half 1.0>
955 store <2 x half> %r.val, ptr addrspace(1) %r
; Intrinsic the kernels call to obtain %tid, the i32 index used in their
; getelementptr to pick the per-work-item <2 x half> element. It is declared
; with attribute group #1.
959 declare i32 @llvm.amdgcn.workitem.id.x() #1
; Attribute groups. #1 is attached to the intrinsic declaration above.
; NOTE(review): no use of #0 is visible in this part of the file - confirm it
; is still referenced before keeping or removing it.
961 attributes #0 = { nounwind }
962 attributes #1 = { nounwind readnone }