1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
3 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
5 ; Use a 64-bit value with lo bits that can be represented as an inline constant
; Expected code moves 5 into v0 (low dword) and the 32-bit literal 0x12345678
; into v1 (high dword), then stores both with one buffer_store_dwordx2.
6 define amdgpu_kernel void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
7 ; SI-LABEL: i64_imm_inline_lo:
8 ; SI: ; %bb.0: ; %entry
9 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
10 ; SI-NEXT: s_mov_b32 s3, 0xf000
11 ; SI-NEXT: s_mov_b32 s2, -1
12 ; SI-NEXT: v_mov_b32_e32 v0, 5
13 ; SI-NEXT: v_mov_b32_e32 v1, 0x12345678
14 ; SI-NEXT: s_waitcnt lgkmcnt(0)
15 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
18 ; VI-LABEL: i64_imm_inline_lo:
19 ; VI: ; %bb.0: ; %entry
20 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
21 ; VI-NEXT: s_mov_b32 s3, 0xf000
22 ; VI-NEXT: s_mov_b32 s2, -1
23 ; VI-NEXT: v_mov_b32_e32 v0, 5
24 ; VI-NEXT: v_mov_b32_e32 v1, 0x12345678
25 ; VI-NEXT: s_waitcnt lgkmcnt(0)
26 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
29 store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005
33 ; Use a 64-bit value with hi bits that can be represented as an inline constant
; Expected code moves the 32-bit literal 0x12345678 into v0 (low dword) and 5
; into v1 (high dword), then stores both with one buffer_store_dwordx2.
34 define amdgpu_kernel void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
35 ; SI-LABEL: i64_imm_inline_hi:
36 ; SI: ; %bb.0: ; %entry
37 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
38 ; SI-NEXT: s_mov_b32 s3, 0xf000
39 ; SI-NEXT: s_mov_b32 s2, -1
40 ; SI-NEXT: v_mov_b32_e32 v0, 0x12345678
41 ; SI-NEXT: v_mov_b32_e32 v1, 5
42 ; SI-NEXT: s_waitcnt lgkmcnt(0)
43 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
46 ; VI-LABEL: i64_imm_inline_hi:
47 ; VI: ; %bb.0: ; %entry
48 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
49 ; VI-NEXT: s_mov_b32 s3, 0xf000
50 ; VI-NEXT: s_mov_b32 s2, -1
51 ; VI-NEXT: v_mov_b32_e32 v0, 0x12345678
52 ; VI-NEXT: v_mov_b32_e32 v1, 5
53 ; VI-NEXT: s_waitcnt lgkmcnt(0)
54 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
57 store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678
; Store an i64 with only the sign bit set (0x8000000000000000). The low dword
; is expected as a plain move of 0 and the high dword as v_bfrev_b32 of 1
; (bit-reverse of 1 = 0x80000000) instead of a 32-bit literal move.
61 define amdgpu_kernel void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) {
62 ; SI-LABEL: store_imm_neg_0.0_i64:
64 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
65 ; SI-NEXT: s_mov_b32 s3, 0xf000
66 ; SI-NEXT: s_mov_b32 s2, -1
67 ; SI-NEXT: v_mov_b32_e32 v0, 0
68 ; SI-NEXT: v_bfrev_b32_e32 v1, 1
69 ; SI-NEXT: s_waitcnt lgkmcnt(0)
70 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
73 ; VI-LABEL: store_imm_neg_0.0_i64:
75 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
76 ; VI-NEXT: s_mov_b32 s3, 0xf000
77 ; VI-NEXT: s_mov_b32 s2, -1
78 ; VI-NEXT: v_mov_b32_e32 v0, 0
79 ; VI-NEXT: v_bfrev_b32_e32 v1, 1
80 ; VI-NEXT: s_waitcnt lgkmcnt(0)
81 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
83 store i64 -9223372036854775808, i64 addrspace(1) *%out
; Store the i32 value -2147483648 (0x80000000). The checks expect it to be
; produced by v_bfrev_b32 of 1 instead of a move of a 32-bit literal.
87 define amdgpu_kernel void @store_inline_imm_neg_0.0_i32(i32 addrspace(1)* %out) {
88 ; SI-LABEL: store_inline_imm_neg_0.0_i32:
90 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
91 ; SI-NEXT: s_mov_b32 s3, 0xf000
92 ; SI-NEXT: s_mov_b32 s2, -1
93 ; SI-NEXT: v_bfrev_b32_e32 v0, 1
94 ; SI-NEXT: s_waitcnt lgkmcnt(0)
95 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
98 ; VI-LABEL: store_inline_imm_neg_0.0_i32:
100 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
101 ; VI-NEXT: s_mov_b32 s3, 0xf000
102 ; VI-NEXT: s_mov_b32 s2, -1
103 ; VI-NEXT: v_bfrev_b32_e32 v0, 1
104 ; VI-NEXT: s_waitcnt lgkmcnt(0)
105 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
107 store i32 -2147483648, i32 addrspace(1)* %out
; Store float 0.0. The expected v_mov_b32 takes 0 as its source operand.
111 define amdgpu_kernel void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
112 ; SI-LABEL: store_inline_imm_0.0_f32:
114 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
115 ; SI-NEXT: s_mov_b32 s3, 0xf000
116 ; SI-NEXT: s_mov_b32 s2, -1
117 ; SI-NEXT: v_mov_b32_e32 v0, 0
118 ; SI-NEXT: s_waitcnt lgkmcnt(0)
119 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
122 ; VI-LABEL: store_inline_imm_0.0_f32:
124 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
125 ; VI-NEXT: s_mov_b32 s3, 0xf000
126 ; VI-NEXT: s_mov_b32 s2, -1
127 ; VI-NEXT: v_mov_b32_e32 v0, 0
128 ; VI-NEXT: s_waitcnt lgkmcnt(0)
129 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
131 store float 0.0, float addrspace(1)* %out
; Store float -0.0 (bit pattern 0x80000000). The checks expect the value to be
; built with v_bfrev_b32 of 1 instead of a move of a 32-bit literal.
135 define amdgpu_kernel void @store_imm_neg_0.0_f32(float addrspace(1)* %out) {
136 ; SI-LABEL: store_imm_neg_0.0_f32:
138 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
139 ; SI-NEXT: s_mov_b32 s3, 0xf000
140 ; SI-NEXT: s_mov_b32 s2, -1
141 ; SI-NEXT: v_bfrev_b32_e32 v0, 1
142 ; SI-NEXT: s_waitcnt lgkmcnt(0)
143 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
146 ; VI-LABEL: store_imm_neg_0.0_f32:
148 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
149 ; VI-NEXT: s_mov_b32 s3, 0xf000
150 ; VI-NEXT: s_mov_b32 s2, -1
151 ; VI-NEXT: v_bfrev_b32_e32 v0, 1
152 ; VI-NEXT: s_waitcnt lgkmcnt(0)
153 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
155 store float -0.0, float addrspace(1)* %out
; Store float 0.5. The expected v_mov_b32 takes 0.5 directly as its source
; operand (no hex literal) on both runs.
159 define amdgpu_kernel void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
160 ; SI-LABEL: store_inline_imm_0.5_f32:
162 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
163 ; SI-NEXT: s_mov_b32 s3, 0xf000
164 ; SI-NEXT: s_mov_b32 s2, -1
165 ; SI-NEXT: v_mov_b32_e32 v0, 0.5
166 ; SI-NEXT: s_waitcnt lgkmcnt(0)
167 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
170 ; VI-LABEL: store_inline_imm_0.5_f32:
172 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
173 ; VI-NEXT: s_mov_b32 s3, 0xf000
174 ; VI-NEXT: s_mov_b32 s2, -1
175 ; VI-NEXT: v_mov_b32_e32 v0, 0.5
176 ; VI-NEXT: s_waitcnt lgkmcnt(0)
177 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
179 store float 0.5, float addrspace(1)* %out
; Store float -0.5. The expected v_mov_b32 takes -0.5 directly as its source
; operand (no hex literal) on both runs.
183 define amdgpu_kernel void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
184 ; SI-LABEL: store_inline_imm_m_0.5_f32:
186 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
187 ; SI-NEXT: s_mov_b32 s3, 0xf000
188 ; SI-NEXT: s_mov_b32 s2, -1
189 ; SI-NEXT: v_mov_b32_e32 v0, -0.5
190 ; SI-NEXT: s_waitcnt lgkmcnt(0)
191 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
194 ; VI-LABEL: store_inline_imm_m_0.5_f32:
196 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
197 ; VI-NEXT: s_mov_b32 s3, 0xf000
198 ; VI-NEXT: s_mov_b32 s2, -1
199 ; VI-NEXT: v_mov_b32_e32 v0, -0.5
200 ; VI-NEXT: s_waitcnt lgkmcnt(0)
201 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
203 store float -0.5, float addrspace(1)* %out
; Store float 1.0. The expected v_mov_b32 takes 1.0 directly as its source
; operand (no hex literal) on both runs.
207 define amdgpu_kernel void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
208 ; SI-LABEL: store_inline_imm_1.0_f32:
210 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
211 ; SI-NEXT: s_mov_b32 s3, 0xf000
212 ; SI-NEXT: s_mov_b32 s2, -1
213 ; SI-NEXT: v_mov_b32_e32 v0, 1.0
214 ; SI-NEXT: s_waitcnt lgkmcnt(0)
215 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
218 ; VI-LABEL: store_inline_imm_1.0_f32:
220 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
221 ; VI-NEXT: s_mov_b32 s3, 0xf000
222 ; VI-NEXT: s_mov_b32 s2, -1
223 ; VI-NEXT: v_mov_b32_e32 v0, 1.0
224 ; VI-NEXT: s_waitcnt lgkmcnt(0)
225 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
227 store float 1.0, float addrspace(1)* %out
; Store float -1.0. The expected v_mov_b32 takes -1.0 directly as its source
; operand (no hex literal) on both runs.
231 define amdgpu_kernel void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
232 ; SI-LABEL: store_inline_imm_m_1.0_f32:
234 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
235 ; SI-NEXT: s_mov_b32 s3, 0xf000
236 ; SI-NEXT: s_mov_b32 s2, -1
237 ; SI-NEXT: v_mov_b32_e32 v0, -1.0
238 ; SI-NEXT: s_waitcnt lgkmcnt(0)
239 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
242 ; VI-LABEL: store_inline_imm_m_1.0_f32:
244 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
245 ; VI-NEXT: s_mov_b32 s3, 0xf000
246 ; VI-NEXT: s_mov_b32 s2, -1
247 ; VI-NEXT: v_mov_b32_e32 v0, -1.0
248 ; VI-NEXT: s_waitcnt lgkmcnt(0)
249 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
251 store float -1.0, float addrspace(1)* %out
; Store float 2.0. The expected v_mov_b32 takes 2.0 directly as its source
; operand (no hex literal) on both runs.
255 define amdgpu_kernel void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
256 ; SI-LABEL: store_inline_imm_2.0_f32:
258 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
259 ; SI-NEXT: s_mov_b32 s3, 0xf000
260 ; SI-NEXT: s_mov_b32 s2, -1
261 ; SI-NEXT: v_mov_b32_e32 v0, 2.0
262 ; SI-NEXT: s_waitcnt lgkmcnt(0)
263 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
266 ; VI-LABEL: store_inline_imm_2.0_f32:
268 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
269 ; VI-NEXT: s_mov_b32 s3, 0xf000
270 ; VI-NEXT: s_mov_b32 s2, -1
271 ; VI-NEXT: v_mov_b32_e32 v0, 2.0
272 ; VI-NEXT: s_waitcnt lgkmcnt(0)
273 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
275 store float 2.0, float addrspace(1)* %out
; Store float -2.0. The expected v_mov_b32 takes -2.0 directly as its source
; operand (no hex literal) on both runs.
279 define amdgpu_kernel void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
280 ; SI-LABEL: store_inline_imm_m_2.0_f32:
282 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
283 ; SI-NEXT: s_mov_b32 s3, 0xf000
284 ; SI-NEXT: s_mov_b32 s2, -1
285 ; SI-NEXT: v_mov_b32_e32 v0, -2.0
286 ; SI-NEXT: s_waitcnt lgkmcnt(0)
287 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
290 ; VI-LABEL: store_inline_imm_m_2.0_f32:
292 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
293 ; VI-NEXT: s_mov_b32 s3, 0xf000
294 ; VI-NEXT: s_mov_b32 s2, -1
295 ; VI-NEXT: v_mov_b32_e32 v0, -2.0
296 ; VI-NEXT: s_waitcnt lgkmcnt(0)
297 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
299 store float -2.0, float addrspace(1)* %out
; Store float 4.0. The expected v_mov_b32 takes 4.0 directly as its source
; operand (no hex literal) on both runs.
303 define amdgpu_kernel void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
304 ; SI-LABEL: store_inline_imm_4.0_f32:
306 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
307 ; SI-NEXT: s_mov_b32 s3, 0xf000
308 ; SI-NEXT: s_mov_b32 s2, -1
309 ; SI-NEXT: v_mov_b32_e32 v0, 4.0
310 ; SI-NEXT: s_waitcnt lgkmcnt(0)
311 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
314 ; VI-LABEL: store_inline_imm_4.0_f32:
316 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
317 ; VI-NEXT: s_mov_b32 s3, 0xf000
318 ; VI-NEXT: s_mov_b32 s2, -1
319 ; VI-NEXT: v_mov_b32_e32 v0, 4.0
320 ; VI-NEXT: s_waitcnt lgkmcnt(0)
321 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
323 store float 4.0, float addrspace(1)* %out
; Store float -4.0. The expected v_mov_b32 takes -4.0 directly as its source
; operand (no hex literal) on both runs.
327 define amdgpu_kernel void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
328 ; SI-LABEL: store_inline_imm_m_4.0_f32:
330 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
331 ; SI-NEXT: s_mov_b32 s3, 0xf000
332 ; SI-NEXT: s_mov_b32 s2, -1
333 ; SI-NEXT: v_mov_b32_e32 v0, -4.0
334 ; SI-NEXT: s_waitcnt lgkmcnt(0)
335 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
338 ; VI-LABEL: store_inline_imm_m_4.0_f32:
340 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
341 ; VI-NEXT: s_mov_b32 s3, 0xf000
342 ; VI-NEXT: s_mov_b32 s2, -1
343 ; VI-NEXT: v_mov_b32_e32 v0, -4.0
344 ; VI-NEXT: s_waitcnt lgkmcnt(0)
345 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
347 store float -4.0, float addrspace(1)* %out
; Store the float nearest 1/(2*pi), written in IR as 0x3FC45F3060000000. This
; is the one store case where the two runs differ. The verde checks expect the
; 32-bit literal 0x3e22f983, while the tonga checks expect the operand
; 0.15915494 with no hex literal.
351 define amdgpu_kernel void @store_inline_imm_inv_2pi_f32(float addrspace(1)* %out) {
352 ; SI-LABEL: store_inline_imm_inv_2pi_f32:
354 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
355 ; SI-NEXT: s_mov_b32 s3, 0xf000
356 ; SI-NEXT: s_mov_b32 s2, -1
357 ; SI-NEXT: v_mov_b32_e32 v0, 0x3e22f983
358 ; SI-NEXT: s_waitcnt lgkmcnt(0)
359 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
362 ; VI-LABEL: store_inline_imm_inv_2pi_f32:
364 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
365 ; VI-NEXT: s_mov_b32 s3, 0xf000
366 ; VI-NEXT: s_mov_b32 s2, -1
367 ; VI-NEXT: v_mov_b32_e32 v0, 0.15915494
368 ; VI-NEXT: s_waitcnt lgkmcnt(0)
369 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
371 store float 0x3FC45F3060000000, float addrspace(1)* %out
; Store the float nearest -1/(2*pi), written in IR as 0xBFC45F3060000000. Both
; runs expect the 32-bit literal 0xbe22f983, so the negated value is expected
; as a literal on tonga as well as on verde.
375 define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f32(float addrspace(1)* %out) {
376 ; SI-LABEL: store_inline_imm_m_inv_2pi_f32:
378 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
379 ; SI-NEXT: s_mov_b32 s3, 0xf000
380 ; SI-NEXT: s_mov_b32 s2, -1
381 ; SI-NEXT: v_mov_b32_e32 v0, 0xbe22f983
382 ; SI-NEXT: s_waitcnt lgkmcnt(0)
383 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
386 ; VI-LABEL: store_inline_imm_m_inv_2pi_f32:
388 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
389 ; VI-NEXT: s_mov_b32 s3, 0xf000
390 ; VI-NEXT: s_mov_b32 s2, -1
391 ; VI-NEXT: v_mov_b32_e32 v0, 0xbe22f983
392 ; VI-NEXT: s_waitcnt lgkmcnt(0)
393 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
395 store float 0xBFC45F3060000000, float addrspace(1)* %out
; Store float 4096.0. Both runs expect the value to be moved as the 32-bit
; literal 0x45800000.
399 define amdgpu_kernel void @store_literal_imm_f32(float addrspace(1)* %out) {
400 ; SI-LABEL: store_literal_imm_f32:
402 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
403 ; SI-NEXT: s_mov_b32 s3, 0xf000
404 ; SI-NEXT: s_mov_b32 s2, -1
405 ; SI-NEXT: v_mov_b32_e32 v0, 0x45800000
406 ; SI-NEXT: s_waitcnt lgkmcnt(0)
407 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
410 ; VI-LABEL: store_literal_imm_f32:
412 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
413 ; VI-NEXT: s_mov_b32 s3, 0xf000
414 ; VI-NEXT: s_mov_b32 s2, -1
415 ; VI-NEXT: v_mov_b32_e32 v0, 0x45800000
416 ; VI-NEXT: s_waitcnt lgkmcnt(0)
417 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
419 store float 4096.0, float addrspace(1)* %out
; fadd of the float kernel argument %x with 0.0. The checks expect %x to be
; loaded into s0 and the constant 0 to appear directly as the second source of
; v_add_f32_e64, with no separate move for the constant.
423 define amdgpu_kernel void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
424 ; SI-LABEL: add_inline_imm_0.0_f32:
426 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
427 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
428 ; SI-NEXT: s_mov_b32 s7, 0xf000
429 ; SI-NEXT: s_mov_b32 s6, -1
430 ; SI-NEXT: s_waitcnt lgkmcnt(0)
431 ; SI-NEXT: v_add_f32_e64 v0, s0, 0
432 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
435 ; VI-LABEL: add_inline_imm_0.0_f32:
437 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
438 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
439 ; VI-NEXT: s_mov_b32 s7, 0xf000
440 ; VI-NEXT: s_mov_b32 s6, -1
441 ; VI-NEXT: s_waitcnt lgkmcnt(0)
442 ; VI-NEXT: v_add_f32_e64 v0, s0, 0
443 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
445 %y = fadd float %x, 0.0
446 store float %y, float addrspace(1)* %out
; fadd of the float kernel argument %x with 0.5. The checks expect the constant
; 0.5 to appear directly as the second source of v_add_f32_e64.
450 define amdgpu_kernel void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
451 ; SI-LABEL: add_inline_imm_0.5_f32:
453 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
454 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
455 ; SI-NEXT: s_mov_b32 s7, 0xf000
456 ; SI-NEXT: s_mov_b32 s6, -1
457 ; SI-NEXT: s_waitcnt lgkmcnt(0)
458 ; SI-NEXT: v_add_f32_e64 v0, s0, 0.5
459 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
462 ; VI-LABEL: add_inline_imm_0.5_f32:
464 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
465 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
466 ; VI-NEXT: s_mov_b32 s7, 0xf000
467 ; VI-NEXT: s_mov_b32 s6, -1
468 ; VI-NEXT: s_waitcnt lgkmcnt(0)
469 ; VI-NEXT: v_add_f32_e64 v0, s0, 0.5
470 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
472 %y = fadd float %x, 0.5
473 store float %y, float addrspace(1)* %out
; fadd of the float kernel argument %x with -0.5. The checks expect the
; constant -0.5 to appear directly as the second source of v_add_f32_e64.
477 define amdgpu_kernel void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
478 ; SI-LABEL: add_inline_imm_neg_0.5_f32:
480 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
481 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
482 ; SI-NEXT: s_mov_b32 s7, 0xf000
483 ; SI-NEXT: s_mov_b32 s6, -1
484 ; SI-NEXT: s_waitcnt lgkmcnt(0)
485 ; SI-NEXT: v_add_f32_e64 v0, s0, -0.5
486 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
489 ; VI-LABEL: add_inline_imm_neg_0.5_f32:
491 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
492 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
493 ; VI-NEXT: s_mov_b32 s7, 0xf000
494 ; VI-NEXT: s_mov_b32 s6, -1
495 ; VI-NEXT: s_waitcnt lgkmcnt(0)
496 ; VI-NEXT: v_add_f32_e64 v0, s0, -0.5
497 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
499 %y = fadd float %x, -0.5
500 store float %y, float addrspace(1)* %out
; fadd of the float kernel argument %x with 1.0. The checks expect the constant
; 1.0 to appear directly as the second source of v_add_f32_e64.
504 define amdgpu_kernel void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
505 ; SI-LABEL: add_inline_imm_1.0_f32:
507 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
508 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
509 ; SI-NEXT: s_mov_b32 s7, 0xf000
510 ; SI-NEXT: s_mov_b32 s6, -1
511 ; SI-NEXT: s_waitcnt lgkmcnt(0)
512 ; SI-NEXT: v_add_f32_e64 v0, s0, 1.0
513 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
516 ; VI-LABEL: add_inline_imm_1.0_f32:
518 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
519 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
520 ; VI-NEXT: s_mov_b32 s7, 0xf000
521 ; VI-NEXT: s_mov_b32 s6, -1
522 ; VI-NEXT: s_waitcnt lgkmcnt(0)
523 ; VI-NEXT: v_add_f32_e64 v0, s0, 1.0
524 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
526 %y = fadd float %x, 1.0
527 store float %y, float addrspace(1)* %out
; fadd of the float kernel argument %x with -1.0. The checks expect the
; constant -1.0 to appear directly as the second source of v_add_f32_e64.
531 define amdgpu_kernel void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
532 ; SI-LABEL: add_inline_imm_neg_1.0_f32:
534 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
535 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
536 ; SI-NEXT: s_mov_b32 s7, 0xf000
537 ; SI-NEXT: s_mov_b32 s6, -1
538 ; SI-NEXT: s_waitcnt lgkmcnt(0)
539 ; SI-NEXT: v_add_f32_e64 v0, s0, -1.0
540 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
543 ; VI-LABEL: add_inline_imm_neg_1.0_f32:
545 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
546 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
547 ; VI-NEXT: s_mov_b32 s7, 0xf000
548 ; VI-NEXT: s_mov_b32 s6, -1
549 ; VI-NEXT: s_waitcnt lgkmcnt(0)
550 ; VI-NEXT: v_add_f32_e64 v0, s0, -1.0
551 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
553 %y = fadd float %x, -1.0
554 store float %y, float addrspace(1)* %out
; fadd of the float kernel argument %x with 2.0. The checks expect the constant
; 2.0 to appear directly as the second source of v_add_f32_e64.
558 define amdgpu_kernel void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
559 ; SI-LABEL: add_inline_imm_2.0_f32:
561 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
562 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
563 ; SI-NEXT: s_mov_b32 s7, 0xf000
564 ; SI-NEXT: s_mov_b32 s6, -1
565 ; SI-NEXT: s_waitcnt lgkmcnt(0)
566 ; SI-NEXT: v_add_f32_e64 v0, s0, 2.0
567 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
570 ; VI-LABEL: add_inline_imm_2.0_f32:
572 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
573 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
574 ; VI-NEXT: s_mov_b32 s7, 0xf000
575 ; VI-NEXT: s_mov_b32 s6, -1
576 ; VI-NEXT: s_waitcnt lgkmcnt(0)
577 ; VI-NEXT: v_add_f32_e64 v0, s0, 2.0
578 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
580 %y = fadd float %x, 2.0
581 store float %y, float addrspace(1)* %out
; fadd of the float kernel argument %x with -2.0. The checks expect the
; constant -2.0 to appear directly as the second source of v_add_f32_e64.
585 define amdgpu_kernel void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
586 ; SI-LABEL: add_inline_imm_neg_2.0_f32:
588 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
589 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
590 ; SI-NEXT: s_mov_b32 s7, 0xf000
591 ; SI-NEXT: s_mov_b32 s6, -1
592 ; SI-NEXT: s_waitcnt lgkmcnt(0)
593 ; SI-NEXT: v_add_f32_e64 v0, s0, -2.0
594 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
597 ; VI-LABEL: add_inline_imm_neg_2.0_f32:
599 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
600 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
601 ; VI-NEXT: s_mov_b32 s7, 0xf000
602 ; VI-NEXT: s_mov_b32 s6, -1
603 ; VI-NEXT: s_waitcnt lgkmcnt(0)
604 ; VI-NEXT: v_add_f32_e64 v0, s0, -2.0
605 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
607 %y = fadd float %x, -2.0
608 store float %y, float addrspace(1)* %out
; fadd of the float kernel argument %x with 4.0. The checks expect the constant
; 4.0 to appear directly as the second source of v_add_f32_e64.
612 define amdgpu_kernel void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
613 ; SI-LABEL: add_inline_imm_4.0_f32:
615 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
616 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
617 ; SI-NEXT: s_mov_b32 s7, 0xf000
618 ; SI-NEXT: s_mov_b32 s6, -1
619 ; SI-NEXT: s_waitcnt lgkmcnt(0)
620 ; SI-NEXT: v_add_f32_e64 v0, s0, 4.0
621 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
624 ; VI-LABEL: add_inline_imm_4.0_f32:
626 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
627 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
628 ; VI-NEXT: s_mov_b32 s7, 0xf000
629 ; VI-NEXT: s_mov_b32 s6, -1
630 ; VI-NEXT: s_waitcnt lgkmcnt(0)
631 ; VI-NEXT: v_add_f32_e64 v0, s0, 4.0
632 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
634 %y = fadd float %x, 4.0
635 store float %y, float addrspace(1)* %out
; fadd of the float kernel argument %x with -4.0. The checks expect the
; constant -4.0 to appear directly as the second source of v_add_f32_e64.
639 define amdgpu_kernel void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
640 ; SI-LABEL: add_inline_imm_neg_4.0_f32:
642 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
643 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
644 ; SI-NEXT: s_mov_b32 s7, 0xf000
645 ; SI-NEXT: s_mov_b32 s6, -1
646 ; SI-NEXT: s_waitcnt lgkmcnt(0)
647 ; SI-NEXT: v_add_f32_e64 v0, s0, -4.0
648 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
651 ; VI-LABEL: add_inline_imm_neg_4.0_f32:
653 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
654 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
655 ; VI-NEXT: s_mov_b32 s7, 0xf000
656 ; VI-NEXT: s_mov_b32 s6, -1
657 ; VI-NEXT: s_waitcnt lgkmcnt(0)
658 ; VI-NEXT: v_add_f32_e64 v0, s0, -4.0
659 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
661 %y = fadd float %x, -4.0
662 store float %y, float addrspace(1)* %out
; fadd of a float loaded from %in with 0.5, stored to %out. Here the other
; operand comes from a buffer load into v0 instead of a scalar argument. The
; checks expect the e32 form of v_add_f32 with the constant 0.5 in the first
; source position and v0 in the second.
666 define amdgpu_kernel void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
667 ; SI-LABEL: commute_add_inline_imm_0.5_f32:
669 ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
670 ; SI-NEXT: s_mov_b32 s3, 0xf000
671 ; SI-NEXT: s_mov_b32 s2, -1
672 ; SI-NEXT: s_waitcnt lgkmcnt(0)
673 ; SI-NEXT: s_mov_b32 s0, s4
674 ; SI-NEXT: s_mov_b32 s1, s5
675 ; SI-NEXT: s_mov_b32 s4, s6
676 ; SI-NEXT: s_mov_b32 s5, s7
677 ; SI-NEXT: s_mov_b32 s6, s2
678 ; SI-NEXT: s_mov_b32 s7, s3
679 ; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0
680 ; SI-NEXT: s_waitcnt vmcnt(0)
681 ; SI-NEXT: v_add_f32_e32 v0, 0.5, v0
682 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
685 ; VI-LABEL: commute_add_inline_imm_0.5_f32:
687 ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
688 ; VI-NEXT: s_mov_b32 s3, 0xf000
689 ; VI-NEXT: s_mov_b32 s2, -1
690 ; VI-NEXT: s_waitcnt lgkmcnt(0)
691 ; VI-NEXT: s_mov_b32 s0, s4
692 ; VI-NEXT: s_mov_b32 s1, s5
693 ; VI-NEXT: s_mov_b32 s4, s6
694 ; VI-NEXT: s_mov_b32 s5, s7
695 ; VI-NEXT: s_mov_b32 s6, s2
696 ; VI-NEXT: s_mov_b32 s7, s3
697 ; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
698 ; VI-NEXT: s_waitcnt vmcnt(0)
699 ; VI-NEXT: v_add_f32_e32 v0, 0.5, v0
700 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
702 %x = load float, float addrspace(1)* %in
703 %y = fadd float %x, 0.5
704 store float %y, float addrspace(1)* %out
; fadd of a float loaded from %in with 1024.0, stored to %out. The checks
; expect the constant as the 32-bit literal 0x44800000 in the first source
; position of the e32 form of v_add_f32, with the loaded v0 in the second.
708 define amdgpu_kernel void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
709 ; SI-LABEL: commute_add_literal_f32:
711 ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
712 ; SI-NEXT: s_mov_b32 s3, 0xf000
713 ; SI-NEXT: s_mov_b32 s2, -1
714 ; SI-NEXT: s_waitcnt lgkmcnt(0)
715 ; SI-NEXT: s_mov_b32 s0, s4
716 ; SI-NEXT: s_mov_b32 s1, s5
717 ; SI-NEXT: s_mov_b32 s4, s6
718 ; SI-NEXT: s_mov_b32 s5, s7
719 ; SI-NEXT: s_mov_b32 s6, s2
720 ; SI-NEXT: s_mov_b32 s7, s3
721 ; SI-NEXT: buffer_load_dword v0, off, s[4:7], 0
722 ; SI-NEXT: s_waitcnt vmcnt(0)
723 ; SI-NEXT: v_add_f32_e32 v0, 0x44800000, v0
724 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
727 ; VI-LABEL: commute_add_literal_f32:
729 ; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
730 ; VI-NEXT: s_mov_b32 s3, 0xf000
731 ; VI-NEXT: s_mov_b32 s2, -1
732 ; VI-NEXT: s_waitcnt lgkmcnt(0)
733 ; VI-NEXT: s_mov_b32 s0, s4
734 ; VI-NEXT: s_mov_b32 s1, s5
735 ; VI-NEXT: s_mov_b32 s4, s6
736 ; VI-NEXT: s_mov_b32 s5, s7
737 ; VI-NEXT: s_mov_b32 s6, s2
738 ; VI-NEXT: s_mov_b32 s7, s3
739 ; VI-NEXT: buffer_load_dword v0, off, s[4:7], 0
740 ; VI-NEXT: s_waitcnt vmcnt(0)
741 ; VI-NEXT: v_add_f32_e32 v0, 0x44800000, v0
742 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
744 %x = load float, float addrspace(1)* %in
745 %y = fadd float %x, 1024.0
746 store float %y, float addrspace(1)* %out
; fadd of %x with the float whose 32-bit pattern is the integer 1. The IR
; spells that denormal as the double-hex constant 0x36a0000000000000 (2^-149).
; The checks expect it to appear as the integer operand 1 of v_add_f32_e64.
750 define amdgpu_kernel void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) {
751 ; SI-LABEL: add_inline_imm_1_f32:
753 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
754 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
755 ; SI-NEXT: s_mov_b32 s7, 0xf000
756 ; SI-NEXT: s_mov_b32 s6, -1
757 ; SI-NEXT: s_waitcnt lgkmcnt(0)
758 ; SI-NEXT: v_add_f32_e64 v0, s0, 1
759 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
762 ; VI-LABEL: add_inline_imm_1_f32:
764 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
765 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
766 ; VI-NEXT: s_mov_b32 s7, 0xf000
767 ; VI-NEXT: s_mov_b32 s6, -1
768 ; VI-NEXT: s_waitcnt lgkmcnt(0)
769 ; VI-NEXT: v_add_f32_e64 v0, s0, 1
770 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
772 %y = fadd float %x, 0x36a0000000000000
773 store float %y, float addrspace(1)* %out
; fadd of %x with the float whose 32-bit pattern is the integer 2. The IR
; spells that denormal as the double-hex constant 0x36b0000000000000 (2^-148).
; The checks expect it to appear as the integer operand 2 of v_add_f32_e64.
777 define amdgpu_kernel void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) {
778 ; SI-LABEL: add_inline_imm_2_f32:
780 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
781 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
782 ; SI-NEXT: s_mov_b32 s7, 0xf000
783 ; SI-NEXT: s_mov_b32 s6, -1
784 ; SI-NEXT: s_waitcnt lgkmcnt(0)
785 ; SI-NEXT: v_add_f32_e64 v0, s0, 2
786 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
789 ; VI-LABEL: add_inline_imm_2_f32:
791 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
792 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
793 ; VI-NEXT: s_mov_b32 s7, 0xf000
794 ; VI-NEXT: s_mov_b32 s6, -1
795 ; VI-NEXT: s_waitcnt lgkmcnt(0)
796 ; VI-NEXT: v_add_f32_e64 v0, s0, 2
797 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
799 %y = fadd float %x, 0x36b0000000000000
800 store float %y, float addrspace(1)* %out
; fadd of %x with the float whose 32-bit pattern is the integer 16. The IR
; spells that denormal as the double-hex constant 0x36e0000000000000 (2^-145).
; The checks expect it to appear as the integer operand 16 of v_add_f32_e64.
804 define amdgpu_kernel void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) {
805 ; SI-LABEL: add_inline_imm_16_f32:
807 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
808 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
809 ; SI-NEXT: s_mov_b32 s7, 0xf000
810 ; SI-NEXT: s_mov_b32 s6, -1
811 ; SI-NEXT: s_waitcnt lgkmcnt(0)
812 ; SI-NEXT: v_add_f32_e64 v0, s0, 16
813 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
816 ; VI-LABEL: add_inline_imm_16_f32:
818 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
819 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
820 ; VI-NEXT: s_mov_b32 s7, 0xf000
821 ; VI-NEXT: s_mov_b32 s6, -1
822 ; VI-NEXT: s_waitcnt lgkmcnt(0)
823 ; VI-NEXT: v_add_f32_e64 v0, s0, 16
824 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
826 %y = fadd float %x, 0x36e0000000000000
827 store float %y, float addrspace(1)* %out
; Despite the _f32 suffix this kernel does no float arithmetic. It bitcasts %x
; to i32, adds the integer -1, and bitcasts the sum back to float for the
; store. The checks expect a scalar s_add_i32 with the operand -1 followed by a
; copy of the result into v0.
831 define amdgpu_kernel void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
832 ; SI-LABEL: add_inline_imm_neg_1_f32:
834 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
835 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
836 ; SI-NEXT: s_mov_b32 s7, 0xf000
837 ; SI-NEXT: s_mov_b32 s6, -1
838 ; SI-NEXT: s_waitcnt lgkmcnt(0)
839 ; SI-NEXT: s_add_i32 s0, s0, -1
840 ; SI-NEXT: v_mov_b32_e32 v0, s0
841 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
844 ; VI-LABEL: add_inline_imm_neg_1_f32:
846 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
847 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
848 ; VI-NEXT: s_mov_b32 s7, 0xf000
849 ; VI-NEXT: s_mov_b32 s6, -1
850 ; VI-NEXT: s_waitcnt lgkmcnt(0)
851 ; VI-NEXT: s_add_i32 s0, s0, -1
852 ; VI-NEXT: v_mov_b32_e32 v0, s0
853 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
855 %xbc = bitcast float %x to i32
856 %y = add i32 %xbc, -1
857 %ybc = bitcast i32 %y to float
858 store float %ybc, float addrspace(1)* %out
; Despite the _f32 suffix this kernel does no float arithmetic. It bitcasts %x
; to i32, adds the integer -2, and bitcasts the sum back to float for the
; store. The checks expect a scalar s_add_i32 with the operand -2 followed by a
; copy of the result into v0.
862 define amdgpu_kernel void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
863 ; SI-LABEL: add_inline_imm_neg_2_f32:
865 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
866 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
867 ; SI-NEXT: s_mov_b32 s7, 0xf000
868 ; SI-NEXT: s_mov_b32 s6, -1
869 ; SI-NEXT: s_waitcnt lgkmcnt(0)
870 ; SI-NEXT: s_add_i32 s0, s0, -2
871 ; SI-NEXT: v_mov_b32_e32 v0, s0
872 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
875 ; VI-LABEL: add_inline_imm_neg_2_f32:
877 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
878 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
879 ; VI-NEXT: s_mov_b32 s7, 0xf000
880 ; VI-NEXT: s_mov_b32 s6, -1
881 ; VI-NEXT: s_waitcnt lgkmcnt(0)
882 ; VI-NEXT: s_add_i32 s0, s0, -2
883 ; VI-NEXT: v_mov_b32_e32 v0, s0
884 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
886 %xbc = bitcast float %x to i32
887 %y = add i32 %xbc, -2
888 %ybc = bitcast i32 %y to float
889 store float %ybc, float addrspace(1)* %out
; Despite the _f32 suffix this kernel does no float arithmetic. It bitcasts %x
; to i32, adds the integer -16, and bitcasts the sum back to float for the
; store. The checks expect a scalar s_add_i32 with the operand -16 followed by
; a copy of the result into v0.
893 define amdgpu_kernel void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
894 ; SI-LABEL: add_inline_imm_neg_16_f32:
896 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
897 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
898 ; SI-NEXT: s_mov_b32 s7, 0xf000
899 ; SI-NEXT: s_mov_b32 s6, -1
900 ; SI-NEXT: s_waitcnt lgkmcnt(0)
901 ; SI-NEXT: s_add_i32 s0, s0, -16
902 ; SI-NEXT: v_mov_b32_e32 v0, s0
903 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
906 ; VI-LABEL: add_inline_imm_neg_16_f32:
908 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
909 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
910 ; VI-NEXT: s_mov_b32 s7, 0xf000
911 ; VI-NEXT: s_mov_b32 s6, -1
912 ; VI-NEXT: s_waitcnt lgkmcnt(0)
913 ; VI-NEXT: s_add_i32 s0, s0, -16
914 ; VI-NEXT: v_mov_b32_e32 v0, s0
915 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
917 %xbc = bitcast float %x to i32
918 %y = add i32 %xbc, -16
919 %ybc = bitcast i32 %y to float
920 store float %ybc, float addrspace(1)* %out
; fadd of %x with the float whose 32-bit pattern is the integer 63. The IR
; spells that denormal as the double-hex constant 0x36ff800000000000.
; The checks expect it to appear as the integer operand 63 of v_add_f32_e64.
924 define amdgpu_kernel void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) {
925 ; SI-LABEL: add_inline_imm_63_f32:
927 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
928 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
929 ; SI-NEXT: s_mov_b32 s7, 0xf000
930 ; SI-NEXT: s_mov_b32 s6, -1
931 ; SI-NEXT: s_waitcnt lgkmcnt(0)
932 ; SI-NEXT: v_add_f32_e64 v0, s0, 63
933 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
936 ; VI-LABEL: add_inline_imm_63_f32:
938 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
939 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
940 ; VI-NEXT: s_mov_b32 s7, 0xf000
941 ; VI-NEXT: s_mov_b32 s6, -1
942 ; VI-NEXT: s_waitcnt lgkmcnt(0)
943 ; VI-NEXT: v_add_f32_e64 v0, s0, 63
944 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
946 %y = fadd float %x, 0x36ff800000000000
947 store float %y, float addrspace(1)* %out
; fadd of %x with the float whose 32-bit pattern is the integer 64. The IR
; spells that denormal as the double-hex constant 0x3700000000000000 (2^-143).
; The checks expect it to appear as the integer operand 64 of v_add_f32_e64.
951 define amdgpu_kernel void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
952 ; SI-LABEL: add_inline_imm_64_f32:
954 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
955 ; SI-NEXT: s_load_dword s0, s[0:1], 0xb
956 ; SI-NEXT: s_mov_b32 s7, 0xf000
957 ; SI-NEXT: s_mov_b32 s6, -1
958 ; SI-NEXT: s_waitcnt lgkmcnt(0)
959 ; SI-NEXT: v_add_f32_e64 v0, s0, 64
960 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
963 ; VI-LABEL: add_inline_imm_64_f32:
965 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
966 ; VI-NEXT: s_load_dword s0, s[0:1], 0x2c
967 ; VI-NEXT: s_mov_b32 s7, 0xf000
968 ; VI-NEXT: s_mov_b32 s6, -1
969 ; VI-NEXT: s_waitcnt lgkmcnt(0)
970 ; VI-NEXT: v_add_f32_e64 v0, s0, 64
971 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
973 %y = fadd float %x, 0x3700000000000000
974 store float %y, float addrspace(1)* %out
; fadd of the double kernel argument %x with 0.0. An unnamed [8 x i32] argument
; sits between %out and %x; the checks load %x from kernarg offset 0x13 on
; verde and 0x4c on tonga. The constant 0 is expected directly as the second
; source of v_add_f64.
978 define amdgpu_kernel void @add_inline_imm_0.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
979 ; SI-LABEL: add_inline_imm_0.0_f64:
981 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
982 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
983 ; SI-NEXT: s_waitcnt lgkmcnt(0)
984 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], 0
985 ; SI-NEXT: s_mov_b32 s3, 0xf000
986 ; SI-NEXT: s_mov_b32 s2, -1
987 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
990 ; VI-LABEL: add_inline_imm_0.0_f64:
992 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
993 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
994 ; VI-NEXT: s_waitcnt lgkmcnt(0)
995 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], 0
996 ; VI-NEXT: s_mov_b32 s3, 0xf000
997 ; VI-NEXT: s_mov_b32 s2, -1
998 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1000 %y = fadd double %x, 0.0
1001 store double %y, double addrspace(1)* %out
; fadd of the double kernel argument %x with 0.5. An unnamed [8 x i32] argument
; sits between %out and %x; the checks load %x from kernarg offset 0x13 on
; verde and 0x4c on tonga. The constant 0.5 is expected directly as the second
; source of v_add_f64.
1005 define amdgpu_kernel void @add_inline_imm_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1006 ; SI-LABEL: add_inline_imm_0.5_f64:
1008 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1009 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1010 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1011 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], 0.5
1012 ; SI-NEXT: s_mov_b32 s3, 0xf000
1013 ; SI-NEXT: s_mov_b32 s2, -1
1014 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1017 ; VI-LABEL: add_inline_imm_0.5_f64:
1019 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1020 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1021 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1022 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], 0.5
1023 ; VI-NEXT: s_mov_b32 s3, 0xf000
1024 ; VI-NEXT: s_mov_b32 s2, -1
1025 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1027 %y = fadd double %x, 0.5
1028 store double %y, double addrspace(1)* %out
; Adds the double constant -0.5 to the kernel argument %x and stores the sum to
; %out. Both check blocks expect -0.5 to be used directly as an operand of
; v_add_f64.
1032 define amdgpu_kernel void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1033 ; SI-LABEL: add_inline_imm_neg_0.5_f64:
1035 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1036 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1037 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1038 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], -0.5
1039 ; SI-NEXT: s_mov_b32 s3, 0xf000
1040 ; SI-NEXT: s_mov_b32 s2, -1
1041 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1044 ; VI-LABEL: add_inline_imm_neg_0.5_f64:
1046 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1047 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1048 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1049 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], -0.5
1050 ; VI-NEXT: s_mov_b32 s3, 0xf000
1051 ; VI-NEXT: s_mov_b32 s2, -1
1052 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1054 %y = fadd double %x, -0.5
1055 store double %y, double addrspace(1)* %out
; Adds the double constant 1.0 to the kernel argument %x and stores the sum to
; %out. Both check blocks expect 1.0 to be used directly as an operand of
; v_add_f64.
1059 define amdgpu_kernel void @add_inline_imm_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1060 ; SI-LABEL: add_inline_imm_1.0_f64:
1062 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1063 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1064 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1065 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], 1.0
1066 ; SI-NEXT: s_mov_b32 s3, 0xf000
1067 ; SI-NEXT: s_mov_b32 s2, -1
1068 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1071 ; VI-LABEL: add_inline_imm_1.0_f64:
1073 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1074 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1075 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1076 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], 1.0
1077 ; VI-NEXT: s_mov_b32 s3, 0xf000
1078 ; VI-NEXT: s_mov_b32 s2, -1
1079 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1081 %y = fadd double %x, 1.0
1082 store double %y, double addrspace(1)* %out
; Adds the double constant -1.0 to the kernel argument %x and stores the sum to
; %out. Both check blocks expect -1.0 to be used directly as an operand of
; v_add_f64.
1086 define amdgpu_kernel void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1087 ; SI-LABEL: add_inline_imm_neg_1.0_f64:
1089 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1090 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1091 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1092 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], -1.0
1093 ; SI-NEXT: s_mov_b32 s3, 0xf000
1094 ; SI-NEXT: s_mov_b32 s2, -1
1095 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1098 ; VI-LABEL: add_inline_imm_neg_1.0_f64:
1100 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1101 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1102 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1103 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], -1.0
1104 ; VI-NEXT: s_mov_b32 s3, 0xf000
1105 ; VI-NEXT: s_mov_b32 s2, -1
1106 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1108 %y = fadd double %x, -1.0
1109 store double %y, double addrspace(1)* %out
; Adds the double constant 2.0 to the kernel argument %x and stores the sum to
; %out. Both check blocks expect 2.0 to be used directly as an operand of
; v_add_f64.
1113 define amdgpu_kernel void @add_inline_imm_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1114 ; SI-LABEL: add_inline_imm_2.0_f64:
1116 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1117 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1118 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1119 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], 2.0
1120 ; SI-NEXT: s_mov_b32 s3, 0xf000
1121 ; SI-NEXT: s_mov_b32 s2, -1
1122 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1125 ; VI-LABEL: add_inline_imm_2.0_f64:
1127 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1128 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1129 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1130 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], 2.0
1131 ; VI-NEXT: s_mov_b32 s3, 0xf000
1132 ; VI-NEXT: s_mov_b32 s2, -1
1133 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1135 %y = fadd double %x, 2.0
1136 store double %y, double addrspace(1)* %out
; Adds the double constant -2.0 to the kernel argument %x and stores the sum to
; %out. Both check blocks expect -2.0 to be used directly as an operand of
; v_add_f64.
1140 define amdgpu_kernel void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1141 ; SI-LABEL: add_inline_imm_neg_2.0_f64:
1143 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1144 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1145 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1146 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], -2.0
1147 ; SI-NEXT: s_mov_b32 s3, 0xf000
1148 ; SI-NEXT: s_mov_b32 s2, -1
1149 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1152 ; VI-LABEL: add_inline_imm_neg_2.0_f64:
1154 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1155 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1156 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1157 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], -2.0
1158 ; VI-NEXT: s_mov_b32 s3, 0xf000
1159 ; VI-NEXT: s_mov_b32 s2, -1
1160 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1162 %y = fadd double %x, -2.0
1163 store double %y, double addrspace(1)* %out
; Adds the double constant 4.0 to the kernel argument %x and stores the sum to
; %out. Both check blocks expect 4.0 to be used directly as an operand of
; v_add_f64.
1167 define amdgpu_kernel void @add_inline_imm_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1168 ; SI-LABEL: add_inline_imm_4.0_f64:
1170 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1171 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1172 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1173 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], 4.0
1174 ; SI-NEXT: s_mov_b32 s3, 0xf000
1175 ; SI-NEXT: s_mov_b32 s2, -1
1176 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1179 ; VI-LABEL: add_inline_imm_4.0_f64:
1181 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1182 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1183 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1184 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], 4.0
1185 ; VI-NEXT: s_mov_b32 s3, 0xf000
1186 ; VI-NEXT: s_mov_b32 s2, -1
1187 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1189 %y = fadd double %x, 4.0
1190 store double %y, double addrspace(1)* %out
; Adds the double constant -4.0 to the kernel argument %x and stores the sum to
; %out. Both check blocks expect -4.0 to be used directly as an operand of
; v_add_f64.
1194 define amdgpu_kernel void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1195 ; SI-LABEL: add_inline_imm_neg_4.0_f64:
1197 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1198 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1199 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1200 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], -4.0
1201 ; SI-NEXT: s_mov_b32 s3, 0xf000
1202 ; SI-NEXT: s_mov_b32 s2, -1
1203 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1206 ; VI-LABEL: add_inline_imm_neg_4.0_f64:
1208 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1209 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1210 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1211 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], -4.0
1212 ; VI-NEXT: s_mov_b32 s3, 0xf000
1213 ; VI-NEXT: s_mov_b32 s2, -1
1214 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1216 %y = fadd double %x, -4.0
1217 store double %y, double addrspace(1)* %out
; Adds the double with bit pattern 0x3fc45f306dc9c882 to the kernel argument %x
; and stores the sum to %out. The two targets are expected to differ here:
; the SI checks expect the constant to be built in v[0:1] with two v_mov_b32
; (low half 0x6dc9c882, high half 0x3fc45f30) and passed as a register pair,
; while the VI checks expect it as a direct v_add_f64 operand, printed as
; 0.15915494309189532.
1221 define amdgpu_kernel void @add_inline_imm_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1222 ; SI-LABEL: add_inline_imm_inv_2pi_f64:
1224 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1225 ; SI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882
1226 ; SI-NEXT: v_mov_b32_e32 v1, 0x3fc45f30
1227 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1228 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1229 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1]
1230 ; SI-NEXT: s_mov_b32 s3, 0xf000
1231 ; SI-NEXT: s_mov_b32 s2, -1
1232 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1235 ; VI-LABEL: add_inline_imm_inv_2pi_f64:
1237 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1238 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1239 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1240 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], 0.15915494309189532
1241 ; VI-NEXT: s_mov_b32 s3, 0xf000
1242 ; VI-NEXT: s_mov_b32 s2, -1
1243 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1245 %y = fadd double %x, 0x3fc45f306dc9c882
1246 store double %y, double addrspace(1)* %out
; Adds the double with bit pattern 0xbfc45f306dc9c882 (the constant of
; @add_inline_imm_inv_2pi_f64 with the sign bit set) to %x and stores the sum
; to %out. Both check blocks expect the constant to be built in v[0:1] with
; two v_mov_b32 (low half 0x6dc9c882, high half 0xbfc45f30) and passed to
; v_add_f64 as a register pair rather than as a direct operand.
1250 define amdgpu_kernel void @add_m_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1251 ; SI-LABEL: add_m_inv_2pi_f64:
1253 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1254 ; SI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882
1255 ; SI-NEXT: v_mov_b32_e32 v1, 0xbfc45f30
1256 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1257 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1258 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1]
1259 ; SI-NEXT: s_mov_b32 s3, 0xf000
1260 ; SI-NEXT: s_mov_b32 s2, -1
1261 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1264 ; VI-LABEL: add_m_inv_2pi_f64:
1266 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1267 ; VI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882
1268 ; VI-NEXT: v_mov_b32_e32 v1, 0xbfc45f30
1269 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1270 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1271 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1]
1272 ; VI-NEXT: s_mov_b32 s3, 0xf000
1273 ; VI-NEXT: s_mov_b32 s2, -1
1274 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1276 %y = fadd double %x, 0xbfc45f306dc9c882
1277 store double %y, double addrspace(1)* %out
; Adds the double whose 64-bit pattern is 0x0000000000000001 to %x and stores
; the sum to %out. Both check blocks expect the constant to appear as the
; integer operand `1` of v_add_f64 (note: `1`, not the floating-point `1.0`
; used by @add_inline_imm_1.0_f64).
1281 define amdgpu_kernel void @add_inline_imm_1_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1282 ; SI-LABEL: add_inline_imm_1_f64:
1284 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1285 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1286 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1287 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], 1
1288 ; SI-NEXT: s_mov_b32 s3, 0xf000
1289 ; SI-NEXT: s_mov_b32 s2, -1
1290 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1293 ; VI-LABEL: add_inline_imm_1_f64:
1295 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1296 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1297 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1298 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], 1
1299 ; VI-NEXT: s_mov_b32 s3, 0xf000
1300 ; VI-NEXT: s_mov_b32 s2, -1
1301 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1303 %y = fadd double %x, 0x0000000000000001
1304 store double %y, double addrspace(1)* %out
; Adds the double whose 64-bit pattern is 0x0000000000000002 to %x and stores
; the sum to %out. Both check blocks expect the constant to appear as the
; integer operand `2` of v_add_f64.
1308 define amdgpu_kernel void @add_inline_imm_2_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1309 ; SI-LABEL: add_inline_imm_2_f64:
1311 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1312 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1313 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1314 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], 2
1315 ; SI-NEXT: s_mov_b32 s3, 0xf000
1316 ; SI-NEXT: s_mov_b32 s2, -1
1317 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1320 ; VI-LABEL: add_inline_imm_2_f64:
1322 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1323 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1324 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1325 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], 2
1326 ; VI-NEXT: s_mov_b32 s3, 0xf000
1327 ; VI-NEXT: s_mov_b32 s2, -1
1328 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1330 %y = fadd double %x, 0x0000000000000002
1331 store double %y, double addrspace(1)* %out
; Adds the double whose 64-bit pattern is 0x0000000000000010 to %x and stores
; the sum to %out. Both check blocks expect the constant to appear as the
; integer operand `16` of v_add_f64.
1335 define amdgpu_kernel void @add_inline_imm_16_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1336 ; SI-LABEL: add_inline_imm_16_f64:
1338 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1339 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1340 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1341 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], 16
1342 ; SI-NEXT: s_mov_b32 s3, 0xf000
1343 ; SI-NEXT: s_mov_b32 s2, -1
1344 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1347 ; VI-LABEL: add_inline_imm_16_f64:
1349 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1350 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1351 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1352 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], 16
1353 ; VI-NEXT: s_mov_b32 s3, 0xf000
1354 ; VI-NEXT: s_mov_b32 s2, -1
1355 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1357 %y = fadd double %x, 0x0000000000000010
1358 store double %y, double addrspace(1)* %out
; fadd of %x with the double whose 64-bit pattern is 0xffffffffffffffff.
; Unlike the positive-integer variants, the checks contain no v_add_f64 and no
; load of %x: only %out is loaded, v0 is set to -1, v1 is copied from v0, and
; the pair is stored.
; NOTE(review): presumably the fadd is folded to a constant because this bit
; pattern is a NaN - confirm.
1362 define amdgpu_kernel void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1363 ; SI-LABEL: add_inline_imm_neg_1_f64:
1365 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1366 ; SI-NEXT: v_mov_b32_e32 v0, -1
1367 ; SI-NEXT: s_mov_b32 s3, 0xf000
1368 ; SI-NEXT: s_mov_b32 s2, -1
1369 ; SI-NEXT: v_mov_b32_e32 v1, v0
1370 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1371 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1374 ; VI-LABEL: add_inline_imm_neg_1_f64:
1376 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1377 ; VI-NEXT: v_mov_b32_e32 v0, -1
1378 ; VI-NEXT: s_mov_b32 s3, 0xf000
1379 ; VI-NEXT: s_mov_b32 s2, -1
1380 ; VI-NEXT: v_mov_b32_e32 v1, v0
1381 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1382 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1384 %y = fadd double %x, 0xffffffffffffffff
1385 store double %y, double addrspace(1)* %out
; fadd of %x with the double whose 64-bit pattern is 0xfffffffffffffffe.
; The checks contain no v_add_f64 and no load of %x: the low half v0 is set
; to -2, the high half v1 to -1, and the pair is stored to %out.
; NOTE(review): presumably the fadd is folded to a constant because this bit
; pattern is a NaN - confirm.
1389 define amdgpu_kernel void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1390 ; SI-LABEL: add_inline_imm_neg_2_f64:
1392 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1393 ; SI-NEXT: s_mov_b32 s3, 0xf000
1394 ; SI-NEXT: s_mov_b32 s2, -1
1395 ; SI-NEXT: v_mov_b32_e32 v0, -2
1396 ; SI-NEXT: v_mov_b32_e32 v1, -1
1397 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1398 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1401 ; VI-LABEL: add_inline_imm_neg_2_f64:
1403 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1404 ; VI-NEXT: s_mov_b32 s3, 0xf000
1405 ; VI-NEXT: s_mov_b32 s2, -1
1406 ; VI-NEXT: v_mov_b32_e32 v0, -2
1407 ; VI-NEXT: v_mov_b32_e32 v1, -1
1408 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1409 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1411 %y = fadd double %x, 0xfffffffffffffffe
1412 store double %y, double addrspace(1)* %out
; fadd of %x with the double whose 64-bit pattern is 0xfffffffffffffff0.
; The checks contain no v_add_f64 and no load of %x: the low half v0 is set
; to -16, the high half v1 to -1, and the pair is stored to %out.
; NOTE(review): presumably the fadd is folded to a constant because this bit
; pattern is a NaN - confirm.
1416 define amdgpu_kernel void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1417 ; SI-LABEL: add_inline_imm_neg_16_f64:
1419 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1420 ; SI-NEXT: s_mov_b32 s3, 0xf000
1421 ; SI-NEXT: s_mov_b32 s2, -1
1422 ; SI-NEXT: v_mov_b32_e32 v0, -16
1423 ; SI-NEXT: v_mov_b32_e32 v1, -1
1424 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1425 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1428 ; VI-LABEL: add_inline_imm_neg_16_f64:
1430 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1431 ; VI-NEXT: s_mov_b32 s3, 0xf000
1432 ; VI-NEXT: s_mov_b32 s2, -1
1433 ; VI-NEXT: v_mov_b32_e32 v0, -16
1434 ; VI-NEXT: v_mov_b32_e32 v1, -1
1435 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1436 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1438 %y = fadd double %x, 0xfffffffffffffff0
1439 store double %y, double addrspace(1)* %out
; Adds the double whose 64-bit pattern is 0x000000000000003F to %x and stores
; the sum to %out. Both check blocks expect the constant to appear as the
; integer operand `63` of v_add_f64.
1443 define amdgpu_kernel void @add_inline_imm_63_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1444 ; SI-LABEL: add_inline_imm_63_f64:
1446 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1447 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1448 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1449 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], 63
1450 ; SI-NEXT: s_mov_b32 s3, 0xf000
1451 ; SI-NEXT: s_mov_b32 s2, -1
1452 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1455 ; VI-LABEL: add_inline_imm_63_f64:
1457 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1458 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1459 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1460 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], 63
1461 ; VI-NEXT: s_mov_b32 s3, 0xf000
1462 ; VI-NEXT: s_mov_b32 s2, -1
1463 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1465 %y = fadd double %x, 0x000000000000003F
1466 store double %y, double addrspace(1)* %out
; Adds the double whose 64-bit pattern is 0x0000000000000040 to %x and stores
; the sum to %out. Both check blocks expect the constant to appear as the
; integer operand `64` of v_add_f64.
1470 define amdgpu_kernel void @add_inline_imm_64_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1471 ; SI-LABEL: add_inline_imm_64_f64:
1473 ; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13
1474 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1475 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1476 ; SI-NEXT: v_add_f64 v[0:1], s[2:3], 64
1477 ; SI-NEXT: s_mov_b32 s3, 0xf000
1478 ; SI-NEXT: s_mov_b32 s2, -1
1479 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1482 ; VI-LABEL: add_inline_imm_64_f64:
1484 ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
1485 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1486 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1487 ; VI-NEXT: v_add_f64 v[0:1], s[2:3], 64
1488 ; VI-NEXT: s_mov_b32 s3, 0xf000
1489 ; VI-NEXT: s_mov_b32 s2, -1
1490 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1492 %y = fadd double %x, 0x0000000000000040
1493 store double %y, double addrspace(1)* %out
; Stores the double constant 0.0 to %out. Both check blocks expect v0 to be
; set to 0 and v1 to be copied from v0 (a single constant move reused for both
; halves) before the 64-bit buffer store.
1497 define amdgpu_kernel void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
1498 ; SI-LABEL: store_inline_imm_0.0_f64:
1500 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1501 ; SI-NEXT: v_mov_b32_e32 v0, 0
1502 ; SI-NEXT: s_mov_b32 s3, 0xf000
1503 ; SI-NEXT: s_mov_b32 s2, -1
1504 ; SI-NEXT: v_mov_b32_e32 v1, v0
1505 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1506 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1509 ; VI-LABEL: store_inline_imm_0.0_f64:
1511 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1512 ; VI-NEXT: v_mov_b32_e32 v0, 0
1513 ; VI-NEXT: s_mov_b32 s3, 0xf000
1514 ; VI-NEXT: s_mov_b32 s2, -1
1515 ; VI-NEXT: v_mov_b32_e32 v1, v0
1516 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1517 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1519 store double 0.0, double addrspace(1)* %out
; Stores the double constant -0.0 to %out. Both check blocks expect the low
; half v0 to be 0 and the high half v1 to be produced by `v_bfrev_b32 v1, 1`
; instead of a 32-bit literal move.
; NOTE(review): bit-reversing 1 should give 0x80000000, i.e. only the sign bit
; set - confirm against the ISA description of v_bfrev_b32.
1523 define amdgpu_kernel void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) {
1524 ; SI-LABEL: store_literal_imm_neg_0.0_f64:
1526 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1527 ; SI-NEXT: s_mov_b32 s3, 0xf000
1528 ; SI-NEXT: s_mov_b32 s2, -1
1529 ; SI-NEXT: v_mov_b32_e32 v0, 0
1530 ; SI-NEXT: v_bfrev_b32_e32 v1, 1
1531 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1532 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1535 ; VI-LABEL: store_literal_imm_neg_0.0_f64:
1537 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1538 ; VI-NEXT: s_mov_b32 s3, 0xf000
1539 ; VI-NEXT: s_mov_b32 s2, -1
1540 ; VI-NEXT: v_mov_b32_e32 v0, 0
1541 ; VI-NEXT: v_bfrev_b32_e32 v1, 1
1542 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1543 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1545 store double -0.0, double addrspace(1)* %out
; Stores the double constant 0.5 to %out. Both check blocks expect the value
; to be split into two 32-bit moves: low half 0 and high half 0x3fe00000.
1549 define amdgpu_kernel void @store_inline_imm_0.5_f64(double addrspace(1)* %out) {
1550 ; SI-LABEL: store_inline_imm_0.5_f64:
1552 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1553 ; SI-NEXT: s_mov_b32 s3, 0xf000
1554 ; SI-NEXT: s_mov_b32 s2, -1
1555 ; SI-NEXT: v_mov_b32_e32 v0, 0
1556 ; SI-NEXT: v_mov_b32_e32 v1, 0x3fe00000
1557 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1558 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1561 ; VI-LABEL: store_inline_imm_0.5_f64:
1563 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1564 ; VI-NEXT: s_mov_b32 s3, 0xf000
1565 ; VI-NEXT: s_mov_b32 s2, -1
1566 ; VI-NEXT: v_mov_b32_e32 v0, 0
1567 ; VI-NEXT: v_mov_b32_e32 v1, 0x3fe00000
1568 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1569 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1571 store double 0.5, double addrspace(1)* %out
; Stores the double constant -0.5 to %out. Both check blocks expect the value
; to be split into two 32-bit moves: low half 0 and high half 0xbfe00000.
1575 define amdgpu_kernel void @store_inline_imm_m_0.5_f64(double addrspace(1)* %out) {
1576 ; SI-LABEL: store_inline_imm_m_0.5_f64:
1578 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1579 ; SI-NEXT: s_mov_b32 s3, 0xf000
1580 ; SI-NEXT: s_mov_b32 s2, -1
1581 ; SI-NEXT: v_mov_b32_e32 v0, 0
1582 ; SI-NEXT: v_mov_b32_e32 v1, 0xbfe00000
1583 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1584 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1587 ; VI-LABEL: store_inline_imm_m_0.5_f64:
1589 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1590 ; VI-NEXT: s_mov_b32 s3, 0xf000
1591 ; VI-NEXT: s_mov_b32 s2, -1
1592 ; VI-NEXT: v_mov_b32_e32 v0, 0
1593 ; VI-NEXT: v_mov_b32_e32 v1, 0xbfe00000
1594 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1595 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1597 store double -0.5, double addrspace(1)* %out
; Stores the double constant 1.0 to %out. Both check blocks expect the value
; to be split into two 32-bit moves: low half 0 and high half 0x3ff00000.
1601 define amdgpu_kernel void @store_inline_imm_1.0_f64(double addrspace(1)* %out) {
1602 ; SI-LABEL: store_inline_imm_1.0_f64:
1604 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1605 ; SI-NEXT: s_mov_b32 s3, 0xf000
1606 ; SI-NEXT: s_mov_b32 s2, -1
1607 ; SI-NEXT: v_mov_b32_e32 v0, 0
1608 ; SI-NEXT: v_mov_b32_e32 v1, 0x3ff00000
1609 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1610 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1613 ; VI-LABEL: store_inline_imm_1.0_f64:
1615 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1616 ; VI-NEXT: s_mov_b32 s3, 0xf000
1617 ; VI-NEXT: s_mov_b32 s2, -1
1618 ; VI-NEXT: v_mov_b32_e32 v0, 0
1619 ; VI-NEXT: v_mov_b32_e32 v1, 0x3ff00000
1620 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1621 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1623 store double 1.0, double addrspace(1)* %out
; Stores the double constant -1.0 to %out. Both check blocks expect the value
; to be split into two 32-bit moves: low half 0 and high half 0xbff00000.
1627 define amdgpu_kernel void @store_inline_imm_m_1.0_f64(double addrspace(1)* %out) {
1628 ; SI-LABEL: store_inline_imm_m_1.0_f64:
1630 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1631 ; SI-NEXT: s_mov_b32 s3, 0xf000
1632 ; SI-NEXT: s_mov_b32 s2, -1
1633 ; SI-NEXT: v_mov_b32_e32 v0, 0
1634 ; SI-NEXT: v_mov_b32_e32 v1, 0xbff00000
1635 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1636 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1639 ; VI-LABEL: store_inline_imm_m_1.0_f64:
1641 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1642 ; VI-NEXT: s_mov_b32 s3, 0xf000
1643 ; VI-NEXT: s_mov_b32 s2, -1
1644 ; VI-NEXT: v_mov_b32_e32 v0, 0
1645 ; VI-NEXT: v_mov_b32_e32 v1, 0xbff00000
1646 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1647 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1649 store double -1.0, double addrspace(1)* %out
; Stores the double constant 2.0 to %out. Both check blocks expect two 32-bit
; moves: low half 0 and a high half printed as `2.0`. The high 32 bits of
; double 2.0 are 0x40000000, which is also the bit pattern of float 2.0, so
; the 32-bit move shows the floating-point form rather than a hex literal.
1653 define amdgpu_kernel void @store_inline_imm_2.0_f64(double addrspace(1)* %out) {
1654 ; SI-LABEL: store_inline_imm_2.0_f64:
1656 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1657 ; SI-NEXT: s_mov_b32 s3, 0xf000
1658 ; SI-NEXT: s_mov_b32 s2, -1
1659 ; SI-NEXT: v_mov_b32_e32 v0, 0
1660 ; SI-NEXT: v_mov_b32_e32 v1, 2.0
1661 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1662 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1665 ; VI-LABEL: store_inline_imm_2.0_f64:
1667 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1668 ; VI-NEXT: s_mov_b32 s3, 0xf000
1669 ; VI-NEXT: s_mov_b32 s2, -1
1670 ; VI-NEXT: v_mov_b32_e32 v0, 0
1671 ; VI-NEXT: v_mov_b32_e32 v1, 2.0
1672 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1673 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1675 store double 2.0, double addrspace(1)* %out
; Stores the double constant -2.0 to %out. Both check blocks expect two 32-bit
; moves: low half 0 and a high half printed as `-2.0`. The high 32 bits of
; double -2.0 are 0xc0000000, which is also the bit pattern of float -2.0, so
; the 32-bit move shows the floating-point form rather than a hex literal.
1679 define amdgpu_kernel void @store_inline_imm_m_2.0_f64(double addrspace(1)* %out) {
1680 ; SI-LABEL: store_inline_imm_m_2.0_f64:
1682 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1683 ; SI-NEXT: s_mov_b32 s3, 0xf000
1684 ; SI-NEXT: s_mov_b32 s2, -1
1685 ; SI-NEXT: v_mov_b32_e32 v0, 0
1686 ; SI-NEXT: v_mov_b32_e32 v1, -2.0
1687 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1688 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1691 ; VI-LABEL: store_inline_imm_m_2.0_f64:
1693 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1694 ; VI-NEXT: s_mov_b32 s3, 0xf000
1695 ; VI-NEXT: s_mov_b32 s2, -1
1696 ; VI-NEXT: v_mov_b32_e32 v0, 0
1697 ; VI-NEXT: v_mov_b32_e32 v1, -2.0
1698 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1699 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1701 store double -2.0, double addrspace(1)* %out
; Stores the double constant 4.0 to %out. Both check blocks expect the value
; to be split into two 32-bit moves: low half 0 and high half 0x40100000.
1705 define amdgpu_kernel void @store_inline_imm_4.0_f64(double addrspace(1)* %out) {
1706 ; SI-LABEL: store_inline_imm_4.0_f64:
1708 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1709 ; SI-NEXT: s_mov_b32 s3, 0xf000
1710 ; SI-NEXT: s_mov_b32 s2, -1
1711 ; SI-NEXT: v_mov_b32_e32 v0, 0
1712 ; SI-NEXT: v_mov_b32_e32 v1, 0x40100000
1713 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1714 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1717 ; VI-LABEL: store_inline_imm_4.0_f64:
1719 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1720 ; VI-NEXT: s_mov_b32 s3, 0xf000
1721 ; VI-NEXT: s_mov_b32 s2, -1
1722 ; VI-NEXT: v_mov_b32_e32 v0, 0
1723 ; VI-NEXT: v_mov_b32_e32 v1, 0x40100000
1724 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1725 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1727 store double 4.0, double addrspace(1)* %out
; Stores the double constant -4.0 to %out. Both check blocks expect the value
; to be split into two 32-bit moves: low half 0 and high half 0xc0100000.
1731 define amdgpu_kernel void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) {
1732 ; SI-LABEL: store_inline_imm_m_4.0_f64:
1734 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1735 ; SI-NEXT: s_mov_b32 s3, 0xf000
1736 ; SI-NEXT: s_mov_b32 s2, -1
1737 ; SI-NEXT: v_mov_b32_e32 v0, 0
1738 ; SI-NEXT: v_mov_b32_e32 v1, 0xc0100000
1739 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1740 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1743 ; VI-LABEL: store_inline_imm_m_4.0_f64:
1745 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1746 ; VI-NEXT: s_mov_b32 s3, 0xf000
1747 ; VI-NEXT: s_mov_b32 s2, -1
1748 ; VI-NEXT: v_mov_b32_e32 v0, 0
1749 ; VI-NEXT: v_mov_b32_e32 v1, 0xc0100000
1750 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1751 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1753 store double -4.0, double addrspace(1)* %out
; Stores the double with bit pattern 0x3fc45f306dc9c882 to %out. Both check
; blocks expect two 32-bit literal moves: low half 0x6dc9c882 and high half
; 0x3fc45f30.
1757 define amdgpu_kernel void @store_inv_2pi_f64(double addrspace(1)* %out) {
1758 ; SI-LABEL: store_inv_2pi_f64:
1760 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1761 ; SI-NEXT: s_mov_b32 s3, 0xf000
1762 ; SI-NEXT: s_mov_b32 s2, -1
1763 ; SI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882
1764 ; SI-NEXT: v_mov_b32_e32 v1, 0x3fc45f30
1765 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1766 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1769 ; VI-LABEL: store_inv_2pi_f64:
1771 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1772 ; VI-NEXT: s_mov_b32 s3, 0xf000
1773 ; VI-NEXT: s_mov_b32 s2, -1
1774 ; VI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882
1775 ; VI-NEXT: v_mov_b32_e32 v1, 0x3fc45f30
1776 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1777 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1779 store double 0x3fc45f306dc9c882, double addrspace(1)* %out
; Stores the double with bit pattern 0xbfc45f306dc9c882 (the constant of
; @store_inv_2pi_f64 with the sign bit set) to %out. Despite the "inline" in
; the function name, both check blocks expect two 32-bit literal moves: low
; half 0x6dc9c882 and high half 0xbfc45f30.
1783 define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f64(double addrspace(1)* %out) {
1784 ; SI-LABEL: store_inline_imm_m_inv_2pi_f64:
1786 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1787 ; SI-NEXT: s_mov_b32 s3, 0xf000
1788 ; SI-NEXT: s_mov_b32 s2, -1
1789 ; SI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882
1790 ; SI-NEXT: v_mov_b32_e32 v1, 0xbfc45f30
1791 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1792 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1795 ; VI-LABEL: store_inline_imm_m_inv_2pi_f64:
1797 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1798 ; VI-NEXT: s_mov_b32 s3, 0xf000
1799 ; VI-NEXT: s_mov_b32 s2, -1
1800 ; VI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882
1801 ; VI-NEXT: v_mov_b32_e32 v1, 0xbfc45f30
1802 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1803 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1805 store double 0xbfc45f306dc9c882, double addrspace(1)* %out
; Stores the double constant 4096.0 to %out. Both check blocks expect the
; value to be split into two 32-bit moves: low half 0 and high half
; 0x40b00000.
1809 define amdgpu_kernel void @store_literal_imm_f64(double addrspace(1)* %out) {
1810 ; SI-LABEL: store_literal_imm_f64:
1812 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
1813 ; SI-NEXT: s_mov_b32 s3, 0xf000
1814 ; SI-NEXT: s_mov_b32 s2, -1
1815 ; SI-NEXT: v_mov_b32_e32 v0, 0
1816 ; SI-NEXT: v_mov_b32_e32 v1, 0x40b00000
1817 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1818 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1821 ; VI-LABEL: store_literal_imm_f64:
1823 ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1824 ; VI-NEXT: s_mov_b32 s3, 0xf000
1825 ; VI-NEXT: s_mov_b32 s2, -1
1826 ; VI-NEXT: v_mov_b32_e32 v0, 0
1827 ; VI-NEXT: v_mov_b32_e32 v1, 0x40b00000
1828 ; VI-NEXT: s_waitcnt lgkmcnt(0)
1829 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1831 store double 4096.0, double addrspace(1)* %out
; Vertex-shader test (amdgpu_vs) that multiplies %arg by a float constant and
; by the same constant with the sign bit flipped, then exports both products.
; The checks use the shared GCN prefix, so one expectation covers both RUN
; lines: each constant must appear as a 32-bit literal operand of its own
; v_mul_f32_e32 (0x3f4353f8 and 0xbf4353f8), with no separate move.
1835 define amdgpu_vs void @literal_folding(float %arg) {
1836 ; GCN-LABEL: literal_folding:
1837 ; GCN: ; %bb.0: ; %main_body
1838 ; GCN-NEXT: v_mul_f32_e32 v1, 0x3f4353f8, v0
1839 ; GCN-NEXT: v_mul_f32_e32 v0, 0xbf4353f8, v0
1840 ; GCN-NEXT: exp pos0 v1, v1, v0, v0 done
1841 ; GCN-NEXT: s_endpgm
1843 %tmp = fmul float %arg, 0x3FE86A7F00000000
1844 %tmp1 = fmul float %arg, 0xBFE86A7F00000000
; The export below is expected to print as `exp pos0 v1, v1, v0, v0 done`.
1845 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp, float %tmp, float %tmp1, float %tmp1, i1 true, i1 false) #0
; Declaration of the export intrinsic called by @literal_folding.
1849 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
; Attribute group #0, applied to the intrinsic declaration and its call site.
1851 attributes #0 = { nounwind }