1 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
; The stored constant has 5 in its low 32 bits and 0x12345678 in its high
; 32 bits. The checks expect the low half to be produced by a v_mov_b32 of 5
; and that register to be the first one of the pair given to the 64-bit store.
4 ; Use a 64-bit value with lo bits that can be represented as an inline constant
5 ; GCN-LABEL: {{^}}i64_imm_inline_lo:
6 ; GCN: v_mov_b32_e32 v[[LO_VGPR:[0-9]+]], 5
7 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VGPR]]:
8 define amdgpu_kernel void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
10 store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005
; Mirror image of the low-half test: here the high 32 bits are 5 and the low
; 32 bits are 0x12345678. The checks expect the v_mov_b32 of 5 to define the
; second (high) register of the pair given to the 64-bit store.
14 ; Use a 64-bit value with hi bits that can be represented as an inline constant
15 ; GCN-LABEL: {{^}}i64_imm_inline_hi:
16 ; GCN: v_mov_b32_e32 v[[HI_VGPR:[0-9]+]], 5
17 ; GCN: buffer_store_dwordx2 v{{\[[0-9]+:}}[[HI_VGPR]]
18 define amdgpu_kernel void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
20 store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678
; Stores the i64 value -9223372036854775808, i.e. 0x8000000000000000 (only
; the top bit set, the same bit pattern as double -0.0). The checks expect
; the low dword from a v_mov_b32 of 0 and the high dword from a v_bfrev_b32
; of 1; the two may appear in either order (DAG) before the 64-bit store.
24 ; GCN-LABEL: {{^}}store_imm_neg_0.0_i64:
25 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
26 ; GCN-DAG: v_bfrev_b32_e32 v[[HI_VREG:[0-9]+]], 1{{$}}
27 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
28 define amdgpu_kernel void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) {
29 store i64 -9223372036854775808, i64 addrspace(1) *%out
; Stores the i32 value -2147483648, i.e. 0x80000000 (the bit pattern of float
; -0.0). The checks expect it to be produced by a v_bfrev_b32 of 1 and that
; same register to be stored.
33 ; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_i32:
34 ; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
35 ; GCN: buffer_store_dword [[REG]]
36 define amdgpu_kernel void @store_inline_imm_neg_0.0_i32(i32 addrspace(1)* %out) {
37 store i32 -2147483648, i32 addrspace(1)* %out
; Stores float 0.0. The checks expect a v_mov_b32 whose source operand is
; printed as plain 0 (the {{$}} anchor rejects anything after it on the line),
; followed by a store of that register.
41 ; GCN-LABEL: {{^}}store_inline_imm_0.0_f32:
42 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
43 ; GCN: buffer_store_dword [[REG]]
44 define amdgpu_kernel void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
45 store float 0.0, float addrspace(1)* %out
; Stores float -0.0. The checks expect the value to be produced by a
; v_bfrev_b32 of 1 rather than by a v_mov_b32, then stored.
49 ; GCN-LABEL: {{^}}store_imm_neg_0.0_f32:
50 ; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
51 ; GCN: buffer_store_dword [[REG]]
52 define amdgpu_kernel void @store_imm_neg_0.0_f32(float addrspace(1)* %out) {
53 store float -0.0, float addrspace(1)* %out
; The next eight kernels each store one of the float constants 0.5, -0.5,
; 1.0, -1.0, 2.0, -2.0, 4.0 and -4.0. In every case the checks expect a single
; v_mov_b32 whose source operand is printed as that floating-point value
; (anchored with {{$}}), followed by a store of the same register.
57 ; GCN-LABEL: {{^}}store_inline_imm_0.5_f32:
58 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0.5{{$}}
59 ; GCN: buffer_store_dword [[REG]]
60 define amdgpu_kernel void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
61 store float 0.5, float addrspace(1)* %out
65 ; GCN-LABEL: {{^}}store_inline_imm_m_0.5_f32:
66 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -0.5{{$}}
67 ; GCN: buffer_store_dword [[REG]]
68 define amdgpu_kernel void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
69 store float -0.5, float addrspace(1)* %out
73 ; GCN-LABEL: {{^}}store_inline_imm_1.0_f32:
74 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
75 ; GCN: buffer_store_dword [[REG]]
76 define amdgpu_kernel void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
77 store float 1.0, float addrspace(1)* %out
81 ; GCN-LABEL: {{^}}store_inline_imm_m_1.0_f32:
82 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
83 ; GCN: buffer_store_dword [[REG]]
84 define amdgpu_kernel void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
85 store float -1.0, float addrspace(1)* %out
89 ; GCN-LABEL: {{^}}store_inline_imm_2.0_f32:
90 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0{{$}}
91 ; GCN: buffer_store_dword [[REG]]
92 define amdgpu_kernel void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
93 store float 2.0, float addrspace(1)* %out
97 ; GCN-LABEL: {{^}}store_inline_imm_m_2.0_f32:
98 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -2.0{{$}}
99 ; GCN: buffer_store_dword [[REG]]
100 define amdgpu_kernel void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
101 store float -2.0, float addrspace(1)* %out
105 ; GCN-LABEL: {{^}}store_inline_imm_4.0_f32:
106 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 4.0{{$}}
107 ; GCN: buffer_store_dword [[REG]]
108 define amdgpu_kernel void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
109 store float 4.0, float addrspace(1)* %out
113 ; GCN-LABEL: {{^}}store_inline_imm_m_4.0_f32:
114 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], -4.0{{$}}
115 ; GCN: buffer_store_dword [[REG]]
116 define amdgpu_kernel void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
117 store float -4.0, float addrspace(1)* %out
; Stores the float written as 0x3FC45F3060000000 in LLVM's hexadecimal
; (double-format) notation; its 32-bit encoding is 0x3e22f983. The two runs
; are expected to differ: the SI checks want the 32-bit literal 0x3e22f983,
; while the VI checks want the operand printed as 0.15915494.
122 ; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_f32:
123 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e22f983{{$}}
124 ; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0.15915494{{$}}
125 ; GCN: buffer_store_dword [[REG]]
126 define amdgpu_kernel void @store_inline_imm_inv_2pi_f32(float addrspace(1)* %out) {
127 store float 0x3FC45F3060000000, float addrspace(1)* %out
; Stores the negation of the previous test's constant (sign bit set:
; 0xBFC45F3060000000). Unlike the positive value, both runs share one check
; and expect the 32-bit literal 0xbe22f983 as the v_mov_b32 source.
131 ; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f32:
132 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbe22f983{{$}}
133 ; GCN: buffer_store_dword [[REG]]
134 define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f32(float addrspace(1)* %out) {
135 store float 0xBFC45F3060000000, float addrspace(1)* %out
; Stores float 4096.0. The checks expect it to appear as the 32-bit literal
; 0x45800000 (the encoding of 4096.0) in a v_mov_b32, followed by the store.
139 ; GCN-LABEL: {{^}}store_literal_imm_f32:
140 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x45800000
141 ; GCN: buffer_store_dword [[REG]]
142 define amdgpu_kernel void @store_literal_imm_f32(float addrspace(1)* %out) {
143 store float 4096.0, float addrspace(1)* %out
; Adds 0.0 to the float kernel argument %x and stores the sum. The checks
; expect %x to be fetched with s_load_dword into an SGPR and then used as the
; first source of a v_add_f32_e64 whose second source is printed as 0.
147 ; GCN-LABEL: {{^}}add_inline_imm_0.0_f32:
148 ; GCN: s_load_dword [[VAL:s[0-9]+]]
149 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 0{{$}}
150 ; GCN: buffer_store_dword [[REG]]
151 define amdgpu_kernel void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
152 %y = fadd float %x, 0.0
153 store float %y, float addrspace(1)* %out
; The next eight kernels add one of 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0 and
; -4.0 to the float kernel argument %x and store the sum. Each set of checks
; expects %x in an SGPR from s_load_dword, a v_add_f32_e64 with that SGPR as
; first source and the constant printed as a floating-point value as second
; source, and a store of the result register.
157 ; GCN-LABEL: {{^}}add_inline_imm_0.5_f32:
158 ; GCN: s_load_dword [[VAL:s[0-9]+]]
159 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 0.5{{$}}
160 ; GCN: buffer_store_dword [[REG]]
161 define amdgpu_kernel void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
162 %y = fadd float %x, 0.5
163 store float %y, float addrspace(1)* %out
167 ; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_f32:
168 ; GCN: s_load_dword [[VAL:s[0-9]+]]
169 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -0.5{{$}}
170 ; GCN: buffer_store_dword [[REG]]
171 define amdgpu_kernel void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
172 %y = fadd float %x, -0.5
173 store float %y, float addrspace(1)* %out
177 ; GCN-LABEL: {{^}}add_inline_imm_1.0_f32:
178 ; GCN: s_load_dword [[VAL:s[0-9]+]]
179 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 1.0{{$}}
180 ; GCN: buffer_store_dword [[REG]]
181 define amdgpu_kernel void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
182 %y = fadd float %x, 1.0
183 store float %y, float addrspace(1)* %out
187 ; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_f32:
188 ; GCN: s_load_dword [[VAL:s[0-9]+]]
189 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -1.0{{$}}
190 ; GCN: buffer_store_dword [[REG]]
191 define amdgpu_kernel void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
192 %y = fadd float %x, -1.0
193 store float %y, float addrspace(1)* %out
197 ; GCN-LABEL: {{^}}add_inline_imm_2.0_f32:
198 ; GCN: s_load_dword [[VAL:s[0-9]+]]
199 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 2.0{{$}}
200 ; GCN: buffer_store_dword [[REG]]
201 define amdgpu_kernel void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
202 %y = fadd float %x, 2.0
203 store float %y, float addrspace(1)* %out
207 ; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_f32:
208 ; GCN: s_load_dword [[VAL:s[0-9]+]]
209 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -2.0{{$}}
210 ; GCN: buffer_store_dword [[REG]]
211 define amdgpu_kernel void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
212 %y = fadd float %x, -2.0
213 store float %y, float addrspace(1)* %out
217 ; GCN-LABEL: {{^}}add_inline_imm_4.0_f32:
218 ; GCN: s_load_dword [[VAL:s[0-9]+]]
219 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 4.0{{$}}
220 ; GCN: buffer_store_dword [[REG]]
221 define amdgpu_kernel void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
222 %y = fadd float %x, 4.0
223 store float %y, float addrspace(1)* %out
227 ; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_f32:
228 ; GCN: s_load_dword [[VAL:s[0-9]+]]
229 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -4.0{{$}}
230 ; GCN: buffer_store_dword [[REG]]
231 define amdgpu_kernel void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
232 %y = fadd float %x, -4.0
233 store float %y, float addrspace(1)* %out
; Same addition of 0.5, but %x is loaded from memory (%in) instead of being a
; kernel argument. The checks expect the value in a VGPR from
; buffer_load_dword and the e32 form of the add with the operands swapped:
; 0.5 is the first source and the loaded register the second.
237 ; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_f32:
238 ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
239 ; GCN: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
240 ; GCN: buffer_store_dword [[REG]]
241 define amdgpu_kernel void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
242 %x = load float, float addrspace(1)* %in
243 %y = fadd float %x, 0.5
244 store float %y, float addrspace(1)* %out
; Adds 1024.0 to a float loaded from %in. The checks expect the e32 form of
; the add with the constant as the 32-bit literal 0x44800000 (the encoding of
; 1024.0) in the first source position and the loaded VGPR second.
248 ; GCN-LABEL: {{^}}commute_add_literal_f32:
249 ; GCN: buffer_load_dword [[VAL:v[0-9]+]]
250 ; GCN: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
251 ; GCN: buffer_store_dword [[REG]]
252 define amdgpu_kernel void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
253 %x = load float, float addrspace(1)* %in
254 %y = fadd float %x, 1024.0
255 store float %y, float addrspace(1)* %out
; These three kernels use fadd constants written in LLVM's hexadecimal
; (double-format) notation. As 32-bit floats they are denormals whose raw bit
; patterns are the integers 1, 2 and 16 (2^-149, 2^-148 and 2^-145). The
; checks expect those integers to be printed directly as the second source of
; the v_add_f32_e64.
259 ; GCN-LABEL: {{^}}add_inline_imm_1_f32:
260 ; GCN: s_load_dword [[VAL:s[0-9]+]]
261 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 1{{$}}
262 ; GCN: buffer_store_dword [[REG]]
263 define amdgpu_kernel void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) {
264 %y = fadd float %x, 0x36a0000000000000
265 store float %y, float addrspace(1)* %out
269 ; GCN-LABEL: {{^}}add_inline_imm_2_f32:
270 ; GCN: s_load_dword [[VAL:s[0-9]+]]
271 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 2{{$}}
272 ; GCN: buffer_store_dword [[REG]]
273 define amdgpu_kernel void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) {
274 %y = fadd float %x, 0x36b0000000000000
275 store float %y, float addrspace(1)* %out
279 ; GCN-LABEL: {{^}}add_inline_imm_16_f32:
280 ; GCN: s_load_dword [[VAL:s[0-9]+]]
281 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 16
282 ; GCN: buffer_store_dword [[REG]]
283 define amdgpu_kernel void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) {
284 %y = fadd float %x, 0x36e0000000000000
285 store float %y, float addrspace(1)* %out
; Despite the _f32 suffix, these three kernels do no floating-point math:
; %x is bitcast to i32, an integer -1, -2 or -16 is added, and the sum is
; bitcast back to float and stored. The checks expect an s_add_i32 with the
; negative constant as its last operand, then a v_mov_b32 copying the scalar
; result into the VGPR that is stored.
; NOTE(review): the checks hard-code s0 as the register holding %x.
289 ; GCN-LABEL: {{^}}add_inline_imm_neg_1_f32:
290 ; GCN: s_add_i32 [[VAL:s[0-9]+]], s0, -1
291 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
292 ; GCN: buffer_store_dword [[REG]]
293 define amdgpu_kernel void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
294 %xbc = bitcast float %x to i32
295 %y = add i32 %xbc, -1
296 %ybc = bitcast i32 %y to float
297 store float %ybc, float addrspace(1)* %out
301 ; GCN-LABEL: {{^}}add_inline_imm_neg_2_f32:
302 ; GCN: s_add_i32 [[VAL:s[0-9]+]], s0, -2
303 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
304 ; GCN: buffer_store_dword [[REG]]
305 define amdgpu_kernel void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
306 %xbc = bitcast float %x to i32
307 %y = add i32 %xbc, -2
308 %ybc = bitcast i32 %y to float
309 store float %ybc, float addrspace(1)* %out
313 ; GCN-LABEL: {{^}}add_inline_imm_neg_16_f32:
314 ; GCN: s_add_i32 [[VAL:s[0-9]+]], s0, -16
315 ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
316 ; GCN: buffer_store_dword [[REG]]
317 define amdgpu_kernel void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
318 %xbc = bitcast float %x to i32
319 %y = add i32 %xbc, -16
320 %ybc = bitcast i32 %y to float
321 store float %ybc, float addrspace(1)* %out
; Two more fadd tests with denormal constants in hexadecimal notation; their
; raw 32-bit patterns are the integers 63 and 64. The checks expect those
; integers printed as the second source of the v_add_f32_e64.
325 ; GCN-LABEL: {{^}}add_inline_imm_63_f32:
326 ; GCN: s_load_dword [[VAL:s[0-9]+]]
327 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 63
328 ; GCN: buffer_store_dword [[REG]]
329 define amdgpu_kernel void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) {
330 %y = fadd float %x, 0x36ff800000000000
331 store float %y, float addrspace(1)* %out
335 ; GCN-LABEL: {{^}}add_inline_imm_64_f32:
336 ; GCN: s_load_dword [[VAL:s[0-9]+]]
337 ; GCN: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 64
338 ; GCN: buffer_store_dword [[REG]]
339 define amdgpu_kernel void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
340 %y = fadd float %x, 0x3700000000000000
341 store float %y, float addrspace(1)* %out
; Double-precision counterpart of the fadd tests: adds 0.0 to the double
; kernel argument %x, which follows an unnamed [8 x i32] argument. The checks
; expect %x to be fetched with s_load_dwordx2 (offset 0x13 on the SI run,
; 0x4c on the VI run) and used as the first source of a v_add_f64 whose second
; source is printed as 0; the 64-bit result is then stored.
; NOTE(review): 0x13 * 4 == 0x4c, so the two offsets look like the same
; location expressed in dwords and in bytes -- confirm.
346 ; GCN-LABEL: {{^}}add_inline_imm_0.0_f64:
347 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
348 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
349 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}}
350 ; GCN: buffer_store_dwordx2 [[REG]]
351 define amdgpu_kernel void @add_inline_imm_0.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
352 %y = fadd double %x, 0.0
353 store double %y, double addrspace(1)* %out
; The next eight kernels add one of 0.5, -0.5, 1.0, -1.0, 2.0, -2.0, 4.0 and
; -4.0 to the double kernel argument %x and store the sum. Each set of checks
; expects %x in an SGPR pair from s_load_dwordx2 (offset 0x13 on the SI run,
; 0x4c on the VI run), a v_add_f64 taking that pair as first source and the
; constant printed as a floating-point value as second source, and a 64-bit
; store of the result pair.
357 ; GCN-LABEL: {{^}}add_inline_imm_0.5_f64:
358 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
359 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
360 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 0.5
361 ; GCN: buffer_store_dwordx2 [[REG]]
362 define amdgpu_kernel void @add_inline_imm_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) {
363 %y = fadd double %x, 0.5
364 store double %y, double addrspace(1)* %out
368 ; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_f64:
369 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
370 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
371 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -0.5
372 ; GCN: buffer_store_dwordx2 [[REG]]
373 define amdgpu_kernel void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) {
374 %y = fadd double %x, -0.5
375 store double %y, double addrspace(1)* %out
379 ; GCN-LABEL: {{^}}add_inline_imm_1.0_f64:
380 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
381 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
382 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 1.0
383 ; GCN: buffer_store_dwordx2 [[REG]]
384 define amdgpu_kernel void @add_inline_imm_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
385 %y = fadd double %x, 1.0
386 store double %y, double addrspace(1)* %out
390 ; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_f64:
391 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
392 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
393 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -1.0
394 ; GCN: buffer_store_dwordx2 [[REG]]
395 define amdgpu_kernel void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
396 %y = fadd double %x, -1.0
397 store double %y, double addrspace(1)* %out
401 ; GCN-LABEL: {{^}}add_inline_imm_2.0_f64:
402 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
403 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
404 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 2.0
405 ; GCN: buffer_store_dwordx2 [[REG]]
406 define amdgpu_kernel void @add_inline_imm_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
407 %y = fadd double %x, 2.0
408 store double %y, double addrspace(1)* %out
412 ; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_f64:
413 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
414 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
415 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -2.0
416 ; GCN: buffer_store_dwordx2 [[REG]]
417 define amdgpu_kernel void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
418 %y = fadd double %x, -2.0
419 store double %y, double addrspace(1)* %out
423 ; GCN-LABEL: {{^}}add_inline_imm_4.0_f64:
424 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
425 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
426 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 4.0
427 ; GCN: buffer_store_dwordx2 [[REG]]
428 define amdgpu_kernel void @add_inline_imm_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
429 %y = fadd double %x, 4.0
430 store double %y, double addrspace(1)* %out
434 ; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_f64:
435 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
436 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
437 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], -4.0
438 ; GCN: buffer_store_dwordx2 [[REG]]
439 define amdgpu_kernel void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
440 %y = fadd double %x, -4.0
441 store double %y, double addrspace(1)* %out
; Adds the double with bit pattern 0x3fc45f306dc9c882 to %x. The two runs are
; expected to handle the constant differently. On the SI run the checks want
; both halves (0x6dc9c882 low, 0x3fc45f30 high) moved into a VGPR pair that
; becomes the last v_add_f64 source. On the VI run the checks want the
; constant printed as 0.15915494309189532 directly in the v_add_f64.
445 ; GCN-LABEL: {{^}}add_inline_imm_inv_2pi_f64:
446 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
447 ; SI-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
448 ; SI-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fc45f30
449 ; SI: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
451 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
452 ; VI: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 0.15915494309189532{{$}}
453 ; VI: buffer_store_dwordx2 [[REG]]
454 define amdgpu_kernel void @add_inline_imm_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) {
455 %y = fadd double %x, 0x3fc45f306dc9c882
456 store double %y, double addrspace(1)* %out
; Adds the negated constant (0xbfc45f306dc9c882) to %x. Here both runs share
; the same checks: the two halves (0x6dc9c882 low, 0xbfc45f30 high) are
; expected in a VGPR pair that is the last source of the v_add_f64.
460 ; GCN-LABEL: {{^}}add_m_inv_2pi_f64:
461 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
462 ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfc45f30
463 ; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
464 define amdgpu_kernel void @add_m_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) {
465 %y = fadd double %x, 0xbfc45f306dc9c882
466 store double %y, double addrspace(1)* %out
; These three kernels add doubles whose raw 64-bit patterns are the small
; integers 1, 2 and 16 (0x10), written in hexadecimal notation. The checks
; expect those integers printed directly as the second source of the
; v_add_f64, with %x loaded by s_load_dwordx2 as in the other f64 add tests.
470 ; GCN-LABEL: {{^}}add_inline_imm_1_f64:
471 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
472 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
473 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 1{{$}}
474 ; GCN: buffer_store_dwordx2 [[REG]]
475 define amdgpu_kernel void @add_inline_imm_1_f64(double addrspace(1)* %out, [8 x i32], double %x) {
476 %y = fadd double %x, 0x0000000000000001
477 store double %y, double addrspace(1)* %out
481 ; GCN-LABEL: {{^}}add_inline_imm_2_f64:
482 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
483 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
484 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 2{{$}}
485 ; GCN: buffer_store_dwordx2 [[REG]]
486 define amdgpu_kernel void @add_inline_imm_2_f64(double addrspace(1)* %out, [8 x i32], double %x) {
487 %y = fadd double %x, 0x0000000000000002
488 store double %y, double addrspace(1)* %out
492 ; GCN-LABEL: {{^}}add_inline_imm_16_f64:
493 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
494 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
495 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 16
496 ; GCN: buffer_store_dwordx2 [[REG]]
497 define amdgpu_kernel void @add_inline_imm_16_f64(double addrspace(1)* %out, [8 x i32], double %x) {
498 %y = fadd double %x, 0x0000000000000010
499 store double %y, double addrspace(1)* %out
; These three kernels add doubles whose raw 64-bit patterns are the integers
; -1, -2 and -16 (0xffffffffffffffff, ...fe, ...f0). All three patterns have
; an all-ones exponent and a non-zero mantissa, i.e. they are NaNs. The checks
; do not look for a v_add_f64 at all: they expect the two 32-bit halves of the
; same pattern to be moved into v0 and v1 and v[0:1] to be stored. For -1 the
; high half is expected as a copy of v0; for -2 and -16 it is a v_mov of -1.
; NOTE(review): presumably the fadd with a NaN operand is folded to that
; constant before instruction selection -- confirm. The checks also hard-code
; the registers v0 and v1.
503 ; GCN-LABEL: {{^}}add_inline_imm_neg_1_f64:
504 ; GCN: v_mov_b32_e32 v0, -1
505 ; GCN: v_mov_b32_e32 v1, v0
506 ; GCN: buffer_store_dwordx2 v[0:1]
507 define amdgpu_kernel void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, [8 x i32], double %x) {
508 %y = fadd double %x, 0xffffffffffffffff
509 store double %y, double addrspace(1)* %out
513 ; GCN-LABEL: {{^}}add_inline_imm_neg_2_f64:
514 ; GCN: v_mov_b32_e32 v0, -2
515 ; GCN: v_mov_b32_e32 v1, -1
516 ; GCN: buffer_store_dwordx2 v[0:1]
517 define amdgpu_kernel void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, [8 x i32], double %x) {
518 %y = fadd double %x, 0xfffffffffffffffe
519 store double %y, double addrspace(1)* %out
523 ; GCN-LABEL: {{^}}add_inline_imm_neg_16_f64:
524 ; GCN: v_mov_b32_e32 v0, -16
525 ; GCN: v_mov_b32_e32 v1, -1
526 ; GCN: buffer_store_dwordx2 v[0:1]
527 define amdgpu_kernel void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, [8 x i32], double %x) {
528 %y = fadd double %x, 0xfffffffffffffff0
529 store double %y, double addrspace(1)* %out
; Two more f64 add tests with constants whose raw 64-bit patterns are the
; integers 63 (0x3F) and 64 (0x40). The checks expect those integers printed
; as the second source of the v_add_f64.
533 ; GCN-LABEL: {{^}}add_inline_imm_63_f64:
534 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
535 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
536 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 63
537 ; GCN: buffer_store_dwordx2 [[REG]]
538 define amdgpu_kernel void @add_inline_imm_63_f64(double addrspace(1)* %out, [8 x i32], double %x) {
539 %y = fadd double %x, 0x000000000000003F
540 store double %y, double addrspace(1)* %out
544 ; GCN-LABEL: {{^}}add_inline_imm_64_f64:
545 ; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
546 ; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x4c
547 ; GCN: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], [[VAL]], 64
548 ; GCN: buffer_store_dwordx2 [[REG]]
549 define amdgpu_kernel void @add_inline_imm_64_f64(double addrspace(1)* %out, [8 x i32], double %x) {
550 %y = fadd double %x, 0x0000000000000040
551 store double %y, double addrspace(1)* %out
; Stores double 0.0. The checks expect the low dword from a v_mov_b32 of 0
; and the high dword as a register copy of the low one (not a second
; immediate move), then a 64-bit store of the pair.
556 ; GCN-LABEL: {{^}}store_inline_imm_0.0_f64:
557 ; GCN: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0
558 ; GCN: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], v[[LO_VREG]]{{$}}
559 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
560 define amdgpu_kernel void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
561 store double 0.0, double addrspace(1)* %out
; Stores double -0.0 (only the sign bit set). The checks expect the low dword
; from a v_mov_b32 of 0 and the high dword from a v_bfrev_b32 of 1, in either
; order (DAG), then a 64-bit store of the pair.
566 ; GCN-LABEL: {{^}}store_literal_imm_neg_0.0_f64:
567 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
568 ; GCN-DAG: v_bfrev_b32_e32 v[[HI_VREG:[0-9]+]], 1{{$}}
569 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
570 define amdgpu_kernel void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) {
571 store double -0.0, double addrspace(1)* %out
; The next eight kernels each store one of the double constants 0.5, -0.5,
; 1.0, -1.0, 2.0, -2.0, 4.0 and -4.0. Because the value is split into two
; 32-bit moves, the checks look at each half separately. The low dword is
; always expected as a v_mov_b32 of 0. The high dword is expected as a 32-bit
; hex literal (0x3fe00000, 0xbfe00000, 0x3ff00000, 0xbff00000, 0x40100000,
; 0xc0100000), except for 2.0 and -2.0. There the high dwords 0x40000000 and
; 0xc0000000 are also the encodings of float 2.0 and -2.0, and the checks
; expect them printed as 2.0 and -2.0.
575 ; GCN-LABEL: {{^}}store_inline_imm_0.5_f64:
576 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
577 ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fe00000
578 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
579 define amdgpu_kernel void @store_inline_imm_0.5_f64(double addrspace(1)* %out) {
580 store double 0.5, double addrspace(1)* %out
584 ; GCN-LABEL: {{^}}store_inline_imm_m_0.5_f64:
585 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
586 ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfe00000
587 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
588 define amdgpu_kernel void @store_inline_imm_m_0.5_f64(double addrspace(1)* %out) {
589 store double -0.5, double addrspace(1)* %out
593 ; GCN-LABEL: {{^}}store_inline_imm_1.0_f64:
594 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
595 ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3ff00000
596 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
597 define amdgpu_kernel void @store_inline_imm_1.0_f64(double addrspace(1)* %out) {
598 store double 1.0, double addrspace(1)* %out
602 ; GCN-LABEL: {{^}}store_inline_imm_m_1.0_f64:
603 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
604 ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbff00000
605 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
606 define amdgpu_kernel void @store_inline_imm_m_1.0_f64(double addrspace(1)* %out) {
607 store double -1.0, double addrspace(1)* %out
611 ; GCN-LABEL: {{^}}store_inline_imm_2.0_f64:
612 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
613 ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 2.0
614 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
615 define amdgpu_kernel void @store_inline_imm_2.0_f64(double addrspace(1)* %out) {
616 store double 2.0, double addrspace(1)* %out
620 ; GCN-LABEL: {{^}}store_inline_imm_m_2.0_f64:
621 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
622 ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], -2.0
623 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
624 define amdgpu_kernel void @store_inline_imm_m_2.0_f64(double addrspace(1)* %out) {
625 store double -2.0, double addrspace(1)* %out
629 ; GCN-LABEL: {{^}}store_inline_imm_4.0_f64:
630 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
631 ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40100000
632 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
633 define amdgpu_kernel void @store_inline_imm_4.0_f64(double addrspace(1)* %out) {
634 store double 4.0, double addrspace(1)* %out
638 ; GCN-LABEL: {{^}}store_inline_imm_m_4.0_f64:
639 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
640 ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xc0100000
641 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
642 define amdgpu_kernel void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) {
643 store double -4.0, double addrspace(1)* %out
; Stores the double with bit pattern 0x3fc45f306dc9c882. On both runs the
; checks expect the two halves as 32-bit literals (0x6dc9c882 low, 0x3fc45f30
; high) moved into the register pair that is stored.
647 ; GCN-LABEL: {{^}}store_inv_2pi_f64:
648 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
649 ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fc45f30
650 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
651 define amdgpu_kernel void @store_inv_2pi_f64(double addrspace(1)* %out) {
652 store double 0x3fc45f306dc9c882, double addrspace(1)* %out
; Stores the negated constant 0xbfc45f306dc9c882. The checks expect the same
; low literal 0x6dc9c882 and the high literal 0xbfc45f30 (sign bit set) moved
; into the register pair that is stored.
656 ; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f64:
657 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0x6dc9c882
658 ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfc45f30
659 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
660 define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f64(double addrspace(1)* %out) {
661 store double 0xbfc45f306dc9c882, double addrspace(1)* %out
; Stores double 4096.0 (0x40b0000000000000). The checks expect the low dword
; from a v_mov_b32 of 0 and the high dword as the literal 0x40b00000, then a
; 64-bit store of the pair.
665 ; GCN-LABEL: {{^}}store_literal_imm_f64:
666 ; GCN-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
667 ; GCN-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40b00000
668 ; GCN: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
669 define amdgpu_kernel void @store_literal_imm_f64(double addrspace(1)* %out) {
670 store double 4096.0, double addrspace(1)* %out
; Vertex-shader test (amdgpu_vs): multiplies %arg by a constant and by the
; negation of that constant, then passes both products to the export
; intrinsic. The checks expect each constant to appear as a 32-bit literal
; operand (0x3f4353f8 and 0xbf4353f8) in the first source position of its own
; v_mul_f32_e32.
674 ; GCN-LABEL: {{^}}literal_folding:
675 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x3f4353f8, v{{[0-9]+}}
676 ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 0xbf4353f8, v{{[0-9]+}}
677 define amdgpu_vs void @literal_folding(float %arg) {
679 %tmp = fmul float %arg, 0x3FE86A7F00000000
680 %tmp1 = fmul float %arg, 0xBFE86A7F00000000
681 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp, float %tmp, float %tmp1, float %tmp1, i1 true, i1 false) #0
; Declaration of the export intrinsic called by @literal_folding, and the
; attribute group (#0, nounwind) referenced by that declaration and call.
685 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
687 attributes #0 = { nounwind }