; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s

; FIXME: Merge into imm.ll

; Stores the i16 constant -32768 (bit pattern 0x8000). The checks expect the
; value to be materialized with v_mov_b32 - as 0x8000 on SI and as the
; sign-extended 0xffff8000 on VI - and then written with a 16-bit buffer store.
; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_i16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x8000{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) {
  store volatile i16 -32768, i16 addrspace(1)* %out
  ret void
}

; Stores half 0.0. Both targets are expected to move the plain constant 0 into
; a VGPR and write it with a 16-bit buffer store.
; GCN-LABEL: {{^}}store_inline_imm_0.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) {
  store half 0.0, half addrspace(1)* %out
  ret void
}

; Stores half -0.0. The checks expect a literal move: 0x8000 on SI and the
; sign-extended 0xffff8000 on VI, followed by a 16-bit buffer store.
; GCN-LABEL: {{^}}store_imm_neg_0.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x8000{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) {
  store half -0.0, half addrspace(1)* %out
  ret void
}

; Stores half 0.5. Both targets are expected to move the 16-bit pattern 0x3800
; into a VGPR and write it with a 16-bit buffer store.
; GCN-LABEL: {{^}}store_inline_imm_0.5_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3800{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) {
  store half 0.5, half addrspace(1)* %out
  ret void
}

; Stores half -0.5. The checks expect the literal 0xb800 on SI and the
; sign-extended 0xffffb800 on VI, followed by a 16-bit buffer store.
; GCN-LABEL: {{^}}store_inline_imm_m_0.5_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffb800{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) {
  store half -0.5, half addrspace(1)* %out
  ret void
}

; Stores half 1.0. Both targets are expected to move the 16-bit pattern 0x3c00
; into a VGPR and write it with a 16-bit buffer store.
; GCN-LABEL: {{^}}store_inline_imm_1.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) {
  store half 1.0, half addrspace(1)* %out
  ret void
}

; Stores half -1.0. The checks expect the literal 0xbc00 on SI and the
; sign-extended 0xffffbc00 on VI, followed by a 16-bit buffer store.
; GCN-LABEL: {{^}}store_inline_imm_m_1.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) {
  store half -1.0, half addrspace(1)* %out
  ret void
}

; Stores half 2.0. Both targets are expected to move the 16-bit pattern 0x4000
; into a VGPR and write it with a 16-bit buffer store.
; GCN-LABEL: {{^}}store_inline_imm_2.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) {
  store half 2.0, half addrspace(1)* %out
  ret void
}

; Stores half -2.0. The checks expect the literal 0xc000 on SI and the
; sign-extended 0xffffc000 on VI, followed by a 16-bit buffer store.
; GCN-LABEL: {{^}}store_inline_imm_m_2.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffc000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) {
  store half -2.0, half addrspace(1)* %out
  ret void
}

; Stores half 4.0. Both targets are expected to move the 16-bit pattern 0x4400
; into a VGPR and write it with a 16-bit buffer store.
; GCN-LABEL: {{^}}store_inline_imm_4.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4400{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) {
  store half 4.0, half addrspace(1)* %out
  ret void
}

; Stores half -4.0. The checks expect the literal 0xc400 on SI and the
; sign-extended 0xffffc400 on VI, followed by a 16-bit buffer store.
; GCN-LABEL: {{^}}store_inline_imm_m_4.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffc400{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) {
  store half -4.0, half addrspace(1)* %out
  ret void
}

; Stores the half constant with bit pattern 0x3118 (named inv_2pi by this
; test). Both targets are expected to move 0x3118 into a VGPR and write it with
; a 16-bit buffer store.
; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3118{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) {
  store half 0xH3118, half addrspace(1)* %out
  ret void
}

; Stores the half constant with bit pattern 0xB118 (0x3118 with the sign bit
; set). The checks expect the literal 0xb118 on SI and the sign-extended
; 0xffffb118 on VI, followed by a 16-bit buffer store.
; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffb118{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) {
  store half 0xHB118, half addrspace(1)* %out
  ret void
}

; Stores half 4096.0. Both targets are expected to move the literal 0x6c00 into
; a VGPR and write it with a 16-bit buffer store. The literal is anchored at
; end of line, like the other store checks, so a longer literal cannot match.
; GCN-LABEL: {{^}}store_literal_imm_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) {
  store half 4096.0, half addrspace(1)* %out
  ret void
}

; Adds half 0.0 to the kernel argument %x. On VI the checks expect %x to be
; loaded into an SGPR and the constant to appear as the operand 0 of
; v_add_f16_e64. No SI instruction checks exist for this function.
; GCN-LABEL: {{^}}add_inline_imm_0.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0.0
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds half 0.5 to the kernel argument %x. On VI the checks expect %x in an
; SGPR and the constant printed as the operand 0.5 of v_add_f16_e64.
; GCN-LABEL: {{^}}add_inline_imm_0.5_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 0.5{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0.5
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds half -0.5 to the kernel argument %x. On VI the checks expect %x in an
; SGPR and the constant printed as the operand -0.5 of v_add_f16_e64.
; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -0.5{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, -0.5
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds half 1.0 to the kernel argument %x. On VI the checks expect %x in an
; SGPR and the constant printed as the operand 1.0 of v_add_f16_e64.
; GCN-LABEL: {{^}}add_inline_imm_1.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 1.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 1.0
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds half -1.0 to the kernel argument %x. On VI the checks expect %x in an
; SGPR and the constant printed as the operand -1.0 of v_add_f16_e64.
; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -1.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, -1.0
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds half 2.0 to the kernel argument %x. On VI the checks expect %x in an
; SGPR and the constant printed as the operand 2.0 of v_add_f16_e64.
; GCN-LABEL: {{^}}add_inline_imm_2.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 2.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 2.0
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds half -2.0 to the kernel argument %x. On VI the checks expect %x in an
; SGPR and the constant printed as the operand -2.0 of v_add_f16_e64.
; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -2.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, -2.0
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds half 4.0 to the kernel argument %x. On VI the checks expect %x in an
; SGPR and the constant printed as the operand 4.0 of v_add_f16_e64.
; GCN-LABEL: {{^}}add_inline_imm_4.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 4.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 4.0
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds half -4.0 to the kernel argument %x. On VI the checks expect %x in an
; SGPR and the constant printed as the operand -4.0 of v_add_f16_e64.
; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -4.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, -4.0
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds half 0.5 to a value loaded from memory, so the variable operand is in a
; VGPR. On VI the checks expect the e32 form of v_add_f16 with the constant 0.5
; placed first and the loaded value second.
; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_f16:
; VI: buffer_load_ushort [[VAL:v[0-9]+]]
; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
  %x = load half, half addrspace(1)* %in
  %y = fadd half %x, 0.5
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds half 1024.0 to a value loaded from memory. On VI the checks expect the
; e32 form of v_add_f16 with the literal 0x6400 placed first and the loaded
; value second.
; GCN-LABEL: {{^}}commute_add_literal_f16:
; VI: buffer_load_ushort [[VAL:v[0-9]+]]
; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0x6400, [[VAL]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
  %x = load half, half addrspace(1)* %in
  %y = fadd half %x, 1024.0
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds the half constant whose bit pattern is 0x0001 to the kernel argument %x.
; On VI the checks expect the constant printed as the integer operand 1 of
; v_add_f16_e64.
; GCN-LABEL: {{^}}add_inline_imm_1_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 1{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH0001
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds the half constant whose bit pattern is 0x0002 to the kernel argument %x.
; On VI the checks expect the constant printed as the integer operand 2 of
; v_add_f16_e64.
; GCN-LABEL: {{^}}add_inline_imm_2_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 2{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH0002
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds the half constant whose bit pattern is 0x0010 to the kernel argument %x.
; On VI the checks expect the constant printed as the integer operand 16 of
; v_add_f16_e64.
; GCN-LABEL: {{^}}add_inline_imm_16_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 16{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH0010
  store half %y, half addrspace(1)* %out
  ret void
}

; Loads an i16, adds -1 to it as an integer, and stores the result
; reinterpreted as half. On VI the checks expect a 16-bit integer add with the
; operand -1. Only the destination register is captured as REG, so the store
; check is tied to the add's result; the source operand is matched anonymously.
; GCN-LABEL: {{^}}add_inline_imm_neg_1_f16:
; VI: v_add_u16_e32 [[REG:v[0-9]+]], -1, {{v[0-9]+}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
  %x = load i16, i16 addrspace(1)* %in
  %y = add i16 %x, -1
  %ybc = bitcast i16 %y to half
  store half %ybc, half addrspace(1)* %out
  ret void
}

; Loads an i16, adds -2 to it as an integer, and stores the result
; reinterpreted as half. On VI the checks expect a 16-bit integer add with the
; operand -2. Only the destination register is captured as REG, so the store
; check is tied to the add's result; the source operand is matched anonymously.
; GCN-LABEL: {{^}}add_inline_imm_neg_2_f16:
; VI: v_add_u16_e32 [[REG:v[0-9]+]], -2, {{v[0-9]+}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
  %x = load i16, i16 addrspace(1)* %in
  %y = add i16 %x, -2
  %ybc = bitcast i16 %y to half
  store half %ybc, half addrspace(1)* %out
  ret void
}

; Loads an i16, adds -16 to it as an integer, and stores the result
; reinterpreted as half. On VI the checks expect a 16-bit integer add with the
; operand -16. Only the destination register is captured as REG, so the store
; check is tied to the add's result; the source operand is matched anonymously.
; GCN-LABEL: {{^}}add_inline_imm_neg_16_f16:
; VI: v_add_u16_e32 [[REG:v[0-9]+]], -16, {{v[0-9]+}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
  %x = load i16, i16 addrspace(1)* %in
  %y = add i16 %x, -16
  %ybc = bitcast i16 %y to half
  store half %ybc, half addrspace(1)* %out
  ret void
}

; Adds the half constant whose bit pattern is 0x003F to the kernel argument %x.
; On VI the checks expect the constant printed as the integer operand 63 of
; v_add_f16_e64. The operand is anchored at end of line, matching the checks
; for 1, 2 and 16, so a longer number cannot satisfy it.
; GCN-LABEL: {{^}}add_inline_imm_63_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 63{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH003F
  store half %y, half addrspace(1)* %out
  ret void
}

; Adds the half constant whose bit pattern is 0x0040 to the kernel argument %x.
; On VI the checks expect the constant printed as the integer operand 64 of
; v_add_f16_e64. The operand is anchored at end of line, matching the checks
; for 1, 2 and 16, so a longer number cannot satisfy it.
; GCN-LABEL: {{^}}add_inline_imm_64_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 64{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH0040
  store half %y, half addrspace(1)* %out
  ret void
}