1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX10 %s
3 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX11 %s
4 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
7 ; FIXME: Merge into imm.ll
; Volatile store of the i16 constant -32768 to %out.
; The checks expect the value to be placed in v0 by v_mov_b32 from a 32-bit
; literal operand, printed as 0xffff8000 for the GFX10, GFX11 and VI runs and
; as 0x8000 for the SI run.
; Unlike the non-volatile half stores that follow in this file, the expected
; output also waits after the store (s_waitcnt_vscnt on GFX10 and GFX11,
; s_waitcnt vmcnt(0) on VI and SI) and the GFX11 store carries the dlc modifier.
9 define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(ptr addrspace(1) %out) {
10 ; GFX10-LABEL: store_inline_imm_neg_0.0_i16:
12 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
13 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
14 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
15 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
16 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
17 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
18 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
19 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
21 ; GFX11-LABEL: store_inline_imm_neg_0.0_i16:
23 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
24 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
25 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
26 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
27 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
28 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
29 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
30 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
32 ; VI-LABEL: store_inline_imm_neg_0.0_i16:
34 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
35 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
36 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
37 ; VI-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
38 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
39 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
40 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
41 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
43 ; SI-LABEL: store_inline_imm_neg_0.0_i16:
45 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
46 ; SI-NEXT: s_mov_b32 s3, 0xf000
47 ; SI-NEXT: s_mov_b32 s2, -1
48 ; SI-NEXT: v_mov_b32_e32 v0, 0x8000
49 ; SI-NEXT: s_waitcnt lgkmcnt(0)
50 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
51 ; SI-NEXT: s_waitcnt vmcnt(0)
53 store volatile i16 -32768, ptr addrspace(1) %out
; Stores the half constant 0.0 to %out.
; Every run expects the zero to be moved into v0 as the plain operand 0; where
; encodings are shown, that v_mov_b32 is 4 bytes long (no trailing literal
; dword). The value is then written with a 16-bit buffer store.
57 define amdgpu_kernel void @store_inline_imm_0.0_f16(ptr addrspace(1) %out) {
58 ; GFX10-LABEL: store_inline_imm_0.0_f16:
60 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
61 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
62 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
63 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
64 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
65 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
66 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
68 ; GFX11-LABEL: store_inline_imm_0.0_f16:
70 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
71 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
72 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
73 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
74 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
75 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
76 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
78 ; VI-LABEL: store_inline_imm_0.0_f16:
80 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
81 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
82 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
83 ; VI-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
84 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
85 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
86 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
88 ; SI-LABEL: store_inline_imm_0.0_f16:
90 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
91 ; SI-NEXT: s_mov_b32 s3, 0xf000
92 ; SI-NEXT: s_mov_b32 s2, -1
93 ; SI-NEXT: v_mov_b32_e32 v0, 0
94 ; SI-NEXT: s_waitcnt lgkmcnt(0)
95 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
97 store half 0.0, ptr addrspace(1) %out
; Stores the half constant -0.0 to %out.
; The checks expect negative zero as a 32-bit literal operand of v_mov_b32,
; printed as 0xffff8000 for the GFX10, GFX11 and VI runs (8-byte encoding) and
; as 0x8000 for the SI run, followed by a 16-bit buffer store.
101 define amdgpu_kernel void @store_imm_neg_0.0_f16(ptr addrspace(1) %out) {
102 ; GFX10-LABEL: store_imm_neg_0.0_f16:
104 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
105 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
106 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
107 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
108 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
109 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
110 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
112 ; GFX11-LABEL: store_imm_neg_0.0_f16:
114 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
115 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
116 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
117 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
118 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
119 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
120 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
122 ; VI-LABEL: store_imm_neg_0.0_f16:
124 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
125 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
126 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
127 ; VI-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
128 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
129 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
130 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
132 ; SI-LABEL: store_imm_neg_0.0_f16:
134 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
135 ; SI-NEXT: s_mov_b32 s3, 0xf000
136 ; SI-NEXT: s_mov_b32 s2, -1
137 ; SI-NEXT: v_mov_b32_e32 v0, 0x8000
138 ; SI-NEXT: s_waitcnt lgkmcnt(0)
139 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
141 store half -0.0, ptr addrspace(1) %out
; Stores the half constant 0.5 to %out.
; All four runs expect v_mov_b32 with the operand 0x3800; where encodings are
; shown the instruction is 8 bytes with the constant in the trailing dword,
; despite the "inline_imm" in the test name. A 16-bit buffer store follows.
145 define amdgpu_kernel void @store_inline_imm_0.5_f16(ptr addrspace(1) %out) {
146 ; GFX10-LABEL: store_inline_imm_0.5_f16:
148 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
149 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
150 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
151 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
152 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
153 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
154 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
156 ; GFX11-LABEL: store_inline_imm_0.5_f16:
158 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
159 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
160 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
161 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
162 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
163 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
164 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
166 ; VI-LABEL: store_inline_imm_0.5_f16:
168 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
169 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
170 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
171 ; VI-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
172 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
173 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
174 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
176 ; SI-LABEL: store_inline_imm_0.5_f16:
178 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
179 ; SI-NEXT: s_mov_b32 s3, 0xf000
180 ; SI-NEXT: s_mov_b32 s2, -1
181 ; SI-NEXT: v_mov_b32_e32 v0, 0x3800
182 ; SI-NEXT: s_waitcnt lgkmcnt(0)
183 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
185 store half 0.5, ptr addrspace(1) %out
; Stores the half constant -0.5 to %out.
; The checks expect v_mov_b32 with a 32-bit literal operand, printed as
; 0xffffb800 for the GFX10, GFX11 and VI runs (8-byte encoding) and as 0xb800
; for the SI run, followed by a 16-bit buffer store.
189 define amdgpu_kernel void @store_inline_imm_m_0.5_f16(ptr addrspace(1) %out) {
190 ; GFX10-LABEL: store_inline_imm_m_0.5_f16:
192 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
193 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
194 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
195 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
196 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
197 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
198 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
200 ; GFX11-LABEL: store_inline_imm_m_0.5_f16:
202 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
203 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
204 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
205 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
206 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
207 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
208 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
210 ; VI-LABEL: store_inline_imm_m_0.5_f16:
212 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
213 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
214 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
215 ; VI-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
216 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
217 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
218 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
220 ; SI-LABEL: store_inline_imm_m_0.5_f16:
222 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
223 ; SI-NEXT: s_mov_b32 s3, 0xf000
224 ; SI-NEXT: s_mov_b32 s2, -1
225 ; SI-NEXT: v_mov_b32_e32 v0, 0xb800
226 ; SI-NEXT: s_waitcnt lgkmcnt(0)
227 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
229 store half -0.5, ptr addrspace(1) %out
; Stores the half constant 1.0 to %out.
; All four runs expect v_mov_b32 with the operand 0x3c00; where encodings are
; shown the instruction is 8 bytes with the constant in the trailing dword.
; The value is then written with a 16-bit buffer store.
233 define amdgpu_kernel void @store_inline_imm_1.0_f16(ptr addrspace(1) %out) {
234 ; GFX10-LABEL: store_inline_imm_1.0_f16:
236 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
237 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
238 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
239 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
240 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
241 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
242 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
244 ; GFX11-LABEL: store_inline_imm_1.0_f16:
246 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
247 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
248 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
249 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
250 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
251 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
252 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
254 ; VI-LABEL: store_inline_imm_1.0_f16:
256 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
257 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
258 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
259 ; VI-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
260 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
261 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
262 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
264 ; SI-LABEL: store_inline_imm_1.0_f16:
266 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
267 ; SI-NEXT: s_mov_b32 s3, 0xf000
268 ; SI-NEXT: s_mov_b32 s2, -1
269 ; SI-NEXT: v_mov_b32_e32 v0, 0x3c00
270 ; SI-NEXT: s_waitcnt lgkmcnt(0)
271 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
273 store half 1.0, ptr addrspace(1) %out
; Stores the half constant -1.0 to %out.
; The checks expect v_mov_b32 with a 32-bit literal operand, printed as
; 0xffffbc00 for the GFX10, GFX11 and VI runs (8-byte encoding) and as 0xbc00
; for the SI run, followed by a 16-bit buffer store.
277 define amdgpu_kernel void @store_inline_imm_m_1.0_f16(ptr addrspace(1) %out) {
278 ; GFX10-LABEL: store_inline_imm_m_1.0_f16:
280 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
281 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
282 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
283 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
284 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
285 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
286 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
288 ; GFX11-LABEL: store_inline_imm_m_1.0_f16:
290 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
291 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
292 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
293 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
294 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
295 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
296 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
298 ; VI-LABEL: store_inline_imm_m_1.0_f16:
300 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
301 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
302 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
303 ; VI-NEXT: v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
304 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
305 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
306 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
308 ; SI-LABEL: store_inline_imm_m_1.0_f16:
310 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
311 ; SI-NEXT: s_mov_b32 s3, 0xf000
312 ; SI-NEXT: s_mov_b32 s2, -1
313 ; SI-NEXT: v_mov_b32_e32 v0, 0xbc00
314 ; SI-NEXT: s_waitcnt lgkmcnt(0)
315 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
317 store half -1.0, ptr addrspace(1) %out
; Stores the half constant 2.0 to %out.
; All four runs expect v_mov_b32 with the operand 0x4000; where encodings are
; shown the instruction is 8 bytes with the constant in the trailing dword.
; The value is then written with a 16-bit buffer store.
321 define amdgpu_kernel void @store_inline_imm_2.0_f16(ptr addrspace(1) %out) {
322 ; GFX10-LABEL: store_inline_imm_2.0_f16:
324 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
325 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
326 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
327 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
328 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
329 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
330 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
332 ; GFX11-LABEL: store_inline_imm_2.0_f16:
334 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
335 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
336 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
337 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
338 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
339 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
340 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
342 ; VI-LABEL: store_inline_imm_2.0_f16:
344 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
345 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
346 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
347 ; VI-NEXT: v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
348 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
349 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
350 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
352 ; SI-LABEL: store_inline_imm_2.0_f16:
354 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
355 ; SI-NEXT: s_mov_b32 s3, 0xf000
356 ; SI-NEXT: s_mov_b32 s2, -1
357 ; SI-NEXT: v_mov_b32_e32 v0, 0x4000
358 ; SI-NEXT: s_waitcnt lgkmcnt(0)
359 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
361 store half 2.0, ptr addrspace(1) %out
; Stores the half constant -2.0 to %out.
; The checks expect v_mov_b32 with a 32-bit literal operand, printed as
; 0xffffc000 for the GFX10, GFX11 and VI runs (8-byte encoding) and as 0xc000
; for the SI run, followed by a 16-bit buffer store.
365 define amdgpu_kernel void @store_inline_imm_m_2.0_f16(ptr addrspace(1) %out) {
366 ; GFX10-LABEL: store_inline_imm_m_2.0_f16:
368 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
369 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
370 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
371 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
372 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
373 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
374 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
376 ; GFX11-LABEL: store_inline_imm_m_2.0_f16:
378 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
379 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
380 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
381 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
382 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
383 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
384 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
386 ; VI-LABEL: store_inline_imm_m_2.0_f16:
388 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
389 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
390 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
391 ; VI-NEXT: v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
392 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
393 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
394 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
396 ; SI-LABEL: store_inline_imm_m_2.0_f16:
398 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
399 ; SI-NEXT: s_mov_b32 s3, 0xf000
400 ; SI-NEXT: s_mov_b32 s2, -1
401 ; SI-NEXT: v_mov_b32_e32 v0, 0xc000
402 ; SI-NEXT: s_waitcnt lgkmcnt(0)
403 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
405 store half -2.0, ptr addrspace(1) %out
; Stores the half constant 4.0 to %out.
; All four runs expect v_mov_b32 with the operand 0x4400; where encodings are
; shown the instruction is 8 bytes with the constant in the trailing dword.
; The value is then written with a 16-bit buffer store.
409 define amdgpu_kernel void @store_inline_imm_4.0_f16(ptr addrspace(1) %out) {
410 ; GFX10-LABEL: store_inline_imm_4.0_f16:
412 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
413 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
414 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
415 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
416 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
417 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
418 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
420 ; GFX11-LABEL: store_inline_imm_4.0_f16:
422 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
423 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
424 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
425 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
426 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
427 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
428 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
430 ; VI-LABEL: store_inline_imm_4.0_f16:
432 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
433 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
434 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
435 ; VI-NEXT: v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
436 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
437 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
438 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
440 ; SI-LABEL: store_inline_imm_4.0_f16:
442 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
443 ; SI-NEXT: s_mov_b32 s3, 0xf000
444 ; SI-NEXT: s_mov_b32 s2, -1
445 ; SI-NEXT: v_mov_b32_e32 v0, 0x4400
446 ; SI-NEXT: s_waitcnt lgkmcnt(0)
447 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
449 store half 4.0, ptr addrspace(1) %out
; Stores the half constant -4.0 to %out.
; The checks expect v_mov_b32 with a 32-bit literal operand, printed as
; 0xffffc400 for the GFX10, GFX11 and VI runs (8-byte encoding) and as 0xc400
; for the SI run, followed by a 16-bit buffer store.
453 define amdgpu_kernel void @store_inline_imm_m_4.0_f16(ptr addrspace(1) %out) {
454 ; GFX10-LABEL: store_inline_imm_m_4.0_f16:
456 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
457 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
458 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
459 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
460 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
461 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
462 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
464 ; GFX11-LABEL: store_inline_imm_m_4.0_f16:
466 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
467 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
468 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
469 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
470 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
471 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
472 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
474 ; VI-LABEL: store_inline_imm_m_4.0_f16:
476 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
477 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
478 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
479 ; VI-NEXT: v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
480 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
481 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
482 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
484 ; SI-LABEL: store_inline_imm_m_4.0_f16:
486 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
487 ; SI-NEXT: s_mov_b32 s3, 0xf000
488 ; SI-NEXT: s_mov_b32 s2, -1
489 ; SI-NEXT: v_mov_b32_e32 v0, 0xc400
490 ; SI-NEXT: s_waitcnt lgkmcnt(0)
491 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
493 store half -4.0, ptr addrspace(1) %out
; Stores the half constant with bit pattern 0xH3118 (called inv_2pi by the
; test name) to %out.
; All four runs expect v_mov_b32 with the operand 0x3118; where encodings are
; shown the instruction is 8 bytes with the constant in the trailing dword.
; The value is then written with a 16-bit buffer store.
497 define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(ptr addrspace(1) %out) {
498 ; GFX10-LABEL: store_inline_imm_inv_2pi_f16:
500 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
501 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
502 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
503 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
504 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
505 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
506 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
508 ; GFX11-LABEL: store_inline_imm_inv_2pi_f16:
510 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
511 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
512 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
513 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
514 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
515 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
516 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
518 ; VI-LABEL: store_inline_imm_inv_2pi_f16:
520 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
521 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
522 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
523 ; VI-NEXT: v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
524 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
525 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
526 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
528 ; SI-LABEL: store_inline_imm_inv_2pi_f16:
530 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
531 ; SI-NEXT: s_mov_b32 s3, 0xf000
532 ; SI-NEXT: s_mov_b32 s2, -1
533 ; SI-NEXT: v_mov_b32_e32 v0, 0x3118
534 ; SI-NEXT: s_waitcnt lgkmcnt(0)
535 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
537 store half 0xH3118, ptr addrspace(1) %out
; Stores the half constant with bit pattern 0xHB118 (called m_inv_2pi by the
; test name) to %out.
; The checks expect v_mov_b32 with a 32-bit literal operand, printed as
; 0xffffb118 for the GFX10, GFX11 and VI runs (8-byte encoding) and as 0xb118
; for the SI run, followed by a 16-bit buffer store.
541 define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(ptr addrspace(1) %out) {
542 ; GFX10-LABEL: store_inline_imm_m_inv_2pi_f16:
544 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
545 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
546 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
547 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
548 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
549 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
550 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
552 ; GFX11-LABEL: store_inline_imm_m_inv_2pi_f16:
554 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
555 ; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
556 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
557 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
558 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
559 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
560 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
562 ; VI-LABEL: store_inline_imm_m_inv_2pi_f16:
564 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
565 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
566 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
567 ; VI-NEXT: v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
568 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
569 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
570 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
572 ; SI-LABEL: store_inline_imm_m_inv_2pi_f16:
574 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
575 ; SI-NEXT: s_mov_b32 s3, 0xf000
576 ; SI-NEXT: s_mov_b32 s2, -1
577 ; SI-NEXT: v_mov_b32_e32 v0, 0xb118
578 ; SI-NEXT: s_waitcnt lgkmcnt(0)
579 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
581 store half 0xHB118, ptr addrspace(1) %out
; Stores the half constant 4096.0 to %out. Every check block below expects the
; value to be materialized with "v_mov_b32_e32 v0, 0x6c00" (0x6c00 is the
; binary16 bit pattern of 4096.0) followed by one 16-bit buffer store
; (buffer_store_short, or buffer_store_b16 on GFX11). Where encodings are
; shown, the last four bytes of the v_mov carry the value as a trailing
; 32-bit little-endian dword.
585 define amdgpu_kernel void @store_literal_imm_f16(ptr addrspace(1) %out) {
586 ; GFX10-LABEL: store_literal_imm_f16:
588 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
589 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
590 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
591 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
592 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
593 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
594 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
596 ; GFX11-LABEL: store_literal_imm_f16:
598 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
599 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
600 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
601 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
602 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
603 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
604 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
606 ; VI-LABEL: store_literal_imm_f16:
608 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
609 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
610 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
611 ; VI-NEXT: v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
612 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
613 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
614 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
616 ; SI-LABEL: store_literal_imm_f16:
618 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
619 ; SI-NEXT: s_mov_b32 s3, 0xf000
620 ; SI-NEXT: s_mov_b32 s2, -1
621 ; SI-NEXT: v_mov_b32_e32 v0, 0x6c00
622 ; SI-NEXT: s_waitcnt lgkmcnt(0)
623 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; IR under test: a single constant store.
625 store half 4096.0, ptr addrspace(1) %out
; Adds the constant 0.0 to the half kernel argument %x and stores the sum to
; %out. The GFX10, GFX11 and VI checks expect the constant to appear directly
; as the second source operand of v_add_f16_e64 (printed as "0"). The SI
; checks expect the add to be performed in f32 instead: v_cvt_f32_f16,
; v_add_f32_e32 with operand 0, then v_cvt_f16_f32.
629 define amdgpu_kernel void @add_inline_imm_0.0_f16(ptr addrspace(1) %out, half %x) {
630 ; GFX10-LABEL: add_inline_imm_0.0_f16:
632 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
633 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
634 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
635 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
636 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
637 ; GFX10-NEXT: v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00]
638 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
639 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
640 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
642 ; GFX11-LABEL: add_inline_imm_0.0_f16:
644 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
645 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
646 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
647 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
648 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
649 ; GFX11-NEXT: v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00]
650 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
651 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
652 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
654 ; VI-LABEL: add_inline_imm_0.0_f16:
656 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
657 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
658 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
659 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
660 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
661 ; VI-NEXT: v_add_f16_e64 v0, s4, 0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x00,0x01,0x00]
662 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
663 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
665 ; SI-LABEL: add_inline_imm_0.0_f16:
667 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
668 ; SI-NEXT: s_mov_b32 s3, 0xf000
669 ; SI-NEXT: s_mov_b32 s2, -1
670 ; SI-NEXT: s_waitcnt lgkmcnt(0)
671 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
672 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
673 ; SI-NEXT: v_add_f32_e32 v0, 0, v0
674 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
675 ; SI-NEXT: s_waitcnt lgkmcnt(0)
676 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; IR under test: the fadd constant is the operand the checks above look for.
678 %y = fadd half %x, 0.0
679 store half %y, ptr addrspace(1) %out
; Adds the constant 0.5 to the half kernel argument %x and stores the sum to
; %out. The GFX10, GFX11 and VI checks expect 0.5 to appear directly as the
; second source operand of v_add_f16_e64. The SI checks expect the add to be
; performed in f32 instead: v_cvt_f32_f16, v_add_f32_e32 with operand 0.5,
; then v_cvt_f16_f32.
683 define amdgpu_kernel void @add_inline_imm_0.5_f16(ptr addrspace(1) %out, half %x) {
684 ; GFX10-LABEL: add_inline_imm_0.5_f16:
686 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
687 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
688 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
689 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
690 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
691 ; GFX10-NEXT: v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00]
692 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
693 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
694 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
696 ; GFX11-LABEL: add_inline_imm_0.5_f16:
698 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
699 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
700 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
701 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
702 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
703 ; GFX11-NEXT: v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00]
704 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
705 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
706 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
708 ; VI-LABEL: add_inline_imm_0.5_f16:
710 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
711 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
712 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
713 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
714 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
715 ; VI-NEXT: v_add_f16_e64 v0, s4, 0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe0,0x01,0x00]
716 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
717 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
719 ; SI-LABEL: add_inline_imm_0.5_f16:
721 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
722 ; SI-NEXT: s_mov_b32 s3, 0xf000
723 ; SI-NEXT: s_mov_b32 s2, -1
724 ; SI-NEXT: s_waitcnt lgkmcnt(0)
725 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
726 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
727 ; SI-NEXT: v_add_f32_e32 v0, 0.5, v0
728 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
729 ; SI-NEXT: s_waitcnt lgkmcnt(0)
730 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; IR under test: the fadd constant is the operand the checks above look for.
732 %y = fadd half %x, 0.5
733 store half %y, ptr addrspace(1) %out
; Adds the constant -0.5 to the half kernel argument %x and stores the sum to
; %out. The GFX10, GFX11 and VI checks expect -0.5 to appear directly as the
; second source operand of v_add_f16_e64. The SI checks expect the add to be
; performed in f32 instead: v_cvt_f32_f16, v_add_f32_e32 with operand -0.5,
; then v_cvt_f16_f32.
737 define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(ptr addrspace(1) %out, half %x) {
738 ; GFX10-LABEL: add_inline_imm_neg_0.5_f16:
740 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
741 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
742 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
743 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
744 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
745 ; GFX10-NEXT: v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00]
746 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
747 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
748 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
750 ; GFX11-LABEL: add_inline_imm_neg_0.5_f16:
752 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
753 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
754 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
755 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
756 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
757 ; GFX11-NEXT: v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00]
758 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
759 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
760 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
762 ; VI-LABEL: add_inline_imm_neg_0.5_f16:
764 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
765 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
766 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
767 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
768 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
769 ; VI-NEXT: v_add_f16_e64 v0, s4, -0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe2,0x01,0x00]
770 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
771 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
773 ; SI-LABEL: add_inline_imm_neg_0.5_f16:
775 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
776 ; SI-NEXT: s_mov_b32 s3, 0xf000
777 ; SI-NEXT: s_mov_b32 s2, -1
778 ; SI-NEXT: s_waitcnt lgkmcnt(0)
779 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
780 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
781 ; SI-NEXT: v_add_f32_e32 v0, -0.5, v0
782 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
783 ; SI-NEXT: s_waitcnt lgkmcnt(0)
784 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; IR under test: the fadd constant is the operand the checks above look for.
786 %y = fadd half %x, -0.5
787 store half %y, ptr addrspace(1) %out
; Adds the constant 1.0 to the half kernel argument %x and stores the sum to
; %out. The GFX10, GFX11 and VI checks expect 1.0 to appear directly as the
; second source operand of v_add_f16_e64. The SI checks expect the add to be
; performed in f32 instead: v_cvt_f32_f16, v_add_f32_e32 with operand 1.0,
; then v_cvt_f16_f32.
791 define amdgpu_kernel void @add_inline_imm_1.0_f16(ptr addrspace(1) %out, half %x) {
792 ; GFX10-LABEL: add_inline_imm_1.0_f16:
794 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
795 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
796 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
797 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
798 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
799 ; GFX10-NEXT: v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00]
800 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
801 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
802 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
804 ; GFX11-LABEL: add_inline_imm_1.0_f16:
806 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
807 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
808 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
809 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
810 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
811 ; GFX11-NEXT: v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00]
812 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
813 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
814 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
816 ; VI-LABEL: add_inline_imm_1.0_f16:
818 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
819 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
820 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
821 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
822 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
823 ; VI-NEXT: v_add_f16_e64 v0, s4, 1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe4,0x01,0x00]
824 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
825 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
827 ; SI-LABEL: add_inline_imm_1.0_f16:
829 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
830 ; SI-NEXT: s_mov_b32 s3, 0xf000
831 ; SI-NEXT: s_mov_b32 s2, -1
832 ; SI-NEXT: s_waitcnt lgkmcnt(0)
833 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
834 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
835 ; SI-NEXT: v_add_f32_e32 v0, 1.0, v0
836 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
837 ; SI-NEXT: s_waitcnt lgkmcnt(0)
838 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; IR under test: the fadd constant is the operand the checks above look for.
840 %y = fadd half %x, 1.0
841 store half %y, ptr addrspace(1) %out
; Adds the constant -1.0 to the half kernel argument %x and stores the sum to
; %out. The GFX10, GFX11 and VI checks expect -1.0 to appear directly as the
; second source operand of v_add_f16_e64. The SI checks expect the add to be
; performed in f32 instead: v_cvt_f32_f16, v_add_f32_e32 with operand -1.0,
; then v_cvt_f16_f32.
845 define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(ptr addrspace(1) %out, half %x) {
846 ; GFX10-LABEL: add_inline_imm_neg_1.0_f16:
848 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
849 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
850 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
851 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
852 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
853 ; GFX10-NEXT: v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00]
854 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
855 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
856 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
858 ; GFX11-LABEL: add_inline_imm_neg_1.0_f16:
860 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
861 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
862 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
863 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
864 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
865 ; GFX11-NEXT: v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00]
866 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
867 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
868 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
870 ; VI-LABEL: add_inline_imm_neg_1.0_f16:
872 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
873 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
874 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
875 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
876 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
877 ; VI-NEXT: v_add_f16_e64 v0, s4, -1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe6,0x01,0x00]
878 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
879 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
881 ; SI-LABEL: add_inline_imm_neg_1.0_f16:
883 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
884 ; SI-NEXT: s_mov_b32 s3, 0xf000
885 ; SI-NEXT: s_mov_b32 s2, -1
886 ; SI-NEXT: s_waitcnt lgkmcnt(0)
887 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
888 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
889 ; SI-NEXT: v_add_f32_e32 v0, -1.0, v0
890 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
891 ; SI-NEXT: s_waitcnt lgkmcnt(0)
892 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; IR under test: the fadd constant is the operand the checks above look for.
894 %y = fadd half %x, -1.0
895 store half %y, ptr addrspace(1) %out
; Adds the constant 2.0 to the half kernel argument %x and stores the sum to
; %out. The GFX10, GFX11 and VI checks expect 2.0 to appear directly as the
; second source operand of v_add_f16_e64. The SI checks expect the add to be
; performed in f32 instead: v_cvt_f32_f16, v_add_f32_e32 with operand 2.0,
; then v_cvt_f16_f32.
899 define amdgpu_kernel void @add_inline_imm_2.0_f16(ptr addrspace(1) %out, half %x) {
900 ; GFX10-LABEL: add_inline_imm_2.0_f16:
902 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
903 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
904 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
905 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
906 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
907 ; GFX10-NEXT: v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00]
908 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
909 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
910 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
912 ; GFX11-LABEL: add_inline_imm_2.0_f16:
914 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
915 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
916 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
917 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
918 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
919 ; GFX11-NEXT: v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00]
920 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
921 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
922 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
924 ; VI-LABEL: add_inline_imm_2.0_f16:
926 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
927 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
928 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
929 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
930 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
931 ; VI-NEXT: v_add_f16_e64 v0, s4, 2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe8,0x01,0x00]
932 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
933 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
935 ; SI-LABEL: add_inline_imm_2.0_f16:
937 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
938 ; SI-NEXT: s_mov_b32 s3, 0xf000
939 ; SI-NEXT: s_mov_b32 s2, -1
940 ; SI-NEXT: s_waitcnt lgkmcnt(0)
941 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
942 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
943 ; SI-NEXT: v_add_f32_e32 v0, 2.0, v0
944 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
945 ; SI-NEXT: s_waitcnt lgkmcnt(0)
946 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; IR under test: the fadd constant is the operand the checks above look for.
948 %y = fadd half %x, 2.0
949 store half %y, ptr addrspace(1) %out
; Adds the constant -2.0 to the half kernel argument %x and stores the sum to
; %out. The GFX10, GFX11 and VI checks expect -2.0 to appear directly as the
; second source operand of v_add_f16_e64. The SI checks expect the add to be
; performed in f32 instead: v_cvt_f32_f16, v_add_f32_e32 with operand -2.0,
; then v_cvt_f16_f32.
953 define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(ptr addrspace(1) %out, half %x) {
954 ; GFX10-LABEL: add_inline_imm_neg_2.0_f16:
956 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
957 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
958 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
959 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
960 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
961 ; GFX10-NEXT: v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00]
962 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
963 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
964 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
966 ; GFX11-LABEL: add_inline_imm_neg_2.0_f16:
968 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
969 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
970 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
971 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
972 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
973 ; GFX11-NEXT: v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00]
974 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
975 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
976 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
978 ; VI-LABEL: add_inline_imm_neg_2.0_f16:
980 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
981 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
982 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
983 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
984 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
985 ; VI-NEXT: v_add_f16_e64 v0, s4, -2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xea,0x01,0x00]
986 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
987 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
989 ; SI-LABEL: add_inline_imm_neg_2.0_f16:
991 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
992 ; SI-NEXT: s_mov_b32 s3, 0xf000
993 ; SI-NEXT: s_mov_b32 s2, -1
994 ; SI-NEXT: s_waitcnt lgkmcnt(0)
995 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
996 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
997 ; SI-NEXT: v_add_f32_e32 v0, -2.0, v0
998 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
999 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1000 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; IR under test: the fadd constant is the operand the checks above look for.
1002 %y = fadd half %x, -2.0
1003 store half %y, ptr addrspace(1) %out
; Adds the constant 4.0 to the half kernel argument %x and stores the sum to
; %out. The GFX10, GFX11 and VI checks expect 4.0 to appear directly as the
; second source operand of v_add_f16_e64. The SI checks expect the add to be
; performed in f32 instead: v_cvt_f32_f16, v_add_f32_e32 with operand 4.0,
; then v_cvt_f16_f32.
1007 define amdgpu_kernel void @add_inline_imm_4.0_f16(ptr addrspace(1) %out, half %x) {
1008 ; GFX10-LABEL: add_inline_imm_4.0_f16:
1010 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1011 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1012 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1013 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1014 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1015 ; GFX10-NEXT: v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00]
1016 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1017 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1018 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1020 ; GFX11-LABEL: add_inline_imm_4.0_f16:
1022 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1023 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1024 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1025 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1026 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1027 ; GFX11-NEXT: v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00]
1028 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1029 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1030 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1032 ; VI-LABEL: add_inline_imm_4.0_f16:
1034 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1035 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1036 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1037 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1038 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1039 ; VI-NEXT: v_add_f16_e64 v0, s4, 4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xec,0x01,0x00]
1040 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1041 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1043 ; SI-LABEL: add_inline_imm_4.0_f16:
1045 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
1046 ; SI-NEXT: s_mov_b32 s3, 0xf000
1047 ; SI-NEXT: s_mov_b32 s2, -1
1048 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1049 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
1050 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
1051 ; SI-NEXT: v_add_f32_e32 v0, 4.0, v0
1052 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
1053 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1054 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; IR under test: the fadd constant is the operand the checks above look for.
1056 %y = fadd half %x, 4.0
1057 store half %y, ptr addrspace(1) %out
; Adds the constant -4.0 to the half kernel argument %x and stores the sum to
; %out. The GFX10, GFX11 and VI checks expect -4.0 to appear directly as the
; second source operand of v_add_f16_e64. The SI checks expect the add to be
; performed in f32 instead: v_cvt_f32_f16, v_add_f32_e32 with operand -4.0,
; then v_cvt_f16_f32.
1061 define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(ptr addrspace(1) %out, half %x) {
1062 ; GFX10-LABEL: add_inline_imm_neg_4.0_f16:
1064 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1065 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1066 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1067 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1068 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1069 ; GFX10-NEXT: v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00]
1070 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1071 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1072 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1074 ; GFX11-LABEL: add_inline_imm_neg_4.0_f16:
1076 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1077 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1078 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1079 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1080 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1081 ; GFX11-NEXT: v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00]
1082 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1083 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1084 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1086 ; VI-LABEL: add_inline_imm_neg_4.0_f16:
1088 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1089 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1090 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1091 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1092 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1093 ; VI-NEXT: v_add_f16_e64 v0, s4, -4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xee,0x01,0x00]
1094 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1095 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1097 ; SI-LABEL: add_inline_imm_neg_4.0_f16:
1099 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
1100 ; SI-NEXT: s_mov_b32 s3, 0xf000
1101 ; SI-NEXT: s_mov_b32 s2, -1
1102 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1103 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
1104 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
1105 ; SI-NEXT: v_add_f32_e32 v0, -4.0, v0
1106 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
1107 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1108 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; IR under test: the fadd constant is the operand the checks above look for.
1110 %y = fadd half %x, -4.0
1111 store half %y, ptr addrspace(1) %out
; Loads a half from %in, adds the constant 0.5 and stores the sum to %out.
; Unlike the add_inline_imm_* kernels in this file, the value being added to
; arrives in v0 from a buffer load rather than in a scalar register. The
; GFX10, GFX11 and VI checks expect "v_add_f16_e32 v0, 0.5, v0", i.e. the
; constant printed as the first source operand of the _e32 form. The SI
; checks expect the add to be performed in f32 instead: v_cvt_f32_f16,
; v_add_f32_e32 with operand 0.5, then v_cvt_f16_f32.
1115 define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1116 ; GFX10-LABEL: commute_add_inline_imm_0.5_f16:
1118 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1119 ; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1120 ; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1121 ; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1122 ; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1123 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1124 ; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1125 ; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1126 ; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1127 ; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1128 ; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1129 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1130 ; GFX10-NEXT: v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64]
1131 ; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1132 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1134 ; GFX11-LABEL: commute_add_inline_imm_0.5_f16:
1136 ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1137 ; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1138 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1139 ; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1140 ; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1141 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1142 ; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1143 ; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1144 ; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1145 ; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1146 ; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1147 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1148 ; GFX11-NEXT: v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64]
1149 ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1150 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1152 ; VI-LABEL: commute_add_inline_imm_0.5_f16:
1154 ; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1155 ; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1156 ; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1157 ; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1158 ; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1159 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1160 ; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1161 ; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1162 ; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1163 ; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1164 ; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1165 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1166 ; VI-NEXT: v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x3e]
1167 ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1168 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1170 ; SI-LABEL: commute_add_inline_imm_0.5_f16:
1172 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
1173 ; SI-NEXT: s_mov_b32 s7, 0xf000
1174 ; SI-NEXT: s_mov_b32 s6, -1
1175 ; SI-NEXT: s_mov_b32 s10, s6
1176 ; SI-NEXT: s_mov_b32 s11, s7
1177 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1178 ; SI-NEXT: s_mov_b32 s8, s2
1179 ; SI-NEXT: s_mov_b32 s9, s3
1180 ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
1181 ; SI-NEXT: s_mov_b32 s4, s0
1182 ; SI-NEXT: s_mov_b32 s5, s1
1183 ; SI-NEXT: s_waitcnt vmcnt(0)
1184 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
1185 ; SI-NEXT: v_add_f32_e32 v0, 0.5, v0
1186 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
1187 ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
; IR under test: the constant is written as the right-hand fadd operand,
; while the f16 checks above print it as the first source operand.
1189 %x = load half, ptr addrspace(1) %in
1190 %y = fadd half %x, 0.5
1191 store half %y, ptr addrspace(1) %out
; Loads a half from %in, adds 1024.0 and stores the result to %out.
; GFX10, GFX11 and VI are expected to emit v_add_f16_e32 with the constant as
; the literal 0x6400 in the first source operand, even though it is the second
; fadd operand in the IR. SI is expected to convert to f32, add the literal
; 0x44800000 and convert back to f16.
1195 define amdgpu_kernel void @commute_add_literal_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1196 ; GFX10-LABEL: commute_add_literal_f16:
1198 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1199 ; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1200 ; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1201 ; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1202 ; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1203 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1204 ; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1205 ; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1206 ; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1207 ; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1208 ; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1209 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1210 ; GFX10-NEXT: v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00]
1211 ; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1212 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1214 ; GFX11-LABEL: commute_add_literal_f16:
1216 ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1217 ; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1218 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1219 ; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1220 ; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1221 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1222 ; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1223 ; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1224 ; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1225 ; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1226 ; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1227 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1228 ; GFX11-NEXT: v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00]
1229 ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1230 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1232 ; VI-LABEL: commute_add_literal_f16:
1234 ; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1235 ; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1236 ; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1237 ; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1238 ; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1239 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1240 ; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1241 ; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1242 ; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1243 ; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1244 ; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1245 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1246 ; VI-NEXT: v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x3e,0x00,0x64,0x00,0x00]
1247 ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1248 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1250 ; SI-LABEL: commute_add_literal_f16:
1252 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
1253 ; SI-NEXT: s_mov_b32 s7, 0xf000
1254 ; SI-NEXT: s_mov_b32 s6, -1
1255 ; SI-NEXT: s_mov_b32 s10, s6
1256 ; SI-NEXT: s_mov_b32 s11, s7
1257 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1258 ; SI-NEXT: s_mov_b32 s8, s2
1259 ; SI-NEXT: s_mov_b32 s9, s3
1260 ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
1261 ; SI-NEXT: s_mov_b32 s4, s0
1262 ; SI-NEXT: s_mov_b32 s5, s1
1263 ; SI-NEXT: s_waitcnt vmcnt(0)
1264 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
1265 ; SI-NEXT: v_add_f32_e32 v0, 0x44800000, v0
1266 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
1267 ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
1269 %x = load half, ptr addrspace(1) %in
1270 %y = fadd half %x, 1024.0
1271 store half %y, ptr addrspace(1) %out
; Adds the half constant with bit pattern 0xH0001 to the kernel argument %x and
; stores the result to %out.
; GFX10, GFX11 and VI are expected to emit v_add_f16_e64 with the constant
; printed as the operand 1 and no trailing literal dword in the encoding.
; SI is expected to convert %x to f32, add the literal 0x33800000 and convert
; back to f16.
1275 define amdgpu_kernel void @add_inline_imm_1_f16(ptr addrspace(1) %out, half %x) {
1276 ; GFX10-LABEL: add_inline_imm_1_f16:
1278 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1279 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1280 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1281 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1282 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1283 ; GFX10-NEXT: v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00]
1284 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1285 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1286 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1288 ; GFX11-LABEL: add_inline_imm_1_f16:
1290 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1291 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1292 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1293 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1294 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1295 ; GFX11-NEXT: v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00]
1296 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1297 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1298 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1300 ; VI-LABEL: add_inline_imm_1_f16:
1302 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1303 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1304 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1305 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1306 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1307 ; VI-NEXT: v_add_f16_e64 v0, s4, 1 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x02,0x01,0x00]
1308 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1309 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1311 ; SI-LABEL: add_inline_imm_1_f16:
1313 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
1314 ; SI-NEXT: s_mov_b32 s3, 0xf000
1315 ; SI-NEXT: s_mov_b32 s2, -1
1316 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1317 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
1318 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
1319 ; SI-NEXT: v_add_f32_e32 v0, 0x33800000, v0
1320 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
1321 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1322 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
1324 %y = fadd half %x, 0xH0001
1325 store half %y, ptr addrspace(1) %out
; Adds the half constant with bit pattern 0xH0002 to the kernel argument %x and
; stores the result to %out.
; GFX10, GFX11 and VI are expected to emit v_add_f16_e64 with the constant
; printed as the operand 2 and no trailing literal dword in the encoding.
; SI is expected to convert %x to f32, add the literal 0x34000000 and convert
; back to f16.
1329 define amdgpu_kernel void @add_inline_imm_2_f16(ptr addrspace(1) %out, half %x) {
1330 ; GFX10-LABEL: add_inline_imm_2_f16:
1332 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1333 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1334 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1335 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1336 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1337 ; GFX10-NEXT: v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00]
1338 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1339 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1340 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1342 ; GFX11-LABEL: add_inline_imm_2_f16:
1344 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1345 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1346 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1347 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1348 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1349 ; GFX11-NEXT: v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00]
1350 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1351 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1352 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1354 ; VI-LABEL: add_inline_imm_2_f16:
1356 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1357 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1358 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1359 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1360 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1361 ; VI-NEXT: v_add_f16_e64 v0, s4, 2 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x04,0x01,0x00]
1362 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1363 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1365 ; SI-LABEL: add_inline_imm_2_f16:
1367 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
1368 ; SI-NEXT: s_mov_b32 s3, 0xf000
1369 ; SI-NEXT: s_mov_b32 s2, -1
1370 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1371 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
1372 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
1373 ; SI-NEXT: v_add_f32_e32 v0, 0x34000000, v0
1374 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
1375 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1376 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
1378 %y = fadd half %x, 0xH0002
1379 store half %y, ptr addrspace(1) %out
; Adds the half constant with bit pattern 0xH0010 to the kernel argument %x and
; stores the result to %out.
; GFX10, GFX11 and VI are expected to emit v_add_f16_e64 with the constant
; printed as the operand 16 and no trailing literal dword in the encoding.
; SI is expected to convert %x to f32, add the literal 0x35800000 and convert
; back to f16.
1383 define amdgpu_kernel void @add_inline_imm_16_f16(ptr addrspace(1) %out, half %x) {
1384 ; GFX10-LABEL: add_inline_imm_16_f16:
1386 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1387 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1388 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1389 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1390 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1391 ; GFX10-NEXT: v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00]
1392 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1393 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1394 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1396 ; GFX11-LABEL: add_inline_imm_16_f16:
1398 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1399 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1400 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1401 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1402 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1403 ; GFX11-NEXT: v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00]
1404 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1405 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1406 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1408 ; VI-LABEL: add_inline_imm_16_f16:
1410 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1411 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1412 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1413 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1414 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1415 ; VI-NEXT: v_add_f16_e64 v0, s4, 16 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x20,0x01,0x00]
1416 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1417 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1419 ; SI-LABEL: add_inline_imm_16_f16:
1421 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
1422 ; SI-NEXT: s_mov_b32 s3, 0xf000
1423 ; SI-NEXT: s_mov_b32 s2, -1
1424 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1425 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
1426 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
1427 ; SI-NEXT: v_add_f32_e32 v0, 0x35800000, v0
1428 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
1429 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1430 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
1432 %y = fadd half %x, 0xH0010
1433 store half %y, ptr addrspace(1) %out
; Loads an i16 from %in, adds -1 as an integer, bitcasts the sum to half and
; stores it to %out.
; Every target is expected to emit a 32-bit integer add with the constant
; printed as -1 and no trailing literal dword (v_add_nc_u32 on GFX10/GFX11,
; v_add_u32 on VI, v_add_i32 on SI).
1437 define amdgpu_kernel void @add_inline_imm_neg_1_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1438 ; GFX10-LABEL: add_inline_imm_neg_1_f16:
1440 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1441 ; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1442 ; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1443 ; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1444 ; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1445 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1446 ; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1447 ; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1448 ; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1449 ; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1450 ; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1451 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1452 ; GFX10-NEXT: v_add_nc_u32_e32 v0, -1, v0 ; encoding: [0xc1,0x00,0x00,0x4a]
1453 ; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1454 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1456 ; GFX11-LABEL: add_inline_imm_neg_1_f16:
1458 ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1459 ; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1460 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1461 ; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1462 ; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1463 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1464 ; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1465 ; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1466 ; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1467 ; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1468 ; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1469 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1470 ; GFX11-NEXT: v_add_nc_u32_e32 v0, -1, v0 ; encoding: [0xc1,0x00,0x00,0x4a]
1471 ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1472 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1474 ; VI-LABEL: add_inline_imm_neg_1_f16:
1476 ; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1477 ; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1478 ; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1479 ; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1480 ; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1481 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1482 ; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1483 ; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1484 ; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1485 ; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1486 ; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1487 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1488 ; VI-NEXT: v_add_u32_e32 v0, vcc, -1, v0 ; encoding: [0xc1,0x00,0x00,0x32]
1489 ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1490 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1492 ; SI-LABEL: add_inline_imm_neg_1_f16:
1494 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
1495 ; SI-NEXT: s_mov_b32 s7, 0xf000
1496 ; SI-NEXT: s_mov_b32 s6, -1
1497 ; SI-NEXT: s_mov_b32 s10, s6
1498 ; SI-NEXT: s_mov_b32 s11, s7
1499 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1500 ; SI-NEXT: s_mov_b32 s8, s2
1501 ; SI-NEXT: s_mov_b32 s9, s3
1502 ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
1503 ; SI-NEXT: s_mov_b32 s4, s0
1504 ; SI-NEXT: s_mov_b32 s5, s1
1505 ; SI-NEXT: s_waitcnt vmcnt(0)
1506 ; SI-NEXT: v_add_i32_e32 v0, vcc, -1, v0
1507 ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
1509 %x = load i16, ptr addrspace(1) %in
1510 %y = add i16 %x, -1
1511 %ybc = bitcast i16 %y to half
1512 store half %ybc, ptr addrspace(1) %out
; Loads an i16 from %in, adds -2 as an integer, bitcasts the sum to half and
; stores it to %out.
; GFX10, GFX11 and VI are expected to emit a 32-bit integer add with the
; constant as the literal 0xfffe (a trailing literal dword in the encoding).
; SI is expected to emit v_add_i32 with the constant printed as -2.
1516 define amdgpu_kernel void @add_inline_imm_neg_2_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1517 ; GFX10-LABEL: add_inline_imm_neg_2_f16:
1519 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1520 ; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1521 ; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1522 ; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1523 ; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1524 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1525 ; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1526 ; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1527 ; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1528 ; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1529 ; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1530 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1531 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 0xfffe, v0 ; encoding: [0xff,0x00,0x00,0x4a,0xfe,0xff,0x00,0x00]
1532 ; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1533 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1535 ; GFX11-LABEL: add_inline_imm_neg_2_f16:
1537 ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1538 ; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1539 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1540 ; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1541 ; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1542 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1543 ; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1544 ; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1545 ; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1546 ; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1547 ; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1548 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1549 ; GFX11-NEXT: v_add_nc_u32_e32 v0, 0xfffe, v0 ; encoding: [0xff,0x00,0x00,0x4a,0xfe,0xff,0x00,0x00]
1550 ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1551 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1553 ; VI-LABEL: add_inline_imm_neg_2_f16:
1555 ; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1556 ; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1557 ; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1558 ; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1559 ; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1560 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1561 ; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1562 ; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1563 ; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1564 ; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1565 ; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1566 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1567 ; VI-NEXT: v_add_u32_e32 v0, vcc, 0xfffe, v0 ; encoding: [0xff,0x00,0x00,0x32,0xfe,0xff,0x00,0x00]
1568 ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1569 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1571 ; SI-LABEL: add_inline_imm_neg_2_f16:
1573 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
1574 ; SI-NEXT: s_mov_b32 s7, 0xf000
1575 ; SI-NEXT: s_mov_b32 s6, -1
1576 ; SI-NEXT: s_mov_b32 s10, s6
1577 ; SI-NEXT: s_mov_b32 s11, s7
1578 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1579 ; SI-NEXT: s_mov_b32 s8, s2
1580 ; SI-NEXT: s_mov_b32 s9, s3
1581 ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
1582 ; SI-NEXT: s_mov_b32 s4, s0
1583 ; SI-NEXT: s_mov_b32 s5, s1
1584 ; SI-NEXT: s_waitcnt vmcnt(0)
1585 ; SI-NEXT: v_add_i32_e32 v0, vcc, -2, v0
1586 ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
1588 %x = load i16, ptr addrspace(1) %in
1589 %y = add i16 %x, -2
1590 %ybc = bitcast i16 %y to half
1591 store half %ybc, ptr addrspace(1) %out
; Loads an i16 from %in, adds -16 as an integer, bitcasts the sum to half and
; stores it to %out.
; GFX10, GFX11 and VI are expected to emit a 32-bit integer add with the
; constant as the literal 0xfff0 (a trailing literal dword in the encoding).
; SI is expected to emit v_add_i32 with the constant printed as -16.
1595 define amdgpu_kernel void @add_inline_imm_neg_16_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1596 ; GFX10-LABEL: add_inline_imm_neg_16_f16:
1598 ; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1599 ; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1600 ; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1601 ; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1602 ; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1603 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1604 ; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1605 ; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1606 ; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1607 ; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1608 ; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1609 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1610 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 0xfff0, v0 ; encoding: [0xff,0x00,0x00,0x4a,0xf0,0xff,0x00,0x00]
1611 ; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1612 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1614 ; GFX11-LABEL: add_inline_imm_neg_16_f16:
1616 ; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1617 ; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1618 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1619 ; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1620 ; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1621 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1622 ; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1623 ; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1624 ; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1625 ; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1626 ; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1627 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1628 ; GFX11-NEXT: v_add_nc_u32_e32 v0, 0xfff0, v0 ; encoding: [0xff,0x00,0x00,0x4a,0xf0,0xff,0x00,0x00]
1629 ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1630 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1632 ; VI-LABEL: add_inline_imm_neg_16_f16:
1634 ; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1635 ; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1636 ; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1637 ; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1638 ; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1639 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1640 ; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1641 ; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1642 ; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1643 ; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1644 ; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1645 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1646 ; VI-NEXT: v_add_u32_e32 v0, vcc, 0xfff0, v0 ; encoding: [0xff,0x00,0x00,0x32,0xf0,0xff,0x00,0x00]
1647 ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1648 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1650 ; SI-LABEL: add_inline_imm_neg_16_f16:
1652 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
1653 ; SI-NEXT: s_mov_b32 s7, 0xf000
1654 ; SI-NEXT: s_mov_b32 s6, -1
1655 ; SI-NEXT: s_mov_b32 s10, s6
1656 ; SI-NEXT: s_mov_b32 s11, s7
1657 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1658 ; SI-NEXT: s_mov_b32 s8, s2
1659 ; SI-NEXT: s_mov_b32 s9, s3
1660 ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
1661 ; SI-NEXT: s_mov_b32 s4, s0
1662 ; SI-NEXT: s_mov_b32 s5, s1
1663 ; SI-NEXT: s_waitcnt vmcnt(0)
1664 ; SI-NEXT: v_add_i32_e32 v0, vcc, -16, v0
1665 ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
1667 %x = load i16, ptr addrspace(1) %in
1668 %y = add i16 %x, -16
1669 %ybc = bitcast i16 %y to half
1670 store half %ybc, ptr addrspace(1) %out
; Adds the half constant with bit pattern 0xH003F to the kernel argument %x and
; stores the result to %out.
; GFX10, GFX11 and VI are expected to emit v_add_f16_e64 with the constant
; printed as the operand 63 and no trailing literal dword in the encoding.
; SI is expected to convert %x to f32, add the literal 0x367c0000 and convert
; back to f16.
1674 define amdgpu_kernel void @add_inline_imm_63_f16(ptr addrspace(1) %out, half %x) {
1675 ; GFX10-LABEL: add_inline_imm_63_f16:
1677 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1678 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1679 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1680 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1681 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1682 ; GFX10-NEXT: v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00]
1683 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1684 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1685 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1687 ; GFX11-LABEL: add_inline_imm_63_f16:
1689 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1690 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1691 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1692 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1693 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1694 ; GFX11-NEXT: v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00]
1695 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1696 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1697 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1699 ; VI-LABEL: add_inline_imm_63_f16:
1701 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1702 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1703 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1704 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1705 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1706 ; VI-NEXT: v_add_f16_e64 v0, s4, 63 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x7e,0x01,0x00]
1707 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1708 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1710 ; SI-LABEL: add_inline_imm_63_f16:
1712 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
1713 ; SI-NEXT: s_mov_b32 s3, 0xf000
1714 ; SI-NEXT: s_mov_b32 s2, -1
1715 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1716 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
1717 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
1718 ; SI-NEXT: v_add_f32_e32 v0, 0x367c0000, v0
1719 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
1720 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1721 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
1723 %y = fadd half %x, 0xH003F
1724 store half %y, ptr addrspace(1) %out
; Kernel test: adds the half constant 0xH0040 to the kernel argument %x and
; stores the sum to %out.
; GFX10, GFX11 and VI checks expect the constant to be printed as the operand
; 64 of v_add_f16_e64. SI checks expect the add to be performed in f32:
; v_cvt_f32_f16, v_add_f32 with the literal 0x36800000, then v_cvt_f16_f32.
1728 define amdgpu_kernel void @add_inline_imm_64_f16(ptr addrspace(1) %out, half %x) {
1729 ; GFX10-LABEL: add_inline_imm_64_f16:
1731 ; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1732 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1733 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1734 ; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1735 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1736 ; GFX10-NEXT: v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00]
1737 ; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1738 ; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1739 ; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1741 ; GFX11-LABEL: add_inline_imm_64_f16:
1743 ; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1744 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1745 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1746 ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1747 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1748 ; GFX11-NEXT: v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00]
1749 ; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1750 ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1751 ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1753 ; VI-LABEL: add_inline_imm_64_f16:
1755 ; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1756 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1757 ; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1758 ; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1759 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1760 ; VI-NEXT: v_add_f16_e64 v0, s4, 64 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x80,0x01,0x00]
1761 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1762 ; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1764 ; SI-LABEL: add_inline_imm_64_f16:
1766 ; SI-NEXT: s_load_dword s0, s[4:5], 0xb
1767 ; SI-NEXT: s_mov_b32 s3, 0xf000
1768 ; SI-NEXT: s_mov_b32 s2, -1
1769 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1770 ; SI-NEXT: v_cvt_f32_f16_e32 v0, s0
1771 ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
1772 ; SI-NEXT: v_add_f32_e32 v0, 0x36800000, v0
1773 ; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
1774 ; SI-NEXT: s_waitcnt lgkmcnt(0)
1775 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; The half constant is written by its raw bits (0xH0040), not as a decimal value.
1777 %y = fadd half %x, 0xH0040
1778 store half %y, ptr addrspace(1) %out
1782 ; This needs to be emitted as a literal constant since the 16-bit
1783 ; float values do not work for 16-bit integer operations.
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; half 0.5 and stores the product to %out.
; GFX10, GFX11 and VI checks expect the constant as the literal 0x3800 on the
; 16-bit multiply (the encoding carries the bytes 0x00,0x38,0x00,0x00).
; SI checks expect %x to be masked with 0xffff and multiplied by 0x3800 using
; v_mul_u32_u24.
1784 define void @mul_inline_imm_0.5_i16(ptr addrspace(1) %out, i16 %x) {
1785 ; GFX10-LABEL: mul_inline_imm_0.5_i16:
1787 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1788 ; GFX10-NEXT: v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
1789 ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1790 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1792 ; GFX11-LABEL: mul_inline_imm_0.5_i16:
1794 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1795 ; GFX11-NEXT: v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
1796 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1797 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1799 ; VI-LABEL: mul_inline_imm_0.5_i16:
1801 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1802 ; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x38,0x00,0x00]
1803 ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1804 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1805 ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1807 ; SI-LABEL: mul_inline_imm_0.5_i16:
1809 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1810 ; SI-NEXT: s_mov_b32 s6, 0
1811 ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
1812 ; SI-NEXT: s_mov_b32 s7, 0xf000
1813 ; SI-NEXT: s_mov_b32 s4, s6
1814 ; SI-NEXT: s_mov_b32 s5, s6
1815 ; SI-NEXT: v_mul_u32_u24_e32 v2, 0x3800, v2
1816 ; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1817 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1818 ; SI-NEXT: s_setpc_b64 s[30:31]
; The multiplier is an integer whose bits are those of half 0.5.
1819 %y = mul i16 %x, bitcast (half 0.5 to i16)
1820 store i16 %y, ptr addrspace(1) %out
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; half -0.5 and stores the product to %out.
; GFX10, GFX11 and VI checks expect the constant printed as the literal 0xb800;
; the trailing literal bytes in those encodings are 0x00,0xb8,0xff,0xff.
; SI checks expect %x to be masked with 0xffff and multiplied by 0xb800 using
; v_mul_u32_u24.
1824 define void @mul_inline_imm_neg_0.5_i16(ptr addrspace(1) %out, i16 %x) {
1825 ; GFX10-LABEL: mul_inline_imm_neg_0.5_i16:
1827 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1828 ; GFX10-NEXT: v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
1829 ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1830 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1832 ; GFX11-LABEL: mul_inline_imm_neg_0.5_i16:
1834 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1835 ; GFX11-NEXT: v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
1836 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1837 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1839 ; VI-LABEL: mul_inline_imm_neg_0.5_i16:
1841 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1842 ; VI-NEXT: v_mul_lo_u16_e32 v2, 0xb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff]
1843 ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1844 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1845 ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1847 ; SI-LABEL: mul_inline_imm_neg_0.5_i16:
1849 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1850 ; SI-NEXT: s_mov_b32 s6, 0
1851 ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
1852 ; SI-NEXT: s_mov_b32 s7, 0xf000
1853 ; SI-NEXT: s_mov_b32 s4, s6
1854 ; SI-NEXT: s_mov_b32 s5, s6
1855 ; SI-NEXT: v_mul_u32_u24_e32 v2, 0xb800, v2
1856 ; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1857 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1858 ; SI-NEXT: s_setpc_b64 s[30:31]
; The multiplier is an integer whose bits are those of half -0.5.
1859 %y = mul i16 %x, bitcast (half -0.5 to i16)
1860 store i16 %y, ptr addrspace(1) %out
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; half 1.0 and stores the product to %out.
; GFX10, GFX11 and VI checks expect the constant as the literal 0x3c00 on the
; 16-bit multiply (the encoding carries the bytes 0x00,0x3c,0x00,0x00).
; SI checks expect %x to be masked with 0xffff and multiplied by 0x3c00 using
; v_mul_u32_u24.
1864 define void @mul_inline_imm_1.0_i16(ptr addrspace(1) %out, i16 %x) {
1865 ; GFX10-LABEL: mul_inline_imm_1.0_i16:
1867 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1868 ; GFX10-NEXT: v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
1869 ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1870 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1872 ; GFX11-LABEL: mul_inline_imm_1.0_i16:
1874 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1875 ; GFX11-NEXT: v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
1876 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1877 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1879 ; VI-LABEL: mul_inline_imm_1.0_i16:
1881 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1882 ; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3c00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x3c,0x00,0x00]
1883 ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1884 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1885 ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1887 ; SI-LABEL: mul_inline_imm_1.0_i16:
1889 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1890 ; SI-NEXT: s_mov_b32 s6, 0
1891 ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
1892 ; SI-NEXT: s_mov_b32 s7, 0xf000
1893 ; SI-NEXT: s_mov_b32 s4, s6
1894 ; SI-NEXT: s_mov_b32 s5, s6
1895 ; SI-NEXT: v_mul_u32_u24_e32 v2, 0x3c00, v2
1896 ; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1897 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1898 ; SI-NEXT: s_setpc_b64 s[30:31]
; The multiplier is an integer whose bits are those of half 1.0.
1899 %y = mul i16 %x, bitcast (half 1.0 to i16)
1900 store i16 %y, ptr addrspace(1) %out
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; half -1.0 and stores the product to %out.
; GFX10, GFX11 and VI checks expect the constant printed as the literal 0xbc00;
; the trailing literal bytes in those encodings are 0x00,0xbc,0xff,0xff.
; SI checks expect %x to be masked with 0xffff and multiplied by 0xbc00 using
; v_mul_u32_u24.
1904 define void @mul_inline_imm_neg_1.0_i16(ptr addrspace(1) %out, i16 %x) {
1905 ; GFX10-LABEL: mul_inline_imm_neg_1.0_i16:
1907 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1908 ; GFX10-NEXT: v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
1909 ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1910 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1912 ; GFX11-LABEL: mul_inline_imm_neg_1.0_i16:
1914 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1915 ; GFX11-NEXT: v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
1916 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1917 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1919 ; VI-LABEL: mul_inline_imm_neg_1.0_i16:
1921 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1922 ; VI-NEXT: v_mul_lo_u16_e32 v2, 0xbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff]
1923 ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1924 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1925 ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1927 ; SI-LABEL: mul_inline_imm_neg_1.0_i16:
1929 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1930 ; SI-NEXT: s_mov_b32 s6, 0
1931 ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
1932 ; SI-NEXT: s_mov_b32 s7, 0xf000
1933 ; SI-NEXT: s_mov_b32 s4, s6
1934 ; SI-NEXT: s_mov_b32 s5, s6
1935 ; SI-NEXT: v_mul_u32_u24_e32 v2, 0xbc00, v2
1936 ; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1937 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1938 ; SI-NEXT: s_setpc_b64 s[30:31]
; The multiplier is an integer whose bits are those of half -1.0.
1939 %y = mul i16 %x, bitcast (half -1.0 to i16)
1940 store i16 %y, ptr addrspace(1) %out
; Non-kernel function: shifts the bit pattern of half 2.0 left by the i16
; argument %x and stores the result to %out. The constant is the value being
; shifted; %x is the shift amount.
; GFX10 and GFX11 checks expect the literal 0x4000 directly on v_lshlrev_b16.
; VI checks expect the constant to be materialized with s_movk_i32 into s4 and
; then used by v_lshlrev_b16_e64.
; SI checks expect a 32-bit v_lshl_b32 with the literal 0x4000.
1944 define void @shl_inline_imm_2.0_i16(ptr addrspace(1) %out, i16 %x) {
1945 ; GFX10-LABEL: shl_inline_imm_2.0_i16:
1947 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1948 ; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
1949 ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1950 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1952 ; GFX11-LABEL: shl_inline_imm_2.0_i16:
1954 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1955 ; GFX11-NEXT: v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
1956 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1957 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1959 ; VI-LABEL: shl_inline_imm_2.0_i16:
1961 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1962 ; VI-NEXT: s_movk_i32 s4, 0x4000 ; encoding: [0x00,0x40,0x04,0xb0]
1963 ; VI-NEXT: v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
1964 ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1965 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1966 ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1968 ; SI-LABEL: shl_inline_imm_2.0_i16:
1970 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1971 ; SI-NEXT: s_mov_b32 s6, 0
1972 ; SI-NEXT: s_mov_b32 s7, 0xf000
1973 ; SI-NEXT: s_mov_b32 s4, s6
1974 ; SI-NEXT: s_mov_b32 s5, s6
1975 ; SI-NEXT: v_lshl_b32_e32 v2, 0x4000, v2
1976 ; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1977 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
1978 ; SI-NEXT: s_setpc_b64 s[30:31]
; Operand order matters here: the bitcast constant is shifted by %x.
1979 %y = shl i16 bitcast (half 2.0 to i16), %x
1980 store i16 %y, ptr addrspace(1) %out
; Non-kernel function: shifts the bit pattern of half -2.0 left by the i16
; argument %x and stores the result to %out. The constant is the value being
; shifted; %x is the shift amount.
; GFX10 and GFX11 checks expect the literal printed as 0xc000 on v_lshlrev_b16;
; the trailing literal bytes in those encodings are 0x00,0xc0,0xff,0xff.
; VI checks expect the constant to be materialized with s_movk_i32 into s4 and
; then used by v_lshlrev_b16_e64.
; SI checks expect a 32-bit v_lshl_b32 with the literal 0xffffc000.
1984 define void @shl_inline_imm_neg_2.0_i16(ptr addrspace(1) %out, i16 %x) {
1985 ; GFX10-LABEL: shl_inline_imm_neg_2.0_i16:
1987 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1988 ; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
1989 ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1990 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1992 ; GFX11-LABEL: shl_inline_imm_neg_2.0_i16:
1994 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1995 ; GFX11-NEXT: v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
1996 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1997 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1999 ; VI-LABEL: shl_inline_imm_neg_2.0_i16:
2001 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2002 ; VI-NEXT: s_movk_i32 s4, 0xc000 ; encoding: [0x00,0xc0,0x04,0xb0]
2003 ; VI-NEXT: v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
2004 ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2005 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2006 ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2008 ; SI-LABEL: shl_inline_imm_neg_2.0_i16:
2010 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2011 ; SI-NEXT: s_mov_b32 s6, 0
2012 ; SI-NEXT: s_mov_b32 s7, 0xf000
2013 ; SI-NEXT: s_mov_b32 s4, s6
2014 ; SI-NEXT: s_mov_b32 s5, s6
2015 ; SI-NEXT: v_lshl_b32_e32 v2, 0xffffc000, v2
2016 ; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2017 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2018 ; SI-NEXT: s_setpc_b64 s[30:31]
; Operand order matters here: the bitcast constant is shifted by %x.
2019 %y = shl i16 bitcast (half -2.0 to i16), %x
2020 store i16 %y, ptr addrspace(1) %out
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; half 4.0 and stores the product to %out.
; GFX10, GFX11 and VI checks expect the constant as the literal 0x4400 on the
; 16-bit multiply (the encoding carries the bytes 0x00,0x44,0x00,0x00).
; SI checks expect %x to be masked with 0xffff and multiplied by 0x4400 using
; v_mul_u32_u24.
2024 define void @mul_inline_imm_4.0_i16(ptr addrspace(1) %out, i16 %x) {
2025 ; GFX10-LABEL: mul_inline_imm_4.0_i16:
2027 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2028 ; GFX10-NEXT: v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
2029 ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2030 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2032 ; GFX11-LABEL: mul_inline_imm_4.0_i16:
2034 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2035 ; GFX11-NEXT: v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
2036 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2037 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2039 ; VI-LABEL: mul_inline_imm_4.0_i16:
2041 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2042 ; VI-NEXT: v_mul_lo_u16_e32 v2, 0x4400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x44,0x00,0x00]
2043 ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2044 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2045 ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2047 ; SI-LABEL: mul_inline_imm_4.0_i16:
2049 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2050 ; SI-NEXT: s_mov_b32 s6, 0
2051 ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
2052 ; SI-NEXT: s_mov_b32 s7, 0xf000
2053 ; SI-NEXT: s_mov_b32 s4, s6
2054 ; SI-NEXT: s_mov_b32 s5, s6
2055 ; SI-NEXT: v_mul_u32_u24_e32 v2, 0x4400, v2
2056 ; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2057 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2058 ; SI-NEXT: s_setpc_b64 s[30:31]
; The multiplier is an integer whose bits are those of half 4.0.
2059 %y = mul i16 %x, bitcast (half 4.0 to i16)
2060 store i16 %y, ptr addrspace(1) %out
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; half -4.0 and stores the product to %out.
; GFX10, GFX11 and VI checks expect the constant printed as the literal 0xc400;
; the trailing literal bytes in those encodings are 0x00,0xc4,0xff,0xff.
; SI checks expect %x to be masked with 0xffff and multiplied by 0xc400 using
; v_mul_u32_u24.
2064 define void @mul_inline_imm_neg_4.0_i16(ptr addrspace(1) %out, i16 %x) {
2065 ; GFX10-LABEL: mul_inline_imm_neg_4.0_i16:
2067 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2068 ; GFX10-NEXT: v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
2069 ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2070 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2072 ; GFX11-LABEL: mul_inline_imm_neg_4.0_i16:
2074 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2075 ; GFX11-NEXT: v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
2076 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2077 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2079 ; VI-LABEL: mul_inline_imm_neg_4.0_i16:
2081 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2082 ; VI-NEXT: v_mul_lo_u16_e32 v2, 0xc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff]
2083 ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2084 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2085 ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2087 ; SI-LABEL: mul_inline_imm_neg_4.0_i16:
2089 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2090 ; SI-NEXT: s_mov_b32 s6, 0
2091 ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
2092 ; SI-NEXT: s_mov_b32 s7, 0xf000
2093 ; SI-NEXT: s_mov_b32 s4, s6
2094 ; SI-NEXT: s_mov_b32 s5, s6
2095 ; SI-NEXT: v_mul_u32_u24_e32 v2, 0xc400, v2
2096 ; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2097 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2098 ; SI-NEXT: s_setpc_b64 s[30:31]
; The multiplier is an integer whose bits are those of half -4.0.
2099 %y = mul i16 %x, bitcast (half -4.0 to i16)
2100 store i16 %y, ptr addrspace(1) %out
; Non-kernel function: multiplies the i16 argument %x by the bit pattern of
; the half constant 0xH3118 and stores the product to %out.
; NOTE(review): going by the test name, 0xH3118 is presumably the half
; encoding of 1/(2*pi) - confirm.
; GFX10, GFX11 and VI checks expect the constant as the literal 0x3118 on the
; 16-bit multiply (the encoding carries the bytes 0x18,0x31,0x00,0x00).
; SI checks expect %x to be masked with 0xffff and multiplied by 0x3118 using
; v_mul_u32_u24.
2104 define void @mul_inline_imm_inv2pi_i16(ptr addrspace(1) %out, i16 %x) {
2105 ; GFX10-LABEL: mul_inline_imm_inv2pi_i16:
2107 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2108 ; GFX10-NEXT: v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
2109 ; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2110 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2112 ; GFX11-LABEL: mul_inline_imm_inv2pi_i16:
2114 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2115 ; GFX11-NEXT: v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
2116 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2117 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2119 ; VI-LABEL: mul_inline_imm_inv2pi_i16:
2121 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2122 ; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3118, v2 ; encoding: [0xff,0x04,0x04,0x52,0x18,0x31,0x00,0x00]
2123 ; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2124 ; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2125 ; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2127 ; SI-LABEL: mul_inline_imm_inv2pi_i16:
2129 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2130 ; SI-NEXT: s_mov_b32 s6, 0
2131 ; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
2132 ; SI-NEXT: s_mov_b32 s7, 0xf000
2133 ; SI-NEXT: s_mov_b32 s4, s6
2134 ; SI-NEXT: s_mov_b32 s5, s6
2135 ; SI-NEXT: v_mul_u32_u24_e32 v2, 0x3118, v2
2136 ; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2137 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
2138 ; SI-NEXT: s_setpc_b64 s[30:31]
; The half constant is written by its raw bits (0xH3118), not as a decimal value.
2139 %y = mul i16 %x, bitcast (half 0xH3118 to i16)
2140 store i16 %y, ptr addrspace(1) %out