[ORC] Add std::tuple support to SimplePackedSerialization.
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / imm16.ll
blob761719ebbb9362718b6f2d28128df13b898c3cbd
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX10 %s
3 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
4 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
6 ; FIXME: Merge into imm.ll
; Volatile store of the i16 constant -32768 (16-bit pattern 0x8000).
; Expected codegen: v_mov_b32 materializes the value from a 32-bit literal
; (0xffff8000 on GFX10 and VI, 0x8000 on SI), buffer_store_short writes it, and a
; wait follows the volatile store (s_waitcnt_vscnt on GFX10, s_waitcnt vmcnt(0) elsewhere).
9 define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) {
10 ; GFX10-LABEL: store_inline_imm_neg_0.0_i16:
11 ; GFX10:       ; %bb.0:
12 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
13 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
14 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
15 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
16 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
17 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
18 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
19 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
21 ; VI-LABEL: store_inline_imm_neg_0.0_i16:
22 ; VI:       ; %bb.0:
23 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
24 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
25 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
26 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
27 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
28 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
29 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
30 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
32 ; SI-LABEL: store_inline_imm_neg_0.0_i16:
33 ; SI:       ; %bb.0:
34 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
35 ; SI-NEXT:    s_mov_b32 s3, 0xf000
36 ; SI-NEXT:    s_mov_b32 s2, -1
37 ; SI-NEXT:    v_mov_b32_e32 v0, 0x8000
38 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
39 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
40 ; SI-NEXT:    s_waitcnt vmcnt(0)
41 ; SI-NEXT:    s_endpgm
42   store volatile i16 -32768, i16 addrspace(1)* %out
43   ret void
; Store of half 0.0. Expected codegen: v_mov_b32_e32 v0, 0 on every target (a 4-byte
; encoding with no trailing literal dword on GFX10 and VI), then buffer_store_short.
45 define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) {
46 ; GFX10-LABEL: store_inline_imm_0.0_f16:
47 ; GFX10:       ; %bb.0:
48 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
49 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
50 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
51 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
52 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
53 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
54 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
56 ; VI-LABEL: store_inline_imm_0.0_f16:
57 ; VI:       ; %bb.0:
58 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
59 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
60 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
61 ; VI-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
62 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
63 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
64 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
66 ; SI-LABEL: store_inline_imm_0.0_f16:
67 ; SI:       ; %bb.0:
68 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
69 ; SI-NEXT:    s_mov_b32 s3, 0xf000
70 ; SI-NEXT:    s_mov_b32 s2, -1
71 ; SI-NEXT:    v_mov_b32_e32 v0, 0
72 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
73 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
74 ; SI-NEXT:    s_endpgm
75   store half 0.0, half addrspace(1)* %out
76   ret void
; Store of half -0.0. Expected codegen: the value is moved from a 32-bit literal
; (0xffff8000 on GFX10 and VI, 0x8000 on SI) and written with buffer_store_short.
79 define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) {
80 ; GFX10-LABEL: store_imm_neg_0.0_f16:
81 ; GFX10:       ; %bb.0:
82 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
83 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
84 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
85 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
86 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
87 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
88 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
90 ; VI-LABEL: store_imm_neg_0.0_f16:
91 ; VI:       ; %bb.0:
92 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
93 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
94 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
95 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
96 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
97 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
98 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
100 ; SI-LABEL: store_imm_neg_0.0_f16:
101 ; SI:       ; %bb.0:
102 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
103 ; SI-NEXT:    s_mov_b32 s3, 0xf000
104 ; SI-NEXT:    s_mov_b32 s2, -1
105 ; SI-NEXT:    v_mov_b32_e32 v0, 0x8000
106 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
107 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
108 ; SI-NEXT:    s_endpgm
109   store half -0.0, half addrspace(1)* %out
110   ret void
; Store of half 0.5. Expected codegen: v_mov_b32 loads the 16-bit pattern 0x3800 as a
; literal on all three targets, then buffer_store_short writes it.
113 define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) {
114 ; GFX10-LABEL: store_inline_imm_0.5_f16:
115 ; GFX10:       ; %bb.0:
116 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
117 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
118 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
119 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
120 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
121 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
122 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
124 ; VI-LABEL: store_inline_imm_0.5_f16:
125 ; VI:       ; %bb.0:
126 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
127 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
128 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
129 ; VI-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
130 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
131 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
132 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
134 ; SI-LABEL: store_inline_imm_0.5_f16:
135 ; SI:       ; %bb.0:
136 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
137 ; SI-NEXT:    s_mov_b32 s3, 0xf000
138 ; SI-NEXT:    s_mov_b32 s2, -1
139 ; SI-NEXT:    v_mov_b32_e32 v0, 0x3800
140 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
141 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
142 ; SI-NEXT:    s_endpgm
143   store half 0.5, half addrspace(1)* %out
144   ret void
; Store of half -0.5. Expected codegen: v_mov_b32 loads the value from a literal
; (0xffffb800 on GFX10 and VI, 0xb800 on SI), then buffer_store_short writes it.
147 define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) {
148 ; GFX10-LABEL: store_inline_imm_m_0.5_f16:
149 ; GFX10:       ; %bb.0:
150 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
151 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
152 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
153 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
154 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
155 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
156 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
158 ; VI-LABEL: store_inline_imm_m_0.5_f16:
159 ; VI:       ; %bb.0:
160 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
161 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
162 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
163 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
164 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
165 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
166 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
168 ; SI-LABEL: store_inline_imm_m_0.5_f16:
169 ; SI:       ; %bb.0:
170 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
171 ; SI-NEXT:    s_mov_b32 s3, 0xf000
172 ; SI-NEXT:    s_mov_b32 s2, -1
173 ; SI-NEXT:    v_mov_b32_e32 v0, 0xb800
174 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
175 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
176 ; SI-NEXT:    s_endpgm
177   store half -0.5, half addrspace(1)* %out
178   ret void
; Store of half 1.0. Expected codegen: v_mov_b32 loads the 16-bit pattern 0x3c00 as a
; literal on all three targets, then buffer_store_short writes it.
181 define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) {
182 ; GFX10-LABEL: store_inline_imm_1.0_f16:
183 ; GFX10:       ; %bb.0:
184 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
185 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
186 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
187 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
188 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
189 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
190 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
192 ; VI-LABEL: store_inline_imm_1.0_f16:
193 ; VI:       ; %bb.0:
194 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
195 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
196 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
197 ; VI-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
198 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
199 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
200 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
202 ; SI-LABEL: store_inline_imm_1.0_f16:
203 ; SI:       ; %bb.0:
204 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
205 ; SI-NEXT:    s_mov_b32 s3, 0xf000
206 ; SI-NEXT:    s_mov_b32 s2, -1
207 ; SI-NEXT:    v_mov_b32_e32 v0, 0x3c00
208 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
209 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
210 ; SI-NEXT:    s_endpgm
211   store half 1.0, half addrspace(1)* %out
212   ret void
; Store of half -1.0. Expected codegen: v_mov_b32 loads the value from a literal
; (0xffffbc00 on GFX10 and VI, 0xbc00 on SI), then buffer_store_short writes it.
215 define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) {
216 ; GFX10-LABEL: store_inline_imm_m_1.0_f16:
217 ; GFX10:       ; %bb.0:
218 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
219 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
220 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
221 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
222 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
223 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
224 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
226 ; VI-LABEL: store_inline_imm_m_1.0_f16:
227 ; VI:       ; %bb.0:
228 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
229 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
230 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
231 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
232 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
233 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
234 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
236 ; SI-LABEL: store_inline_imm_m_1.0_f16:
237 ; SI:       ; %bb.0:
238 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
239 ; SI-NEXT:    s_mov_b32 s3, 0xf000
240 ; SI-NEXT:    s_mov_b32 s2, -1
241 ; SI-NEXT:    v_mov_b32_e32 v0, 0xbc00
242 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
243 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
244 ; SI-NEXT:    s_endpgm
245   store half -1.0, half addrspace(1)* %out
246   ret void
; Store of half 2.0. Expected codegen: v_mov_b32 loads the 16-bit pattern 0x4000 as a
; literal on all three targets, then buffer_store_short writes it.
249 define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) {
250 ; GFX10-LABEL: store_inline_imm_2.0_f16:
251 ; GFX10:       ; %bb.0:
252 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
253 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
254 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
255 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
256 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
257 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
258 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
260 ; VI-LABEL: store_inline_imm_2.0_f16:
261 ; VI:       ; %bb.0:
262 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
263 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
264 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
265 ; VI-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
266 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
267 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
268 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
270 ; SI-LABEL: store_inline_imm_2.0_f16:
271 ; SI:       ; %bb.0:
272 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
273 ; SI-NEXT:    s_mov_b32 s3, 0xf000
274 ; SI-NEXT:    s_mov_b32 s2, -1
275 ; SI-NEXT:    v_mov_b32_e32 v0, 0x4000
276 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
277 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
278 ; SI-NEXT:    s_endpgm
279   store half 2.0, half addrspace(1)* %out
280   ret void
; Store of half -2.0. Expected codegen: v_mov_b32 loads the value from a literal
; (0xffffc000 on GFX10 and VI, 0xc000 on SI), then buffer_store_short writes it.
283 define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) {
284 ; GFX10-LABEL: store_inline_imm_m_2.0_f16:
285 ; GFX10:       ; %bb.0:
286 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
287 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
288 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
289 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
290 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
291 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
292 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
294 ; VI-LABEL: store_inline_imm_m_2.0_f16:
295 ; VI:       ; %bb.0:
296 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
297 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
298 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
299 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
300 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
301 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
302 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
304 ; SI-LABEL: store_inline_imm_m_2.0_f16:
305 ; SI:       ; %bb.0:
306 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
307 ; SI-NEXT:    s_mov_b32 s3, 0xf000
308 ; SI-NEXT:    s_mov_b32 s2, -1
309 ; SI-NEXT:    v_mov_b32_e32 v0, 0xc000
310 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
311 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
312 ; SI-NEXT:    s_endpgm
313   store half -2.0, half addrspace(1)* %out
314   ret void
; Store of half 4.0. Expected codegen: v_mov_b32 loads the 16-bit pattern 0x4400 as a
; literal on all three targets, then buffer_store_short writes it.
317 define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) {
318 ; GFX10-LABEL: store_inline_imm_4.0_f16:
319 ; GFX10:       ; %bb.0:
320 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
321 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
322 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
323 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
324 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
325 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
326 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
328 ; VI-LABEL: store_inline_imm_4.0_f16:
329 ; VI:       ; %bb.0:
330 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
331 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
332 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
333 ; VI-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
334 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
335 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
336 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
338 ; SI-LABEL: store_inline_imm_4.0_f16:
339 ; SI:       ; %bb.0:
340 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
341 ; SI-NEXT:    s_mov_b32 s3, 0xf000
342 ; SI-NEXT:    s_mov_b32 s2, -1
343 ; SI-NEXT:    v_mov_b32_e32 v0, 0x4400
344 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
345 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
346 ; SI-NEXT:    s_endpgm
347   store half 4.0, half addrspace(1)* %out
348   ret void
; Store of half -4.0. Expected codegen: v_mov_b32 loads the value from a literal
; (0xffffc400 on GFX10 and VI, 0xc400 on SI), then buffer_store_short writes it.
351 define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) {
352 ; GFX10-LABEL: store_inline_imm_m_4.0_f16:
353 ; GFX10:       ; %bb.0:
354 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
355 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
356 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
357 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
358 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
359 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
360 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
362 ; VI-LABEL: store_inline_imm_m_4.0_f16:
363 ; VI:       ; %bb.0:
364 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
365 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
366 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
367 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
368 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
369 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
370 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
372 ; SI-LABEL: store_inline_imm_m_4.0_f16:
373 ; SI:       ; %bb.0:
374 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
375 ; SI-NEXT:    s_mov_b32 s3, 0xf000
376 ; SI-NEXT:    s_mov_b32 s2, -1
377 ; SI-NEXT:    v_mov_b32_e32 v0, 0xc400
378 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
379 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
380 ; SI-NEXT:    s_endpgm
381   store half -4.0, half addrspace(1)* %out
382   ret void
; Store of the half constant with bit pattern 0xH3118 (the function name labels it
; inv_2pi). Expected codegen: v_mov_b32 loads 0x3118 as a literal on all three
; targets, then buffer_store_short writes it.
385 define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) {
386 ; GFX10-LABEL: store_inline_imm_inv_2pi_f16:
387 ; GFX10:       ; %bb.0:
388 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
389 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
390 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
391 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
392 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
393 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
394 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
396 ; VI-LABEL: store_inline_imm_inv_2pi_f16:
397 ; VI:       ; %bb.0:
398 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
399 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
400 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
401 ; VI-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
402 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
403 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
404 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
406 ; SI-LABEL: store_inline_imm_inv_2pi_f16:
407 ; SI:       ; %bb.0:
408 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
409 ; SI-NEXT:    s_mov_b32 s3, 0xf000
410 ; SI-NEXT:    s_mov_b32 s2, -1
411 ; SI-NEXT:    v_mov_b32_e32 v0, 0x3118
412 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
413 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
414 ; SI-NEXT:    s_endpgm
415   store half 0xH3118, half addrspace(1)* %out
416   ret void
; Store of the half constant with bit pattern 0xHB118 (0xH3118 with the sign bit set;
; the function name labels it m_inv_2pi). Expected codegen: v_mov_b32 loads the value
; from a literal (0xffffb118 on GFX10 and VI, 0xb118 on SI), then buffer_store_short.
419 define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) {
420 ; GFX10-LABEL: store_inline_imm_m_inv_2pi_f16:
421 ; GFX10:       ; %bb.0:
422 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
423 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
424 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
425 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
426 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
427 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
428 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
430 ; VI-LABEL: store_inline_imm_m_inv_2pi_f16:
431 ; VI:       ; %bb.0:
432 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
433 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
434 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
435 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
436 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
437 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
438 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
440 ; SI-LABEL: store_inline_imm_m_inv_2pi_f16:
441 ; SI:       ; %bb.0:
442 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
443 ; SI-NEXT:    s_mov_b32 s3, 0xf000
444 ; SI-NEXT:    s_mov_b32 s2, -1
445 ; SI-NEXT:    v_mov_b32_e32 v0, 0xb118
446 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
447 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
448 ; SI-NEXT:    s_endpgm
449   store half 0xHB118, half addrspace(1)* %out
450   ret void
; Store of half 4096.0. Expected codegen: v_mov_b32 loads the 16-bit pattern 0x6c00 as
; a literal on all three targets, then buffer_store_short writes it.
453 define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) {
454 ; GFX10-LABEL: store_literal_imm_f16:
455 ; GFX10:       ; %bb.0:
456 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
457 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
458 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
459 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
460 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
461 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
462 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
464 ; VI-LABEL: store_literal_imm_f16:
465 ; VI:       ; %bb.0:
466 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
467 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
468 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
469 ; VI-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
470 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
471 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
472 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
474 ; SI-LABEL: store_literal_imm_f16:
475 ; SI:       ; %bb.0:
476 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
477 ; SI-NEXT:    s_mov_b32 s3, 0xf000
478 ; SI-NEXT:    s_mov_b32 s2, -1
479 ; SI-NEXT:    v_mov_b32_e32 v0, 0x6c00
480 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
481 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
482 ; SI-NEXT:    s_endpgm
483   store half 4096.0, half addrspace(1)* %out
484   ret void
; fadd of the half kernel argument %x with 0.0; the sum is stored to %out.
; GFX10 and VI checks expect v_add_f16_e64 with 0 written directly as the second
; source operand. SI checks expect the add to be performed in f32: v_cvt_f32_f16,
; then v_add_f32 with 0, then v_cvt_f16_f32 before the store.
487 define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) {
488 ; GFX10-LABEL: add_inline_imm_0.0_f16:
489 ; GFX10:       ; %bb.0:
490 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
491 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
492 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
493 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
494 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
495 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00]
496 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
497 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
498 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
500 ; VI-LABEL: add_inline_imm_0.0_f16:
501 ; VI:       ; %bb.0:
502 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
503 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
504 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
505 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
506 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
507 ; VI-NEXT:    v_add_f16_e64 v0, s4, 0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x00,0x01,0x00]
508 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
509 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
511 ; SI-LABEL: add_inline_imm_0.0_f16:
512 ; SI:       ; %bb.0:
513 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
514 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
515 ; SI-NEXT:    s_mov_b32 s3, 0xf000
516 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
517 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
518 ; SI-NEXT:    s_mov_b32 s2, -1
519 ; SI-NEXT:    v_add_f32_e32 v0, 0, v0
520 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
521 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
522 ; SI-NEXT:    s_endpgm
523   %y = fadd half %x, 0.0
524   store half %y, half addrspace(1)* %out
525   ret void
; fadd of the half kernel argument %x with 0.5; the sum is stored to %out.
; GFX10 and VI checks expect v_add_f16_e64 with 0.5 written directly as the second
; source operand. SI checks expect the add to be performed in f32: v_cvt_f32_f16,
; then v_add_f32 with 0.5, then v_cvt_f16_f32 before the store.
528 define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) {
529 ; GFX10-LABEL: add_inline_imm_0.5_f16:
530 ; GFX10:       ; %bb.0:
531 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
532 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
533 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
534 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
535 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
536 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00]
537 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
538 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
539 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
541 ; VI-LABEL: add_inline_imm_0.5_f16:
542 ; VI:       ; %bb.0:
543 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
544 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
545 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
546 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
547 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
548 ; VI-NEXT:    v_add_f16_e64 v0, s4, 0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe0,0x01,0x00]
549 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
550 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
552 ; SI-LABEL: add_inline_imm_0.5_f16:
553 ; SI:       ; %bb.0:
554 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
555 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
556 ; SI-NEXT:    s_mov_b32 s3, 0xf000
557 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
558 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
559 ; SI-NEXT:    s_mov_b32 s2, -1
560 ; SI-NEXT:    v_add_f32_e32 v0, 0.5, v0
561 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
562 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
563 ; SI-NEXT:    s_endpgm
564   %y = fadd half %x, 0.5
565   store half %y, half addrspace(1)* %out
566   ret void
; Adds the constant -0.5 to the half kernel argument %x and stores the sum to %out.
; The GFX10 and VI checks expect -0.5 to appear directly as an operand of
; v_add_f16_e64. The SI checks expect the input to be converted to f32
; (v_cvt_f32_f16_e32), added by v_add_f32_e32 with operand -0.5, and converted
; back (v_cvt_f16_f32_e32) before the store.
569 define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) {
570 ; GFX10-LABEL: add_inline_imm_neg_0.5_f16:
571 ; GFX10:       ; %bb.0:
572 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
573 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
574 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
575 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
576 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
577 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00]
578 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
579 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
580 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
582 ; VI-LABEL: add_inline_imm_neg_0.5_f16:
583 ; VI:       ; %bb.0:
584 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
585 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
586 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
587 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
588 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
589 ; VI-NEXT:    v_add_f16_e64 v0, s4, -0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe2,0x01,0x00]
590 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
591 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
593 ; SI-LABEL: add_inline_imm_neg_0.5_f16:
594 ; SI:       ; %bb.0:
595 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
596 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
597 ; SI-NEXT:    s_mov_b32 s3, 0xf000
598 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
599 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
600 ; SI-NEXT:    s_mov_b32 s2, -1
601 ; SI-NEXT:    v_add_f32_e32 v0, -0.5, v0
602 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
603 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
604 ; SI-NEXT:    s_endpgm
605   %y = fadd half %x, -0.5
606   store half %y, half addrspace(1)* %out
607   ret void
; Adds the constant 1.0 to the half kernel argument %x and stores the sum to %out.
; The GFX10 and VI checks expect 1.0 to appear directly as an operand of
; v_add_f16_e64. The SI checks expect the input to be converted to f32
; (v_cvt_f32_f16_e32), added by v_add_f32_e32 with operand 1.0, and converted
; back (v_cvt_f16_f32_e32) before the store.
610 define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) {
611 ; GFX10-LABEL: add_inline_imm_1.0_f16:
612 ; GFX10:       ; %bb.0:
613 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
614 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
615 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
616 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
617 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
618 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00]
619 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
620 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
621 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
623 ; VI-LABEL: add_inline_imm_1.0_f16:
624 ; VI:       ; %bb.0:
625 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
626 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
627 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
628 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
629 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
630 ; VI-NEXT:    v_add_f16_e64 v0, s4, 1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe4,0x01,0x00]
631 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
632 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
634 ; SI-LABEL: add_inline_imm_1.0_f16:
635 ; SI:       ; %bb.0:
636 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
637 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
638 ; SI-NEXT:    s_mov_b32 s3, 0xf000
639 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
640 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
641 ; SI-NEXT:    s_mov_b32 s2, -1
642 ; SI-NEXT:    v_add_f32_e32 v0, 1.0, v0
643 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
644 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
645 ; SI-NEXT:    s_endpgm
646   %y = fadd half %x, 1.0
647   store half %y, half addrspace(1)* %out
648   ret void
; Adds the constant -1.0 to the half kernel argument %x and stores the sum to %out.
; The GFX10 and VI checks expect -1.0 to appear directly as an operand of
; v_add_f16_e64. The SI checks expect the input to be converted to f32
; (v_cvt_f32_f16_e32), added by v_add_f32_e32 with operand -1.0, and converted
; back (v_cvt_f16_f32_e32) before the store.
651 define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) {
652 ; GFX10-LABEL: add_inline_imm_neg_1.0_f16:
653 ; GFX10:       ; %bb.0:
654 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
655 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
656 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
657 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
658 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
659 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00]
660 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
661 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
662 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
664 ; VI-LABEL: add_inline_imm_neg_1.0_f16:
665 ; VI:       ; %bb.0:
666 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
667 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
668 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
669 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
670 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
671 ; VI-NEXT:    v_add_f16_e64 v0, s4, -1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe6,0x01,0x00]
672 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
673 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
675 ; SI-LABEL: add_inline_imm_neg_1.0_f16:
676 ; SI:       ; %bb.0:
677 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
678 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
679 ; SI-NEXT:    s_mov_b32 s3, 0xf000
680 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
681 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
682 ; SI-NEXT:    s_mov_b32 s2, -1
683 ; SI-NEXT:    v_add_f32_e32 v0, -1.0, v0
684 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
685 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
686 ; SI-NEXT:    s_endpgm
687   %y = fadd half %x, -1.0
688   store half %y, half addrspace(1)* %out
689   ret void
; Adds the constant 2.0 to the half kernel argument %x and stores the sum to %out.
; The GFX10 and VI checks expect 2.0 to appear directly as an operand of
; v_add_f16_e64. The SI checks expect the input to be converted to f32
; (v_cvt_f32_f16_e32), added by v_add_f32_e32 with operand 2.0, and converted
; back (v_cvt_f16_f32_e32) before the store.
692 define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) {
693 ; GFX10-LABEL: add_inline_imm_2.0_f16:
694 ; GFX10:       ; %bb.0:
695 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
696 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
697 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
698 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
699 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
700 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00]
701 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
702 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
703 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
705 ; VI-LABEL: add_inline_imm_2.0_f16:
706 ; VI:       ; %bb.0:
707 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
708 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
709 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
710 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
711 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
712 ; VI-NEXT:    v_add_f16_e64 v0, s4, 2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe8,0x01,0x00]
713 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
714 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
716 ; SI-LABEL: add_inline_imm_2.0_f16:
717 ; SI:       ; %bb.0:
718 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
719 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
720 ; SI-NEXT:    s_mov_b32 s3, 0xf000
721 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
722 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
723 ; SI-NEXT:    s_mov_b32 s2, -1
724 ; SI-NEXT:    v_add_f32_e32 v0, 2.0, v0
725 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
726 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
727 ; SI-NEXT:    s_endpgm
728   %y = fadd half %x, 2.0
729   store half %y, half addrspace(1)* %out
730   ret void
; Adds the constant -2.0 to the half kernel argument %x and stores the sum to %out.
; The GFX10 and VI checks expect -2.0 to appear directly as an operand of
; v_add_f16_e64. The SI checks expect the input to be converted to f32
; (v_cvt_f32_f16_e32), added by v_add_f32_e32 with operand -2.0, and converted
; back (v_cvt_f16_f32_e32) before the store.
733 define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) {
734 ; GFX10-LABEL: add_inline_imm_neg_2.0_f16:
735 ; GFX10:       ; %bb.0:
736 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
737 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
738 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
739 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
740 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
741 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00]
742 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
743 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
744 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
746 ; VI-LABEL: add_inline_imm_neg_2.0_f16:
747 ; VI:       ; %bb.0:
748 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
749 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
750 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
751 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
752 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
753 ; VI-NEXT:    v_add_f16_e64 v0, s4, -2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xea,0x01,0x00]
754 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
755 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
757 ; SI-LABEL: add_inline_imm_neg_2.0_f16:
758 ; SI:       ; %bb.0:
759 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
760 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
761 ; SI-NEXT:    s_mov_b32 s3, 0xf000
762 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
763 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
764 ; SI-NEXT:    s_mov_b32 s2, -1
765 ; SI-NEXT:    v_add_f32_e32 v0, -2.0, v0
766 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
767 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
768 ; SI-NEXT:    s_endpgm
769   %y = fadd half %x, -2.0
770   store half %y, half addrspace(1)* %out
771   ret void
; Adds the constant 4.0 to the half kernel argument %x and stores the sum to %out.
; The GFX10 and VI checks expect 4.0 to appear directly as an operand of
; v_add_f16_e64. The SI checks expect the input to be converted to f32
; (v_cvt_f32_f16_e32), added by v_add_f32_e32 with operand 4.0, and converted
; back (v_cvt_f16_f32_e32) before the store.
774 define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) {
775 ; GFX10-LABEL: add_inline_imm_4.0_f16:
776 ; GFX10:       ; %bb.0:
777 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
778 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
779 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
780 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
781 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
782 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00]
783 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
784 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
785 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
787 ; VI-LABEL: add_inline_imm_4.0_f16:
788 ; VI:       ; %bb.0:
789 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
790 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
791 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
792 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
793 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
794 ; VI-NEXT:    v_add_f16_e64 v0, s4, 4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xec,0x01,0x00]
795 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
796 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
798 ; SI-LABEL: add_inline_imm_4.0_f16:
799 ; SI:       ; %bb.0:
800 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
801 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
802 ; SI-NEXT:    s_mov_b32 s3, 0xf000
803 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
804 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
805 ; SI-NEXT:    s_mov_b32 s2, -1
806 ; SI-NEXT:    v_add_f32_e32 v0, 4.0, v0
807 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
808 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
809 ; SI-NEXT:    s_endpgm
810   %y = fadd half %x, 4.0
811   store half %y, half addrspace(1)* %out
812   ret void
; Adds the constant -4.0 to the half kernel argument %x and stores the sum to %out.
; The GFX10 and VI checks expect -4.0 to appear directly as an operand of
; v_add_f16_e64. The SI checks expect the input to be converted to f32
; (v_cvt_f32_f16_e32), added by v_add_f32_e32 with operand -4.0, and converted
; back (v_cvt_f16_f32_e32) before the store.
815 define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) {
816 ; GFX10-LABEL: add_inline_imm_neg_4.0_f16:
817 ; GFX10:       ; %bb.0:
818 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
819 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
820 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
821 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
822 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
823 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00]
824 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
825 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
826 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
828 ; VI-LABEL: add_inline_imm_neg_4.0_f16:
829 ; VI:       ; %bb.0:
830 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
831 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
832 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
833 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
834 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
835 ; VI-NEXT:    v_add_f16_e64 v0, s4, -4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xee,0x01,0x00]
836 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
837 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
839 ; SI-LABEL: add_inline_imm_neg_4.0_f16:
840 ; SI:       ; %bb.0:
841 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
842 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
843 ; SI-NEXT:    s_mov_b32 s3, 0xf000
844 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
845 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
846 ; SI-NEXT:    s_mov_b32 s2, -1
847 ; SI-NEXT:    v_add_f32_e32 v0, -4.0, v0
848 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
849 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
850 ; SI-NEXT:    s_endpgm
851   %y = fadd half %x, -4.0
852   store half %y, half addrspace(1)* %out
853   ret void
; Loads a half from %in, adds the constant 0.5 and stores the sum to %out.
; Unlike the kernel-argument variants, the addend comes from a buffer load into
; a VGPR. The GFX10 and VI checks expect the e32 form v_add_f16_e32 with 0.5 as
; the first source operand and the loaded value as the second. The SI checks
; expect the loaded value to be converted to f32, added by v_add_f32_e32 with
; operand 0.5, and converted back to f16 before the store.
856 define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
857 ; GFX10-LABEL: commute_add_inline_imm_0.5_f16:
858 ; GFX10:       ; %bb.0:
859 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
860 ; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
861 ; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
862 ; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
863 ; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
864 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
865 ; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
866 ; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
867 ; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
868 ; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
869 ; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
870 ; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
871 ; GFX10-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64]
872 ; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
873 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
875 ; VI-LABEL: commute_add_inline_imm_0.5_f16:
876 ; VI:       ; %bb.0:
877 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0 ; encoding: [0x02,0x01,0x0a,0xc0,0x00,0x00,0x00,0x00]
878 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
879 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
880 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
881 ; VI-NEXT:    s_mov_b32 s0, s4 ; encoding: [0x04,0x00,0x80,0xbe]
882 ; VI-NEXT:    s_mov_b32 s1, s5 ; encoding: [0x05,0x00,0x81,0xbe]
883 ; VI-NEXT:    s_mov_b32 s4, s6 ; encoding: [0x06,0x00,0x84,0xbe]
884 ; VI-NEXT:    s_mov_b32 s5, s7 ; encoding: [0x07,0x00,0x85,0xbe]
885 ; VI-NEXT:    s_mov_b32 s6, s2 ; encoding: [0x02,0x00,0x86,0xbe]
886 ; VI-NEXT:    s_mov_b32 s7, s3 ; encoding: [0x03,0x00,0x87,0xbe]
887 ; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x01,0x80]
888 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
889 ; VI-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x3e]
890 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
891 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
893 ; SI-LABEL: commute_add_inline_imm_0.5_f16:
894 ; SI:       ; %bb.0:
895 ; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
896 ; SI-NEXT:    s_mov_b32 s3, 0xf000
897 ; SI-NEXT:    s_mov_b32 s2, -1
898 ; SI-NEXT:    s_mov_b32 s10, s2
899 ; SI-NEXT:    s_mov_b32 s11, s3
900 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
901 ; SI-NEXT:    s_mov_b32 s8, s6
902 ; SI-NEXT:    s_mov_b32 s9, s7
903 ; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
904 ; SI-NEXT:    s_mov_b32 s0, s4
905 ; SI-NEXT:    s_mov_b32 s1, s5
906 ; SI-NEXT:    s_waitcnt vmcnt(0)
907 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
908 ; SI-NEXT:    v_add_f32_e32 v0, 0.5, v0
909 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
910 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
911 ; SI-NEXT:    s_endpgm
912   %x = load half, half addrspace(1)* %in
913   %y = fadd half %x, 0.5
914   store half %y, half addrspace(1)* %out
915   ret void
; Loads a half from %in, adds the constant 1024.0 and stores the sum to %out.
; The GFX10 and VI checks expect v_add_f16_e32 with the constant as the first
; source operand, printed as the 16-bit pattern 0x6400 and carried in a trailing
; literal dword of the 8-byte encoding. The SI checks expect the loaded value to
; be converted to f32, added by v_add_f32_e32 with the 32-bit literal
; 0x44800000, and converted back to f16 before the store.
918 define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
919 ; GFX10-LABEL: commute_add_literal_f16:
920 ; GFX10:       ; %bb.0:
921 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
922 ; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
923 ; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
924 ; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
925 ; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
926 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
927 ; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
928 ; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
929 ; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
930 ; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
931 ; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
932 ; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
933 ; GFX10-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00]
934 ; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
935 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
937 ; VI-LABEL: commute_add_literal_f16:
938 ; VI:       ; %bb.0:
939 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0 ; encoding: [0x02,0x01,0x0a,0xc0,0x00,0x00,0x00,0x00]
940 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
941 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
942 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
943 ; VI-NEXT:    s_mov_b32 s0, s4 ; encoding: [0x04,0x00,0x80,0xbe]
944 ; VI-NEXT:    s_mov_b32 s1, s5 ; encoding: [0x05,0x00,0x81,0xbe]
945 ; VI-NEXT:    s_mov_b32 s4, s6 ; encoding: [0x06,0x00,0x84,0xbe]
946 ; VI-NEXT:    s_mov_b32 s5, s7 ; encoding: [0x07,0x00,0x85,0xbe]
947 ; VI-NEXT:    s_mov_b32 s6, s2 ; encoding: [0x02,0x00,0x86,0xbe]
948 ; VI-NEXT:    s_mov_b32 s7, s3 ; encoding: [0x03,0x00,0x87,0xbe]
949 ; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x01,0x80]
950 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
951 ; VI-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x3e,0x00,0x64,0x00,0x00]
952 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
953 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
955 ; SI-LABEL: commute_add_literal_f16:
956 ; SI:       ; %bb.0:
957 ; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
958 ; SI-NEXT:    s_mov_b32 s3, 0xf000
959 ; SI-NEXT:    s_mov_b32 s2, -1
960 ; SI-NEXT:    s_mov_b32 s10, s2
961 ; SI-NEXT:    s_mov_b32 s11, s3
962 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
963 ; SI-NEXT:    s_mov_b32 s8, s6
964 ; SI-NEXT:    s_mov_b32 s9, s7
965 ; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
966 ; SI-NEXT:    s_mov_b32 s0, s4
967 ; SI-NEXT:    s_mov_b32 s1, s5
968 ; SI-NEXT:    s_waitcnt vmcnt(0)
969 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
970 ; SI-NEXT:    v_add_f32_e32 v0, 0x44800000, v0
971 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
972 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
973 ; SI-NEXT:    s_endpgm
974   %x = load half, half addrspace(1)* %in
975   %y = fadd half %x, 1024.0
976   store half %y, half addrspace(1)* %out
977   ret void
; Adds the half constant written as the bit pattern 0xH0001 to the half kernel
; argument %x and stores the sum to %out.
; The GFX10 and VI checks expect the constant to be printed as the integer
; operand 1 of v_add_f16_e64. The SI checks expect the input to be converted to
; f32, added by v_add_f32_e32 with the 32-bit literal 0x33800000, and converted
; back to f16 before the store.
980 define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) {
981 ; GFX10-LABEL: add_inline_imm_1_f16:
982 ; GFX10:       ; %bb.0:
983 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
984 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
985 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
986 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
987 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
988 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00]
989 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
990 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
991 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
993 ; VI-LABEL: add_inline_imm_1_f16:
994 ; VI:       ; %bb.0:
995 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
996 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
997 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
998 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
999 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1000 ; VI-NEXT:    v_add_f16_e64 v0, s4, 1 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x02,0x01,0x00]
1001 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1002 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1004 ; SI-LABEL: add_inline_imm_1_f16:
1005 ; SI:       ; %bb.0:
1006 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1007 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1008 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1009 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1010 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1011 ; SI-NEXT:    s_mov_b32 s2, -1
1012 ; SI-NEXT:    v_add_f32_e32 v0, 0x33800000, v0
1013 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1014 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1015 ; SI-NEXT:    s_endpgm
1016   %y = fadd half %x, 0xH0001
1017   store half %y, half addrspace(1)* %out
1018   ret void
; Adds the half constant written as the bit pattern 0xH0002 to the half kernel
; argument %x and stores the sum to %out.
; The GFX10 and VI checks expect the constant to be printed as the integer
; operand 2 of v_add_f16_e64. The SI checks expect the input to be converted to
; f32, added by v_add_f32_e32 with the 32-bit literal 0x34000000, and converted
; back to f16 before the store.
1021 define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) {
1022 ; GFX10-LABEL: add_inline_imm_2_f16:
1023 ; GFX10:       ; %bb.0:
1024 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1025 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1026 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1027 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1028 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1029 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00]
1030 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1031 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1032 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1034 ; VI-LABEL: add_inline_imm_2_f16:
1035 ; VI:       ; %bb.0:
1036 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1037 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1038 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1039 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1040 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1041 ; VI-NEXT:    v_add_f16_e64 v0, s4, 2 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x04,0x01,0x00]
1042 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1043 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1045 ; SI-LABEL: add_inline_imm_2_f16:
1046 ; SI:       ; %bb.0:
1047 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1048 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1049 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1050 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1051 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1052 ; SI-NEXT:    s_mov_b32 s2, -1
1053 ; SI-NEXT:    v_add_f32_e32 v0, 0x34000000, v0
1054 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1055 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1056 ; SI-NEXT:    s_endpgm
1057   %y = fadd half %x, 0xH0002
1058   store half %y, half addrspace(1)* %out
1059   ret void
; Adds the half constant written as the bit pattern 0xH0010 to the half kernel
; argument %x and stores the sum to %out.
; The GFX10 and VI checks expect the constant to be printed as the integer
; operand 16 of v_add_f16_e64. The SI checks expect the input to be converted to
; f32, added by v_add_f32_e32 with the 32-bit literal 0x35800000, and converted
; back to f16 before the store.
1062 define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) {
1063 ; GFX10-LABEL: add_inline_imm_16_f16:
1064 ; GFX10:       ; %bb.0:
1065 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1066 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1067 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1068 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1069 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1070 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00]
1071 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1072 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1073 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1075 ; VI-LABEL: add_inline_imm_16_f16:
1076 ; VI:       ; %bb.0:
1077 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1078 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1079 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1080 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1081 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1082 ; VI-NEXT:    v_add_f16_e64 v0, s4, 16 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x20,0x01,0x00]
1083 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1084 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1086 ; SI-LABEL: add_inline_imm_16_f16:
1087 ; SI:       ; %bb.0:
1088 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1089 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1090 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1091 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1092 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1093 ; SI-NEXT:    s_mov_b32 s2, -1
1094 ; SI-NEXT:    v_add_f32_e32 v0, 0x35800000, v0
1095 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1096 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1097 ; SI-NEXT:    s_endpgm
1098   %y = fadd half %x, 0xH0010
1099   store half %y, half addrspace(1)* %out
1100   ret void
; Despite the _f16 suffix, the arithmetic here is an integer add: an i16 is
; loaded from %in, -1 is added, and the sum is bitcast to half only for the
; store to %out. All three check prefixes expect -1 to appear directly as an
; operand of the add instruction.
1103 define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
1104 ; GFX10-LABEL: add_inline_imm_neg_1_f16:
1105 ; GFX10:       ; %bb.0:
1106 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1107 ; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1108 ; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1109 ; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1110 ; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1111 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1112 ; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1113 ; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1114 ; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1115 ; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1116 ; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1117 ; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1118 ; GFX10-NEXT:    v_add_nc_u16 v0, v0, -1 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x83,0x01,0x00]
1119 ; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1120 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1122 ; VI-LABEL: add_inline_imm_neg_1_f16:
1123 ; VI:       ; %bb.0:
1124 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0 ; encoding: [0x02,0x01,0x0a,0xc0,0x00,0x00,0x00,0x00]
1125 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1126 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1127 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1128 ; VI-NEXT:    s_mov_b32 s0, s4 ; encoding: [0x04,0x00,0x80,0xbe]
1129 ; VI-NEXT:    s_mov_b32 s1, s5 ; encoding: [0x05,0x00,0x81,0xbe]
1130 ; VI-NEXT:    s_mov_b32 s4, s6 ; encoding: [0x06,0x00,0x84,0xbe]
1131 ; VI-NEXT:    s_mov_b32 s5, s7 ; encoding: [0x07,0x00,0x85,0xbe]
1132 ; VI-NEXT:    s_mov_b32 s6, s2 ; encoding: [0x02,0x00,0x86,0xbe]
1133 ; VI-NEXT:    s_mov_b32 s7, s3 ; encoding: [0x03,0x00,0x87,0xbe]
1134 ; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x01,0x80]
1135 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1136 ; VI-NEXT:    v_add_u16_e32 v0, -1, v0 ; encoding: [0xc1,0x00,0x00,0x4c]
1137 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1138 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1140 ; SI-LABEL: add_inline_imm_neg_1_f16:
1141 ; SI:       ; %bb.0:
1142 ; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
1143 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1144 ; SI-NEXT:    s_mov_b32 s2, -1
1145 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1146 ; SI-NEXT:    s_mov_b32 s0, s4
1147 ; SI-NEXT:    s_mov_b32 s1, s5
1148 ; SI-NEXT:    s_mov_b32 s4, s6
1149 ; SI-NEXT:    s_mov_b32 s5, s7
1150 ; SI-NEXT:    s_mov_b32 s6, s2
1151 ; SI-NEXT:    s_mov_b32 s7, s3
1152 ; SI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
1153 ; SI-NEXT:    s_waitcnt vmcnt(0)
1154 ; SI-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
1155 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1156 ; SI-NEXT:    s_endpgm
1157   %x = load i16, i16 addrspace(1)* %in
1158   %y = add i16 %x, -1
1159   %ybc = bitcast i16 %y to half
1160   store half %ybc, half addrspace(1)* %out
1161   ret void
; Integer add in spite of the _f16 suffix: an i16 is loaded from %in, -2 is
; added, and the sum is bitcast to half only for the store to %out. All three
; check prefixes expect -2 to appear directly as an operand of the add
; instruction.
1164 define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
1165 ; GFX10-LABEL: add_inline_imm_neg_2_f16:
1166 ; GFX10:       ; %bb.0:
1167 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1168 ; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1169 ; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1170 ; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1171 ; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1172 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1173 ; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1174 ; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1175 ; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1176 ; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1177 ; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1178 ; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1179 ; GFX10-NEXT:    v_add_nc_u16 v0, v0, -2 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x85,0x01,0x00]
1180 ; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1181 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1183 ; VI-LABEL: add_inline_imm_neg_2_f16:
1184 ; VI:       ; %bb.0:
1185 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0 ; encoding: [0x02,0x01,0x0a,0xc0,0x00,0x00,0x00,0x00]
1186 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1187 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1188 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1189 ; VI-NEXT:    s_mov_b32 s0, s4 ; encoding: [0x04,0x00,0x80,0xbe]
1190 ; VI-NEXT:    s_mov_b32 s1, s5 ; encoding: [0x05,0x00,0x81,0xbe]
1191 ; VI-NEXT:    s_mov_b32 s4, s6 ; encoding: [0x06,0x00,0x84,0xbe]
1192 ; VI-NEXT:    s_mov_b32 s5, s7 ; encoding: [0x07,0x00,0x85,0xbe]
1193 ; VI-NEXT:    s_mov_b32 s6, s2 ; encoding: [0x02,0x00,0x86,0xbe]
1194 ; VI-NEXT:    s_mov_b32 s7, s3 ; encoding: [0x03,0x00,0x87,0xbe]
1195 ; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x01,0x80]
1196 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1197 ; VI-NEXT:    v_add_u16_e32 v0, -2, v0 ; encoding: [0xc2,0x00,0x00,0x4c]
1198 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1199 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1201 ; SI-LABEL: add_inline_imm_neg_2_f16:
1202 ; SI:       ; %bb.0:
1203 ; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
1204 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1205 ; SI-NEXT:    s_mov_b32 s2, -1
1206 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1207 ; SI-NEXT:    s_mov_b32 s0, s4
1208 ; SI-NEXT:    s_mov_b32 s1, s5
1209 ; SI-NEXT:    s_mov_b32 s4, s6
1210 ; SI-NEXT:    s_mov_b32 s5, s7
1211 ; SI-NEXT:    s_mov_b32 s6, s2
1212 ; SI-NEXT:    s_mov_b32 s7, s3
1213 ; SI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
1214 ; SI-NEXT:    s_waitcnt vmcnt(0)
1215 ; SI-NEXT:    v_add_i32_e32 v0, vcc, -2, v0
1216 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1217 ; SI-NEXT:    s_endpgm
1218   %x = load i16, i16 addrspace(1)* %in
1219   %y = add i16 %x, -2
1220   %ybc = bitcast i16 %y to half
1221   store half %ybc, half addrspace(1)* %out
1222   ret void
; Integer add in spite of the _f16 suffix: an i16 is loaded from %in, -16 is
; added, and the sum is bitcast to half only for the store to %out. All three
; check prefixes expect -16 to appear directly as an operand of the add
; instruction.
1225 define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
1226 ; GFX10-LABEL: add_inline_imm_neg_16_f16:
1227 ; GFX10:       ; %bb.0:
1228 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1229 ; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1230 ; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1231 ; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1232 ; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1233 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1234 ; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1235 ; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1236 ; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1237 ; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1238 ; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1239 ; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1240 ; GFX10-NEXT:    v_add_nc_u16 v0, v0, -16 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0xa1,0x01,0x00]
1241 ; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1242 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1244 ; VI-LABEL: add_inline_imm_neg_16_f16:
1245 ; VI:       ; %bb.0:
1246 ; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0 ; encoding: [0x02,0x01,0x0a,0xc0,0x00,0x00,0x00,0x00]
1247 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1248 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1249 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1250 ; VI-NEXT:    s_mov_b32 s0, s4 ; encoding: [0x04,0x00,0x80,0xbe]
1251 ; VI-NEXT:    s_mov_b32 s1, s5 ; encoding: [0x05,0x00,0x81,0xbe]
1252 ; VI-NEXT:    s_mov_b32 s4, s6 ; encoding: [0x06,0x00,0x84,0xbe]
1253 ; VI-NEXT:    s_mov_b32 s5, s7 ; encoding: [0x07,0x00,0x85,0xbe]
1254 ; VI-NEXT:    s_mov_b32 s6, s2 ; encoding: [0x02,0x00,0x86,0xbe]
1255 ; VI-NEXT:    s_mov_b32 s7, s3 ; encoding: [0x03,0x00,0x87,0xbe]
1256 ; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x01,0x80]
1257 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1258 ; VI-NEXT:    v_add_u16_e32 v0, -16, v0 ; encoding: [0xd0,0x00,0x00,0x4c]
1259 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1260 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1262 ; SI-LABEL: add_inline_imm_neg_16_f16:
1263 ; SI:       ; %bb.0:
1264 ; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
1265 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1266 ; SI-NEXT:    s_mov_b32 s2, -1
1267 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1268 ; SI-NEXT:    s_mov_b32 s0, s4
1269 ; SI-NEXT:    s_mov_b32 s1, s5
1270 ; SI-NEXT:    s_mov_b32 s4, s6
1271 ; SI-NEXT:    s_mov_b32 s5, s7
1272 ; SI-NEXT:    s_mov_b32 s6, s2
1273 ; SI-NEXT:    s_mov_b32 s7, s3
1274 ; SI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
1275 ; SI-NEXT:    s_waitcnt vmcnt(0)
1276 ; SI-NEXT:    v_add_i32_e32 v0, vcc, -16, v0
1277 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1278 ; SI-NEXT:    s_endpgm
1279   %x = load i16, i16 addrspace(1)* %in
1280   %y = add i16 %x, -16
1281   %ybc = bitcast i16 %y to half
1282   store half %ybc, half addrspace(1)* %out
1283   ret void
; fadd of the kernel argument %x with a half whose bit pattern is 0x003F.
; The GFX10 and VI checks expect the constant to appear as the operand 63 of
; v_add_f16_e64. The SI checks expect the add to be carried out in f32
; (v_cvt_f32_f16, v_add_f32 with the literal 0x367c0000, v_cvt_f16_f32).
1286 define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) {
1287 ; GFX10-LABEL: add_inline_imm_63_f16:
1288 ; GFX10:       ; %bb.0:
1289 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1290 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1291 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1292 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1293 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1294 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00]
1295 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1296 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1297 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1299 ; VI-LABEL: add_inline_imm_63_f16:
1300 ; VI:       ; %bb.0:
1301 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1302 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1303 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1304 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1305 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1306 ; VI-NEXT:    v_add_f16_e64 v0, s4, 63 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x7e,0x01,0x00]
1307 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1308 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1310 ; SI-LABEL: add_inline_imm_63_f16:
1311 ; SI:       ; %bb.0:
1312 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1313 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1314 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1315 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1316 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1317 ; SI-NEXT:    s_mov_b32 s2, -1
1318 ; SI-NEXT:    v_add_f32_e32 v0, 0x367c0000, v0
1319 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1320 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1321 ; SI-NEXT:    s_endpgm
1322   %y = fadd half %x, 0xH003F
1323   store half %y, half addrspace(1)* %out
1324   ret void
; fadd of the kernel argument %x with a half whose bit pattern is 0x0040.
; The GFX10 and VI checks expect the constant to appear as the operand 64 of
; v_add_f16_e64. The SI checks expect the add to be carried out in f32
; (v_cvt_f32_f16, v_add_f32 with the literal 0x36800000, v_cvt_f16_f32).
1327 define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) {
1328 ; GFX10-LABEL: add_inline_imm_64_f16:
1329 ; GFX10:       ; %bb.0:
1330 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1331 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1332 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1333 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1334 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1335 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00]
1336 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1337 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1338 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1340 ; VI-LABEL: add_inline_imm_64_f16:
1341 ; VI:       ; %bb.0:
1342 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1343 ; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1344 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1345 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1346 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1347 ; VI-NEXT:    v_add_f16_e64 v0, s4, 64 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x80,0x01,0x00]
1348 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1349 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1351 ; SI-LABEL: add_inline_imm_64_f16:
1352 ; SI:       ; %bb.0:
1353 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1354 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1355 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1356 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1357 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1358 ; SI-NEXT:    s_mov_b32 s2, -1
1359 ; SI-NEXT:    v_add_f32_e32 v0, 0x36800000, v0
1360 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1361 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1362 ; SI-NEXT:    s_endpgm
1363   %y = fadd half %x, 0xH0040
1364   store half %y, half addrspace(1)* %out
1365   ret void
1368 ; This needs to be emitted as a literal constant since the 16-bit
1369 ; float values do not work for 16-bit integer operations.
; Multiplies %x by the i16 bit pattern of half 0.5 and stores the product to
; %out. The checks expect the constant as the literal 0x3800 in the multiply
; on every target; in the SI output %x is first masked with 0xffff and the
; multiply is v_mul_u32_u24.
1370 define void @mul_inline_imm_0.5_i16(i16 addrspace(1)* %out, i16 %x) {
1371 ; GFX10-LABEL: mul_inline_imm_0.5_i16:
1372 ; GFX10:       ; %bb.0:
1373 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1374 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1375 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
1376 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1377 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1378 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1380 ; VI-LABEL: mul_inline_imm_0.5_i16:
1381 ; VI:       ; %bb.0:
1382 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1383 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x38,0x00,0x00]
1384 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1385 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1386 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1388 ; SI-LABEL: mul_inline_imm_0.5_i16:
1389 ; SI:       ; %bb.0:
1390 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1391 ; SI-NEXT:    s_mov_b32 s6, 0
1392 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1393 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1394 ; SI-NEXT:    s_mov_b32 s4, s6
1395 ; SI-NEXT:    s_mov_b32 s5, s6
1396 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3800, v2
1397 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1398 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1399 ; SI-NEXT:    s_setpc_b64 s[30:31]
1400   %y = mul i16 %x, bitcast (half 0.5 to i16)
1401   store i16 %y, i16 addrspace(1)* %out
1402   ret void
; Multiplies %x by the i16 bit pattern of half -0.5 and stores the product to
; %out. The checks expect the literal 0xb800 in the multiply on every target;
; in the GFX10 and VI encodings the literal's bytes are 0x00,0xb8,0xff,0xff.
1405 define void @mul_inline_imm_neg_0.5_i16(i16 addrspace(1)* %out, i16 %x) {
1406 ; GFX10-LABEL: mul_inline_imm_neg_0.5_i16:
1407 ; GFX10:       ; %bb.0:
1408 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1409 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1410 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
1411 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1412 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1413 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1415 ; VI-LABEL: mul_inline_imm_neg_0.5_i16:
1416 ; VI:       ; %bb.0:
1417 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1418 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff]
1419 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1420 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1421 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1423 ; SI-LABEL: mul_inline_imm_neg_0.5_i16:
1424 ; SI:       ; %bb.0:
1425 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1426 ; SI-NEXT:    s_mov_b32 s6, 0
1427 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1428 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1429 ; SI-NEXT:    s_mov_b32 s4, s6
1430 ; SI-NEXT:    s_mov_b32 s5, s6
1431 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xb800, v2
1432 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1433 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1434 ; SI-NEXT:    s_setpc_b64 s[30:31]
1435   %y = mul i16 %x, bitcast (half -0.5 to i16)
1436   store i16 %y, i16 addrspace(1)* %out
1437   ret void
; Multiplies %x by the i16 bit pattern of half 1.0 and stores the product to
; %out. The checks expect the constant as the literal 0x3c00 in the multiply
; on every target.
1440 define void @mul_inline_imm_1.0_i16(i16 addrspace(1)* %out, i16 %x) {
1441 ; GFX10-LABEL: mul_inline_imm_1.0_i16:
1442 ; GFX10:       ; %bb.0:
1443 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1444 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1445 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
1446 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1447 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1448 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1450 ; VI-LABEL: mul_inline_imm_1.0_i16:
1451 ; VI:       ; %bb.0:
1452 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1453 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3c00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x3c,0x00,0x00]
1454 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1455 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1456 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1458 ; SI-LABEL: mul_inline_imm_1.0_i16:
1459 ; SI:       ; %bb.0:
1460 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1461 ; SI-NEXT:    s_mov_b32 s6, 0
1462 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1463 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1464 ; SI-NEXT:    s_mov_b32 s4, s6
1465 ; SI-NEXT:    s_mov_b32 s5, s6
1466 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3c00, v2
1467 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1468 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1469 ; SI-NEXT:    s_setpc_b64 s[30:31]
1470   %y = mul i16 %x, bitcast (half 1.0 to i16)
1471   store i16 %y, i16 addrspace(1)* %out
1472   ret void
; Multiplies %x by the i16 bit pattern of half -1.0 and stores the product to
; %out. The checks expect the literal 0xbc00 in the multiply on every target;
; in the GFX10 and VI encodings the literal's bytes are 0x00,0xbc,0xff,0xff.
1475 define void @mul_inline_imm_neg_1.0_i16(i16 addrspace(1)* %out, i16 %x) {
1476 ; GFX10-LABEL: mul_inline_imm_neg_1.0_i16:
1477 ; GFX10:       ; %bb.0:
1478 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1479 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1480 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
1481 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1482 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1483 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1485 ; VI-LABEL: mul_inline_imm_neg_1.0_i16:
1486 ; VI:       ; %bb.0:
1487 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1488 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff]
1489 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1490 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1491 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1493 ; SI-LABEL: mul_inline_imm_neg_1.0_i16:
1494 ; SI:       ; %bb.0:
1495 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1496 ; SI-NEXT:    s_mov_b32 s6, 0
1497 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1498 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1499 ; SI-NEXT:    s_mov_b32 s4, s6
1500 ; SI-NEXT:    s_mov_b32 s5, s6
1501 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xbc00, v2
1502 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1503 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1504 ; SI-NEXT:    s_setpc_b64 s[30:31]
1505   %y = mul i16 %x, bitcast (half -1.0 to i16)
1506   store i16 %y, i16 addrspace(1)* %out
1507   ret void
; Shifts the i16 bit pattern of half 2.0 left by %x (the constant is the value
; being shifted, %x is the shift amount) and stores the result to %out.
; The GFX10 checks expect the literal 0x4000 directly in v_lshlrev_b16. The VI
; checks expect it to be placed in s4 by s_movk_i32 before v_lshlrev_b16_e64.
; The SI checks expect v_lshl_b32 with the literal, after %x is masked with
; 0xffff.
1510 define void @shl_inline_imm_2.0_i16(i16 addrspace(1)* %out, i16 %x) {
1511 ; GFX10-LABEL: shl_inline_imm_2.0_i16:
1512 ; GFX10:       ; %bb.0:
1513 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1514 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1515 ; GFX10-NEXT:    v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
1516 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1517 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1518 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1520 ; VI-LABEL: shl_inline_imm_2.0_i16:
1521 ; VI:       ; %bb.0:
1522 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1523 ; VI-NEXT:    s_movk_i32 s4, 0x4000 ; encoding: [0x00,0x40,0x04,0xb0]
1524 ; VI-NEXT:    v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
1525 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1526 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1527 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1529 ; SI-LABEL: shl_inline_imm_2.0_i16:
1530 ; SI:       ; %bb.0:
1531 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1532 ; SI-NEXT:    s_mov_b32 s6, 0
1533 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1534 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1535 ; SI-NEXT:    s_mov_b32 s4, s6
1536 ; SI-NEXT:    s_mov_b32 s5, s6
1537 ; SI-NEXT:    v_lshl_b32_e32 v2, 0x4000, v2
1538 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1539 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1540 ; SI-NEXT:    s_setpc_b64 s[30:31]
1541   %y = shl i16 bitcast (half 2.0 to i16), %x
1542   store i16 %y, i16 addrspace(1)* %out
1543   ret void
; Shifts the i16 bit pattern of half -2.0 left by %x (the constant is the
; value being shifted, %x is the shift amount) and stores the result to %out.
; The GFX10 checks expect the literal 0xc000 directly in v_lshlrev_b16. The VI
; checks expect it to be placed in s4 by s_movk_i32 before v_lshlrev_b16_e64.
; The SI checks expect v_lshl_b32 with the literal written as 0xffffc000,
; after %x is masked with 0xffff.
1546 define void @shl_inline_imm_neg_2.0_i16(i16 addrspace(1)* %out, i16 %x) {
1547 ; GFX10-LABEL: shl_inline_imm_neg_2.0_i16:
1548 ; GFX10:       ; %bb.0:
1549 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1550 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1551 ; GFX10-NEXT:    v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
1552 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1553 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1554 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1556 ; VI-LABEL: shl_inline_imm_neg_2.0_i16:
1557 ; VI:       ; %bb.0:
1558 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1559 ; VI-NEXT:    s_movk_i32 s4, 0xc000 ; encoding: [0x00,0xc0,0x04,0xb0]
1560 ; VI-NEXT:    v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
1561 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1562 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1563 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1565 ; SI-LABEL: shl_inline_imm_neg_2.0_i16:
1566 ; SI:       ; %bb.0:
1567 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1568 ; SI-NEXT:    s_mov_b32 s6, 0
1569 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1570 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1571 ; SI-NEXT:    s_mov_b32 s4, s6
1572 ; SI-NEXT:    s_mov_b32 s5, s6
1573 ; SI-NEXT:    v_lshl_b32_e32 v2, 0xffffc000, v2
1574 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1575 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1576 ; SI-NEXT:    s_setpc_b64 s[30:31]
1577   %y = shl i16 bitcast (half -2.0 to i16), %x
1578   store i16 %y, i16 addrspace(1)* %out
1579   ret void
; Multiplies %x by the i16 bit pattern of half 4.0 and stores the product to
; %out. The checks expect the constant as the literal 0x4400 in the multiply
; on every target.
1582 define void @mul_inline_imm_4.0_i16(i16 addrspace(1)* %out, i16 %x) {
1583 ; GFX10-LABEL: mul_inline_imm_4.0_i16:
1584 ; GFX10:       ; %bb.0:
1585 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1586 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1587 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
1588 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1589 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1590 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1592 ; VI-LABEL: mul_inline_imm_4.0_i16:
1593 ; VI:       ; %bb.0:
1594 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1595 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x4400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x44,0x00,0x00]
1596 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1597 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1598 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1600 ; SI-LABEL: mul_inline_imm_4.0_i16:
1601 ; SI:       ; %bb.0:
1602 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1603 ; SI-NEXT:    s_mov_b32 s6, 0
1604 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1605 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1606 ; SI-NEXT:    s_mov_b32 s4, s6
1607 ; SI-NEXT:    s_mov_b32 s5, s6
1608 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x4400, v2
1609 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1610 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1611 ; SI-NEXT:    s_setpc_b64 s[30:31]
1612   %y = mul i16 %x, bitcast (half 4.0 to i16)
1613   store i16 %y, i16 addrspace(1)* %out
1614   ret void
; Multiplies %x by the i16 bit pattern of half -4.0 and stores the product to
; %out. The checks expect the literal 0xc400 in the multiply on every target;
; in the GFX10 and VI encodings the literal's bytes are 0x00,0xc4,0xff,0xff.
1617 define void @mul_inline_imm_neg_4.0_i16(i16 addrspace(1)* %out, i16 %x) {
1618 ; GFX10-LABEL: mul_inline_imm_neg_4.0_i16:
1619 ; GFX10:       ; %bb.0:
1620 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1621 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1622 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
1623 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1624 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1625 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1627 ; VI-LABEL: mul_inline_imm_neg_4.0_i16:
1628 ; VI:       ; %bb.0:
1629 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1630 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff]
1631 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1632 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1633 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1635 ; SI-LABEL: mul_inline_imm_neg_4.0_i16:
1636 ; SI:       ; %bb.0:
1637 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1638 ; SI-NEXT:    s_mov_b32 s6, 0
1639 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1640 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1641 ; SI-NEXT:    s_mov_b32 s4, s6
1642 ; SI-NEXT:    s_mov_b32 s5, s6
1643 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xc400, v2
1644 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1645 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1646 ; SI-NEXT:    s_setpc_b64 s[30:31]
1647   %y = mul i16 %x, bitcast (half -4.0 to i16)
1648   store i16 %y, i16 addrspace(1)* %out
1649   ret void
; Multiplies %x by the i16 bit pattern of half 0xH3118 (the constant the test
; name refers to as inv2pi) and stores the product to %out. The checks expect
; the constant as the literal 0x3118 in the multiply on every target.
1652 define void @mul_inline_imm_inv2pi_i16(i16 addrspace(1)* %out, i16 %x) {
1653 ; GFX10-LABEL: mul_inline_imm_inv2pi_i16:
1654 ; GFX10:       ; %bb.0:
1655 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1656 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1657 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
1658 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1659 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
1660 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1662 ; VI-LABEL: mul_inline_imm_inv2pi_i16:
1663 ; VI:       ; %bb.0:
1664 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1665 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3118, v2 ; encoding: [0xff,0x04,0x04,0x52,0x18,0x31,0x00,0x00]
1666 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1667 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1668 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1670 ; SI-LABEL: mul_inline_imm_inv2pi_i16:
1671 ; SI:       ; %bb.0:
1672 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1673 ; SI-NEXT:    s_mov_b32 s6, 0
1674 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1675 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1676 ; SI-NEXT:    s_mov_b32 s4, s6
1677 ; SI-NEXT:    s_mov_b32 s5, s6
1678 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3118, v2
1679 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1680 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1681 ; SI-NEXT:    s_setpc_b64 s[30:31]
1682   %y = mul i16 %x, bitcast (half 0xH3118 to i16)
1683   store i16 %y, i16 addrspace(1)* %out
1684   ret void