Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / imm16.ll
blobdcc615232e56bed2e14d5f9e956cae5937b2acc7
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; The three amdhsa configurations below disable flat-for-global exactly once
; each (the flag used to be passed twice, which was redundant) and print the
; MC encoding of every instruction so the checks can pin the encoded bytes.
2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX10 %s
3 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX11 %s
4 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
7 ; FIXME: Merge into imm.ll
; Volatile store of the i16 constant -32768 (bit pattern 0x8000) through %out.
; The checks expect the value to be materialized by v_mov_b32 with a 32-bit
; literal (0xffff8000 on the gfx1010, gfx1100 and tonga runs, 0x8000 on the
; tahiti run) and then written with a 16-bit buffer store.
; The assertions in this function are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
9 define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(ptr addrspace(1) %out) {
10 ; GFX10-LABEL: store_inline_imm_neg_0.0_i16:
11 ; GFX10:       ; %bb.0:
12 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
13 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
14 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
15 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
16 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
17 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
18 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
19 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
21 ; GFX11-LABEL: store_inline_imm_neg_0.0_i16:
22 ; GFX11:       ; %bb.0:
23 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
24 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
25 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
26 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
27 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
28 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
29 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
30 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
31 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
32 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
34 ; VI-LABEL: store_inline_imm_neg_0.0_i16:
35 ; VI:       ; %bb.0:
36 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
37 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
38 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
39 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
40 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
41 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
42 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
43 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
45 ; SI-LABEL: store_inline_imm_neg_0.0_i16:
46 ; SI:       ; %bb.0:
47 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
48 ; SI-NEXT:    s_mov_b32 s3, 0xf000
49 ; SI-NEXT:    s_mov_b32 s2, -1
50 ; SI-NEXT:    v_mov_b32_e32 v0, 0x8000
51 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
52 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
53 ; SI-NEXT:    s_waitcnt vmcnt(0)
54 ; SI-NEXT:    s_endpgm
; This is the only volatile store among the tests visible here. Its expected
; output has an extra wait between the store and s_endpgm (s_waitcnt_vscnt on
; the gfx1010/gfx1100 runs, s_waitcnt vmcnt(0) on tonga/tahiti) and a dlc bit
; on the gfx1100 store, none of which the non-volatile tests expect.
; NOTE(review): presumably these come from the volatile qualifier - confirm.
55   store volatile i16 -32768, ptr addrspace(1) %out
56   ret void
; Stores the half constant 0.0 through %out.
; Every run expects the zero to come from `v_mov_b32_e32 v0, 0`; in the runs
; that print encodings this is a 4-byte instruction with no trailing literal
; dword, followed by a 16-bit buffer store.
; The assertions in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
59 define amdgpu_kernel void @store_inline_imm_0.0_f16(ptr addrspace(1) %out) {
60 ; GFX10-LABEL: store_inline_imm_0.0_f16:
61 ; GFX10:       ; %bb.0:
62 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
63 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
64 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
65 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
66 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
67 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
68 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
70 ; GFX11-LABEL: store_inline_imm_0.0_f16:
71 ; GFX11:       ; %bb.0:
72 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
73 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
74 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
75 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
76 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
77 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
78 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
79 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
80 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
82 ; VI-LABEL: store_inline_imm_0.0_f16:
83 ; VI:       ; %bb.0:
84 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
85 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
86 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
87 ; VI-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
88 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
89 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
90 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
92 ; SI-LABEL: store_inline_imm_0.0_f16:
93 ; SI:       ; %bb.0:
94 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
95 ; SI-NEXT:    s_mov_b32 s3, 0xf000
96 ; SI-NEXT:    s_mov_b32 s2, -1
97 ; SI-NEXT:    v_mov_b32_e32 v0, 0
98 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
99 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
100 ; SI-NEXT:    s_endpgm
101   store half 0.0, ptr addrspace(1) %out
102   ret void
; Stores the half constant -0.0 (bit pattern 0x8000) through %out.
; The checks expect a v_mov_b32 carrying a 32-bit literal: 0xffff8000 (the
; 16-bit pattern sign-extended) on the gfx1010, gfx1100 and tonga runs, and
; 0x8000 on the tahiti run. In the runs that print encodings the move is
; 8 bytes long, i.e. the literal dword follows the instruction.
; The assertions in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
105 define amdgpu_kernel void @store_imm_neg_0.0_f16(ptr addrspace(1) %out) {
106 ; GFX10-LABEL: store_imm_neg_0.0_f16:
107 ; GFX10:       ; %bb.0:
108 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
109 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
110 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
111 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
112 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
113 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
114 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
116 ; GFX11-LABEL: store_imm_neg_0.0_f16:
117 ; GFX11:       ; %bb.0:
118 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
119 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
120 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
121 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
122 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
123 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
124 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
125 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
126 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
128 ; VI-LABEL: store_imm_neg_0.0_f16:
129 ; VI:       ; %bb.0:
130 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
131 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
132 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
133 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
134 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
135 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
136 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
138 ; SI-LABEL: store_imm_neg_0.0_f16:
139 ; SI:       ; %bb.0:
140 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
141 ; SI-NEXT:    s_mov_b32 s3, 0xf000
142 ; SI-NEXT:    s_mov_b32 s2, -1
143 ; SI-NEXT:    v_mov_b32_e32 v0, 0x8000
144 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
145 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
146 ; SI-NEXT:    s_endpgm
147   store half -0.0, ptr addrspace(1) %out
148   ret void
; Stores the half constant 0.5 (bit pattern 0x3800) through %out.
; All four runs expect `v_mov_b32_e32 v0, 0x3800` followed by a 16-bit
; buffer store.
; NOTE(review): the test name says inline_imm, but in the runs that print
; encodings the checked move is 8 bytes long and carries 0x3800 as a trailing
; literal dword - presumably because the move is a 32-bit operation; confirm
; that this is the intended expectation before changing it.
; The assertions in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
151 define amdgpu_kernel void @store_inline_imm_0.5_f16(ptr addrspace(1) %out) {
152 ; GFX10-LABEL: store_inline_imm_0.5_f16:
153 ; GFX10:       ; %bb.0:
154 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
155 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
156 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
157 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
158 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
159 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
160 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
162 ; GFX11-LABEL: store_inline_imm_0.5_f16:
163 ; GFX11:       ; %bb.0:
164 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
165 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
166 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
167 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
168 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
169 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
170 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
171 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
172 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
174 ; VI-LABEL: store_inline_imm_0.5_f16:
175 ; VI:       ; %bb.0:
176 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
177 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
178 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
179 ; VI-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
180 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
181 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
182 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
184 ; SI-LABEL: store_inline_imm_0.5_f16:
185 ; SI:       ; %bb.0:
186 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
187 ; SI-NEXT:    s_mov_b32 s3, 0xf000
188 ; SI-NEXT:    s_mov_b32 s2, -1
189 ; SI-NEXT:    v_mov_b32_e32 v0, 0x3800
190 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
191 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
192 ; SI-NEXT:    s_endpgm
193   store half 0.5, ptr addrspace(1) %out
194   ret void
; Stores the half constant -0.5 (bit pattern 0xb800) through %out.
; The checks expect a v_mov_b32 with the literal 0xffffb800 (the 16-bit
; pattern sign-extended) on the gfx1010, gfx1100 and tonga runs, and 0xb800
; on the tahiti run, followed by a 16-bit buffer store.
; NOTE(review): the test name says inline_imm, but in the runs that print
; encodings the checked move is 8 bytes long with a trailing literal dword;
; confirm that this is the intended expectation before changing it.
; The assertions in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
197 define amdgpu_kernel void @store_inline_imm_m_0.5_f16(ptr addrspace(1) %out) {
198 ; GFX10-LABEL: store_inline_imm_m_0.5_f16:
199 ; GFX10:       ; %bb.0:
200 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
201 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
202 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
203 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
204 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
205 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
206 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
208 ; GFX11-LABEL: store_inline_imm_m_0.5_f16:
209 ; GFX11:       ; %bb.0:
210 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
211 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
212 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
213 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
214 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
215 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
216 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
217 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
218 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
220 ; VI-LABEL: store_inline_imm_m_0.5_f16:
221 ; VI:       ; %bb.0:
222 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
223 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
224 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
225 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
226 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
227 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
228 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
230 ; SI-LABEL: store_inline_imm_m_0.5_f16:
231 ; SI:       ; %bb.0:
232 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
233 ; SI-NEXT:    s_mov_b32 s3, 0xf000
234 ; SI-NEXT:    s_mov_b32 s2, -1
235 ; SI-NEXT:    v_mov_b32_e32 v0, 0xb800
236 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
237 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
238 ; SI-NEXT:    s_endpgm
239   store half -0.5, ptr addrspace(1) %out
240   ret void
; Stores the half constant 1.0 (bit pattern 0x3c00) through %out.
; All four runs expect `v_mov_b32_e32 v0, 0x3c00` followed by a 16-bit
; buffer store.
; NOTE(review): the test name says inline_imm, but in the runs that print
; encodings the checked move is 8 bytes long and carries 0x3c00 as a trailing
; literal dword; confirm that this is the intended expectation before
; changing it.
; The assertions in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
243 define amdgpu_kernel void @store_inline_imm_1.0_f16(ptr addrspace(1) %out) {
244 ; GFX10-LABEL: store_inline_imm_1.0_f16:
245 ; GFX10:       ; %bb.0:
246 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
247 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
248 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
249 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
250 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
251 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
252 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
254 ; GFX11-LABEL: store_inline_imm_1.0_f16:
255 ; GFX11:       ; %bb.0:
256 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
257 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
258 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
259 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
260 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
261 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
262 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
263 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
264 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
266 ; VI-LABEL: store_inline_imm_1.0_f16:
267 ; VI:       ; %bb.0:
268 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
269 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
270 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
271 ; VI-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
272 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
273 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
274 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
276 ; SI-LABEL: store_inline_imm_1.0_f16:
277 ; SI:       ; %bb.0:
278 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
279 ; SI-NEXT:    s_mov_b32 s3, 0xf000
280 ; SI-NEXT:    s_mov_b32 s2, -1
281 ; SI-NEXT:    v_mov_b32_e32 v0, 0x3c00
282 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
283 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
284 ; SI-NEXT:    s_endpgm
285   store half 1.0, ptr addrspace(1) %out
286   ret void
; Stores the half constant -1.0 (bit pattern 0xbc00) through %out.
; The checks expect a v_mov_b32 with the literal 0xffffbc00 (the 16-bit
; pattern sign-extended) on the gfx1010, gfx1100 and tonga runs, and 0xbc00
; on the tahiti run, followed by a 16-bit buffer store.
; NOTE(review): the test name says inline_imm, but in the runs that print
; encodings the checked move is 8 bytes long with a trailing literal dword;
; confirm that this is the intended expectation before changing it.
; The assertions in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
289 define amdgpu_kernel void @store_inline_imm_m_1.0_f16(ptr addrspace(1) %out) {
290 ; GFX10-LABEL: store_inline_imm_m_1.0_f16:
291 ; GFX10:       ; %bb.0:
292 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
293 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
294 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
295 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
296 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
297 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
298 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
300 ; GFX11-LABEL: store_inline_imm_m_1.0_f16:
301 ; GFX11:       ; %bb.0:
302 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
303 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
304 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
305 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
306 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
307 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
308 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
309 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
310 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
312 ; VI-LABEL: store_inline_imm_m_1.0_f16:
313 ; VI:       ; %bb.0:
314 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
315 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
316 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
317 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
318 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
319 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
320 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
322 ; SI-LABEL: store_inline_imm_m_1.0_f16:
323 ; SI:       ; %bb.0:
324 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
325 ; SI-NEXT:    s_mov_b32 s3, 0xf000
326 ; SI-NEXT:    s_mov_b32 s2, -1
327 ; SI-NEXT:    v_mov_b32_e32 v0, 0xbc00
328 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
329 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
330 ; SI-NEXT:    s_endpgm
331   store half -1.0, ptr addrspace(1) %out
332   ret void
; Stores the half constant 2.0 (bit pattern 0x4000) through %out.
; All four runs expect `v_mov_b32_e32 v0, 0x4000` followed by a 16-bit
; buffer store.
; NOTE(review): the test name says inline_imm, but in the runs that print
; encodings the checked move is 8 bytes long and carries 0x4000 as a trailing
; literal dword; confirm that this is the intended expectation before
; changing it.
; The assertions in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
335 define amdgpu_kernel void @store_inline_imm_2.0_f16(ptr addrspace(1) %out) {
336 ; GFX10-LABEL: store_inline_imm_2.0_f16:
337 ; GFX10:       ; %bb.0:
338 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
339 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
340 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
341 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
342 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
343 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
344 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
346 ; GFX11-LABEL: store_inline_imm_2.0_f16:
347 ; GFX11:       ; %bb.0:
348 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
349 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
350 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
351 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
352 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
353 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
354 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
355 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
356 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
358 ; VI-LABEL: store_inline_imm_2.0_f16:
359 ; VI:       ; %bb.0:
360 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
361 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
362 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
363 ; VI-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
364 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
365 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
366 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
368 ; SI-LABEL: store_inline_imm_2.0_f16:
369 ; SI:       ; %bb.0:
370 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
371 ; SI-NEXT:    s_mov_b32 s3, 0xf000
372 ; SI-NEXT:    s_mov_b32 s2, -1
373 ; SI-NEXT:    v_mov_b32_e32 v0, 0x4000
374 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
375 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
376 ; SI-NEXT:    s_endpgm
377   store half 2.0, ptr addrspace(1) %out
378   ret void
; Stores the half constant -2.0 (bit pattern 0xc000) through %out.
; The checks expect a v_mov_b32 with the literal 0xffffc000 (the 16-bit
; pattern sign-extended) on the gfx1010, gfx1100 and tonga runs, and 0xc000
; on the tahiti run, followed by a 16-bit buffer store.
; NOTE(review): the test name says inline_imm, but in the runs that print
; encodings the checked move is 8 bytes long with a trailing literal dword;
; confirm that this is the intended expectation before changing it.
; The assertions in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
381 define amdgpu_kernel void @store_inline_imm_m_2.0_f16(ptr addrspace(1) %out) {
382 ; GFX10-LABEL: store_inline_imm_m_2.0_f16:
383 ; GFX10:       ; %bb.0:
384 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
385 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
386 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
387 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
388 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
389 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
390 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
392 ; GFX11-LABEL: store_inline_imm_m_2.0_f16:
393 ; GFX11:       ; %bb.0:
394 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
395 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
396 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
397 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
398 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
399 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
400 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
401 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
402 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
404 ; VI-LABEL: store_inline_imm_m_2.0_f16:
405 ; VI:       ; %bb.0:
406 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
407 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
408 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
409 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
410 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
411 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
412 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
414 ; SI-LABEL: store_inline_imm_m_2.0_f16:
415 ; SI:       ; %bb.0:
416 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
417 ; SI-NEXT:    s_mov_b32 s3, 0xf000
418 ; SI-NEXT:    s_mov_b32 s2, -1
419 ; SI-NEXT:    v_mov_b32_e32 v0, 0xc000
420 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
421 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
422 ; SI-NEXT:    s_endpgm
423   store half -2.0, ptr addrspace(1) %out
424   ret void
; Stores the half constant 4.0 (bit pattern 0x4400) through %out.
; All four runs expect `v_mov_b32_e32 v0, 0x4400` followed by a 16-bit
; buffer store.
; NOTE(review): the test name says inline_imm, but in the runs that print
; encodings the checked move is 8 bytes long and carries 0x4400 as a trailing
; literal dword; confirm that this is the intended expectation before
; changing it.
; The assertions in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
427 define amdgpu_kernel void @store_inline_imm_4.0_f16(ptr addrspace(1) %out) {
428 ; GFX10-LABEL: store_inline_imm_4.0_f16:
429 ; GFX10:       ; %bb.0:
430 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
431 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
432 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
433 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
434 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
435 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
436 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
438 ; GFX11-LABEL: store_inline_imm_4.0_f16:
439 ; GFX11:       ; %bb.0:
440 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
441 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
442 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
443 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
444 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
445 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
446 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
447 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
448 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
450 ; VI-LABEL: store_inline_imm_4.0_f16:
451 ; VI:       ; %bb.0:
452 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
453 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
454 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
455 ; VI-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
456 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
457 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
458 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
460 ; SI-LABEL: store_inline_imm_4.0_f16:
461 ; SI:       ; %bb.0:
462 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
463 ; SI-NEXT:    s_mov_b32 s3, 0xf000
464 ; SI-NEXT:    s_mov_b32 s2, -1
465 ; SI-NEXT:    v_mov_b32_e32 v0, 0x4400
466 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
467 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
468 ; SI-NEXT:    s_endpgm
469   store half 4.0, ptr addrspace(1) %out
470   ret void
; Stores the half constant -4.0 (bit pattern 0xc400) through %out.
; The checks expect a v_mov_b32 with the literal 0xffffc400 (the 16-bit
; pattern sign-extended) on the gfx1010, gfx1100 and tonga runs, and 0xc400
; on the tahiti run, followed by a 16-bit buffer store.
; NOTE(review): the test name says inline_imm, but in the runs that print
; encodings the checked move is 8 bytes long with a trailing literal dword;
; confirm that this is the intended expectation before changing it.
; The assertions in this function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
473 define amdgpu_kernel void @store_inline_imm_m_4.0_f16(ptr addrspace(1) %out) {
474 ; GFX10-LABEL: store_inline_imm_m_4.0_f16:
475 ; GFX10:       ; %bb.0:
476 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
477 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
478 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
479 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
480 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
481 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
482 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
484 ; GFX11-LABEL: store_inline_imm_m_4.0_f16:
485 ; GFX11:       ; %bb.0:
486 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
487 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
488 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
489 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
490 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
491 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
492 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
493 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
494 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
496 ; VI-LABEL: store_inline_imm_m_4.0_f16:
497 ; VI:       ; %bb.0:
498 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
499 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
500 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
501 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
502 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
503 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
504 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
506 ; SI-LABEL: store_inline_imm_m_4.0_f16:
507 ; SI:       ; %bb.0:
508 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
509 ; SI-NEXT:    s_mov_b32 s3, 0xf000
510 ; SI-NEXT:    s_mov_b32 s2, -1
511 ; SI-NEXT:    v_mov_b32_e32 v0, 0xc400
512 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
513 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
514 ; SI-NEXT:    s_endpgm
515   store half -4.0, ptr addrspace(1) %out
516   ret void
; Stores the half constant 0xH3118 (about 0.1592, i.e. roughly 1/(2*pi)) to %out.
; Every check prefix below expects the bits 0x3118 to be moved into v0 with
; v_mov_b32 and then written with a 16-bit buffer store (buffer_store_short, or
; buffer_store_b16 in the gfx1100 checks).
519 define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(ptr addrspace(1) %out) {
520 ; GFX10-LABEL: store_inline_imm_inv_2pi_f16:
521 ; GFX10:       ; %bb.0:
522 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
523 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
524 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
525 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
526 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
527 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
528 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
530 ; GFX11-LABEL: store_inline_imm_inv_2pi_f16:
531 ; GFX11:       ; %bb.0:
532 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
533 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
534 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
535 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
536 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
537 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
538 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
539 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
540 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
542 ; VI-LABEL: store_inline_imm_inv_2pi_f16:
543 ; VI:       ; %bb.0:
544 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
545 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
546 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
547 ; VI-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
548 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
549 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
550 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
552 ; SI-LABEL: store_inline_imm_inv_2pi_f16:
553 ; SI:       ; %bb.0:
554 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
555 ; SI-NEXT:    s_mov_b32 s3, 0xf000
556 ; SI-NEXT:    s_mov_b32 s2, -1
557 ; SI-NEXT:    v_mov_b32_e32 v0, 0x3118
558 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
559 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
560 ; SI-NEXT:    s_endpgm
561   store half 0xH3118, ptr addrspace(1) %out
562   ret void
; Stores the half constant 0xHB118 (0xH3118 with the sign bit set) to %out.
; The gfx1010, gfx1100 and tonga checks expect the sign-extended 32-bit value
; 0xffffb118 in v0, while the tahiti checks expect 0xb118; each sequence then
; finishes with a 16-bit buffer store.
565 define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(ptr addrspace(1) %out) {
566 ; GFX10-LABEL: store_inline_imm_m_inv_2pi_f16:
567 ; GFX10:       ; %bb.0:
568 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
569 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
570 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
571 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
572 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
573 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
574 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
576 ; GFX11-LABEL: store_inline_imm_m_inv_2pi_f16:
577 ; GFX11:       ; %bb.0:
578 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
579 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
580 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
581 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
582 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
583 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
584 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
585 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
586 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
588 ; VI-LABEL: store_inline_imm_m_inv_2pi_f16:
589 ; VI:       ; %bb.0:
590 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
591 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
592 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
593 ; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
594 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
595 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
596 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
598 ; SI-LABEL: store_inline_imm_m_inv_2pi_f16:
599 ; SI:       ; %bb.0:
600 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
601 ; SI-NEXT:    s_mov_b32 s3, 0xf000
602 ; SI-NEXT:    s_mov_b32 s2, -1
603 ; SI-NEXT:    v_mov_b32_e32 v0, 0xb118
604 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
605 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
606 ; SI-NEXT:    s_endpgm
607   store half 0xHB118, ptr addrspace(1) %out
608   ret void
; Stores half 4096.0 to %out. All four check prefixes expect the f16 bit
; pattern 0x6c00 to be moved into v0 with v_mov_b32 and then written with a
; 16-bit buffer store.
611 define amdgpu_kernel void @store_literal_imm_f16(ptr addrspace(1) %out) {
612 ; GFX10-LABEL: store_literal_imm_f16:
613 ; GFX10:       ; %bb.0:
614 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
615 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
616 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
617 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
618 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
619 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
620 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
622 ; GFX11-LABEL: store_literal_imm_f16:
623 ; GFX11:       ; %bb.0:
624 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
625 ; GFX11-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
626 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
627 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
628 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
629 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
630 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
631 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
632 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
634 ; VI-LABEL: store_literal_imm_f16:
635 ; VI:       ; %bb.0:
636 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
637 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
638 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
639 ; VI-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
640 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
641 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
642 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
644 ; SI-LABEL: store_literal_imm_f16:
645 ; SI:       ; %bb.0:
646 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
647 ; SI-NEXT:    s_mov_b32 s3, 0xf000
648 ; SI-NEXT:    s_mov_b32 s2, -1
649 ; SI-NEXT:    v_mov_b32_e32 v0, 0x6c00
650 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
651 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
652 ; SI-NEXT:    s_endpgm
653   store half 4096.0, ptr addrspace(1) %out
654   ret void
; Computes %x + 0.0 in half precision and stores the result to %out.
; The gfx1010, gfx1100 and tonga checks expect one v_add_f16_e64 that takes the
; constant 0 directly as its second source operand. The tahiti checks instead
; expect %x to be converted to f32, added with v_add_f32_e32, and converted
; back to f16 before the 16-bit store.
657 define amdgpu_kernel void @add_inline_imm_0.0_f16(ptr addrspace(1) %out, half %x) {
658 ; GFX10-LABEL: add_inline_imm_0.0_f16:
659 ; GFX10:       ; %bb.0:
660 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
661 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
662 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
663 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
664 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
665 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00]
666 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
667 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
668 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
670 ; GFX11-LABEL: add_inline_imm_0.0_f16:
671 ; GFX11:       ; %bb.0:
672 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
673 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
674 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
675 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
676 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
677 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00]
678 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
679 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
680 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
681 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
682 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
684 ; VI-LABEL: add_inline_imm_0.0_f16:
685 ; VI:       ; %bb.0:
686 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
687 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
688 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
689 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
690 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
691 ; VI-NEXT:    v_add_f16_e64 v0, s6, 0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x00,0x01,0x00]
692 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
693 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
695 ; SI-LABEL: add_inline_imm_0.0_f16:
696 ; SI:       ; %bb.0:
697 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
698 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
699 ; SI-NEXT:    s_mov_b32 s3, 0xf000
700 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
701 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
702 ; SI-NEXT:    s_mov_b32 s2, -1
703 ; SI-NEXT:    v_add_f32_e32 v0, 0, v0
704 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
705 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
706 ; SI-NEXT:    s_endpgm
707   %y = fadd half %x, 0.0
708   store half %y, ptr addrspace(1) %out
709   ret void
; Computes %x + 0.5 in half precision and stores the result to %out.
; The gfx1010, gfx1100 and tonga checks expect one v_add_f16_e64 that takes 0.5
; directly as its second source operand. The tahiti checks instead expect %x to
; be converted to f32, added to 0.5 with v_add_f32_e32, and converted back to
; f16 before the 16-bit store.
712 define amdgpu_kernel void @add_inline_imm_0.5_f16(ptr addrspace(1) %out, half %x) {
713 ; GFX10-LABEL: add_inline_imm_0.5_f16:
714 ; GFX10:       ; %bb.0:
715 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
716 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
717 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
718 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
719 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
720 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00]
721 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
722 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
723 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
725 ; GFX11-LABEL: add_inline_imm_0.5_f16:
726 ; GFX11:       ; %bb.0:
727 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
728 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
729 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
730 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
731 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
732 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00]
733 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
734 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
735 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
736 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
737 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
739 ; VI-LABEL: add_inline_imm_0.5_f16:
740 ; VI:       ; %bb.0:
741 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
742 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
743 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
744 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
745 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
746 ; VI-NEXT:    v_add_f16_e64 v0, s6, 0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe0,0x01,0x00]
747 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
748 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
750 ; SI-LABEL: add_inline_imm_0.5_f16:
751 ; SI:       ; %bb.0:
752 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
753 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
754 ; SI-NEXT:    s_mov_b32 s3, 0xf000
755 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
756 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
757 ; SI-NEXT:    s_mov_b32 s2, -1
758 ; SI-NEXT:    v_add_f32_e32 v0, 0.5, v0
759 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
760 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
761 ; SI-NEXT:    s_endpgm
762   %y = fadd half %x, 0.5
763   store half %y, ptr addrspace(1) %out
764   ret void
; Computes %x + (-0.5) in half precision and stores the result to %out.
; The gfx1010, gfx1100 and tonga checks expect one v_add_f16_e64 that takes -0.5
; directly as its second source operand. The tahiti checks instead expect %x to
; be converted to f32, added to -0.5 with v_add_f32_e32, and converted back to
; f16 before the 16-bit store.
767 define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(ptr addrspace(1) %out, half %x) {
768 ; GFX10-LABEL: add_inline_imm_neg_0.5_f16:
769 ; GFX10:       ; %bb.0:
770 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
771 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
772 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
773 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
774 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
775 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00]
776 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
777 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
778 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
780 ; GFX11-LABEL: add_inline_imm_neg_0.5_f16:
781 ; GFX11:       ; %bb.0:
782 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
783 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
784 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
785 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
786 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
787 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00]
788 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
789 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
790 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
791 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
792 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
794 ; VI-LABEL: add_inline_imm_neg_0.5_f16:
795 ; VI:       ; %bb.0:
796 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
797 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
798 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
799 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
800 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
801 ; VI-NEXT:    v_add_f16_e64 v0, s6, -0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe2,0x01,0x00]
802 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
803 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
805 ; SI-LABEL: add_inline_imm_neg_0.5_f16:
806 ; SI:       ; %bb.0:
807 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
808 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
809 ; SI-NEXT:    s_mov_b32 s3, 0xf000
810 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
811 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
812 ; SI-NEXT:    s_mov_b32 s2, -1
813 ; SI-NEXT:    v_add_f32_e32 v0, -0.5, v0
814 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
815 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
816 ; SI-NEXT:    s_endpgm
817   %y = fadd half %x, -0.5
818   store half %y, ptr addrspace(1) %out
819   ret void
; Computes %x + 1.0 in half precision and stores the result to %out.
; The gfx1010, gfx1100 and tonga checks expect one v_add_f16_e64 that takes 1.0
; directly as its second source operand. The tahiti checks instead expect %x to
; be converted to f32, added to 1.0 with v_add_f32_e32, and converted back to
; f16 before the 16-bit store.
822 define amdgpu_kernel void @add_inline_imm_1.0_f16(ptr addrspace(1) %out, half %x) {
823 ; GFX10-LABEL: add_inline_imm_1.0_f16:
824 ; GFX10:       ; %bb.0:
825 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
826 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
827 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
828 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
829 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
830 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00]
831 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
832 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
833 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
835 ; GFX11-LABEL: add_inline_imm_1.0_f16:
836 ; GFX11:       ; %bb.0:
837 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
838 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
839 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
840 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
841 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
842 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00]
843 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
844 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
845 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
846 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
847 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
849 ; VI-LABEL: add_inline_imm_1.0_f16:
850 ; VI:       ; %bb.0:
851 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
852 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
853 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
854 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
855 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
856 ; VI-NEXT:    v_add_f16_e64 v0, s6, 1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe4,0x01,0x00]
857 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
858 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
860 ; SI-LABEL: add_inline_imm_1.0_f16:
861 ; SI:       ; %bb.0:
862 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
863 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
864 ; SI-NEXT:    s_mov_b32 s3, 0xf000
865 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
866 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
867 ; SI-NEXT:    s_mov_b32 s2, -1
868 ; SI-NEXT:    v_add_f32_e32 v0, 1.0, v0
869 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
870 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
871 ; SI-NEXT:    s_endpgm
872   %y = fadd half %x, 1.0
873   store half %y, ptr addrspace(1) %out
874   ret void
; Computes %x + (-1.0) in half precision and stores the result to %out.
; The gfx1010, gfx1100 and tonga checks expect one v_add_f16_e64 that takes -1.0
; directly as its second source operand. The tahiti checks instead expect %x to
; be converted to f32, added to -1.0 with v_add_f32_e32, and converted back to
; f16 before the 16-bit store.
877 define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(ptr addrspace(1) %out, half %x) {
878 ; GFX10-LABEL: add_inline_imm_neg_1.0_f16:
879 ; GFX10:       ; %bb.0:
880 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
881 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
882 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
883 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
884 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
885 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00]
886 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
887 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
888 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
890 ; GFX11-LABEL: add_inline_imm_neg_1.0_f16:
891 ; GFX11:       ; %bb.0:
892 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
893 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
894 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
895 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
896 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
897 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00]
898 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
899 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
900 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
901 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
902 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
904 ; VI-LABEL: add_inline_imm_neg_1.0_f16:
905 ; VI:       ; %bb.0:
906 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
907 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
908 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
909 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
910 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
911 ; VI-NEXT:    v_add_f16_e64 v0, s6, -1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe6,0x01,0x00]
912 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
913 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
915 ; SI-LABEL: add_inline_imm_neg_1.0_f16:
916 ; SI:       ; %bb.0:
917 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
918 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
919 ; SI-NEXT:    s_mov_b32 s3, 0xf000
920 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
921 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
922 ; SI-NEXT:    s_mov_b32 s2, -1
923 ; SI-NEXT:    v_add_f32_e32 v0, -1.0, v0
924 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
925 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
926 ; SI-NEXT:    s_endpgm
927   %y = fadd half %x, -1.0
928   store half %y, ptr addrspace(1) %out
929   ret void
; Computes %x + 2.0 in half precision and stores the result to %out.
; The gfx1010, gfx1100 and tonga checks expect one v_add_f16_e64 that takes 2.0
; directly as its second source operand. The tahiti checks instead expect %x to
; be converted to f32, added to 2.0 with v_add_f32_e32, and converted back to
; f16 before the 16-bit store.
932 define amdgpu_kernel void @add_inline_imm_2.0_f16(ptr addrspace(1) %out, half %x) {
933 ; GFX10-LABEL: add_inline_imm_2.0_f16:
934 ; GFX10:       ; %bb.0:
935 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
936 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
937 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
938 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
939 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
940 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00]
941 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
942 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
943 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
945 ; GFX11-LABEL: add_inline_imm_2.0_f16:
946 ; GFX11:       ; %bb.0:
947 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
948 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
949 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
950 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
951 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
952 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00]
953 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
954 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
955 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
956 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
957 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
959 ; VI-LABEL: add_inline_imm_2.0_f16:
960 ; VI:       ; %bb.0:
961 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
962 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
963 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
964 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
965 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
966 ; VI-NEXT:    v_add_f16_e64 v0, s6, 2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe8,0x01,0x00]
967 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
968 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
970 ; SI-LABEL: add_inline_imm_2.0_f16:
971 ; SI:       ; %bb.0:
972 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
973 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
974 ; SI-NEXT:    s_mov_b32 s3, 0xf000
975 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
976 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
977 ; SI-NEXT:    s_mov_b32 s2, -1
978 ; SI-NEXT:    v_add_f32_e32 v0, 2.0, v0
979 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
980 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
981 ; SI-NEXT:    s_endpgm
982   %y = fadd half %x, 2.0
983   store half %y, ptr addrspace(1) %out
984   ret void
; Computes %x + (-2.0) in half precision and stores the result to %out.
; The gfx1010, gfx1100 and tonga checks expect one v_add_f16_e64 that takes -2.0
; directly as its second source operand. The tahiti checks instead expect %x to
; be converted to f32, added to -2.0 with v_add_f32_e32, and converted back to
; f16 before the 16-bit store.
987 define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(ptr addrspace(1) %out, half %x) {
988 ; GFX10-LABEL: add_inline_imm_neg_2.0_f16:
989 ; GFX10:       ; %bb.0:
990 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
991 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
992 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
993 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
994 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
995 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00]
996 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
997 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
998 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1000 ; GFX11-LABEL: add_inline_imm_neg_2.0_f16:
1001 ; GFX11:       ; %bb.0:
1002 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1003 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1004 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1005 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1006 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1007 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00]
1008 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1009 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1010 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
1011 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1012 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1014 ; VI-LABEL: add_inline_imm_neg_2.0_f16:
1015 ; VI:       ; %bb.0:
1016 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1017 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1018 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1019 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1020 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1021 ; VI-NEXT:    v_add_f16_e64 v0, s6, -2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xea,0x01,0x00]
1022 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1023 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1025 ; SI-LABEL: add_inline_imm_neg_2.0_f16:
1026 ; SI:       ; %bb.0:
1027 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1028 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1029 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1030 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1031 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1032 ; SI-NEXT:    s_mov_b32 s2, -1
1033 ; SI-NEXT:    v_add_f32_e32 v0, -2.0, v0
1034 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1035 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1036 ; SI-NEXT:    s_endpgm
1037   %y = fadd half %x, -2.0
1038   store half %y, ptr addrspace(1) %out
1039   ret void
; Adds the constant 4.0 to the half kernel argument %x and stores the sum to
; %out. The GFX10, GFX11 and VI checks below expect one v_add_f16_e64 with 4.0
; written directly as an operand; the SI checks expect the argument to be
; converted to f32, added with v_add_f32_e32 and converted back to f16.
1042 define amdgpu_kernel void @add_inline_imm_4.0_f16(ptr addrspace(1) %out, half %x) {
1043 ; GFX10-LABEL: add_inline_imm_4.0_f16:
1044 ; GFX10:       ; %bb.0:
1045 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1046 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1047 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1048 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1049 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1050 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00]
1051 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1052 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1053 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1055 ; GFX11-LABEL: add_inline_imm_4.0_f16:
1056 ; GFX11:       ; %bb.0:
1057 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1058 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1059 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1060 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1061 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1062 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00]
1063 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1064 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1065 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
1066 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1067 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1069 ; VI-LABEL: add_inline_imm_4.0_f16:
1070 ; VI:       ; %bb.0:
1071 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1072 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1073 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1074 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1075 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1076 ; VI-NEXT:    v_add_f16_e64 v0, s6, 4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xec,0x01,0x00]
1077 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1078 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1080 ; SI-LABEL: add_inline_imm_4.0_f16:
1081 ; SI:       ; %bb.0:
1082 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1083 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1084 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1085 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1086 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1087 ; SI-NEXT:    s_mov_b32 s2, -1
1088 ; SI-NEXT:    v_add_f32_e32 v0, 4.0, v0
1089 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1090 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1091 ; SI-NEXT:    s_endpgm
; IR under test; the generated assertions above describe its expected lowering.
1092   %y = fadd half %x, 4.0
1093   store half %y, ptr addrspace(1) %out
1094   ret void
; Adds the constant -4.0 to the half kernel argument %x and stores the sum to
; %out. The GFX10, GFX11 and VI checks below expect one v_add_f16_e64 with -4.0
; written directly as an operand; the SI checks expect the argument to be
; converted to f32, added with v_add_f32_e32 and converted back to f16.
1097 define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(ptr addrspace(1) %out, half %x) {
1098 ; GFX10-LABEL: add_inline_imm_neg_4.0_f16:
1099 ; GFX10:       ; %bb.0:
1100 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1101 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1102 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1103 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1104 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1105 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00]
1106 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1107 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1108 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1110 ; GFX11-LABEL: add_inline_imm_neg_4.0_f16:
1111 ; GFX11:       ; %bb.0:
1112 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1113 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1114 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1115 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1116 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1117 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00]
1118 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1119 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1120 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
1121 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1122 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1124 ; VI-LABEL: add_inline_imm_neg_4.0_f16:
1125 ; VI:       ; %bb.0:
1126 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1127 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1128 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1129 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1130 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1131 ; VI-NEXT:    v_add_f16_e64 v0, s6, -4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xee,0x01,0x00]
1132 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1133 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1135 ; SI-LABEL: add_inline_imm_neg_4.0_f16:
1136 ; SI:       ; %bb.0:
1137 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1138 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1139 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1140 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1141 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1142 ; SI-NEXT:    s_mov_b32 s2, -1
1143 ; SI-NEXT:    v_add_f32_e32 v0, -4.0, v0
1144 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1145 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1146 ; SI-NEXT:    s_endpgm
; IR under test; the generated assertions above describe its expected lowering.
1147   %y = fadd half %x, -4.0
1148   store half %y, ptr addrspace(1) %out
1149   ret void
; Loads a half from %in, adds the constant 0.5 and stores the sum to %out.
; In the IR the constant is the second fadd operand; the GFX10, GFX11 and VI
; checks below expect it as the first source of a 4-byte v_add_f16_e32
; (v_add_f16_e32 v0, 0.5, v0). The SI checks expect the loaded value to be
; converted to f32, added with v_add_f32_e32 and converted back to f16.
1152 define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1153 ; GFX10-LABEL: commute_add_inline_imm_0.5_f16:
1154 ; GFX10:       ; %bb.0:
1155 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1156 ; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1157 ; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1158 ; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1159 ; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1160 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1161 ; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1162 ; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1163 ; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1164 ; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1165 ; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1166 ; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1167 ; GFX10-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64]
1168 ; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1169 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1171 ; GFX11-LABEL: commute_add_inline_imm_0.5_f16:
1172 ; GFX11:       ; %bb.0:
1173 ; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1174 ; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1175 ; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1176 ; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1177 ; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1178 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1179 ; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1180 ; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1181 ; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1182 ; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1183 ; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1184 ; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1185 ; GFX11-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64]
1186 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1187 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
1188 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1189 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1191 ; VI-LABEL: commute_add_inline_imm_0.5_f16:
1192 ; VI:       ; %bb.0:
1193 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1194 ; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1195 ; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1196 ; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1197 ; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1198 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1199 ; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1200 ; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1201 ; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1202 ; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1203 ; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1204 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1205 ; VI-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x3e]
1206 ; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1207 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1209 ; SI-LABEL: commute_add_inline_imm_0.5_f16:
1210 ; SI:       ; %bb.0:
1211 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1212 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1213 ; SI-NEXT:    s_mov_b32 s6, -1
1214 ; SI-NEXT:    s_mov_b32 s10, s6
1215 ; SI-NEXT:    s_mov_b32 s11, s7
1216 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1217 ; SI-NEXT:    s_mov_b32 s8, s2
1218 ; SI-NEXT:    s_mov_b32 s9, s3
1219 ; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1220 ; SI-NEXT:    s_mov_b32 s4, s0
1221 ; SI-NEXT:    s_mov_b32 s5, s1
1222 ; SI-NEXT:    s_waitcnt vmcnt(0)
1223 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1224 ; SI-NEXT:    v_add_f32_e32 v0, 0.5, v0
1225 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1226 ; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1227 ; SI-NEXT:    s_endpgm
; IR under test; the generated assertions above describe its expected lowering.
1228   %x = load half, ptr addrspace(1) %in
1229   %y = fadd half %x, 0.5
1230   store half %y, ptr addrspace(1) %out
1231   ret void
; Loads a half from %in, adds the constant 1024.0 and stores the sum to %out.
; The GFX10, GFX11 and VI checks below expect the constant as the first source
; of v_add_f16_e32, printed as 0x6400, with an 8-byte encoding whose last four
; bytes are 0x00,0x64,0x00,0x00. The SI checks expect the loaded value to be
; converted to f32 and added with v_add_f32_e32 using 0x44800000.
1234 define amdgpu_kernel void @commute_add_literal_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1235 ; GFX10-LABEL: commute_add_literal_f16:
1236 ; GFX10:       ; %bb.0:
1237 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1238 ; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1239 ; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1240 ; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1241 ; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1242 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1243 ; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1244 ; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1245 ; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1246 ; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1247 ; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1248 ; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1249 ; GFX10-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00]
1250 ; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1251 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1253 ; GFX11-LABEL: commute_add_literal_f16:
1254 ; GFX11:       ; %bb.0:
1255 ; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1256 ; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1257 ; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1258 ; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1259 ; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1260 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1261 ; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1262 ; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1263 ; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1264 ; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1265 ; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1266 ; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1267 ; GFX11-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00]
1268 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1269 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
1270 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1271 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1273 ; VI-LABEL: commute_add_literal_f16:
1274 ; VI:       ; %bb.0:
1275 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1276 ; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1277 ; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1278 ; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1279 ; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1280 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1281 ; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1282 ; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1283 ; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1284 ; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1285 ; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1286 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1287 ; VI-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x3e,0x00,0x64,0x00,0x00]
1288 ; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1289 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1291 ; SI-LABEL: commute_add_literal_f16:
1292 ; SI:       ; %bb.0:
1293 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1294 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1295 ; SI-NEXT:    s_mov_b32 s6, -1
1296 ; SI-NEXT:    s_mov_b32 s10, s6
1297 ; SI-NEXT:    s_mov_b32 s11, s7
1298 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1299 ; SI-NEXT:    s_mov_b32 s8, s2
1300 ; SI-NEXT:    s_mov_b32 s9, s3
1301 ; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1302 ; SI-NEXT:    s_mov_b32 s4, s0
1303 ; SI-NEXT:    s_mov_b32 s5, s1
1304 ; SI-NEXT:    s_waitcnt vmcnt(0)
1305 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1306 ; SI-NEXT:    v_add_f32_e32 v0, 0x44800000, v0
1307 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1308 ; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1309 ; SI-NEXT:    s_endpgm
; IR under test; the generated assertions above describe its expected lowering.
1310   %x = load half, ptr addrspace(1) %in
1311   %y = fadd half %x, 1024.0
1312   store half %y, ptr addrspace(1) %out
1313   ret void
; Adds the half constant written as 0xH0001 to the kernel argument %x and
; stores the sum to %out. The GFX10, GFX11 and VI checks below expect the
; constant to be printed as the operand `1` of v_add_f16_e64. The SI checks
; expect the argument to be converted to f32 and added with v_add_f32_e32
; using 0x33800000, then converted back to f16.
1316 define amdgpu_kernel void @add_inline_imm_1_f16(ptr addrspace(1) %out, half %x) {
1317 ; GFX10-LABEL: add_inline_imm_1_f16:
1318 ; GFX10:       ; %bb.0:
1319 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1320 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1321 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1322 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1323 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1324 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00]
1325 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1326 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1327 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1329 ; GFX11-LABEL: add_inline_imm_1_f16:
1330 ; GFX11:       ; %bb.0:
1331 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1332 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1333 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1334 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1335 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1336 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00]
1337 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1338 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1339 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
1340 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1341 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1343 ; VI-LABEL: add_inline_imm_1_f16:
1344 ; VI:       ; %bb.0:
1345 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1346 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1347 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1348 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1349 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1350 ; VI-NEXT:    v_add_f16_e64 v0, s6, 1 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x02,0x01,0x00]
1351 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1352 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1354 ; SI-LABEL: add_inline_imm_1_f16:
1355 ; SI:       ; %bb.0:
1356 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1357 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1358 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1359 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1360 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1361 ; SI-NEXT:    s_mov_b32 s2, -1
1362 ; SI-NEXT:    v_add_f32_e32 v0, 0x33800000, v0
1363 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1364 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1365 ; SI-NEXT:    s_endpgm
; IR under test; the generated assertions above describe its expected lowering.
1366   %y = fadd half %x, 0xH0001
1367   store half %y, ptr addrspace(1) %out
1368   ret void
; Adds the half constant written as 0xH0002 to the kernel argument %x and
; stores the sum to %out. The GFX10, GFX11 and VI checks below expect the
; constant to be printed as the operand `2` of v_add_f16_e64. The SI checks
; expect the argument to be converted to f32 and added with v_add_f32_e32
; using 0x34000000, then converted back to f16.
1371 define amdgpu_kernel void @add_inline_imm_2_f16(ptr addrspace(1) %out, half %x) {
1372 ; GFX10-LABEL: add_inline_imm_2_f16:
1373 ; GFX10:       ; %bb.0:
1374 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1375 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1376 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1377 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1378 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1379 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00]
1380 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1381 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1382 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1384 ; GFX11-LABEL: add_inline_imm_2_f16:
1385 ; GFX11:       ; %bb.0:
1386 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1387 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1388 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1389 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1390 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1391 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00]
1392 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1393 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1394 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
1395 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1396 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1398 ; VI-LABEL: add_inline_imm_2_f16:
1399 ; VI:       ; %bb.0:
1400 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1401 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1402 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1403 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1404 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1405 ; VI-NEXT:    v_add_f16_e64 v0, s6, 2 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x04,0x01,0x00]
1406 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1407 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1409 ; SI-LABEL: add_inline_imm_2_f16:
1410 ; SI:       ; %bb.0:
1411 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1412 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1413 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1414 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1415 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1416 ; SI-NEXT:    s_mov_b32 s2, -1
1417 ; SI-NEXT:    v_add_f32_e32 v0, 0x34000000, v0
1418 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1419 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1420 ; SI-NEXT:    s_endpgm
; IR under test; the generated assertions above describe its expected lowering.
1421   %y = fadd half %x, 0xH0002
1422   store half %y, ptr addrspace(1) %out
1423   ret void
; Adds the half constant written as 0xH0010 to the kernel argument %x and
; stores the sum to %out. The GFX10, GFX11 and VI checks below expect the
; constant to be printed as the operand `16` of v_add_f16_e64. The SI checks
; expect the argument to be converted to f32 and added with v_add_f32_e32
; using 0x35800000, then converted back to f16.
1426 define amdgpu_kernel void @add_inline_imm_16_f16(ptr addrspace(1) %out, half %x) {
1427 ; GFX10-LABEL: add_inline_imm_16_f16:
1428 ; GFX10:       ; %bb.0:
1429 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1430 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1431 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1432 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1433 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1434 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00]
1435 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1436 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1437 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1439 ; GFX11-LABEL: add_inline_imm_16_f16:
1440 ; GFX11:       ; %bb.0:
1441 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1442 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1443 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1444 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1445 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1446 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00]
1447 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1448 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1449 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
1450 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1451 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1453 ; VI-LABEL: add_inline_imm_16_f16:
1454 ; VI:       ; %bb.0:
1455 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1456 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1457 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1458 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1459 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1460 ; VI-NEXT:    v_add_f16_e64 v0, s6, 16 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x20,0x01,0x00]
1461 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1462 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1464 ; SI-LABEL: add_inline_imm_16_f16:
1465 ; SI:       ; %bb.0:
1466 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1467 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1468 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1469 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1470 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1471 ; SI-NEXT:    s_mov_b32 s2, -1
1472 ; SI-NEXT:    v_add_f32_e32 v0, 0x35800000, v0
1473 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1474 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1475 ; SI-NEXT:    s_endpgm
; IR under test; the generated assertions above describe its expected lowering.
1476   %y = fadd half %x, 0xH0010
1477   store half %y, ptr addrspace(1) %out
1478   ret void
; Despite the _f16 suffix, the IR here performs an integer add. It loads an
; i16 from %in, adds -1, bitcasts the result to half and stores it to %out.
; The checks below therefore expect integer add instructions with -1 as an
; operand (v_add_nc_u16 for GFX10 and GFX11, v_add_u16_e32 for VI,
; v_add_i32_e32 for SI) and no f16/f32 conversion.
1481 define amdgpu_kernel void @add_inline_imm_neg_1_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1482 ; GFX10-LABEL: add_inline_imm_neg_1_f16:
1483 ; GFX10:       ; %bb.0:
1484 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1485 ; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1486 ; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1487 ; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1488 ; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1489 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1490 ; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1491 ; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1492 ; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1493 ; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1494 ; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1495 ; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1496 ; GFX10-NEXT:    v_add_nc_u16 v0, v0, -1 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x83,0x01,0x00]
1497 ; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1498 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1500 ; GFX11-LABEL: add_inline_imm_neg_1_f16:
1501 ; GFX11:       ; %bb.0:
1502 ; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1503 ; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1504 ; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1505 ; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1506 ; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1507 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1508 ; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1509 ; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1510 ; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1511 ; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1512 ; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1513 ; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1514 ; GFX11-NEXT:    v_add_nc_u16 v0, v0, -1 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x83,0x01,0x00]
1515 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1516 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
1517 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1518 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1520 ; VI-LABEL: add_inline_imm_neg_1_f16:
1521 ; VI:       ; %bb.0:
1522 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1523 ; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1524 ; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1525 ; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1526 ; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1527 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1528 ; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1529 ; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1530 ; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1531 ; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1532 ; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1533 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1534 ; VI-NEXT:    v_add_u16_e32 v0, -1, v0 ; encoding: [0xc1,0x00,0x00,0x4c]
1535 ; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1536 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1538 ; SI-LABEL: add_inline_imm_neg_1_f16:
1539 ; SI:       ; %bb.0:
1540 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1541 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1542 ; SI-NEXT:    s_mov_b32 s6, -1
1543 ; SI-NEXT:    s_mov_b32 s10, s6
1544 ; SI-NEXT:    s_mov_b32 s11, s7
1545 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1546 ; SI-NEXT:    s_mov_b32 s8, s2
1547 ; SI-NEXT:    s_mov_b32 s9, s3
1548 ; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1549 ; SI-NEXT:    s_mov_b32 s4, s0
1550 ; SI-NEXT:    s_mov_b32 s5, s1
1551 ; SI-NEXT:    s_waitcnt vmcnt(0)
1552 ; SI-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
1553 ; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1554 ; SI-NEXT:    s_endpgm
; IR under test; the add is done on i16 and only the stored value is typed half.
1555   %x = load i16, ptr addrspace(1) %in
1556   %y = add i16 %x, -1
1557   %ybc = bitcast i16 %y to half
1558   store half %ybc, ptr addrspace(1) %out
1559   ret void
; Loads an i16 from %in, adds the integer -2, bitcasts the sum to half and
; stores it to %out. The checks expect the -2 to appear directly as an operand
; of the 16-bit integer add on gfx1010, gfx1100 and tonga (8-byte, 8-byte and
; 4-byte encodings respectively, i.e. no trailing literal dword), and as an
; operand of a 32-bit v_add_i32_e32 on tahiti.
1562 define amdgpu_kernel void @add_inline_imm_neg_2_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1563 ; GFX10-LABEL: add_inline_imm_neg_2_f16:
1564 ; GFX10:       ; %bb.0:
1565 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1566 ; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1567 ; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1568 ; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1569 ; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1570 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1571 ; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1572 ; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1573 ; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1574 ; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1575 ; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1576 ; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1577 ; GFX10-NEXT:    v_add_nc_u16 v0, v0, -2 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x85,0x01,0x00]
1578 ; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1579 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1581 ; GFX11-LABEL: add_inline_imm_neg_2_f16:
1582 ; GFX11:       ; %bb.0:
1583 ; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1584 ; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1585 ; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1586 ; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1587 ; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1588 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1589 ; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1590 ; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1591 ; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1592 ; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1593 ; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1594 ; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1595 ; GFX11-NEXT:    v_add_nc_u16 v0, v0, -2 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x85,0x01,0x00]
1596 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1597 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
1598 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1599 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1601 ; VI-LABEL: add_inline_imm_neg_2_f16:
1602 ; VI:       ; %bb.0:
1603 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1604 ; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1605 ; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1606 ; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1607 ; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1608 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1609 ; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1610 ; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1611 ; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1612 ; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1613 ; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1614 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1615 ; VI-NEXT:    v_add_u16_e32 v0, -2, v0 ; encoding: [0xc2,0x00,0x00,0x4c]
1616 ; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1617 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1619 ; SI-LABEL: add_inline_imm_neg_2_f16:
1620 ; SI:       ; %bb.0:
1621 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1622 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1623 ; SI-NEXT:    s_mov_b32 s6, -1
1624 ; SI-NEXT:    s_mov_b32 s10, s6
1625 ; SI-NEXT:    s_mov_b32 s11, s7
1626 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1627 ; SI-NEXT:    s_mov_b32 s8, s2
1628 ; SI-NEXT:    s_mov_b32 s9, s3
1629 ; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1630 ; SI-NEXT:    s_mov_b32 s4, s0
1631 ; SI-NEXT:    s_mov_b32 s5, s1
1632 ; SI-NEXT:    s_waitcnt vmcnt(0)
1633 ; SI-NEXT:    v_add_i32_e32 v0, vcc, -2, v0
1634 ; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1635 ; SI-NEXT:    s_endpgm
1636   %x = load i16, ptr addrspace(1) %in
1637   %y = add i16 %x, -2
1638   %ybc = bitcast i16 %y to half
1639   store half %ybc, ptr addrspace(1) %out
1640   ret void
; Loads an i16 from %in, adds the integer -16, bitcasts the sum to half and
; stores it to %out. The checks expect the -16 to appear directly as an operand
; of the 16-bit integer add on gfx1010, gfx1100 and tonga (no trailing literal
; dword in the encodings shown), and as an operand of a 32-bit v_add_i32_e32 on
; tahiti.
1643 define amdgpu_kernel void @add_inline_imm_neg_16_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
1644 ; GFX10-LABEL: add_inline_imm_neg_16_f16:
1645 ; GFX10:       ; %bb.0:
1646 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
1647 ; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
1648 ; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
1649 ; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
1650 ; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
1651 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1652 ; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
1653 ; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
1654 ; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
1655 ; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
1656 ; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
1657 ; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
1658 ; GFX10-NEXT:    v_add_nc_u16 v0, v0, -16 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0xa1,0x01,0x00]
1659 ; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1660 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1662 ; GFX11-LABEL: add_inline_imm_neg_16_f16:
1663 ; GFX11:       ; %bb.0:
1664 ; GFX11-NEXT:    s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8]
1665 ; GFX11-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1666 ; GFX11-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31]
1667 ; GFX11-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1668 ; GFX11-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1669 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1670 ; GFX11-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1671 ; GFX11-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1672 ; GFX11-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1673 ; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1674 ; GFX11-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1675 ; GFX11-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf]
1676 ; GFX11-NEXT:    v_add_nc_u16 v0, v0, -16 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0xa1,0x01,0x00]
1677 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80]
1678 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
1679 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1680 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1682 ; VI-LABEL: add_inline_imm_neg_16_f16:
1683 ; VI:       ; %bb.0:
1684 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00]
1685 ; VI-NEXT:    s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11]
1686 ; VI-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe]
1687 ; VI-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe]
1688 ; VI-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe]
1689 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1690 ; VI-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe]
1691 ; VI-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe]
1692 ; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80]
1693 ; VI-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe]
1694 ; VI-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe]
1695 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1696 ; VI-NEXT:    v_add_u16_e32 v0, -16, v0 ; encoding: [0xd0,0x00,0x00,0x4c]
1697 ; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
1698 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1700 ; SI-LABEL: add_inline_imm_neg_16_f16:
1701 ; SI:       ; %bb.0:
1702 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
1703 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1704 ; SI-NEXT:    s_mov_b32 s6, -1
1705 ; SI-NEXT:    s_mov_b32 s10, s6
1706 ; SI-NEXT:    s_mov_b32 s11, s7
1707 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1708 ; SI-NEXT:    s_mov_b32 s8, s2
1709 ; SI-NEXT:    s_mov_b32 s9, s3
1710 ; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
1711 ; SI-NEXT:    s_mov_b32 s4, s0
1712 ; SI-NEXT:    s_mov_b32 s5, s1
1713 ; SI-NEXT:    s_waitcnt vmcnt(0)
1714 ; SI-NEXT:    v_add_i32_e32 v0, vcc, -16, v0
1715 ; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1716 ; SI-NEXT:    s_endpgm
1717   %x = load i16, ptr addrspace(1) %in
1718   %y = add i16 %x, -16
1719   %ybc = bitcast i16 %y to half
1720   store half %ybc, ptr addrspace(1) %out
1721   ret void
; Adds the half constant with bit pattern 0x003F (written 0xH003F) to the
; kernel argument %x and stores the result to %out. The checks expect that
; constant to be printed as the operand 63 of v_add_f16_e64 on gfx1010, gfx1100
; and tonga, with 8-byte encodings (no trailing literal dword). On tahiti the
; checks show the add done in f32 instead: v_cvt_f32_f16_e32, then
; v_add_f32_e32 with the literal 0x367c0000, then v_cvt_f16_f32_e32.
1724 define amdgpu_kernel void @add_inline_imm_63_f16(ptr addrspace(1) %out, half %x) {
1725 ; GFX10-LABEL: add_inline_imm_63_f16:
1726 ; GFX10:       ; %bb.0:
1727 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1728 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1729 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1730 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1731 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1732 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00]
1733 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1734 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1735 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1737 ; GFX11-LABEL: add_inline_imm_63_f16:
1738 ; GFX11:       ; %bb.0:
1739 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1740 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1741 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1742 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1743 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1744 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00]
1745 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1746 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1747 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
1748 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1749 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1751 ; VI-LABEL: add_inline_imm_63_f16:
1752 ; VI:       ; %bb.0:
1753 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1754 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1755 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1756 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1757 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1758 ; VI-NEXT:    v_add_f16_e64 v0, s6, 63 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x7e,0x01,0x00]
1759 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1760 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1762 ; SI-LABEL: add_inline_imm_63_f16:
1763 ; SI:       ; %bb.0:
1764 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1765 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1766 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1767 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1768 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1769 ; SI-NEXT:    s_mov_b32 s2, -1
1770 ; SI-NEXT:    v_add_f32_e32 v0, 0x367c0000, v0
1771 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1772 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1773 ; SI-NEXT:    s_endpgm
1774   %y = fadd half %x, 0xH003F
1775   store half %y, ptr addrspace(1) %out
1776   ret void
; Adds the half constant with bit pattern 0x0040 (written 0xH0040) to the
; kernel argument %x and stores the result to %out. The checks expect that
; constant to be printed as the operand 64 of v_add_f16_e64 on gfx1010, gfx1100
; and tonga, with 8-byte encodings (no trailing literal dword). On tahiti the
; checks show the add done in f32 instead: v_cvt_f32_f16_e32, then
; v_add_f32_e32 with the literal 0x36800000, then v_cvt_f16_f32_e32.
1779 define amdgpu_kernel void @add_inline_imm_64_f16(ptr addrspace(1) %out, half %x) {
1780 ; GFX10-LABEL: add_inline_imm_64_f16:
1781 ; GFX10:       ; %bb.0:
1782 ; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
1783 ; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
1784 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
1785 ; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
1786 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
1787 ; GFX10-NEXT:    v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00]
1788 ; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
1789 ; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1790 ; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1792 ; GFX11-LABEL: add_inline_imm_64_f16:
1793 ; GFX11:       ; %bb.0:
1794 ; GFX11-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf]
1795 ; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8]
1796 ; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
1797 ; GFX11-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
1798 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
1799 ; GFX11-NEXT:    v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00]
1800 ; GFX11-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1801 ; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
1802 ; GFX11-NEXT:    s_nop 0 ; encoding: [0x00,0x00,0x80,0xbf]
1803 ; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
1804 ; GFX11-NEXT:    s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
1806 ; VI-LABEL: add_inline_imm_64_f16:
1807 ; VI:       ; %bb.0:
1808 ; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
1809 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
1810 ; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
1811 ; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
1812 ; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
1813 ; VI-NEXT:    v_add_f16_e64 v0, s6, 64 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x80,0x01,0x00]
1814 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
1815 ; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
1817 ; SI-LABEL: add_inline_imm_64_f16:
1818 ; SI:       ; %bb.0:
1819 ; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
1820 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1821 ; SI-NEXT:    s_mov_b32 s3, 0xf000
1822 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
1823 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
1824 ; SI-NEXT:    s_mov_b32 s2, -1
1825 ; SI-NEXT:    v_add_f32_e32 v0, 0x36800000, v0
1826 ; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1827 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
1828 ; SI-NEXT:    s_endpgm
1829   %y = fadd half %x, 0xH0040
1830   store half %y, ptr addrspace(1) %out
1831   ret void
1834 ; This needs to be emitted as a literal constant since the 16-bit
1835 ; float values do not work for 16-bit integer operations.
; Multiplies the i16 argument %x by the bit pattern of half 0.5 and stores the
; product to %out. The checks expect that pattern to be printed as the
; hexadecimal literal 0x3800 on every target: v_mul_lo_u16 with a 12-byte
; encoding ending in a literal dword on gfx1010/gfx1100, v_mul_lo_u16_e32 with
; a trailing literal dword on tonga, and v_mul_u32_u24_e32 (after masking %x
; with 0xffff) on tahiti.
1836 define void @mul_inline_imm_0.5_i16(ptr addrspace(1) %out, i16 %x) {
1837 ; GFX10-LABEL: mul_inline_imm_0.5_i16:
1838 ; GFX10:       ; %bb.0:
1839 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1840 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
1841 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1842 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1844 ; GFX11-LABEL: mul_inline_imm_0.5_i16:
1845 ; GFX11:       ; %bb.0:
1846 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1847 ; GFX11-NEXT:    v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
1848 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1849 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1851 ; VI-LABEL: mul_inline_imm_0.5_i16:
1852 ; VI:       ; %bb.0:
1853 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1854 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x38,0x00,0x00]
1855 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1856 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1857 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1859 ; SI-LABEL: mul_inline_imm_0.5_i16:
1860 ; SI:       ; %bb.0:
1861 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1862 ; SI-NEXT:    s_mov_b32 s6, 0
1863 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1864 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1865 ; SI-NEXT:    s_mov_b32 s4, s6
1866 ; SI-NEXT:    s_mov_b32 s5, s6
1867 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3800, v2
1868 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1869 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1870 ; SI-NEXT:    s_setpc_b64 s[30:31]
1871   %y = mul i16 %x, bitcast (half 0.5 to i16)
1872   store i16 %y, ptr addrspace(1) %out
1873   ret void
; Multiplies the i16 argument %x by the bit pattern of half -0.5 and stores the
; product to %out. The checks expect that pattern to be printed as the
; hexadecimal literal 0xb800 on every target. In the gfx1010, gfx1100 and tonga
; encodings the trailing literal bytes are 0x00,0xb8,0xff,0xff, i.e. the upper
; 16 bits of the literal dword are set.
1876 define void @mul_inline_imm_neg_0.5_i16(ptr addrspace(1) %out, i16 %x) {
1877 ; GFX10-LABEL: mul_inline_imm_neg_0.5_i16:
1878 ; GFX10:       ; %bb.0:
1879 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1880 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
1881 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1882 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1884 ; GFX11-LABEL: mul_inline_imm_neg_0.5_i16:
1885 ; GFX11:       ; %bb.0:
1886 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1887 ; GFX11-NEXT:    v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
1888 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1889 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1891 ; VI-LABEL: mul_inline_imm_neg_0.5_i16:
1892 ; VI:       ; %bb.0:
1893 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1894 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff]
1895 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1896 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1897 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1899 ; SI-LABEL: mul_inline_imm_neg_0.5_i16:
1900 ; SI:       ; %bb.0:
1901 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1902 ; SI-NEXT:    s_mov_b32 s6, 0
1903 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1904 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1905 ; SI-NEXT:    s_mov_b32 s4, s6
1906 ; SI-NEXT:    s_mov_b32 s5, s6
1907 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xb800, v2
1908 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1909 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1910 ; SI-NEXT:    s_setpc_b64 s[30:31]
1911   %y = mul i16 %x, bitcast (half -0.5 to i16)
1912   store i16 %y, ptr addrspace(1) %out
1913   ret void
; Multiplies the i16 argument %x by the bit pattern of half 1.0 and stores the
; product to %out. The checks expect that pattern to be printed as the
; hexadecimal literal 0x3c00 on every target, with a trailing literal dword in
; the gfx1010, gfx1100 and tonga encodings.
1916 define void @mul_inline_imm_1.0_i16(ptr addrspace(1) %out, i16 %x) {
1917 ; GFX10-LABEL: mul_inline_imm_1.0_i16:
1918 ; GFX10:       ; %bb.0:
1919 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1920 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
1921 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1922 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1924 ; GFX11-LABEL: mul_inline_imm_1.0_i16:
1925 ; GFX11:       ; %bb.0:
1926 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1927 ; GFX11-NEXT:    v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
1928 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1929 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1931 ; VI-LABEL: mul_inline_imm_1.0_i16:
1932 ; VI:       ; %bb.0:
1933 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1934 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3c00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x3c,0x00,0x00]
1935 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1936 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1937 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1939 ; SI-LABEL: mul_inline_imm_1.0_i16:
1940 ; SI:       ; %bb.0:
1941 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1942 ; SI-NEXT:    s_mov_b32 s6, 0
1943 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1944 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1945 ; SI-NEXT:    s_mov_b32 s4, s6
1946 ; SI-NEXT:    s_mov_b32 s5, s6
1947 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3c00, v2
1948 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1949 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1950 ; SI-NEXT:    s_setpc_b64 s[30:31]
1951   %y = mul i16 %x, bitcast (half 1.0 to i16)
1952   store i16 %y, ptr addrspace(1) %out
1953   ret void
; Multiplies the i16 argument %x by the bit pattern of half -1.0 and stores the
; product to %out. The checks expect that pattern to be printed as the
; hexadecimal literal 0xbc00 on every target. In the gfx1010, gfx1100 and tonga
; encodings the trailing literal bytes are 0x00,0xbc,0xff,0xff, i.e. the upper
; 16 bits of the literal dword are set.
1956 define void @mul_inline_imm_neg_1.0_i16(ptr addrspace(1) %out, i16 %x) {
1957 ; GFX10-LABEL: mul_inline_imm_neg_1.0_i16:
1958 ; GFX10:       ; %bb.0:
1959 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1960 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
1961 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
1962 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
1964 ; GFX11-LABEL: mul_inline_imm_neg_1.0_i16:
1965 ; GFX11:       ; %bb.0:
1966 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
1967 ; GFX11-NEXT:    v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
1968 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
1969 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
1971 ; VI-LABEL: mul_inline_imm_neg_1.0_i16:
1972 ; VI:       ; %bb.0:
1973 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
1974 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff]
1975 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
1976 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
1977 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
1979 ; SI-LABEL: mul_inline_imm_neg_1.0_i16:
1980 ; SI:       ; %bb.0:
1981 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1982 ; SI-NEXT:    s_mov_b32 s6, 0
1983 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1984 ; SI-NEXT:    s_mov_b32 s7, 0xf000
1985 ; SI-NEXT:    s_mov_b32 s4, s6
1986 ; SI-NEXT:    s_mov_b32 s5, s6
1987 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xbc00, v2
1988 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
1989 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1990 ; SI-NEXT:    s_setpc_b64 s[30:31]
1991   %y = mul i16 %x, bitcast (half -1.0 to i16)
1992   store i16 %y, ptr addrspace(1) %out
1993   ret void
; Shifts the bit pattern of half 2.0 left by the i16 argument %x and stores the
; result to %out; here the constant is the value being shifted, not the shift
; amount. The checks expect the constant as the literal 0x4000 operand of
; v_lshlrev_b16 on gfx1010/gfx1100 (12-byte encoding with a literal dword). On
; tonga it is first materialized into s4 with s_movk_i32 and then used by
; v_lshlrev_b16_e64; on tahiti it is the literal operand of v_lshl_b32_e32.
1996 define void @shl_inline_imm_2.0_i16(ptr addrspace(1) %out, i16 %x) {
1997 ; GFX10-LABEL: shl_inline_imm_2.0_i16:
1998 ; GFX10:       ; %bb.0:
1999 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2000 ; GFX10-NEXT:    v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
2001 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2002 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2004 ; GFX11-LABEL: shl_inline_imm_2.0_i16:
2005 ; GFX11:       ; %bb.0:
2006 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2007 ; GFX11-NEXT:    v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
2008 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2009 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2011 ; VI-LABEL: shl_inline_imm_2.0_i16:
2012 ; VI:       ; %bb.0:
2013 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2014 ; VI-NEXT:    s_movk_i32 s4, 0x4000 ; encoding: [0x00,0x40,0x04,0xb0]
2015 ; VI-NEXT:    v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
2016 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2017 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2018 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2020 ; SI-LABEL: shl_inline_imm_2.0_i16:
2021 ; SI:       ; %bb.0:
2022 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2023 ; SI-NEXT:    s_mov_b32 s6, 0
2024 ; SI-NEXT:    s_mov_b32 s7, 0xf000
2025 ; SI-NEXT:    s_mov_b32 s4, s6
2026 ; SI-NEXT:    s_mov_b32 s5, s6
2027 ; SI-NEXT:    v_lshl_b32_e32 v2, 0x4000, v2
2028 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2029 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2030 ; SI-NEXT:    s_setpc_b64 s[30:31]
2031   %y = shl i16 bitcast (half 2.0 to i16), %x
2032   store i16 %y, ptr addrspace(1) %out
2033   ret void
; Shifts the bit pattern of half -2.0 left by the i16 argument %x and stores
; the result to %out; the constant is the value being shifted, not the shift
; amount. The checks expect the constant as the literal 0xc000 operand of
; v_lshlrev_b16 on gfx1010/gfx1100 (trailing literal bytes 0x00,0xc0,0xff,0xff).
; On tonga it is first materialized into s4 with s_movk_i32 and then used by
; v_lshlrev_b16_e64; on tahiti the literal operand of v_lshl_b32_e32 is printed
; as 0xffffc000.
2036 define void @shl_inline_imm_neg_2.0_i16(ptr addrspace(1) %out, i16 %x) {
2037 ; GFX10-LABEL: shl_inline_imm_neg_2.0_i16:
2038 ; GFX10:       ; %bb.0:
2039 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2040 ; GFX10-NEXT:    v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
2041 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2042 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2044 ; GFX11-LABEL: shl_inline_imm_neg_2.0_i16:
2045 ; GFX11:       ; %bb.0:
2046 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2047 ; GFX11-NEXT:    v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
2048 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2049 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2051 ; VI-LABEL: shl_inline_imm_neg_2.0_i16:
2052 ; VI:       ; %bb.0:
2053 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2054 ; VI-NEXT:    s_movk_i32 s4, 0xc000 ; encoding: [0x00,0xc0,0x04,0xb0]
2055 ; VI-NEXT:    v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
2056 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2057 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2058 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2060 ; SI-LABEL: shl_inline_imm_neg_2.0_i16:
2061 ; SI:       ; %bb.0:
2062 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2063 ; SI-NEXT:    s_mov_b32 s6, 0
2064 ; SI-NEXT:    s_mov_b32 s7, 0xf000
2065 ; SI-NEXT:    s_mov_b32 s4, s6
2066 ; SI-NEXT:    s_mov_b32 s5, s6
2067 ; SI-NEXT:    v_lshl_b32_e32 v2, 0xffffc000, v2
2068 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2069 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2070 ; SI-NEXT:    s_setpc_b64 s[30:31]
2071   %y = shl i16 bitcast (half -2.0 to i16), %x
2072   store i16 %y, ptr addrspace(1) %out
2073   ret void
; Multiplies the i16 argument %x by the bit pattern of half 4.0 and stores the
; product to %out. The checks expect that pattern to be printed as the
; hexadecimal literal 0x4400 on every target, with a trailing literal dword in
; the gfx1010, gfx1100 and tonga encodings.
2076 define void @mul_inline_imm_4.0_i16(ptr addrspace(1) %out, i16 %x) {
2077 ; GFX10-LABEL: mul_inline_imm_4.0_i16:
2078 ; GFX10:       ; %bb.0:
2079 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2080 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
2081 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2082 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2084 ; GFX11-LABEL: mul_inline_imm_4.0_i16:
2085 ; GFX11:       ; %bb.0:
2086 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2087 ; GFX11-NEXT:    v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
2088 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2089 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2091 ; VI-LABEL: mul_inline_imm_4.0_i16:
2092 ; VI:       ; %bb.0:
2093 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2094 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x4400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x44,0x00,0x00]
2095 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2096 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2097 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2099 ; SI-LABEL: mul_inline_imm_4.0_i16:
2100 ; SI:       ; %bb.0:
2101 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2102 ; SI-NEXT:    s_mov_b32 s6, 0
2103 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2104 ; SI-NEXT:    s_mov_b32 s7, 0xf000
2105 ; SI-NEXT:    s_mov_b32 s4, s6
2106 ; SI-NEXT:    s_mov_b32 s5, s6
2107 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x4400, v2
2108 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2109 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2110 ; SI-NEXT:    s_setpc_b64 s[30:31]
2111   %y = mul i16 %x, bitcast (half 4.0 to i16)
2112   store i16 %y, ptr addrspace(1) %out
2113   ret void
; Test: 16-bit integer multiply of %x by the bit pattern of half -4.0
; (bitcast to i16), with the product stored through %out in addrspace(1).
; The expected instructions below carry the constant as the hex operand 0xc400:
;   - gfx1010 / gfx1100 runs expect v_mul_lo_u16 followed by a global store;
;   - the tonga run expects v_mul_lo_u16_e32 followed by flat_store_short;
;   - the tahiti run expects v2 to be masked with 0xffff and multiplied with
;     v_mul_u32_u24_e32, then stored with buffer_store_short (addr64).
; On the runs that print encodings, the trailing bytes are 0x00,0xc4,0xff,0xff,
; which appears to be the constant sign-extended to 32 bits (TODO confirm against
; the ISA encoding docs). The tahiti RUN line does not pass -show-mc-encoding,
; so its expected lines have no encoding text.
; NOTE: the assertion lines are autogenerated by utils/update_llc_test_checks.py
; (see the file header); regenerate them with that script instead of hand-editing.
2116 define void @mul_inline_imm_neg_4.0_i16(ptr addrspace(1) %out, i16 %x) {
2117 ; GFX10-LABEL: mul_inline_imm_neg_4.0_i16:
2118 ; GFX10:       ; %bb.0:
2119 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2120 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
2121 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2122 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2124 ; GFX11-LABEL: mul_inline_imm_neg_4.0_i16:
2125 ; GFX11:       ; %bb.0:
2126 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2127 ; GFX11-NEXT:    v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
2128 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2129 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2131 ; VI-LABEL: mul_inline_imm_neg_4.0_i16:
2132 ; VI:       ; %bb.0:
2133 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2134 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff]
2135 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2136 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2137 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2139 ; SI-LABEL: mul_inline_imm_neg_4.0_i16:
2140 ; SI:       ; %bb.0:
2141 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2142 ; SI-NEXT:    s_mov_b32 s6, 0
2143 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2144 ; SI-NEXT:    s_mov_b32 s7, 0xf000
2145 ; SI-NEXT:    s_mov_b32 s4, s6
2146 ; SI-NEXT:    s_mov_b32 s5, s6
2147 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xc400, v2
2148 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2149 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2150 ; SI-NEXT:    s_setpc_b64 s[30:31]
; The multiplier is written as a half constant reinterpreted as i16, so the
; integer multiply sees the raw bits of -4.0 rather than the integer value -4.
2151   %y = mul i16 %x, bitcast (half -4.0 to i16)
2152   store i16 %y, ptr addrspace(1) %out
2153   ret void
; Test: 16-bit integer multiply of %x by the half constant 0xH3118 bitcast to
; i16, with the product stored through %out in addrspace(1). Going by the
; function name, 0xH3118 is presumably the half-precision value of 1/(2*pi);
; verify against the ISA documentation.
; The expected instructions below carry the constant as the hex operand 0x3118:
;   - gfx1010 / gfx1100 runs expect v_mul_lo_u16 followed by a global store;
;   - the tonga run expects v_mul_lo_u16_e32 followed by flat_store_short;
;   - the tahiti run expects v2 to be masked with 0xffff and multiplied with
;     v_mul_u32_u24_e32, then stored with buffer_store_short (addr64).
; On the runs that print encodings, the trailing bytes are 0x18,0x31,0x00,0x00,
; i.e. the constant's low byte comes first. The tahiti RUN line does not pass
; -show-mc-encoding, so its expected lines have no encoding text.
; NOTE: the assertion lines are autogenerated by utils/update_llc_test_checks.py
; (see the file header); regenerate them with that script instead of hand-editing.
2156 define void @mul_inline_imm_inv2pi_i16(ptr addrspace(1) %out, i16 %x) {
2157 ; GFX10-LABEL: mul_inline_imm_inv2pi_i16:
2158 ; GFX10:       ; %bb.0:
2159 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2160 ; GFX10-NEXT:    v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
2161 ; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
2162 ; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
2164 ; GFX11-LABEL: mul_inline_imm_inv2pi_i16:
2165 ; GFX11:       ; %bb.0:
2166 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2167 ; GFX11-NEXT:    v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
2168 ; GFX11-NEXT:    global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
2169 ; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2171 ; VI-LABEL: mul_inline_imm_inv2pi_i16:
2172 ; VI:       ; %bb.0:
2173 ; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
2174 ; VI-NEXT:    v_mul_lo_u16_e32 v2, 0x3118, v2 ; encoding: [0xff,0x04,0x04,0x52,0x18,0x31,0x00,0x00]
2175 ; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
2176 ; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
2177 ; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
2179 ; SI-LABEL: mul_inline_imm_inv2pi_i16:
2180 ; SI:       ; %bb.0:
2181 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2182 ; SI-NEXT:    s_mov_b32 s6, 0
2183 ; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2184 ; SI-NEXT:    s_mov_b32 s7, 0xf000
2185 ; SI-NEXT:    s_mov_b32 s4, s6
2186 ; SI-NEXT:    s_mov_b32 s5, s6
2187 ; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3118, v2
2188 ; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
2189 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2190 ; SI-NEXT:    s_setpc_b64 s[30:31]
; The multiplier is given directly as a raw half bit pattern (0xH3118) and
; reinterpreted as i16, so the integer multiply operates on those bits.
2191   %y = mul i16 %x, bitcast (half 0xH3118 to i16)
2192   store i16 %y, ptr addrspace(1) %out
2193   ret void