[AMDGPU] Add True16 register classes.
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / fptrunc.ll
blob97216b6c94693c4093b7d5394ceb857005f08068
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s
3 ; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-SDAG,VI-SAFE-SDAG %s
4 ; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-GISEL,VI-SAFE-GISEL %s
5 ; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s
6 ; RUN: llc -march=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-GISEL,VI-UNSAFE-GISEL %s
7 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-SAFE-SDAG %s
8 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-SAFE-GISEL %s
9 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s
10 ; RUN: llc -march=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-UNSAFE-GISEL %s
11 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s
12 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-SAFE-GISEL %s
13 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-SDAG %s
14 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL %s
; Kernel under test: truncates the double argument %in to float with a single
; fptrunc and stores the result through the addrspace(1) pointer %out.
; Every check block below expects one v_cvt_f32_f64 followed by a 32-bit
; buffer store. The assertions are autogenerated (see the NOTE on the first
; line of the test); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
16 define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) {
17 ; SI-LABEL: fptrunc_f64_to_f32:
18 ; SI:       ; %bb.0:
19 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
20 ; SI-NEXT:    s_mov_b32 s7, 0xf000
21 ; SI-NEXT:    s_mov_b32 s6, -1
22 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
23 ; SI-NEXT:    s_mov_b32 s4, s0
24 ; SI-NEXT:    s_mov_b32 s5, s1
25 ; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
26 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
27 ; SI-NEXT:    s_endpgm
29 ; VI-SDAG-LABEL: fptrunc_f64_to_f32:
30 ; VI-SDAG:       ; %bb.0:
31 ; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
32 ; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
33 ; VI-SDAG-NEXT:    s_mov_b32 s6, -1
34 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
35 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
36 ; VI-SDAG-NEXT:    s_mov_b32 s4, s0
37 ; VI-SDAG-NEXT:    s_mov_b32 s5, s1
38 ; VI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
39 ; VI-SDAG-NEXT:    s_endpgm
41 ; VI-GISEL-LABEL: fptrunc_f64_to_f32:
42 ; VI-GISEL:       ; %bb.0:
43 ; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
44 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
45 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
46 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
47 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
48 ; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
49 ; VI-GISEL-NEXT:    s_endpgm
51 ; GFX10-SDAG-LABEL: fptrunc_f64_to_f32:
52 ; GFX10-SDAG:       ; %bb.0:
53 ; GFX10-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
54 ; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
55 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
56 ; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
57 ; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
58 ; GFX10-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
59 ; GFX10-SDAG-NEXT:    s_endpgm
61 ; GFX10-GISEL-LABEL: fptrunc_f64_to_f32:
62 ; GFX10-GISEL:       ; %bb.0:
63 ; GFX10-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
64 ; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
65 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
66 ; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
67 ; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
68 ; GFX10-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
69 ; GFX10-GISEL-NEXT:    s_endpgm
71 ; GFX11-SDAG-LABEL: fptrunc_f64_to_f32:
72 ; GFX11-SDAG:       ; %bb.0:
73 ; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
74 ; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
75 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
76 ; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
77 ; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
78 ; GFX11-SDAG-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
79 ; GFX11-SDAG-NEXT:    s_nop 0
80 ; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
81 ; GFX11-SDAG-NEXT:    s_endpgm
83 ; GFX11-GISEL-LABEL: fptrunc_f64_to_f32:
84 ; GFX11-GISEL:       ; %bb.0:
85 ; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
86 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
87 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
88 ; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
89 ; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
90 ; GFX11-GISEL-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
91 ; GFX11-GISEL-NEXT:    s_nop 0
92 ; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
93 ; GFX11-GISEL-NEXT:    s_endpgm
94   %result = fptrunc double %in to float
95   store float %result, ptr addrspace(1) %out
96   ret void
; Kernel under test: truncates the double argument %in to half, bitcasts the
; half to i16 and stores it through the addrspace(1) pointer %out.
; Two expected code shapes appear in the check blocks below:
;   * RUN lines without -enable-unsafe-fp-math expect a long integer
;     instruction sequence (bit-field extract, shifts, compares, selects)
;     and no floating-point convert instruction.
;   * RUN lines with -enable-unsafe-fp-math expect v_cvt_f32_f64 followed by
;     v_cvt_f16_f32.
; Every variant finishes with a 16-bit buffer store. The assertions are
; autogenerated (see the NOTE on the first line of the test); regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
99 define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) {
100 ; SI-LABEL: fptrunc_f64_to_f16:
101 ; SI:       ; %bb.0:
102 ; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
103 ; SI-NEXT:    s_mov_b32 s3, 0xf000
104 ; SI-NEXT:    s_mov_b32 s2, -1
105 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
106 ; SI-NEXT:    s_mov_b32 s0, s4
107 ; SI-NEXT:    s_mov_b32 s1, s5
108 ; SI-NEXT:    s_lshr_b32 s4, s7, 8
109 ; SI-NEXT:    s_and_b32 s5, s7, 0x1ff
110 ; SI-NEXT:    s_and_b32 s8, s4, 0xffe
111 ; SI-NEXT:    s_or_b32 s4, s5, s6
112 ; SI-NEXT:    s_cmp_lg_u32 s4, 0
113 ; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
114 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
115 ; SI-NEXT:    s_bfe_u32 s4, s7, 0xb0014
116 ; SI-NEXT:    v_readfirstlane_b32 s5, v0
117 ; SI-NEXT:    s_sub_i32 s6, 0x3f1, s4
118 ; SI-NEXT:    s_add_i32 s10, s4, 0xfffffc10
119 ; SI-NEXT:    s_or_b32 s11, s8, s5
120 ; SI-NEXT:    v_med3_i32 v0, s6, 0, 13
121 ; SI-NEXT:    s_lshl_b32 s4, s10, 12
122 ; SI-NEXT:    s_or_b32 s5, s11, 0x1000
123 ; SI-NEXT:    v_readfirstlane_b32 s6, v0
124 ; SI-NEXT:    s_or_b32 s4, s11, s4
125 ; SI-NEXT:    s_lshr_b32 s6, s5, s6
126 ; SI-NEXT:    v_lshl_b32_e32 v0, s6, v0
127 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, s5, v0
128 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
129 ; SI-NEXT:    v_readfirstlane_b32 s5, v0
130 ; SI-NEXT:    s_or_b32 s5, s6, s5
131 ; SI-NEXT:    s_cmp_lt_i32 s10, 1
132 ; SI-NEXT:    s_cselect_b32 s6, s5, s4
133 ; SI-NEXT:    s_and_b32 s8, s6, 7
134 ; SI-NEXT:    s_cmp_gt_i32 s8, 5
135 ; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
136 ; SI-NEXT:    s_cmp_eq_u32 s8, 3
137 ; SI-NEXT:    s_cselect_b64 s[8:9], -1, 0
138 ; SI-NEXT:    s_lshr_b32 s6, s6, 2
139 ; SI-NEXT:    s_or_b64 s[4:5], s[8:9], s[4:5]
140 ; SI-NEXT:    s_or_b32 s4, s4, s5
141 ; SI-NEXT:    s_cmp_lg_u32 s4, 0
142 ; SI-NEXT:    s_addc_u32 s4, s6, 0
143 ; SI-NEXT:    s_cmp_lt_i32 s10, 31
144 ; SI-NEXT:    s_cselect_b32 s6, s4, 0x7c00
145 ; SI-NEXT:    s_cmp_lg_u32 s11, 0
146 ; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
147 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
148 ; SI-NEXT:    s_cmpk_eq_i32 s10, 0x40f
149 ; SI-NEXT:    v_mov_b32_e32 v1, s6
150 ; SI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
151 ; SI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
152 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
153 ; SI-NEXT:    s_lshr_b32 s4, s7, 16
154 ; SI-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
155 ; SI-NEXT:    s_and_b32 s4, s4, 0x8000
156 ; SI-NEXT:    v_or_b32_e32 v0, s4, v0
157 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
158 ; SI-NEXT:    s_endpgm
160 ; VI-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
161 ; VI-SAFE-SDAG:       ; %bb.0:
162 ; VI-SAFE-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
163 ; VI-SAFE-SDAG-NEXT:    s_mov_b32 s3, 0xf000
164 ; VI-SAFE-SDAG-NEXT:    s_mov_b32 s2, -1
165 ; VI-SAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
166 ; VI-SAFE-SDAG-NEXT:    s_mov_b32 s0, s4
167 ; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s4, s7, 8
168 ; VI-SAFE-SDAG-NEXT:    s_and_b32 s8, s4, 0xffe
169 ; VI-SAFE-SDAG-NEXT:    s_and_b32 s4, s7, 0x1ff
170 ; VI-SAFE-SDAG-NEXT:    s_or_b32 s4, s4, s6
171 ; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
172 ; VI-SAFE-SDAG-NEXT:    s_mov_b32 s1, s5
173 ; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[4:5], -1, 0
174 ; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
175 ; VI-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s4, v0
176 ; VI-SAFE-SDAG-NEXT:    s_bfe_u32 s5, s7, 0xb0014
177 ; VI-SAFE-SDAG-NEXT:    s_or_b32 s6, s8, s4
178 ; VI-SAFE-SDAG-NEXT:    s_sub_i32 s8, 0x3f1, s5
179 ; VI-SAFE-SDAG-NEXT:    v_med3_i32 v0, s8, 0, 13
180 ; VI-SAFE-SDAG-NEXT:    s_or_b32 s4, s6, 0x1000
181 ; VI-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s8, v0
182 ; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s8, s4, s8
183 ; VI-SAFE-SDAG-NEXT:    v_lshlrev_b32_e64 v0, v0, s8
184 ; VI-SAFE-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, s4, v0
185 ; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
186 ; VI-SAFE-SDAG-NEXT:    s_add_i32 s10, s5, 0xfffffc10
187 ; VI-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s4, v0
188 ; VI-SAFE-SDAG-NEXT:    s_lshl_b32 s5, s10, 12
189 ; VI-SAFE-SDAG-NEXT:    s_or_b32 s4, s8, s4
190 ; VI-SAFE-SDAG-NEXT:    s_or_b32 s5, s6, s5
191 ; VI-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s10, 1
192 ; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s11, s4, s5
193 ; VI-SAFE-SDAG-NEXT:    s_and_b32 s8, s11, 7
194 ; VI-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s8, 5
195 ; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[4:5], -1, 0
196 ; VI-SAFE-SDAG-NEXT:    s_cmp_eq_u32 s8, 3
197 ; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[8:9], -1, 0
198 ; VI-SAFE-SDAG-NEXT:    s_or_b64 s[4:5], s[8:9], s[4:5]
199 ; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s8, s11, 2
200 ; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
201 ; VI-SAFE-SDAG-NEXT:    s_addc_u32 s4, s8, 0
202 ; VI-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s10, 31
203 ; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s8, s4, 0x7c00
204 ; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s6, 0
205 ; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[4:5], -1, 0
206 ; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
207 ; VI-SAFE-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
208 ; VI-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s10, 0x40f
209 ; VI-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
210 ; VI-SAFE-SDAG-NEXT:    v_mov_b32_e32 v1, s8
211 ; VI-SAFE-SDAG-NEXT:    s_cselect_b64 vcc, -1, 0
212 ; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s4, s7, 16
213 ; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
214 ; VI-SAFE-SDAG-NEXT:    s_and_b32 s4, s4, 0x8000
215 ; VI-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, s4, v0
216 ; VI-SAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
217 ; VI-SAFE-SDAG-NEXT:    s_endpgm
219 ; VI-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
220 ; VI-SAFE-GISEL:       ; %bb.0:
221 ; VI-SAFE-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
222 ; VI-SAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
223 ; VI-SAFE-GISEL-NEXT:    s_bfe_u32 s4, s3, 0xb0014
224 ; VI-SAFE-GISEL-NEXT:    s_lshr_b32 s5, s3, 8
225 ; VI-SAFE-GISEL-NEXT:    s_and_b32 s6, s3, 0x1ff
226 ; VI-SAFE-GISEL-NEXT:    s_addk_i32 s4, 0xfc10
227 ; VI-SAFE-GISEL-NEXT:    s_and_b32 s5, s5, 0xffe
228 ; VI-SAFE-GISEL-NEXT:    s_or_b32 s2, s6, s2
229 ; VI-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
230 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
231 ; VI-SAFE-GISEL-NEXT:    s_or_b32 s2, s5, s2
232 ; VI-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
233 ; VI-SAFE-GISEL-NEXT:    s_movk_i32 s5, 0x7e00
234 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s5, s5, 0x7c00
235 ; VI-SAFE-GISEL-NEXT:    s_sub_i32 s7, 1, s4
236 ; VI-SAFE-GISEL-NEXT:    s_lshl_b32 s6, s4, 12
237 ; VI-SAFE-GISEL-NEXT:    s_max_i32 s7, s7, 0
238 ; VI-SAFE-GISEL-NEXT:    s_or_b32 s6, s2, s6
239 ; VI-SAFE-GISEL-NEXT:    s_min_i32 s7, s7, 13
240 ; VI-SAFE-GISEL-NEXT:    s_bitset1_b32 s2, 12
241 ; VI-SAFE-GISEL-NEXT:    s_lshr_b32 s8, s2, s7
242 ; VI-SAFE-GISEL-NEXT:    s_lshl_b32 s7, s8, s7
243 ; VI-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s7, s2
244 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
245 ; VI-SAFE-GISEL-NEXT:    s_or_b32 s2, s8, s2
246 ; VI-SAFE-GISEL-NEXT:    s_cmp_lt_i32 s4, 1
247 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s2, s6
248 ; VI-SAFE-GISEL-NEXT:    s_and_b32 s6, s2, 7
249 ; VI-SAFE-GISEL-NEXT:    s_lshr_b32 s2, s2, 2
250 ; VI-SAFE-GISEL-NEXT:    s_cmp_eq_u32 s6, 3
251 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s7, 1, 0
252 ; VI-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s6, 5
253 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
254 ; VI-SAFE-GISEL-NEXT:    s_or_b32 s6, s7, s6
255 ; VI-SAFE-GISEL-NEXT:    s_and_b32 s6, s6, 1
256 ; VI-SAFE-GISEL-NEXT:    s_add_i32 s2, s2, s6
257 ; VI-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s4, 30
258 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 0x7c00, s2
259 ; VI-SAFE-GISEL-NEXT:    s_cmpk_eq_i32 s4, 0x40f
260 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s5, s2
261 ; VI-SAFE-GISEL-NEXT:    s_lshr_b32 s3, s3, 16
262 ; VI-SAFE-GISEL-NEXT:    s_and_b32 s3, s3, 0x8000
263 ; VI-SAFE-GISEL-NEXT:    s_or_b32 s2, s3, s2
264 ; VI-SAFE-GISEL-NEXT:    v_mov_b32_e32 v0, s2
265 ; VI-SAFE-GISEL-NEXT:    s_mov_b32 s2, -1
266 ; VI-SAFE-GISEL-NEXT:    s_mov_b32 s3, 0xf000
267 ; VI-SAFE-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
268 ; VI-SAFE-GISEL-NEXT:    s_endpgm
270 ; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
271 ; VI-UNSAFE-SDAG:       ; %bb.0:
272 ; VI-UNSAFE-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
273 ; VI-UNSAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
274 ; VI-UNSAFE-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
275 ; VI-UNSAFE-SDAG-NEXT:    s_mov_b32 s3, 0xf000
276 ; VI-UNSAFE-SDAG-NEXT:    s_mov_b32 s2, -1
277 ; VI-UNSAFE-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
278 ; VI-UNSAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
279 ; VI-UNSAFE-SDAG-NEXT:    s_endpgm
281 ; VI-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
282 ; VI-UNSAFE-GISEL:       ; %bb.0:
283 ; VI-UNSAFE-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
284 ; VI-UNSAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
285 ; VI-UNSAFE-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
286 ; VI-UNSAFE-GISEL-NEXT:    s_mov_b32 s2, -1
287 ; VI-UNSAFE-GISEL-NEXT:    s_mov_b32 s3, 0xf000
288 ; VI-UNSAFE-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
289 ; VI-UNSAFE-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
290 ; VI-UNSAFE-GISEL-NEXT:    s_endpgm
292 ; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
293 ; GFX10-SAFE-SDAG:       ; %bb.0:
294 ; GFX10-SAFE-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
295 ; GFX10-SAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
296 ; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s4, s3, 0x1ff
297 ; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s3, 8
298 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s2, s4, s2
299 ; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s4, s5, 0xffe
300 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s2, 0
301 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s2, -1, 0
302 ; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s2
303 ; GFX10-SAFE-SDAG-NEXT:    s_bfe_u32 s2, s3, 0xb0014
304 ; GFX10-SAFE-SDAG-NEXT:    s_sub_i32 s5, 0x3f1, s2
305 ; GFX10-SAFE-SDAG-NEXT:    s_addk_i32 s2, 0xfc10
306 ; GFX10-SAFE-SDAG-NEXT:    v_med3_i32 v1, s5, 0, 13
307 ; GFX10-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
308 ; GFX10-SAFE-SDAG-NEXT:    s_lshl_b32 s7, s2, 12
309 ; GFX10-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s6, v1
310 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s4, s4, s5
311 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s5, s4, 0x1000
312 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s7, s4, s7
313 ; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s6, s5, s6
314 ; GFX10-SAFE-SDAG-NEXT:    v_lshlrev_b32_e64 v0, v1, s6
315 ; GFX10-SAFE-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, s5, v0
316 ; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
317 ; GFX10-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
318 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s5, s6, s5
319 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 1
320 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, s7
321 ; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s6, s5, 7
322 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s6, 5
323 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s7, -1, 0
324 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_eq_u32 s6, 3
325 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s6, -1, 0
326 ; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s5, 2
327 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s6, s6, s7
328 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s6, 0
329 ; GFX10-SAFE-SDAG-NEXT:    s_addc_u32 s5, s5, 0
330 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 31
331 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, 0x7c00
332 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
333 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s4, -1, 0
334 ; GFX10-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0x40f
335 ; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
336 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 vcc_lo, -1, 0
337 ; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s2, s3, 16
338 ; GFX10-SAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
339 ; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s2, s2, 0x8000
340 ; GFX10-SAFE-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
341 ; GFX10-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
342 ; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e32 v0, s5, v0, vcc_lo
343 ; GFX10-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, s2, v0
344 ; GFX10-SAFE-SDAG-NEXT:    s_mov_b32 s2, -1
345 ; GFX10-SAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
346 ; GFX10-SAFE-SDAG-NEXT:    s_endpgm
348 ; GFX10-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
349 ; GFX10-SAFE-GISEL:       ; %bb.0:
350 ; GFX10-SAFE-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
351 ; GFX10-SAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
352 ; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s6, s3, 0x1ff
353 ; GFX10-SAFE-GISEL-NEXT:    s_bfe_u32 s4, s3, 0xb0014
354 ; GFX10-SAFE-GISEL-NEXT:    s_lshr_b32 s5, s3, 8
355 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s2, s6, s2
356 ; GFX10-SAFE-GISEL-NEXT:    s_addk_i32 s4, 0xfc10
357 ; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s5, s5, 0xffe
358 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
359 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
360 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s2, s5, s2
361 ; GFX10-SAFE-GISEL-NEXT:    s_movk_i32 s5, 0x7e00
362 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
363 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s5, s5, 0x7c00
364 ; GFX10-SAFE-GISEL-NEXT:    s_sub_i32 s6, 1, s4
365 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s7, s2, 0x1000
366 ; GFX10-SAFE-GISEL-NEXT:    s_max_i32 s6, s6, 0
367 ; GFX10-SAFE-GISEL-NEXT:    s_lshl_b32 s9, s4, 12
368 ; GFX10-SAFE-GISEL-NEXT:    s_min_i32 s6, s6, 13
369 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s2, s2, s9
370 ; GFX10-SAFE-GISEL-NEXT:    s_lshr_b32 s8, s7, s6
371 ; GFX10-SAFE-GISEL-NEXT:    s_lshl_b32 s6, s8, s6
372 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s6, s7
373 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
374 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s6, s8, s6
375 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_lt_i32 s4, 1
376 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s6, s2
377 ; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s6, s2, 7
378 ; GFX10-SAFE-GISEL-NEXT:    s_lshr_b32 s2, s2, 2
379 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_eq_u32 s6, 3
380 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s7, 1, 0
381 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s6, 5
382 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
383 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s6, s7, s6
384 ; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s6, s6, 1
385 ; GFX10-SAFE-GISEL-NEXT:    s_add_i32 s2, s2, s6
386 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s4, 30
387 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 0x7c00, s2
388 ; GFX10-SAFE-GISEL-NEXT:    s_cmpk_eq_i32 s4, 0x40f
389 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s5, s2
390 ; GFX10-SAFE-GISEL-NEXT:    s_lshr_b32 s3, s3, 16
391 ; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s3, s3, 0x8000
392 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s2, s3, s2
393 ; GFX10-SAFE-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
394 ; GFX10-SAFE-GISEL-NEXT:    v_mov_b32_e32 v0, s2
395 ; GFX10-SAFE-GISEL-NEXT:    s_mov_b32 s2, -1
396 ; GFX10-SAFE-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
397 ; GFX10-SAFE-GISEL-NEXT:    s_endpgm
399 ; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
400 ; GFX10-UNSAFE-SDAG:       ; %bb.0:
401 ; GFX10-UNSAFE-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
402 ; GFX10-UNSAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
403 ; GFX10-UNSAFE-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
404 ; GFX10-UNSAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
405 ; GFX10-UNSAFE-SDAG-NEXT:    s_mov_b32 s2, -1
406 ; GFX10-UNSAFE-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
407 ; GFX10-UNSAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
408 ; GFX10-UNSAFE-SDAG-NEXT:    s_endpgm
410 ; GFX10-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
411 ; GFX10-UNSAFE-GISEL:       ; %bb.0:
412 ; GFX10-UNSAFE-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
413 ; GFX10-UNSAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
414 ; GFX10-UNSAFE-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
415 ; GFX10-UNSAFE-GISEL-NEXT:    s_mov_b32 s2, -1
416 ; GFX10-UNSAFE-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
417 ; GFX10-UNSAFE-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
418 ; GFX10-UNSAFE-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
419 ; GFX10-UNSAFE-GISEL-NEXT:    s_endpgm
421 ; GFX11-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
422 ; GFX11-SAFE-SDAG:       ; %bb.0:
423 ; GFX11-SAFE-SDAG-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
424 ; GFX11-SAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
425 ; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s4, s3, 0x1ff
426 ; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s3, 8
427 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s2, s4, s2
428 ; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s4, s5, 0xffe
429 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s2, 0
430 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s2, -1, 0
431 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
432 ; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s2
433 ; GFX11-SAFE-SDAG-NEXT:    s_bfe_u32 s2, s3, 0xb0014
434 ; GFX11-SAFE-SDAG-NEXT:    s_sub_i32 s5, 0x3f1, s2
435 ; GFX11-SAFE-SDAG-NEXT:    s_addk_i32 s2, 0xfc10
436 ; GFX11-SAFE-SDAG-NEXT:    v_med3_i32 v1, s5, 0, 13
437 ; GFX11-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
438 ; GFX11-SAFE-SDAG-NEXT:    s_lshl_b32 s7, s2, 12
439 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
440 ; GFX11-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s6, v1
441 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s4, s4, s5
442 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
443 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s5, s4, 0x1000
444 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s7, s4, s7
445 ; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s6, s5, s6
446 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
447 ; GFX11-SAFE-SDAG-NEXT:    v_lshlrev_b32_e64 v0, v1, s6
448 ; GFX11-SAFE-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, s5, v0
449 ; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
450 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
451 ; GFX11-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
452 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s5, s6, s5
453 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 1
454 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, s7
455 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
456 ; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s6, s5, 7
457 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s6, 5
458 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s7, -1, 0
459 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_eq_u32 s6, 3
460 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s6, -1, 0
461 ; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s5, 2
462 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s6, s6, s7
463 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
464 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s6, 0
465 ; GFX11-SAFE-SDAG-NEXT:    s_addc_u32 s5, s5, 0
466 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 31
467 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, 0x7c00
468 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
469 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s4, -1, 0
470 ; GFX11-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0x40f
471 ; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
472 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 vcc_lo, -1, 0
473 ; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s2, s3, 16
474 ; GFX11-SAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
475 ; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s2, s2, 0x8000
476 ; GFX11-SAFE-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
477 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
478 ; GFX11-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
479 ; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e32 v0, s5, v0, vcc_lo
480 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
481 ; GFX11-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, s2, v0
482 ; GFX11-SAFE-SDAG-NEXT:    s_mov_b32 s2, -1
483 ; GFX11-SAFE-SDAG-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
484 ; GFX11-SAFE-SDAG-NEXT:    s_nop 0
485 ; GFX11-SAFE-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
486 ; GFX11-SAFE-SDAG-NEXT:    s_endpgm
488 ; GFX11-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
489 ; GFX11-SAFE-GISEL:       ; %bb.0:
490 ; GFX11-SAFE-GISEL-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
491 ; GFX11-SAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
492 ; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s6, s3, 0x1ff
493 ; GFX11-SAFE-GISEL-NEXT:    s_bfe_u32 s4, s3, 0xb0014
494 ; GFX11-SAFE-GISEL-NEXT:    s_lshr_b32 s5, s3, 8
495 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s2, s6, s2
496 ; GFX11-SAFE-GISEL-NEXT:    s_addk_i32 s4, 0xfc10
497 ; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s5, s5, 0xffe
498 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
499 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
500 ; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
501 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s2, s5, s2
502 ; GFX11-SAFE-GISEL-NEXT:    s_movk_i32 s5, 0x7e00
503 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
504 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s5, s5, 0x7c00
505 ; GFX11-SAFE-GISEL-NEXT:    s_sub_i32 s6, 1, s4
506 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s7, s2, 0x1000
507 ; GFX11-SAFE-GISEL-NEXT:    s_max_i32 s6, s6, 0
508 ; GFX11-SAFE-GISEL-NEXT:    s_lshl_b32 s9, s4, 12
509 ; GFX11-SAFE-GISEL-NEXT:    s_min_i32 s6, s6, 13
510 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s2, s2, s9
511 ; GFX11-SAFE-GISEL-NEXT:    s_lshr_b32 s8, s7, s6
512 ; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
513 ; GFX11-SAFE-GISEL-NEXT:    s_lshl_b32 s6, s8, s6
514 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s6, s7
515 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
516 ; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
517 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s6, s8, s6
518 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_lt_i32 s4, 1
519 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s6, s2
520 ; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s6, s2, 7
521 ; GFX11-SAFE-GISEL-NEXT:    s_lshr_b32 s2, s2, 2
522 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_eq_u32 s6, 3
523 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s7, 1, 0
524 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s6, 5
525 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
526 ; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
527 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s6, s7, s6
528 ; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s6, s6, 1
529 ; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
530 ; GFX11-SAFE-GISEL-NEXT:    s_add_i32 s2, s2, s6
531 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s4, 30
532 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 0x7c00, s2
533 ; GFX11-SAFE-GISEL-NEXT:    s_cmpk_eq_i32 s4, 0x40f
534 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s5, s2
535 ; GFX11-SAFE-GISEL-NEXT:    s_lshr_b32 s3, s3, 16
536 ; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
537 ; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s3, s3, 0x8000
538 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s2, s3, s2
539 ; GFX11-SAFE-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
540 ; GFX11-SAFE-GISEL-NEXT:    v_mov_b32_e32 v0, s2
541 ; GFX11-SAFE-GISEL-NEXT:    s_mov_b32 s2, -1
542 ; GFX11-SAFE-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
543 ; GFX11-SAFE-GISEL-NEXT:    s_nop 0
544 ; GFX11-SAFE-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
545 ; GFX11-SAFE-GISEL-NEXT:    s_endpgm
547 ; GFX11-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
548 ; GFX11-UNSAFE-SDAG:       ; %bb.0:
549 ; GFX11-UNSAFE-SDAG-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
550 ; GFX11-UNSAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
551 ; GFX11-UNSAFE-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
552 ; GFX11-UNSAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
553 ; GFX11-UNSAFE-SDAG-NEXT:    s_mov_b32 s2, -1
554 ; GFX11-UNSAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
555 ; GFX11-UNSAFE-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
556 ; GFX11-UNSAFE-SDAG-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
557 ; GFX11-UNSAFE-SDAG-NEXT:    s_nop 0
558 ; GFX11-UNSAFE-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
559 ; GFX11-UNSAFE-SDAG-NEXT:    s_endpgm
561 ; GFX11-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
562 ; GFX11-UNSAFE-GISEL:       ; %bb.0:
563 ; GFX11-UNSAFE-GISEL-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
564 ; GFX11-UNSAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
565 ; GFX11-UNSAFE-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
566 ; GFX11-UNSAFE-GISEL-NEXT:    s_mov_b32 s2, -1
567 ; GFX11-UNSAFE-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
568 ; GFX11-UNSAFE-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
569 ; GFX11-UNSAFE-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
570 ; GFX11-UNSAFE-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
571 ; GFX11-UNSAFE-GISEL-NEXT:    s_nop 0
572 ; GFX11-UNSAFE-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
573 ; GFX11-UNSAFE-GISEL-NEXT:    s_endpgm
574   %result = fptrunc double %in to half
575   %result_i16 = bitcast half %result to i16
576   store i16 %result_i16, ptr addrspace(1) %out
577   ret void
; Kernel under test: fptrunc of a <2 x double> kernel argument to <2 x float>,
; with the result stored through the addrspace(1) pointer %out.
; Every check block below expects one v_cvt_f32_f64_e32 per element followed by
; a single 64-bit buffer store (buffer_store_dwordx2, spelled buffer_store_b64
; in the GFX11 blocks). The SI and SDAG blocks list the conversion into v1
; before the one into v0; the GISEL blocks list v0 first.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
580 define amdgpu_kernel void @fptrunc_v2f64_to_v2f32(ptr addrspace(1) %out, <2 x double> %in) {
581 ; SI-LABEL: fptrunc_v2f64_to_v2f32:
582 ; SI:       ; %bb.0:
583 ; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
584 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
585 ; SI-NEXT:    s_mov_b32 s3, 0xf000
586 ; SI-NEXT:    s_mov_b32 s2, -1
587 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
588 ; SI-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
589 ; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
590 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
591 ; SI-NEXT:    s_endpgm
593 ; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f32:
594 ; VI-SDAG:       ; %bb.0:
595 ; VI-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
596 ; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
597 ; VI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
598 ; VI-SDAG-NEXT:    s_mov_b32 s2, -1
599 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
600 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
601 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
602 ; VI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
603 ; VI-SDAG-NEXT:    s_endpgm
605 ; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f32:
606 ; VI-GISEL:       ; %bb.0:
607 ; VI-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
608 ; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
609 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
610 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
611 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
612 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
613 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
614 ; VI-GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
615 ; VI-GISEL-NEXT:    s_endpgm
617 ; GFX10-SDAG-LABEL: fptrunc_v2f64_to_v2f32:
618 ; GFX10-SDAG:       ; %bb.0:
619 ; GFX10-SDAG-NEXT:    s_clause 0x1
620 ; GFX10-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
621 ; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
622 ; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
623 ; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
624 ; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
625 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
626 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
627 ; GFX10-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
628 ; GFX10-SDAG-NEXT:    s_endpgm
630 ; GFX10-GISEL-LABEL: fptrunc_v2f64_to_v2f32:
631 ; GFX10-GISEL:       ; %bb.0:
632 ; GFX10-GISEL-NEXT:    s_clause 0x1
633 ; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
634 ; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
635 ; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
636 ; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
637 ; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
638 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
639 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
640 ; GFX10-GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
641 ; GFX10-GISEL-NEXT:    s_endpgm
643 ; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f32:
644 ; GFX11-SDAG:       ; %bb.0:
645 ; GFX11-SDAG-NEXT:    s_clause 0x1
646 ; GFX11-SDAG-NEXT:    s_load_b128 s[4:7], s[0:1], 0x34
647 ; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
648 ; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
649 ; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
650 ; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
651 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
652 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
653 ; GFX11-SDAG-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
654 ; GFX11-SDAG-NEXT:    s_nop 0
655 ; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
656 ; GFX11-SDAG-NEXT:    s_endpgm
658 ; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f32:
659 ; GFX11-GISEL:       ; %bb.0:
660 ; GFX11-GISEL-NEXT:    s_clause 0x1
661 ; GFX11-GISEL-NEXT:    s_load_b128 s[4:7], s[0:1], 0x34
662 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
663 ; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
664 ; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
665 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
666 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
667 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
668 ; GFX11-GISEL-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
669 ; GFX11-GISEL-NEXT:    s_nop 0
670 ; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
671 ; GFX11-GISEL-NEXT:    s_endpgm
672   %result = fptrunc <2 x double> %in to <2 x float>
673   store <2 x float> %result, ptr addrspace(1) %out
674   ret void
; Kernel under test: fptrunc of a <3 x double> kernel argument to <3 x float>,
; with the result stored through the addrspace(1) pointer %out.
; Every check block expects three v_cvt_f32_f64_e32 conversions. The SI block
; writes the result with two stores (a dword at offset 8 plus a dwordx2 at
; offset 0); all other blocks use one 96-bit store (buffer_store_dwordx3,
; spelled buffer_store_b96 in the GFX11 blocks).
; In the SI and SDAG blocks the converted operands come from a 4-dword load
; plus a 2-dword load; in the GISEL blocks they come from a single 8-dword
; load (s_load_dwordx8 / s_load_b256).
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
677 define amdgpu_kernel void @fptrunc_v3f64_to_v3f32(ptr addrspace(1) %out, <3 x double> %in) {
678 ; SI-LABEL: fptrunc_v3f64_to_v3f32:
679 ; SI:       ; %bb.0:
680 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
681 ; SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x11
682 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x15
683 ; SI-NEXT:    s_mov_b32 s7, 0xf000
684 ; SI-NEXT:    s_mov_b32 s6, -1
685 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
686 ; SI-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
687 ; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
688 ; SI-NEXT:    v_cvt_f32_f64_e32 v2, s[0:1]
689 ; SI-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:8
690 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
691 ; SI-NEXT:    s_endpgm
693 ; VI-SDAG-LABEL: fptrunc_v3f64_to_v3f32:
694 ; VI-SDAG:       ; %bb.0:
695 ; VI-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x54
696 ; VI-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
697 ; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
698 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
699 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[2:3]
700 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
701 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
702 ; VI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
703 ; VI-SDAG-NEXT:    s_mov_b32 s2, -1
704 ; VI-SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
705 ; VI-SDAG-NEXT:    s_endpgm
707 ; VI-GISEL-LABEL: fptrunc_v3f64_to_v3f32:
708 ; VI-GISEL:       ; %bb.0:
709 ; VI-GISEL-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x44
710 ; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
711 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
712 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
713 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
714 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
715 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
716 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
717 ; VI-GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
718 ; VI-GISEL-NEXT:    s_endpgm
720 ; GFX10-SDAG-LABEL: fptrunc_v3f64_to_v3f32:
721 ; GFX10-SDAG:       ; %bb.0:
722 ; GFX10-SDAG-NEXT:    s_clause 0x2
723 ; GFX10-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x54
724 ; GFX10-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
725 ; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
726 ; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
727 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[2:3]
728 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
729 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
730 ; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
731 ; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
732 ; GFX10-SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
733 ; GFX10-SDAG-NEXT:    s_endpgm
735 ; GFX10-GISEL-LABEL: fptrunc_v3f64_to_v3f32:
736 ; GFX10-GISEL:       ; %bb.0:
737 ; GFX10-GISEL-NEXT:    s_clause 0x1
738 ; GFX10-GISEL-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x44
739 ; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
740 ; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
741 ; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
742 ; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
743 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
744 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
745 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
746 ; GFX10-GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
747 ; GFX10-GISEL-NEXT:    s_endpgm
749 ; GFX11-SDAG-LABEL: fptrunc_v3f64_to_v3f32:
750 ; GFX11-SDAG:       ; %bb.0:
751 ; GFX11-SDAG-NEXT:    s_clause 0x2
752 ; GFX11-SDAG-NEXT:    s_load_b64 s[2:3], s[0:1], 0x54
753 ; GFX11-SDAG-NEXT:    s_load_b128 s[4:7], s[0:1], 0x44
754 ; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
755 ; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
756 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[2:3]
757 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
758 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
759 ; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
760 ; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
761 ; GFX11-SDAG-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], 0
762 ; GFX11-SDAG-NEXT:    s_nop 0
763 ; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
764 ; GFX11-SDAG-NEXT:    s_endpgm
766 ; GFX11-GISEL-LABEL: fptrunc_v3f64_to_v3f32:
767 ; GFX11-GISEL:       ; %bb.0:
768 ; GFX11-GISEL-NEXT:    s_clause 0x1
769 ; GFX11-GISEL-NEXT:    s_load_b256 s[4:11], s[0:1], 0x44
770 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
771 ; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
772 ; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
773 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
774 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
775 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
776 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
777 ; GFX11-GISEL-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], 0
778 ; GFX11-GISEL-NEXT:    s_nop 0
779 ; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
780 ; GFX11-GISEL-NEXT:    s_endpgm
781   %result = fptrunc <3 x double> %in to <3 x float>
782   store <3 x float> %result, ptr addrspace(1) %out
783   ret void
; Kernel under test: fptrunc of a <4 x double> kernel argument to <4 x float>,
; with the result stored through the addrspace(1) pointer %out.
; Every check block expects the four source doubles to arrive in s[4:11] via a
; single 8-dword load (s_load_dwordx8 / s_load_b256), four v_cvt_f32_f64_e32
; conversions into v0..v3, and one 128-bit store (buffer_store_dwordx4,
; spelled buffer_store_b128 in the GFX11 blocks). The SI and SDAG blocks list
; the conversions from v3 down to v0; the GISEL blocks list them from v0 up.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
786 define amdgpu_kernel void @fptrunc_v4f64_to_v4f32(ptr addrspace(1) %out, <4 x double> %in) {
787 ; SI-LABEL: fptrunc_v4f64_to_v4f32:
788 ; SI:       ; %bb.0:
789 ; SI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x11
790 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
791 ; SI-NEXT:    s_mov_b32 s3, 0xf000
792 ; SI-NEXT:    s_mov_b32 s2, -1
793 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
794 ; SI-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
795 ; SI-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
796 ; SI-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
797 ; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
798 ; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
799 ; SI-NEXT:    s_endpgm
801 ; VI-SDAG-LABEL: fptrunc_v4f64_to_v4f32:
802 ; VI-SDAG:       ; %bb.0:
803 ; VI-SDAG-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x44
804 ; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
805 ; VI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
806 ; VI-SDAG-NEXT:    s_mov_b32 s2, -1
807 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
808 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
809 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
810 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
811 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
812 ; VI-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
813 ; VI-SDAG-NEXT:    s_endpgm
815 ; VI-GISEL-LABEL: fptrunc_v4f64_to_v4f32:
816 ; VI-GISEL:       ; %bb.0:
817 ; VI-GISEL-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x44
818 ; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
819 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
820 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
821 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
822 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
823 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
824 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
825 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
826 ; VI-GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
827 ; VI-GISEL-NEXT:    s_endpgm
829 ; GFX10-SDAG-LABEL: fptrunc_v4f64_to_v4f32:
830 ; GFX10-SDAG:       ; %bb.0:
831 ; GFX10-SDAG-NEXT:    s_clause 0x1
832 ; GFX10-SDAG-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x44
833 ; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
834 ; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
835 ; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
836 ; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
837 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
838 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
839 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
840 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
841 ; GFX10-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
842 ; GFX10-SDAG-NEXT:    s_endpgm
844 ; GFX10-GISEL-LABEL: fptrunc_v4f64_to_v4f32:
845 ; GFX10-GISEL:       ; %bb.0:
846 ; GFX10-GISEL-NEXT:    s_clause 0x1
847 ; GFX10-GISEL-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x44
848 ; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
849 ; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
850 ; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
851 ; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
852 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
853 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
854 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
855 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
856 ; GFX10-GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
857 ; GFX10-GISEL-NEXT:    s_endpgm
859 ; GFX11-SDAG-LABEL: fptrunc_v4f64_to_v4f32:
860 ; GFX11-SDAG:       ; %bb.0:
861 ; GFX11-SDAG-NEXT:    s_clause 0x1
862 ; GFX11-SDAG-NEXT:    s_load_b256 s[4:11], s[0:1], 0x44
863 ; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
864 ; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
865 ; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
866 ; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
867 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
868 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
869 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
870 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
871 ; GFX11-SDAG-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
872 ; GFX11-SDAG-NEXT:    s_nop 0
873 ; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
874 ; GFX11-SDAG-NEXT:    s_endpgm
876 ; GFX11-GISEL-LABEL: fptrunc_v4f64_to_v4f32:
877 ; GFX11-GISEL:       ; %bb.0:
878 ; GFX11-GISEL-NEXT:    s_clause 0x1
879 ; GFX11-GISEL-NEXT:    s_load_b256 s[4:11], s[0:1], 0x44
880 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
881 ; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
882 ; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
883 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
884 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
885 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
886 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
887 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
888 ; GFX11-GISEL-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
889 ; GFX11-GISEL-NEXT:    s_nop 0
890 ; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
891 ; GFX11-GISEL-NEXT:    s_endpgm
892   %result = fptrunc <4 x double> %in to <4 x float>
893   store <4 x float> %result, ptr addrspace(1) %out
894   ret void
; Kernel under test: fptrunc of an <8 x double> kernel argument to <8 x float>,
; with the result stored through the addrspace(1) pointer %out.
; Every check block expects the eight source doubles to arrive in s[4:19] via a
; single 16-dword load (s_load_dwordx16 / s_load_b512), eight
; v_cvt_f32_f64_e32 conversions into v0..v7, and two 128-bit stores: v[0:3] at
; offset 0 and v[4:7] at offset 16. The SI and SDAG blocks emit the offset-16
; store first; the GISEL blocks emit the offset-0 store first. The GFX11 blocks
; additionally expect an s_clause 0x1 ahead of the two stores.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
897 define amdgpu_kernel void @fptrunc_v8f64_to_v8f32(ptr addrspace(1) %out, <8 x double> %in) {
898 ; SI-LABEL: fptrunc_v8f64_to_v8f32:
899 ; SI:       ; %bb.0:
900 ; SI-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x19
901 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
902 ; SI-NEXT:    s_mov_b32 s3, 0xf000
903 ; SI-NEXT:    s_mov_b32 s2, -1
904 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
905 ; SI-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
906 ; SI-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
907 ; SI-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
908 ; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
909 ; SI-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
910 ; SI-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
911 ; SI-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
912 ; SI-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
913 ; SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
914 ; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
915 ; SI-NEXT:    s_endpgm
917 ; VI-SDAG-LABEL: fptrunc_v8f64_to_v8f32:
918 ; VI-SDAG:       ; %bb.0:
919 ; VI-SDAG-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x64
920 ; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
921 ; VI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
922 ; VI-SDAG-NEXT:    s_mov_b32 s2, -1
923 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
924 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
925 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
926 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
927 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
928 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
929 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
930 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
931 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
932 ; VI-SDAG-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
933 ; VI-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
934 ; VI-SDAG-NEXT:    s_endpgm
936 ; VI-GISEL-LABEL: fptrunc_v8f64_to_v8f32:
937 ; VI-GISEL:       ; %bb.0:
938 ; VI-GISEL-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x64
939 ; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
940 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
941 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
942 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
943 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
944 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
945 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
946 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
947 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
948 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
949 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
950 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
951 ; VI-GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
952 ; VI-GISEL-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
953 ; VI-GISEL-NEXT:    s_endpgm
955 ; GFX10-SDAG-LABEL: fptrunc_v8f64_to_v8f32:
956 ; GFX10-SDAG:       ; %bb.0:
957 ; GFX10-SDAG-NEXT:    s_clause 0x1
958 ; GFX10-SDAG-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x64
959 ; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
960 ; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
961 ; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
962 ; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
963 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
964 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
965 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
966 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
967 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
968 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
969 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
970 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
971 ; GFX10-SDAG-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
972 ; GFX10-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
973 ; GFX10-SDAG-NEXT:    s_endpgm
975 ; GFX10-GISEL-LABEL: fptrunc_v8f64_to_v8f32:
976 ; GFX10-GISEL:       ; %bb.0:
977 ; GFX10-GISEL-NEXT:    s_clause 0x1
978 ; GFX10-GISEL-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x64
979 ; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
980 ; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
981 ; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
982 ; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
983 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
984 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
985 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
986 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
987 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
988 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
989 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
990 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
991 ; GFX10-GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
992 ; GFX10-GISEL-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
993 ; GFX10-GISEL-NEXT:    s_endpgm
995 ; GFX11-SDAG-LABEL: fptrunc_v8f64_to_v8f32:
996 ; GFX11-SDAG:       ; %bb.0:
997 ; GFX11-SDAG-NEXT:    s_clause 0x1
998 ; GFX11-SDAG-NEXT:    s_load_b512 s[4:19], s[0:1], 0x64
999 ; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1000 ; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
1001 ; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
1002 ; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1003 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
1004 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
1005 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
1006 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
1007 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
1008 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
1009 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
1010 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
1011 ; GFX11-SDAG-NEXT:    s_clause 0x1
1012 ; GFX11-SDAG-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
1013 ; GFX11-SDAG-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
1014 ; GFX11-SDAG-NEXT:    s_nop 0
1015 ; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1016 ; GFX11-SDAG-NEXT:    s_endpgm
1018 ; GFX11-GISEL-LABEL: fptrunc_v8f64_to_v8f32:
1019 ; GFX11-GISEL:       ; %bb.0:
1020 ; GFX11-GISEL-NEXT:    s_clause 0x1
1021 ; GFX11-GISEL-NEXT:    s_load_b512 s[4:19], s[0:1], 0x64
1022 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1023 ; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
1024 ; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
1025 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1026 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
1027 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
1028 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
1029 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
1030 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
1031 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
1032 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
1033 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
1034 ; GFX11-GISEL-NEXT:    s_clause 0x1
1035 ; GFX11-GISEL-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
1036 ; GFX11-GISEL-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
1037 ; GFX11-GISEL-NEXT:    s_nop 0
1038 ; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1039 ; GFX11-GISEL-NEXT:    s_endpgm
1040   %result = fptrunc <8 x double> %in to <8 x float>
1041   store <8 x float> %result, ptr addrspace(1) %out
1042   ret void