Re-land [openmp] Fix warnings when building on Windows with latest MSVC or Clang...
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / fptrunc.ll
blobe4aa4d1d3ddb55e71877cdac07832741bc916621
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-SDAG,VI-SAFE-SDAG %s
4 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-GISEL,VI-SAFE-GISEL %s
5 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s
6 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-GISEL,VI-UNSAFE-GISEL %s
7 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-SAFE-SDAG %s
8 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-SAFE-GISEL %s
9 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s
10 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-UNSAFE-GISEL %s
11 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s
12 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-SAFE-GISEL %s
13 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-SDAG %s
14 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL %s
; Test: scalar fptrunc from double to float in an amdgpu_kernel; the result is
; stored through the addrspace(1) pointer %out.
; Every prefix below expects a single v_cvt_f32_f64_e32 feeding a 32-bit buffer
; store. The checks use the shared per-target prefixes (for example VI-SDAG or
; GFX11-GISEL), each of which appears on both the default and the
; -enable-unsafe-fp-math RUN lines, so the same output is expected either way.
16 define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) {
17 ; SI-LABEL: fptrunc_f64_to_f32:
18 ; SI:       ; %bb.0:
19 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
20 ; SI-NEXT:    s_mov_b32 s7, 0xf000
21 ; SI-NEXT:    s_mov_b32 s6, -1
22 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
23 ; SI-NEXT:    s_mov_b32 s4, s0
24 ; SI-NEXT:    s_mov_b32 s5, s1
25 ; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
26 ; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
27 ; SI-NEXT:    s_endpgm
29 ; VI-SDAG-LABEL: fptrunc_f64_to_f32:
30 ; VI-SDAG:       ; %bb.0:
31 ; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
32 ; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
33 ; VI-SDAG-NEXT:    s_mov_b32 s6, -1
34 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
35 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
36 ; VI-SDAG-NEXT:    s_mov_b32 s4, s0
37 ; VI-SDAG-NEXT:    s_mov_b32 s5, s1
38 ; VI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
39 ; VI-SDAG-NEXT:    s_endpgm
41 ; VI-GISEL-LABEL: fptrunc_f64_to_f32:
42 ; VI-GISEL:       ; %bb.0:
43 ; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
44 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
45 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
46 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
47 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
48 ; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
49 ; VI-GISEL-NEXT:    s_endpgm
51 ; GFX10-SDAG-LABEL: fptrunc_f64_to_f32:
52 ; GFX10-SDAG:       ; %bb.0:
53 ; GFX10-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
54 ; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
55 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
56 ; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
57 ; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
58 ; GFX10-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
59 ; GFX10-SDAG-NEXT:    s_endpgm
61 ; GFX10-GISEL-LABEL: fptrunc_f64_to_f32:
62 ; GFX10-GISEL:       ; %bb.0:
63 ; GFX10-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
64 ; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
65 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
66 ; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
67 ; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
68 ; GFX10-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
69 ; GFX10-GISEL-NEXT:    s_endpgm
71 ; GFX11-SDAG-LABEL: fptrunc_f64_to_f32:
72 ; GFX11-SDAG:       ; %bb.0:
73 ; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
74 ; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
75 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
76 ; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
77 ; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
78 ; GFX11-SDAG-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
79 ; GFX11-SDAG-NEXT:    s_nop 0
80 ; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
81 ; GFX11-SDAG-NEXT:    s_endpgm
83 ; GFX11-GISEL-LABEL: fptrunc_f64_to_f32:
84 ; GFX11-GISEL:       ; %bb.0:
85 ; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
86 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
87 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
88 ; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
89 ; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
90 ; GFX11-GISEL-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
91 ; GFX11-GISEL-NEXT:    s_nop 0
92 ; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
93 ; GFX11-GISEL-NEXT:    s_endpgm
94   %result = fptrunc double %in to float
95   store float %result, ptr addrspace(1) %out
96   ret void
; Test: scalar fptrunc from double to half in an amdgpu_kernel; the result is
; stored through the addrspace(1) pointer %out.
; Unlike the f64-to-f32 case, the expected output differs between the default
; and the -enable-unsafe-fp-math RUN lines, so the checks use the split
; SAFE/UNSAFE prefixes:
;   * SI and the *-SAFE-* prefixes expect an expanded sequence of integer
;     mask/shift/compare/select operations and no v_cvt instruction.
;   * the *-UNSAFE-* prefixes expect two conversions, v_cvt_f32_f64_e32
;     followed by v_cvt_f16_f32_e32.
; SI has only the default RUN line, so it has no unsafe variant.
99 define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) {
100 ; SI-LABEL: fptrunc_f64_to_f16:
101 ; SI:       ; %bb.0:
102 ; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
103 ; SI-NEXT:    s_mov_b32 s3, 0xf000
104 ; SI-NEXT:    s_mov_b32 s2, -1
105 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
106 ; SI-NEXT:    s_mov_b32 s0, s4
107 ; SI-NEXT:    s_mov_b32 s1, s5
108 ; SI-NEXT:    s_lshr_b32 s4, s7, 8
109 ; SI-NEXT:    s_and_b32 s5, s7, 0x1ff
110 ; SI-NEXT:    s_and_b32 s8, s4, 0xffe
111 ; SI-NEXT:    s_or_b32 s4, s5, s6
112 ; SI-NEXT:    s_cmp_lg_u32 s4, 0
113 ; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
114 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
115 ; SI-NEXT:    s_bfe_u32 s4, s7, 0xb0014
116 ; SI-NEXT:    v_readfirstlane_b32 s5, v0
117 ; SI-NEXT:    s_sub_i32 s6, 0x3f1, s4
118 ; SI-NEXT:    s_add_i32 s10, s4, 0xfffffc10
119 ; SI-NEXT:    s_or_b32 s11, s8, s5
120 ; SI-NEXT:    v_med3_i32 v0, s6, 0, 13
121 ; SI-NEXT:    s_lshl_b32 s4, s10, 12
122 ; SI-NEXT:    s_or_b32 s5, s11, 0x1000
123 ; SI-NEXT:    v_readfirstlane_b32 s6, v0
124 ; SI-NEXT:    s_or_b32 s4, s11, s4
125 ; SI-NEXT:    s_lshr_b32 s6, s5, s6
126 ; SI-NEXT:    v_lshl_b32_e32 v0, s6, v0
127 ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, s5, v0
128 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
129 ; SI-NEXT:    v_readfirstlane_b32 s5, v0
130 ; SI-NEXT:    s_or_b32 s5, s6, s5
131 ; SI-NEXT:    s_cmp_lt_i32 s10, 1
132 ; SI-NEXT:    s_cselect_b32 s6, s5, s4
133 ; SI-NEXT:    s_and_b32 s8, s6, 7
134 ; SI-NEXT:    s_cmp_gt_i32 s8, 5
135 ; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
136 ; SI-NEXT:    s_cmp_eq_u32 s8, 3
137 ; SI-NEXT:    s_cselect_b64 s[8:9], -1, 0
138 ; SI-NEXT:    s_lshr_b32 s6, s6, 2
139 ; SI-NEXT:    s_or_b64 s[4:5], s[8:9], s[4:5]
140 ; SI-NEXT:    s_or_b32 s4, s4, s5
141 ; SI-NEXT:    s_cmp_lg_u32 s4, 0
142 ; SI-NEXT:    s_addc_u32 s4, s6, 0
143 ; SI-NEXT:    s_cmp_lt_i32 s10, 31
144 ; SI-NEXT:    s_cselect_b32 s6, s4, 0x7c00
145 ; SI-NEXT:    s_cmp_lg_u32 s11, 0
146 ; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
147 ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
148 ; SI-NEXT:    s_cmpk_eq_i32 s10, 0x40f
149 ; SI-NEXT:    v_mov_b32_e32 v1, s6
150 ; SI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
151 ; SI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
152 ; SI-NEXT:    s_cselect_b64 vcc, -1, 0
153 ; SI-NEXT:    s_lshr_b32 s4, s7, 16
154 ; SI-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
155 ; SI-NEXT:    s_and_b32 s4, s4, 0x8000
156 ; SI-NEXT:    v_or_b32_e32 v0, s4, v0
157 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
158 ; SI-NEXT:    s_endpgm
160 ; VI-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
161 ; VI-SAFE-SDAG:       ; %bb.0:
162 ; VI-SAFE-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
163 ; VI-SAFE-SDAG-NEXT:    s_mov_b32 s3, 0xf000
164 ; VI-SAFE-SDAG-NEXT:    s_mov_b32 s2, -1
165 ; VI-SAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
166 ; VI-SAFE-SDAG-NEXT:    s_mov_b32 s0, s4
167 ; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s4, s7, 8
168 ; VI-SAFE-SDAG-NEXT:    s_and_b32 s8, s4, 0xffe
169 ; VI-SAFE-SDAG-NEXT:    s_and_b32 s4, s7, 0x1ff
170 ; VI-SAFE-SDAG-NEXT:    s_or_b32 s4, s4, s6
171 ; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
172 ; VI-SAFE-SDAG-NEXT:    s_mov_b32 s1, s5
173 ; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[4:5], -1, 0
174 ; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
175 ; VI-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s4, v0
176 ; VI-SAFE-SDAG-NEXT:    s_bfe_u32 s5, s7, 0xb0014
177 ; VI-SAFE-SDAG-NEXT:    s_or_b32 s6, s8, s4
178 ; VI-SAFE-SDAG-NEXT:    s_sub_i32 s8, 0x3f1, s5
179 ; VI-SAFE-SDAG-NEXT:    v_med3_i32 v0, s8, 0, 13
180 ; VI-SAFE-SDAG-NEXT:    s_or_b32 s4, s6, 0x1000
181 ; VI-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s8, v0
182 ; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s8, s4, s8
183 ; VI-SAFE-SDAG-NEXT:    v_lshlrev_b32_e64 v0, v0, s8
184 ; VI-SAFE-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, s4, v0
185 ; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
186 ; VI-SAFE-SDAG-NEXT:    s_add_i32 s10, s5, 0xfffffc10
187 ; VI-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s4, v0
188 ; VI-SAFE-SDAG-NEXT:    s_lshl_b32 s5, s10, 12
189 ; VI-SAFE-SDAG-NEXT:    s_or_b32 s4, s8, s4
190 ; VI-SAFE-SDAG-NEXT:    s_or_b32 s5, s6, s5
191 ; VI-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s10, 1
192 ; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s11, s4, s5
193 ; VI-SAFE-SDAG-NEXT:    s_and_b32 s8, s11, 7
194 ; VI-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s8, 5
195 ; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[4:5], -1, 0
196 ; VI-SAFE-SDAG-NEXT:    s_cmp_eq_u32 s8, 3
197 ; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[8:9], -1, 0
198 ; VI-SAFE-SDAG-NEXT:    s_or_b64 s[4:5], s[8:9], s[4:5]
199 ; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s8, s11, 2
200 ; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
201 ; VI-SAFE-SDAG-NEXT:    s_addc_u32 s4, s8, 0
202 ; VI-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s10, 31
203 ; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s8, s4, 0x7c00
204 ; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s6, 0
205 ; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[4:5], -1, 0
206 ; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
207 ; VI-SAFE-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
208 ; VI-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s10, 0x40f
209 ; VI-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
210 ; VI-SAFE-SDAG-NEXT:    v_mov_b32_e32 v1, s8
211 ; VI-SAFE-SDAG-NEXT:    s_cselect_b64 vcc, -1, 0
212 ; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s4, s7, 16
213 ; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
214 ; VI-SAFE-SDAG-NEXT:    s_and_b32 s4, s4, 0x8000
215 ; VI-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, s4, v0
216 ; VI-SAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
217 ; VI-SAFE-SDAG-NEXT:    s_endpgm
219 ; VI-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
220 ; VI-SAFE-GISEL:       ; %bb.0:
221 ; VI-SAFE-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
222 ; VI-SAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
223 ; VI-SAFE-GISEL-NEXT:    s_bfe_u32 s4, s3, 0xb0014
224 ; VI-SAFE-GISEL-NEXT:    s_lshr_b32 s5, s3, 8
225 ; VI-SAFE-GISEL-NEXT:    s_and_b32 s6, s3, 0x1ff
226 ; VI-SAFE-GISEL-NEXT:    s_addk_i32 s4, 0xfc10
227 ; VI-SAFE-GISEL-NEXT:    s_and_b32 s5, s5, 0xffe
228 ; VI-SAFE-GISEL-NEXT:    s_or_b32 s2, s6, s2
229 ; VI-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
230 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
231 ; VI-SAFE-GISEL-NEXT:    s_or_b32 s2, s5, s2
232 ; VI-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
233 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s5, 1, 0
234 ; VI-SAFE-GISEL-NEXT:    s_sub_i32 s7, 1, s4
235 ; VI-SAFE-GISEL-NEXT:    s_lshl_b32 s6, s4, 12
236 ; VI-SAFE-GISEL-NEXT:    s_max_i32 s7, s7, 0
237 ; VI-SAFE-GISEL-NEXT:    s_or_b32 s6, s2, s6
238 ; VI-SAFE-GISEL-NEXT:    s_min_i32 s7, s7, 13
239 ; VI-SAFE-GISEL-NEXT:    s_bitset1_b32 s2, 12
240 ; VI-SAFE-GISEL-NEXT:    s_lshl_b32 s5, s5, 9
241 ; VI-SAFE-GISEL-NEXT:    s_lshr_b32 s8, s2, s7
242 ; VI-SAFE-GISEL-NEXT:    s_or_b32 s5, s5, 0x7c00
243 ; VI-SAFE-GISEL-NEXT:    s_lshl_b32 s7, s8, s7
244 ; VI-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s7, s2
245 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
246 ; VI-SAFE-GISEL-NEXT:    s_or_b32 s2, s8, s2
247 ; VI-SAFE-GISEL-NEXT:    s_cmp_lt_i32 s4, 1
248 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s2, s6
249 ; VI-SAFE-GISEL-NEXT:    s_and_b32 s6, s2, 7
250 ; VI-SAFE-GISEL-NEXT:    s_lshr_b32 s2, s2, 2
251 ; VI-SAFE-GISEL-NEXT:    s_cmp_eq_u32 s6, 3
252 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s7, 1, 0
253 ; VI-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s6, 5
254 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
255 ; VI-SAFE-GISEL-NEXT:    s_or_b32 s6, s7, s6
256 ; VI-SAFE-GISEL-NEXT:    s_and_b32 s6, s6, 1
257 ; VI-SAFE-GISEL-NEXT:    s_add_i32 s2, s2, s6
258 ; VI-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s4, 30
259 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 0x7c00, s2
260 ; VI-SAFE-GISEL-NEXT:    s_cmpk_eq_i32 s4, 0x40f
261 ; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s5, s2
262 ; VI-SAFE-GISEL-NEXT:    s_lshr_b32 s3, s3, 16
263 ; VI-SAFE-GISEL-NEXT:    s_and_b32 s3, s3, 0x8000
264 ; VI-SAFE-GISEL-NEXT:    s_or_b32 s2, s3, s2
265 ; VI-SAFE-GISEL-NEXT:    v_mov_b32_e32 v0, s2
266 ; VI-SAFE-GISEL-NEXT:    s_mov_b32 s2, -1
267 ; VI-SAFE-GISEL-NEXT:    s_mov_b32 s3, 0xf000
268 ; VI-SAFE-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
269 ; VI-SAFE-GISEL-NEXT:    s_endpgm
271 ; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
272 ; VI-UNSAFE-SDAG:       ; %bb.0:
273 ; VI-UNSAFE-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
274 ; VI-UNSAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
275 ; VI-UNSAFE-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
276 ; VI-UNSAFE-SDAG-NEXT:    s_mov_b32 s3, 0xf000
277 ; VI-UNSAFE-SDAG-NEXT:    s_mov_b32 s2, -1
278 ; VI-UNSAFE-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
279 ; VI-UNSAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
280 ; VI-UNSAFE-SDAG-NEXT:    s_endpgm
282 ; VI-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
283 ; VI-UNSAFE-GISEL:       ; %bb.0:
284 ; VI-UNSAFE-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
285 ; VI-UNSAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
286 ; VI-UNSAFE-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
287 ; VI-UNSAFE-GISEL-NEXT:    s_mov_b32 s2, -1
288 ; VI-UNSAFE-GISEL-NEXT:    s_mov_b32 s3, 0xf000
289 ; VI-UNSAFE-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
290 ; VI-UNSAFE-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
291 ; VI-UNSAFE-GISEL-NEXT:    s_endpgm
293 ; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
294 ; GFX10-SAFE-SDAG:       ; %bb.0:
295 ; GFX10-SAFE-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
296 ; GFX10-SAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
297 ; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s4, s3, 0x1ff
298 ; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s3, 8
299 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s2, s4, s2
300 ; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s4, s5, 0xffe
301 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s2, 0
302 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s2, -1, 0
303 ; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s2
304 ; GFX10-SAFE-SDAG-NEXT:    s_bfe_u32 s2, s3, 0xb0014
305 ; GFX10-SAFE-SDAG-NEXT:    s_sub_i32 s5, 0x3f1, s2
306 ; GFX10-SAFE-SDAG-NEXT:    s_addk_i32 s2, 0xfc10
307 ; GFX10-SAFE-SDAG-NEXT:    v_med3_i32 v1, s5, 0, 13
308 ; GFX10-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
309 ; GFX10-SAFE-SDAG-NEXT:    s_lshl_b32 s7, s2, 12
310 ; GFX10-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s6, v1
311 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s4, s4, s5
312 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s5, s4, 0x1000
313 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s7, s4, s7
314 ; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s6, s5, s6
315 ; GFX10-SAFE-SDAG-NEXT:    v_lshlrev_b32_e64 v0, v1, s6
316 ; GFX10-SAFE-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, s5, v0
317 ; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
318 ; GFX10-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
319 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s5, s6, s5
320 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 1
321 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, s7
322 ; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s6, s5, 7
323 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s6, 5
324 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s7, -1, 0
325 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_eq_u32 s6, 3
326 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s6, -1, 0
327 ; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s5, 2
328 ; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s6, s6, s7
329 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s6, 0
330 ; GFX10-SAFE-SDAG-NEXT:    s_addc_u32 s5, s5, 0
331 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 31
332 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, 0x7c00
333 ; GFX10-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
334 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s4, -1, 0
335 ; GFX10-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0x40f
336 ; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
337 ; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 vcc_lo, -1, 0
338 ; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s2, s3, 16
339 ; GFX10-SAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
340 ; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s2, s2, 0x8000
341 ; GFX10-SAFE-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
342 ; GFX10-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
343 ; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e32 v0, s5, v0, vcc_lo
344 ; GFX10-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, s2, v0
345 ; GFX10-SAFE-SDAG-NEXT:    s_mov_b32 s2, -1
346 ; GFX10-SAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
347 ; GFX10-SAFE-SDAG-NEXT:    s_endpgm
349 ; GFX10-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
350 ; GFX10-SAFE-GISEL:       ; %bb.0:
351 ; GFX10-SAFE-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
352 ; GFX10-SAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
353 ; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s6, s3, 0x1ff
354 ; GFX10-SAFE-GISEL-NEXT:    s_bfe_u32 s4, s3, 0xb0014
355 ; GFX10-SAFE-GISEL-NEXT:    s_lshr_b32 s5, s3, 8
356 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s2, s6, s2
357 ; GFX10-SAFE-GISEL-NEXT:    s_addk_i32 s4, 0xfc10
358 ; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s5, s5, 0xffe
359 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
360 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
361 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s2, s5, s2
362 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
363 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s5, 1, 0
364 ; GFX10-SAFE-GISEL-NEXT:    s_sub_i32 s6, 1, s4
365 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s8, s2, 0x1000
366 ; GFX10-SAFE-GISEL-NEXT:    s_max_i32 s6, s6, 0
367 ; GFX10-SAFE-GISEL-NEXT:    s_lshl_b32 s7, s4, 12
368 ; GFX10-SAFE-GISEL-NEXT:    s_min_i32 s6, s6, 13
369 ; GFX10-SAFE-GISEL-NEXT:    s_lshl_b32 s5, s5, 9
370 ; GFX10-SAFE-GISEL-NEXT:    s_lshr_b32 s9, s8, s6
371 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s2, s2, s7
372 ; GFX10-SAFE-GISEL-NEXT:    s_lshl_b32 s6, s9, s6
373 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s5, s5, 0x7c00
374 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s6, s8
375 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
376 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s6, s9, s6
377 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_lt_i32 s4, 1
378 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s6, s2
379 ; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s6, s2, 7
380 ; GFX10-SAFE-GISEL-NEXT:    s_lshr_b32 s2, s2, 2
381 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_eq_u32 s6, 3
382 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s7, 1, 0
383 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s6, 5
384 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
385 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s6, s7, s6
386 ; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s6, s6, 1
387 ; GFX10-SAFE-GISEL-NEXT:    s_add_i32 s2, s2, s6
388 ; GFX10-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s4, 30
389 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 0x7c00, s2
390 ; GFX10-SAFE-GISEL-NEXT:    s_cmpk_eq_i32 s4, 0x40f
391 ; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s5, s2
392 ; GFX10-SAFE-GISEL-NEXT:    s_lshr_b32 s3, s3, 16
393 ; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s3, s3, 0x8000
394 ; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s2, s3, s2
395 ; GFX10-SAFE-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
396 ; GFX10-SAFE-GISEL-NEXT:    v_mov_b32_e32 v0, s2
397 ; GFX10-SAFE-GISEL-NEXT:    s_mov_b32 s2, -1
398 ; GFX10-SAFE-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
399 ; GFX10-SAFE-GISEL-NEXT:    s_endpgm
401 ; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
402 ; GFX10-UNSAFE-SDAG:       ; %bb.0:
403 ; GFX10-UNSAFE-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
404 ; GFX10-UNSAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
405 ; GFX10-UNSAFE-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
406 ; GFX10-UNSAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
407 ; GFX10-UNSAFE-SDAG-NEXT:    s_mov_b32 s2, -1
408 ; GFX10-UNSAFE-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
409 ; GFX10-UNSAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
410 ; GFX10-UNSAFE-SDAG-NEXT:    s_endpgm
412 ; GFX10-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
413 ; GFX10-UNSAFE-GISEL:       ; %bb.0:
414 ; GFX10-UNSAFE-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
415 ; GFX10-UNSAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
416 ; GFX10-UNSAFE-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
417 ; GFX10-UNSAFE-GISEL-NEXT:    s_mov_b32 s2, -1
418 ; GFX10-UNSAFE-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
419 ; GFX10-UNSAFE-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
420 ; GFX10-UNSAFE-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
421 ; GFX10-UNSAFE-GISEL-NEXT:    s_endpgm
423 ; GFX11-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
424 ; GFX11-SAFE-SDAG:       ; %bb.0:
425 ; GFX11-SAFE-SDAG-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
426 ; GFX11-SAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
427 ; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s4, s3, 0x1ff
428 ; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s3, 8
429 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s2, s4, s2
430 ; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s4, s5, 0xffe
431 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s2, 0
432 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s2, -1, 0
433 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
434 ; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s2
435 ; GFX11-SAFE-SDAG-NEXT:    s_bfe_u32 s2, s3, 0xb0014
436 ; GFX11-SAFE-SDAG-NEXT:    s_sub_i32 s5, 0x3f1, s2
437 ; GFX11-SAFE-SDAG-NEXT:    s_addk_i32 s2, 0xfc10
438 ; GFX11-SAFE-SDAG-NEXT:    v_med3_i32 v1, s5, 0, 13
439 ; GFX11-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
440 ; GFX11-SAFE-SDAG-NEXT:    s_lshl_b32 s7, s2, 12
441 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
442 ; GFX11-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s6, v1
443 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s4, s4, s5
444 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
445 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s5, s4, 0x1000
446 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s7, s4, s7
447 ; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s6, s5, s6
448 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
449 ; GFX11-SAFE-SDAG-NEXT:    v_lshlrev_b32_e64 v0, v1, s6
450 ; GFX11-SAFE-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, s5, v0
451 ; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
452 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
453 ; GFX11-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
454 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s5, s6, s5
455 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 1
456 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, s7
457 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
458 ; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s6, s5, 7
459 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s6, 5
460 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s7, -1, 0
461 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_eq_u32 s6, 3
462 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s6, -1, 0
463 ; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s5, 2
464 ; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s6, s6, s7
465 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
466 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s6, 0
467 ; GFX11-SAFE-SDAG-NEXT:    s_addc_u32 s5, s5, 0
468 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 31
469 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, 0x7c00
470 ; GFX11-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
471 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s4, -1, 0
472 ; GFX11-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0x40f
473 ; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
474 ; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 vcc_lo, -1, 0
475 ; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s2, s3, 16
476 ; GFX11-SAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
477 ; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s2, s2, 0x8000
478 ; GFX11-SAFE-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
479 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
480 ; GFX11-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
481 ; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e32 v0, s5, v0, vcc_lo
482 ; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
483 ; GFX11-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, s2, v0
484 ; GFX11-SAFE-SDAG-NEXT:    s_mov_b32 s2, -1
485 ; GFX11-SAFE-SDAG-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
486 ; GFX11-SAFE-SDAG-NEXT:    s_nop 0
487 ; GFX11-SAFE-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
488 ; GFX11-SAFE-SDAG-NEXT:    s_endpgm
490 ; GFX11-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
491 ; GFX11-SAFE-GISEL:       ; %bb.0:
492 ; GFX11-SAFE-GISEL-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
493 ; GFX11-SAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
494 ; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s6, s3, 0x1ff
495 ; GFX11-SAFE-GISEL-NEXT:    s_bfe_u32 s4, s3, 0xb0014
496 ; GFX11-SAFE-GISEL-NEXT:    s_lshr_b32 s5, s3, 8
497 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s2, s6, s2
498 ; GFX11-SAFE-GISEL-NEXT:    s_addk_i32 s4, 0xfc10
499 ; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s5, s5, 0xffe
500 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
501 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
502 ; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
503 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s2, s5, s2
504 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
505 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s5, 1, 0
506 ; GFX11-SAFE-GISEL-NEXT:    s_sub_i32 s6, 1, s4
507 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s8, s2, 0x1000
508 ; GFX11-SAFE-GISEL-NEXT:    s_max_i32 s6, s6, 0
509 ; GFX11-SAFE-GISEL-NEXT:    s_lshl_b32 s7, s4, 12
510 ; GFX11-SAFE-GISEL-NEXT:    s_min_i32 s6, s6, 13
511 ; GFX11-SAFE-GISEL-NEXT:    s_lshl_b32 s5, s5, 9
512 ; GFX11-SAFE-GISEL-NEXT:    s_lshr_b32 s9, s8, s6
513 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s2, s2, s7
514 ; GFX11-SAFE-GISEL-NEXT:    s_lshl_b32 s6, s9, s6
515 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s5, s5, 0x7c00
516 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s6, s8
517 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
518 ; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
519 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s6, s9, s6
520 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_lt_i32 s4, 1
521 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s6, s2
522 ; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s6, s2, 7
523 ; GFX11-SAFE-GISEL-NEXT:    s_lshr_b32 s2, s2, 2
524 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_eq_u32 s6, 3
525 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s7, 1, 0
526 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s6, 5
527 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
528 ; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
529 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s6, s7, s6
530 ; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s6, s6, 1
531 ; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
532 ; GFX11-SAFE-GISEL-NEXT:    s_add_i32 s2, s2, s6
533 ; GFX11-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s4, 30
534 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 0x7c00, s2
535 ; GFX11-SAFE-GISEL-NEXT:    s_cmpk_eq_i32 s4, 0x40f
536 ; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s5, s2
537 ; GFX11-SAFE-GISEL-NEXT:    s_lshr_b32 s3, s3, 16
538 ; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
539 ; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s3, s3, 0x8000
540 ; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s2, s3, s2
541 ; GFX11-SAFE-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
542 ; GFX11-SAFE-GISEL-NEXT:    v_mov_b32_e32 v0, s2
543 ; GFX11-SAFE-GISEL-NEXT:    s_mov_b32 s2, -1
544 ; GFX11-SAFE-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
545 ; GFX11-SAFE-GISEL-NEXT:    s_nop 0
546 ; GFX11-SAFE-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
547 ; GFX11-SAFE-GISEL-NEXT:    s_endpgm
549 ; GFX11-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
550 ; GFX11-UNSAFE-SDAG:       ; %bb.0:
551 ; GFX11-UNSAFE-SDAG-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
552 ; GFX11-UNSAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
553 ; GFX11-UNSAFE-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
554 ; GFX11-UNSAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
555 ; GFX11-UNSAFE-SDAG-NEXT:    s_mov_b32 s2, -1
556 ; GFX11-UNSAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
557 ; GFX11-UNSAFE-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
558 ; GFX11-UNSAFE-SDAG-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
559 ; GFX11-UNSAFE-SDAG-NEXT:    s_nop 0
560 ; GFX11-UNSAFE-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
561 ; GFX11-UNSAFE-SDAG-NEXT:    s_endpgm
563 ; GFX11-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
564 ; GFX11-UNSAFE-GISEL:       ; %bb.0:
565 ; GFX11-UNSAFE-GISEL-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
566 ; GFX11-UNSAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
567 ; GFX11-UNSAFE-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
568 ; GFX11-UNSAFE-GISEL-NEXT:    s_mov_b32 s2, -1
569 ; GFX11-UNSAFE-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
570 ; GFX11-UNSAFE-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
571 ; GFX11-UNSAFE-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
572 ; GFX11-UNSAFE-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
573 ; GFX11-UNSAFE-GISEL-NEXT:    s_nop 0
574 ; GFX11-UNSAFE-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
575 ; GFX11-UNSAFE-GISEL-NEXT:    s_endpgm
; The half result is bitcast to i16 before the store; every prefix above ends
; in a 16-bit store (buffer_store_short or buffer_store_b16).
576   %result = fptrunc double %in to half
577   %result_i16 = bitcast half %result to i16
578   store i16 %result_i16, ptr addrspace(1) %out
579   ret void
; Kernel test: fptrunc of a <2 x double> kernel argument to <2 x float>, stored to %out.
; The assertions below expect one v_cvt_f32_f64_e32 per element (sources s[4:5] and
; s[6:7]) followed by a single 64-bit buffer store (dwordx2, spelled b64 on GFX11).
; The SDAG expectations fill v1 before v0; the GISEL expectations fill v0 before v1.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
582 define amdgpu_kernel void @fptrunc_v2f64_to_v2f32(ptr addrspace(1) %out, <2 x double> %in) {
583 ; SI-LABEL: fptrunc_v2f64_to_v2f32:
584 ; SI:       ; %bb.0:
585 ; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
586 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
587 ; SI-NEXT:    s_mov_b32 s3, 0xf000
588 ; SI-NEXT:    s_mov_b32 s2, -1
589 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
590 ; SI-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
591 ; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
592 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
593 ; SI-NEXT:    s_endpgm
595 ; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f32:
596 ; VI-SDAG:       ; %bb.0:
597 ; VI-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
598 ; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
599 ; VI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
600 ; VI-SDAG-NEXT:    s_mov_b32 s2, -1
601 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
602 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
603 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
604 ; VI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
605 ; VI-SDAG-NEXT:    s_endpgm
607 ; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f32:
608 ; VI-GISEL:       ; %bb.0:
609 ; VI-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
610 ; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
611 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
612 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
613 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
614 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
615 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
616 ; VI-GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
617 ; VI-GISEL-NEXT:    s_endpgm
619 ; GFX10-SDAG-LABEL: fptrunc_v2f64_to_v2f32:
620 ; GFX10-SDAG:       ; %bb.0:
621 ; GFX10-SDAG-NEXT:    s_clause 0x1
622 ; GFX10-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
623 ; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
624 ; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
625 ; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
626 ; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
627 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
628 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
629 ; GFX10-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
630 ; GFX10-SDAG-NEXT:    s_endpgm
632 ; GFX10-GISEL-LABEL: fptrunc_v2f64_to_v2f32:
633 ; GFX10-GISEL:       ; %bb.0:
634 ; GFX10-GISEL-NEXT:    s_clause 0x1
635 ; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
636 ; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
637 ; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
638 ; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
639 ; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
640 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
641 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
642 ; GFX10-GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
643 ; GFX10-GISEL-NEXT:    s_endpgm
645 ; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f32:
646 ; GFX11-SDAG:       ; %bb.0:
647 ; GFX11-SDAG-NEXT:    s_clause 0x1
648 ; GFX11-SDAG-NEXT:    s_load_b128 s[4:7], s[0:1], 0x34
649 ; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
650 ; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
651 ; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
652 ; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
653 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
654 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
655 ; GFX11-SDAG-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
656 ; GFX11-SDAG-NEXT:    s_nop 0
657 ; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
658 ; GFX11-SDAG-NEXT:    s_endpgm
660 ; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f32:
661 ; GFX11-GISEL:       ; %bb.0:
662 ; GFX11-GISEL-NEXT:    s_clause 0x1
663 ; GFX11-GISEL-NEXT:    s_load_b128 s[4:7], s[0:1], 0x34
664 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
665 ; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
666 ; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
667 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
668 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
669 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
670 ; GFX11-GISEL-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
671 ; GFX11-GISEL-NEXT:    s_nop 0
672 ; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
673 ; GFX11-GISEL-NEXT:    s_endpgm
; IR under test: one vector fptrunc, then one store of the whole <2 x float> result.
674   %result = fptrunc <2 x double> %in to <2 x float>
675   store <2 x float> %result, ptr addrspace(1) %out
676   ret void
; Kernel test: fptrunc of a <3 x double> kernel argument to <3 x float>, stored to %out.
; The assertions below expect three v_cvt_f32_f64_e32 conversions on every target.
; Differences recorded by the expectations:
;  - the SI lines expect the result to be written with two stores (a dword at
;    offset:8 plus a dwordx2), the other targets expect one dwordx3 / b96 store;
;  - the SDAG lines expect the input to be fetched with a 128-bit load plus a
;    64-bit load, the GISEL lines expect a single 256-bit load.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
679 define amdgpu_kernel void @fptrunc_v3f64_to_v3f32(ptr addrspace(1) %out, <3 x double> %in) {
680 ; SI-LABEL: fptrunc_v3f64_to_v3f32:
681 ; SI:       ; %bb.0:
682 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
683 ; SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x11
684 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x15
685 ; SI-NEXT:    s_mov_b32 s7, 0xf000
686 ; SI-NEXT:    s_mov_b32 s6, -1
687 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
688 ; SI-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
689 ; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
690 ; SI-NEXT:    v_cvt_f32_f64_e32 v2, s[0:1]
691 ; SI-NEXT:    buffer_store_dword v2, off, s[4:7], 0 offset:8
692 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
693 ; SI-NEXT:    s_endpgm
695 ; VI-SDAG-LABEL: fptrunc_v3f64_to_v3f32:
696 ; VI-SDAG:       ; %bb.0:
697 ; VI-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x54
698 ; VI-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
699 ; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
700 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
701 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[2:3]
702 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
703 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
704 ; VI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
705 ; VI-SDAG-NEXT:    s_mov_b32 s2, -1
706 ; VI-SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
707 ; VI-SDAG-NEXT:    s_endpgm
709 ; VI-GISEL-LABEL: fptrunc_v3f64_to_v3f32:
710 ; VI-GISEL:       ; %bb.0:
711 ; VI-GISEL-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x44
712 ; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
713 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
714 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
715 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
716 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
717 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
718 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
719 ; VI-GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
720 ; VI-GISEL-NEXT:    s_endpgm
722 ; GFX10-SDAG-LABEL: fptrunc_v3f64_to_v3f32:
723 ; GFX10-SDAG:       ; %bb.0:
724 ; GFX10-SDAG-NEXT:    s_clause 0x2
725 ; GFX10-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x54
726 ; GFX10-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
727 ; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
728 ; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
729 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[2:3]
730 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
731 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
732 ; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
733 ; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
734 ; GFX10-SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
735 ; GFX10-SDAG-NEXT:    s_endpgm
737 ; GFX10-GISEL-LABEL: fptrunc_v3f64_to_v3f32:
738 ; GFX10-GISEL:       ; %bb.0:
739 ; GFX10-GISEL-NEXT:    s_clause 0x1
740 ; GFX10-GISEL-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x44
741 ; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
742 ; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
743 ; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
744 ; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
745 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
746 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
747 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
748 ; GFX10-GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
749 ; GFX10-GISEL-NEXT:    s_endpgm
751 ; GFX11-SDAG-LABEL: fptrunc_v3f64_to_v3f32:
752 ; GFX11-SDAG:       ; %bb.0:
753 ; GFX11-SDAG-NEXT:    s_clause 0x2
754 ; GFX11-SDAG-NEXT:    s_load_b64 s[2:3], s[0:1], 0x54
755 ; GFX11-SDAG-NEXT:    s_load_b128 s[4:7], s[0:1], 0x44
756 ; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
757 ; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
758 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[2:3]
759 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
760 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
761 ; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
762 ; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
763 ; GFX11-SDAG-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], 0
764 ; GFX11-SDAG-NEXT:    s_nop 0
765 ; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
766 ; GFX11-SDAG-NEXT:    s_endpgm
768 ; GFX11-GISEL-LABEL: fptrunc_v3f64_to_v3f32:
769 ; GFX11-GISEL:       ; %bb.0:
770 ; GFX11-GISEL-NEXT:    s_clause 0x1
771 ; GFX11-GISEL-NEXT:    s_load_b256 s[4:11], s[0:1], 0x44
772 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
773 ; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
774 ; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
775 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
776 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
777 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
778 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
779 ; GFX11-GISEL-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], 0
780 ; GFX11-GISEL-NEXT:    s_nop 0
781 ; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
782 ; GFX11-GISEL-NEXT:    s_endpgm
; IR under test: one vector fptrunc, then one store of the whole <3 x float> result.
783   %result = fptrunc <3 x double> %in to <3 x float>
784   store <3 x float> %result, ptr addrspace(1) %out
785   ret void
; Kernel test: fptrunc of a <4 x double> kernel argument to <4 x float>, stored to %out.
; The assertions below expect the input to arrive through one 256-bit scalar load
; into s[4:11], four v_cvt_f32_f64_e32 conversions, and a single 128-bit buffer
; store (dwordx4, spelled b128 on GFX11).
; The SDAG expectations fill v3 down to v0; the GISEL expectations fill v0 up to v3.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
788 define amdgpu_kernel void @fptrunc_v4f64_to_v4f32(ptr addrspace(1) %out, <4 x double> %in) {
789 ; SI-LABEL: fptrunc_v4f64_to_v4f32:
790 ; SI:       ; %bb.0:
791 ; SI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x11
792 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
793 ; SI-NEXT:    s_mov_b32 s3, 0xf000
794 ; SI-NEXT:    s_mov_b32 s2, -1
795 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
796 ; SI-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
797 ; SI-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
798 ; SI-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
799 ; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
800 ; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
801 ; SI-NEXT:    s_endpgm
803 ; VI-SDAG-LABEL: fptrunc_v4f64_to_v4f32:
804 ; VI-SDAG:       ; %bb.0:
805 ; VI-SDAG-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x44
806 ; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
807 ; VI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
808 ; VI-SDAG-NEXT:    s_mov_b32 s2, -1
809 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
810 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
811 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
812 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
813 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
814 ; VI-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
815 ; VI-SDAG-NEXT:    s_endpgm
817 ; VI-GISEL-LABEL: fptrunc_v4f64_to_v4f32:
818 ; VI-GISEL:       ; %bb.0:
819 ; VI-GISEL-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x44
820 ; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
821 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
822 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
823 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
824 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
825 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
826 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
827 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
828 ; VI-GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
829 ; VI-GISEL-NEXT:    s_endpgm
831 ; GFX10-SDAG-LABEL: fptrunc_v4f64_to_v4f32:
832 ; GFX10-SDAG:       ; %bb.0:
833 ; GFX10-SDAG-NEXT:    s_clause 0x1
834 ; GFX10-SDAG-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x44
835 ; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
836 ; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
837 ; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
838 ; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
839 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
840 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
841 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
842 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
843 ; GFX10-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
844 ; GFX10-SDAG-NEXT:    s_endpgm
846 ; GFX10-GISEL-LABEL: fptrunc_v4f64_to_v4f32:
847 ; GFX10-GISEL:       ; %bb.0:
848 ; GFX10-GISEL-NEXT:    s_clause 0x1
849 ; GFX10-GISEL-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x44
850 ; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
851 ; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
852 ; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
853 ; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
854 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
855 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
856 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
857 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
858 ; GFX10-GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
859 ; GFX10-GISEL-NEXT:    s_endpgm
861 ; GFX11-SDAG-LABEL: fptrunc_v4f64_to_v4f32:
862 ; GFX11-SDAG:       ; %bb.0:
863 ; GFX11-SDAG-NEXT:    s_clause 0x1
864 ; GFX11-SDAG-NEXT:    s_load_b256 s[4:11], s[0:1], 0x44
865 ; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
866 ; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
867 ; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
868 ; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
869 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
870 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
871 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
872 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
873 ; GFX11-SDAG-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
874 ; GFX11-SDAG-NEXT:    s_nop 0
875 ; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
876 ; GFX11-SDAG-NEXT:    s_endpgm
878 ; GFX11-GISEL-LABEL: fptrunc_v4f64_to_v4f32:
879 ; GFX11-GISEL:       ; %bb.0:
880 ; GFX11-GISEL-NEXT:    s_clause 0x1
881 ; GFX11-GISEL-NEXT:    s_load_b256 s[4:11], s[0:1], 0x44
882 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
883 ; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
884 ; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
885 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
886 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
887 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
888 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
889 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
890 ; GFX11-GISEL-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
891 ; GFX11-GISEL-NEXT:    s_nop 0
892 ; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
893 ; GFX11-GISEL-NEXT:    s_endpgm
; IR under test: one vector fptrunc, then one store of the whole <4 x float> result.
894   %result = fptrunc <4 x double> %in to <4 x float>
895   store <4 x float> %result, ptr addrspace(1) %out
896   ret void
; Kernel test: fptrunc of an <8 x double> kernel argument to <8 x float>, stored to %out.
; The assertions below expect the input to arrive through one 512-bit scalar load
; into s[4:19], eight v_cvt_f32_f64_e32 conversions, and two 128-bit buffer stores
; (v[0:3] at offset 0 and v[4:7] at offset:16).
; The SDAG and SI expectations issue the offset:16 store first; the GISEL
; expectations issue the offset-0 store first. On GFX11 the two stores are expected
; to be preceded by an s_clause 0x1.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
899 define amdgpu_kernel void @fptrunc_v8f64_to_v8f32(ptr addrspace(1) %out, <8 x double> %in) {
900 ; SI-LABEL: fptrunc_v8f64_to_v8f32:
901 ; SI:       ; %bb.0:
902 ; SI-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x19
903 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
904 ; SI-NEXT:    s_mov_b32 s3, 0xf000
905 ; SI-NEXT:    s_mov_b32 s2, -1
906 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
907 ; SI-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
908 ; SI-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
909 ; SI-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
910 ; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
911 ; SI-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
912 ; SI-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
913 ; SI-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
914 ; SI-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
915 ; SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
916 ; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
917 ; SI-NEXT:    s_endpgm
919 ; VI-SDAG-LABEL: fptrunc_v8f64_to_v8f32:
920 ; VI-SDAG:       ; %bb.0:
921 ; VI-SDAG-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x64
922 ; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
923 ; VI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
924 ; VI-SDAG-NEXT:    s_mov_b32 s2, -1
925 ; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
926 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
927 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
928 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
929 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
930 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
931 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
932 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
933 ; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
934 ; VI-SDAG-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
935 ; VI-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
936 ; VI-SDAG-NEXT:    s_endpgm
938 ; VI-GISEL-LABEL: fptrunc_v8f64_to_v8f32:
939 ; VI-GISEL:       ; %bb.0:
940 ; VI-GISEL-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x64
941 ; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
942 ; VI-GISEL-NEXT:    s_mov_b32 s2, -1
943 ; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
944 ; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
945 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
946 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
947 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
948 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
949 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
950 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
951 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
952 ; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
953 ; VI-GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
954 ; VI-GISEL-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
955 ; VI-GISEL-NEXT:    s_endpgm
957 ; GFX10-SDAG-LABEL: fptrunc_v8f64_to_v8f32:
958 ; GFX10-SDAG:       ; %bb.0:
959 ; GFX10-SDAG-NEXT:    s_clause 0x1
960 ; GFX10-SDAG-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x64
961 ; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
962 ; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
963 ; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
964 ; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
965 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
966 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
967 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
968 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
969 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
970 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
971 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
972 ; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
973 ; GFX10-SDAG-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
974 ; GFX10-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
975 ; GFX10-SDAG-NEXT:    s_endpgm
977 ; GFX10-GISEL-LABEL: fptrunc_v8f64_to_v8f32:
978 ; GFX10-GISEL:       ; %bb.0:
979 ; GFX10-GISEL-NEXT:    s_clause 0x1
980 ; GFX10-GISEL-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x64
981 ; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
982 ; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
983 ; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
984 ; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
985 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
986 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
987 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
988 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
989 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
990 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
991 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
992 ; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
993 ; GFX10-GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
994 ; GFX10-GISEL-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
995 ; GFX10-GISEL-NEXT:    s_endpgm
997 ; GFX11-SDAG-LABEL: fptrunc_v8f64_to_v8f32:
998 ; GFX11-SDAG:       ; %bb.0:
999 ; GFX11-SDAG-NEXT:    s_clause 0x1
1000 ; GFX11-SDAG-NEXT:    s_load_b512 s[4:19], s[0:1], 0x64
1001 ; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1002 ; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
1003 ; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
1004 ; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1005 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
1006 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
1007 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
1008 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
1009 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
1010 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
1011 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
1012 ; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
1013 ; GFX11-SDAG-NEXT:    s_clause 0x1
1014 ; GFX11-SDAG-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
1015 ; GFX11-SDAG-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
1016 ; GFX11-SDAG-NEXT:    s_nop 0
1017 ; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1018 ; GFX11-SDAG-NEXT:    s_endpgm
1020 ; GFX11-GISEL-LABEL: fptrunc_v8f64_to_v8f32:
1021 ; GFX11-GISEL:       ; %bb.0:
1022 ; GFX11-GISEL-NEXT:    s_clause 0x1
1023 ; GFX11-GISEL-NEXT:    s_load_b512 s[4:19], s[0:1], 0x64
1024 ; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1025 ; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
1026 ; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
1027 ; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1028 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
1029 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
1030 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[8:9]
1031 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[10:11]
1032 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v4, s[12:13]
1033 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v5, s[14:15]
1034 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v6, s[16:17]
1035 ; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v7, s[18:19]
1036 ; GFX11-GISEL-NEXT:    s_clause 0x1
1037 ; GFX11-GISEL-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
1038 ; GFX11-GISEL-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
1039 ; GFX11-GISEL-NEXT:    s_nop 0
1040 ; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1041 ; GFX11-GISEL-NEXT:    s_endpgm
; IR under test: one vector fptrunc, then one store of the whole <8 x float> result.
1042   %result = fptrunc <8 x double> %in to <8 x float>
1043   store <8 x float> %result, ptr addrspace(1) %out
1044   ret void