1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mcpu=mvp -mattr=+simd128 | FileCheck %s
4 ; Test that vector float-to-int, int-to-float, and float-to-float (demote/promote) conversion instructions lower correctly
6 target triple = "wasm32-unknown-unknown"
; Signed <4 x i32> -> <4 x float> conversion (sitofp). The assertions expect
; this to select a single f32x4.convert_i32x4_s.
8 define <4 x float> @convert_s_v4f32(<4 x i32> %x) {
9 ; CHECK-LABEL: convert_s_v4f32:
10 ; CHECK: .functype convert_s_v4f32 (v128) -> (v128)
11 ; CHECK-NEXT: # %bb.0:
12 ; CHECK-NEXT: local.get 0
13 ; CHECK-NEXT: f32x4.convert_i32x4_s
14 ; CHECK-NEXT: # fallthrough-return
15 %a = sitofp <4 x i32> %x to <4 x float>
; Unsigned <4 x i32> -> <4 x float> conversion (uitofp). The assertions expect
; this to select a single f32x4.convert_i32x4_u.
19 define <4 x float> @convert_u_v4f32(<4 x i32> %x) {
20 ; CHECK-LABEL: convert_u_v4f32:
21 ; CHECK: .functype convert_u_v4f32 (v128) -> (v128)
22 ; CHECK-NEXT: # %bb.0:
23 ; CHECK-NEXT: local.get 0
24 ; CHECK-NEXT: f32x4.convert_i32x4_u
25 ; CHECK-NEXT: # fallthrough-return
26 %a = uitofp <4 x i32> %x to <4 x float>
; Signed <2 x i64> -> <2 x double> conversion (sitofp). The assertions expect
; per-lane scalarization: each i64 lane is extracted, converted with the scalar
; f64.convert_i64_s, and the vector is rebuilt with f64x2.splat followed by
; f64x2.replace_lane 1.
30 define <2 x double> @convert_s_v2f64(<2 x i64> %x) {
31 ; CHECK-LABEL: convert_s_v2f64:
32 ; CHECK: .functype convert_s_v2f64 (v128) -> (v128)
33 ; CHECK-NEXT: # %bb.0:
34 ; CHECK-NEXT: local.get 0
35 ; CHECK-NEXT: i64x2.extract_lane 0
36 ; CHECK-NEXT: f64.convert_i64_s
37 ; CHECK-NEXT: f64x2.splat
38 ; CHECK-NEXT: local.get 0
39 ; CHECK-NEXT: i64x2.extract_lane 1
40 ; CHECK-NEXT: f64.convert_i64_s
41 ; CHECK-NEXT: f64x2.replace_lane 1
42 ; CHECK-NEXT: # fallthrough-return
43 %a = sitofp <2 x i64> %x to <2 x double>
; Unsigned <2 x i64> -> <2 x double> conversion (uitofp). Unlike the signed
; variant, the expected output stays in v128 form: the input is masked with
; 4294967295 (low 32 bits of each lane) and separately shifted right by 32,
; each part is paired with a v128.const, and the results are combined with
; f64x2.sub and f64x2.add.
; NOTE(review): the instructions that combine each part with its constant are
; not among the assertion lines visible here - confirm against the full file.
47 define <2 x double> @convert_u_v2f64(<2 x i64> %x) {
48 ; CHECK-LABEL: convert_u_v2f64:
49 ; CHECK: .functype convert_u_v2f64 (v128) -> (v128)
50 ; CHECK-NEXT: # %bb.0:
51 ; CHECK-NEXT: local.get 0
52 ; CHECK-NEXT: v128.const 4294967295, 4294967295
53 ; CHECK-NEXT: v128.and
54 ; CHECK-NEXT: v128.const 4841369599423283200, 4841369599423283200
56 ; CHECK-NEXT: local.get 0
57 ; CHECK-NEXT: i32.const 32
58 ; CHECK-NEXT: i64x2.shr_u
59 ; CHECK-NEXT: v128.const 4985484787499139072, 4985484787499139072
61 ; CHECK-NEXT: v128.const 0x1.00000001p84, 0x1.00000001p84
62 ; CHECK-NEXT: f64x2.sub
63 ; CHECK-NEXT: f64x2.add
64 ; CHECK-NEXT: # fallthrough-return
65 %a = uitofp <2 x i64> %x to <2 x double>
; Signed <4 x float> -> <4 x i32> conversion (fptosi). The assertions expect
; this to select a single i32x4.trunc_sat_f32x4_s.
69 define <4 x i32> @trunc_sat_s_v4i32(<4 x float> %x) {
70 ; CHECK-LABEL: trunc_sat_s_v4i32:
71 ; CHECK: .functype trunc_sat_s_v4i32 (v128) -> (v128)
72 ; CHECK-NEXT: # %bb.0:
73 ; CHECK-NEXT: local.get 0
74 ; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
75 ; CHECK-NEXT: # fallthrough-return
76 %a = fptosi <4 x float> %x to <4 x i32>
; Unsigned <4 x float> -> <4 x i32> conversion (fptoui). The assertions expect
; this to select a single i32x4.trunc_sat_f32x4_u.
80 define <4 x i32> @trunc_sat_u_v4i32(<4 x float> %x) {
81 ; CHECK-LABEL: trunc_sat_u_v4i32:
82 ; CHECK: .functype trunc_sat_u_v4i32 (v128) -> (v128)
83 ; CHECK-NEXT: # %bb.0:
84 ; CHECK-NEXT: local.get 0
85 ; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
86 ; CHECK-NEXT: # fallthrough-return
87 %a = fptoui <4 x float> %x to <4 x i32>
; Signed <2 x double> -> <2 x i64> conversion (fptosi). The expected output is
; scalarized with control flow. For lane 1 and then lane 0, the f64 lane is
; extracted and run through a range test involving f64.abs and the constant
; 0x1p63. Each lane then takes either the scalar i64.trunc_f64_s result or the
; fallback constant -9223372036854775808. The two i64 values are reassembled
; with i64x2.splat (lane 0 value) and i64x2.replace_lane 1 (lane 1 value).
; NOTE(review): the block-opening and comparison instructions are not among the
; assertion lines visible here - confirm against the full file.
91 define <2 x i64> @trunc_sat_s_v2i64(<2 x double> %x) {
92 ; CHECK-LABEL: trunc_sat_s_v2i64:
93 ; CHECK: .functype trunc_sat_s_v2i64 (v128) -> (v128)
94 ; CHECK-NEXT: .local f64, i64, i64
95 ; CHECK-NEXT: # %bb.0:
98 ; CHECK-NEXT: local.get 0
99 ; CHECK-NEXT: f64x2.extract_lane 1
100 ; CHECK-NEXT: local.tee 1
101 ; CHECK-NEXT: f64.abs
102 ; CHECK-NEXT: f64.const 0x1p63
104 ; CHECK-NEXT: i32.eqz
105 ; CHECK-NEXT: br_if 0 # 0: down to label1
106 ; CHECK-NEXT: # %bb.1:
107 ; CHECK-NEXT: local.get 1
108 ; CHECK-NEXT: i64.trunc_f64_s
109 ; CHECK-NEXT: local.set 2
110 ; CHECK-NEXT: br 1 # 1: down to label0
111 ; CHECK-NEXT: .LBB6_2:
112 ; CHECK-NEXT: end_block # label1:
113 ; CHECK-NEXT: i64.const -9223372036854775808
114 ; CHECK-NEXT: local.set 2
115 ; CHECK-NEXT: .LBB6_3:
116 ; CHECK-NEXT: end_block # label0:
119 ; CHECK-NEXT: local.get 0
120 ; CHECK-NEXT: f64x2.extract_lane 0
121 ; CHECK-NEXT: local.tee 1
122 ; CHECK-NEXT: f64.abs
123 ; CHECK-NEXT: f64.const 0x1p63
125 ; CHECK-NEXT: i32.eqz
126 ; CHECK-NEXT: br_if 0 # 0: down to label3
127 ; CHECK-NEXT: # %bb.4:
128 ; CHECK-NEXT: local.get 1
129 ; CHECK-NEXT: i64.trunc_f64_s
130 ; CHECK-NEXT: local.set 3
131 ; CHECK-NEXT: br 1 # 1: down to label2
132 ; CHECK-NEXT: .LBB6_5:
133 ; CHECK-NEXT: end_block # label3:
134 ; CHECK-NEXT: i64.const -9223372036854775808
135 ; CHECK-NEXT: local.set 3
136 ; CHECK-NEXT: .LBB6_6:
137 ; CHECK-NEXT: end_block # label2:
138 ; CHECK-NEXT: local.get 3
139 ; CHECK-NEXT: i64x2.splat
140 ; CHECK-NEXT: local.get 2
141 ; CHECK-NEXT: i64x2.replace_lane 1
142 ; CHECK-NEXT: # fallthrough-return
143 %a = fptosi <2 x double> %x to <2 x i64>
; Unsigned <2 x double> -> <2 x i64> conversion (fptoui). The expected output
; is scalarized with control flow. For lane 1 and then lane 0, the f64 lane is
; extracted and two tests against the constants 0x1p64 and 0x0p0 are combined
; with i32.and. Each lane then takes either the scalar i64.trunc_f64_u result
; or the fallback constant 0. The two i64 values are reassembled with
; i64x2.splat (lane 0 value) and i64x2.replace_lane 1 (lane 1 value).
; NOTE(review): the block-opening and comparison instructions are not among the
; assertion lines visible here - confirm against the full file.
147 define <2 x i64> @trunc_sat_u_v2i64(<2 x double> %x) {
148 ; CHECK-LABEL: trunc_sat_u_v2i64:
149 ; CHECK: .functype trunc_sat_u_v2i64 (v128) -> (v128)
150 ; CHECK-NEXT: .local f64, i64, i64
151 ; CHECK-NEXT: # %bb.0:
154 ; CHECK-NEXT: local.get 0
155 ; CHECK-NEXT: f64x2.extract_lane 1
156 ; CHECK-NEXT: local.tee 1
157 ; CHECK-NEXT: f64.const 0x1p64
159 ; CHECK-NEXT: local.get 1
160 ; CHECK-NEXT: f64.const 0x0p0
162 ; CHECK-NEXT: i32.and
163 ; CHECK-NEXT: i32.eqz
164 ; CHECK-NEXT: br_if 0 # 0: down to label5
165 ; CHECK-NEXT: # %bb.1:
166 ; CHECK-NEXT: local.get 1
167 ; CHECK-NEXT: i64.trunc_f64_u
168 ; CHECK-NEXT: local.set 2
169 ; CHECK-NEXT: br 1 # 1: down to label4
170 ; CHECK-NEXT: .LBB7_2:
171 ; CHECK-NEXT: end_block # label5:
172 ; CHECK-NEXT: i64.const 0
173 ; CHECK-NEXT: local.set 2
174 ; CHECK-NEXT: .LBB7_3:
175 ; CHECK-NEXT: end_block # label4:
178 ; CHECK-NEXT: local.get 0
179 ; CHECK-NEXT: f64x2.extract_lane 0
180 ; CHECK-NEXT: local.tee 1
181 ; CHECK-NEXT: f64.const 0x1p64
183 ; CHECK-NEXT: local.get 1
184 ; CHECK-NEXT: f64.const 0x0p0
186 ; CHECK-NEXT: i32.and
187 ; CHECK-NEXT: i32.eqz
188 ; CHECK-NEXT: br_if 0 # 0: down to label7
189 ; CHECK-NEXT: # %bb.4:
190 ; CHECK-NEXT: local.get 1
191 ; CHECK-NEXT: i64.trunc_f64_u
192 ; CHECK-NEXT: local.set 3
193 ; CHECK-NEXT: br 1 # 1: down to label6
194 ; CHECK-NEXT: .LBB7_5:
195 ; CHECK-NEXT: end_block # label7:
196 ; CHECK-NEXT: i64.const 0
197 ; CHECK-NEXT: local.set 3
198 ; CHECK-NEXT: .LBB7_6:
199 ; CHECK-NEXT: end_block # label6:
200 ; CHECK-NEXT: local.get 3
201 ; CHECK-NEXT: i64x2.splat
202 ; CHECK-NEXT: local.get 2
203 ; CHECK-NEXT: i64x2.replace_lane 1
204 ; CHECK-NEXT: # fallthrough-return
205 %a = fptoui <2 x double> %x to <2 x i64>
; Demote with zeroed upper lanes, "widen then truncate" form: %x is first
; concatenated with a zero vector into <4 x double> by the shuffle, and the
; result is fptrunc'd to <4 x float>. The assertions expect the pair to fold
; into a single f32x4.demote_f64x2_zero.
209 define <4 x float> @demote_zero_v4f32(<2 x double> %x) {
210 ; CHECK-LABEL: demote_zero_v4f32:
211 ; CHECK: .functype demote_zero_v4f32 (v128) -> (v128)
212 ; CHECK-NEXT: # %bb.0:
213 ; CHECK-NEXT: local.get 0
214 ; CHECK-NEXT: f32x4.demote_f64x2_zero
215 ; CHECK-NEXT: # fallthrough-return
216 %v = shufflevector <2 x double> %x, <2 x double> zeroinitializer,
217 <4 x i32> <i32 0, i32 1, i32 2, i32 3>
218 %a = fptrunc <4 x double> %v to <4 x float>
; Demote with zeroed upper lanes, "truncate then widen" form: %x is first
; fptrunc'd to <2 x float>, and the result is concatenated with a zero vector
; into <4 x float> by the shuffle. The assertions expect the same single
; f32x4.demote_f64x2_zero as the other operation order.
222 define <4 x float> @demote_zero_v4f32_2(<2 x double> %x) {
223 ; CHECK-LABEL: demote_zero_v4f32_2:
224 ; CHECK: .functype demote_zero_v4f32_2 (v128) -> (v128)
225 ; CHECK-NEXT: # %bb.0:
226 ; CHECK-NEXT: local.get 0
227 ; CHECK-NEXT: f32x4.demote_f64x2_zero
228 ; CHECK-NEXT: # fallthrough-return
229 %v = fptrunc <2 x double> %x to <2 x float>
230 %a = shufflevector <2 x float> %v, <2 x float> zeroinitializer,
231 <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Signed convert-low, "extract then convert" form: the shuffle takes lanes 0
; and 1 of %x as <2 x i32>, which is then sitofp'd to <2 x double>. The
; assertions expect a single f64x2.convert_low_i32x4_s.
235 define <2 x double> @convert_low_s_v2f64(<4 x i32> %x) {
236 ; CHECK-LABEL: convert_low_s_v2f64:
237 ; CHECK: .functype convert_low_s_v2f64 (v128) -> (v128)
238 ; CHECK-NEXT: # %bb.0:
239 ; CHECK-NEXT: local.get 0
240 ; CHECK-NEXT: f64x2.convert_low_i32x4_s
241 ; CHECK-NEXT: # fallthrough-return
242 %v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
243 %a = sitofp <2 x i32> %v to <2 x double>
; Unsigned convert-low, "extract then convert" form: the shuffle takes lanes 0
; and 1 of %x as <2 x i32>, which is then uitofp'd to <2 x double>. The
; assertions expect a single f64x2.convert_low_i32x4_u.
247 define <2 x double> @convert_low_u_v2f64(<4 x i32> %x) {
248 ; CHECK-LABEL: convert_low_u_v2f64:
249 ; CHECK: .functype convert_low_u_v2f64 (v128) -> (v128)
250 ; CHECK-NEXT: # %bb.0:
251 ; CHECK-NEXT: local.get 0
252 ; CHECK-NEXT: f64x2.convert_low_i32x4_u
253 ; CHECK-NEXT: # fallthrough-return
254 %v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
255 %a = uitofp <2 x i32> %v to <2 x double>
; Signed convert-low, "convert then extract" form: all four lanes are sitofp'd
; to <4 x double> and the shuffle then keeps lanes 0 and 1. The assertions
; expect the same single f64x2.convert_low_i32x4_s as the other operation order.
260 define <2 x double> @convert_low_s_v2f64_2(<4 x i32> %x) {
261 ; CHECK-LABEL: convert_low_s_v2f64_2:
262 ; CHECK: .functype convert_low_s_v2f64_2 (v128) -> (v128)
263 ; CHECK-NEXT: # %bb.0:
264 ; CHECK-NEXT: local.get 0
265 ; CHECK-NEXT: f64x2.convert_low_i32x4_s
266 ; CHECK-NEXT: # fallthrough-return
267 %v = sitofp <4 x i32> %x to <4 x double>
268 %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
; Unsigned convert-low, "convert then extract" form: all four lanes are
; uitofp'd to <4 x double> and the shuffle then keeps lanes 0 and 1. The
; assertions expect the same single f64x2.convert_low_i32x4_u as the other
; operation order.
272 define <2 x double> @convert_low_u_v2f64_2(<4 x i32> %x) {
273 ; CHECK-LABEL: convert_low_u_v2f64_2:
274 ; CHECK: .functype convert_low_u_v2f64_2 (v128) -> (v128)
275 ; CHECK-NEXT: # %bb.0:
276 ; CHECK-NEXT: local.get 0
277 ; CHECK-NEXT: f64x2.convert_low_i32x4_u
278 ; CHECK-NEXT: # fallthrough-return
279 %v = uitofp <4 x i32> %x to <4 x double>
280 %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
; Promote-low, "extract then extend" form: the shuffle takes lanes 0 and 1 of
; %x as <2 x float>, which is then fpext'd to <2 x double>. The assertions
; expect a single f64x2.promote_low_f32x4.
284 define <2 x double> @promote_low_v2f64(<4 x float> %x) {
285 ; CHECK-LABEL: promote_low_v2f64:
286 ; CHECK: .functype promote_low_v2f64 (v128) -> (v128)
287 ; CHECK-NEXT: # %bb.0:
288 ; CHECK-NEXT: local.get 0
289 ; CHECK-NEXT: f64x2.promote_low_f32x4
290 ; CHECK-NEXT: # fallthrough-return
291 %v = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 0, i32 1>
292 %a = fpext <2 x float> %v to <2 x double>
; Promote-low, "extend then extract" form: all four lanes are fpext'd to
; <4 x double> and the shuffle then keeps lanes 0 and 1. The assertions expect
; the same single f64x2.promote_low_f32x4 as the other operation order.
296 define <2 x double> @promote_low_v2f64_2(<4 x float> %x) {
297 ; CHECK-LABEL: promote_low_v2f64_2:
298 ; CHECK: .functype promote_low_v2f64_2 (v128) -> (v128)
299 ; CHECK-NEXT: # %bb.0:
300 ; CHECK-NEXT: local.get 0
301 ; CHECK-NEXT: f64x2.promote_low_f32x4
302 ; CHECK-NEXT: # fallthrough-return
303 %v = fpext <4 x float> %x to <4 x double>
304 %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
308 ;; Also check with illegally wide vectors
; Wide signed convert-low, "extract then convert" form: the low four lanes of
; an <8 x i32> are sitofp'd to <4 x double>. The expected functype takes an
; extra leading i32 and has no result; the value is written through local 0
; with v128.store at offsets 0 and 16. Each half comes from a
; f64x2.convert_low_i32x4_s of local 1; for the upper half an i8x16.shuffle
; first moves bytes 8-15 of local 1 into the low eight bytes.
310 define <4 x double> @convert_low_s_v4f64(<8 x i32> %x) {
311 ; CHECK-LABEL: convert_low_s_v4f64:
312 ; CHECK: .functype convert_low_s_v4f64 (i32, v128, v128) -> ()
313 ; CHECK-NEXT: # %bb.0:
314 ; CHECK-NEXT: local.get 0
315 ; CHECK-NEXT: local.get 1
316 ; CHECK-NEXT: local.get 1
317 ; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
318 ; CHECK-NEXT: f64x2.convert_low_i32x4_s
319 ; CHECK-NEXT: v128.store 16
320 ; CHECK-NEXT: local.get 0
321 ; CHECK-NEXT: local.get 1
322 ; CHECK-NEXT: f64x2.convert_low_i32x4_s
323 ; CHECK-NEXT: v128.store 0
324 ; CHECK-NEXT: # fallthrough-return
325 %v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
326 %a = sitofp <4 x i32> %v to <4 x double>
; Wide unsigned convert-low, "extract then convert" form: the low four lanes of
; an <8 x i32> are uitofp'd to <4 x double>. The expected functype takes an
; extra leading i32 and has no result; the value is written through local 0
; with v128.store at offsets 0 and 16. Each half comes from a
; f64x2.convert_low_i32x4_u of local 1; for the upper half an i8x16.shuffle
; first moves bytes 8-15 of local 1 into the low eight bytes.
330 define <4 x double> @convert_low_u_v4f64(<8 x i32> %x) {
331 ; CHECK-LABEL: convert_low_u_v4f64:
332 ; CHECK: .functype convert_low_u_v4f64 (i32, v128, v128) -> ()
333 ; CHECK-NEXT: # %bb.0:
334 ; CHECK-NEXT: local.get 0
335 ; CHECK-NEXT: local.get 1
336 ; CHECK-NEXT: local.get 1
337 ; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
338 ; CHECK-NEXT: f64x2.convert_low_i32x4_u
339 ; CHECK-NEXT: v128.store 16
340 ; CHECK-NEXT: local.get 0
341 ; CHECK-NEXT: local.get 1
342 ; CHECK-NEXT: f64x2.convert_low_i32x4_u
343 ; CHECK-NEXT: v128.store 0
344 ; CHECK-NEXT: # fallthrough-return
345 %v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
346 %a = uitofp <4 x i32> %v to <4 x double>
; Wide signed convert-low, "convert then extract" form: all eight lanes are
; sitofp'd to <8 x double> and the shuffle then keeps lanes 0-3. The expected
; output matches the other operation order: two f64x2.convert_low_i32x4_s of
; local 1 (the upper half after an i8x16.shuffle of bytes 8-15), each written
; through local 0 with v128.store at offsets 16 and 0.
351 define <4 x double> @convert_low_s_v4f64_2(<8 x i32> %x) {
352 ; CHECK-LABEL: convert_low_s_v4f64_2:
353 ; CHECK: .functype convert_low_s_v4f64_2 (i32, v128, v128) -> ()
354 ; CHECK-NEXT: # %bb.0:
355 ; CHECK-NEXT: local.get 0
356 ; CHECK-NEXT: local.get 1
357 ; CHECK-NEXT: local.get 1
358 ; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
359 ; CHECK-NEXT: f64x2.convert_low_i32x4_s
360 ; CHECK-NEXT: v128.store 16
361 ; CHECK-NEXT: local.get 0
362 ; CHECK-NEXT: local.get 1
363 ; CHECK-NEXT: f64x2.convert_low_i32x4_s
364 ; CHECK-NEXT: v128.store 0
365 ; CHECK-NEXT: # fallthrough-return
366 %v = sitofp <8 x i32> %x to <8 x double>
367 %a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Wide unsigned convert-low, "convert then extract" form: all eight lanes are
; uitofp'd to <8 x double> and the shuffle then keeps lanes 0-3. The expected
; output matches the other operation order: two f64x2.convert_low_i32x4_u of
; local 1 (the upper half after an i8x16.shuffle of bytes 8-15), each written
; through local 0 with v128.store at offsets 16 and 0.
371 define <4 x double> @convert_low_u_v4f64_2(<8 x i32> %x) {
372 ; CHECK-LABEL: convert_low_u_v4f64_2:
373 ; CHECK: .functype convert_low_u_v4f64_2 (i32, v128, v128) -> ()
374 ; CHECK-NEXT: # %bb.0:
375 ; CHECK-NEXT: local.get 0
376 ; CHECK-NEXT: local.get 1
377 ; CHECK-NEXT: local.get 1
378 ; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
379 ; CHECK-NEXT: f64x2.convert_low_i32x4_u
380 ; CHECK-NEXT: v128.store 16
381 ; CHECK-NEXT: local.get 0
382 ; CHECK-NEXT: local.get 1
383 ; CHECK-NEXT: f64x2.convert_low_i32x4_u
384 ; CHECK-NEXT: v128.store 0
385 ; CHECK-NEXT: # fallthrough-return
386 %v = uitofp <8 x i32> %x to <8 x double>
387 %a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Wide promote-low, "extract then extend" form: the low four lanes of an
; <8 x float> are fpext'd to <4 x double>. The expected functype takes an extra
; leading i32 and has no result; the value is written through local 0 with
; v128.store at offsets 0 and 16. Each half comes from a
; f64x2.promote_low_f32x4 of local 1; for the upper half an i8x16.shuffle first
; moves bytes 8-15 of local 1 into the low eight bytes.
391 define <4 x double> @promote_low_v4f64(<8 x float> %x) {
392 ; CHECK-LABEL: promote_low_v4f64:
393 ; CHECK: .functype promote_low_v4f64 (i32, v128, v128) -> ()
394 ; CHECK-NEXT: # %bb.0:
395 ; CHECK-NEXT: local.get 0
396 ; CHECK-NEXT: local.get 1
397 ; CHECK-NEXT: local.get 1
398 ; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
399 ; CHECK-NEXT: f64x2.promote_low_f32x4
400 ; CHECK-NEXT: v128.store 16
401 ; CHECK-NEXT: local.get 0
402 ; CHECK-NEXT: local.get 1
403 ; CHECK-NEXT: f64x2.promote_low_f32x4
404 ; CHECK-NEXT: v128.store 0
405 ; CHECK-NEXT: # fallthrough-return
406 %v = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
407 %a = fpext <4 x float> %v to <4 x double>
; Wide promote-low, "extend then extract" form: all eight lanes are fpext'd to
; <8 x double> and the shuffle then keeps lanes 0-3. The expected output
; matches the other operation order: two f64x2.promote_low_f32x4 of local 1
; (the upper half after an i8x16.shuffle of bytes 8-15), each written through
; local 0 with v128.store at offsets 16 and 0.
411 define <4 x double> @promote_low_v4f64_2(<8 x float> %x) {
412 ; CHECK-LABEL: promote_low_v4f64_2:
413 ; CHECK: .functype promote_low_v4f64_2 (i32, v128, v128) -> ()
414 ; CHECK-NEXT: # %bb.0:
415 ; CHECK-NEXT: local.get 0
416 ; CHECK-NEXT: local.get 1
417 ; CHECK-NEXT: local.get 1
418 ; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
419 ; CHECK-NEXT: f64x2.promote_low_f32x4
420 ; CHECK-NEXT: v128.store 16
421 ; CHECK-NEXT: local.get 0
422 ; CHECK-NEXT: local.get 1
423 ; CHECK-NEXT: f64x2.promote_low_f32x4
424 ; CHECK-NEXT: v128.store 0
425 ; CHECK-NEXT: # fallthrough-return
426 %v = fpext <8 x float> %x to <8 x double>
427 %a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Promote of two lanes drawn from different inputs: the shuffle mask <2, 7>
; selects lane 2 of %x and lane 3 of %y, and the pair is fpext'd to
; <2 x double>. The assertions expect an i8x16.shuffle that places those lanes
; (bytes 8-11 of the first operand, bytes 28-31 of the concatenated pair) in
; the low half, followed by f64x2.promote_low_f32x4.
431 define <2 x double> @promote_mixed_v2f64(<4 x float> %x, <4 x float> %y) {
432 ; CHECK-LABEL: promote_mixed_v2f64:
433 ; CHECK: .functype promote_mixed_v2f64 (v128, v128) -> (v128)
434 ; CHECK-NEXT: # %bb.0:
435 ; CHECK-NEXT: local.get 0
436 ; CHECK-NEXT: local.get 1
437 ; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
438 ; CHECK-NEXT: f64x2.promote_low_f32x4
439 ; CHECK-NEXT: # fallthrough-return
440 %v = shufflevector <4 x float> %x, <4 x float> %y, <2 x i32> <i32 2, i32 7>
441 %a = fpext <2 x float> %v to <2 x double>