1 ; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck --check-prefixes=CHECK,NOFP16 %s
2 ; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra -mattr=+v8.2a,+fullfp16 | FileCheck --check-prefixes=CHECK,FP16 %s
4 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
; copysign on <1 x float> where the magnitude (%a) and sign (%b) operands
; already have the same type, so no conversion is expected.
; Expected code: materialize a mask with only the top bit of every 32-bit lane
; set (128 << 24 = 0x80000000), then use bit to insert the masked bits of v1
; into v0.
9 define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0 {
10 ; CHECK-LABEL: test_copysign_v1f32_v1f32:
11 ; CHECK-NEXT: movi.2s v2, #128, lsl #24
12 ; CHECK-NEXT: bit.8b v0, v1, v2
14 %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b)
18 ; WidenVecRes mismatched
; copysign on <1 x float> whose sign operand comes from a <1 x double> that is
; truncated with fptrunc first.
; Expected code: the truncation is a vector fcvtn on v1, followed by the same
; 32-bit sign-mask (128 << 24) and bit sequence as the matching-type case.
19 define <1 x float> @test_copysign_v1f32_v1f64(<1 x float> %a, <1 x double> %b) #0 {
20 ; CHECK-LABEL: test_copysign_v1f32_v1f64:
21 ; CHECK-NEXT: fcvtn v1.2s, v1.2d
22 ; CHECK-NEXT: movi.2s v2, #128, lsl #24
23 ; CHECK-NEXT: bit.8b v0, v1, v2
25 %tmp0 = fptrunc <1 x double> %b to <1 x float>
26 %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %tmp0)
30 declare <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b) #0
; copysign on <1 x double> whose sign operand comes from a <1 x float> that is
; extended with fpext first.
; Expected code: the extension is a vector fcvtl on v1; the 64-bit sign mask is
; built by zeroing v2 and negating it (each lane becomes -0.0, i.e. only the
; sign bit set), and bit inserts the masked bits of v1 into v0.
35 define <1 x double> @test_copysign_v1f64_v1f32(<1 x double> %a, <1 x float> %b) #0 {
36 ; CHECK-LABEL: test_copysign_v1f64_v1f32:
37 ; CHECK-NEXT: fcvtl v1.2d, v1.2s
38 ; CHECK-NEXT: movi.2d v2, #0000000000000000
39 ; CHECK-NEXT: fneg.2d v2, v2
40 ; CHECK-NEXT: bit.16b v0, v1, v2
42 %tmp0 = fpext <1 x float> %b to <1 x double>
43 %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %tmp0)
; copysign on <1 x double> with both operands of the same type.
; Expected code: build the 64-bit sign mask as fneg of an all-zero vector
; (-0.0 in each lane), then use bit to insert the masked bits of v1 into v0.
47 define <1 x double> @test_copysign_v1f64_v1f64(<1 x double> %a, <1 x double> %b) #0 {
48 ; CHECK-LABEL: test_copysign_v1f64_v1f64:
49 ; CHECK-NEXT: movi.2d v2, #0000000000000000
50 ; CHECK-NEXT: fneg.2d v2, v2
51 ; CHECK-NEXT: bit.16b v0, v1, v2
53 %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b)
57 declare <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) #0
; copysign on <2 x float> with both operands of the same type.
; Expected code: a 32-bit-per-lane sign mask (128 << 24 = 0x80000000) in v2,
; then a 64-bit-wide bit that inserts the masked bits of v1 into v0.
61 define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0 {
62 ; CHECK-LABEL: test_copysign_v2f32_v2f32:
63 ; CHECK-NEXT: movi.2s v2, #128, lsl #24
64 ; CHECK-NEXT: bit.8b v0, v1, v2
66 %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
; copysign on <2 x float> whose sign operand is a <2 x double> narrowed with
; fptrunc.
; Expected code: one fcvtn narrows v1 in place, then the usual 32-bit sign mask
; (128 << 24) and bit sequence is applied.
70 define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 {
71 ; CHECK-LABEL: test_copysign_v2f32_v2f64:
72 ; CHECK-NEXT: fcvtn v1.2s, v1.2d
73 ; CHECK-NEXT: movi.2s v2, #128, lsl #24
74 ; CHECK-NEXT: bit.8b v0, v1, v2
76 %tmp0 = fptrunc <2 x double> %b to <2 x float>
77 %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
81 declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
; copysign on <4 x float> with both operands of the same type.
; Expected code: the 128-bit form of the f32 sequence, i.e. a .4s sign mask
; (128 << 24 per lane) and a .16b bit inserting the masked bits of v1 into v0.
85 define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0 {
86 ; CHECK-LABEL: test_copysign_v4f32_v4f32:
87 ; CHECK-NEXT: movi.4s v2, #128, lsl #24
88 ; CHECK-NEXT: bit.16b v0, v1, v2
90 %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
; copysign on <4 x float> whose sign operand is a <4 x double> narrowed with
; fptrunc. The <4 x double> argument occupies two registers (v1 and v2 in the
; expected output).
; Expected code: fcvtn narrows v1 into the low half of v1 and fcvtn2 narrows v2
; into the high half, after which v2 is reused for the .4s sign mask and a
; single .16b bit produces the result in v0.
95 define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #0 {
96 ; CHECK-LABEL: test_copysign_v4f32_v4f64:
97 ; CHECK-NEXT: fcvtn v1.2s, v1.2d
98 ; CHECK-NEXT: fcvtn2 v1.4s, v2.2d
99 ; CHECK-NEXT: movi.4s v2, #128, lsl #24
100 ; CHECK-NEXT: bit.16b v0, v1, v2
102 %tmp0 = fptrunc <4 x double> %b to <4 x float>
103 %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
107 declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0
; copysign on <2 x double> whose sign operand is a <2 x float> widened with
; fpext.
; Expected code: fcvtl widens v1, the 64-bit sign mask is built as fneg of an
; all-zero vector (-0.0 per lane), and bit inserts the masked bits of v1 into
; v0.
; NOTE(review): the name ends in "v232" while sibling tests use "v2f32"; this
; looks like a typo, but it is kept because the label expectation matches it.
111 define <2 x double> @test_copysign_v2f64_v232(<2 x double> %a, <2 x float> %b) #0 {
112 ; CHECK-LABEL: test_copysign_v2f64_v232:
113 ; CHECK-NEXT: fcvtl v1.2d, v1.2s
114 ; CHECK-NEXT: movi.2d v2, #0000000000000000
115 ; CHECK-NEXT: fneg.2d v2, v2
116 ; CHECK-NEXT: bit.16b v0, v1, v2
118 %tmp0 = fpext <2 x float> %b to <2 x double>
119 %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
; copysign on <2 x double> with both operands of the same type.
; Expected code: the sign mask is fneg of an all-zero vector (-0.0 in each
; 64-bit lane), and bit inserts the masked bits of v1 into v0.
123 define <2 x double> @test_copysign_v2f64_v2f64(<2 x double> %a, <2 x double> %b) #0 {
124 ; CHECK-LABEL: test_copysign_v2f64_v2f64:
125 ; CHECK-NEXT: movi.2d v2, #0000000000000000
126 ; CHECK-NEXT: fneg.2d v2, v2
127 ; CHECK-NEXT: bit.16b v0, v1, v2
129 %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
133 declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
137 ; SplitVecRes mismatched
; copysign on <4 x double> whose sign operand is a <4 x float> widened with
; fpext. The <4 x double> value spans two registers (v0 and v1 in the expected
; output) and the <4 x float> sign operand is in v2.
; Expected code: fcvtl widens the low half of v2 into v3 and fcvtl2 widens the
; high half into v2; one 64-bit sign mask (fneg of zero) in v4 is shared by two
; bit instructions, pairing v3 with v0 and v2 with v1.
138 define <4 x double> @test_copysign_v4f64_v4f32(<4 x double> %a, <4 x float> %b) #0 {
139 ; CHECK-LABEL: test_copysign_v4f64_v4f32:
140 ; CHECK-NEXT: fcvtl v3.2d, v2.2s
141 ; CHECK-NEXT: fcvtl2 v2.2d, v2.4s
142 ; CHECK-NEXT: movi.2d v4, #0000000000000000
143 ; CHECK-NEXT: fneg.2d v4, v4
144 ; CHECK-NEXT: bit.16b v1, v2, v4
145 ; CHECK-NEXT: bit.16b v0, v3, v4
147 %tmp0 = fpext <4 x float> %b to <4 x double>
148 %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
; copysign on <4 x double> with both operands of the same type. Each operand
; spans two registers in the expected output (%a in v0/v1, %b in v2/v3).
; Expected code: one 64-bit sign mask (fneg of zero) in v4, reused by two bit
; instructions, one per register pair.
153 define <4 x double> @test_copysign_v4f64_v4f64(<4 x double> %a, <4 x double> %b) #0 {
154 ; CHECK-LABEL: test_copysign_v4f64_v4f64:
155 ; CHECK-NEXT: movi.2d v4, #0000000000000000
156 ; CHECK-NEXT: fneg.2d v4, v4
157 ; CHECK-NEXT: bit.16b v0, v2, v4
158 ; CHECK-NEXT: bit.16b v1, v3, v4
160 %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
164 declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
; copysign on <4 x half> with both operands of the same type. The two run
; configurations expect different code.
; Without fullfp16 (NOFP16 expectations) the operation is done lane by lane:
; each half element of v0 and v1 is extracted (mov h), converted to single
; precision (fcvt s, h), combined through bit with a 32-bit sign mask
; (128 << 24), converted back (fcvt h, s) and inserted into v0 (mov.h).
; With fullfp16 (FP16 expectations) a 16-bit-per-lane sign mask
; (128 << 8 = 0x8000) and a single bit are expected.
168 define <4 x half> @test_copysign_v4f16_v4f16(<4 x half> %a, <4 x half> %b) #0 {
169 ; CHECK-LABEL: test_copysign_v4f16_v4f16:
170 ; NOFP16-NEXT: mov h2, v1[1]
171 ; NOFP16-NEXT: mov h3, v0[1]
172 ; NOFP16-NEXT: movi.4s v4, #128, lsl #24
173 ; NOFP16-NEXT: fcvt s5, h1
174 ; NOFP16-NEXT: fcvt s6, h0
175 ; NOFP16-NEXT: bit.16b v6, v5, v4
176 ; NOFP16-NEXT: mov h5, v1[2]
177 ; NOFP16-NEXT: fcvt s2, h2
178 ; NOFP16-NEXT: fcvt s3, h3
179 ; NOFP16-NEXT: bit.16b v3, v2, v4
180 ; NOFP16-NEXT: mov h2, v0[2]
181 ; NOFP16-NEXT: fcvt s5, h5
182 ; NOFP16-NEXT: fcvt s2, h2
183 ; NOFP16-NEXT: bit.16b v2, v5, v4
184 ; NOFP16-NEXT: mov h1, v1[3]
185 ; NOFP16-NEXT: mov h0, v0[3]
186 ; NOFP16-NEXT: fcvt s1, h1
187 ; NOFP16-NEXT: fcvt s5, h0
188 ; NOFP16-NEXT: fcvt h0, s6
189 ; NOFP16-NEXT: bit.16b v5, v1, v4
190 ; NOFP16-NEXT: fcvt h1, s3
191 ; NOFP16-NEXT: fcvt h2, s2
192 ; NOFP16-NEXT: mov.h v0[1], v1[0]
193 ; NOFP16-NEXT: mov.h v0[2], v2[0]
194 ; NOFP16-NEXT: fcvt h1, s5
195 ; NOFP16-NEXT: mov.h v0[3], v1[0]
198 ; FP16-NEXT: movi.4h v2, #128, lsl #8
199 ; FP16-NEXT: bit.8b v0, v1, v2
201 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b)
; copysign on <4 x half> whose sign operand is a <4 x float> narrowed with
; fptrunc. Both configurations expect the narrowing as one vector fcvtn.
; Without fullfp16 (NOFP16 expectations) the copysign itself is then done lane
; by lane: half elements are extracted, converted to single precision, merged
; through bit with a 32-bit sign mask (128 << 24), converted back to half and
; inserted into v0.
; With fullfp16 (FP16 expectations) a 16-bit sign mask (128 << 8 = 0x8000) and
; a single bit follow the fcvtn.
205 define <4 x half> @test_copysign_v4f16_v4f32(<4 x half> %a, <4 x float> %b) #0 {
206 ; CHECK-LABEL: test_copysign_v4f16_v4f32:
207 ; NOFP16-NEXT: fcvtn v1.4h, v1.4s
208 ; NOFP16-NEXT: mov h2, v0[1]
209 ; NOFP16-NEXT: movi.4s v3, #128, lsl #24
210 ; NOFP16-NEXT: fcvt s4, h0
211 ; NOFP16-NEXT: mov h5, v0[2]
212 ; NOFP16-NEXT: fcvt s2, h2
213 ; NOFP16-NEXT: fcvt s6, h1
214 ; NOFP16-NEXT: bit.16b v4, v6, v3
215 ; NOFP16-NEXT: mov h6, v1[1]
216 ; NOFP16-NEXT: fcvt s5, h5
217 ; NOFP16-NEXT: fcvt s6, h6
218 ; NOFP16-NEXT: bit.16b v2, v6, v3
219 ; NOFP16-NEXT: mov h6, v1[2]
220 ; NOFP16-NEXT: fcvt s6, h6
221 ; NOFP16-NEXT: bit.16b v5, v6, v3
222 ; NOFP16-NEXT: mov h0, v0[3]
223 ; NOFP16-NEXT: fcvt s6, h0
224 ; NOFP16-NEXT: mov h0, v1[3]
225 ; NOFP16-NEXT: fcvt s1, h0
226 ; NOFP16-NEXT: fcvt h0, s4
227 ; NOFP16-NEXT: bit.16b v6, v1, v3
228 ; NOFP16-NEXT: fcvt h1, s2
229 ; NOFP16-NEXT: fcvt h2, s5
230 ; NOFP16-NEXT: mov.h v0[1], v1[0]
231 ; NOFP16-NEXT: mov.h v0[2], v2[0]
232 ; NOFP16-NEXT: fcvt h1, s6
233 ; NOFP16-NEXT: mov.h v0[3], v1[0]
236 ; FP16-NEXT: fcvtn v1.4h, v1.4s
237 ; FP16-NEXT: movi.4h v2, #128, lsl #8
238 ; FP16-NEXT: bit.8b v0, v1, v2
240 %tmp0 = fptrunc <4 x float> %b to <4 x half>
241 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
; copysign on <4 x half> whose sign operand is a <4 x double> narrowed with
; fptrunc. The <4 x double> argument spans v1 and v2 in the expected output.
; Without fullfp16 (NOFP16 expectations) everything is scalarized: each double
; lane is converted to single precision (fcvt s, d), each half lane of v0 is
; converted to single precision (fcvt s, h), the pair is merged through bit
; with a 32-bit sign mask (128 << 24), and the result is converted back to
; half and inserted into v0.
; With fullfp16 (FP16 expectations) the four doubles are converted to half one
; at a time (fcvt h, d) and gathered into v1, after which a 16-bit sign mask
; (128 << 8 = 0x8000) and a single vector bit are expected.
245 define <4 x half> @test_copysign_v4f16_v4f64(<4 x half> %a, <4 x double> %b) #0 {
246 ; CHECK-LABEL: test_copysign_v4f16_v4f64:
247 ; NOFP16-NEXT: mov d3, v2[1]
248 ; NOFP16-NEXT: mov d4, v1[1]
249 ; NOFP16-NEXT: movi.4s v5, #128, lsl #24
250 ; NOFP16-NEXT: fcvt s1, d1
251 ; NOFP16-NEXT: fcvt s6, h0
252 ; NOFP16-NEXT: bit.16b v6, v1, v5
253 ; NOFP16-NEXT: mov h1, v0[1]
254 ; NOFP16-NEXT: fcvt s2, d2
255 ; NOFP16-NEXT: fcvt s4, d4
256 ; NOFP16-NEXT: fcvt s1, h1
257 ; NOFP16-NEXT: bit.16b v1, v4, v5
258 ; NOFP16-NEXT: mov h4, v0[2]
259 ; NOFP16-NEXT: mov h0, v0[3]
260 ; NOFP16-NEXT: fcvt s4, h4
261 ; NOFP16-NEXT: fcvt s3, d3
262 ; NOFP16-NEXT: fcvt s7, h0
263 ; NOFP16-NEXT: fcvt h0, s6
264 ; NOFP16-NEXT: bit.16b v4, v2, v5
265 ; NOFP16-NEXT: bit.16b v7, v3, v5
266 ; NOFP16-NEXT: fcvt h1, s1
267 ; NOFP16-NEXT: fcvt h2, s4
268 ; NOFP16-NEXT: mov.h v0[1], v1[0]
269 ; NOFP16-NEXT: mov.h v0[2], v2[0]
270 ; NOFP16-NEXT: fcvt h1, s7
271 ; NOFP16-NEXT: mov.h v0[3], v1[0]
274 ; FP16-NEXT: mov d3, v1[1]
275 ; FP16-NEXT: fcvt h1, d1
276 ; FP16-NEXT: fcvt h3, d3
277 ; FP16-NEXT: mov.h v1[1], v3[0]
278 ; FP16-NEXT: fcvt h3, d2
279 ; FP16-NEXT: mov d2, v2[1]
280 ; FP16-NEXT: fcvt h2, d2
281 ; FP16-NEXT: mov.h v1[2], v3[0]
282 ; FP16-NEXT: mov.h v1[3], v2[0]
283 ; FP16-NEXT: movi.4h v2, #128, lsl #8
284 ; FP16-NEXT: bit.8b v0, v1, v2
286 %tmp0 = fptrunc <4 x double> %b to <4 x half>
287 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
291 declare <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) #0
; copysign on <8 x half> with both operands of the same type.
; Without fullfp16 (NOFP16 expectations) all eight lanes are processed
; individually: the half elements of v0 and v1 are extracted, converted to
; single precision, merged through bit with a 32-bit sign mask (128 << 24),
; converted back to half and inserted into v0 lane by lane.
; With fullfp16 (FP16 expectations) a .8h sign mask (128 << 8 = 0x8000) and a
; single .16b bit are expected.
295 define <8 x half> @test_copysign_v8f16_v8f16(<8 x half> %a, <8 x half> %b) #0 {
296 ; CHECK-LABEL: test_copysign_v8f16_v8f16:
297 ; NOFP16-NEXT: mov h4, v1[1]
298 ; NOFP16-NEXT: mov h5, v0[1]
299 ; NOFP16-NEXT: movi.4s v2, #128, lsl #24
300 ; NOFP16-NEXT: fcvt s6, h1
301 ; NOFP16-NEXT: fcvt s3, h0
302 ; NOFP16-NEXT: mov h7, v1[2]
303 ; NOFP16-NEXT: mov h16, v0[2]
304 ; NOFP16-NEXT: mov h17, v1[3]
305 ; NOFP16-NEXT: mov h18, v0[3]
306 ; NOFP16-NEXT: bit.16b v3, v6, v2
307 ; NOFP16-NEXT: mov h6, v1[4]
308 ; NOFP16-NEXT: fcvt s4, h4
309 ; NOFP16-NEXT: fcvt s5, h5
310 ; NOFP16-NEXT: bit.16b v5, v4, v2
311 ; NOFP16-NEXT: mov h4, v0[4]
312 ; NOFP16-NEXT: fcvt s7, h7
313 ; NOFP16-NEXT: fcvt s16, h16
314 ; NOFP16-NEXT: bit.16b v16, v7, v2
315 ; NOFP16-NEXT: mov h7, v1[5]
316 ; NOFP16-NEXT: fcvt s17, h17
317 ; NOFP16-NEXT: fcvt s18, h18
318 ; NOFP16-NEXT: bit.16b v18, v17, v2
319 ; NOFP16-NEXT: mov h17, v0[5]
320 ; NOFP16-NEXT: fcvt s6, h6
321 ; NOFP16-NEXT: fcvt s4, h4
322 ; NOFP16-NEXT: bit.16b v4, v6, v2
323 ; NOFP16-NEXT: mov h6, v1[6]
324 ; NOFP16-NEXT: fcvt s7, h7
325 ; NOFP16-NEXT: fcvt s17, h17
326 ; NOFP16-NEXT: bit.16b v17, v7, v2
327 ; NOFP16-NEXT: mov h7, v0[6]
328 ; NOFP16-NEXT: fcvt s6, h6
329 ; NOFP16-NEXT: fcvt s7, h7
330 ; NOFP16-NEXT: bit.16b v7, v6, v2
331 ; NOFP16-NEXT: mov h1, v1[7]
332 ; NOFP16-NEXT: mov h0, v0[7]
333 ; NOFP16-NEXT: fcvt s1, h1
334 ; NOFP16-NEXT: fcvt s6, h0
335 ; NOFP16-NEXT: bit.16b v6, v1, v2
336 ; NOFP16-NEXT: fcvt h0, s3
337 ; NOFP16-NEXT: fcvt h1, s5
338 ; NOFP16-NEXT: mov.h v0[1], v1[0]
339 ; NOFP16-NEXT: fcvt h1, s16
340 ; NOFP16-NEXT: mov.h v0[2], v1[0]
341 ; NOFP16-NEXT: fcvt h1, s18
342 ; NOFP16-NEXT: fcvt h2, s4
343 ; NOFP16-NEXT: fcvt h3, s17
344 ; NOFP16-NEXT: fcvt h4, s7
345 ; NOFP16-NEXT: mov.h v0[3], v1[0]
346 ; NOFP16-NEXT: mov.h v0[4], v2[0]
347 ; NOFP16-NEXT: mov.h v0[5], v3[0]
348 ; NOFP16-NEXT: mov.h v0[6], v4[0]
349 ; NOFP16-NEXT: fcvt h1, s6
350 ; NOFP16-NEXT: mov.h v0[7], v1[0]
353 ; FP16-NEXT: movi.8h v2, #128, lsl #8
354 ; FP16-NEXT: bit.16b v0, v1, v2
356 %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
; copysign on <8 x half> whose sign operand is an <8 x float> narrowed with
; fptrunc. The <8 x float> argument spans v1 and v2 in the expected output, and
; both configurations expect the narrowing as two vector fcvtn instructions.
; Without fullfp16 (NOFP16 expectations) the copysign is then done lane by
; lane: half elements are extracted, converted to single precision, merged
; through bit with a 32-bit sign mask (128 << 24), converted back to half and
; inserted into v0. Every lane insert names both destination lane and source
; register so that a wrong source register is caught.
; With fullfp16 (FP16 expectations) the two narrowed halves are joined with
; mov.d, followed by a .8h sign mask (128 << 8 = 0x8000) and a single bit.
360 define <8 x half> @test_copysign_v8f16_v8f32(<8 x half> %a, <8 x float> %b) #0 {
361 ; CHECK-LABEL: test_copysign_v8f16_v8f32:
362 ; NOFP16-NEXT: fcvtn v2.4h, v2.4s
363 ; NOFP16-NEXT: fcvtn v4.4h, v1.4s
364 ; NOFP16-NEXT: mov h5, v0[1]
365 ; NOFP16-NEXT: movi.4s v1, #128, lsl #24
366 ; NOFP16-NEXT: fcvt s3, h0
367 ; NOFP16-NEXT: mov h6, v0[2]
368 ; NOFP16-NEXT: mov h7, v0[3]
369 ; NOFP16-NEXT: mov h16, v0[4]
370 ; NOFP16-NEXT: mov h17, v0[5]
371 ; NOFP16-NEXT: fcvt s5, h5
372 ; NOFP16-NEXT: fcvt s18, h4
373 ; NOFP16-NEXT: fcvt s16, h16
374 ; NOFP16-NEXT: bit.16b v3, v18, v1
375 ; NOFP16-NEXT: fcvt s18, h2
376 ; NOFP16-NEXT: bit.16b v16, v18, v1
377 ; NOFP16-NEXT: mov h18, v4[1]
378 ; NOFP16-NEXT: fcvt s6, h6
379 ; NOFP16-NEXT: fcvt s18, h18
380 ; NOFP16-NEXT: bit.16b v5, v18, v1
381 ; NOFP16-NEXT: mov h18, v4[2]
382 ; NOFP16-NEXT: fcvt s18, h18
383 ; NOFP16-NEXT: bit.16b v6, v18, v1
384 ; NOFP16-NEXT: mov h18, v0[6]
385 ; NOFP16-NEXT: fcvt s7, h7
386 ; NOFP16-NEXT: mov h4, v4[3]
387 ; NOFP16-NEXT: fcvt s17, h17
388 ; NOFP16-NEXT: fcvt s4, h4
389 ; NOFP16-NEXT: bit.16b v7, v4, v1
390 ; NOFP16-NEXT: mov h4, v2[1]
391 ; NOFP16-NEXT: fcvt s18, h18
392 ; NOFP16-NEXT: fcvt s4, h4
393 ; NOFP16-NEXT: bit.16b v17, v4, v1
394 ; NOFP16-NEXT: mov h4, v2[2]
395 ; NOFP16-NEXT: fcvt s4, h4
396 ; NOFP16-NEXT: bit.16b v18, v4, v1
397 ; NOFP16-NEXT: mov h0, v0[7]
398 ; NOFP16-NEXT: fcvt s4, h0
399 ; NOFP16-NEXT: mov h0, v2[3]
400 ; NOFP16-NEXT: fcvt s0, h0
401 ; NOFP16-NEXT: bit.16b v4, v0, v1
402 ; NOFP16-NEXT: fcvt h0, s3
403 ; NOFP16-NEXT: fcvt h1, s5
404 ; NOFP16-NEXT: mov.h v0[1], v1[0]
405 ; NOFP16-NEXT: fcvt h1, s16
406 ; NOFP16-NEXT: fcvt h2, s6
407 ; NOFP16-NEXT: fcvt h3, s7
408 ; NOFP16-NEXT: fcvt h5, s17
409 ; NOFP16-NEXT: fcvt h6, s18
410 ; NOFP16-NEXT: mov.h v0[2], v2[0]
411 ; NOFP16-NEXT: mov.h v0[3], v3[0]
412 ; NOFP16-NEXT: mov.h v0[4], v1[0]
413 ; NOFP16-NEXT: mov.h v0[5], v5[0]
414 ; NOFP16-NEXT: mov.h v0[6], v6[0]
415 ; NOFP16-NEXT: fcvt h1, s4
416 ; NOFP16-NEXT: mov.h v0[7], v1[0]
419 ; FP16-NEXT: fcvtn v2.4h, v2.4s
420 ; FP16-NEXT: fcvtn v1.4h, v1.4s
421 ; FP16-NEXT: mov.d v1[1], v2[0]
422 ; FP16-NEXT: movi.8h v2, #128, lsl #8
423 ; FP16-NEXT: bit.16b v0, v1, v2
425 %tmp0 = fptrunc <8 x float> %b to <8 x half>
426 %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0)
430 declare <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) #0
432 attributes #0 = { nounwind }