1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_NO_EXTEND_ROUND
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND
4 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND
5 ; RUN: llc -aarch64-sve-vector-bits-min=256 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_EXTEND_ROUND
6 ; RUN: llc -aarch64-sve-vector-bits-min=512 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND
7 ; RUN: llc -aarch64-sve-vector-bits-min=2048 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND
; Module-level target description used by every function below: data layout string and target triple.
; NOTE(review): "m:o" in the data layout requests Mach-O name mangling while the triple is Linux;
; presumably harmless for these checks - confirm it is intentional.
9 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
11 target triple = "aarch64-unknown-linux-gnu"
; copysign on <4 x half>: the magnitude operand is loaded from %ap, the sign operand from %bp,
; and the result is stored back to %ap. The assertions expect NEON code on d registers
; (mvni mask, then bsl).
15 define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
16 ; CHECK-LABEL: test_copysign_v4f16_v4f16:
18 ; CHECK-NEXT: mvni v0.4h, #128, lsl #8
19 ; CHECK-NEXT: ldr d1, [x0]
20 ; CHECK-NEXT: ldr d2, [x1]
21 ; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b
22 ; CHECK-NEXT: str d0, [x0]
24 %a = load <4 x half>, ptr %ap
25 %b = load <4 x half>, ptr %bp
26 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b)
27 store <4 x half> %r, ptr %ap
; copysign on <8 x half>: magnitude from %ap, sign from %bp, result stored back to %ap.
; The assertions expect NEON code on q registers (mvni mask, then bsl).
31 define void @test_copysign_v8f16_v8f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
32 ; CHECK-LABEL: test_copysign_v8f16_v8f16:
34 ; CHECK-NEXT: mvni v0.8h, #128, lsl #8
35 ; CHECK-NEXT: ldr q1, [x0]
36 ; CHECK-NEXT: ldr q2, [x1]
37 ; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
38 ; CHECK-NEXT: str q0, [x0]
40 %a = load <8 x half>, ptr %ap
41 %b = load <8 x half>, ptr %bp
42 %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
43 store <8 x half> %r, ptr %ap
; copysign on <16 x half>: magnitude from %ap, sign from %bp, result stored back to %ap.
; The assertions expect SVE code under a vl16 predicate: the sign bit of the %bp data (0x8000)
; and the remaining bits of the %ap data (0x7fff) are isolated with `and` and merged with `orr`.
47 define void @test_copysign_v16f16_v16f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
48 ; CHECK-LABEL: test_copysign_v16f16_v16f16:
50 ; CHECK-NEXT: ptrue p0.h, vl16
51 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
52 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
53 ; CHECK-NEXT: and z1.h, z1.h, #0x8000
54 ; CHECK-NEXT: and z0.h, z0.h, #0x7fff
55 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
56 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
58 %a = load <16 x half>, ptr %ap
59 %b = load <16 x half>, ptr %bp
60 %r = call <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b)
61 store <16 x half> %r, ptr %ap
; copysign on <32 x half>: magnitude from %ap, sign from %bp, result stored back to %ap.
; This function carries no vscale_range attribute, so its expected output is asserted per
; command-line configuration: the 256-bit runs expect two vl16 halves (the upper half addressed
; through x8 = 16 elements), while the 512-bit and wider runs expect a single vl32 operation.
65 define void @test_copysign_v32f16_v32f16(ptr %ap, ptr %bp) #0 {
66 ; VBITS_GE_256-LABEL: test_copysign_v32f16_v32f16:
67 ; VBITS_GE_256: // %bb.0:
68 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
69 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
70 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
71 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
72 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0]
73 ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
74 ; VBITS_GE_256-NEXT: and z1.h, z1.h, #0x8000
75 ; VBITS_GE_256-NEXT: and z0.h, z0.h, #0x7fff
76 ; VBITS_GE_256-NEXT: and z2.h, z2.h, #0x7fff
77 ; VBITS_GE_256-NEXT: and z3.h, z3.h, #0x8000
78 ; VBITS_GE_256-NEXT: orr z0.d, z0.d, z1.d
79 ; VBITS_GE_256-NEXT: orr z1.d, z2.d, z3.d
80 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
81 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
82 ; VBITS_GE_256-NEXT: ret
84 ; VBITS_GE_512-LABEL: test_copysign_v32f16_v32f16:
85 ; VBITS_GE_512: // %bb.0:
86 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
87 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
88 ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
89 ; VBITS_GE_512-NEXT: and z1.h, z1.h, #0x8000
90 ; VBITS_GE_512-NEXT: and z0.h, z0.h, #0x7fff
91 ; VBITS_GE_512-NEXT: orr z0.d, z0.d, z1.d
92 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
93 ; VBITS_GE_512-NEXT: ret
94 %a = load <32 x half>, ptr %ap
95 %b = load <32 x half>, ptr %bp
96 %r = call <32 x half> @llvm.copysign.v32f16(<32 x half> %a, <32 x half> %b)
97 store <32 x half> %r, ptr %ap
; copysign on <64 x half>: magnitude from %ap, sign from %bp, result stored back to %ap.
; Declared with vscale_range(8,0); the assertions expect a single SVE and/and/orr sequence
; under a vl64 predicate.
101 define void @test_copysign_v64f16_v64f16(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
102 ; CHECK-LABEL: test_copysign_v64f16_v64f16:
104 ; CHECK-NEXT: ptrue p0.h, vl64
105 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
106 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
107 ; CHECK-NEXT: and z1.h, z1.h, #0x8000
108 ; CHECK-NEXT: and z0.h, z0.h, #0x7fff
109 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
110 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
112 %a = load <64 x half>, ptr %ap
113 %b = load <64 x half>, ptr %bp
114 %r = call <64 x half> @llvm.copysign.v64f16(<64 x half> %a, <64 x half> %b)
115 store <64 x half> %r, ptr %ap
; copysign on <128 x half>: magnitude from %ap, sign from %bp, result stored back to %ap.
; Declared with vscale_range(16,0); the assertions expect a single SVE and/and/orr sequence
; under a vl128 predicate.
119 define void @test_copysign_v128f16_v128f16(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
120 ; CHECK-LABEL: test_copysign_v128f16_v128f16:
122 ; CHECK-NEXT: ptrue p0.h, vl128
123 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
124 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
125 ; CHECK-NEXT: and z1.h, z1.h, #0x8000
126 ; CHECK-NEXT: and z0.h, z0.h, #0x7fff
127 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
128 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
130 %a = load <128 x half>, ptr %ap
131 %b = load <128 x half>, ptr %bp
132 %r = call <128 x half> @llvm.copysign.v128f16(<128 x half> %a, <128 x half> %b)
133 store <128 x half> %r, ptr %ap
; copysign on <2 x float>: magnitude from %ap, sign from %bp, result stored back to %ap.
; The assertions expect NEON code on d registers (mvni mask, then bsl).
139 define void @test_copysign_v2f32_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
140 ; CHECK-LABEL: test_copysign_v2f32_v2f32:
142 ; CHECK-NEXT: mvni v0.2s, #128, lsl #24
143 ; CHECK-NEXT: ldr d1, [x0]
144 ; CHECK-NEXT: ldr d2, [x1]
145 ; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b
146 ; CHECK-NEXT: str d0, [x0]
148 %a = load <2 x float>, ptr %ap
149 %b = load <2 x float>, ptr %bp
150 %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
151 store <2 x float> %r, ptr %ap
; copysign on <4 x float>: magnitude from %ap, sign from %bp, result stored back to %ap.
; The assertions expect NEON code on q registers (mvni mask, then bsl).
155 define void @test_copysign_v4f32_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
156 ; CHECK-LABEL: test_copysign_v4f32_v4f32:
158 ; CHECK-NEXT: mvni v0.4s, #128, lsl #24
159 ; CHECK-NEXT: ldr q1, [x0]
160 ; CHECK-NEXT: ldr q2, [x1]
161 ; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
162 ; CHECK-NEXT: str q0, [x0]
164 %a = load <4 x float>, ptr %ap
165 %b = load <4 x float>, ptr %bp
166 %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
167 store <4 x float> %r, ptr %ap
; copysign on <8 x float>: magnitude from %ap, sign from %bp, result stored back to %ap.
; The assertions expect SVE code under a vl8 predicate: `and` with 0x80000000 on the %bp data,
; `and` with 0x7fffffff on the %ap data, then `orr`.
171 define void @test_copysign_v8f32_v8f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
172 ; CHECK-LABEL: test_copysign_v8f32_v8f32:
174 ; CHECK-NEXT: ptrue p0.s, vl8
175 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
176 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
177 ; CHECK-NEXT: and z1.s, z1.s, #0x80000000
178 ; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
179 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
180 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
182 %a = load <8 x float>, ptr %ap
183 %b = load <8 x float>, ptr %bp
184 %r = call <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b)
185 store <8 x float> %r, ptr %ap
; copysign on <16 x float>: magnitude from %ap, sign from %bp, result stored back to %ap.
; This function carries no vscale_range attribute, so its expected output is asserted per
; command-line configuration: the 256-bit runs expect two vl8 halves (the upper half addressed
; through x8 = 8 elements), while the 512-bit and wider runs expect a single vl16 operation.
189 define void @test_copysign_v16f32_v16f32(ptr %ap, ptr %bp) #0 {
190 ; VBITS_GE_256-LABEL: test_copysign_v16f32_v16f32:
191 ; VBITS_GE_256: // %bb.0:
192 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
193 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
194 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
195 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
196 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0]
197 ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
198 ; VBITS_GE_256-NEXT: and z1.s, z1.s, #0x80000000
199 ; VBITS_GE_256-NEXT: and z0.s, z0.s, #0x7fffffff
200 ; VBITS_GE_256-NEXT: and z2.s, z2.s, #0x7fffffff
201 ; VBITS_GE_256-NEXT: and z3.s, z3.s, #0x80000000
202 ; VBITS_GE_256-NEXT: orr z0.d, z0.d, z1.d
203 ; VBITS_GE_256-NEXT: orr z1.d, z2.d, z3.d
204 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
205 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
206 ; VBITS_GE_256-NEXT: ret
208 ; VBITS_GE_512-LABEL: test_copysign_v16f32_v16f32:
209 ; VBITS_GE_512: // %bb.0:
210 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
211 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
212 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
213 ; VBITS_GE_512-NEXT: and z1.s, z1.s, #0x80000000
214 ; VBITS_GE_512-NEXT: and z0.s, z0.s, #0x7fffffff
215 ; VBITS_GE_512-NEXT: orr z0.d, z0.d, z1.d
216 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
217 ; VBITS_GE_512-NEXT: ret
218 %a = load <16 x float>, ptr %ap
219 %b = load <16 x float>, ptr %bp
220 %r = call <16 x float> @llvm.copysign.v16f32(<16 x float> %a, <16 x float> %b)
221 store <16 x float> %r, ptr %ap
; copysign on <32 x float>: magnitude from %ap, sign from %bp, result stored back to %ap.
; Declared with vscale_range(8,0); the assertions expect a single SVE and/and/orr sequence
; under a vl32 predicate.
225 define void @test_copysign_v32f32_v32f32(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
226 ; CHECK-LABEL: test_copysign_v32f32_v32f32:
228 ; CHECK-NEXT: ptrue p0.s, vl32
229 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
230 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
231 ; CHECK-NEXT: and z1.s, z1.s, #0x80000000
232 ; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
233 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
234 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
236 %a = load <32 x float>, ptr %ap
237 %b = load <32 x float>, ptr %bp
238 %r = call <32 x float> @llvm.copysign.v32f32(<32 x float> %a, <32 x float> %b)
239 store <32 x float> %r, ptr %ap
; copysign on <64 x float>: magnitude from %ap, sign from %bp, result stored back to %ap.
; Declared with vscale_range(16,0); the assertions expect a single SVE and/and/orr sequence
; under a vl64 predicate.
243 define void @test_copysign_v64f32_v64f32(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
244 ; CHECK-LABEL: test_copysign_v64f32_v64f32:
246 ; CHECK-NEXT: ptrue p0.s, vl64
247 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
248 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
249 ; CHECK-NEXT: and z1.s, z1.s, #0x80000000
250 ; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
251 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
252 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
254 %a = load <64 x float>, ptr %ap
255 %b = load <64 x float>, ptr %bp
256 %r = call <64 x float> @llvm.copysign.v64f32(<64 x float> %a, <64 x float> %b)
257 store <64 x float> %r, ptr %ap
; copysign on <2 x double>: magnitude from %ap, sign from %bp, result stored back to %ap.
; The assertions expect NEON code on q registers; here the mask register is produced by
; movi (all ones) followed by fneg before the bsl.
263 define void @test_copysign_v2f64_v2f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
264 ; CHECK-LABEL: test_copysign_v2f64_v2f64:
266 ; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
267 ; CHECK-NEXT: ldr q1, [x0]
268 ; CHECK-NEXT: ldr q2, [x1]
269 ; CHECK-NEXT: fneg v0.2d, v0.2d
270 ; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
271 ; CHECK-NEXT: str q0, [x0]
273 %a = load <2 x double>, ptr %ap
274 %b = load <2 x double>, ptr %bp
275 %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
276 store <2 x double> %r, ptr %ap
; copysign on <4 x double>: magnitude from %ap, sign from %bp, result stored back to %ap.
; The assertions expect SVE code under a vl4 predicate: `and` with the 64-bit sign mask on the
; %bp data, `and` with its complement on the %ap data, then `orr`.
280 define void @test_copysign_v4f64_v4f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
281 ; CHECK-LABEL: test_copysign_v4f64_v4f64:
283 ; CHECK-NEXT: ptrue p0.d, vl4
284 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
285 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
286 ; CHECK-NEXT: and z1.d, z1.d, #0x8000000000000000
287 ; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
288 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
289 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
291 %a = load <4 x double>, ptr %ap
292 %b = load <4 x double>, ptr %bp
293 %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
294 store <4 x double> %r, ptr %ap
; copysign on <8 x double>: magnitude from %ap, sign from %bp, result stored back to %ap.
; This function carries no vscale_range attribute, so its expected output is asserted per
; command-line configuration: the 256-bit runs expect two vl4 halves (the upper half addressed
; through x8 = 4 elements), while the 512-bit and wider runs expect a single vl8 operation.
298 define void @test_copysign_v8f64_v8f64(ptr %ap, ptr %bp) #0 {
299 ; VBITS_GE_256-LABEL: test_copysign_v8f64_v8f64:
300 ; VBITS_GE_256: // %bb.0:
301 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
302 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
303 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
304 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
305 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0]
306 ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
307 ; VBITS_GE_256-NEXT: and z1.d, z1.d, #0x8000000000000000
308 ; VBITS_GE_256-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
309 ; VBITS_GE_256-NEXT: and z2.d, z2.d, #0x7fffffffffffffff
310 ; VBITS_GE_256-NEXT: and z3.d, z3.d, #0x8000000000000000
311 ; VBITS_GE_256-NEXT: orr z0.d, z0.d, z1.d
312 ; VBITS_GE_256-NEXT: orr z1.d, z2.d, z3.d
313 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
314 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
315 ; VBITS_GE_256-NEXT: ret
317 ; VBITS_GE_512-LABEL: test_copysign_v8f64_v8f64:
318 ; VBITS_GE_512: // %bb.0:
319 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
320 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
321 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
322 ; VBITS_GE_512-NEXT: and z1.d, z1.d, #0x8000000000000000
323 ; VBITS_GE_512-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
324 ; VBITS_GE_512-NEXT: orr z0.d, z0.d, z1.d
325 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
326 ; VBITS_GE_512-NEXT: ret
327 %a = load <8 x double>, ptr %ap
328 %b = load <8 x double>, ptr %bp
329 %r = call <8 x double> @llvm.copysign.v8f64(<8 x double> %a, <8 x double> %b)
330 store <8 x double> %r, ptr %ap
; copysign on <16 x double>: magnitude from %ap, sign from %bp, result stored back to %ap.
; Declared with vscale_range(8,0); the assertions expect a single SVE and/and/orr sequence
; under a vl16 predicate.
334 define void @test_copysign_v16f64_v16f64(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
335 ; CHECK-LABEL: test_copysign_v16f64_v16f64:
337 ; CHECK-NEXT: ptrue p0.d, vl16
338 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
339 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
340 ; CHECK-NEXT: and z1.d, z1.d, #0x8000000000000000
341 ; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
342 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
343 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
345 %a = load <16 x double>, ptr %ap
346 %b = load <16 x double>, ptr %bp
347 %r = call <16 x double> @llvm.copysign.v16f64(<16 x double> %a, <16 x double> %b)
348 store <16 x double> %r, ptr %ap
; copysign on <32 x double>: magnitude from %ap, sign from %bp, result stored back to %ap.
; Declared with vscale_range(16,0); the assertions expect a single SVE and/and/orr sequence
; under a vl32 predicate.
352 define void @test_copysign_v32f64_v32f64(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
353 ; CHECK-LABEL: test_copysign_v32f64_v32f64:
355 ; CHECK-NEXT: ptrue p0.d, vl32
356 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
357 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
358 ; CHECK-NEXT: and z1.d, z1.d, #0x8000000000000000
359 ; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
360 ; CHECK-NEXT: orr z0.d, z0.d, z1.d
361 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
363 %a = load <32 x double>, ptr %ap
364 %b = load <32 x double>, ptr %bp
365 %r = call <32 x double> @llvm.copysign.v32f64(<32 x double> %a, <32 x double> %b)
366 store <32 x double> %r, ptr %ap
; Mixed-width copysign: the <2 x float> magnitude comes from %ap, while the sign operand is a
; <2 x double> loaded from %bp and narrowed with fptrunc. Result is stored back to %ap.
; The assertions expect NEON code: fcvtn for the narrowing, then bit with an mvni mask.
372 define void @test_copysign_v2f32_v2f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
373 ; CHECK-LABEL: test_copysign_v2f32_v2f64:
375 ; CHECK-NEXT: ldr q0, [x1]
376 ; CHECK-NEXT: mvni v1.2s, #128, lsl #24
377 ; CHECK-NEXT: ldr d2, [x0]
378 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
379 ; CHECK-NEXT: bit v0.8b, v2.8b, v1.8b
380 ; CHECK-NEXT: str d0, [x0]
382 %a = load <2 x float>, ptr %ap
383 %b = load <2 x double>, ptr %bp
384 %tmp0 = fptrunc <2 x double> %b to <2 x float>
385 %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
386 store <2 x float> %r, ptr %ap
; Mixed-width copysign: the <4 x float> magnitude comes from %ap, while the sign operand is a
; <4 x double> loaded from %bp and narrowed with fptrunc. Result is stored back to %ap.
; The assertions expect the sign operand to be loaded and converted with SVE (ld1d, fcvt, uzp1)
; and the final merge to be a NEON bit on q registers.
393 define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
394 ; CHECK-LABEL: test_copysign_v4f32_v4f64:
396 ; CHECK-NEXT: ptrue p0.d, vl4
397 ; CHECK-NEXT: mvni v1.4s, #128, lsl #24
398 ; CHECK-NEXT: ldr q2, [x0]
399 ; CHECK-NEXT: ptrue p1.d
400 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
401 ; CHECK-NEXT: fcvt z0.s, p1/m, z0.d
402 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
403 ; CHECK-NEXT: bit v0.16b, v2.16b, v1.16b
404 ; CHECK-NEXT: str q0, [x0]
406 %a = load <4 x float>, ptr %ap
407 %b = load <4 x double>, ptr %bp
408 %tmp0 = fptrunc <4 x double> %b to <4 x float>
409 %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
410 store <4 x float> %r, ptr %ap
; Mixed-width copysign: the <2 x double> magnitude comes from %ap, while the sign operand is a
; <2 x float> loaded from %bp and widened with fpext. Result is stored back to %ap.
; The assertions expect NEON code: fcvtl for the widening, a movi + fneg mask, then bsl.
416 define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
417 ; CHECK-LABEL: test_copysign_v2f64_v2f32:
419 ; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
420 ; CHECK-NEXT: ldr d1, [x1]
421 ; CHECK-NEXT: ldr q2, [x0]
422 ; CHECK-NEXT: fcvtl v1.2d, v1.2s
423 ; CHECK-NEXT: fneg v0.2d, v0.2d
424 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
425 ; CHECK-NEXT: str q0, [x0]
427 %a = load <2 x double>, ptr %ap
428 %b = load < 2 x float>, ptr %bp
429 %tmp0 = fpext <2 x float> %b to <2 x double>
430 %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
431 store <2 x double> %r, ptr %ap
437 ; SplitVecRes mismatched
; Mixed-width copysign: the <4 x double> magnitude comes from %ap, while the sign operand is a
; <4 x float> loaded from %bp and widened with fpext. Result is stored back to %ap.
; This is the only function here whose expected output depends on the
; --combiner-vector-fcopysign-extend-round option: the runs without it expect the sign operand
; to be fetched with ld1w directly into .d lanes, the runs with it expect ldr q + uunpklo.
; Both variants then convert with fcvt and merge with and/and/orr.
438 define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
439 ; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
440 ; CHECK_NO_EXTEND_ROUND: // %bb.0:
441 ; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.d, vl4
442 ; CHECK_NO_EXTEND_ROUND-NEXT: ld1w { z0.d }, p0/z, [x1]
443 ; CHECK_NO_EXTEND_ROUND-NEXT: ld1d { z1.d }, p0/z, [x0]
444 ; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z0.d, p0/m, z0.s
445 ; CHECK_NO_EXTEND_ROUND-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
446 ; CHECK_NO_EXTEND_ROUND-NEXT: and z0.d, z0.d, #0x8000000000000000
447 ; CHECK_NO_EXTEND_ROUND-NEXT: orr z0.d, z1.d, z0.d
448 ; CHECK_NO_EXTEND_ROUND-NEXT: st1d { z0.d }, p0, [x0]
449 ; CHECK_NO_EXTEND_ROUND-NEXT: ret
451 ; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
452 ; CHECK_EXTEND_ROUND: // %bb.0:
453 ; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d, vl4
454 ; CHECK_EXTEND_ROUND-NEXT: ldr q0, [x1]
455 ; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.d, z0.s
456 ; CHECK_EXTEND_ROUND-NEXT: fcvt z0.d, p0/m, z0.s
457 ; CHECK_EXTEND_ROUND-NEXT: ld1d { z1.d }, p0/z, [x0]
458 ; CHECK_EXTEND_ROUND-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
459 ; CHECK_EXTEND_ROUND-NEXT: and z0.d, z0.d, #0x8000000000000000
460 ; CHECK_EXTEND_ROUND-NEXT: orr z0.d, z1.d, z0.d
461 ; CHECK_EXTEND_ROUND-NEXT: st1d { z0.d }, p0, [x0]
462 ; CHECK_EXTEND_ROUND-NEXT: ret
463 %a = load <4 x double>, ptr %ap
464 %b = load <4 x float>, ptr %bp
465 %tmp0 = fpext <4 x float> %b to <4 x double>
466 %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
467 store <4 x double> %r, ptr %ap
; Mixed-width copysign: the <4 x half> magnitude comes from %ap, while the sign operand is a
; <4 x float> loaded from %bp and narrowed with fptrunc. Result is stored back to %ap.
; The assertions expect NEON code: fcvtn for the narrowing, then bit with an mvni mask.
473 define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
474 ; CHECK-LABEL: test_copysign_v4f16_v4f32:
476 ; CHECK-NEXT: ldr q0, [x1]
477 ; CHECK-NEXT: mvni v1.4h, #128, lsl #8
478 ; CHECK-NEXT: ldr d2, [x0]
479 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
480 ; CHECK-NEXT: bit v0.8b, v2.8b, v1.8b
481 ; CHECK-NEXT: str d0, [x0]
483 %a = load <4 x half>, ptr %ap
484 %b = load <4 x float>, ptr %bp
485 %tmp0 = fptrunc <4 x float> %b to <4 x half>
486 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
487 store <4 x half> %r, ptr %ap
; Mixed-width copysign: the <4 x half> magnitude comes from %ap, while the sign operand is a
; <4 x double> loaded from %bp and narrowed straight to half with fptrunc. Result is stored
; back to %ap. The assertions expect an SVE load and fcvt to .h, two uzp1 steps (.s then .h),
; and a NEON bit on d registers for the merge.
491 define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
492 ; CHECK-LABEL: test_copysign_v4f16_v4f64:
494 ; CHECK-NEXT: ptrue p0.d, vl4
495 ; CHECK-NEXT: mvni v1.4h, #128, lsl #8
496 ; CHECK-NEXT: ldr d2, [x0]
497 ; CHECK-NEXT: ptrue p1.d
498 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
499 ; CHECK-NEXT: fcvt z0.h, p1/m, z0.d
500 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
501 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
502 ; CHECK-NEXT: bit v0.8b, v2.8b, v1.8b
503 ; CHECK-NEXT: str d0, [x0]
505 %a = load <4 x half>, ptr %ap
506 %b = load <4 x double>, ptr %bp
507 %tmp0 = fptrunc <4 x double> %b to <4 x half>
508 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
509 store <4 x half> %r, ptr %ap
; Declaration of the <4 x half> copysign intrinsic called by the v4f16 tests in this file.
513 declare <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) #0
; Mixed-width copysign: the <8 x half> magnitude comes from %ap, while the sign operand is an
; <8 x float> loaded from %bp and narrowed with fptrunc. Result is stored back to %ap.
; The assertions expect an SVE load, fcvt to .h and uzp1 for the sign operand, followed by a
; NEON bit on q registers for the merge.
518 define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
519 ; CHECK-LABEL: test_copysign_v8f16_v8f32:
521 ; CHECK-NEXT: ptrue p0.s, vl8
522 ; CHECK-NEXT: mvni v1.8h, #128, lsl #8
523 ; CHECK-NEXT: ldr q2, [x0]
524 ; CHECK-NEXT: ptrue p1.s
525 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1]
526 ; CHECK-NEXT: fcvt z0.h, p1/m, z0.s
527 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
528 ; CHECK-NEXT: bit v0.16b, v2.16b, v1.16b
529 ; CHECK-NEXT: str q0, [x0]
531 %a = load <8 x half>, ptr %ap
532 %b = load <8 x float>, ptr %bp
533 %tmp0 = fptrunc <8 x float> %b to <8 x half>
534 %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0)
535 store <8 x half> %r, ptr %ap
; Declarations of the remaining copysign intrinsics used above, one per vector type:
; half (8 to 128 lanes), float (2 to 64 lanes) and double (2 to 32 lanes).
539 declare <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) #0
540 declare <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b) #0
541 declare <32 x half> @llvm.copysign.v32f16(<32 x half> %a, <32 x half> %b) #0
542 declare <64 x half> @llvm.copysign.v64f16(<64 x half> %a, <64 x half> %b) #0
543 declare <128 x half> @llvm.copysign.v128f16(<128 x half> %a, <128 x half> %b) #0
545 declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
546 declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0
547 declare <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b) #0
548 declare <16 x float> @llvm.copysign.v16f32(<16 x float> %a, <16 x float> %b) #0
549 declare <32 x float> @llvm.copysign.v32f32(<32 x float> %a, <32 x float> %b) #0
550 declare <64 x float> @llvm.copysign.v64f32(<64 x float> %a, <64 x float> %b) #0
552 declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
553 declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
554 declare <8 x double> @llvm.copysign.v8f64(<8 x double> %a, <8 x double> %b) #0
555 declare <16 x double> @llvm.copysign.v16f64(<16 x double> %a, <16 x double> %b) #0
556 declare <32 x double> @llvm.copysign.v32f64(<32 x double> %a, <32 x double> %b) #0
; Attribute group referenced as #0 by the functions and declarations above; it enables the
; SVE target feature for them.
558 attributes #0 = { "target-features"="+sve" }