1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_NO_EXTEND_ROUND
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND
4 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND
5 ; RUN: llc -aarch64-sve-vector-bits-min=256 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_EXTEND_ROUND
6 ; RUN: llc -aarch64-sve-vector-bits-min=512 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND
7 ; RUN: llc -aarch64-sve-vector-bits-min=2048 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND
10 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
12 target triple = "aarch64-unknown-linux-gnu"
; v4f16 copysign, both operands half: fits a 64-bit NEON register, so it
; lowers to an inverted sign-bit mask (mvni #128, lsl #8) plus a bsl select.
16 define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
17 ; CHECK-LABEL: test_copysign_v4f16_v4f16:
19 ; CHECK-NEXT: mvni v0.4h, #128, lsl #8
20 ; CHECK-NEXT: ldr d1, [x0]
21 ; CHECK-NEXT: ldr d2, [x1]
22 ; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b
23 ; CHECK-NEXT: str d0, [x0]
25 %a = load <4 x half>, ptr %ap
26 %b = load <4 x half>, ptr %bp
27 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b)
28 store <4 x half> %r, ptr %ap
; v8f16 copysign: fills one 128-bit NEON register; same mvni+bsl pattern as
; the v4f16 case but on the q-sized (.8h/.16b) forms.
32 define void @test_copysign_v8f16_v8f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
33 ; CHECK-LABEL: test_copysign_v8f16_v8f16:
35 ; CHECK-NEXT: mvni v0.8h, #128, lsl #8
36 ; CHECK-NEXT: ldr q1, [x0]
37 ; CHECK-NEXT: ldr q2, [x1]
38 ; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
39 ; CHECK-NEXT: str q0, [x0]
41 %a = load <8 x half>, ptr %ap
42 %b = load <8 x half>, ptr %bp
43 %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
44 store <8 x half> %r, ptr %ap
; v16f16 (256-bit) exceeds NEON width: with vscale_range(2,0) it lowers to the
; SVE path — ptrue vl16 predicated loads/stores and a vector bsl against a
; mov-materialized 0x7fff per-lane mask.
48 define void @test_copysign_v16f16_v16f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
49 ; CHECK-LABEL: test_copysign_v16f16_v16f16:
51 ; CHECK-NEXT: ptrue p0.h, vl16
52 ; CHECK-NEXT: mov z0.h, #32767 // =0x7fff
53 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
54 ; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1]
55 ; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
56 ; CHECK-NEXT: st1h { z1.h }, p0, [x0]
58 %a = load <16 x half>, ptr %ap
59 %b = load <16 x half>, ptr %bp
60 %r = call <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b)
61 store <16 x half> %r, ptr %ap
; v32f16 (512-bit) with no vscale_range: at VBITS_GE_256 the data is split
; into two vl16 halves (second half addressed via x8 index), while at
; VBITS_GE_512 a single vl32 predicated bsl covers the whole vector.
65 define void @test_copysign_v32f16_v32f16(ptr %ap, ptr %bp) #0 {
66 ; VBITS_GE_256-LABEL: test_copysign_v32f16_v32f16:
67 ; VBITS_GE_256: // %bb.0:
68 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
69 ; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
70 ; VBITS_GE_256-NEXT: mov z0.h, #32767 // =0x7fff
71 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
72 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0]
73 ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1, x8, lsl #1]
74 ; VBITS_GE_256-NEXT: ld1h { z4.h }, p0/z, [x1]
75 ; VBITS_GE_256-NEXT: bsl z1.d, z1.d, z3.d, z0.d
76 ; VBITS_GE_256-NEXT: bsl z2.d, z2.d, z4.d, z0.d
77 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0, x8, lsl #1]
78 ; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0]
79 ; VBITS_GE_256-NEXT: ret
81 ; VBITS_GE_512-LABEL: test_copysign_v32f16_v32f16:
82 ; VBITS_GE_512: // %bb.0:
83 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
84 ; VBITS_GE_512-NEXT: mov z0.h, #32767 // =0x7fff
85 ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x0]
86 ; VBITS_GE_512-NEXT: ld1h { z2.h }, p0/z, [x1]
87 ; VBITS_GE_512-NEXT: bsl z1.d, z1.d, z2.d, z0.d
88 ; VBITS_GE_512-NEXT: st1h { z1.h }, p0, [x0]
89 ; VBITS_GE_512-NEXT: ret
90 %a = load <32 x half>, ptr %ap
91 %b = load <32 x half>, ptr %bp
92 %r = call <32 x half> @llvm.copysign.v32f16(<32 x half> %a, <32 x half> %b)
93 store <32 x half> %r, ptr %ap
; v64f16 with vscale_range(8,0) (>=1024-bit SVE): a single vl64 predicated
; bsl handles the whole 1024-bit vector.
97 define void @test_copysign_v64f16_v64f16(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
98 ; CHECK-LABEL: test_copysign_v64f16_v64f16:
100 ; CHECK-NEXT: ptrue p0.h, vl64
101 ; CHECK-NEXT: mov z0.h, #32767 // =0x7fff
102 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
103 ; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1]
104 ; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
105 ; CHECK-NEXT: st1h { z1.h }, p0, [x0]
107 %a = load <64 x half>, ptr %ap
108 %b = load <64 x half>, ptr %bp
109 %r = call <64 x half> @llvm.copysign.v64f16(<64 x half> %a, <64 x half> %b)
110 store <64 x half> %r, ptr %ap
; v128f16 with vscale_range(16,0) (>=2048-bit SVE): largest f16 case, still a
; single vl128 predicated bsl.
114 define void @test_copysign_v128f16_v128f16(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
115 ; CHECK-LABEL: test_copysign_v128f16_v128f16:
117 ; CHECK-NEXT: ptrue p0.h, vl128
118 ; CHECK-NEXT: mov z0.h, #32767 // =0x7fff
119 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
120 ; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1]
121 ; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
122 ; CHECK-NEXT: st1h { z1.h }, p0, [x0]
124 %a = load <128 x half>, ptr %ap
125 %b = load <128 x half>, ptr %bp
126 %r = call <128 x half> @llvm.copysign.v128f16(<128 x half> %a, <128 x half> %b)
127 store <128 x half> %r, ptr %ap
; v2f32 copysign: 64-bit NEON register; inverted sign mask via
; mvni #128, lsl #24 then bsl.
133 define void @test_copysign_v2f32_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
134 ; CHECK-LABEL: test_copysign_v2f32_v2f32:
136 ; CHECK-NEXT: mvni v0.2s, #128, lsl #24
137 ; CHECK-NEXT: ldr d1, [x0]
138 ; CHECK-NEXT: ldr d2, [x1]
139 ; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b
140 ; CHECK-NEXT: str d0, [x0]
142 %a = load <2 x float>, ptr %ap
143 %b = load <2 x float>, ptr %bp
144 %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
145 store <2 x float> %r, ptr %ap
; v4f32 copysign: 128-bit NEON register; same mvni+bsl pattern on q-forms.
149 define void @test_copysign_v4f32_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
150 ; CHECK-LABEL: test_copysign_v4f32_v4f32:
152 ; CHECK-NEXT: mvni v0.4s, #128, lsl #24
153 ; CHECK-NEXT: ldr q1, [x0]
154 ; CHECK-NEXT: ldr q2, [x1]
155 ; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
156 ; CHECK-NEXT: str q0, [x0]
158 %a = load <4 x float>, ptr %ap
159 %b = load <4 x float>, ptr %bp
160 %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
161 store <4 x float> %r, ptr %ap
; v8f32 (256-bit): SVE path with vl8 predication and a 0x7fffffff per-lane
; mask combined via bsl.
165 define void @test_copysign_v8f32_v8f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
166 ; CHECK-LABEL: test_copysign_v8f32_v8f32:
168 ; CHECK-NEXT: ptrue p0.s, vl8
169 ; CHECK-NEXT: mov z0.s, #0x7fffffff
170 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
171 ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
172 ; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
173 ; CHECK-NEXT: st1w { z1.s }, p0, [x0]
175 %a = load <8 x float>, ptr %ap
176 %b = load <8 x float>, ptr %bp
177 %r = call <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b)
178 store <8 x float> %r, ptr %ap
; v16f32 (512-bit), no vscale_range: split into two vl8 halves at
; VBITS_GE_256, single vl16 op at VBITS_GE_512.
182 define void @test_copysign_v16f32_v16f32(ptr %ap, ptr %bp) #0 {
183 ; VBITS_GE_256-LABEL: test_copysign_v16f32_v16f32:
184 ; VBITS_GE_256: // %bb.0:
185 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
186 ; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
187 ; VBITS_GE_256-NEXT: mov z0.s, #0x7fffffff
188 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
189 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0]
190 ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1, x8, lsl #2]
191 ; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x1]
192 ; VBITS_GE_256-NEXT: bsl z1.d, z1.d, z3.d, z0.d
193 ; VBITS_GE_256-NEXT: bsl z2.d, z2.d, z4.d, z0.d
194 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2]
195 ; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0]
196 ; VBITS_GE_256-NEXT: ret
198 ; VBITS_GE_512-LABEL: test_copysign_v16f32_v16f32:
199 ; VBITS_GE_512: // %bb.0:
200 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
201 ; VBITS_GE_512-NEXT: mov z0.s, #0x7fffffff
202 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0]
203 ; VBITS_GE_512-NEXT: ld1w { z2.s }, p0/z, [x1]
204 ; VBITS_GE_512-NEXT: bsl z1.d, z1.d, z2.d, z0.d
205 ; VBITS_GE_512-NEXT: st1w { z1.s }, p0, [x0]
206 ; VBITS_GE_512-NEXT: ret
207 %a = load <16 x float>, ptr %ap
208 %b = load <16 x float>, ptr %bp
209 %r = call <16 x float> @llvm.copysign.v16f32(<16 x float> %a, <16 x float> %b)
210 store <16 x float> %r, ptr %ap
; v32f32 with vscale_range(8,0): single vl32 predicated bsl.
214 define void @test_copysign_v32f32_v32f32(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
215 ; CHECK-LABEL: test_copysign_v32f32_v32f32:
217 ; CHECK-NEXT: ptrue p0.s, vl32
218 ; CHECK-NEXT: mov z0.s, #0x7fffffff
219 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
220 ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
221 ; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
222 ; CHECK-NEXT: st1w { z1.s }, p0, [x0]
224 %a = load <32 x float>, ptr %ap
225 %b = load <32 x float>, ptr %bp
226 %r = call <32 x float> @llvm.copysign.v32f32(<32 x float> %a, <32 x float> %b)
227 store <32 x float> %r, ptr %ap
; v64f32 with vscale_range(16,0): single vl64 predicated bsl.
231 define void @test_copysign_v64f32_v64f32(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
232 ; CHECK-LABEL: test_copysign_v64f32_v64f32:
234 ; CHECK-NEXT: ptrue p0.s, vl64
235 ; CHECK-NEXT: mov z0.s, #0x7fffffff
236 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
237 ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
238 ; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
239 ; CHECK-NEXT: st1w { z1.s }, p0, [x0]
241 %a = load <64 x float>, ptr %ap
242 %b = load <64 x float>, ptr %bp
243 %r = call <64 x float> @llvm.copysign.v64f32(<64 x float> %a, <64 x float> %b)
244 store <64 x float> %r, ptr %ap
; v2f64 copysign: 64-bit lanes have no mvni immediate encoding for the mask,
; so the 0x7fff... mask is built as movi all-ones then fneg (flips only the
; sign bit of each lane) before the bsl.
250 define void @test_copysign_v2f64_v2f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
251 ; CHECK-LABEL: test_copysign_v2f64_v2f64:
253 ; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
254 ; CHECK-NEXT: ldr q1, [x0]
255 ; CHECK-NEXT: ldr q2, [x1]
256 ; CHECK-NEXT: fneg v0.2d, v0.2d
257 ; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
258 ; CHECK-NEXT: str q0, [x0]
260 %a = load <2 x double>, ptr %ap
261 %b = load <2 x double>, ptr %bp
262 %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
263 store <2 x double> %r, ptr %ap
; v4f64 (256-bit): SVE path, vl4 predication with a 0x7fffffffffffffff mask.
267 define void @test_copysign_v4f64_v4f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
268 ; CHECK-LABEL: test_copysign_v4f64_v4f64:
270 ; CHECK-NEXT: ptrue p0.d, vl4
271 ; CHECK-NEXT: mov z0.d, #0x7fffffffffffffff
272 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
273 ; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
274 ; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
275 ; CHECK-NEXT: st1d { z1.d }, p0, [x0]
277 %a = load <4 x double>, ptr %ap
278 %b = load <4 x double>, ptr %bp
279 %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
280 store <4 x double> %r, ptr %ap
; v8f64 (512-bit), no vscale_range: split into two vl4 halves at
; VBITS_GE_256, single vl8 op at VBITS_GE_512.
284 define void @test_copysign_v8f64_v8f64(ptr %ap, ptr %bp) #0 {
285 ; VBITS_GE_256-LABEL: test_copysign_v8f64_v8f64:
286 ; VBITS_GE_256: // %bb.0:
287 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
288 ; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
289 ; VBITS_GE_256-NEXT: mov z0.d, #0x7fffffffffffffff
290 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
291 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0]
292 ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1, x8, lsl #3]
293 ; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1]
294 ; VBITS_GE_256-NEXT: bsl z1.d, z1.d, z3.d, z0.d
295 ; VBITS_GE_256-NEXT: bsl z2.d, z2.d, z4.d, z0.d
296 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3]
297 ; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0]
298 ; VBITS_GE_256-NEXT: ret
300 ; VBITS_GE_512-LABEL: test_copysign_v8f64_v8f64:
301 ; VBITS_GE_512: // %bb.0:
302 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
303 ; VBITS_GE_512-NEXT: mov z0.d, #0x7fffffffffffffff
304 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x0]
305 ; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x1]
306 ; VBITS_GE_512-NEXT: bsl z1.d, z1.d, z2.d, z0.d
307 ; VBITS_GE_512-NEXT: st1d { z1.d }, p0, [x0]
308 ; VBITS_GE_512-NEXT: ret
309 %a = load <8 x double>, ptr %ap
310 %b = load <8 x double>, ptr %bp
311 %r = call <8 x double> @llvm.copysign.v8f64(<8 x double> %a, <8 x double> %b)
312 store <8 x double> %r, ptr %ap
; v16f64 with vscale_range(8,0): single vl16 predicated bsl.
316 define void @test_copysign_v16f64_v16f64(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
317 ; CHECK-LABEL: test_copysign_v16f64_v16f64:
319 ; CHECK-NEXT: ptrue p0.d, vl16
320 ; CHECK-NEXT: mov z0.d, #0x7fffffffffffffff
321 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
322 ; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
323 ; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
324 ; CHECK-NEXT: st1d { z1.d }, p0, [x0]
326 %a = load <16 x double>, ptr %ap
327 %b = load <16 x double>, ptr %bp
328 %r = call <16 x double> @llvm.copysign.v16f64(<16 x double> %a, <16 x double> %b)
329 store <16 x double> %r, ptr %ap
; v32f64 with vscale_range(16,0): single vl32 predicated bsl.
333 define void @test_copysign_v32f64_v32f64(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
334 ; CHECK-LABEL: test_copysign_v32f64_v32f64:
336 ; CHECK-NEXT: ptrue p0.d, vl32
337 ; CHECK-NEXT: mov z0.d, #0x7fffffffffffffff
338 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
339 ; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
340 ; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
341 ; CHECK-NEXT: st1d { z1.d }, p0, [x0]
343 %a = load <32 x double>, ptr %ap
344 %b = load <32 x double>, ptr %bp
345 %r = call <32 x double> @llvm.copysign.v32f64(<32 x double> %a, <32 x double> %b)
346 store <32 x double> %r, ptr %ap
; Mixed-type case: sign source is v2f64 truncated to v2f32 before copysign.
; Expect NEON fcvtn for the round, then the mask select (bit form here).
352 define void @test_copysign_v2f32_v2f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
353 ; CHECK-LABEL: test_copysign_v2f32_v2f64:
355 ; CHECK-NEXT: ldr q0, [x1]
356 ; CHECK-NEXT: mvni v1.2s, #128, lsl #24
357 ; CHECK-NEXT: ldr d2, [x0]
358 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
359 ; CHECK-NEXT: bit v0.8b, v2.8b, v1.8b
360 ; CHECK-NEXT: str d0, [x0]
362 %a = load <2 x float>, ptr %ap
363 %b = load <2 x double>, ptr %bp
364 %tmp0 = fptrunc <2 x double> %b to <2 x float>
365 %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
366 store <2 x float> %r, ptr %ap
; Sign source v4f64 (256-bit, SVE load) truncated to v4f32: SVE fcvt + uzp1
; narrow the sign operand, then a NEON bif applies the mask select.
373 define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
374 ; CHECK-LABEL: test_copysign_v4f32_v4f64:
376 ; CHECK-NEXT: ptrue p0.d, vl4
377 ; CHECK-NEXT: ldr q0, [x0]
378 ; CHECK-NEXT: mvni v2.4s, #128, lsl #24
379 ; CHECK-NEXT: ptrue p1.d
380 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
381 ; CHECK-NEXT: fcvt z1.s, p1/m, z1.d
382 ; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
383 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
384 ; CHECK-NEXT: str q0, [x0]
386 %a = load <4 x float>, ptr %ap
387 %b = load <4 x double>, ptr %bp
388 %tmp0 = fptrunc <4 x double> %b to <4 x float>
389 %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
390 store <4 x float> %r, ptr %ap
; Sign source v2f32 extended to v2f64 via fcvtl; mask built with movi
; all-ones + fneg as in the plain v2f64 case. (The `< 2 x float>` spacing in
; the IR below is valid, just inconsistent with the rest of the file.)
396 define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
397 ; CHECK-LABEL: test_copysign_v2f64_v2f32:
399 ; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
400 ; CHECK-NEXT: ldr d1, [x1]
401 ; CHECK-NEXT: ldr q2, [x0]
402 ; CHECK-NEXT: fcvtl v1.2d, v1.2s
403 ; CHECK-NEXT: fneg v0.2d, v0.2d
404 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
405 ; CHECK-NEXT: str q0, [x0]
407 %a = load <2 x double>, ptr %ap
408 %b = load < 2 x float>, ptr %bp
409 %tmp0 = fpext <2 x float> %b to <2 x double>
410 %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
411 store <2 x double> %r, ptr %ap
417 ; SplitVecRes mismatched
; This is the case the two -combiner-vector-fcopysign-extend-round prefix
; sets diverge on: without the combine the sign operand is loaded with an
; extending ld1w then converted; with it, a plain ldr + uunpklo widens the
; lanes before the fcvt. The bsl select is the same in both.
418 define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
419 ; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
420 ; CHECK_NO_EXTEND_ROUND: // %bb.0:
421 ; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.d, vl4
422 ; CHECK_NO_EXTEND_ROUND-NEXT: mov z2.d, #0x7fffffffffffffff
423 ; CHECK_NO_EXTEND_ROUND-NEXT: ld1d { z0.d }, p0/z, [x0]
424 ; CHECK_NO_EXTEND_ROUND-NEXT: ld1w { z1.d }, p0/z, [x1]
425 ; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z1.d, p0/m, z1.s
426 ; CHECK_NO_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z2.d
427 ; CHECK_NO_EXTEND_ROUND-NEXT: st1d { z0.d }, p0, [x0]
428 ; CHECK_NO_EXTEND_ROUND-NEXT: ret
430 ; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
431 ; CHECK_EXTEND_ROUND: // %bb.0:
432 ; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d, vl4
433 ; CHECK_EXTEND_ROUND-NEXT: mov z2.d, #0x7fffffffffffffff
434 ; CHECK_EXTEND_ROUND-NEXT: ld1d { z0.d }, p0/z, [x0]
435 ; CHECK_EXTEND_ROUND-NEXT: ldr q1, [x1]
436 ; CHECK_EXTEND_ROUND-NEXT: uunpklo z1.d, z1.s
437 ; CHECK_EXTEND_ROUND-NEXT: fcvt z1.d, p0/m, z1.s
438 ; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z2.d
439 ; CHECK_EXTEND_ROUND-NEXT: st1d { z0.d }, p0, [x0]
440 ; CHECK_EXTEND_ROUND-NEXT: ret
441 %a = load <4 x double>, ptr %ap
442 %b = load <4 x float>, ptr %bp
443 %tmp0 = fpext <4 x float> %b to <4 x double>
444 %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
445 store <4 x double> %r, ptr %ap
; Sign source v4f32 truncated to v4f16 with NEON fcvtn, then a bit select
; against the f16 sign mask.
451 define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
452 ; CHECK-LABEL: test_copysign_v4f16_v4f32:
454 ; CHECK-NEXT: ldr q0, [x1]
455 ; CHECK-NEXT: mvni v1.4h, #128, lsl #8
456 ; CHECK-NEXT: ldr d2, [x0]
457 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
458 ; CHECK-NEXT: bit v0.8b, v2.8b, v1.8b
459 ; CHECK-NEXT: str d0, [x0]
461 %a = load <4 x half>, ptr %ap
462 %b = load <4 x float>, ptr %bp
463 %tmp0 = fptrunc <4 x float> %b to <4 x half>
464 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
465 store <4 x half> %r, ptr %ap
; Sign source v4f64 truncated all the way to v4f16: SVE fcvt d->h followed by
; two uzp1 narrows (s then h) to pack the lanes, then a NEON bif select.
469 define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
470 ; CHECK-LABEL: test_copysign_v4f16_v4f64:
472 ; CHECK-NEXT: ptrue p0.d, vl4
473 ; CHECK-NEXT: ldr d0, [x0]
474 ; CHECK-NEXT: mvni v2.4h, #128, lsl #8
475 ; CHECK-NEXT: ptrue p1.d
476 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
477 ; CHECK-NEXT: fcvt z1.h, p1/m, z1.d
478 ; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
479 ; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
480 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
481 ; CHECK-NEXT: str d0, [x0]
483 %a = load <4 x half>, ptr %ap
484 %b = load <4 x double>, ptr %bp
485 %tmp0 = fptrunc <4 x double> %b to <4 x half>
486 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
487 store <4 x half> %r, ptr %ap
491 declare <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) #0
; Sign source v8f32 (256-bit, SVE load) truncated to v8f16: SVE fcvt s->h +
; uzp1 pack the sign operand, then a NEON bif applies the mask select.
496 define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
497 ; CHECK-LABEL: test_copysign_v8f16_v8f32:
499 ; CHECK-NEXT: ptrue p0.s, vl8
500 ; CHECK-NEXT: ldr q0, [x0]
501 ; CHECK-NEXT: mvni v2.8h, #128, lsl #8
502 ; CHECK-NEXT: ptrue p1.s
503 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
504 ; CHECK-NEXT: fcvt z1.h, p1/m, z1.s
505 ; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
506 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
507 ; CHECK-NEXT: str q0, [x0]
509 %a = load <8 x half>, ptr %ap
510 %b = load <8 x float>, ptr %bp
511 %tmp0 = fptrunc <8 x float> %b to <8 x half>
512 %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0)
513 store <8 x half> %r, ptr %ap
517 declare <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) #0
518 declare <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b) #0
519 declare <32 x half> @llvm.copysign.v32f16(<32 x half> %a, <32 x half> %b) #0
520 declare <64 x half> @llvm.copysign.v64f16(<64 x half> %a, <64 x half> %b) #0
521 declare <128 x half> @llvm.copysign.v128f16(<128 x half> %a, <128 x half> %b) #0
523 declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
524 declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0
525 declare <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b) #0
526 declare <16 x float> @llvm.copysign.v16f32(<16 x float> %a, <16 x float> %b) #0
527 declare <32 x float> @llvm.copysign.v32f32(<32 x float> %a, <32 x float> %b) #0
528 declare <64 x float> @llvm.copysign.v64f32(<64 x float> %a, <64 x float> %b) #0
530 declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
531 declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
532 declare <8 x double> @llvm.copysign.v8f64(<8 x double> %a, <8 x double> %b) #0
533 declare <16 x double> @llvm.copysign.v16f64(<16 x double> %a, <16 x double> %b) #0
534 declare <32 x double> @llvm.copysign.v32f64(<32 x double> %a, <32 x double> %b) #0
536 attributes #0 = { "target-features"="+sve2" }