; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible-sve < %s | FileCheck %s --check-prefixes=CHECK,SVE2

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

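; Same-width copysign: merge the sign bit of the second operand into the
; magnitude of the first. With +sve this is AND (sign), AND (magnitude) and
; ORR; with +sve2 a single BSL selects the magnitude bits from the first
; operand and the sign bit from the second.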
define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f16_v4f16:
; SVE:       // %bb.0:
; SVE-NEXT:    ldr d0, [x0]
; SVE-NEXT:    ldr d1, [x1]
; SVE-NEXT:    and z1.h, z1.h, #0x8000
; SVE-NEXT:    and z0.h, z0.h, #0x7fff
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    str d0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v4f16_v4f16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
; SVE2-NEXT:    ldr d1, [x0]
; SVE2-NEXT:    ldr d2, [x1]
; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT:    str d1, [x0]
; SVE2-NEXT:    ret
  %a = load <4 x half>, ptr %ap
  %b = load <4 x half>, ptr %bp
  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b)
  store <4 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v8f16_v8f16(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v8f16_v8f16:
; SVE:       // %bb.0:
; SVE-NEXT:    ldr q0, [x0]
; SVE-NEXT:    ldr q1, [x1]
; SVE-NEXT:    and z1.h, z1.h, #0x8000
; SVE-NEXT:    and z0.h, z0.h, #0x7fff
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    str q0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v8f16_v8f16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
; SVE2-NEXT:    ldr q1, [x0]
; SVE2-NEXT:    ldr q2, [x1]
; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT:    str q1, [x0]
; SVE2-NEXT:    ret
  %a = load <8 x half>, ptr %ap
  %b = load <8 x half>, ptr %bp
  %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
  store <8 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v16f16_v16f16(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v16f16_v16f16:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    and z0.h, z0.h, #0x8000
; SVE-NEXT:    and z3.h, z3.h, #0x8000
; SVE-NEXT:    and z1.h, z1.h, #0x7fff
; SVE-NEXT:    and z2.h, z2.h, #0x7fff
; SVE-NEXT:    orr z0.d, z1.d, z0.d
; SVE-NEXT:    orr z1.d, z2.d, z3.d
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v16f16_v16f16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
; SVE2-NEXT:    ldp q1, q4, [x1]
; SVE2-NEXT:    ldp q2, q3, [x0]
; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; SVE2-NEXT:    bsl z3.d, z3.d, z4.d, z0.d
; SVE2-NEXT:    stp q2, q3, [x0]
; SVE2-NEXT:    ret
  %a = load <16 x half>, ptr %ap
  %b = load <16 x half>, ptr %bp
  %r = call <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b)
  store <16 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v2f32_v2f32(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v2f32_v2f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ldr d0, [x0]
; SVE-NEXT:    ldr d1, [x1]
; SVE-NEXT:    and z1.s, z1.s, #0x80000000
; SVE-NEXT:    and z0.s, z0.s, #0x7fffffff
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    str d0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v2f32_v2f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.s, #0x7fffffff
; SVE2-NEXT:    ldr d1, [x0]
; SVE2-NEXT:    ldr d2, [x1]
; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT:    str d1, [x0]
; SVE2-NEXT:    ret
  %a = load <2 x float>, ptr %ap
  %b = load <2 x float>, ptr %bp
  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
  store <2 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v4f32_v4f32(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f32_v4f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ldr q0, [x0]
; SVE-NEXT:    ldr q1, [x1]
; SVE-NEXT:    and z1.s, z1.s, #0x80000000
; SVE-NEXT:    and z0.s, z0.s, #0x7fffffff
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    str q0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v4f32_v4f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.s, #0x7fffffff
; SVE2-NEXT:    ldr q1, [x0]
; SVE2-NEXT:    ldr q2, [x1]
; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT:    str q1, [x0]
; SVE2-NEXT:    ret
  %a = load <4 x float>, ptr %ap
  %b = load <4 x float>, ptr %bp
  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
  store <4 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v8f32_v8f32(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v8f32_v8f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    and z0.s, z0.s, #0x80000000
; SVE-NEXT:    and z3.s, z3.s, #0x80000000
; SVE-NEXT:    and z1.s, z1.s, #0x7fffffff
; SVE-NEXT:    and z2.s, z2.s, #0x7fffffff
; SVE-NEXT:    orr z0.d, z1.d, z0.d
; SVE-NEXT:    orr z1.d, z2.d, z3.d
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v8f32_v8f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.s, #0x7fffffff
; SVE2-NEXT:    ldp q1, q4, [x1]
; SVE2-NEXT:    ldp q2, q3, [x0]
; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; SVE2-NEXT:    bsl z3.d, z3.d, z4.d, z0.d
; SVE2-NEXT:    stp q2, q3, [x0]
; SVE2-NEXT:    ret
  %a = load <8 x float>, ptr %ap
  %b = load <8 x float>, ptr %bp
  %r = call <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b)
  store <8 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v2f64_v2f64(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v2f64_v2f64:
; SVE:       // %bb.0:
; SVE-NEXT:    ldr q0, [x0]
; SVE-NEXT:    ldr q1, [x1]
; SVE-NEXT:    and z1.d, z1.d, #0x8000000000000000
; SVE-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    str q0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v2f64_v2f64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.d, #0x7fffffffffffffff
; SVE2-NEXT:    ldr q1, [x0]
; SVE2-NEXT:    ldr q2, [x1]
; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT:    str q1, [x0]
; SVE2-NEXT:    ret
  %a = load <2 x double>, ptr %ap
  %b = load <2 x double>, ptr %bp
  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
  store <2 x double> %r, ptr %ap
  ret void
}

define void @test_copysign_v4f64_v4f64(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f64_v4f64:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    and z0.d, z0.d, #0x8000000000000000
; SVE-NEXT:    and z3.d, z3.d, #0x8000000000000000
; SVE-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
; SVE-NEXT:    and z2.d, z2.d, #0x7fffffffffffffff
; SVE-NEXT:    orr z0.d, z1.d, z0.d
; SVE-NEXT:    orr z1.d, z2.d, z3.d
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v4f64_v4f64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.d, #0x7fffffffffffffff
; SVE2-NEXT:    ldp q1, q4, [x1]
; SVE2-NEXT:    ldp q2, q3, [x0]
; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; SVE2-NEXT:    bsl z3.d, z3.d, z4.d, z0.d
; SVE2-NEXT:    stp q2, q3, [x0]
; SVE2-NEXT:    ret
  %a = load <4 x double>, ptr %ap
  %b = load <4 x double>, ptr %bp
  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
  store <4 x double> %r, ptr %ap
  ret void
}

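; Sign operand wider than the result: it is rounded down with fcvt and the
; lanes are compacted with uzp1 before its sign bit is merged.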
define void @test_copysign_v2f32_v2f64(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v2f32_v2f64:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d
; SVE-NEXT:    ldr q0, [x1]
; SVE-NEXT:    ldr d1, [x0]
; SVE-NEXT:    and z1.s, z1.s, #0x7fffffff
; SVE-NEXT:    fcvt z0.s, p0/m, z0.d
; SVE-NEXT:    uzp1 z0.s, z0.s, z0.s
; SVE-NEXT:    and z0.s, z0.s, #0x80000000
; SVE-NEXT:    orr z0.d, z1.d, z0.d
; SVE-NEXT:    str d0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v2f32_v2f64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.d
; SVE2-NEXT:    ldr q0, [x1]
; SVE2-NEXT:    mov z1.s, #0x7fffffff
; SVE2-NEXT:    ldr d2, [x0]
; SVE2-NEXT:    fcvt z0.s, p0/m, z0.d
; SVE2-NEXT:    uzp1 z0.s, z0.s, z0.s
; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT:    str d2, [x0]
; SVE2-NEXT:    ret
  %a = load <2 x float>, ptr %ap
  %b = load <2 x double>, ptr %bp
  %tmp0 = fptrunc <2 x double> %b to <2 x float>
  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
  store <2 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f32_v4f64:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d
; SVE-NEXT:    ldp q0, q1, [x1]
; SVE-NEXT:    fcvt z1.s, p0/m, z1.d
; SVE-NEXT:    fcvt z0.s, p0/m, z0.d
; SVE-NEXT:    ptrue p0.s, vl2
; SVE-NEXT:    uzp1 z1.s, z1.s, z1.s
; SVE-NEXT:    uzp1 z0.s, z0.s, z0.s
; SVE-NEXT:    splice z0.s, p0, z0.s, z1.s
; SVE-NEXT:    ldr q1, [x0]
; SVE-NEXT:    and z1.s, z1.s, #0x7fffffff
; SVE-NEXT:    and z0.s, z0.s, #0x80000000
; SVE-NEXT:    orr z0.d, z1.d, z0.d
; SVE-NEXT:    str q0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v4f32_v4f64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.d
; SVE2-NEXT:    ldp q0, q1, [x1]
; SVE2-NEXT:    ldr q2, [x0]
; SVE2-NEXT:    fcvt z1.s, p0/m, z1.d
; SVE2-NEXT:    fcvt z0.s, p0/m, z0.d
; SVE2-NEXT:    ptrue p0.s, vl2
; SVE2-NEXT:    uzp1 z1.s, z1.s, z1.s
; SVE2-NEXT:    uzp1 z0.s, z0.s, z0.s
; SVE2-NEXT:    splice z0.s, p0, z0.s, z1.s
; SVE2-NEXT:    mov z1.s, #0x7fffffff
; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT:    str q2, [x0]
; SVE2-NEXT:    ret
  %a = load <4 x float>, ptr %ap
  %b = load <4 x double>, ptr %bp
  %tmp0 = fptrunc <4 x double> %b to <4 x float>
  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
  store <4 x float> %r, ptr %ap
  ret void
}

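; Sign operand narrower than the result: it is loaded unpacked into 64-bit
; lanes with ld1w and extended with fcvt before the merge.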
define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v2f64_v2f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl2
; SVE-NEXT:    ldr q0, [x0]
; SVE-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; SVE-NEXT:    ld1w { z1.d }, p0/z, [x1]
; SVE-NEXT:    fcvt z1.d, p0/m, z1.s
; SVE-NEXT:    and z1.d, z1.d, #0x8000000000000000
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    str q0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v2f64_v2f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.d, vl2
; SVE2-NEXT:    ldr q0, [x0]
; SVE2-NEXT:    mov z2.d, #0x7fffffffffffffff
; SVE2-NEXT:    ld1w { z1.d }, p0/z, [x1]
; SVE2-NEXT:    fcvt z1.d, p0/m, z1.s
; SVE2-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; SVE2-NEXT:    str q0, [x0]
; SVE2-NEXT:    ret
  %a = load <2 x double>, ptr %ap
  %b = load <2 x float>, ptr %bp
  %tmp0 = fpext <2 x float> %b to <2 x double>
  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
  store <2 x double> %r, ptr %ap
  ret void
}

; SplitVecRes mismatched
define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f64_v4f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl2
; SVE-NEXT:    mov x8, #2 // =0x2
; SVE-NEXT:    ldp q0, q1, [x0]
; SVE-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; SVE-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
; SVE-NEXT:    ld1w { z2.d }, p0/z, [x1, x8, lsl #2]
; SVE-NEXT:    ld1w { z3.d }, p0/z, [x1]
; SVE-NEXT:    fcvt z3.d, p0/m, z3.s
; SVE-NEXT:    fcvt z2.d, p0/m, z2.s
; SVE-NEXT:    and z3.d, z3.d, #0x8000000000000000
; SVE-NEXT:    and z2.d, z2.d, #0x8000000000000000
; SVE-NEXT:    orr z0.d, z0.d, z3.d
; SVE-NEXT:    orr z1.d, z1.d, z2.d
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v4f64_v4f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.d, vl2
; SVE2-NEXT:    mov x8, #2 // =0x2
; SVE2-NEXT:    mov z4.d, #0x7fffffffffffffff
; SVE2-NEXT:    ldp q0, q1, [x0]
; SVE2-NEXT:    ld1w { z2.d }, p0/z, [x1, x8, lsl #2]
; SVE2-NEXT:    ld1w { z3.d }, p0/z, [x1]
; SVE2-NEXT:    fcvt z3.d, p0/m, z3.s
; SVE2-NEXT:    fcvt z2.d, p0/m, z2.s
; SVE2-NEXT:    bsl z0.d, z0.d, z3.d, z4.d
; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z4.d
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
  %a = load <4 x double>, ptr %ap
  %b = load <4 x float>, ptr %bp
  %tmp0 = fpext <4 x float> %b to <4 x double>
  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
  store <4 x double> %r, ptr %ap
  ret void
}

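; f16 result with a wider sign operand: the sign operand is rounded to f16
; before the merge. For the f32 source this uses a vector fcvt plus uzp1.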
define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f16_v4f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s
; SVE-NEXT:    ldr q0, [x1]
; SVE-NEXT:    ldr d1, [x0]
; SVE-NEXT:    and z1.h, z1.h, #0x7fff
; SVE-NEXT:    fcvt z0.h, p0/m, z0.s
; SVE-NEXT:    uzp1 z0.h, z0.h, z0.h
; SVE-NEXT:    and z0.h, z0.h, #0x8000
; SVE-NEXT:    orr z0.d, z1.d, z0.d
; SVE-NEXT:    str d0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v4f16_v4f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.s
; SVE2-NEXT:    ldr q0, [x1]
; SVE2-NEXT:    mov z1.h, #32767 // =0x7fff
; SVE2-NEXT:    ldr d2, [x0]
; SVE2-NEXT:    fcvt z0.h, p0/m, z0.s
; SVE2-NEXT:    uzp1 z0.h, z0.h, z0.h
; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT:    str d2, [x0]
; SVE2-NEXT:    ret
  %a = load <4 x half>, ptr %ap
  %b = load <4 x float>, ptr %bp
  %tmp0 = fptrunc <4 x float> %b to <4 x half>
  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
  store <4 x half> %r, ptr %ap
  ret void
}

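; The f64 source is converted element by element with scalar fcvt and the
; v4f16 vector is rebuilt through the stack before the merge.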
define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f16_v4f64:
; SVE:       // %bb.0:
; SVE-NEXT:    sub sp, sp, #16
; SVE-NEXT:    .cfi_def_cfa_offset 16
; SVE-NEXT:    ldp q1, q0, [x1]
; SVE-NEXT:    ldr d4, [x0]
; SVE-NEXT:    and z4.h, z4.h, #0x7fff
; SVE-NEXT:    mov z2.d, z0.d[1]
; SVE-NEXT:    mov z3.d, z1.d[1]
; SVE-NEXT:    fcvt h0, d0
; SVE-NEXT:    fcvt h1, d1
; SVE-NEXT:    fcvt h2, d2
; SVE-NEXT:    fcvt h3, d3
; SVE-NEXT:    str h0, [sp, #12]
; SVE-NEXT:    str h1, [sp, #8]
; SVE-NEXT:    str h2, [sp, #14]
; SVE-NEXT:    str h3, [sp, #10]
; SVE-NEXT:    ldr d0, [sp, #8]
; SVE-NEXT:    and z0.h, z0.h, #0x8000
; SVE-NEXT:    orr z0.d, z4.d, z0.d
; SVE-NEXT:    str d0, [x0]
; SVE-NEXT:    add sp, sp, #16
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v4f16_v4f64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    sub sp, sp, #16
; SVE2-NEXT:    .cfi_def_cfa_offset 16
; SVE2-NEXT:    ldp q2, q1, [x1]
; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
; SVE2-NEXT:    ldr d5, [x0]
; SVE2-NEXT:    mov z3.d, z1.d[1]
; SVE2-NEXT:    mov z4.d, z2.d[1]
; SVE2-NEXT:    fcvt h1, d1
; SVE2-NEXT:    fcvt h2, d2
; SVE2-NEXT:    fcvt h3, d3
; SVE2-NEXT:    fcvt h4, d4
; SVE2-NEXT:    str h1, [sp, #12]
; SVE2-NEXT:    str h2, [sp, #8]
; SVE2-NEXT:    str h3, [sp, #14]
; SVE2-NEXT:    str h4, [sp, #10]
; SVE2-NEXT:    ldr d1, [sp, #8]
; SVE2-NEXT:    bsl z5.d, z5.d, z1.d, z0.d
; SVE2-NEXT:    str d5, [x0]
; SVE2-NEXT:    add sp, sp, #16
; SVE2-NEXT:    ret
  %a = load <4 x half>, ptr %ap
  %b = load <4 x double>, ptr %bp
  %tmp0 = fptrunc <4 x double> %b to <4 x half>
  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
  store <4 x half> %r, ptr %ap
  ret void
}

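; The v8f32 sign operand is truncated in two q-register halves and spliced
; back into a single v8f16 vector before the merge.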
define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v8f16_v8f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s
; SVE-NEXT:    ldp q0, q1, [x1]
; SVE-NEXT:    fcvt z1.h, p0/m, z1.s
; SVE-NEXT:    fcvt z0.h, p0/m, z0.s
; SVE-NEXT:    ptrue p0.h, vl4
; SVE-NEXT:    uzp1 z1.h, z1.h, z1.h
; SVE-NEXT:    uzp1 z0.h, z0.h, z0.h
; SVE-NEXT:    splice z0.h, p0, z0.h, z1.h
; SVE-NEXT:    ldr q1, [x0]
; SVE-NEXT:    and z1.h, z1.h, #0x7fff
; SVE-NEXT:    and z0.h, z0.h, #0x8000
; SVE-NEXT:    orr z0.d, z1.d, z0.d
; SVE-NEXT:    str q0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v8f16_v8f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.s
; SVE2-NEXT:    ldp q0, q1, [x1]
; SVE2-NEXT:    ldr q2, [x0]
; SVE2-NEXT:    fcvt z1.h, p0/m, z1.s
; SVE2-NEXT:    fcvt z0.h, p0/m, z0.s
; SVE2-NEXT:    ptrue p0.h, vl4
; SVE2-NEXT:    uzp1 z1.h, z1.h, z1.h
; SVE2-NEXT:    uzp1 z0.h, z0.h, z0.h
; SVE2-NEXT:    splice z0.h, p0, z0.h, z1.h
; SVE2-NEXT:    mov z1.h, #32767 // =0x7fff
; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT:    str q2, [x0]
; SVE2-NEXT:    ret
  %a = load <8 x half>, ptr %ap
  %b = load <8 x float>, ptr %bp
  %tmp0 = fptrunc <8 x float> %b to <8 x half>
  %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0)
  store <8 x half> %r, ptr %ap
  ret void
}

declare <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) #0
declare <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) #0
declare <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b) #0

declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0
declare <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b) #0

declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: