1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=SVE
3 ; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=SVE2
4 ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=SVE2
5 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
7 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
9 target triple = "aarch64-unknown-linux-gnu"
; copysign on <4 x half>: loads %a from %ap and %b from %bp, calls
; llvm.copysign.v4f16(%a, %b) and stores the result back to %ap.
; Expected lowering per RUN line:
;   +sve          - mask %a with 0x7fff and %b with 0x8000, then orr them.
;   +sve2 / +sme  - a single bsl using a splatted 0x7fff mask.
;   no SVE        - each element goes through fcvt/fabs/fneg/fcsel, with the
;                   select driven by a tst of bit 0x80 of a byte reloaded from
;                   a stack copy of the %b lane.
13 define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) {
14 ; SVE-LABEL: test_copysign_v4f16_v4f16:
16 ; SVE-NEXT: ldr d0, [x0]
17 ; SVE-NEXT: ldr d1, [x1]
18 ; SVE-NEXT: and z1.h, z1.h, #0x8000
19 ; SVE-NEXT: and z0.h, z0.h, #0x7fff
20 ; SVE-NEXT: orr z0.d, z0.d, z1.d
21 ; SVE-NEXT: str d0, [x0]
24 ; SVE2-LABEL: test_copysign_v4f16_v4f16:
26 ; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
27 ; SVE2-NEXT: ldr d1, [x0]
28 ; SVE2-NEXT: ldr d2, [x1]
29 ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
30 ; SVE2-NEXT: str d1, [x0]
33 ; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f16:
34 ; NONEON-NOSVE: // %bb.0:
35 ; NONEON-NOSVE-NEXT: sub sp, sp, #48
36 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
37 ; NONEON-NOSVE-NEXT: ldr d0, [x1]
38 ; NONEON-NOSVE-NEXT: ldr d1, [x0]
39 ; NONEON-NOSVE-NEXT: stp d1, d0, [sp, #24]
40 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #38]
41 ; NONEON-NOSVE-NEXT: str h0, [sp, #20]
42 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #36]
43 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
44 ; NONEON-NOSVE-NEXT: str h0, [sp, #16]
45 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #34]
46 ; NONEON-NOSVE-NEXT: tst w8, #0x80
47 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
48 ; NONEON-NOSVE-NEXT: str h0, [sp, #12]
49 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #32]
50 ; NONEON-NOSVE-NEXT: str h0, [sp, #8]
51 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #30]
52 ; NONEON-NOSVE-NEXT: fcvt s0, h0
53 ; NONEON-NOSVE-NEXT: fabs s0, s0
54 ; NONEON-NOSVE-NEXT: fneg s1, s0
55 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
56 ; NONEON-NOSVE-NEXT: tst w8, #0x80
57 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
58 ; NONEON-NOSVE-NEXT: fcvt h0, s0
59 ; NONEON-NOSVE-NEXT: str h0, [sp, #46]
60 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #28]
61 ; NONEON-NOSVE-NEXT: fcvt s0, h0
62 ; NONEON-NOSVE-NEXT: fabs s0, s0
63 ; NONEON-NOSVE-NEXT: fneg s1, s0
64 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
65 ; NONEON-NOSVE-NEXT: tst w8, #0x80
66 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
67 ; NONEON-NOSVE-NEXT: fcvt h0, s0
68 ; NONEON-NOSVE-NEXT: str h0, [sp, #44]
69 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #26]
70 ; NONEON-NOSVE-NEXT: fcvt s0, h0
71 ; NONEON-NOSVE-NEXT: fabs s0, s0
72 ; NONEON-NOSVE-NEXT: fneg s1, s0
73 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
74 ; NONEON-NOSVE-NEXT: tst w8, #0x80
75 ; NONEON-NOSVE-NEXT: fcvt h0, s0
76 ; NONEON-NOSVE-NEXT: str h0, [sp, #42]
77 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #24]
78 ; NONEON-NOSVE-NEXT: fcvt s0, h0
79 ; NONEON-NOSVE-NEXT: fabs s0, s0
80 ; NONEON-NOSVE-NEXT: fneg s1, s0
81 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
82 ; NONEON-NOSVE-NEXT: fcvt h0, s0
83 ; NONEON-NOSVE-NEXT: str h0, [sp, #40]
84 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
85 ; NONEON-NOSVE-NEXT: str d0, [x0]
86 ; NONEON-NOSVE-NEXT: add sp, sp, #48
87 ; NONEON-NOSVE-NEXT: ret
88 %a = load <4 x half>, ptr %ap
89 %b = load <4 x half>, ptr %bp
90 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b)
91 store <4 x half> %r, ptr %ap
; copysign on <8 x half> (one 128-bit vector): loads %a from %ap and %b from
; %bp, calls llvm.copysign.v8f16(%a, %b) and stores the result back to %ap.
; Expected lowering per RUN line:
;   +sve          - mask %a with 0x7fff and %b with 0x8000, then orr them.
;   +sve2 / +sme  - a single bsl using a splatted 0x7fff mask.
;   no SVE        - eight scalar fcvt/fabs/fneg/fcsel sequences, each select
;                   keyed on a tst of bit 0x80 of a byte reloaded from a stack
;                   copy of the %b lane.
95 define void @test_copysign_v8f16_v8f16(ptr %ap, ptr %bp) {
96 ; SVE-LABEL: test_copysign_v8f16_v8f16:
98 ; SVE-NEXT: ldr q0, [x0]
99 ; SVE-NEXT: ldr q1, [x1]
100 ; SVE-NEXT: and z1.h, z1.h, #0x8000
101 ; SVE-NEXT: and z0.h, z0.h, #0x7fff
102 ; SVE-NEXT: orr z0.d, z0.d, z1.d
103 ; SVE-NEXT: str q0, [x0]
106 ; SVE2-LABEL: test_copysign_v8f16_v8f16:
108 ; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
109 ; SVE2-NEXT: ldr q1, [x0]
110 ; SVE2-NEXT: ldr q2, [x1]
111 ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
112 ; SVE2-NEXT: str q1, [x0]
115 ; NONEON-NOSVE-LABEL: test_copysign_v8f16_v8f16:
116 ; NONEON-NOSVE: // %bb.0:
117 ; NONEON-NOSVE-NEXT: sub sp, sp, #80
118 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
119 ; NONEON-NOSVE-NEXT: ldr q0, [x1]
120 ; NONEON-NOSVE-NEXT: ldr q1, [x0]
121 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
122 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #62]
123 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
124 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #60]
125 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
126 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
127 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #58]
128 ; NONEON-NOSVE-NEXT: tst w8, #0x80
129 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
130 ; NONEON-NOSVE-NEXT: str h0, [sp, #20]
131 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #56]
132 ; NONEON-NOSVE-NEXT: str h0, [sp, #16]
133 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #54]
134 ; NONEON-NOSVE-NEXT: str h0, [sp, #12]
135 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #52]
136 ; NONEON-NOSVE-NEXT: str h0, [sp, #8]
137 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #50]
138 ; NONEON-NOSVE-NEXT: str h0, [sp, #4]
139 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #48]
140 ; NONEON-NOSVE-NEXT: str h0, [sp]
141 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #46]
142 ; NONEON-NOSVE-NEXT: fcvt s0, h0
143 ; NONEON-NOSVE-NEXT: fabs s0, s0
144 ; NONEON-NOSVE-NEXT: fneg s1, s0
145 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
146 ; NONEON-NOSVE-NEXT: tst w8, #0x80
147 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
148 ; NONEON-NOSVE-NEXT: fcvt h0, s0
149 ; NONEON-NOSVE-NEXT: str h0, [sp, #78]
150 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #44]
151 ; NONEON-NOSVE-NEXT: fcvt s0, h0
152 ; NONEON-NOSVE-NEXT: fabs s0, s0
153 ; NONEON-NOSVE-NEXT: fneg s1, s0
154 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
155 ; NONEON-NOSVE-NEXT: tst w8, #0x80
156 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
157 ; NONEON-NOSVE-NEXT: fcvt h0, s0
158 ; NONEON-NOSVE-NEXT: str h0, [sp, #76]
159 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #42]
160 ; NONEON-NOSVE-NEXT: fcvt s0, h0
161 ; NONEON-NOSVE-NEXT: fabs s0, s0
162 ; NONEON-NOSVE-NEXT: fneg s1, s0
163 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
164 ; NONEON-NOSVE-NEXT: tst w8, #0x80
165 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
166 ; NONEON-NOSVE-NEXT: fcvt h0, s0
167 ; NONEON-NOSVE-NEXT: str h0, [sp, #74]
168 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #40]
169 ; NONEON-NOSVE-NEXT: fcvt s0, h0
170 ; NONEON-NOSVE-NEXT: fabs s0, s0
171 ; NONEON-NOSVE-NEXT: fneg s1, s0
172 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
173 ; NONEON-NOSVE-NEXT: tst w8, #0x80
174 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
175 ; NONEON-NOSVE-NEXT: fcvt h0, s0
176 ; NONEON-NOSVE-NEXT: str h0, [sp, #72]
177 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #38]
178 ; NONEON-NOSVE-NEXT: fcvt s0, h0
179 ; NONEON-NOSVE-NEXT: fabs s0, s0
180 ; NONEON-NOSVE-NEXT: fneg s1, s0
181 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
182 ; NONEON-NOSVE-NEXT: tst w8, #0x80
183 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5]
184 ; NONEON-NOSVE-NEXT: fcvt h0, s0
185 ; NONEON-NOSVE-NEXT: str h0, [sp, #70]
186 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #36]
187 ; NONEON-NOSVE-NEXT: fcvt s0, h0
188 ; NONEON-NOSVE-NEXT: fabs s0, s0
189 ; NONEON-NOSVE-NEXT: fneg s1, s0
190 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
191 ; NONEON-NOSVE-NEXT: tst w8, #0x80
192 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1]
193 ; NONEON-NOSVE-NEXT: fcvt h0, s0
194 ; NONEON-NOSVE-NEXT: str h0, [sp, #68]
195 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #34]
196 ; NONEON-NOSVE-NEXT: fcvt s0, h0
197 ; NONEON-NOSVE-NEXT: fabs s0, s0
198 ; NONEON-NOSVE-NEXT: fneg s1, s0
199 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
200 ; NONEON-NOSVE-NEXT: tst w8, #0x80
201 ; NONEON-NOSVE-NEXT: fcvt h0, s0
202 ; NONEON-NOSVE-NEXT: str h0, [sp, #66]
203 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #32]
204 ; NONEON-NOSVE-NEXT: fcvt s0, h0
205 ; NONEON-NOSVE-NEXT: fabs s0, s0
206 ; NONEON-NOSVE-NEXT: fneg s1, s0
207 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
208 ; NONEON-NOSVE-NEXT: fcvt h0, s0
209 ; NONEON-NOSVE-NEXT: str h0, [sp, #64]
210 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #64]
211 ; NONEON-NOSVE-NEXT: str q0, [x0]
212 ; NONEON-NOSVE-NEXT: add sp, sp, #80
213 ; NONEON-NOSVE-NEXT: ret
214 %a = load <8 x half>, ptr %ap
215 %b = load <8 x half>, ptr %bp
216 %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
217 store <8 x half> %r, ptr %ap
; copysign on <16 x half>: loads %a from %ap and %b from %bp, calls
; llvm.copysign.v16f16(%a, %b) and stores the result back to %ap.
; The expected code handles the 256-bit value as two q-register halves
; (ldp/stp pairs).
; Expected lowering per RUN line:
;   +sve          - per half: mask %a with 0x7fff, %b with 0x8000, then orr.
;   +sve2 / +sme  - one bsl per half, both sharing a single 0x7fff mask.
;   no SVE        - sixteen scalar fcvt/fabs/fneg/fcsel sequences, each select
;                   keyed on a tst of bit 0x80 of a byte reloaded from a stack
;                   copy of the %b lane.
221 define void @test_copysign_v16f16_v16f16(ptr %ap, ptr %bp) {
222 ; SVE-LABEL: test_copysign_v16f16_v16f16:
224 ; SVE-NEXT: ldp q0, q3, [x1]
225 ; SVE-NEXT: ldp q1, q2, [x0]
226 ; SVE-NEXT: and z0.h, z0.h, #0x8000
227 ; SVE-NEXT: and z3.h, z3.h, #0x8000
228 ; SVE-NEXT: and z1.h, z1.h, #0x7fff
229 ; SVE-NEXT: and z2.h, z2.h, #0x7fff
230 ; SVE-NEXT: orr z0.d, z1.d, z0.d
231 ; SVE-NEXT: orr z1.d, z2.d, z3.d
232 ; SVE-NEXT: stp q0, q1, [x0]
235 ; SVE2-LABEL: test_copysign_v16f16_v16f16:
237 ; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
238 ; SVE2-NEXT: ldp q1, q4, [x1]
239 ; SVE2-NEXT: ldp q2, q3, [x0]
240 ; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
241 ; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d
242 ; SVE2-NEXT: stp q2, q3, [x0]
245 ; NONEON-NOSVE-LABEL: test_copysign_v16f16_v16f16:
246 ; NONEON-NOSVE: // %bb.0:
247 ; NONEON-NOSVE-NEXT: sub sp, sp, #160
248 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 160
249 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x1]
250 ; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
251 ; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #64]
252 ; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #96]
253 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #126]
254 ; NONEON-NOSVE-NEXT: str h0, [sp, #28]
255 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #124]
256 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
257 ; NONEON-NOSVE-NEXT: str h0, [sp, #24]
258 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #122]
259 ; NONEON-NOSVE-NEXT: tst w8, #0x80
260 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
261 ; NONEON-NOSVE-NEXT: str h0, [sp, #20]
262 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #120]
263 ; NONEON-NOSVE-NEXT: str h0, [sp, #16]
264 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #118]
265 ; NONEON-NOSVE-NEXT: str h0, [sp, #12]
266 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #116]
267 ; NONEON-NOSVE-NEXT: str h0, [sp, #8]
268 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #114]
269 ; NONEON-NOSVE-NEXT: str h0, [sp, #4]
270 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #112]
271 ; NONEON-NOSVE-NEXT: str h0, [sp]
272 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #94]
273 ; NONEON-NOSVE-NEXT: str h0, [sp, #60]
274 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #92]
275 ; NONEON-NOSVE-NEXT: str h0, [sp, #56]
276 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #90]
277 ; NONEON-NOSVE-NEXT: str h0, [sp, #52]
278 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #88]
279 ; NONEON-NOSVE-NEXT: str h0, [sp, #48]
280 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #86]
281 ; NONEON-NOSVE-NEXT: str h0, [sp, #44]
282 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #84]
283 ; NONEON-NOSVE-NEXT: str h0, [sp, #40]
284 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #82]
285 ; NONEON-NOSVE-NEXT: str h0, [sp, #36]
286 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #80]
287 ; NONEON-NOSVE-NEXT: str h0, [sp, #32]
288 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #110]
289 ; NONEON-NOSVE-NEXT: fcvt s0, h0
290 ; NONEON-NOSVE-NEXT: fabs s0, s0
291 ; NONEON-NOSVE-NEXT: fneg s1, s0
292 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
293 ; NONEON-NOSVE-NEXT: tst w8, #0x80
294 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
295 ; NONEON-NOSVE-NEXT: fcvt h0, s0
296 ; NONEON-NOSVE-NEXT: str h0, [sp, #158]
297 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #108]
298 ; NONEON-NOSVE-NEXT: fcvt s0, h0
299 ; NONEON-NOSVE-NEXT: fabs s0, s0
300 ; NONEON-NOSVE-NEXT: fneg s1, s0
301 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
302 ; NONEON-NOSVE-NEXT: tst w8, #0x80
303 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
304 ; NONEON-NOSVE-NEXT: fcvt h0, s0
305 ; NONEON-NOSVE-NEXT: str h0, [sp, #156]
306 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #106]
307 ; NONEON-NOSVE-NEXT: fcvt s0, h0
308 ; NONEON-NOSVE-NEXT: fabs s0, s0
309 ; NONEON-NOSVE-NEXT: fneg s1, s0
310 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
311 ; NONEON-NOSVE-NEXT: tst w8, #0x80
312 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
313 ; NONEON-NOSVE-NEXT: fcvt h0, s0
314 ; NONEON-NOSVE-NEXT: str h0, [sp, #154]
315 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #104]
316 ; NONEON-NOSVE-NEXT: fcvt s0, h0
317 ; NONEON-NOSVE-NEXT: fabs s0, s0
318 ; NONEON-NOSVE-NEXT: fneg s1, s0
319 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
320 ; NONEON-NOSVE-NEXT: tst w8, #0x80
321 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
322 ; NONEON-NOSVE-NEXT: fcvt h0, s0
323 ; NONEON-NOSVE-NEXT: str h0, [sp, #152]
324 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #102]
325 ; NONEON-NOSVE-NEXT: fcvt s0, h0
326 ; NONEON-NOSVE-NEXT: fabs s0, s0
327 ; NONEON-NOSVE-NEXT: fneg s1, s0
328 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
329 ; NONEON-NOSVE-NEXT: tst w8, #0x80
330 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5]
331 ; NONEON-NOSVE-NEXT: fcvt h0, s0
332 ; NONEON-NOSVE-NEXT: str h0, [sp, #150]
333 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #100]
334 ; NONEON-NOSVE-NEXT: fcvt s0, h0
335 ; NONEON-NOSVE-NEXT: fabs s0, s0
336 ; NONEON-NOSVE-NEXT: fneg s1, s0
337 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
338 ; NONEON-NOSVE-NEXT: tst w8, #0x80
339 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1]
340 ; NONEON-NOSVE-NEXT: fcvt h0, s0
341 ; NONEON-NOSVE-NEXT: str h0, [sp, #148]
342 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #98]
343 ; NONEON-NOSVE-NEXT: fcvt s0, h0
344 ; NONEON-NOSVE-NEXT: fabs s0, s0
345 ; NONEON-NOSVE-NEXT: fneg s1, s0
346 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
347 ; NONEON-NOSVE-NEXT: tst w8, #0x80
348 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #61]
349 ; NONEON-NOSVE-NEXT: fcvt h0, s0
350 ; NONEON-NOSVE-NEXT: str h0, [sp, #146]
351 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #96]
352 ; NONEON-NOSVE-NEXT: fcvt s0, h0
353 ; NONEON-NOSVE-NEXT: fabs s0, s0
354 ; NONEON-NOSVE-NEXT: fneg s1, s0
355 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
356 ; NONEON-NOSVE-NEXT: tst w8, #0x80
357 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #57]
358 ; NONEON-NOSVE-NEXT: fcvt h0, s0
359 ; NONEON-NOSVE-NEXT: str h0, [sp, #144]
360 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #78]
361 ; NONEON-NOSVE-NEXT: fcvt s0, h0
362 ; NONEON-NOSVE-NEXT: fabs s0, s0
363 ; NONEON-NOSVE-NEXT: fneg s1, s0
364 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
365 ; NONEON-NOSVE-NEXT: tst w8, #0x80
366 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #53]
367 ; NONEON-NOSVE-NEXT: fcvt h0, s0
368 ; NONEON-NOSVE-NEXT: str h0, [sp, #142]
369 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #76]
370 ; NONEON-NOSVE-NEXT: fcvt s0, h0
371 ; NONEON-NOSVE-NEXT: fabs s0, s0
372 ; NONEON-NOSVE-NEXT: fneg s1, s0
373 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
374 ; NONEON-NOSVE-NEXT: tst w8, #0x80
375 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #49]
376 ; NONEON-NOSVE-NEXT: fcvt h0, s0
377 ; NONEON-NOSVE-NEXT: str h0, [sp, #140]
378 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #74]
379 ; NONEON-NOSVE-NEXT: fcvt s0, h0
380 ; NONEON-NOSVE-NEXT: fabs s0, s0
381 ; NONEON-NOSVE-NEXT: fneg s1, s0
382 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
383 ; NONEON-NOSVE-NEXT: tst w8, #0x80
384 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #45]
385 ; NONEON-NOSVE-NEXT: fcvt h0, s0
386 ; NONEON-NOSVE-NEXT: str h0, [sp, #138]
387 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #72]
388 ; NONEON-NOSVE-NEXT: fcvt s0, h0
389 ; NONEON-NOSVE-NEXT: fabs s0, s0
390 ; NONEON-NOSVE-NEXT: fneg s1, s0
391 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
392 ; NONEON-NOSVE-NEXT: tst w8, #0x80
393 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #41]
394 ; NONEON-NOSVE-NEXT: fcvt h0, s0
395 ; NONEON-NOSVE-NEXT: str h0, [sp, #136]
396 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #70]
397 ; NONEON-NOSVE-NEXT: fcvt s0, h0
398 ; NONEON-NOSVE-NEXT: fabs s0, s0
399 ; NONEON-NOSVE-NEXT: fneg s1, s0
400 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
401 ; NONEON-NOSVE-NEXT: tst w8, #0x80
402 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #37]
403 ; NONEON-NOSVE-NEXT: fcvt h0, s0
404 ; NONEON-NOSVE-NEXT: str h0, [sp, #134]
405 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #68]
406 ; NONEON-NOSVE-NEXT: fcvt s0, h0
407 ; NONEON-NOSVE-NEXT: fabs s0, s0
408 ; NONEON-NOSVE-NEXT: fneg s1, s0
409 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
410 ; NONEON-NOSVE-NEXT: tst w8, #0x80
411 ; NONEON-NOSVE-NEXT: ldrb w8, [sp, #33]
412 ; NONEON-NOSVE-NEXT: fcvt h0, s0
413 ; NONEON-NOSVE-NEXT: str h0, [sp, #132]
414 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #66]
415 ; NONEON-NOSVE-NEXT: fcvt s0, h0
416 ; NONEON-NOSVE-NEXT: fabs s0, s0
417 ; NONEON-NOSVE-NEXT: fneg s1, s0
418 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
419 ; NONEON-NOSVE-NEXT: tst w8, #0x80
420 ; NONEON-NOSVE-NEXT: fcvt h0, s0
421 ; NONEON-NOSVE-NEXT: str h0, [sp, #130]
422 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #64]
423 ; NONEON-NOSVE-NEXT: fcvt s0, h0
424 ; NONEON-NOSVE-NEXT: fabs s0, s0
425 ; NONEON-NOSVE-NEXT: fneg s1, s0
426 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
427 ; NONEON-NOSVE-NEXT: fcvt h0, s0
428 ; NONEON-NOSVE-NEXT: str h0, [sp, #128]
429 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128]
430 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
431 ; NONEON-NOSVE-NEXT: add sp, sp, #160
432 ; NONEON-NOSVE-NEXT: ret
433 %a = load <16 x half>, ptr %ap
434 %b = load <16 x half>, ptr %bp
435 %r = call <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b)
436 store <16 x half> %r, ptr %ap
; copysign on <2 x float>: loads %a from %ap and %b from %bp, calls
; llvm.copysign.v2f32(%a, %b) and stores the result back to %ap.
; Expected lowering per RUN line:
;   +sve          - mask %a with 0x7fffffff and %b with 0x80000000, then orr.
;   +sve2 / +sme  - a single bsl using a splatted 0x7fffffff mask.
;   no SVE        - per element: tst the sign bit of the %b lane (loaded into
;                   a w register), then fabs/fneg/fcsel on the %a lane.
442 define void @test_copysign_v2f32_v2f32(ptr %ap, ptr %bp) {
443 ; SVE-LABEL: test_copysign_v2f32_v2f32:
445 ; SVE-NEXT: ldr d0, [x0]
446 ; SVE-NEXT: ldr d1, [x1]
447 ; SVE-NEXT: and z1.s, z1.s, #0x80000000
448 ; SVE-NEXT: and z0.s, z0.s, #0x7fffffff
449 ; SVE-NEXT: orr z0.d, z0.d, z1.d
450 ; SVE-NEXT: str d0, [x0]
453 ; SVE2-LABEL: test_copysign_v2f32_v2f32:
455 ; SVE2-NEXT: mov z0.s, #0x7fffffff
456 ; SVE2-NEXT: ldr d1, [x0]
457 ; SVE2-NEXT: ldr d2, [x1]
458 ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
459 ; SVE2-NEXT: str d1, [x0]
462 ; NONEON-NOSVE-LABEL: test_copysign_v2f32_v2f32:
463 ; NONEON-NOSVE: // %bb.0:
464 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
465 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
466 ; NONEON-NOSVE-NEXT: ldr d0, [x0]
467 ; NONEON-NOSVE-NEXT: ldr d1, [x1]
468 ; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8]
469 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
470 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16]
471 ; NONEON-NOSVE-NEXT: fabs s0, s0
472 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
473 ; NONEON-NOSVE-NEXT: fneg s1, s0
474 ; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne
475 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
476 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
477 ; NONEON-NOSVE-NEXT: fabs s0, s0
478 ; NONEON-NOSVE-NEXT: fneg s1, s0
479 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
480 ; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #24]
481 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
482 ; NONEON-NOSVE-NEXT: str d0, [x0]
483 ; NONEON-NOSVE-NEXT: add sp, sp, #32
484 ; NONEON-NOSVE-NEXT: ret
485 %a = load <2 x float>, ptr %ap
486 %b = load <2 x float>, ptr %bp
487 %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
488 store <2 x float> %r, ptr %ap
; copysign on <4 x float> (one 128-bit vector): loads %a from %ap and %b from
; %bp, calls llvm.copysign.v4f32(%a, %b) and stores the result back to %ap.
; Expected lowering per RUN line:
;   +sve          - mask %a with 0x7fffffff and %b with 0x80000000, then orr.
;   +sve2 / +sme  - a single bsl using a splatted 0x7fffffff mask.
;   no SVE        - per element: tst the sign bit of the %b lane (loaded into
;                   a w register), then fabs/fneg/fcsel on the %a lane.
492 define void @test_copysign_v4f32_v4f32(ptr %ap, ptr %bp) {
493 ; SVE-LABEL: test_copysign_v4f32_v4f32:
495 ; SVE-NEXT: ldr q0, [x0]
496 ; SVE-NEXT: ldr q1, [x1]
497 ; SVE-NEXT: and z1.s, z1.s, #0x80000000
498 ; SVE-NEXT: and z0.s, z0.s, #0x7fffffff
499 ; SVE-NEXT: orr z0.d, z0.d, z1.d
500 ; SVE-NEXT: str q0, [x0]
503 ; SVE2-LABEL: test_copysign_v4f32_v4f32:
505 ; SVE2-NEXT: mov z0.s, #0x7fffffff
506 ; SVE2-NEXT: ldr q1, [x0]
507 ; SVE2-NEXT: ldr q2, [x1]
508 ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
509 ; SVE2-NEXT: str q1, [x0]
512 ; NONEON-NOSVE-LABEL: test_copysign_v4f32_v4f32:
513 ; NONEON-NOSVE: // %bb.0:
514 ; NONEON-NOSVE-NEXT: ldr q0, [x0]
515 ; NONEON-NOSVE-NEXT: ldr q1, [x1]
516 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
517 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
518 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
519 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24]
520 ; NONEON-NOSVE-NEXT: fabs s0, s0
521 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
522 ; NONEON-NOSVE-NEXT: fneg s1, s0
523 ; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne
524 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
525 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
526 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16]
527 ; NONEON-NOSVE-NEXT: fabs s0, s0
528 ; NONEON-NOSVE-NEXT: fneg s1, s0
529 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
530 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
531 ; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #40]
532 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
533 ; NONEON-NOSVE-NEXT: fabs s0, s0
534 ; NONEON-NOSVE-NEXT: fneg s1, s0
535 ; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne
536 ; NONEON-NOSVE-NEXT: ldr s0, [sp]
537 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
538 ; NONEON-NOSVE-NEXT: fabs s0, s0
539 ; NONEON-NOSVE-NEXT: fneg s1, s0
540 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
541 ; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #32]
542 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
543 ; NONEON-NOSVE-NEXT: str q0, [x0]
544 ; NONEON-NOSVE-NEXT: add sp, sp, #48
545 ; NONEON-NOSVE-NEXT: ret
546 %a = load <4 x float>, ptr %ap
547 %b = load <4 x float>, ptr %bp
548 %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
549 store <4 x float> %r, ptr %ap
; copysign on <8 x float>: loads %a from %ap and %b from %bp, calls
; llvm.copysign.v8f32(%a, %b) and stores the result back to %ap.
; The expected code handles the 256-bit value as two q-register halves
; (ldp/stp pairs).
; Expected lowering per RUN line:
;   +sve          - per half: mask %a with 0x7fffffff, %b with 0x80000000,
;                   then orr.
;   +sve2 / +sme  - one bsl per half, both sharing a single 0x7fffffff mask.
;   no SVE        - per element: tst the sign bit of the %b lane (loaded into
;                   a w register), then fabs/fneg/fcsel on the %a lane.
553 define void @test_copysign_v8f32_v8f32(ptr %ap, ptr %bp) {
554 ; SVE-LABEL: test_copysign_v8f32_v8f32:
556 ; SVE-NEXT: ldp q0, q3, [x1]
557 ; SVE-NEXT: ldp q1, q2, [x0]
558 ; SVE-NEXT: and z0.s, z0.s, #0x80000000
559 ; SVE-NEXT: and z3.s, z3.s, #0x80000000
560 ; SVE-NEXT: and z1.s, z1.s, #0x7fffffff
561 ; SVE-NEXT: and z2.s, z2.s, #0x7fffffff
562 ; SVE-NEXT: orr z0.d, z1.d, z0.d
563 ; SVE-NEXT: orr z1.d, z2.d, z3.d
564 ; SVE-NEXT: stp q0, q1, [x0]
567 ; SVE2-LABEL: test_copysign_v8f32_v8f32:
569 ; SVE2-NEXT: mov z0.s, #0x7fffffff
570 ; SVE2-NEXT: ldp q1, q4, [x1]
571 ; SVE2-NEXT: ldp q2, q3, [x0]
572 ; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
573 ; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d
574 ; SVE2-NEXT: stp q2, q3, [x0]
577 ; NONEON-NOSVE-LABEL: test_copysign_v8f32_v8f32:
578 ; NONEON-NOSVE: // %bb.0:
579 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
580 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
581 ; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
582 ; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
583 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
584 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #32]
585 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #44]
586 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56]
587 ; NONEON-NOSVE-NEXT: fabs s0, s0
588 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
589 ; NONEON-NOSVE-NEXT: fneg s1, s0
590 ; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne
591 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #40]
592 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
593 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #48]
594 ; NONEON-NOSVE-NEXT: fabs s0, s0
595 ; NONEON-NOSVE-NEXT: fneg s1, s0
596 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
597 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
598 ; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #88]
599 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #36]
600 ; NONEON-NOSVE-NEXT: fabs s0, s0
601 ; NONEON-NOSVE-NEXT: fneg s1, s0
602 ; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne
603 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #32]
604 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
605 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24]
606 ; NONEON-NOSVE-NEXT: fabs s0, s0
607 ; NONEON-NOSVE-NEXT: fneg s1, s0
608 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
609 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
610 ; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #80]
611 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
612 ; NONEON-NOSVE-NEXT: fabs s0, s0
613 ; NONEON-NOSVE-NEXT: fneg s1, s0
614 ; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne
615 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
616 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
617 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16]
618 ; NONEON-NOSVE-NEXT: fabs s0, s0
619 ; NONEON-NOSVE-NEXT: fneg s1, s0
620 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
621 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
622 ; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #72]
623 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
624 ; NONEON-NOSVE-NEXT: fabs s0, s0
625 ; NONEON-NOSVE-NEXT: fneg s1, s0
626 ; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne
627 ; NONEON-NOSVE-NEXT: ldr s0, [sp]
628 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
629 ; NONEON-NOSVE-NEXT: fabs s0, s0
630 ; NONEON-NOSVE-NEXT: fneg s1, s0
631 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
632 ; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #64]
633 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
634 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
635 ; NONEON-NOSVE-NEXT: add sp, sp, #96
636 ; NONEON-NOSVE-NEXT: ret
637 %a = load <8 x float>, ptr %ap
638 %b = load <8 x float>, ptr %bp
639 %r = call <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b)
640 store <8 x float> %r, ptr %ap
; copysign on <2 x double> (one 128-bit vector): loads %a from %ap and %b from
; %bp, calls llvm.copysign.v2f64(%a, %b) and stores the result back to %ap.
; Expected lowering per RUN line:
;   +sve          - mask %a with 0x7fffffffffffffff and %b with
;                   0x8000000000000000, then orr.
;   +sve2 / +sme  - a single bsl using a splatted 0x7fffffffffffffff mask.
;   no SVE        - per element: tst the sign bit of the %b lane (loaded into
;                   an x register), then fabs/fneg/fcsel on the %a lane.
646 define void @test_copysign_v2f64_v2f64(ptr %ap, ptr %bp) {
647 ; SVE-LABEL: test_copysign_v2f64_v2f64:
649 ; SVE-NEXT: ldr q0, [x0]
650 ; SVE-NEXT: ldr q1, [x1]
651 ; SVE-NEXT: and z1.d, z1.d, #0x8000000000000000
652 ; SVE-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
653 ; SVE-NEXT: orr z0.d, z0.d, z1.d
654 ; SVE-NEXT: str q0, [x0]
657 ; SVE2-LABEL: test_copysign_v2f64_v2f64:
659 ; SVE2-NEXT: mov z0.d, #0x7fffffffffffffff
660 ; SVE2-NEXT: ldr q1, [x0]
661 ; SVE2-NEXT: ldr q2, [x1]
662 ; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
663 ; SVE2-NEXT: str q1, [x0]
666 ; NONEON-NOSVE-LABEL: test_copysign_v2f64_v2f64:
667 ; NONEON-NOSVE: // %bb.0:
668 ; NONEON-NOSVE-NEXT: ldr q0, [x0]
669 ; NONEON-NOSVE-NEXT: ldr q1, [x1]
670 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
671 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
672 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
673 ; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
674 ; NONEON-NOSVE-NEXT: fabs d0, d0
675 ; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000
676 ; NONEON-NOSVE-NEXT: fneg d1, d0
677 ; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne
678 ; NONEON-NOSVE-NEXT: ldr d0, [sp]
679 ; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000
680 ; NONEON-NOSVE-NEXT: fabs d0, d0
681 ; NONEON-NOSVE-NEXT: fneg d1, d0
682 ; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne
683 ; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #32]
684 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
685 ; NONEON-NOSVE-NEXT: str q0, [x0]
686 ; NONEON-NOSVE-NEXT: add sp, sp, #48
687 ; NONEON-NOSVE-NEXT: ret
688 %a = load <2 x double>, ptr %ap
689 %b = load <2 x double>, ptr %bp
690 %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
691 store <2 x double> %r, ptr %ap
; copysign on <4 x double>: loads %a from %ap and %b from %bp, calls
; llvm.copysign.v4f64(%a, %b) and stores the result back to %ap.
; The expected code handles the 256-bit value as two q-register halves
; (ldp/stp pairs).
; Expected lowering per RUN line:
;   +sve          - per half: mask %a with 0x7fffffffffffffff, %b with
;                   0x8000000000000000, then orr.
;   +sve2 / +sme  - one bsl per half, both sharing a single
;                   0x7fffffffffffffff mask.
;   no SVE        - per element: tst the sign bit of the %b lane (loaded into
;                   an x register), then fabs/fneg/fcsel on the %a lane.
695 define void @test_copysign_v4f64_v4f64(ptr %ap, ptr %bp) {
696 ; SVE-LABEL: test_copysign_v4f64_v4f64:
698 ; SVE-NEXT: ldp q0, q3, [x1]
699 ; SVE-NEXT: ldp q1, q2, [x0]
700 ; SVE-NEXT: and z0.d, z0.d, #0x8000000000000000
701 ; SVE-NEXT: and z3.d, z3.d, #0x8000000000000000
702 ; SVE-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
703 ; SVE-NEXT: and z2.d, z2.d, #0x7fffffffffffffff
704 ; SVE-NEXT: orr z0.d, z1.d, z0.d
705 ; SVE-NEXT: orr z1.d, z2.d, z3.d
706 ; SVE-NEXT: stp q0, q1, [x0]
709 ; SVE2-LABEL: test_copysign_v4f64_v4f64:
711 ; SVE2-NEXT: mov z0.d, #0x7fffffffffffffff
712 ; SVE2-NEXT: ldp q1, q4, [x1]
713 ; SVE2-NEXT: ldp q2, q3, [x0]
714 ; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
715 ; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d
716 ; SVE2-NEXT: stp q2, q3, [x0]
719 ; NONEON-NOSVE-LABEL: test_copysign_v4f64_v4f64:
720 ; NONEON-NOSVE: // %bb.0:
721 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
722 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
723 ; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
724 ; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
725 ; NONEON-NOSVE-NEXT: stp q2, q3, [sp]
726 ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #32]
727 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
728 ; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #48]
729 ; NONEON-NOSVE-NEXT: fabs d0, d0
730 ; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000
731 ; NONEON-NOSVE-NEXT: fneg d1, d0
732 ; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne
733 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #32]
734 ; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000
735 ; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
736 ; NONEON-NOSVE-NEXT: fabs d0, d0
737 ; NONEON-NOSVE-NEXT: fneg d1, d0
738 ; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne
739 ; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000
740 ; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #80]
741 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
742 ; NONEON-NOSVE-NEXT: fabs d0, d0
743 ; NONEON-NOSVE-NEXT: fneg d1, d0
744 ; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne
745 ; NONEON-NOSVE-NEXT: ldr d0, [sp]
746 ; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000
747 ; NONEON-NOSVE-NEXT: fabs d0, d0
748 ; NONEON-NOSVE-NEXT: fneg d1, d0
749 ; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne
750 ; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #64]
751 ; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #64]
752 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
753 ; NONEON-NOSVE-NEXT: add sp, sp, #96
754 ; NONEON-NOSVE-NEXT: ret
755 %a = load <4 x double>, ptr %ap
756 %b = load <4 x double>, ptr %bp
757 %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
758 store <4 x double> %r, ptr %ap
; Mixed-width copysign: the magnitude %a is <2 x float> (from %ap) while the
; sign source %b is <2 x double> (from %bp). %b is narrowed with fptrunc,
; passed to llvm.copysign.v2f32 and the result is stored back to %ap.
; Expected lowering per RUN line:
;   +sve          - fcvt + uzp1 to narrow %b, then the 0x7fffffff / 0x80000000
;                   masks combined with orr.
;   +sve2 / +sme  - fcvt + uzp1 to narrow %b, then a single bsl with a
;                   0x7fffffff mask.
;   no SVE        - no conversion is expected: the sign bit is tested on the
;                   original 64-bit %b lanes (tst of an x register) before the
;                   fabs/fneg/fcsel on each %a lane.
764 define void @test_copysign_v2f32_v2f64(ptr %ap, ptr %bp) {
765 ; SVE-LABEL: test_copysign_v2f32_v2f64:
767 ; SVE-NEXT: ptrue p0.d
768 ; SVE-NEXT: ldr q0, [x1]
769 ; SVE-NEXT: ldr d1, [x0]
770 ; SVE-NEXT: fcvt z0.s, p0/m, z0.d
771 ; SVE-NEXT: and z1.s, z1.s, #0x7fffffff
772 ; SVE-NEXT: uzp1 z0.s, z0.s, z0.s
773 ; SVE-NEXT: and z0.s, z0.s, #0x80000000
774 ; SVE-NEXT: orr z0.d, z1.d, z0.d
775 ; SVE-NEXT: str d0, [x0]
778 ; SVE2-LABEL: test_copysign_v2f32_v2f64:
780 ; SVE2-NEXT: ptrue p0.d
781 ; SVE2-NEXT: ldr q0, [x1]
782 ; SVE2-NEXT: mov z1.s, #0x7fffffff
783 ; SVE2-NEXT: ldr d2, [x0]
784 ; SVE2-NEXT: fcvt z0.s, p0/m, z0.d
785 ; SVE2-NEXT: uzp1 z0.s, z0.s, z0.s
786 ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
787 ; SVE2-NEXT: str d2, [x0]
790 ; NONEON-NOSVE-LABEL: test_copysign_v2f32_v2f64:
791 ; NONEON-NOSVE: // %bb.0:
792 ; NONEON-NOSVE-NEXT: sub sp, sp, #48
793 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
794 ; NONEON-NOSVE-NEXT: ldr d1, [x0]
795 ; NONEON-NOSVE-NEXT: ldr q0, [x1]
796 ; NONEON-NOSVE-NEXT: str d1, [sp, #8]
797 ; NONEON-NOSVE-NEXT: str q0, [sp, #16]
798 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
799 ; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
800 ; NONEON-NOSVE-NEXT: fabs s0, s0
801 ; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000
802 ; NONEON-NOSVE-NEXT: fneg s1, s0
803 ; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne
804 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
805 ; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000
806 ; NONEON-NOSVE-NEXT: fabs s0, s0
807 ; NONEON-NOSVE-NEXT: fneg s1, s0
808 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
809 ; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #40]
810 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
811 ; NONEON-NOSVE-NEXT: str d0, [x0]
812 ; NONEON-NOSVE-NEXT: add sp, sp, #48
813 ; NONEON-NOSVE-NEXT: ret
814 %a = load <2 x float>, ptr %ap
815 %b = load <2 x double>, ptr %bp
; Narrow the sign operand so both copysign operands are <2 x float>.
816 %tmp0 = fptrunc <2 x double> %b to <2 x float>
817 %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
818 store <2 x float> %r, ptr %ap
; Mixed-width copysign: the magnitude %a is <4 x float> (from %ap) while the
; sign source %b is <4 x double> (from %bp). %b is narrowed with fptrunc,
; passed to llvm.copysign.v4f32 and the result is stored back to %ap.
; Expected lowering per RUN line:
;   +sve          - each q-register half of %b is narrowed with fcvt + uzp1,
;                   the halves are joined with splice, then the 0x7fffffff /
;                   0x80000000 masks are combined with orr.
;   +sve2 / +sme  - same narrowing, joined with the two-register form of
;                   splice, then a single bsl with a 0x7fffffff mask.
;   no SVE        - no conversion is expected: the sign bit is tested on the
;                   original 64-bit %b lanes (tst of an x register) before the
;                   fabs/fneg/fcsel on each %a lane.
825 define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) {
826 ; SVE-LABEL: test_copysign_v4f32_v4f64:
828 ; SVE-NEXT: ldp q0, q1, [x1]
829 ; SVE-NEXT: ptrue p0.d
830 ; SVE-NEXT: fcvt z1.s, p0/m, z1.d
831 ; SVE-NEXT: fcvt z0.s, p0/m, z0.d
832 ; SVE-NEXT: ptrue p0.s, vl2
833 ; SVE-NEXT: uzp1 z1.s, z1.s, z1.s
834 ; SVE-NEXT: uzp1 z0.s, z0.s, z0.s
835 ; SVE-NEXT: splice z0.s, p0, z0.s, z1.s
836 ; SVE-NEXT: ldr q1, [x0]
837 ; SVE-NEXT: and z1.s, z1.s, #0x7fffffff
838 ; SVE-NEXT: and z0.s, z0.s, #0x80000000
839 ; SVE-NEXT: orr z0.d, z1.d, z0.d
840 ; SVE-NEXT: str q0, [x0]
843 ; SVE2-LABEL: test_copysign_v4f32_v4f64:
845 ; SVE2-NEXT: ldp q1, q0, [x1]
846 ; SVE2-NEXT: ptrue p0.d
847 ; SVE2-NEXT: fcvt z0.s, p0/m, z0.d
848 ; SVE2-NEXT: fcvt z1.s, p0/m, z1.d
849 ; SVE2-NEXT: ptrue p0.s, vl2
850 ; SVE2-NEXT: uzp1 z3.s, z0.s, z0.s
851 ; SVE2-NEXT: uzp1 z2.s, z1.s, z1.s
852 ; SVE2-NEXT: mov z1.s, #0x7fffffff
853 ; SVE2-NEXT: splice z0.s, p0, { z2.s, z3.s }
854 ; SVE2-NEXT: ldr q2, [x0]
855 ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
856 ; SVE2-NEXT: str q2, [x0]
859 ; NONEON-NOSVE-LABEL: test_copysign_v4f32_v4f64:
860 ; NONEON-NOSVE: // %bb.0:
861 ; NONEON-NOSVE-NEXT: sub sp, sp, #64
862 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
863 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x1]
864 ; NONEON-NOSVE-NEXT: ldr q2, [x0]
865 ; NONEON-NOSVE-NEXT: str q2, [sp]
866 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
867 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #12]
868 ; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #32]
869 ; NONEON-NOSVE-NEXT: fabs s0, s0
870 ; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000
871 ; NONEON-NOSVE-NEXT: fneg s1, s0
872 ; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne
873 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #8]
874 ; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000
875 ; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
876 ; NONEON-NOSVE-NEXT: fabs s0, s0
877 ; NONEON-NOSVE-NEXT: fneg s1, s0
878 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
879 ; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000
880 ; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #56]
881 ; NONEON-NOSVE-NEXT: ldr s0, [sp, #4]
882 ; NONEON-NOSVE-NEXT: fabs s0, s0
883 ; NONEON-NOSVE-NEXT: fneg s1, s0
884 ; NONEON-NOSVE-NEXT: fcsel s2, s1, s0, ne
885 ; NONEON-NOSVE-NEXT: ldr s0, [sp]
886 ; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000
887 ; NONEON-NOSVE-NEXT: fabs s0, s0
888 ; NONEON-NOSVE-NEXT: fneg s1, s0
889 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
890 ; NONEON-NOSVE-NEXT: stp s0, s2, [sp, #48]
891 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
892 ; NONEON-NOSVE-NEXT: str q0, [x0]
893 ; NONEON-NOSVE-NEXT: add sp, sp, #64
894 ; NONEON-NOSVE-NEXT: ret
895 %a = load <4 x float>, ptr %ap
896 %b = load <4 x double>, ptr %bp
; Narrow the sign operand so both copysign operands are <4 x float>.
897 %tmp0 = fptrunc <4 x double> %b to <4 x float>
898 %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
899 store <4 x float> %r, ptr %ap
; copysign on <2 x double>: magnitudes are loaded from %ap, signs come from a
; <2 x float> loaded from %bp and widened with fpext; the result is stored
; back through %ap.
; Expected code per configuration (see the RUN lines at the top of the file):
;  * +sve run: ld1w places the floats in 64-bit containers, fcvt widens them,
;    and the sign is merged with an and/and/orr mask sequence.
;  * +sve2 and +sme runs: same load and widening, merged with one bsl against
;    a 0x7fffffffffffffff mask.
;  * run without vector features: per-lane fabs/fneg/fcsel on d registers, with
;    the condition set by a tst of bit 31 of the 32-bit source lane; no
;    widening conversion appears in that expected output.
905 define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) {
906 ; SVE-LABEL: test_copysign_v2f64_v2f32:
908 ; SVE-NEXT: ptrue p0.d, vl2
909 ; SVE-NEXT: ldr q0, [x0]
910 ; SVE-NEXT: ld1w { z1.d }, p0/z, [x1]
911 ; SVE-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
912 ; SVE-NEXT: fcvt z1.d, p0/m, z1.s
913 ; SVE-NEXT: and z1.d, z1.d, #0x8000000000000000
914 ; SVE-NEXT: orr z0.d, z0.d, z1.d
915 ; SVE-NEXT: str q0, [x0]
918 ; SVE2-LABEL: test_copysign_v2f64_v2f32:
920 ; SVE2-NEXT: ptrue p0.d, vl2
921 ; SVE2-NEXT: mov z1.d, #0x7fffffffffffffff
922 ; SVE2-NEXT: ldr q2, [x0]
923 ; SVE2-NEXT: ld1w { z0.d }, p0/z, [x1]
924 ; SVE2-NEXT: fcvt z0.d, p0/m, z0.s
925 ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
926 ; SVE2-NEXT: str q2, [x0]
929 ; NONEON-NOSVE-LABEL: test_copysign_v2f64_v2f32:
930 ; NONEON-NOSVE: // %bb.0:
931 ; NONEON-NOSVE-NEXT: sub sp, sp, #48
932 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
933 ; NONEON-NOSVE-NEXT: ldr q1, [x0]
934 ; NONEON-NOSVE-NEXT: ldr d0, [x1]
935 ; NONEON-NOSVE-NEXT: str q1, [sp]
936 ; NONEON-NOSVE-NEXT: str d0, [sp, #24]
937 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
938 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24]
939 ; NONEON-NOSVE-NEXT: fabs d0, d0
940 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
941 ; NONEON-NOSVE-NEXT: fneg d1, d0
942 ; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne
943 ; NONEON-NOSVE-NEXT: ldr d0, [sp]
944 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
945 ; NONEON-NOSVE-NEXT: fabs d0, d0
946 ; NONEON-NOSVE-NEXT: fneg d1, d0
947 ; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne
948 ; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #32]
949 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
950 ; NONEON-NOSVE-NEXT: str q0, [x0]
951 ; NONEON-NOSVE-NEXT: add sp, sp, #48
952 ; NONEON-NOSVE-NEXT: ret
953 %a = load <2 x double>, ptr %ap
954 %b = load < 2 x float>, ptr %bp
955 %tmp0 = fpext <2 x float> %b to <2 x double>
956 %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
957 store <2 x double> %r, ptr %ap
963 ; SplitVecRes mismatched
; copysign on <4 x double>: magnitudes are loaded from %ap, signs come from a
; <4 x float> loaded from %bp and widened with fpext; the result is stored
; back through %ap.
; In every expected output the 256-bit result is handled as two 128-bit halves
; (ldp/stp of q-register pairs on x0).
; Expected code per configuration (see the RUN lines at the top of the file):
;  * +sve run: two ld1w loads into 64-bit containers, fcvt widening, then an
;    and/and/orr mask sequence per half.
;  * +sve2 and +sme runs: same loads and widening, one bsl per half against a
;    shared 0x7fffffffffffffff mask.
;  * run without vector features: per-lane fabs/fneg/fcsel on d registers, with
;    the condition set by a tst of bit 31 of the 32-bit source lane.
964 define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) {
965 ; SVE-LABEL: test_copysign_v4f64_v4f32:
967 ; SVE-NEXT: ptrue p0.d, vl2
968 ; SVE-NEXT: mov x8, #2 // =0x2
969 ; SVE-NEXT: ldp q2, q3, [x0]
970 ; SVE-NEXT: ld1w { z0.d }, p0/z, [x1]
971 ; SVE-NEXT: ld1w { z1.d }, p0/z, [x1, x8, lsl #2]
972 ; SVE-NEXT: and z2.d, z2.d, #0x7fffffffffffffff
973 ; SVE-NEXT: and z3.d, z3.d, #0x7fffffffffffffff
974 ; SVE-NEXT: fcvt z0.d, p0/m, z0.s
975 ; SVE-NEXT: fcvt z1.d, p0/m, z1.s
976 ; SVE-NEXT: and z0.d, z0.d, #0x8000000000000000
977 ; SVE-NEXT: and z1.d, z1.d, #0x8000000000000000
978 ; SVE-NEXT: orr z0.d, z2.d, z0.d
979 ; SVE-NEXT: orr z1.d, z3.d, z1.d
980 ; SVE-NEXT: stp q0, q1, [x0]
983 ; SVE2-LABEL: test_copysign_v4f64_v4f32:
985 ; SVE2-NEXT: ptrue p0.d, vl2
986 ; SVE2-NEXT: mov x8, #2 // =0x2
987 ; SVE2-NEXT: mov z2.d, #0x7fffffffffffffff
988 ; SVE2-NEXT: ldp q3, q4, [x0]
989 ; SVE2-NEXT: ld1w { z0.d }, p0/z, [x1]
990 ; SVE2-NEXT: ld1w { z1.d }, p0/z, [x1, x8, lsl #2]
991 ; SVE2-NEXT: fcvt z0.d, p0/m, z0.s
992 ; SVE2-NEXT: fcvt z1.d, p0/m, z1.s
993 ; SVE2-NEXT: bsl z3.d, z3.d, z0.d, z2.d
994 ; SVE2-NEXT: bsl z4.d, z4.d, z1.d, z2.d
995 ; SVE2-NEXT: stp q3, q4, [x0]
998 ; NONEON-NOSVE-LABEL: test_copysign_v4f64_v4f32:
999 ; NONEON-NOSVE: // %bb.0:
1000 ; NONEON-NOSVE-NEXT: sub sp, sp, #96
1001 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
1002 ; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
1003 ; NONEON-NOSVE-NEXT: ldr q0, [x1]
1004 ; NONEON-NOSVE-NEXT: stp q0, q2, [sp, #16]
1005 ; NONEON-NOSVE-NEXT: ldp d0, d2, [sp, #16]
1006 ; NONEON-NOSVE-NEXT: str q1, [sp]
1007 ; NONEON-NOSVE-NEXT: stp d2, d0, [sp, #48]
1008 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
1009 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #48]
1010 ; NONEON-NOSVE-NEXT: fabs d0, d0
1011 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
1012 ; NONEON-NOSVE-NEXT: fneg d1, d0
1013 ; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne
1014 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #32]
1015 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
1016 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56]
1017 ; NONEON-NOSVE-NEXT: fabs d0, d0
1018 ; NONEON-NOSVE-NEXT: fneg d1, d0
1019 ; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne
1020 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
1021 ; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #64]
1022 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
1023 ; NONEON-NOSVE-NEXT: fabs d0, d0
1024 ; NONEON-NOSVE-NEXT: fneg d1, d0
1025 ; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne
1026 ; NONEON-NOSVE-NEXT: ldr d0, [sp]
1027 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
1028 ; NONEON-NOSVE-NEXT: fabs d0, d0
1029 ; NONEON-NOSVE-NEXT: fneg d1, d0
1030 ; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne
1031 ; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #80]
1032 ; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64]
1033 ; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
1034 ; NONEON-NOSVE-NEXT: add sp, sp, #96
1035 ; NONEON-NOSVE-NEXT: ret
1036 %a = load <4 x double>, ptr %ap
1037 %b = load <4 x float>, ptr %bp
1038 %tmp0 = fpext <4 x float> %b to <4 x double>
1039 %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
1040 store <4 x double> %r, ptr %ap
; copysign on <4 x half>: magnitudes are loaded from %ap, signs come from a
; <4 x float> loaded from %bp and narrowed with fptrunc; the result is stored
; back through %ap.
; Expected code per configuration (see the RUN lines at the top of the file):
;  * +sve run: fcvt to half plus uzp1 to pack the lanes, then an and/and/orr
;    mask sequence using 0x7fff and 0x8000.
;  * +sve2 and +sme runs: same narrowing, merged with one bsl against a 0x7fff
;    mask.
;  * run without vector features: each half lane is widened to single with
;    fcvt, passed through fabs/fneg/fcsel, and converted back to half; the
;    condition comes from a tst of bit 31 of the 32-bit source lane.
1046 define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) {
1047 ; SVE-LABEL: test_copysign_v4f16_v4f32:
1049 ; SVE-NEXT: ptrue p0.s
1050 ; SVE-NEXT: ldr q0, [x1]
1051 ; SVE-NEXT: ldr d1, [x0]
1052 ; SVE-NEXT: fcvt z0.h, p0/m, z0.s
1053 ; SVE-NEXT: and z1.h, z1.h, #0x7fff
1054 ; SVE-NEXT: uzp1 z0.h, z0.h, z0.h
1055 ; SVE-NEXT: and z0.h, z0.h, #0x8000
1056 ; SVE-NEXT: orr z0.d, z1.d, z0.d
1057 ; SVE-NEXT: str d0, [x0]
1060 ; SVE2-LABEL: test_copysign_v4f16_v4f32:
1062 ; SVE2-NEXT: ptrue p0.s
1063 ; SVE2-NEXT: ldr q0, [x1]
1064 ; SVE2-NEXT: mov z1.h, #32767 // =0x7fff
1065 ; SVE2-NEXT: ldr d2, [x0]
1066 ; SVE2-NEXT: fcvt z0.h, p0/m, z0.s
1067 ; SVE2-NEXT: uzp1 z0.h, z0.h, z0.h
1068 ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
1069 ; SVE2-NEXT: str d2, [x0]
1072 ; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f32:
1073 ; NONEON-NOSVE: // %bb.0:
1074 ; NONEON-NOSVE-NEXT: sub sp, sp, #48
1075 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
1076 ; NONEON-NOSVE-NEXT: ldr d1, [x0]
1077 ; NONEON-NOSVE-NEXT: ldr q0, [x1]
1078 ; NONEON-NOSVE-NEXT: str d1, [sp, #8]
1079 ; NONEON-NOSVE-NEXT: str q0, [sp, #16]
1080 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
1081 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24]
1082 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1083 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
1084 ; NONEON-NOSVE-NEXT: fabs s0, s0
1085 ; NONEON-NOSVE-NEXT: fneg s1, s0
1086 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1087 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
1088 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16]
1089 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1090 ; NONEON-NOSVE-NEXT: str h0, [sp, #46]
1091 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
1092 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1093 ; NONEON-NOSVE-NEXT: fabs s0, s0
1094 ; NONEON-NOSVE-NEXT: fneg s1, s0
1095 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1096 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
1097 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1098 ; NONEON-NOSVE-NEXT: str h0, [sp, #44]
1099 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
1100 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1101 ; NONEON-NOSVE-NEXT: fabs s0, s0
1102 ; NONEON-NOSVE-NEXT: fneg s1, s0
1103 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1104 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
1105 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1106 ; NONEON-NOSVE-NEXT: str h0, [sp, #42]
1107 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
1108 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1109 ; NONEON-NOSVE-NEXT: fabs s0, s0
1110 ; NONEON-NOSVE-NEXT: fneg s1, s0
1111 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1112 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1113 ; NONEON-NOSVE-NEXT: str h0, [sp, #40]
1114 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
1115 ; NONEON-NOSVE-NEXT: str d0, [x0]
1116 ; NONEON-NOSVE-NEXT: add sp, sp, #48
1117 ; NONEON-NOSVE-NEXT: ret
1118 %a = load <4 x half>, ptr %ap
1119 %b = load <4 x float>, ptr %bp
1120 %tmp0 = fptrunc <4 x float> %b to <4 x half>
1121 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
1122 store <4 x half> %r, ptr %ap
; copysign on <4 x half>: magnitudes are loaded from %ap, signs come from a
; <4 x double> loaded from %bp and narrowed with fptrunc; the result is stored
; back through %ap.
; Expected code per configuration (see the RUN lines at the top of the file):
;  * +sve run: the double-to-half narrowing is done lane by lane with scalar
;    fcvt and the vector is rebuilt with zip1, followed by an and/and/orr mask
;    sequence using 0x7fff and 0x8000.
;  * +sve2 and +sme runs: same scalar narrowing and zip1 rebuild, merged with
;    one bsl against a 0x7fff mask.
;  * run without vector features: each half lane is widened to single with
;    fcvt, passed through fabs/fneg/fcsel, and converted back to half; the
;    condition comes from a tst of the top bit of the 64-bit source lane.
1126 define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) {
1127 ; SVE-LABEL: test_copysign_v4f16_v4f64:
1129 ; SVE-NEXT: ldp q0, q1, [x1]
1130 ; SVE-NEXT: mov z2.d, z1.d[1]
1131 ; SVE-NEXT: mov z3.d, z0.d[1]
1132 ; SVE-NEXT: fcvt h1, d1
1133 ; SVE-NEXT: fcvt h0, d0
1134 ; SVE-NEXT: fcvt h2, d2
1135 ; SVE-NEXT: fcvt h3, d3
1136 ; SVE-NEXT: zip1 z1.h, z1.h, z2.h
1137 ; SVE-NEXT: zip1 z0.h, z0.h, z3.h
1138 ; SVE-NEXT: zip1 z0.s, z0.s, z1.s
1139 ; SVE-NEXT: ldr d1, [x0]
1140 ; SVE-NEXT: and z1.h, z1.h, #0x7fff
1141 ; SVE-NEXT: and z0.h, z0.h, #0x8000
1142 ; SVE-NEXT: orr z0.d, z1.d, z0.d
1143 ; SVE-NEXT: str d0, [x0]
1146 ; SVE2-LABEL: test_copysign_v4f16_v4f64:
1148 ; SVE2-NEXT: ldp q0, q1, [x1]
1149 ; SVE2-NEXT: mov z2.d, z1.d[1]
1150 ; SVE2-NEXT: mov z3.d, z0.d[1]
1151 ; SVE2-NEXT: fcvt h1, d1
1152 ; SVE2-NEXT: fcvt h0, d0
1153 ; SVE2-NEXT: fcvt h2, d2
1154 ; SVE2-NEXT: fcvt h3, d3
1155 ; SVE2-NEXT: zip1 z1.h, z1.h, z2.h
1156 ; SVE2-NEXT: zip1 z0.h, z0.h, z3.h
1157 ; SVE2-NEXT: mov z2.h, #32767 // =0x7fff
1158 ; SVE2-NEXT: zip1 z0.s, z0.s, z1.s
1159 ; SVE2-NEXT: ldr d1, [x0]
1160 ; SVE2-NEXT: bsl z1.d, z1.d, z0.d, z2.d
1161 ; SVE2-NEXT: str d1, [x0]
1164 ; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f64:
1165 ; NONEON-NOSVE: // %bb.0:
1166 ; NONEON-NOSVE-NEXT: sub sp, sp, #64
1167 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
1168 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x1]
1169 ; NONEON-NOSVE-NEXT: ldr d2, [x0]
1170 ; NONEON-NOSVE-NEXT: str d2, [sp, #8]
1171 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
1172 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
1173 ; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #32]
1174 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1175 ; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000
1176 ; NONEON-NOSVE-NEXT: fabs s0, s0
1177 ; NONEON-NOSVE-NEXT: fneg s1, s0
1178 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1179 ; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000
1180 ; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16]
1181 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1182 ; NONEON-NOSVE-NEXT: str h0, [sp, #62]
1183 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
1184 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1185 ; NONEON-NOSVE-NEXT: fabs s0, s0
1186 ; NONEON-NOSVE-NEXT: fneg s1, s0
1187 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1188 ; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000
1189 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1190 ; NONEON-NOSVE-NEXT: str h0, [sp, #60]
1191 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
1192 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1193 ; NONEON-NOSVE-NEXT: fabs s0, s0
1194 ; NONEON-NOSVE-NEXT: fneg s1, s0
1195 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1196 ; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000
1197 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1198 ; NONEON-NOSVE-NEXT: str h0, [sp, #58]
1199 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
1200 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1201 ; NONEON-NOSVE-NEXT: fabs s0, s0
1202 ; NONEON-NOSVE-NEXT: fneg s1, s0
1203 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1204 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1205 ; NONEON-NOSVE-NEXT: str h0, [sp, #56]
1206 ; NONEON-NOSVE-NEXT: ldr d0, [sp, #56]
1207 ; NONEON-NOSVE-NEXT: str d0, [x0]
1208 ; NONEON-NOSVE-NEXT: add sp, sp, #64
1209 ; NONEON-NOSVE-NEXT: ret
1210 %a = load <4 x half>, ptr %ap
1211 %b = load <4 x double>, ptr %bp
1212 %tmp0 = fptrunc <4 x double> %b to <4 x half>
1213 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
1214 store <4 x half> %r, ptr %ap
; copysign on <8 x half>: magnitudes are loaded from %ap, signs come from an
; <8 x float> loaded from %bp and narrowed with fptrunc; the result is stored
; back through %ap.
; Expected code per configuration (see the RUN lines at the top of the file):
;  * +sve run: both 128-bit halves are narrowed with fcvt/uzp1 and joined with
;    splice, then merged with an and/and/orr mask sequence using 0x7fff and
;    0x8000.
;  * +sve2 and +sme runs: same narrowing (splice in its register-list form),
;    merged with one bsl against a 0x7fff mask.
;  * run without vector features: each of the eight half lanes is widened to
;    single with fcvt, passed through fabs/fneg/fcsel, and converted back to
;    half; the condition comes from a tst of bit 31 of the 32-bit source lane.
1220 define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) {
1221 ; SVE-LABEL: test_copysign_v8f16_v8f32:
1223 ; SVE-NEXT: ldp q0, q1, [x1]
1224 ; SVE-NEXT: ptrue p0.s
1225 ; SVE-NEXT: fcvt z1.h, p0/m, z1.s
1226 ; SVE-NEXT: fcvt z0.h, p0/m, z0.s
1227 ; SVE-NEXT: ptrue p0.h, vl4
1228 ; SVE-NEXT: uzp1 z1.h, z1.h, z1.h
1229 ; SVE-NEXT: uzp1 z0.h, z0.h, z0.h
1230 ; SVE-NEXT: splice z0.h, p0, z0.h, z1.h
1231 ; SVE-NEXT: ldr q1, [x0]
1232 ; SVE-NEXT: and z1.h, z1.h, #0x7fff
1233 ; SVE-NEXT: and z0.h, z0.h, #0x8000
1234 ; SVE-NEXT: orr z0.d, z1.d, z0.d
1235 ; SVE-NEXT: str q0, [x0]
1238 ; SVE2-LABEL: test_copysign_v8f16_v8f32:
1240 ; SVE2-NEXT: ldp q1, q0, [x1]
1241 ; SVE2-NEXT: ptrue p0.s
1242 ; SVE2-NEXT: fcvt z0.h, p0/m, z0.s
1243 ; SVE2-NEXT: fcvt z1.h, p0/m, z1.s
1244 ; SVE2-NEXT: ptrue p0.h, vl4
1245 ; SVE2-NEXT: uzp1 z3.h, z0.h, z0.h
1246 ; SVE2-NEXT: uzp1 z2.h, z1.h, z1.h
1247 ; SVE2-NEXT: mov z1.h, #32767 // =0x7fff
1248 ; SVE2-NEXT: splice z0.h, p0, { z2.h, z3.h }
1249 ; SVE2-NEXT: ldr q2, [x0]
1250 ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
1251 ; SVE2-NEXT: str q2, [x0]
1254 ; NONEON-NOSVE-LABEL: test_copysign_v8f16_v8f32:
1255 ; NONEON-NOSVE: // %bb.0:
1256 ; NONEON-NOSVE-NEXT: sub sp, sp, #64
1257 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
1258 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x1]
1259 ; NONEON-NOSVE-NEXT: ldr q2, [x0]
1260 ; NONEON-NOSVE-NEXT: str q2, [sp]
1261 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
1262 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #14]
1263 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40]
1264 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1265 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
1266 ; NONEON-NOSVE-NEXT: fabs s0, s0
1267 ; NONEON-NOSVE-NEXT: fneg s1, s0
1268 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1269 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
1270 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32]
1271 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1272 ; NONEON-NOSVE-NEXT: str h0, [sp, #62]
1273 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #12]
1274 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1275 ; NONEON-NOSVE-NEXT: fabs s0, s0
1276 ; NONEON-NOSVE-NEXT: fneg s1, s0
1277 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1278 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
1279 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1280 ; NONEON-NOSVE-NEXT: str h0, [sp, #60]
1281 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #10]
1282 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1283 ; NONEON-NOSVE-NEXT: fabs s0, s0
1284 ; NONEON-NOSVE-NEXT: fneg s1, s0
1285 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1286 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
1287 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24]
1288 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1289 ; NONEON-NOSVE-NEXT: str h0, [sp, #58]
1290 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
1291 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1292 ; NONEON-NOSVE-NEXT: fabs s0, s0
1293 ; NONEON-NOSVE-NEXT: fneg s1, s0
1294 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1295 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
1296 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1297 ; NONEON-NOSVE-NEXT: str h0, [sp, #56]
1298 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #6]
1299 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1300 ; NONEON-NOSVE-NEXT: fabs s0, s0
1301 ; NONEON-NOSVE-NEXT: fneg s1, s0
1302 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1303 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
1304 ; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16]
1305 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1306 ; NONEON-NOSVE-NEXT: str h0, [sp, #54]
1307 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #4]
1308 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1309 ; NONEON-NOSVE-NEXT: fabs s0, s0
1310 ; NONEON-NOSVE-NEXT: fneg s1, s0
1311 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1312 ; NONEON-NOSVE-NEXT: tst w9, #0x80000000
1313 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1314 ; NONEON-NOSVE-NEXT: str h0, [sp, #52]
1315 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #2]
1316 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1317 ; NONEON-NOSVE-NEXT: fabs s0, s0
1318 ; NONEON-NOSVE-NEXT: fneg s1, s0
1319 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1320 ; NONEON-NOSVE-NEXT: tst w8, #0x80000000
1321 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1322 ; NONEON-NOSVE-NEXT: str h0, [sp, #50]
1323 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
1324 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1325 ; NONEON-NOSVE-NEXT: fabs s0, s0
1326 ; NONEON-NOSVE-NEXT: fneg s1, s0
1327 ; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne
1328 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1329 ; NONEON-NOSVE-NEXT: str h0, [sp, #48]
1330 ; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
1331 ; NONEON-NOSVE-NEXT: str q0, [x0]
1332 ; NONEON-NOSVE-NEXT: add sp, sp, #64
1333 ; NONEON-NOSVE-NEXT: ret
1334 %a = load <8 x half>, ptr %ap
1335 %b = load <8 x float>, ptr %bp
1336 %tmp0 = fptrunc <8 x float> %b to <8 x half>
1337 %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0)
1338 store <8 x half> %r, ptr %ap
; Declarations of the llvm.copysign intrinsic for the fixed-length vector
; types used by the tests, grouped by element type (half, float, double).
; In each declaration %a is the first operand and %b the second; the tests
; above pass the loaded magnitude vector as %a and the converted sign vector
; as %b.
; NOTE(review): attribute group #0 is referenced here but defined outside this
; excerpt - confirm its contents there.
1342 declare <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) #0
1343 declare <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) #0
1344 declare <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b) #0
1346 declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
1347 declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0
1348 declare <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b) #0
1350 declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
1351 declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0