1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; smax_8b loads two <8 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smax.v8i8. The expected assembly is two d-register ldr
; loads followed by a single smax.8b.
4 define <8 x i8> @smax_8b(ptr %A, ptr %B) nounwind {
5 ; CHECK-LABEL: smax_8b:
7 ; CHECK-NEXT: ldr d0, [x0]
8 ; CHECK-NEXT: ldr d1, [x1]
9 ; CHECK-NEXT: smax.8b v0, v0, v1
11 %tmp1 = load <8 x i8>, ptr %A
12 %tmp2 = load <8 x i8>, ptr %B
13 %tmp3 = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; smax_16b loads two <16 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smax.v16i8. The expected assembly is two q-register ldr
; loads followed by a single smax.16b.
17 define <16 x i8> @smax_16b(ptr %A, ptr %B) nounwind {
18 ; CHECK-LABEL: smax_16b:
20 ; CHECK-NEXT: ldr q0, [x0]
21 ; CHECK-NEXT: ldr q1, [x1]
22 ; CHECK-NEXT: smax.16b v0, v0, v1
24 %tmp1 = load <16 x i8>, ptr %A
25 %tmp2 = load <16 x i8>, ptr %B
26 %tmp3 = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; smax_4h loads two <4 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smax.v4i16. The expected assembly is two d-register ldr
; loads followed by a single smax.4h.
30 define <4 x i16> @smax_4h(ptr %A, ptr %B) nounwind {
31 ; CHECK-LABEL: smax_4h:
33 ; CHECK-NEXT: ldr d0, [x0]
34 ; CHECK-NEXT: ldr d1, [x1]
35 ; CHECK-NEXT: smax.4h v0, v0, v1
37 %tmp1 = load <4 x i16>, ptr %A
38 %tmp2 = load <4 x i16>, ptr %B
39 %tmp3 = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; smax_8h loads two <8 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smax.v8i16. The expected assembly is two q-register ldr
; loads followed by a single smax.8h.
43 define <8 x i16> @smax_8h(ptr %A, ptr %B) nounwind {
44 ; CHECK-LABEL: smax_8h:
46 ; CHECK-NEXT: ldr q0, [x0]
47 ; CHECK-NEXT: ldr q1, [x1]
48 ; CHECK-NEXT: smax.8h v0, v0, v1
50 %tmp1 = load <8 x i16>, ptr %A
51 %tmp2 = load <8 x i16>, ptr %B
52 %tmp3 = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; smax_2s loads two <2 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smax.v2i32. The expected assembly is two d-register ldr
; loads followed by a single smax.2s.
56 define <2 x i32> @smax_2s(ptr %A, ptr %B) nounwind {
57 ; CHECK-LABEL: smax_2s:
59 ; CHECK-NEXT: ldr d0, [x0]
60 ; CHECK-NEXT: ldr d1, [x1]
61 ; CHECK-NEXT: smax.2s v0, v0, v1
63 %tmp1 = load <2 x i32>, ptr %A
64 %tmp2 = load <2 x i32>, ptr %B
65 %tmp3 = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; smax_4s loads two <4 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smax.v4i32. The expected assembly is two q-register ldr
; loads followed by a single smax.4s.
69 define <4 x i32> @smax_4s(ptr %A, ptr %B) nounwind {
70 ; CHECK-LABEL: smax_4s:
72 ; CHECK-NEXT: ldr q0, [x0]
73 ; CHECK-NEXT: ldr q1, [x1]
74 ; CHECK-NEXT: smax.4s v0, v0, v1
76 %tmp1 = load <4 x i32>, ptr %A
77 %tmp2 = load <4 x i32>, ptr %B
78 %tmp3 = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.smax intrinsics for the v8i8, v16i8,
; v4i16, v8i16, v2i32 and v4i32 vector types. Each takes two operands of the
; result type and is marked nounwind readnone.
82 declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
83 declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
84 declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
85 declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
86 declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
87 declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; umax_8b loads two <8 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umax.v8i8. The expected assembly is two d-register ldr
; loads followed by a single umax.8b.
89 define <8 x i8> @umax_8b(ptr %A, ptr %B) nounwind {
90 ; CHECK-LABEL: umax_8b:
92 ; CHECK-NEXT: ldr d0, [x0]
93 ; CHECK-NEXT: ldr d1, [x1]
94 ; CHECK-NEXT: umax.8b v0, v0, v1
96 %tmp1 = load <8 x i8>, ptr %A
97 %tmp2 = load <8 x i8>, ptr %B
98 %tmp3 = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; umax_16b loads two <16 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umax.v16i8. The expected assembly is two q-register ldr
; loads followed by a single umax.16b.
102 define <16 x i8> @umax_16b(ptr %A, ptr %B) nounwind {
103 ; CHECK-LABEL: umax_16b:
105 ; CHECK-NEXT: ldr q0, [x0]
106 ; CHECK-NEXT: ldr q1, [x1]
107 ; CHECK-NEXT: umax.16b v0, v0, v1
109 %tmp1 = load <16 x i8>, ptr %A
110 %tmp2 = load <16 x i8>, ptr %B
111 %tmp3 = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; umax_4h loads two <4 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umax.v4i16. The expected assembly is two d-register ldr
; loads followed by a single umax.4h.
115 define <4 x i16> @umax_4h(ptr %A, ptr %B) nounwind {
116 ; CHECK-LABEL: umax_4h:
118 ; CHECK-NEXT: ldr d0, [x0]
119 ; CHECK-NEXT: ldr d1, [x1]
120 ; CHECK-NEXT: umax.4h v0, v0, v1
122 %tmp1 = load <4 x i16>, ptr %A
123 %tmp2 = load <4 x i16>, ptr %B
124 %tmp3 = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; umax_8h loads two <8 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umax.v8i16. The expected assembly is two q-register ldr
; loads followed by a single umax.8h.
128 define <8 x i16> @umax_8h(ptr %A, ptr %B) nounwind {
129 ; CHECK-LABEL: umax_8h:
131 ; CHECK-NEXT: ldr q0, [x0]
132 ; CHECK-NEXT: ldr q1, [x1]
133 ; CHECK-NEXT: umax.8h v0, v0, v1
135 %tmp1 = load <8 x i16>, ptr %A
136 %tmp2 = load <8 x i16>, ptr %B
137 %tmp3 = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; umax_2s loads two <2 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umax.v2i32. The expected assembly is two d-register ldr
; loads followed by a single umax.2s.
141 define <2 x i32> @umax_2s(ptr %A, ptr %B) nounwind {
142 ; CHECK-LABEL: umax_2s:
144 ; CHECK-NEXT: ldr d0, [x0]
145 ; CHECK-NEXT: ldr d1, [x1]
146 ; CHECK-NEXT: umax.2s v0, v0, v1
148 %tmp1 = load <2 x i32>, ptr %A
149 %tmp2 = load <2 x i32>, ptr %B
150 %tmp3 = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; umax_4s loads two <4 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umax.v4i32. The expected assembly is two q-register ldr
; loads followed by a single umax.4s.
154 define <4 x i32> @umax_4s(ptr %A, ptr %B) nounwind {
155 ; CHECK-LABEL: umax_4s:
157 ; CHECK-NEXT: ldr q0, [x0]
158 ; CHECK-NEXT: ldr q1, [x1]
159 ; CHECK-NEXT: umax.4s v0, v0, v1
161 %tmp1 = load <4 x i32>, ptr %A
162 %tmp2 = load <4 x i32>, ptr %B
163 %tmp3 = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.umax intrinsics for the v8i8, v16i8,
; v4i16, v8i16, v2i32 and v4i32 vector types. Each takes two operands of the
; result type and is marked nounwind readnone.
167 declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
168 declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
169 declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
170 declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
171 declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
172 declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; smin_8b loads two <8 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smin.v8i8. The expected assembly is two d-register ldr
; loads followed by a single smin.8b.
174 define <8 x i8> @smin_8b(ptr %A, ptr %B) nounwind {
175 ; CHECK-LABEL: smin_8b:
177 ; CHECK-NEXT: ldr d0, [x0]
178 ; CHECK-NEXT: ldr d1, [x1]
179 ; CHECK-NEXT: smin.8b v0, v0, v1
181 %tmp1 = load <8 x i8>, ptr %A
182 %tmp2 = load <8 x i8>, ptr %B
183 %tmp3 = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; smin_16b loads two <16 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smin.v16i8. The expected assembly is two q-register ldr
; loads followed by a single smin.16b.
187 define <16 x i8> @smin_16b(ptr %A, ptr %B) nounwind {
188 ; CHECK-LABEL: smin_16b:
190 ; CHECK-NEXT: ldr q0, [x0]
191 ; CHECK-NEXT: ldr q1, [x1]
192 ; CHECK-NEXT: smin.16b v0, v0, v1
194 %tmp1 = load <16 x i8>, ptr %A
195 %tmp2 = load <16 x i8>, ptr %B
196 %tmp3 = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; smin_4h loads two <4 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smin.v4i16. The expected assembly is two d-register ldr
; loads followed by a single smin.4h.
200 define <4 x i16> @smin_4h(ptr %A, ptr %B) nounwind {
201 ; CHECK-LABEL: smin_4h:
203 ; CHECK-NEXT: ldr d0, [x0]
204 ; CHECK-NEXT: ldr d1, [x1]
205 ; CHECK-NEXT: smin.4h v0, v0, v1
207 %tmp1 = load <4 x i16>, ptr %A
208 %tmp2 = load <4 x i16>, ptr %B
209 %tmp3 = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; smin_8h loads two <8 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smin.v8i16. The expected assembly is two q-register ldr
; loads followed by a single smin.8h.
213 define <8 x i16> @smin_8h(ptr %A, ptr %B) nounwind {
214 ; CHECK-LABEL: smin_8h:
216 ; CHECK-NEXT: ldr q0, [x0]
217 ; CHECK-NEXT: ldr q1, [x1]
218 ; CHECK-NEXT: smin.8h v0, v0, v1
220 %tmp1 = load <8 x i16>, ptr %A
221 %tmp2 = load <8 x i16>, ptr %B
222 %tmp3 = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; smin_2s loads two <2 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smin.v2i32. The expected assembly is two d-register ldr
; loads followed by a single smin.2s.
226 define <2 x i32> @smin_2s(ptr %A, ptr %B) nounwind {
227 ; CHECK-LABEL: smin_2s:
229 ; CHECK-NEXT: ldr d0, [x0]
230 ; CHECK-NEXT: ldr d1, [x1]
231 ; CHECK-NEXT: smin.2s v0, v0, v1
233 %tmp1 = load <2 x i32>, ptr %A
234 %tmp2 = load <2 x i32>, ptr %B
235 %tmp3 = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; smin_4s loads two <4 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smin.v4i32. The expected assembly is two q-register ldr
; loads followed by a single smin.4s.
239 define <4 x i32> @smin_4s(ptr %A, ptr %B) nounwind {
240 ; CHECK-LABEL: smin_4s:
242 ; CHECK-NEXT: ldr q0, [x0]
243 ; CHECK-NEXT: ldr q1, [x1]
244 ; CHECK-NEXT: smin.4s v0, v0, v1
246 %tmp1 = load <4 x i32>, ptr %A
247 %tmp2 = load <4 x i32>, ptr %B
248 %tmp3 = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.smin intrinsics for the v8i8, v16i8,
; v4i16, v8i16, v2i32 and v4i32 vector types. Each takes two operands of the
; result type and is marked nounwind readnone.
252 declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
253 declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
254 declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
255 declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
256 declare <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
257 declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; umin_8b loads two <8 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umin.v8i8. The expected assembly is two d-register ldr
; loads followed by a single umin.8b.
259 define <8 x i8> @umin_8b(ptr %A, ptr %B) nounwind {
260 ; CHECK-LABEL: umin_8b:
262 ; CHECK-NEXT: ldr d0, [x0]
263 ; CHECK-NEXT: ldr d1, [x1]
264 ; CHECK-NEXT: umin.8b v0, v0, v1
266 %tmp1 = load <8 x i8>, ptr %A
267 %tmp2 = load <8 x i8>, ptr %B
268 %tmp3 = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; umin_16b loads two <16 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umin.v16i8. The expected assembly is two q-register ldr
; loads followed by a single umin.16b.
272 define <16 x i8> @umin_16b(ptr %A, ptr %B) nounwind {
273 ; CHECK-LABEL: umin_16b:
275 ; CHECK-NEXT: ldr q0, [x0]
276 ; CHECK-NEXT: ldr q1, [x1]
277 ; CHECK-NEXT: umin.16b v0, v0, v1
279 %tmp1 = load <16 x i8>, ptr %A
280 %tmp2 = load <16 x i8>, ptr %B
281 %tmp3 = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; umin_4h loads two <4 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umin.v4i16. The expected assembly is two d-register ldr
; loads followed by a single umin.4h.
285 define <4 x i16> @umin_4h(ptr %A, ptr %B) nounwind {
286 ; CHECK-LABEL: umin_4h:
288 ; CHECK-NEXT: ldr d0, [x0]
289 ; CHECK-NEXT: ldr d1, [x1]
290 ; CHECK-NEXT: umin.4h v0, v0, v1
292 %tmp1 = load <4 x i16>, ptr %A
293 %tmp2 = load <4 x i16>, ptr %B
294 %tmp3 = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; umin_8h loads two <8 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umin.v8i16. The expected assembly is two q-register ldr
; loads followed by a single umin.8h.
298 define <8 x i16> @umin_8h(ptr %A, ptr %B) nounwind {
299 ; CHECK-LABEL: umin_8h:
301 ; CHECK-NEXT: ldr q0, [x0]
302 ; CHECK-NEXT: ldr q1, [x1]
303 ; CHECK-NEXT: umin.8h v0, v0, v1
305 %tmp1 = load <8 x i16>, ptr %A
306 %tmp2 = load <8 x i16>, ptr %B
307 %tmp3 = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; umin_2s loads two <2 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umin.v2i32. The expected assembly is two d-register ldr
; loads followed by a single umin.2s.
311 define <2 x i32> @umin_2s(ptr %A, ptr %B) nounwind {
312 ; CHECK-LABEL: umin_2s:
314 ; CHECK-NEXT: ldr d0, [x0]
315 ; CHECK-NEXT: ldr d1, [x1]
316 ; CHECK-NEXT: umin.2s v0, v0, v1
318 %tmp1 = load <2 x i32>, ptr %A
319 %tmp2 = load <2 x i32>, ptr %B
320 %tmp3 = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; umin_4s loads two <4 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umin.v4i32. The expected assembly is two q-register ldr
; loads followed by a single umin.4s.
324 define <4 x i32> @umin_4s(ptr %A, ptr %B) nounwind {
325 ; CHECK-LABEL: umin_4s:
327 ; CHECK-NEXT: ldr q0, [x0]
328 ; CHECK-NEXT: ldr q1, [x1]
329 ; CHECK-NEXT: umin.4s v0, v0, v1
331 %tmp1 = load <4 x i32>, ptr %A
332 %tmp2 = load <4 x i32>, ptr %B
333 %tmp3 = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.umin intrinsics for the v8i8, v16i8,
; v4i16, v8i16, v2i32 and v4i32 vector types. Each takes two operands of the
; result type and is marked nounwind readnone.
337 declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
338 declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
339 declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
340 declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
341 declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
342 declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
344 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; smaxp_8b loads two <8 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smaxp.v8i8. The expected assembly is two d-register ldr
; loads followed by a single smaxp.8b.
346 define <8 x i8> @smaxp_8b(ptr %A, ptr %B) nounwind {
347 ; CHECK-LABEL: smaxp_8b:
349 ; CHECK-NEXT: ldr d0, [x0]
350 ; CHECK-NEXT: ldr d1, [x1]
351 ; CHECK-NEXT: smaxp.8b v0, v0, v1
353 %tmp1 = load <8 x i8>, ptr %A
354 %tmp2 = load <8 x i8>, ptr %B
355 %tmp3 = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; smaxp_16b loads two <16 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smaxp.v16i8. The expected assembly is two q-register ldr
; loads followed by a single smaxp.16b.
359 define <16 x i8> @smaxp_16b(ptr %A, ptr %B) nounwind {
360 ; CHECK-LABEL: smaxp_16b:
362 ; CHECK-NEXT: ldr q0, [x0]
363 ; CHECK-NEXT: ldr q1, [x1]
364 ; CHECK-NEXT: smaxp.16b v0, v0, v1
366 %tmp1 = load <16 x i8>, ptr %A
367 %tmp2 = load <16 x i8>, ptr %B
368 %tmp3 = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; smaxp_4h loads two <4 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smaxp.v4i16. The expected assembly is two d-register ldr
; loads followed by a single smaxp.4h.
372 define <4 x i16> @smaxp_4h(ptr %A, ptr %B) nounwind {
373 ; CHECK-LABEL: smaxp_4h:
375 ; CHECK-NEXT: ldr d0, [x0]
376 ; CHECK-NEXT: ldr d1, [x1]
377 ; CHECK-NEXT: smaxp.4h v0, v0, v1
379 %tmp1 = load <4 x i16>, ptr %A
380 %tmp2 = load <4 x i16>, ptr %B
381 %tmp3 = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; smaxp_8h loads two <8 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smaxp.v8i16. The expected assembly is two q-register ldr
; loads followed by a single smaxp.8h.
385 define <8 x i16> @smaxp_8h(ptr %A, ptr %B) nounwind {
386 ; CHECK-LABEL: smaxp_8h:
388 ; CHECK-NEXT: ldr q0, [x0]
389 ; CHECK-NEXT: ldr q1, [x1]
390 ; CHECK-NEXT: smaxp.8h v0, v0, v1
392 %tmp1 = load <8 x i16>, ptr %A
393 %tmp2 = load <8 x i16>, ptr %B
394 %tmp3 = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; smaxp_2s loads two <2 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smaxp.v2i32. The expected assembly is two d-register ldr
; loads followed by a single smaxp.2s.
398 define <2 x i32> @smaxp_2s(ptr %A, ptr %B) nounwind {
399 ; CHECK-LABEL: smaxp_2s:
401 ; CHECK-NEXT: ldr d0, [x0]
402 ; CHECK-NEXT: ldr d1, [x1]
403 ; CHECK-NEXT: smaxp.2s v0, v0, v1
405 %tmp1 = load <2 x i32>, ptr %A
406 %tmp2 = load <2 x i32>, ptr %B
407 %tmp3 = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; smaxp_4s loads two <4 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.smaxp.v4i32. The expected assembly is two q-register ldr
; loads followed by a single smaxp.4s.
411 define <4 x i32> @smaxp_4s(ptr %A, ptr %B) nounwind {
412 ; CHECK-LABEL: smaxp_4s:
414 ; CHECK-NEXT: ldr q0, [x0]
415 ; CHECK-NEXT: ldr q1, [x1]
416 ; CHECK-NEXT: smaxp.4s v0, v0, v1
418 %tmp1 = load <4 x i32>, ptr %A
419 %tmp2 = load <4 x i32>, ptr %B
420 %tmp3 = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.smaxp intrinsics for the v8i8, v16i8,
; v4i16, v8i16, v2i32 and v4i32 vector types. Each takes two operands of the
; result type and is marked nounwind readnone.
424 declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
425 declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
426 declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
427 declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
428 declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
429 declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; umaxp_8b loads two <8 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umaxp.v8i8. The expected assembly is two d-register ldr
; loads followed by a single umaxp.8b.
431 define <8 x i8> @umaxp_8b(ptr %A, ptr %B) nounwind {
432 ; CHECK-LABEL: umaxp_8b:
434 ; CHECK-NEXT: ldr d0, [x0]
435 ; CHECK-NEXT: ldr d1, [x1]
436 ; CHECK-NEXT: umaxp.8b v0, v0, v1
438 %tmp1 = load <8 x i8>, ptr %A
439 %tmp2 = load <8 x i8>, ptr %B
440 %tmp3 = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; umaxp_16b loads two <16 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umaxp.v16i8. The expected assembly is two q-register ldr
; loads followed by a single umaxp.16b.
444 define <16 x i8> @umaxp_16b(ptr %A, ptr %B) nounwind {
445 ; CHECK-LABEL: umaxp_16b:
447 ; CHECK-NEXT: ldr q0, [x0]
448 ; CHECK-NEXT: ldr q1, [x1]
449 ; CHECK-NEXT: umaxp.16b v0, v0, v1
451 %tmp1 = load <16 x i8>, ptr %A
452 %tmp2 = load <16 x i8>, ptr %B
453 %tmp3 = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; umaxp_4h loads two <4 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umaxp.v4i16. The expected assembly is two d-register ldr
; loads followed by a single umaxp.4h.
457 define <4 x i16> @umaxp_4h(ptr %A, ptr %B) nounwind {
458 ; CHECK-LABEL: umaxp_4h:
460 ; CHECK-NEXT: ldr d0, [x0]
461 ; CHECK-NEXT: ldr d1, [x1]
462 ; CHECK-NEXT: umaxp.4h v0, v0, v1
464 %tmp1 = load <4 x i16>, ptr %A
465 %tmp2 = load <4 x i16>, ptr %B
466 %tmp3 = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; umaxp_8h loads two <8 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umaxp.v8i16. The expected assembly is two q-register ldr
; loads followed by a single umaxp.8h.
470 define <8 x i16> @umaxp_8h(ptr %A, ptr %B) nounwind {
471 ; CHECK-LABEL: umaxp_8h:
473 ; CHECK-NEXT: ldr q0, [x0]
474 ; CHECK-NEXT: ldr q1, [x1]
475 ; CHECK-NEXT: umaxp.8h v0, v0, v1
477 %tmp1 = load <8 x i16>, ptr %A
478 %tmp2 = load <8 x i16>, ptr %B
479 %tmp3 = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; umaxp_2s loads two <2 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umaxp.v2i32. The expected assembly is two d-register ldr
; loads followed by a single umaxp.2s.
483 define <2 x i32> @umaxp_2s(ptr %A, ptr %B) nounwind {
484 ; CHECK-LABEL: umaxp_2s:
486 ; CHECK-NEXT: ldr d0, [x0]
487 ; CHECK-NEXT: ldr d1, [x1]
488 ; CHECK-NEXT: umaxp.2s v0, v0, v1
490 %tmp1 = load <2 x i32>, ptr %A
491 %tmp2 = load <2 x i32>, ptr %B
492 %tmp3 = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; umaxp_4s loads two <4 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.umaxp.v4i32. The expected assembly is two q-register ldr
; loads followed by a single umaxp.4s.
496 define <4 x i32> @umaxp_4s(ptr %A, ptr %B) nounwind {
497 ; CHECK-LABEL: umaxp_4s:
499 ; CHECK-NEXT: ldr q0, [x0]
500 ; CHECK-NEXT: ldr q1, [x1]
501 ; CHECK-NEXT: umaxp.4s v0, v0, v1
503 %tmp1 = load <4 x i32>, ptr %A
504 %tmp2 = load <4 x i32>, ptr %B
505 %tmp3 = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.umaxp intrinsics for the v8i8, v16i8,
; v4i16, v8i16, v2i32 and v4i32 vector types. Each takes two operands of the
; result type and is marked nounwind readnone.
509 declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
510 declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
511 declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
512 declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
513 declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
514 declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
516 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; sminp_8b loads two <8 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.sminp.v8i8. The expected assembly is two d-register ldr
; loads followed by a single sminp.8b.
518 define <8 x i8> @sminp_8b(ptr %A, ptr %B) nounwind {
519 ; CHECK-LABEL: sminp_8b:
521 ; CHECK-NEXT: ldr d0, [x0]
522 ; CHECK-NEXT: ldr d1, [x1]
523 ; CHECK-NEXT: sminp.8b v0, v0, v1
525 %tmp1 = load <8 x i8>, ptr %A
526 %tmp2 = load <8 x i8>, ptr %B
527 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; sminp_16b loads two <16 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.sminp.v16i8. The expected assembly is two q-register ldr
; loads followed by a single sminp.16b.
531 define <16 x i8> @sminp_16b(ptr %A, ptr %B) nounwind {
532 ; CHECK-LABEL: sminp_16b:
534 ; CHECK-NEXT: ldr q0, [x0]
535 ; CHECK-NEXT: ldr q1, [x1]
536 ; CHECK-NEXT: sminp.16b v0, v0, v1
538 %tmp1 = load <16 x i8>, ptr %A
539 %tmp2 = load <16 x i8>, ptr %B
540 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; sminp_4h loads two <4 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.sminp.v4i16. The expected assembly is two d-register ldr
; loads followed by a single sminp.4h.
544 define <4 x i16> @sminp_4h(ptr %A, ptr %B) nounwind {
545 ; CHECK-LABEL: sminp_4h:
547 ; CHECK-NEXT: ldr d0, [x0]
548 ; CHECK-NEXT: ldr d1, [x1]
549 ; CHECK-NEXT: sminp.4h v0, v0, v1
551 %tmp1 = load <4 x i16>, ptr %A
552 %tmp2 = load <4 x i16>, ptr %B
553 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; sminp_8h loads two <8 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.sminp.v8i16. The expected assembly is two q-register ldr
; loads followed by a single sminp.8h.
557 define <8 x i16> @sminp_8h(ptr %A, ptr %B) nounwind {
558 ; CHECK-LABEL: sminp_8h:
560 ; CHECK-NEXT: ldr q0, [x0]
561 ; CHECK-NEXT: ldr q1, [x1]
562 ; CHECK-NEXT: sminp.8h v0, v0, v1
564 %tmp1 = load <8 x i16>, ptr %A
565 %tmp2 = load <8 x i16>, ptr %B
566 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; sminp_2s loads two <2 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.sminp.v2i32. The expected assembly is two d-register ldr
; loads followed by a single sminp.2s.
570 define <2 x i32> @sminp_2s(ptr %A, ptr %B) nounwind {
571 ; CHECK-LABEL: sminp_2s:
573 ; CHECK-NEXT: ldr d0, [x0]
574 ; CHECK-NEXT: ldr d1, [x1]
575 ; CHECK-NEXT: sminp.2s v0, v0, v1
577 %tmp1 = load <2 x i32>, ptr %A
578 %tmp2 = load <2 x i32>, ptr %B
579 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; sminp_4s loads two <4 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.sminp.v4i32. The expected assembly is two q-register ldr
; loads followed by a single sminp.4s.
583 define <4 x i32> @sminp_4s(ptr %A, ptr %B) nounwind {
584 ; CHECK-LABEL: sminp_4s:
586 ; CHECK-NEXT: ldr q0, [x0]
587 ; CHECK-NEXT: ldr q1, [x1]
588 ; CHECK-NEXT: sminp.4s v0, v0, v1
590 %tmp1 = load <4 x i32>, ptr %A
591 %tmp2 = load <4 x i32>, ptr %B
592 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.sminp intrinsics for the v8i8, v16i8,
; v4i16, v8i16, v2i32 and v4i32 vector types. Each takes two operands of the
; result type and is marked nounwind readnone.
596 declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
597 declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
598 declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
599 declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
600 declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
601 declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; uminp_8b loads two <8 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.uminp.v8i8. The expected assembly is two d-register ldr
; loads followed by a single uminp.8b.
603 define <8 x i8> @uminp_8b(ptr %A, ptr %B) nounwind {
604 ; CHECK-LABEL: uminp_8b:
606 ; CHECK-NEXT: ldr d0, [x0]
607 ; CHECK-NEXT: ldr d1, [x1]
608 ; CHECK-NEXT: uminp.8b v0, v0, v1
610 %tmp1 = load <8 x i8>, ptr %A
611 %tmp2 = load <8 x i8>, ptr %B
612 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; uminp_16b loads two <16 x i8> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.uminp.v16i8. The expected assembly is two q-register ldr
; loads followed by a single uminp.16b.
616 define <16 x i8> @uminp_16b(ptr %A, ptr %B) nounwind {
617 ; CHECK-LABEL: uminp_16b:
619 ; CHECK-NEXT: ldr q0, [x0]
620 ; CHECK-NEXT: ldr q1, [x1]
621 ; CHECK-NEXT: uminp.16b v0, v0, v1
623 %tmp1 = load <16 x i8>, ptr %A
624 %tmp2 = load <16 x i8>, ptr %B
625 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; uminp_4h loads two <4 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.uminp.v4i16. The expected assembly is two d-register ldr
; loads followed by a single uminp.4h.
629 define <4 x i16> @uminp_4h(ptr %A, ptr %B) nounwind {
630 ; CHECK-LABEL: uminp_4h:
632 ; CHECK-NEXT: ldr d0, [x0]
633 ; CHECK-NEXT: ldr d1, [x1]
634 ; CHECK-NEXT: uminp.4h v0, v0, v1
636 %tmp1 = load <4 x i16>, ptr %A
637 %tmp2 = load <4 x i16>, ptr %B
638 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; uminp_8h loads two <8 x i16> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.uminp.v8i16. The expected assembly is two q-register ldr
; loads followed by a single uminp.8h.
642 define <8 x i16> @uminp_8h(ptr %A, ptr %B) nounwind {
643 ; CHECK-LABEL: uminp_8h:
645 ; CHECK-NEXT: ldr q0, [x0]
646 ; CHECK-NEXT: ldr q1, [x1]
647 ; CHECK-NEXT: uminp.8h v0, v0, v1
649 %tmp1 = load <8 x i16>, ptr %A
650 %tmp2 = load <8 x i16>, ptr %B
651 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; uminp_2s loads two <2 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.uminp.v2i32. The expected assembly is two d-register ldr
; loads followed by a single uminp.2s.
655 define <2 x i32> @uminp_2s(ptr %A, ptr %B) nounwind {
656 ; CHECK-LABEL: uminp_2s:
658 ; CHECK-NEXT: ldr d0, [x0]
659 ; CHECK-NEXT: ldr d1, [x1]
660 ; CHECK-NEXT: uminp.2s v0, v0, v1
662 %tmp1 = load <2 x i32>, ptr %A
663 %tmp2 = load <2 x i32>, ptr %B
664 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; uminp_4s loads two <4 x i32> vectors from %A and %B and feeds them to
; @llvm.aarch64.neon.uminp.v4i32. The expected assembly is two q-register ldr
; loads followed by a single uminp.4s.
668 define <4 x i32> @uminp_4s(ptr %A, ptr %B) nounwind {
669 ; CHECK-LABEL: uminp_4s:
671 ; CHECK-NEXT: ldr q0, [x0]
672 ; CHECK-NEXT: ldr q1, [x1]
673 ; CHECK-NEXT: uminp.4s v0, v0, v1
675 %tmp1 = load <4 x i32>, ptr %A
676 %tmp2 = load <4 x i32>, ptr %B
677 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.uminp intrinsics for the v8i8, v16i8,
; v4i16, v8i16, v2i32 and v4i32 vector types. Each takes two operands of the
; result type and is marked nounwind readnone.
681 declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
682 declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
683 declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
684 declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
685 declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
686 declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; fmax_2s loads two <2 x float> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fmax.v2f32 and returns the intrinsic result. The expected
; assembly is two d-register ldr loads followed by a single fmax.2s.
688 define <2 x float> @fmax_2s(ptr %A, ptr %B) nounwind {
689 ; CHECK-LABEL: fmax_2s:
691 ; CHECK-NEXT: ldr d0, [x0]
692 ; CHECK-NEXT: ldr d1, [x1]
693 ; CHECK-NEXT: fmax.2s v0, v0, v1
695 %tmp1 = load <2 x float>, ptr %A
696 %tmp2 = load <2 x float>, ptr %B
697 %tmp3 = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
698 ret <2 x float> %tmp3
; fmax_4s loads two <4 x float> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fmax.v4f32 and returns the intrinsic result. The expected
; assembly is two q-register ldr loads followed by a single fmax.4s.
701 define <4 x float> @fmax_4s(ptr %A, ptr %B) nounwind {
702 ; CHECK-LABEL: fmax_4s:
704 ; CHECK-NEXT: ldr q0, [x0]
705 ; CHECK-NEXT: ldr q1, [x1]
706 ; CHECK-NEXT: fmax.4s v0, v0, v1
708 %tmp1 = load <4 x float>, ptr %A
709 %tmp2 = load <4 x float>, ptr %B
710 %tmp3 = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
711 ret <4 x float> %tmp3
; fmax_2d loads two <2 x double> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fmax.v2f64 and returns the intrinsic result. The expected
; assembly is two q-register ldr loads followed by a single fmax.2d.
714 define <2 x double> @fmax_2d(ptr %A, ptr %B) nounwind {
715 ; CHECK-LABEL: fmax_2d:
717 ; CHECK-NEXT: ldr q0, [x0]
718 ; CHECK-NEXT: ldr q1, [x1]
719 ; CHECK-NEXT: fmax.2d v0, v0, v1
721 %tmp1 = load <2 x double>, ptr %A
722 %tmp2 = load <2 x double>, ptr %B
723 %tmp3 = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
724 ret <2 x double> %tmp3
; Declarations of the llvm.aarch64.neon.fmax intrinsics for the v2f32, v4f32
; and v2f64 vector types. Each takes two operands of the result type and is
; marked nounwind readnone.
727 declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
728 declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
729 declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone
; fmaxp_2s loads two <2 x float> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fmaxp.v2f32 and returns the intrinsic result. The expected
; assembly is two d-register ldr loads followed by a single fmaxp.2s.
731 define <2 x float> @fmaxp_2s(ptr %A, ptr %B) nounwind {
732 ; CHECK-LABEL: fmaxp_2s:
734 ; CHECK-NEXT: ldr d0, [x0]
735 ; CHECK-NEXT: ldr d1, [x1]
736 ; CHECK-NEXT: fmaxp.2s v0, v0, v1
738 %tmp1 = load <2 x float>, ptr %A
739 %tmp2 = load <2 x float>, ptr %B
740 %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
741 ret <2 x float> %tmp3
; fmaxp_4s loads two <4 x float> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fmaxp.v4f32 and returns the intrinsic result. The expected
; assembly is two q-register ldr loads followed by a single fmaxp.4s.
744 define <4 x float> @fmaxp_4s(ptr %A, ptr %B) nounwind {
745 ; CHECK-LABEL: fmaxp_4s:
747 ; CHECK-NEXT: ldr q0, [x0]
748 ; CHECK-NEXT: ldr q1, [x1]
749 ; CHECK-NEXT: fmaxp.4s v0, v0, v1
751 %tmp1 = load <4 x float>, ptr %A
752 %tmp2 = load <4 x float>, ptr %B
753 %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
754 ret <4 x float> %tmp3
; fmaxp_2d loads two <2 x double> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fmaxp.v2f64 and returns the intrinsic result. The expected
; assembly is two q-register ldr loads followed by a single fmaxp.2d.
757 define <2 x double> @fmaxp_2d(ptr %A, ptr %B) nounwind {
758 ; CHECK-LABEL: fmaxp_2d:
760 ; CHECK-NEXT: ldr q0, [x0]
761 ; CHECK-NEXT: ldr q1, [x1]
762 ; CHECK-NEXT: fmaxp.2d v0, v0, v1
764 %tmp1 = load <2 x double>, ptr %A
765 %tmp2 = load <2 x double>, ptr %B
766 %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
767 ret <2 x double> %tmp3
; Declarations of the llvm.aarch64.neon.fmaxp intrinsics for the v2f32, v4f32
; and v2f64 vector types. Each takes two operands of the result type and is
; marked nounwind readnone.
770 declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone
771 declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone
772 declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone
; fmin_2s loads two <2 x float> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fmin.v2f32 and returns the intrinsic result. The expected
; assembly is two d-register ldr loads followed by a single fmin.2s.
774 define <2 x float> @fmin_2s(ptr %A, ptr %B) nounwind {
775 ; CHECK-LABEL: fmin_2s:
777 ; CHECK-NEXT: ldr d0, [x0]
778 ; CHECK-NEXT: ldr d1, [x1]
779 ; CHECK-NEXT: fmin.2s v0, v0, v1
781 %tmp1 = load <2 x float>, ptr %A
782 %tmp2 = load <2 x float>, ptr %B
783 %tmp3 = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
784 ret <2 x float> %tmp3
; fmin_4s loads two <4 x float> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fmin.v4f32 and returns the intrinsic result. The expected
; assembly is two q-register ldr loads followed by a single fmin.4s.
787 define <4 x float> @fmin_4s(ptr %A, ptr %B) nounwind {
788 ; CHECK-LABEL: fmin_4s:
790 ; CHECK-NEXT: ldr q0, [x0]
791 ; CHECK-NEXT: ldr q1, [x1]
792 ; CHECK-NEXT: fmin.4s v0, v0, v1
794 %tmp1 = load <4 x float>, ptr %A
795 %tmp2 = load <4 x float>, ptr %B
796 %tmp3 = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
797 ret <4 x float> %tmp3
; fmin_2d loads two <2 x double> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fmin.v2f64 and returns the intrinsic result. The expected
; assembly is two q-register ldr loads followed by a single fmin.2d.
800 define <2 x double> @fmin_2d(ptr %A, ptr %B) nounwind {
801 ; CHECK-LABEL: fmin_2d:
803 ; CHECK-NEXT: ldr q0, [x0]
804 ; CHECK-NEXT: ldr q1, [x1]
805 ; CHECK-NEXT: fmin.2d v0, v0, v1
807 %tmp1 = load <2 x double>, ptr %A
808 %tmp2 = load <2 x double>, ptr %B
809 %tmp3 = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
810 ret <2 x double> %tmp3
; Declarations of the llvm.aarch64.neon.fmin intrinsics for the v2f32, v4f32
; and v2f64 vector types. Each takes two operands of the result type and is
; marked nounwind readnone.
813 declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
814 declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
815 declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone
; fminp_2s loads two <2 x float> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fminp.v2f32 and returns the intrinsic result. The expected
; assembly is two d-register ldr loads followed by a single fminp.2s.
817 define <2 x float> @fminp_2s(ptr %A, ptr %B) nounwind {
818 ; CHECK-LABEL: fminp_2s:
820 ; CHECK-NEXT: ldr d0, [x0]
821 ; CHECK-NEXT: ldr d1, [x1]
822 ; CHECK-NEXT: fminp.2s v0, v0, v1
824 %tmp1 = load <2 x float>, ptr %A
825 %tmp2 = load <2 x float>, ptr %B
826 %tmp3 = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
827 ret <2 x float> %tmp3
; fminp_4s loads two <4 x float> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fminp.v4f32 and returns the intrinsic result. The expected
; assembly is two q-register ldr loads followed by a single fminp.4s.
830 define <4 x float> @fminp_4s(ptr %A, ptr %B) nounwind {
831 ; CHECK-LABEL: fminp_4s:
833 ; CHECK-NEXT: ldr q0, [x0]
834 ; CHECK-NEXT: ldr q1, [x1]
835 ; CHECK-NEXT: fminp.4s v0, v0, v1
837 %tmp1 = load <4 x float>, ptr %A
838 %tmp2 = load <4 x float>, ptr %B
839 %tmp3 = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
840 ret <4 x float> %tmp3
; fminp_2d loads two <2 x double> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fminp.v2f64 and returns the intrinsic result. The expected
; assembly is two q-register ldr loads followed by a single fminp.2d.
843 define <2 x double> @fminp_2d(ptr %A, ptr %B) nounwind {
844 ; CHECK-LABEL: fminp_2d:
846 ; CHECK-NEXT: ldr q0, [x0]
847 ; CHECK-NEXT: ldr q1, [x1]
848 ; CHECK-NEXT: fminp.2d v0, v0, v1
850 %tmp1 = load <2 x double>, ptr %A
851 %tmp2 = load <2 x double>, ptr %B
852 %tmp3 = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
853 ret <2 x double> %tmp3
; Declarations of the llvm.aarch64.neon.fminp intrinsics for the v2f32, v4f32
; and v2f64 vector types. Each takes two operands of the result type and is
; marked nounwind readnone.
856 declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone
857 declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone
858 declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone
; fminnmp_2s loads two <2 x float> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fminnmp.v2f32 and returns the intrinsic result. The
; expected assembly is two d-register ldr loads followed by a single
; fminnmp.2s.
860 define <2 x float> @fminnmp_2s(ptr %A, ptr %B) nounwind {
861 ; CHECK-LABEL: fminnmp_2s:
863 ; CHECK-NEXT: ldr d0, [x0]
864 ; CHECK-NEXT: ldr d1, [x1]
865 ; CHECK-NEXT: fminnmp.2s v0, v0, v1
867 %tmp1 = load <2 x float>, ptr %A
868 %tmp2 = load <2 x float>, ptr %B
869 %tmp3 = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
870 ret <2 x float> %tmp3
; fminnmp_4s loads two <4 x float> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fminnmp.v4f32 and returns the intrinsic result. The
; expected assembly is two q-register ldr loads followed by a single
; fminnmp.4s.
873 define <4 x float> @fminnmp_4s(ptr %A, ptr %B) nounwind {
874 ; CHECK-LABEL: fminnmp_4s:
876 ; CHECK-NEXT: ldr q0, [x0]
877 ; CHECK-NEXT: ldr q1, [x1]
878 ; CHECK-NEXT: fminnmp.4s v0, v0, v1
880 %tmp1 = load <4 x float>, ptr %A
881 %tmp2 = load <4 x float>, ptr %B
882 %tmp3 = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
883 ret <4 x float> %tmp3
; fminnmp_2d loads two <2 x double> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fminnmp.v2f64 and returns the intrinsic result. The
; expected assembly is two q-register ldr loads followed by a single
; fminnmp.2d.
886 define <2 x double> @fminnmp_2d(ptr %A, ptr %B) nounwind {
887 ; CHECK-LABEL: fminnmp_2d:
889 ; CHECK-NEXT: ldr q0, [x0]
890 ; CHECK-NEXT: ldr q1, [x1]
891 ; CHECK-NEXT: fminnmp.2d v0, v0, v1
893 %tmp1 = load <2 x double>, ptr %A
894 %tmp2 = load <2 x double>, ptr %B
895 %tmp3 = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
896 ret <2 x double> %tmp3
; Declarations of the llvm.aarch64.neon.fminnmp intrinsics for the v2f32,
; v4f32 and v2f64 vector types. Each takes two operands of the result type
; and is marked nounwind readnone.
899 declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
900 declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
901 declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
; fmaxnmp_2s loads two <2 x float> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fmaxnmp.v2f32 and returns the intrinsic result. The
; expected assembly is two d-register ldr loads followed by a single
; fmaxnmp.2s.
903 define <2 x float> @fmaxnmp_2s(ptr %A, ptr %B) nounwind {
904 ; CHECK-LABEL: fmaxnmp_2s:
906 ; CHECK-NEXT: ldr d0, [x0]
907 ; CHECK-NEXT: ldr d1, [x1]
908 ; CHECK-NEXT: fmaxnmp.2s v0, v0, v1
910 %tmp1 = load <2 x float>, ptr %A
911 %tmp2 = load <2 x float>, ptr %B
912 %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
913 ret <2 x float> %tmp3
; fmaxnmp_4s loads two <4 x float> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fmaxnmp.v4f32 and returns the intrinsic result. The
; expected assembly is two q-register ldr loads followed by a single
; fmaxnmp.4s.
916 define <4 x float> @fmaxnmp_4s(ptr %A, ptr %B) nounwind {
917 ; CHECK-LABEL: fmaxnmp_4s:
919 ; CHECK-NEXT: ldr q0, [x0]
920 ; CHECK-NEXT: ldr q1, [x1]
921 ; CHECK-NEXT: fmaxnmp.4s v0, v0, v1
923 %tmp1 = load <4 x float>, ptr %A
924 %tmp2 = load <4 x float>, ptr %B
925 %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
926 ret <4 x float> %tmp3
; fmaxnmp_2d loads two <2 x double> vectors from %A and %B, feeds them to
; @llvm.aarch64.neon.fmaxnmp.v2f64 and returns the intrinsic result. The
; expected assembly is two q-register ldr loads followed by a single
; fmaxnmp.2d.
929 define <2 x double> @fmaxnmp_2d(ptr %A, ptr %B) nounwind {
930 ; CHECK-LABEL: fmaxnmp_2d:
932 ; CHECK-NEXT: ldr q0, [x0]
933 ; CHECK-NEXT: ldr q1, [x1]
934 ; CHECK-NEXT: fmaxnmp.2d v0, v0, v1
936 %tmp1 = load <2 x double>, ptr %A
937 %tmp2 = load <2 x double>, ptr %B
938 %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
939 ret <2 x double> %tmp3
; Declarations of the llvm.aarch64.neon.fmaxnmp intrinsics for the v2f32,
; v4f32 and v2f64 vector types. Each takes two operands of the result type
; and is marked nounwind readnone.
942 declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
943 declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
944 declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone