1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
3 ; RUN: llc < %s -global-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; Feeds two <8 x i8> loads (from %A and %B) to @llvm.aarch64.neon.smax.v8i8.
; Expected assembly: ldr d0, ldr d1, then smax.8b v0, v0, v1.
5 define <8 x i8> @smax_8b(ptr %A, ptr %B) nounwind {
6 ; CHECK-LABEL: smax_8b:
8 ; CHECK-NEXT: ldr d0, [x0]
9 ; CHECK-NEXT: ldr d1, [x1]
10 ; CHECK-NEXT: smax.8b v0, v0, v1
12 %tmp1 = load <8 x i8>, ptr %A
13 %tmp2 = load <8 x i8>, ptr %B
14 %tmp3 = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Feeds two <16 x i8> loads (from %A and %B) to @llvm.aarch64.neon.smax.v16i8.
; Expected assembly: ldr q0, ldr q1, then smax.16b v0, v0, v1.
18 define <16 x i8> @smax_16b(ptr %A, ptr %B) nounwind {
19 ; CHECK-LABEL: smax_16b:
21 ; CHECK-NEXT: ldr q0, [x0]
22 ; CHECK-NEXT: ldr q1, [x1]
23 ; CHECK-NEXT: smax.16b v0, v0, v1
25 %tmp1 = load <16 x i8>, ptr %A
26 %tmp2 = load <16 x i8>, ptr %B
27 %tmp3 = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Feeds two <4 x i16> loads (from %A and %B) to @llvm.aarch64.neon.smax.v4i16.
; Expected assembly: ldr d0, ldr d1, then smax.4h v0, v0, v1.
31 define <4 x i16> @smax_4h(ptr %A, ptr %B) nounwind {
32 ; CHECK-LABEL: smax_4h:
34 ; CHECK-NEXT: ldr d0, [x0]
35 ; CHECK-NEXT: ldr d1, [x1]
36 ; CHECK-NEXT: smax.4h v0, v0, v1
38 %tmp1 = load <4 x i16>, ptr %A
39 %tmp2 = load <4 x i16>, ptr %B
40 %tmp3 = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Feeds two <8 x i16> loads (from %A and %B) to @llvm.aarch64.neon.smax.v8i16.
; Expected assembly: ldr q0, ldr q1, then smax.8h v0, v0, v1.
44 define <8 x i16> @smax_8h(ptr %A, ptr %B) nounwind {
45 ; CHECK-LABEL: smax_8h:
47 ; CHECK-NEXT: ldr q0, [x0]
48 ; CHECK-NEXT: ldr q1, [x1]
49 ; CHECK-NEXT: smax.8h v0, v0, v1
51 %tmp1 = load <8 x i16>, ptr %A
52 %tmp2 = load <8 x i16>, ptr %B
53 %tmp3 = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Feeds two <2 x i32> loads (from %A and %B) to @llvm.aarch64.neon.smax.v2i32.
; Expected assembly: ldr d0, ldr d1, then smax.2s v0, v0, v1.
57 define <2 x i32> @smax_2s(ptr %A, ptr %B) nounwind {
58 ; CHECK-LABEL: smax_2s:
60 ; CHECK-NEXT: ldr d0, [x0]
61 ; CHECK-NEXT: ldr d1, [x1]
62 ; CHECK-NEXT: smax.2s v0, v0, v1
64 %tmp1 = load <2 x i32>, ptr %A
65 %tmp2 = load <2 x i32>, ptr %B
66 %tmp3 = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Feeds two <4 x i32> loads (from %A and %B) to @llvm.aarch64.neon.smax.v4i32.
; Expected assembly: ldr q0, ldr q1, then smax.4s v0, v0, v1.
70 define <4 x i32> @smax_4s(ptr %A, ptr %B) nounwind {
71 ; CHECK-LABEL: smax_4s:
73 ; CHECK-NEXT: ldr q0, [x0]
74 ; CHECK-NEXT: ldr q1, [x1]
75 ; CHECK-NEXT: smax.4s v0, v0, v1
77 %tmp1 = load <4 x i32>, ptr %A
78 %tmp2 = load <4 x i32>, ptr %B
79 %tmp3 = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.smax overloads called by the smax_*
; functions in this file; each is marked nounwind readnone.
83 declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
84 declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
85 declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
86 declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
87 declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
88 declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Feeds two <8 x i8> loads (from %A and %B) to @llvm.aarch64.neon.umax.v8i8.
; Expected assembly: ldr d0, ldr d1, then umax.8b v0, v0, v1.
90 define <8 x i8> @umax_8b(ptr %A, ptr %B) nounwind {
91 ; CHECK-LABEL: umax_8b:
93 ; CHECK-NEXT: ldr d0, [x0]
94 ; CHECK-NEXT: ldr d1, [x1]
95 ; CHECK-NEXT: umax.8b v0, v0, v1
97 %tmp1 = load <8 x i8>, ptr %A
98 %tmp2 = load <8 x i8>, ptr %B
99 %tmp3 = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Feeds two <16 x i8> loads (from %A and %B) to @llvm.aarch64.neon.umax.v16i8.
; Expected assembly: ldr q0, ldr q1, then umax.16b v0, v0, v1.
103 define <16 x i8> @umax_16b(ptr %A, ptr %B) nounwind {
104 ; CHECK-LABEL: umax_16b:
106 ; CHECK-NEXT: ldr q0, [x0]
107 ; CHECK-NEXT: ldr q1, [x1]
108 ; CHECK-NEXT: umax.16b v0, v0, v1
110 %tmp1 = load <16 x i8>, ptr %A
111 %tmp2 = load <16 x i8>, ptr %B
112 %tmp3 = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Feeds two <4 x i16> loads (from %A and %B) to @llvm.aarch64.neon.umax.v4i16.
; Expected assembly: ldr d0, ldr d1, then umax.4h v0, v0, v1.
116 define <4 x i16> @umax_4h(ptr %A, ptr %B) nounwind {
117 ; CHECK-LABEL: umax_4h:
119 ; CHECK-NEXT: ldr d0, [x0]
120 ; CHECK-NEXT: ldr d1, [x1]
121 ; CHECK-NEXT: umax.4h v0, v0, v1
123 %tmp1 = load <4 x i16>, ptr %A
124 %tmp2 = load <4 x i16>, ptr %B
125 %tmp3 = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Feeds two <8 x i16> loads (from %A and %B) to @llvm.aarch64.neon.umax.v8i16.
; Expected assembly: ldr q0, ldr q1, then umax.8h v0, v0, v1.
129 define <8 x i16> @umax_8h(ptr %A, ptr %B) nounwind {
130 ; CHECK-LABEL: umax_8h:
132 ; CHECK-NEXT: ldr q0, [x0]
133 ; CHECK-NEXT: ldr q1, [x1]
134 ; CHECK-NEXT: umax.8h v0, v0, v1
136 %tmp1 = load <8 x i16>, ptr %A
137 %tmp2 = load <8 x i16>, ptr %B
138 %tmp3 = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Feeds two <2 x i32> loads (from %A and %B) to @llvm.aarch64.neon.umax.v2i32.
; Expected assembly: ldr d0, ldr d1, then umax.2s v0, v0, v1.
142 define <2 x i32> @umax_2s(ptr %A, ptr %B) nounwind {
143 ; CHECK-LABEL: umax_2s:
145 ; CHECK-NEXT: ldr d0, [x0]
146 ; CHECK-NEXT: ldr d1, [x1]
147 ; CHECK-NEXT: umax.2s v0, v0, v1
149 %tmp1 = load <2 x i32>, ptr %A
150 %tmp2 = load <2 x i32>, ptr %B
151 %tmp3 = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Feeds two <4 x i32> loads (from %A and %B) to @llvm.aarch64.neon.umax.v4i32.
; Expected assembly: ldr q0, ldr q1, then umax.4s v0, v0, v1.
155 define <4 x i32> @umax_4s(ptr %A, ptr %B) nounwind {
156 ; CHECK-LABEL: umax_4s:
158 ; CHECK-NEXT: ldr q0, [x0]
159 ; CHECK-NEXT: ldr q1, [x1]
160 ; CHECK-NEXT: umax.4s v0, v0, v1
162 %tmp1 = load <4 x i32>, ptr %A
163 %tmp2 = load <4 x i32>, ptr %B
164 %tmp3 = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.umax overloads called by the umax_*
; functions in this file; each is marked nounwind readnone.
168 declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
169 declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
170 declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
171 declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
172 declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
173 declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Feeds two <8 x i8> loads (from %A and %B) to @llvm.aarch64.neon.smin.v8i8.
; Expected assembly: ldr d0, ldr d1, then smin.8b v0, v0, v1.
175 define <8 x i8> @smin_8b(ptr %A, ptr %B) nounwind {
176 ; CHECK-LABEL: smin_8b:
178 ; CHECK-NEXT: ldr d0, [x0]
179 ; CHECK-NEXT: ldr d1, [x1]
180 ; CHECK-NEXT: smin.8b v0, v0, v1
182 %tmp1 = load <8 x i8>, ptr %A
183 %tmp2 = load <8 x i8>, ptr %B
184 %tmp3 = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Feeds two <16 x i8> loads (from %A and %B) to @llvm.aarch64.neon.smin.v16i8.
; Expected assembly: ldr q0, ldr q1, then smin.16b v0, v0, v1.
188 define <16 x i8> @smin_16b(ptr %A, ptr %B) nounwind {
189 ; CHECK-LABEL: smin_16b:
191 ; CHECK-NEXT: ldr q0, [x0]
192 ; CHECK-NEXT: ldr q1, [x1]
193 ; CHECK-NEXT: smin.16b v0, v0, v1
195 %tmp1 = load <16 x i8>, ptr %A
196 %tmp2 = load <16 x i8>, ptr %B
197 %tmp3 = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Feeds two <4 x i16> loads (from %A and %B) to @llvm.aarch64.neon.smin.v4i16.
; Expected assembly: ldr d0, ldr d1, then smin.4h v0, v0, v1.
201 define <4 x i16> @smin_4h(ptr %A, ptr %B) nounwind {
202 ; CHECK-LABEL: smin_4h:
204 ; CHECK-NEXT: ldr d0, [x0]
205 ; CHECK-NEXT: ldr d1, [x1]
206 ; CHECK-NEXT: smin.4h v0, v0, v1
208 %tmp1 = load <4 x i16>, ptr %A
209 %tmp2 = load <4 x i16>, ptr %B
210 %tmp3 = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Feeds two <8 x i16> loads (from %A and %B) to @llvm.aarch64.neon.smin.v8i16.
; Expected assembly: ldr q0, ldr q1, then smin.8h v0, v0, v1.
214 define <8 x i16> @smin_8h(ptr %A, ptr %B) nounwind {
215 ; CHECK-LABEL: smin_8h:
217 ; CHECK-NEXT: ldr q0, [x0]
218 ; CHECK-NEXT: ldr q1, [x1]
219 ; CHECK-NEXT: smin.8h v0, v0, v1
221 %tmp1 = load <8 x i16>, ptr %A
222 %tmp2 = load <8 x i16>, ptr %B
223 %tmp3 = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Feeds two <2 x i32> loads (from %A and %B) to @llvm.aarch64.neon.smin.v2i32.
; Expected assembly: ldr d0, ldr d1, then smin.2s v0, v0, v1.
227 define <2 x i32> @smin_2s(ptr %A, ptr %B) nounwind {
228 ; CHECK-LABEL: smin_2s:
230 ; CHECK-NEXT: ldr d0, [x0]
231 ; CHECK-NEXT: ldr d1, [x1]
232 ; CHECK-NEXT: smin.2s v0, v0, v1
234 %tmp1 = load <2 x i32>, ptr %A
235 %tmp2 = load <2 x i32>, ptr %B
236 %tmp3 = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Feeds two <4 x i32> loads (from %A and %B) to @llvm.aarch64.neon.smin.v4i32.
; Expected assembly: ldr q0, ldr q1, then smin.4s v0, v0, v1.
240 define <4 x i32> @smin_4s(ptr %A, ptr %B) nounwind {
241 ; CHECK-LABEL: smin_4s:
243 ; CHECK-NEXT: ldr q0, [x0]
244 ; CHECK-NEXT: ldr q1, [x1]
245 ; CHECK-NEXT: smin.4s v0, v0, v1
247 %tmp1 = load <4 x i32>, ptr %A
248 %tmp2 = load <4 x i32>, ptr %B
249 %tmp3 = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.smin overloads called by the smin_*
; functions in this file; each is marked nounwind readnone.
253 declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
254 declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
255 declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
256 declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
257 declare <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
258 declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Feeds two <8 x i8> loads (from %A and %B) to @llvm.aarch64.neon.umin.v8i8.
; Expected assembly: ldr d0, ldr d1, then umin.8b v0, v0, v1.
260 define <8 x i8> @umin_8b(ptr %A, ptr %B) nounwind {
261 ; CHECK-LABEL: umin_8b:
263 ; CHECK-NEXT: ldr d0, [x0]
264 ; CHECK-NEXT: ldr d1, [x1]
265 ; CHECK-NEXT: umin.8b v0, v0, v1
267 %tmp1 = load <8 x i8>, ptr %A
268 %tmp2 = load <8 x i8>, ptr %B
269 %tmp3 = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Feeds two <16 x i8> loads (from %A and %B) to @llvm.aarch64.neon.umin.v16i8.
; Expected assembly: ldr q0, ldr q1, then umin.16b v0, v0, v1.
273 define <16 x i8> @umin_16b(ptr %A, ptr %B) nounwind {
274 ; CHECK-LABEL: umin_16b:
276 ; CHECK-NEXT: ldr q0, [x0]
277 ; CHECK-NEXT: ldr q1, [x1]
278 ; CHECK-NEXT: umin.16b v0, v0, v1
280 %tmp1 = load <16 x i8>, ptr %A
281 %tmp2 = load <16 x i8>, ptr %B
282 %tmp3 = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Feeds two <4 x i16> loads (from %A and %B) to @llvm.aarch64.neon.umin.v4i16.
; Expected assembly: ldr d0, ldr d1, then umin.4h v0, v0, v1.
286 define <4 x i16> @umin_4h(ptr %A, ptr %B) nounwind {
287 ; CHECK-LABEL: umin_4h:
289 ; CHECK-NEXT: ldr d0, [x0]
290 ; CHECK-NEXT: ldr d1, [x1]
291 ; CHECK-NEXT: umin.4h v0, v0, v1
293 %tmp1 = load <4 x i16>, ptr %A
294 %tmp2 = load <4 x i16>, ptr %B
295 %tmp3 = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Feeds two <8 x i16> loads (from %A and %B) to @llvm.aarch64.neon.umin.v8i16.
; Expected assembly: ldr q0, ldr q1, then umin.8h v0, v0, v1.
299 define <8 x i16> @umin_8h(ptr %A, ptr %B) nounwind {
300 ; CHECK-LABEL: umin_8h:
302 ; CHECK-NEXT: ldr q0, [x0]
303 ; CHECK-NEXT: ldr q1, [x1]
304 ; CHECK-NEXT: umin.8h v0, v0, v1
306 %tmp1 = load <8 x i16>, ptr %A
307 %tmp2 = load <8 x i16>, ptr %B
308 %tmp3 = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Feeds two <2 x i32> loads (from %A and %B) to @llvm.aarch64.neon.umin.v2i32.
; Expected assembly: ldr d0, ldr d1, then umin.2s v0, v0, v1.
312 define <2 x i32> @umin_2s(ptr %A, ptr %B) nounwind {
313 ; CHECK-LABEL: umin_2s:
315 ; CHECK-NEXT: ldr d0, [x0]
316 ; CHECK-NEXT: ldr d1, [x1]
317 ; CHECK-NEXT: umin.2s v0, v0, v1
319 %tmp1 = load <2 x i32>, ptr %A
320 %tmp2 = load <2 x i32>, ptr %B
321 %tmp3 = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Feeds two <4 x i32> loads (from %A and %B) to @llvm.aarch64.neon.umin.v4i32.
; Expected assembly: ldr q0, ldr q1, then umin.4s v0, v0, v1.
325 define <4 x i32> @umin_4s(ptr %A, ptr %B) nounwind {
326 ; CHECK-LABEL: umin_4s:
328 ; CHECK-NEXT: ldr q0, [x0]
329 ; CHECK-NEXT: ldr q1, [x1]
330 ; CHECK-NEXT: umin.4s v0, v0, v1
332 %tmp1 = load <4 x i32>, ptr %A
333 %tmp2 = load <4 x i32>, ptr %B
334 %tmp3 = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.umin overloads called by the umin_*
; functions in this file; each is marked nounwind readnone.
338 declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
339 declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
340 declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
341 declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
342 declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
343 declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
345 ; Pairwise max tests (smaxp/umaxp) follow; the llc invocations at the top of this file already cover them.
; Feeds two <8 x i8> loads (from %A and %B) to @llvm.aarch64.neon.smaxp.v8i8.
; Expected assembly: ldr d0, ldr d1, then smaxp.8b v0, v0, v1.
347 define <8 x i8> @smaxp_8b(ptr %A, ptr %B) nounwind {
348 ; CHECK-LABEL: smaxp_8b:
350 ; CHECK-NEXT: ldr d0, [x0]
351 ; CHECK-NEXT: ldr d1, [x1]
352 ; CHECK-NEXT: smaxp.8b v0, v0, v1
354 %tmp1 = load <8 x i8>, ptr %A
355 %tmp2 = load <8 x i8>, ptr %B
356 %tmp3 = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Feeds two <16 x i8> loads (from %A and %B) to @llvm.aarch64.neon.smaxp.v16i8.
; Expected assembly: ldr q0, ldr q1, then smaxp.16b v0, v0, v1.
360 define <16 x i8> @smaxp_16b(ptr %A, ptr %B) nounwind {
361 ; CHECK-LABEL: smaxp_16b:
363 ; CHECK-NEXT: ldr q0, [x0]
364 ; CHECK-NEXT: ldr q1, [x1]
365 ; CHECK-NEXT: smaxp.16b v0, v0, v1
367 %tmp1 = load <16 x i8>, ptr %A
368 %tmp2 = load <16 x i8>, ptr %B
369 %tmp3 = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Feeds two <4 x i16> loads (from %A and %B) to @llvm.aarch64.neon.smaxp.v4i16.
; Expected assembly: ldr d0, ldr d1, then smaxp.4h v0, v0, v1.
373 define <4 x i16> @smaxp_4h(ptr %A, ptr %B) nounwind {
374 ; CHECK-LABEL: smaxp_4h:
376 ; CHECK-NEXT: ldr d0, [x0]
377 ; CHECK-NEXT: ldr d1, [x1]
378 ; CHECK-NEXT: smaxp.4h v0, v0, v1
380 %tmp1 = load <4 x i16>, ptr %A
381 %tmp2 = load <4 x i16>, ptr %B
382 %tmp3 = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Feeds two <8 x i16> loads (from %A and %B) to @llvm.aarch64.neon.smaxp.v8i16.
; Expected assembly: ldr q0, ldr q1, then smaxp.8h v0, v0, v1.
386 define <8 x i16> @smaxp_8h(ptr %A, ptr %B) nounwind {
387 ; CHECK-LABEL: smaxp_8h:
389 ; CHECK-NEXT: ldr q0, [x0]
390 ; CHECK-NEXT: ldr q1, [x1]
391 ; CHECK-NEXT: smaxp.8h v0, v0, v1
393 %tmp1 = load <8 x i16>, ptr %A
394 %tmp2 = load <8 x i16>, ptr %B
395 %tmp3 = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Feeds two <2 x i32> loads (from %A and %B) to @llvm.aarch64.neon.smaxp.v2i32.
; Expected assembly: ldr d0, ldr d1, then smaxp.2s v0, v0, v1.
399 define <2 x i32> @smaxp_2s(ptr %A, ptr %B) nounwind {
400 ; CHECK-LABEL: smaxp_2s:
402 ; CHECK-NEXT: ldr d0, [x0]
403 ; CHECK-NEXT: ldr d1, [x1]
404 ; CHECK-NEXT: smaxp.2s v0, v0, v1
406 %tmp1 = load <2 x i32>, ptr %A
407 %tmp2 = load <2 x i32>, ptr %B
408 %tmp3 = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Feeds two <4 x i32> loads (from %A and %B) to @llvm.aarch64.neon.smaxp.v4i32.
; Expected assembly: ldr q0, ldr q1, then smaxp.4s v0, v0, v1.
412 define <4 x i32> @smaxp_4s(ptr %A, ptr %B) nounwind {
413 ; CHECK-LABEL: smaxp_4s:
415 ; CHECK-NEXT: ldr q0, [x0]
416 ; CHECK-NEXT: ldr q1, [x1]
417 ; CHECK-NEXT: smaxp.4s v0, v0, v1
419 %tmp1 = load <4 x i32>, ptr %A
420 %tmp2 = load <4 x i32>, ptr %B
421 %tmp3 = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.smaxp overloads called by the smaxp_*
; functions in this file; each is marked nounwind readnone.
425 declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
426 declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
427 declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
428 declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
429 declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
430 declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Feeds two <8 x i8> loads (from %A and %B) to @llvm.aarch64.neon.umaxp.v8i8.
; Expected assembly: ldr d0, ldr d1, then umaxp.8b v0, v0, v1.
432 define <8 x i8> @umaxp_8b(ptr %A, ptr %B) nounwind {
433 ; CHECK-LABEL: umaxp_8b:
435 ; CHECK-NEXT: ldr d0, [x0]
436 ; CHECK-NEXT: ldr d1, [x1]
437 ; CHECK-NEXT: umaxp.8b v0, v0, v1
439 %tmp1 = load <8 x i8>, ptr %A
440 %tmp2 = load <8 x i8>, ptr %B
441 %tmp3 = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Feeds two <16 x i8> loads (from %A and %B) to @llvm.aarch64.neon.umaxp.v16i8.
; Expected assembly: ldr q0, ldr q1, then umaxp.16b v0, v0, v1.
445 define <16 x i8> @umaxp_16b(ptr %A, ptr %B) nounwind {
446 ; CHECK-LABEL: umaxp_16b:
448 ; CHECK-NEXT: ldr q0, [x0]
449 ; CHECK-NEXT: ldr q1, [x1]
450 ; CHECK-NEXT: umaxp.16b v0, v0, v1
452 %tmp1 = load <16 x i8>, ptr %A
453 %tmp2 = load <16 x i8>, ptr %B
454 %tmp3 = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Feeds two <4 x i16> loads (from %A and %B) to @llvm.aarch64.neon.umaxp.v4i16.
; Expected assembly: ldr d0, ldr d1, then umaxp.4h v0, v0, v1.
458 define <4 x i16> @umaxp_4h(ptr %A, ptr %B) nounwind {
459 ; CHECK-LABEL: umaxp_4h:
461 ; CHECK-NEXT: ldr d0, [x0]
462 ; CHECK-NEXT: ldr d1, [x1]
463 ; CHECK-NEXT: umaxp.4h v0, v0, v1
465 %tmp1 = load <4 x i16>, ptr %A
466 %tmp2 = load <4 x i16>, ptr %B
467 %tmp3 = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Feeds two <8 x i16> loads (from %A and %B) to @llvm.aarch64.neon.umaxp.v8i16.
; Expected assembly: ldr q0, ldr q1, then umaxp.8h v0, v0, v1.
471 define <8 x i16> @umaxp_8h(ptr %A, ptr %B) nounwind {
472 ; CHECK-LABEL: umaxp_8h:
474 ; CHECK-NEXT: ldr q0, [x0]
475 ; CHECK-NEXT: ldr q1, [x1]
476 ; CHECK-NEXT: umaxp.8h v0, v0, v1
478 %tmp1 = load <8 x i16>, ptr %A
479 %tmp2 = load <8 x i16>, ptr %B
480 %tmp3 = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Feeds two <2 x i32> loads (from %A and %B) to @llvm.aarch64.neon.umaxp.v2i32.
; Expected assembly: ldr d0, ldr d1, then umaxp.2s v0, v0, v1.
484 define <2 x i32> @umaxp_2s(ptr %A, ptr %B) nounwind {
485 ; CHECK-LABEL: umaxp_2s:
487 ; CHECK-NEXT: ldr d0, [x0]
488 ; CHECK-NEXT: ldr d1, [x1]
489 ; CHECK-NEXT: umaxp.2s v0, v0, v1
491 %tmp1 = load <2 x i32>, ptr %A
492 %tmp2 = load <2 x i32>, ptr %B
493 %tmp3 = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Feeds two <4 x i32> loads (from %A and %B) to @llvm.aarch64.neon.umaxp.v4i32.
; Expected assembly: ldr q0, ldr q1, then umaxp.4s v0, v0, v1.
497 define <4 x i32> @umaxp_4s(ptr %A, ptr %B) nounwind {
498 ; CHECK-LABEL: umaxp_4s:
500 ; CHECK-NEXT: ldr q0, [x0]
501 ; CHECK-NEXT: ldr q1, [x1]
502 ; CHECK-NEXT: umaxp.4s v0, v0, v1
504 %tmp1 = load <4 x i32>, ptr %A
505 %tmp2 = load <4 x i32>, ptr %B
506 %tmp3 = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.umaxp overloads called by the umaxp_*
; functions in this file; each is marked nounwind readnone.
510 declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
511 declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
512 declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
513 declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
514 declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
515 declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
517 ; Pairwise min tests (sminp/uminp) follow; the llc invocations at the top of this file already cover them.
; Feeds two <8 x i8> loads (from %A and %B) to @llvm.aarch64.neon.sminp.v8i8.
; Expected assembly: ldr d0, ldr d1, then sminp.8b v0, v0, v1.
519 define <8 x i8> @sminp_8b(ptr %A, ptr %B) nounwind {
520 ; CHECK-LABEL: sminp_8b:
522 ; CHECK-NEXT: ldr d0, [x0]
523 ; CHECK-NEXT: ldr d1, [x1]
524 ; CHECK-NEXT: sminp.8b v0, v0, v1
526 %tmp1 = load <8 x i8>, ptr %A
527 %tmp2 = load <8 x i8>, ptr %B
528 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Feeds two <16 x i8> loads (from %A and %B) to @llvm.aarch64.neon.sminp.v16i8.
; Expected assembly: ldr q0, ldr q1, then sminp.16b v0, v0, v1.
532 define <16 x i8> @sminp_16b(ptr %A, ptr %B) nounwind {
533 ; CHECK-LABEL: sminp_16b:
535 ; CHECK-NEXT: ldr q0, [x0]
536 ; CHECK-NEXT: ldr q1, [x1]
537 ; CHECK-NEXT: sminp.16b v0, v0, v1
539 %tmp1 = load <16 x i8>, ptr %A
540 %tmp2 = load <16 x i8>, ptr %B
541 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Feeds two <4 x i16> loads (from %A and %B) to @llvm.aarch64.neon.sminp.v4i16.
; Expected assembly: ldr d0, ldr d1, then sminp.4h v0, v0, v1.
545 define <4 x i16> @sminp_4h(ptr %A, ptr %B) nounwind {
546 ; CHECK-LABEL: sminp_4h:
548 ; CHECK-NEXT: ldr d0, [x0]
549 ; CHECK-NEXT: ldr d1, [x1]
550 ; CHECK-NEXT: sminp.4h v0, v0, v1
552 %tmp1 = load <4 x i16>, ptr %A
553 %tmp2 = load <4 x i16>, ptr %B
554 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Feeds two <8 x i16> loads (from %A and %B) to @llvm.aarch64.neon.sminp.v8i16.
; Expected assembly: ldr q0, ldr q1, then sminp.8h v0, v0, v1.
558 define <8 x i16> @sminp_8h(ptr %A, ptr %B) nounwind {
559 ; CHECK-LABEL: sminp_8h:
561 ; CHECK-NEXT: ldr q0, [x0]
562 ; CHECK-NEXT: ldr q1, [x1]
563 ; CHECK-NEXT: sminp.8h v0, v0, v1
565 %tmp1 = load <8 x i16>, ptr %A
566 %tmp2 = load <8 x i16>, ptr %B
567 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Feeds two <2 x i32> loads (from %A and %B) to @llvm.aarch64.neon.sminp.v2i32.
; Expected assembly: ldr d0, ldr d1, then sminp.2s v0, v0, v1.
571 define <2 x i32> @sminp_2s(ptr %A, ptr %B) nounwind {
572 ; CHECK-LABEL: sminp_2s:
574 ; CHECK-NEXT: ldr d0, [x0]
575 ; CHECK-NEXT: ldr d1, [x1]
576 ; CHECK-NEXT: sminp.2s v0, v0, v1
578 %tmp1 = load <2 x i32>, ptr %A
579 %tmp2 = load <2 x i32>, ptr %B
580 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Feeds two <4 x i32> loads (from %A and %B) to @llvm.aarch64.neon.sminp.v4i32.
; Expected assembly: ldr q0, ldr q1, then sminp.4s v0, v0, v1.
584 define <4 x i32> @sminp_4s(ptr %A, ptr %B) nounwind {
585 ; CHECK-LABEL: sminp_4s:
587 ; CHECK-NEXT: ldr q0, [x0]
588 ; CHECK-NEXT: ldr q1, [x1]
589 ; CHECK-NEXT: sminp.4s v0, v0, v1
591 %tmp1 = load <4 x i32>, ptr %A
592 %tmp2 = load <4 x i32>, ptr %B
593 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.sminp overloads called by the sminp_*
; functions in this file; each is marked nounwind readnone.
597 declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
598 declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
599 declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
600 declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
601 declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
602 declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Feeds two <8 x i8> loads (from %A and %B) to @llvm.aarch64.neon.uminp.v8i8.
; Expected assembly: ldr d0, ldr d1, then uminp.8b v0, v0, v1.
604 define <8 x i8> @uminp_8b(ptr %A, ptr %B) nounwind {
605 ; CHECK-LABEL: uminp_8b:
607 ; CHECK-NEXT: ldr d0, [x0]
608 ; CHECK-NEXT: ldr d1, [x1]
609 ; CHECK-NEXT: uminp.8b v0, v0, v1
611 %tmp1 = load <8 x i8>, ptr %A
612 %tmp2 = load <8 x i8>, ptr %B
613 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
; Feeds two <16 x i8> loads (from %A and %B) to @llvm.aarch64.neon.uminp.v16i8.
; Expected assembly: ldr q0, ldr q1, then uminp.16b v0, v0, v1.
617 define <16 x i8> @uminp_16b(ptr %A, ptr %B) nounwind {
618 ; CHECK-LABEL: uminp_16b:
620 ; CHECK-NEXT: ldr q0, [x0]
621 ; CHECK-NEXT: ldr q1, [x1]
622 ; CHECK-NEXT: uminp.16b v0, v0, v1
624 %tmp1 = load <16 x i8>, ptr %A
625 %tmp2 = load <16 x i8>, ptr %B
626 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
; Feeds two <4 x i16> loads (from %A and %B) to @llvm.aarch64.neon.uminp.v4i16.
; Expected assembly: ldr d0, ldr d1, then uminp.4h v0, v0, v1.
630 define <4 x i16> @uminp_4h(ptr %A, ptr %B) nounwind {
631 ; CHECK-LABEL: uminp_4h:
633 ; CHECK-NEXT: ldr d0, [x0]
634 ; CHECK-NEXT: ldr d1, [x1]
635 ; CHECK-NEXT: uminp.4h v0, v0, v1
637 %tmp1 = load <4 x i16>, ptr %A
638 %tmp2 = load <4 x i16>, ptr %B
639 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
; Feeds two <8 x i16> loads (from %A and %B) to @llvm.aarch64.neon.uminp.v8i16.
; Expected assembly: ldr q0, ldr q1, then uminp.8h v0, v0, v1.
643 define <8 x i16> @uminp_8h(ptr %A, ptr %B) nounwind {
644 ; CHECK-LABEL: uminp_8h:
646 ; CHECK-NEXT: ldr q0, [x0]
647 ; CHECK-NEXT: ldr q1, [x1]
648 ; CHECK-NEXT: uminp.8h v0, v0, v1
650 %tmp1 = load <8 x i16>, ptr %A
651 %tmp2 = load <8 x i16>, ptr %B
652 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
; Feeds two <2 x i32> loads (from %A and %B) to @llvm.aarch64.neon.uminp.v2i32.
; Expected assembly: ldr d0, ldr d1, then uminp.2s v0, v0, v1.
656 define <2 x i32> @uminp_2s(ptr %A, ptr %B) nounwind {
657 ; CHECK-LABEL: uminp_2s:
659 ; CHECK-NEXT: ldr d0, [x0]
660 ; CHECK-NEXT: ldr d1, [x1]
661 ; CHECK-NEXT: uminp.2s v0, v0, v1
663 %tmp1 = load <2 x i32>, ptr %A
664 %tmp2 = load <2 x i32>, ptr %B
665 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
; Feeds two <4 x i32> loads (from %A and %B) to @llvm.aarch64.neon.uminp.v4i32.
; Expected assembly: ldr q0, ldr q1, then uminp.4s v0, v0, v1.
669 define <4 x i32> @uminp_4s(ptr %A, ptr %B) nounwind {
670 ; CHECK-LABEL: uminp_4s:
672 ; CHECK-NEXT: ldr q0, [x0]
673 ; CHECK-NEXT: ldr q1, [x1]
674 ; CHECK-NEXT: uminp.4s v0, v0, v1
676 %tmp1 = load <4 x i32>, ptr %A
677 %tmp2 = load <4 x i32>, ptr %B
678 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the llvm.aarch64.neon.uminp overloads called by the uminp_*
; functions in this file; each is marked nounwind readnone.
682 declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
683 declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
684 declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
685 declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
686 declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
687 declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Feeds two <2 x float> loads (from %A and %B) to @llvm.aarch64.neon.fmax.v2f32
; and returns the result. Expected assembly: ldr d0, ldr d1, then fmax.2s v0, v0, v1.
689 define <2 x float> @fmax_2s(ptr %A, ptr %B) nounwind {
690 ; CHECK-LABEL: fmax_2s:
692 ; CHECK-NEXT: ldr d0, [x0]
693 ; CHECK-NEXT: ldr d1, [x1]
694 ; CHECK-NEXT: fmax.2s v0, v0, v1
696 %tmp1 = load <2 x float>, ptr %A
697 %tmp2 = load <2 x float>, ptr %B
698 %tmp3 = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
699 ret <2 x float> %tmp3
; Feeds two <4 x float> loads (from %A and %B) to @llvm.aarch64.neon.fmax.v4f32
; and returns the result. Expected assembly: ldr q0, ldr q1, then fmax.4s v0, v0, v1.
702 define <4 x float> @fmax_4s(ptr %A, ptr %B) nounwind {
703 ; CHECK-LABEL: fmax_4s:
705 ; CHECK-NEXT: ldr q0, [x0]
706 ; CHECK-NEXT: ldr q1, [x1]
707 ; CHECK-NEXT: fmax.4s v0, v0, v1
709 %tmp1 = load <4 x float>, ptr %A
710 %tmp2 = load <4 x float>, ptr %B
711 %tmp3 = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
712 ret <4 x float> %tmp3
; Feeds two <2 x double> loads (from %A and %B) to @llvm.aarch64.neon.fmax.v2f64
; and returns the result. Expected assembly: ldr q0, ldr q1, then fmax.2d v0, v0, v1.
715 define <2 x double> @fmax_2d(ptr %A, ptr %B) nounwind {
716 ; CHECK-LABEL: fmax_2d:
718 ; CHECK-NEXT: ldr q0, [x0]
719 ; CHECK-NEXT: ldr q1, [x1]
720 ; CHECK-NEXT: fmax.2d v0, v0, v1
722 %tmp1 = load <2 x double>, ptr %A
723 %tmp2 = load <2 x double>, ptr %B
724 %tmp3 = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
725 ret <2 x double> %tmp3
; Declarations of the llvm.aarch64.neon.fmax overloads called by the fmax_*
; functions in this file; each is marked nounwind readnone.
728 declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
729 declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
730 declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone
; Feeds two <2 x float> loads (from %A and %B) to @llvm.aarch64.neon.fmaxp.v2f32
; and returns the result. Expected assembly: ldr d0, ldr d1, then fmaxp.2s v0, v0, v1.
732 define <2 x float> @fmaxp_2s(ptr %A, ptr %B) nounwind {
733 ; CHECK-LABEL: fmaxp_2s:
735 ; CHECK-NEXT: ldr d0, [x0]
736 ; CHECK-NEXT: ldr d1, [x1]
737 ; CHECK-NEXT: fmaxp.2s v0, v0, v1
739 %tmp1 = load <2 x float>, ptr %A
740 %tmp2 = load <2 x float>, ptr %B
741 %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
742 ret <2 x float> %tmp3
; Feeds two <4 x float> loads (from %A and %B) to @llvm.aarch64.neon.fmaxp.v4f32
; and returns the result. Expected assembly: ldr q0, ldr q1, then fmaxp.4s v0, v0, v1.
745 define <4 x float> @fmaxp_4s(ptr %A, ptr %B) nounwind {
746 ; CHECK-LABEL: fmaxp_4s:
748 ; CHECK-NEXT: ldr q0, [x0]
749 ; CHECK-NEXT: ldr q1, [x1]
750 ; CHECK-NEXT: fmaxp.4s v0, v0, v1
752 %tmp1 = load <4 x float>, ptr %A
753 %tmp2 = load <4 x float>, ptr %B
754 %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
755 ret <4 x float> %tmp3
; Feeds two <2 x double> loads (from %A and %B) to @llvm.aarch64.neon.fmaxp.v2f64
; and returns the result. Expected assembly: ldr q0, ldr q1, then fmaxp.2d v0, v0, v1.
758 define <2 x double> @fmaxp_2d(ptr %A, ptr %B) nounwind {
759 ; CHECK-LABEL: fmaxp_2d:
761 ; CHECK-NEXT: ldr q0, [x0]
762 ; CHECK-NEXT: ldr q1, [x1]
763 ; CHECK-NEXT: fmaxp.2d v0, v0, v1
765 %tmp1 = load <2 x double>, ptr %A
766 %tmp2 = load <2 x double>, ptr %B
767 %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
768 ret <2 x double> %tmp3
; Declarations of the llvm.aarch64.neon.fmaxp overloads called by the fmaxp_*
; functions in this file; each is marked nounwind readnone.
771 declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone
772 declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone
773 declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone
; Feeds two <2 x float> loads (from %A and %B) to @llvm.aarch64.neon.fmin.v2f32
; and returns the result. Expected assembly: ldr d0, ldr d1, then fmin.2s v0, v0, v1.
775 define <2 x float> @fmin_2s(ptr %A, ptr %B) nounwind {
776 ; CHECK-LABEL: fmin_2s:
778 ; CHECK-NEXT: ldr d0, [x0]
779 ; CHECK-NEXT: ldr d1, [x1]
780 ; CHECK-NEXT: fmin.2s v0, v0, v1
782 %tmp1 = load <2 x float>, ptr %A
783 %tmp2 = load <2 x float>, ptr %B
784 %tmp3 = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
785 ret <2 x float> %tmp3
; Feeds two <4 x float> loads (from %A and %B) to @llvm.aarch64.neon.fmin.v4f32
; and returns the result. Expected assembly: ldr q0, ldr q1, then fmin.4s v0, v0, v1.
788 define <4 x float> @fmin_4s(ptr %A, ptr %B) nounwind {
789 ; CHECK-LABEL: fmin_4s:
791 ; CHECK-NEXT: ldr q0, [x0]
792 ; CHECK-NEXT: ldr q1, [x1]
793 ; CHECK-NEXT: fmin.4s v0, v0, v1
795 %tmp1 = load <4 x float>, ptr %A
796 %tmp2 = load <4 x float>, ptr %B
797 %tmp3 = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
798 ret <4 x float> %tmp3
; Feeds two <2 x double> loads (from %A and %B) to @llvm.aarch64.neon.fmin.v2f64
; and returns the result. Expected assembly: ldr q0, ldr q1, then fmin.2d v0, v0, v1.
801 define <2 x double> @fmin_2d(ptr %A, ptr %B) nounwind {
802 ; CHECK-LABEL: fmin_2d:
804 ; CHECK-NEXT: ldr q0, [x0]
805 ; CHECK-NEXT: ldr q1, [x1]
806 ; CHECK-NEXT: fmin.2d v0, v0, v1
808 %tmp1 = load <2 x double>, ptr %A
809 %tmp2 = load <2 x double>, ptr %B
810 %tmp3 = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
811 ret <2 x double> %tmp3
; Declarations of the llvm.aarch64.neon.fmin overloads called by the fmin_*
; functions in this file; each is marked nounwind readnone.
814 declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
815 declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
816 declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone
; Feeds two <2 x float> loads (from %A and %B) to @llvm.aarch64.neon.fminp.v2f32
; and returns the result. Expected assembly: ldr d0, ldr d1, then fminp.2s v0, v0, v1.
818 define <2 x float> @fminp_2s(ptr %A, ptr %B) nounwind {
819 ; CHECK-LABEL: fminp_2s:
821 ; CHECK-NEXT: ldr d0, [x0]
822 ; CHECK-NEXT: ldr d1, [x1]
823 ; CHECK-NEXT: fminp.2s v0, v0, v1
825 %tmp1 = load <2 x float>, ptr %A
826 %tmp2 = load <2 x float>, ptr %B
827 %tmp3 = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
828 ret <2 x float> %tmp3
; Feeds two <4 x float> loads (from %A and %B) to @llvm.aarch64.neon.fminp.v4f32
; and returns the result. Expected assembly: ldr q0, ldr q1, then fminp.4s v0, v0, v1.
831 define <4 x float> @fminp_4s(ptr %A, ptr %B) nounwind {
832 ; CHECK-LABEL: fminp_4s:
834 ; CHECK-NEXT: ldr q0, [x0]
835 ; CHECK-NEXT: ldr q1, [x1]
836 ; CHECK-NEXT: fminp.4s v0, v0, v1
838 %tmp1 = load <4 x float>, ptr %A
839 %tmp2 = load <4 x float>, ptr %B
840 %tmp3 = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
841 ret <4 x float> %tmp3
; Feeds two <2 x double> loads (from %A and %B) to @llvm.aarch64.neon.fminp.v2f64
; and returns the result. Expected assembly: ldr q0, ldr q1, then fminp.2d v0, v0, v1.
844 define <2 x double> @fminp_2d(ptr %A, ptr %B) nounwind {
845 ; CHECK-LABEL: fminp_2d:
847 ; CHECK-NEXT: ldr q0, [x0]
848 ; CHECK-NEXT: ldr q1, [x1]
849 ; CHECK-NEXT: fminp.2d v0, v0, v1
851 %tmp1 = load <2 x double>, ptr %A
852 %tmp2 = load <2 x double>, ptr %B
853 %tmp3 = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
854 ret <2 x double> %tmp3
; Declarations of the llvm.aarch64.neon.fminp overloads called by the fminp_*
; functions in this file; each is marked nounwind readnone.
857 declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone
858 declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone
859 declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone
; Feeds two <2 x float> loads (from %A and %B) to @llvm.aarch64.neon.fminnmp.v2f32
; and returns the result. Expected assembly: ldr d0, ldr d1, then fminnmp.2s v0, v0, v1.
861 define <2 x float> @fminnmp_2s(ptr %A, ptr %B) nounwind {
862 ; CHECK-LABEL: fminnmp_2s:
864 ; CHECK-NEXT: ldr d0, [x0]
865 ; CHECK-NEXT: ldr d1, [x1]
866 ; CHECK-NEXT: fminnmp.2s v0, v0, v1
868 %tmp1 = load <2 x float>, ptr %A
869 %tmp2 = load <2 x float>, ptr %B
870 %tmp3 = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
871 ret <2 x float> %tmp3
; Feeds two <4 x float> loads (from %A and %B) to @llvm.aarch64.neon.fminnmp.v4f32
; and returns the result. Expected assembly: ldr q0, ldr q1, then fminnmp.4s v0, v0, v1.
874 define <4 x float> @fminnmp_4s(ptr %A, ptr %B) nounwind {
875 ; CHECK-LABEL: fminnmp_4s:
877 ; CHECK-NEXT: ldr q0, [x0]
878 ; CHECK-NEXT: ldr q1, [x1]
879 ; CHECK-NEXT: fminnmp.4s v0, v0, v1
881 %tmp1 = load <4 x float>, ptr %A
882 %tmp2 = load <4 x float>, ptr %B
883 %tmp3 = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
884 ret <4 x float> %tmp3
; Feeds two <2 x double> loads (from %A and %B) to @llvm.aarch64.neon.fminnmp.v2f64
; and returns the result. Expected assembly: ldr q0, ldr q1, then fminnmp.2d v0, v0, v1.
887 define <2 x double> @fminnmp_2d(ptr %A, ptr %B) nounwind {
888 ; CHECK-LABEL: fminnmp_2d:
890 ; CHECK-NEXT: ldr q0, [x0]
891 ; CHECK-NEXT: ldr q1, [x1]
892 ; CHECK-NEXT: fminnmp.2d v0, v0, v1
894 %tmp1 = load <2 x double>, ptr %A
895 %tmp2 = load <2 x double>, ptr %B
896 %tmp3 = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
897 ret <2 x double> %tmp3
; Declarations of the llvm.aarch64.neon.fminnmp overloads called by the fminnmp_*
; functions in this file; each is marked nounwind readnone.
900 declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
901 declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
902 declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
; Feeds two <2 x float> loads (from %A and %B) to @llvm.aarch64.neon.fmaxnmp.v2f32
; and returns the result. Expected assembly: ldr d0, ldr d1, then fmaxnmp.2s v0, v0, v1.
904 define <2 x float> @fmaxnmp_2s(ptr %A, ptr %B) nounwind {
905 ; CHECK-LABEL: fmaxnmp_2s:
907 ; CHECK-NEXT: ldr d0, [x0]
908 ; CHECK-NEXT: ldr d1, [x1]
909 ; CHECK-NEXT: fmaxnmp.2s v0, v0, v1
911 %tmp1 = load <2 x float>, ptr %A
912 %tmp2 = load <2 x float>, ptr %B
913 %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
914 ret <2 x float> %tmp3
; Feeds two <4 x float> loads (from %A and %B) to @llvm.aarch64.neon.fmaxnmp.v4f32
; and returns the result. Expected assembly: ldr q0, ldr q1, then fmaxnmp.4s v0, v0, v1.
917 define <4 x float> @fmaxnmp_4s(ptr %A, ptr %B) nounwind {
918 ; CHECK-LABEL: fmaxnmp_4s:
920 ; CHECK-NEXT: ldr q0, [x0]
921 ; CHECK-NEXT: ldr q1, [x1]
922 ; CHECK-NEXT: fmaxnmp.4s v0, v0, v1
924 %tmp1 = load <4 x float>, ptr %A
925 %tmp2 = load <4 x float>, ptr %B
926 %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
927 ret <4 x float> %tmp3
; Feeds two <2 x double> loads (from %A and %B) to @llvm.aarch64.neon.fmaxnmp.v2f64
; and returns the result. Expected assembly: ldr q0, ldr q1, then fmaxnmp.2d v0, v0, v1.
930 define <2 x double> @fmaxnmp_2d(ptr %A, ptr %B) nounwind {
931 ; CHECK-LABEL: fmaxnmp_2d:
933 ; CHECK-NEXT: ldr q0, [x0]
934 ; CHECK-NEXT: ldr q1, [x1]
935 ; CHECK-NEXT: fmaxnmp.2d v0, v0, v1
937 %tmp1 = load <2 x double>, ptr %A
938 %tmp2 = load <2 x double>, ptr %B
939 %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
940 ret <2 x double> %tmp3
; Declarations of the llvm.aarch64.neon.fmaxnmp overloads called by the fmaxnmp_*
; functions in this file; each is marked nounwind readnone.
943 declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
944 declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
945 declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone