1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; Tests for the @llvm.aarch64.neon.smax intrinsic family, one function per
; vector shape. The function-name suffix mirrors the operand type used below
; (8b = <8 x i8>, 16b = <16 x i8>, 4h = <4 x i16>, 8h = <8 x i16>,
; 2s = <2 x i32>, 4s = <4 x i32>). Each function loads both operands through
; its pointer arguments %A and %B and passes them to the matching overload,
; binding the result to %tmp3.
3 define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
6 %tmp1 = load <8 x i8>, <8 x i8>* %A
7 %tmp2 = load <8 x i8>, <8 x i8>* %B
8 %tmp3 = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
12 define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
13 ;CHECK-LABEL: smax_16b:
15 %tmp1 = load <16 x i8>, <16 x i8>* %A
16 %tmp2 = load <16 x i8>, <16 x i8>* %B
17 %tmp3 = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
21 define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
22 ;CHECK-LABEL: smax_4h:
24 %tmp1 = load <4 x i16>, <4 x i16>* %A
25 %tmp2 = load <4 x i16>, <4 x i16>* %B
26 %tmp3 = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
30 define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
31 ;CHECK-LABEL: smax_8h:
33 %tmp1 = load <8 x i16>, <8 x i16>* %A
34 %tmp2 = load <8 x i16>, <8 x i16>* %B
35 %tmp3 = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
39 define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
40 ;CHECK-LABEL: smax_2s:
42 %tmp1 = load <2 x i32>, <2 x i32>* %A
43 %tmp2 = load <2 x i32>, <2 x i32>* %B
44 %tmp3 = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
48 define <4 x i32> @smax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
49 ;CHECK-LABEL: smax_4s:
51 %tmp1 = load <4 x i32>, <4 x i32>* %A
52 %tmp2 = load <4 x i32>, <4 x i32>* %B
53 %tmp3 = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the smax overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
57 declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
58 declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
59 declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
60 declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
61 declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
62 declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Tests for the @llvm.aarch64.neon.umax intrinsic family, one function per
; vector shape (<8 x i8>, <16 x i8>, <4 x i16>, <8 x i16>, <2 x i32>,
; <4 x i32>). Each function loads both operands through its pointer arguments
; %A and %B and passes them to the matching overload, binding the result to
; %tmp3.
64 define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
65 ;CHECK-LABEL: umax_8b:
67 %tmp1 = load <8 x i8>, <8 x i8>* %A
68 %tmp2 = load <8 x i8>, <8 x i8>* %B
69 %tmp3 = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
73 define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
74 ;CHECK-LABEL: umax_16b:
76 %tmp1 = load <16 x i8>, <16 x i8>* %A
77 %tmp2 = load <16 x i8>, <16 x i8>* %B
78 %tmp3 = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
82 define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
83 ;CHECK-LABEL: umax_4h:
85 %tmp1 = load <4 x i16>, <4 x i16>* %A
86 %tmp2 = load <4 x i16>, <4 x i16>* %B
87 %tmp3 = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
91 define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
92 ;CHECK-LABEL: umax_8h:
94 %tmp1 = load <8 x i16>, <8 x i16>* %A
95 %tmp2 = load <8 x i16>, <8 x i16>* %B
96 %tmp3 = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
100 define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
101 ;CHECK-LABEL: umax_2s:
103 %tmp1 = load <2 x i32>, <2 x i32>* %A
104 %tmp2 = load <2 x i32>, <2 x i32>* %B
105 %tmp3 = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
109 define <4 x i32> @umax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
110 ;CHECK-LABEL: umax_4s:
112 %tmp1 = load <4 x i32>, <4 x i32>* %A
113 %tmp2 = load <4 x i32>, <4 x i32>* %B
114 %tmp3 = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the umax overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
118 declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
119 declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
120 declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
121 declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
122 declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
123 declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Tests for the @llvm.aarch64.neon.smin intrinsic family, one function per
; vector shape (<8 x i8>, <16 x i8>, <4 x i16>, <8 x i16>, <2 x i32>,
; <4 x i32>). Each function loads both operands through its pointer arguments
; %A and %B and passes them to the matching overload, binding the result to
; %tmp3.
125 define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
126 ;CHECK-LABEL: smin_8b:
128 %tmp1 = load <8 x i8>, <8 x i8>* %A
129 %tmp2 = load <8 x i8>, <8 x i8>* %B
130 %tmp3 = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
134 define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
135 ;CHECK-LABEL: smin_16b:
137 %tmp1 = load <16 x i8>, <16 x i8>* %A
138 %tmp2 = load <16 x i8>, <16 x i8>* %B
139 %tmp3 = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
143 define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
144 ;CHECK-LABEL: smin_4h:
146 %tmp1 = load <4 x i16>, <4 x i16>* %A
147 %tmp2 = load <4 x i16>, <4 x i16>* %B
148 %tmp3 = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
152 define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
153 ;CHECK-LABEL: smin_8h:
155 %tmp1 = load <8 x i16>, <8 x i16>* %A
156 %tmp2 = load <8 x i16>, <8 x i16>* %B
157 %tmp3 = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
161 define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
162 ;CHECK-LABEL: smin_2s:
164 %tmp1 = load <2 x i32>, <2 x i32>* %A
165 %tmp2 = load <2 x i32>, <2 x i32>* %B
166 %tmp3 = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
170 define <4 x i32> @smin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
171 ;CHECK-LABEL: smin_4s:
173 %tmp1 = load <4 x i32>, <4 x i32>* %A
174 %tmp2 = load <4 x i32>, <4 x i32>* %B
175 %tmp3 = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the smin overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
179 declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
180 declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
181 declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
182 declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
183 declare <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
184 declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Tests for the @llvm.aarch64.neon.umin intrinsic family, one function per
; vector shape (<8 x i8>, <16 x i8>, <4 x i16>, <8 x i16>, <2 x i32>,
; <4 x i32>). Each function loads both operands through its pointer arguments
; %A and %B and passes them to the matching overload, binding the result to
; %tmp3.
186 define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
187 ;CHECK-LABEL: umin_8b:
189 %tmp1 = load <8 x i8>, <8 x i8>* %A
190 %tmp2 = load <8 x i8>, <8 x i8>* %B
191 %tmp3 = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
195 define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
196 ;CHECK-LABEL: umin_16b:
198 %tmp1 = load <16 x i8>, <16 x i8>* %A
199 %tmp2 = load <16 x i8>, <16 x i8>* %B
200 %tmp3 = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
204 define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
205 ;CHECK-LABEL: umin_4h:
207 %tmp1 = load <4 x i16>, <4 x i16>* %A
208 %tmp2 = load <4 x i16>, <4 x i16>* %B
209 %tmp3 = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
213 define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
214 ;CHECK-LABEL: umin_8h:
216 %tmp1 = load <8 x i16>, <8 x i16>* %A
217 %tmp2 = load <8 x i16>, <8 x i16>* %B
218 %tmp3 = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
222 define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
223 ;CHECK-LABEL: umin_2s:
225 %tmp1 = load <2 x i32>, <2 x i32>* %A
226 %tmp2 = load <2 x i32>, <2 x i32>* %B
227 %tmp3 = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
231 define <4 x i32> @umin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
232 ;CHECK-LABEL: umin_4s:
234 %tmp1 = load <4 x i32>, <4 x i32>* %A
235 %tmp2 = load <4 x i32>, <4 x i32>* %B
236 %tmp3 = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the umin overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
240 declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
241 declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
242 declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
243 declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
244 declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
245 declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
247 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; Tests for the @llvm.aarch64.neon.smaxp intrinsic family, one function per
; vector shape (<8 x i8>, <16 x i8>, <4 x i16>, <8 x i16>, <2 x i32>,
; <4 x i32>). Each function loads both operands through its pointer arguments
; %A and %B and passes them to the matching overload, binding the result to
; %tmp3.
249 define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
250 ;CHECK-LABEL: smaxp_8b:
252 %tmp1 = load <8 x i8>, <8 x i8>* %A
253 %tmp2 = load <8 x i8>, <8 x i8>* %B
254 %tmp3 = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
258 define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
259 ;CHECK-LABEL: smaxp_16b:
261 %tmp1 = load <16 x i8>, <16 x i8>* %A
262 %tmp2 = load <16 x i8>, <16 x i8>* %B
263 %tmp3 = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
267 define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
268 ;CHECK-LABEL: smaxp_4h:
270 %tmp1 = load <4 x i16>, <4 x i16>* %A
271 %tmp2 = load <4 x i16>, <4 x i16>* %B
272 %tmp3 = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
276 define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
277 ;CHECK-LABEL: smaxp_8h:
279 %tmp1 = load <8 x i16>, <8 x i16>* %A
280 %tmp2 = load <8 x i16>, <8 x i16>* %B
281 %tmp3 = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
285 define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
286 ;CHECK-LABEL: smaxp_2s:
288 %tmp1 = load <2 x i32>, <2 x i32>* %A
289 %tmp2 = load <2 x i32>, <2 x i32>* %B
290 %tmp3 = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
294 define <4 x i32> @smaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
295 ;CHECK-LABEL: smaxp_4s:
297 %tmp1 = load <4 x i32>, <4 x i32>* %A
298 %tmp2 = load <4 x i32>, <4 x i32>* %B
299 %tmp3 = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the smaxp overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
303 declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
304 declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
305 declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
306 declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
307 declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
308 declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Tests for the @llvm.aarch64.neon.umaxp intrinsic family, one function per
; vector shape (<8 x i8>, <16 x i8>, <4 x i16>, <8 x i16>, <2 x i32>,
; <4 x i32>). Each function loads both operands through its pointer arguments
; %A and %B and passes them to the matching overload, binding the result to
; %tmp3.
310 define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
311 ;CHECK-LABEL: umaxp_8b:
313 %tmp1 = load <8 x i8>, <8 x i8>* %A
314 %tmp2 = load <8 x i8>, <8 x i8>* %B
315 %tmp3 = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
319 define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
320 ;CHECK-LABEL: umaxp_16b:
322 %tmp1 = load <16 x i8>, <16 x i8>* %A
323 %tmp2 = load <16 x i8>, <16 x i8>* %B
324 %tmp3 = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
328 define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
329 ;CHECK-LABEL: umaxp_4h:
331 %tmp1 = load <4 x i16>, <4 x i16>* %A
332 %tmp2 = load <4 x i16>, <4 x i16>* %B
333 %tmp3 = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
337 define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
338 ;CHECK-LABEL: umaxp_8h:
340 %tmp1 = load <8 x i16>, <8 x i16>* %A
341 %tmp2 = load <8 x i16>, <8 x i16>* %B
342 %tmp3 = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
346 define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
347 ;CHECK-LABEL: umaxp_2s:
349 %tmp1 = load <2 x i32>, <2 x i32>* %A
350 %tmp2 = load <2 x i32>, <2 x i32>* %B
351 %tmp3 = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
355 define <4 x i32> @umaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
356 ;CHECK-LABEL: umaxp_4s:
358 %tmp1 = load <4 x i32>, <4 x i32>* %A
359 %tmp2 = load <4 x i32>, <4 x i32>* %B
360 %tmp3 = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the umaxp overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
364 declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
365 declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
366 declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
367 declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
368 declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
369 declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
371 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; Tests for the @llvm.aarch64.neon.sminp intrinsic family, one function per
; vector shape (<8 x i8>, <16 x i8>, <4 x i16>, <8 x i16>, <2 x i32>,
; <4 x i32>). Each function loads both operands through its pointer arguments
; %A and %B and passes them to the matching overload, binding the result to
; %tmp3.
373 define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
374 ;CHECK-LABEL: sminp_8b:
376 %tmp1 = load <8 x i8>, <8 x i8>* %A
377 %tmp2 = load <8 x i8>, <8 x i8>* %B
378 %tmp3 = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
382 define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
383 ;CHECK-LABEL: sminp_16b:
385 %tmp1 = load <16 x i8>, <16 x i8>* %A
386 %tmp2 = load <16 x i8>, <16 x i8>* %B
387 %tmp3 = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
391 define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
392 ;CHECK-LABEL: sminp_4h:
394 %tmp1 = load <4 x i16>, <4 x i16>* %A
395 %tmp2 = load <4 x i16>, <4 x i16>* %B
396 %tmp3 = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
400 define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
401 ;CHECK-LABEL: sminp_8h:
403 %tmp1 = load <8 x i16>, <8 x i16>* %A
404 %tmp2 = load <8 x i16>, <8 x i16>* %B
405 %tmp3 = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
409 define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
410 ;CHECK-LABEL: sminp_2s:
412 %tmp1 = load <2 x i32>, <2 x i32>* %A
413 %tmp2 = load <2 x i32>, <2 x i32>* %B
414 %tmp3 = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
418 define <4 x i32> @sminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
419 ;CHECK-LABEL: sminp_4s:
421 %tmp1 = load <4 x i32>, <4 x i32>* %A
422 %tmp2 = load <4 x i32>, <4 x i32>* %B
423 %tmp3 = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the sminp overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
427 declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
428 declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
429 declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
430 declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
431 declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
432 declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Tests for the @llvm.aarch64.neon.uminp intrinsic family, one function per
; vector shape (<8 x i8>, <16 x i8>, <4 x i16>, <8 x i16>, <2 x i32>,
; <4 x i32>). Each function loads both operands through its pointer arguments
; %A and %B and passes them to the matching overload, binding the result to
; %tmp3.
434 define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
435 ;CHECK-LABEL: uminp_8b:
437 %tmp1 = load <8 x i8>, <8 x i8>* %A
438 %tmp2 = load <8 x i8>, <8 x i8>* %B
439 %tmp3 = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
443 define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
444 ;CHECK-LABEL: uminp_16b:
446 %tmp1 = load <16 x i8>, <16 x i8>* %A
447 %tmp2 = load <16 x i8>, <16 x i8>* %B
448 %tmp3 = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
452 define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
453 ;CHECK-LABEL: uminp_4h:
455 %tmp1 = load <4 x i16>, <4 x i16>* %A
456 %tmp2 = load <4 x i16>, <4 x i16>* %B
457 %tmp3 = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
461 define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
462 ;CHECK-LABEL: uminp_8h:
464 %tmp1 = load <8 x i16>, <8 x i16>* %A
465 %tmp2 = load <8 x i16>, <8 x i16>* %B
466 %tmp3 = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
470 define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
471 ;CHECK-LABEL: uminp_2s:
473 %tmp1 = load <2 x i32>, <2 x i32>* %A
474 %tmp2 = load <2 x i32>, <2 x i32>* %B
475 %tmp3 = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
479 define <4 x i32> @uminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
480 ;CHECK-LABEL: uminp_4s:
482 %tmp1 = load <4 x i32>, <4 x i32>* %A
483 %tmp2 = load <4 x i32>, <4 x i32>* %B
484 %tmp3 = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
; Declarations of the uminp overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
488 declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
489 declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
490 declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
491 declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
492 declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
493 declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
; Tests for the @llvm.aarch64.neon.fmax intrinsic family, one function per
; floating-point vector shape (2s = <2 x float>, 4s = <4 x float>,
; 2d = <2 x double>). Each function loads both operands through its pointer
; arguments %A and %B, calls the matching overload and returns its result.
495 define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
496 ;CHECK-LABEL: fmax_2s:
498 %tmp1 = load <2 x float>, <2 x float>* %A
499 %tmp2 = load <2 x float>, <2 x float>* %B
500 %tmp3 = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
501 ret <2 x float> %tmp3
504 define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
505 ;CHECK-LABEL: fmax_4s:
507 %tmp1 = load <4 x float>, <4 x float>* %A
508 %tmp2 = load <4 x float>, <4 x float>* %B
509 %tmp3 = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
510 ret <4 x float> %tmp3
513 define <2 x double> @fmax_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
514 ;CHECK-LABEL: fmax_2d:
516 %tmp1 = load <2 x double>, <2 x double>* %A
517 %tmp2 = load <2 x double>, <2 x double>* %B
518 %tmp3 = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
519 ret <2 x double> %tmp3
; Declarations of the fmax overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
522 declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
523 declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
524 declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone
; Tests for the @llvm.aarch64.neon.fmaxp intrinsic family, one function per
; floating-point vector shape (<2 x float>, <4 x float>, <2 x double>). Each
; function loads both operands through its pointer arguments %A and %B, calls
; the matching overload and returns its result.
526 define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
527 ;CHECK-LABEL: fmaxp_2s:
529 %tmp1 = load <2 x float>, <2 x float>* %A
530 %tmp2 = load <2 x float>, <2 x float>* %B
531 %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
532 ret <2 x float> %tmp3
535 define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
536 ;CHECK-LABEL: fmaxp_4s:
538 %tmp1 = load <4 x float>, <4 x float>* %A
539 %tmp2 = load <4 x float>, <4 x float>* %B
540 %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
541 ret <4 x float> %tmp3
544 define <2 x double> @fmaxp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
545 ;CHECK-LABEL: fmaxp_2d:
547 %tmp1 = load <2 x double>, <2 x double>* %A
548 %tmp2 = load <2 x double>, <2 x double>* %B
549 %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
550 ret <2 x double> %tmp3
; Declarations of the fmaxp overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
553 declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone
554 declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone
555 declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone
; Tests for the @llvm.aarch64.neon.fmin intrinsic family, one function per
; floating-point vector shape (<2 x float>, <4 x float>, <2 x double>). Each
; function loads both operands through its pointer arguments %A and %B, calls
; the matching overload and returns its result.
557 define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
558 ;CHECK-LABEL: fmin_2s:
560 %tmp1 = load <2 x float>, <2 x float>* %A
561 %tmp2 = load <2 x float>, <2 x float>* %B
562 %tmp3 = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
563 ret <2 x float> %tmp3
566 define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
567 ;CHECK-LABEL: fmin_4s:
569 %tmp1 = load <4 x float>, <4 x float>* %A
570 %tmp2 = load <4 x float>, <4 x float>* %B
571 %tmp3 = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
572 ret <4 x float> %tmp3
575 define <2 x double> @fmin_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
576 ;CHECK-LABEL: fmin_2d:
578 %tmp1 = load <2 x double>, <2 x double>* %A
579 %tmp2 = load <2 x double>, <2 x double>* %B
580 %tmp3 = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
581 ret <2 x double> %tmp3
; Declarations of the fmin overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
584 declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
585 declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
586 declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone
; Tests for the @llvm.aarch64.neon.fminp intrinsic family, one function per
; floating-point vector shape (<2 x float>, <4 x float>, <2 x double>). Each
; function loads both operands through its pointer arguments %A and %B, calls
; the matching overload and returns its result.
588 define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
589 ;CHECK-LABEL: fminp_2s:
591 %tmp1 = load <2 x float>, <2 x float>* %A
592 %tmp2 = load <2 x float>, <2 x float>* %B
593 %tmp3 = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
594 ret <2 x float> %tmp3
597 define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
598 ;CHECK-LABEL: fminp_4s:
600 %tmp1 = load <4 x float>, <4 x float>* %A
601 %tmp2 = load <4 x float>, <4 x float>* %B
602 %tmp3 = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
603 ret <4 x float> %tmp3
606 define <2 x double> @fminp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
607 ;CHECK-LABEL: fminp_2d:
609 %tmp1 = load <2 x double>, <2 x double>* %A
610 %tmp2 = load <2 x double>, <2 x double>* %B
611 %tmp3 = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
612 ret <2 x double> %tmp3
; Declarations of the fminp overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
615 declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone
616 declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone
617 declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone
; Tests for the @llvm.aarch64.neon.fminnmp intrinsic family, one function per
; floating-point vector shape (<2 x float>, <4 x float>, <2 x double>). Each
; function loads both operands through its pointer arguments %A and %B, calls
; the matching overload and returns its result.
619 define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
620 ;CHECK-LABEL: fminnmp_2s:
622 %tmp1 = load <2 x float>, <2 x float>* %A
623 %tmp2 = load <2 x float>, <2 x float>* %B
624 %tmp3 = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
625 ret <2 x float> %tmp3
628 define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
629 ;CHECK-LABEL: fminnmp_4s:
631 %tmp1 = load <4 x float>, <4 x float>* %A
632 %tmp2 = load <4 x float>, <4 x float>* %B
633 %tmp3 = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
634 ret <4 x float> %tmp3
637 define <2 x double> @fminnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
638 ;CHECK-LABEL: fminnmp_2d:
640 %tmp1 = load <2 x double>, <2 x double>* %A
641 %tmp2 = load <2 x double>, <2 x double>* %B
642 %tmp3 = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
643 ret <2 x double> %tmp3
; Declarations of the fminnmp overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
646 declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
647 declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
648 declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone
; Tests for the @llvm.aarch64.neon.fmaxnmp intrinsic family, one function per
; floating-point vector shape (<2 x float>, <4 x float>, <2 x double>). Each
; function loads both operands through its pointer arguments %A and %B, calls
; the matching overload and returns its result.
650 define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
651 ;CHECK-LABEL: fmaxnmp_2s:
653 %tmp1 = load <2 x float>, <2 x float>* %A
654 %tmp2 = load <2 x float>, <2 x float>* %B
655 %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
656 ret <2 x float> %tmp3
659 define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
660 ;CHECK-LABEL: fmaxnmp_4s:
662 %tmp1 = load <4 x float>, <4 x float>* %A
663 %tmp2 = load <4 x float>, <4 x float>* %B
664 %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
665 ret <4 x float> %tmp3
668 define <2 x double> @fmaxnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
669 ;CHECK-LABEL: fmaxnmp_2d:
671 %tmp1 = load <2 x double>, <2 x double>* %A
672 %tmp2 = load <2 x double>, <2 x double>* %B
673 %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
674 ret <2 x double> %tmp3
; Declarations of the fmaxnmp overloads called above; every overload takes two
; vectors of the same type as its result and is marked nounwind readnone.
677 declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
678 declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
679 declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone