1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6 target triple = "aarch64-unknown-linux-gnu"
; ==========================================================================
; NOTE(review): Section exercising @llvm.maxnum.* lowering. For 64/128-bit
; vectors the NEON "fmaxnm" form is expected; for wider fixed-length vectors
; the SVE predicated "fmaxnm z, p/m, z, z" form is expected, with the
; VBITS_EQ_256 prefix covering the split-into-two-registers case at
; exactly 256-bit vectors. Assertions are autogenerated (see file header);
; some autogenerated lines (ret / closing braces) appear to be missing from
; this excerpt -- regenerate with update_llc_test_checks.py rather than
; hand-editing.
; ==========================================================================
12 ; Don't use SVE for 64-bit vectors.
13 define <4 x half> @fmaxnm_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
14 ; CHECK-LABEL: fmaxnm_v4f16:
16 ; CHECK-NEXT: fmaxnm v0.4h, v0.4h, v1.4h
18 %res = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %op1, <4 x half> %op2)
22 ; Don't use SVE for 128-bit vectors.
23 define <8 x half> @fmaxnm_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
24 ; CHECK-LABEL: fmaxnm_v8f16:
26 ; CHECK-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
28 %res = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %op1, <8 x half> %op2)
32 define void @fmaxnm_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
33 ; CHECK-LABEL: fmaxnm_v16f16:
35 ; CHECK-NEXT: ptrue p0.h, vl16
36 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
37 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
38 ; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
39 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
41 %op1 = load <16 x half>, ptr %a
42 %op2 = load <16 x half>, ptr %b
43 %res = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %op1, <16 x half> %op2)
44 store <16 x half> %res, ptr %a
; 32 x f16 = 512 bits: at exactly 256-bit SVE this splits into two halves
; addressed via [x0] and [x0, x8, lsl #1]; at >=512 bits a single vl32 op.
48 define void @fmaxnm_v32f16(ptr %a, ptr %b) #0 {
49 ; VBITS_EQ_256-LABEL: fmaxnm_v32f16:
50 ; VBITS_EQ_256: // %bb.0:
51 ; VBITS_EQ_256-NEXT: ptrue p0.h, vl16
52 ; VBITS_EQ_256-NEXT: mov x8, #16 // =0x10
53 ; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
54 ; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0]
55 ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
56 ; VBITS_EQ_256-NEXT: ld1h { z3.h }, p0/z, [x1]
57 ; VBITS_EQ_256-NEXT: fmaxnm z0.h, p0/m, z0.h, z2.h
58 ; VBITS_EQ_256-NEXT: fmaxnm z1.h, p0/m, z1.h, z3.h
59 ; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
60 ; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
61 ; VBITS_EQ_256-NEXT: ret
63 ; VBITS_GE_512-LABEL: fmaxnm_v32f16:
64 ; VBITS_GE_512: // %bb.0:
65 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
66 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
67 ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
68 ; VBITS_GE_512-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
69 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
70 ; VBITS_GE_512-NEXT: ret
71 %op1 = load <32 x half>, ptr %a
72 %op2 = load <32 x half>, ptr %b
73 %res = call <32 x half> @llvm.maxnum.v32f16(<32 x half> %op1, <32 x half> %op2)
74 store <32 x half> %res, ptr %a
78 define void @fmaxnm_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
79 ; CHECK-LABEL: fmaxnm_v64f16:
81 ; CHECK-NEXT: ptrue p0.h, vl64
82 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
83 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
84 ; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
85 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
87 %op1 = load <64 x half>, ptr %a
88 %op2 = load <64 x half>, ptr %b
89 %res = call <64 x half> @llvm.maxnum.v64f16(<64 x half> %op1, <64 x half> %op2)
90 store <64 x half> %res, ptr %a
94 define void @fmaxnm_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
95 ; CHECK-LABEL: fmaxnm_v128f16:
97 ; CHECK-NEXT: ptrue p0.h, vl128
98 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
99 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
100 ; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
101 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
103 %op1 = load <128 x half>, ptr %a
104 %op2 = load <128 x half>, ptr %b
105 %res = call <128 x half> @llvm.maxnum.v128f16(<128 x half> %op1, <128 x half> %op2)
106 store <128 x half> %res, ptr %a
110 ; Don't use SVE for 64-bit vectors.
111 define <2 x float> @fmaxnm_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
112 ; CHECK-LABEL: fmaxnm_v2f32:
114 ; CHECK-NEXT: fmaxnm v0.2s, v0.2s, v1.2s
116 %res = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %op1, <2 x float> %op2)
120 ; Don't use SVE for 128-bit vectors.
121 define <4 x float> @fmaxnm_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
122 ; CHECK-LABEL: fmaxnm_v4f32:
124 ; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
126 %res = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %op1, <4 x float> %op2)
130 define void @fmaxnm_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
131 ; CHECK-LABEL: fmaxnm_v8f32:
133 ; CHECK-NEXT: ptrue p0.s, vl8
134 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
135 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
136 ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
137 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
139 %op1 = load <8 x float>, ptr %a
140 %op2 = load <8 x float>, ptr %b
141 %res = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %op1, <8 x float> %op2)
142 store <8 x float> %res, ptr %a
; 16 x f32 = 512 bits: split case at 256-bit SVE, single op at >=512 bits.
146 define void @fmaxnm_v16f32(ptr %a, ptr %b) #0 {
147 ; VBITS_EQ_256-LABEL: fmaxnm_v16f32:
148 ; VBITS_EQ_256: // %bb.0:
149 ; VBITS_EQ_256-NEXT: ptrue p0.s, vl8
150 ; VBITS_EQ_256-NEXT: mov x8, #8 // =0x8
151 ; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
152 ; VBITS_EQ_256-NEXT: ld1w { z1.s }, p0/z, [x0]
153 ; VBITS_EQ_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
154 ; VBITS_EQ_256-NEXT: ld1w { z3.s }, p0/z, [x1]
155 ; VBITS_EQ_256-NEXT: fmaxnm z0.s, p0/m, z0.s, z2.s
156 ; VBITS_EQ_256-NEXT: fmaxnm z1.s, p0/m, z1.s, z3.s
157 ; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
158 ; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
159 ; VBITS_EQ_256-NEXT: ret
161 ; VBITS_GE_512-LABEL: fmaxnm_v16f32:
162 ; VBITS_GE_512: // %bb.0:
163 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
164 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
165 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
166 ; VBITS_GE_512-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
167 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
168 ; VBITS_GE_512-NEXT: ret
169 %op1 = load <16 x float>, ptr %a
170 %op2 = load <16 x float>, ptr %b
171 %res = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %op1, <16 x float> %op2)
172 store <16 x float> %res, ptr %a
176 define void @fmaxnm_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
177 ; CHECK-LABEL: fmaxnm_v32f32:
179 ; CHECK-NEXT: ptrue p0.s, vl32
180 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
181 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
182 ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
183 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
185 %op1 = load <32 x float>, ptr %a
186 %op2 = load <32 x float>, ptr %b
187 %res = call <32 x float> @llvm.maxnum.v32f32(<32 x float> %op1, <32 x float> %op2)
188 store <32 x float> %res, ptr %a
192 define void @fmaxnm_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
193 ; CHECK-LABEL: fmaxnm_v64f32:
195 ; CHECK-NEXT: ptrue p0.s, vl64
196 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
197 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
198 ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
199 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
201 %op1 = load <64 x float>, ptr %a
202 %op2 = load <64 x float>, ptr %b
203 %res = call <64 x float> @llvm.maxnum.v64f32(<64 x float> %op1, <64 x float> %op2)
204 store <64 x float> %res, ptr %a
208 ; Don't use SVE for 64-bit vectors.
209 define <1 x double> @fmaxnm_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
210 ; CHECK-LABEL: fmaxnm_v1f64:
212 ; CHECK-NEXT: fmaxnm d0, d0, d1
214 %res = call <1 x double> @llvm.maxnum.v1f64(<1 x double> %op1, <1 x double> %op2)
215 ret <1 x double> %res
218 ; Don't use SVE for 128-bit vectors.
219 define <2 x double> @fmaxnm_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
220 ; CHECK-LABEL: fmaxnm_v2f64:
222 ; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
224 %res = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %op1, <2 x double> %op2)
225 ret <2 x double> %res
228 define void @fmaxnm_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
229 ; CHECK-LABEL: fmaxnm_v4f64:
231 ; CHECK-NEXT: ptrue p0.d, vl4
232 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
233 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
234 ; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
235 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
237 %op1 = load <4 x double>, ptr %a
238 %op2 = load <4 x double>, ptr %b
239 %res = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %op1, <4 x double> %op2)
240 store <4 x double> %res, ptr %a
; 8 x f64 = 512 bits: split case at 256-bit SVE, single op at >=512 bits.
244 define void @fmaxnm_v8f64(ptr %a, ptr %b) #0 {
245 ; VBITS_EQ_256-LABEL: fmaxnm_v8f64:
246 ; VBITS_EQ_256: // %bb.0:
247 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4
248 ; VBITS_EQ_256-NEXT: mov x8, #4 // =0x4
249 ; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
250 ; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x0]
251 ; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
252 ; VBITS_EQ_256-NEXT: ld1d { z3.d }, p0/z, [x1]
253 ; VBITS_EQ_256-NEXT: fmaxnm z0.d, p0/m, z0.d, z2.d
254 ; VBITS_EQ_256-NEXT: fmaxnm z1.d, p0/m, z1.d, z3.d
255 ; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
256 ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
257 ; VBITS_EQ_256-NEXT: ret
259 ; VBITS_GE_512-LABEL: fmaxnm_v8f64:
260 ; VBITS_GE_512: // %bb.0:
261 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
262 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
263 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
264 ; VBITS_GE_512-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
265 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
266 ; VBITS_GE_512-NEXT: ret
267 %op1 = load <8 x double>, ptr %a
268 %op2 = load <8 x double>, ptr %b
269 %res = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %op1, <8 x double> %op2)
270 store <8 x double> %res, ptr %a
274 define void @fmaxnm_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
275 ; CHECK-LABEL: fmaxnm_v16f64:
277 ; CHECK-NEXT: ptrue p0.d, vl16
278 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
279 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
280 ; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
281 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
283 %op1 = load <16 x double>, ptr %a
284 %op2 = load <16 x double>, ptr %b
285 %res = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %op1, <16 x double> %op2)
286 store <16 x double> %res, ptr %a
290 define void @fmaxnm_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
291 ; CHECK-LABEL: fmaxnm_v32f64:
293 ; CHECK-NEXT: ptrue p0.d, vl32
294 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
295 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
296 ; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
297 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
299 %op1 = load <32 x double>, ptr %a
300 %op2 = load <32 x double>, ptr %b
301 %res = call <32 x double> @llvm.maxnum.v32f64(<32 x double> %op1, <32 x double> %op2)
302 store <32 x double> %res, ptr %a
; ==========================================================================
; NOTE(review): Section exercising @llvm.minnum.* lowering -- mirror image of
; the fmaxnm section above, expecting NEON/SVE "fminnm". Same structure:
; NEON for sub-SVE widths, predicated SVE op for wider fixed-length vectors,
; VBITS_EQ_256 covering the two-register split at exactly 256-bit vectors.
; Autogenerated assertions; some lines (ret / closing braces) appear to be
; missing from this excerpt -- regenerate rather than hand-edit.
; ==========================================================================
310 ; Don't use SVE for 64-bit vectors.
311 define <4 x half> @fminnm_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
312 ; CHECK-LABEL: fminnm_v4f16:
314 ; CHECK-NEXT: fminnm v0.4h, v0.4h, v1.4h
316 %res = call <4 x half> @llvm.minnum.v4f16(<4 x half> %op1, <4 x half> %op2)
320 ; Don't use SVE for 128-bit vectors.
321 define <8 x half> @fminnm_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
322 ; CHECK-LABEL: fminnm_v8f16:
324 ; CHECK-NEXT: fminnm v0.8h, v0.8h, v1.8h
326 %res = call <8 x half> @llvm.minnum.v8f16(<8 x half> %op1, <8 x half> %op2)
330 define void @fminnm_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
331 ; CHECK-LABEL: fminnm_v16f16:
333 ; CHECK-NEXT: ptrue p0.h, vl16
334 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
335 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
336 ; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
337 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
339 %op1 = load <16 x half>, ptr %a
340 %op2 = load <16 x half>, ptr %b
341 %res = call <16 x half> @llvm.minnum.v16f16(<16 x half> %op1, <16 x half> %op2)
342 store <16 x half> %res, ptr %a
346 define void @fminnm_v32f16(ptr %a, ptr %b) #0 {
347 ; VBITS_EQ_256-LABEL: fminnm_v32f16:
348 ; VBITS_EQ_256: // %bb.0:
349 ; VBITS_EQ_256-NEXT: ptrue p0.h, vl16
350 ; VBITS_EQ_256-NEXT: mov x8, #16 // =0x10
351 ; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
352 ; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0]
353 ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
354 ; VBITS_EQ_256-NEXT: ld1h { z3.h }, p0/z, [x1]
355 ; VBITS_EQ_256-NEXT: fminnm z0.h, p0/m, z0.h, z2.h
356 ; VBITS_EQ_256-NEXT: fminnm z1.h, p0/m, z1.h, z3.h
357 ; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
358 ; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
359 ; VBITS_EQ_256-NEXT: ret
361 ; VBITS_GE_512-LABEL: fminnm_v32f16:
362 ; VBITS_GE_512: // %bb.0:
363 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
364 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
365 ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
366 ; VBITS_GE_512-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
367 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
368 ; VBITS_GE_512-NEXT: ret
369 %op1 = load <32 x half>, ptr %a
370 %op2 = load <32 x half>, ptr %b
371 %res = call <32 x half> @llvm.minnum.v32f16(<32 x half> %op1, <32 x half> %op2)
372 store <32 x half> %res, ptr %a
376 define void @fminnm_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
377 ; CHECK-LABEL: fminnm_v64f16:
379 ; CHECK-NEXT: ptrue p0.h, vl64
380 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
381 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
382 ; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
383 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
385 %op1 = load <64 x half>, ptr %a
386 %op2 = load <64 x half>, ptr %b
387 %res = call <64 x half> @llvm.minnum.v64f16(<64 x half> %op1, <64 x half> %op2)
388 store <64 x half> %res, ptr %a
392 define void @fminnm_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
393 ; CHECK-LABEL: fminnm_v128f16:
395 ; CHECK-NEXT: ptrue p0.h, vl128
396 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
397 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
398 ; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
399 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
401 %op1 = load <128 x half>, ptr %a
402 %op2 = load <128 x half>, ptr %b
403 %res = call <128 x half> @llvm.minnum.v128f16(<128 x half> %op1, <128 x half> %op2)
404 store <128 x half> %res, ptr %a
408 ; Don't use SVE for 64-bit vectors.
409 define <2 x float> @fminnm_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
410 ; CHECK-LABEL: fminnm_v2f32:
412 ; CHECK-NEXT: fminnm v0.2s, v0.2s, v1.2s
414 %res = call <2 x float> @llvm.minnum.v2f32(<2 x float> %op1, <2 x float> %op2)
418 ; Don't use SVE for 128-bit vectors.
419 define <4 x float> @fminnm_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
420 ; CHECK-LABEL: fminnm_v4f32:
422 ; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s
424 %res = call <4 x float> @llvm.minnum.v4f32(<4 x float> %op1, <4 x float> %op2)
428 define void @fminnm_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
429 ; CHECK-LABEL: fminnm_v8f32:
431 ; CHECK-NEXT: ptrue p0.s, vl8
432 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
433 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
434 ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
435 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
437 %op1 = load <8 x float>, ptr %a
438 %op2 = load <8 x float>, ptr %b
439 %res = call <8 x float> @llvm.minnum.v8f32(<8 x float> %op1, <8 x float> %op2)
440 store <8 x float> %res, ptr %a
444 define void @fminnm_v16f32(ptr %a, ptr %b) #0 {
445 ; VBITS_EQ_256-LABEL: fminnm_v16f32:
446 ; VBITS_EQ_256: // %bb.0:
447 ; VBITS_EQ_256-NEXT: ptrue p0.s, vl8
448 ; VBITS_EQ_256-NEXT: mov x8, #8 // =0x8
449 ; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
450 ; VBITS_EQ_256-NEXT: ld1w { z1.s }, p0/z, [x0]
451 ; VBITS_EQ_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
452 ; VBITS_EQ_256-NEXT: ld1w { z3.s }, p0/z, [x1]
453 ; VBITS_EQ_256-NEXT: fminnm z0.s, p0/m, z0.s, z2.s
454 ; VBITS_EQ_256-NEXT: fminnm z1.s, p0/m, z1.s, z3.s
455 ; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
456 ; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
457 ; VBITS_EQ_256-NEXT: ret
459 ; VBITS_GE_512-LABEL: fminnm_v16f32:
460 ; VBITS_GE_512: // %bb.0:
461 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
462 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
463 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
464 ; VBITS_GE_512-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
465 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
466 ; VBITS_GE_512-NEXT: ret
467 %op1 = load <16 x float>, ptr %a
468 %op2 = load <16 x float>, ptr %b
469 %res = call <16 x float> @llvm.minnum.v16f32(<16 x float> %op1, <16 x float> %op2)
470 store <16 x float> %res, ptr %a
474 define void @fminnm_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
475 ; CHECK-LABEL: fminnm_v32f32:
477 ; CHECK-NEXT: ptrue p0.s, vl32
478 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
479 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
480 ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
481 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
483 %op1 = load <32 x float>, ptr %a
484 %op2 = load <32 x float>, ptr %b
485 %res = call <32 x float> @llvm.minnum.v32f32(<32 x float> %op1, <32 x float> %op2)
486 store <32 x float> %res, ptr %a
490 define void @fminnm_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
491 ; CHECK-LABEL: fminnm_v64f32:
493 ; CHECK-NEXT: ptrue p0.s, vl64
494 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
495 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
496 ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
497 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
499 %op1 = load <64 x float>, ptr %a
500 %op2 = load <64 x float>, ptr %b
501 %res = call <64 x float> @llvm.minnum.v64f32(<64 x float> %op1, <64 x float> %op2)
502 store <64 x float> %res, ptr %a
506 ; Don't use SVE for 64-bit vectors.
507 define <1 x double> @fminnm_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
508 ; CHECK-LABEL: fminnm_v1f64:
510 ; CHECK-NEXT: fminnm d0, d0, d1
512 %res = call <1 x double> @llvm.minnum.v1f64(<1 x double> %op1, <1 x double> %op2)
513 ret <1 x double> %res
516 ; Don't use SVE for 128-bit vectors.
517 define <2 x double> @fminnm_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
518 ; CHECK-LABEL: fminnm_v2f64:
520 ; CHECK-NEXT: fminnm v0.2d, v0.2d, v1.2d
522 %res = call <2 x double> @llvm.minnum.v2f64(<2 x double> %op1, <2 x double> %op2)
523 ret <2 x double> %res
526 define void @fminnm_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
527 ; CHECK-LABEL: fminnm_v4f64:
529 ; CHECK-NEXT: ptrue p0.d, vl4
530 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
531 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
532 ; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
533 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
535 %op1 = load <4 x double>, ptr %a
536 %op2 = load <4 x double>, ptr %b
537 %res = call <4 x double> @llvm.minnum.v4f64(<4 x double> %op1, <4 x double> %op2)
538 store <4 x double> %res, ptr %a
542 define void @fminnm_v8f64(ptr %a, ptr %b) #0 {
543 ; VBITS_EQ_256-LABEL: fminnm_v8f64:
544 ; VBITS_EQ_256: // %bb.0:
545 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4
546 ; VBITS_EQ_256-NEXT: mov x8, #4 // =0x4
547 ; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
548 ; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x0]
549 ; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
550 ; VBITS_EQ_256-NEXT: ld1d { z3.d }, p0/z, [x1]
551 ; VBITS_EQ_256-NEXT: fminnm z0.d, p0/m, z0.d, z2.d
552 ; VBITS_EQ_256-NEXT: fminnm z1.d, p0/m, z1.d, z3.d
553 ; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
554 ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
555 ; VBITS_EQ_256-NEXT: ret
557 ; VBITS_GE_512-LABEL: fminnm_v8f64:
558 ; VBITS_GE_512: // %bb.0:
559 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
560 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
561 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
562 ; VBITS_GE_512-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
563 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
564 ; VBITS_GE_512-NEXT: ret
565 %op1 = load <8 x double>, ptr %a
566 %op2 = load <8 x double>, ptr %b
567 %res = call <8 x double> @llvm.minnum.v8f64(<8 x double> %op1, <8 x double> %op2)
568 store <8 x double> %res, ptr %a
572 define void @fminnm_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
573 ; CHECK-LABEL: fminnm_v16f64:
575 ; CHECK-NEXT: ptrue p0.d, vl16
576 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
577 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
578 ; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
579 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
581 %op1 = load <16 x double>, ptr %a
582 %op2 = load <16 x double>, ptr %b
583 %res = call <16 x double> @llvm.minnum.v16f64(<16 x double> %op1, <16 x double> %op2)
584 store <16 x double> %res, ptr %a
588 define void @fminnm_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
589 ; CHECK-LABEL: fminnm_v32f64:
591 ; CHECK-NEXT: ptrue p0.d, vl32
592 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
593 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
594 ; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
595 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
597 %op1 = load <32 x double>, ptr %a
598 %op2 = load <32 x double>, ptr %b
599 %res = call <32 x double> @llvm.minnum.v32f64(<32 x double> %op1, <32 x double> %op2)
600 store <32 x double> %res, ptr %a
; ==========================================================================
; NOTE(review): Section exercising @llvm.maximum.* lowering, expecting
; NEON/SVE "fmax" (as opposed to "fmaxnm" for @llvm.maxnum above -- the two
; intrinsics differ in NaN handling per the LLVM LangRef; the tests only pin
; the instruction selection). Same width/split structure as prior sections.
; Autogenerated assertions; some lines (ret / closing braces) appear to be
; missing from this excerpt -- regenerate rather than hand-edit.
; ==========================================================================
608 ; Don't use SVE for 64-bit vectors.
609 define <4 x half> @fmax_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
610 ; CHECK-LABEL: fmax_v4f16:
612 ; CHECK-NEXT: fmax v0.4h, v0.4h, v1.4h
614 %res = call <4 x half> @llvm.maximum.v4f16(<4 x half> %op1, <4 x half> %op2)
618 ; Don't use SVE for 128-bit vectors.
619 define <8 x half> @fmax_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
620 ; CHECK-LABEL: fmax_v8f16:
622 ; CHECK-NEXT: fmax v0.8h, v0.8h, v1.8h
624 %res = call <8 x half> @llvm.maximum.v8f16(<8 x half> %op1, <8 x half> %op2)
628 define void @fmax_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
629 ; CHECK-LABEL: fmax_v16f16:
631 ; CHECK-NEXT: ptrue p0.h, vl16
632 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
633 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
634 ; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h
635 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
637 %op1 = load <16 x half>, ptr %a
638 %op2 = load <16 x half>, ptr %b
639 %res = call <16 x half> @llvm.maximum.v16f16(<16 x half> %op1, <16 x half> %op2)
640 store <16 x half> %res, ptr %a
644 define void @fmax_v32f16(ptr %a, ptr %b) #0 {
645 ; VBITS_EQ_256-LABEL: fmax_v32f16:
646 ; VBITS_EQ_256: // %bb.0:
647 ; VBITS_EQ_256-NEXT: ptrue p0.h, vl16
648 ; VBITS_EQ_256-NEXT: mov x8, #16 // =0x10
649 ; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
650 ; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0]
651 ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
652 ; VBITS_EQ_256-NEXT: ld1h { z3.h }, p0/z, [x1]
653 ; VBITS_EQ_256-NEXT: fmax z0.h, p0/m, z0.h, z2.h
654 ; VBITS_EQ_256-NEXT: fmax z1.h, p0/m, z1.h, z3.h
655 ; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
656 ; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
657 ; VBITS_EQ_256-NEXT: ret
659 ; VBITS_GE_512-LABEL: fmax_v32f16:
660 ; VBITS_GE_512: // %bb.0:
661 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
662 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
663 ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
664 ; VBITS_GE_512-NEXT: fmax z0.h, p0/m, z0.h, z1.h
665 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
666 ; VBITS_GE_512-NEXT: ret
667 %op1 = load <32 x half>, ptr %a
668 %op2 = load <32 x half>, ptr %b
669 %res = call <32 x half> @llvm.maximum.v32f16(<32 x half> %op1, <32 x half> %op2)
670 store <32 x half> %res, ptr %a
674 define void @fmax_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
675 ; CHECK-LABEL: fmax_v64f16:
677 ; CHECK-NEXT: ptrue p0.h, vl64
678 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
679 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
680 ; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h
681 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
683 %op1 = load <64 x half>, ptr %a
684 %op2 = load <64 x half>, ptr %b
685 %res = call <64 x half> @llvm.maximum.v64f16(<64 x half> %op1, <64 x half> %op2)
686 store <64 x half> %res, ptr %a
690 define void @fmax_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
691 ; CHECK-LABEL: fmax_v128f16:
693 ; CHECK-NEXT: ptrue p0.h, vl128
694 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
695 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
696 ; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h
697 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
699 %op1 = load <128 x half>, ptr %a
700 %op2 = load <128 x half>, ptr %b
701 %res = call <128 x half> @llvm.maximum.v128f16(<128 x half> %op1, <128 x half> %op2)
702 store <128 x half> %res, ptr %a
706 ; Don't use SVE for 64-bit vectors.
707 define <2 x float> @fmax_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
708 ; CHECK-LABEL: fmax_v2f32:
710 ; CHECK-NEXT: fmax v0.2s, v0.2s, v1.2s
712 %res = call <2 x float> @llvm.maximum.v2f32(<2 x float> %op1, <2 x float> %op2)
716 ; Don't use SVE for 128-bit vectors.
717 define <4 x float> @fmax_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
718 ; CHECK-LABEL: fmax_v4f32:
720 ; CHECK-NEXT: fmax v0.4s, v0.4s, v1.4s
722 %res = call <4 x float> @llvm.maximum.v4f32(<4 x float> %op1, <4 x float> %op2)
726 define void @fmax_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
727 ; CHECK-LABEL: fmax_v8f32:
729 ; CHECK-NEXT: ptrue p0.s, vl8
730 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
731 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
732 ; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
733 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
735 %op1 = load <8 x float>, ptr %a
736 %op2 = load <8 x float>, ptr %b
737 %res = call <8 x float> @llvm.maximum.v8f32(<8 x float> %op1, <8 x float> %op2)
738 store <8 x float> %res, ptr %a
742 define void @fmax_v16f32(ptr %a, ptr %b) #0 {
743 ; VBITS_EQ_256-LABEL: fmax_v16f32:
744 ; VBITS_EQ_256: // %bb.0:
745 ; VBITS_EQ_256-NEXT: ptrue p0.s, vl8
746 ; VBITS_EQ_256-NEXT: mov x8, #8 // =0x8
747 ; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
748 ; VBITS_EQ_256-NEXT: ld1w { z1.s }, p0/z, [x0]
749 ; VBITS_EQ_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
750 ; VBITS_EQ_256-NEXT: ld1w { z3.s }, p0/z, [x1]
751 ; VBITS_EQ_256-NEXT: fmax z0.s, p0/m, z0.s, z2.s
752 ; VBITS_EQ_256-NEXT: fmax z1.s, p0/m, z1.s, z3.s
753 ; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
754 ; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
755 ; VBITS_EQ_256-NEXT: ret
757 ; VBITS_GE_512-LABEL: fmax_v16f32:
758 ; VBITS_GE_512: // %bb.0:
759 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
760 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
761 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
762 ; VBITS_GE_512-NEXT: fmax z0.s, p0/m, z0.s, z1.s
763 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
764 ; VBITS_GE_512-NEXT: ret
765 %op1 = load <16 x float>, ptr %a
766 %op2 = load <16 x float>, ptr %b
767 %res = call <16 x float> @llvm.maximum.v16f32(<16 x float> %op1, <16 x float> %op2)
768 store <16 x float> %res, ptr %a
772 define void @fmax_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
773 ; CHECK-LABEL: fmax_v32f32:
775 ; CHECK-NEXT: ptrue p0.s, vl32
776 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
777 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
778 ; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
779 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
781 %op1 = load <32 x float>, ptr %a
782 %op2 = load <32 x float>, ptr %b
783 %res = call <32 x float> @llvm.maximum.v32f32(<32 x float> %op1, <32 x float> %op2)
784 store <32 x float> %res, ptr %a
788 define void @fmax_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
789 ; CHECK-LABEL: fmax_v64f32:
791 ; CHECK-NEXT: ptrue p0.s, vl64
792 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
793 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
794 ; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
795 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
797 %op1 = load <64 x float>, ptr %a
798 %op2 = load <64 x float>, ptr %b
799 %res = call <64 x float> @llvm.maximum.v64f32(<64 x float> %op1, <64 x float> %op2)
800 store <64 x float> %res, ptr %a
804 ; Don't use SVE for 64-bit vectors.
805 define <1 x double> @fmax_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
806 ; CHECK-LABEL: fmax_v1f64:
808 ; CHECK-NEXT: fmax d0, d0, d1
810 %res = call <1 x double> @llvm.maximum.v1f64(<1 x double> %op1, <1 x double> %op2)
811 ret <1 x double> %res
814 ; Don't use SVE for 128-bit vectors.
815 define <2 x double> @fmax_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
816 ; CHECK-LABEL: fmax_v2f64:
818 ; CHECK-NEXT: fmax v0.2d, v0.2d, v1.2d
820 %res = call <2 x double> @llvm.maximum.v2f64(<2 x double> %op1, <2 x double> %op2)
821 ret <2 x double> %res
824 define void @fmax_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
825 ; CHECK-LABEL: fmax_v4f64:
827 ; CHECK-NEXT: ptrue p0.d, vl4
828 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
829 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
830 ; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
831 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
833 %op1 = load <4 x double>, ptr %a
834 %op2 = load <4 x double>, ptr %b
835 %res = call <4 x double> @llvm.maximum.v4f64(<4 x double> %op1, <4 x double> %op2)
836 store <4 x double> %res, ptr %a
840 define void @fmax_v8f64(ptr %a, ptr %b) #0 {
841 ; VBITS_EQ_256-LABEL: fmax_v8f64:
842 ; VBITS_EQ_256: // %bb.0:
843 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4
844 ; VBITS_EQ_256-NEXT: mov x8, #4 // =0x4
845 ; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
846 ; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x0]
847 ; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
848 ; VBITS_EQ_256-NEXT: ld1d { z3.d }, p0/z, [x1]
849 ; VBITS_EQ_256-NEXT: fmax z0.d, p0/m, z0.d, z2.d
850 ; VBITS_EQ_256-NEXT: fmax z1.d, p0/m, z1.d, z3.d
851 ; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
852 ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
853 ; VBITS_EQ_256-NEXT: ret
855 ; VBITS_GE_512-LABEL: fmax_v8f64:
856 ; VBITS_GE_512: // %bb.0:
857 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
858 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
859 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
860 ; VBITS_GE_512-NEXT: fmax z0.d, p0/m, z0.d, z1.d
861 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
862 ; VBITS_GE_512-NEXT: ret
863 %op1 = load <8 x double>, ptr %a
864 %op2 = load <8 x double>, ptr %b
865 %res = call <8 x double> @llvm.maximum.v8f64(<8 x double> %op1, <8 x double> %op2)
866 store <8 x double> %res, ptr %a
870 define void @fmax_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
871 ; CHECK-LABEL: fmax_v16f64:
873 ; CHECK-NEXT: ptrue p0.d, vl16
874 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
875 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
876 ; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
877 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
879 %op1 = load <16 x double>, ptr %a
880 %op2 = load <16 x double>, ptr %b
881 %res = call <16 x double> @llvm.maximum.v16f64(<16 x double> %op1, <16 x double> %op2)
882 store <16 x double> %res, ptr %a
886 define void @fmax_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
887 ; CHECK-LABEL: fmax_v32f64:
889 ; CHECK-NEXT: ptrue p0.d, vl32
890 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
891 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
892 ; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
893 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
895 %op1 = load <32 x double>, ptr %a
896 %op2 = load <32 x double>, ptr %b
897 %res = call <32 x double> @llvm.maximum.v32f64(<32 x double> %op1, <32 x double> %op2)
898 store <32 x double> %res, ptr %a
906 ; Don't use SVE for 64-bit vectors.
907 define <4 x half> @fmin_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
908 ; CHECK-LABEL: fmin_v4f16:
910 ; CHECK-NEXT: fmin v0.4h, v0.4h, v1.4h
912 %res = call <4 x half> @llvm.minimum.v4f16(<4 x half> %op1, <4 x half> %op2)
916 ; Don't use SVE for 128-bit vectors.
917 define <8 x half> @fmin_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
918 ; CHECK-LABEL: fmin_v8f16:
920 ; CHECK-NEXT: fmin v0.8h, v0.8h, v1.8h
922 %res = call <8 x half> @llvm.minimum.v8f16(<8 x half> %op1, <8 x half> %op2)
926 define void @fmin_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
927 ; CHECK-LABEL: fmin_v16f16:
929 ; CHECK-NEXT: ptrue p0.h, vl16
930 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
931 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
932 ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
933 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
935 %op1 = load <16 x half>, ptr %a
936 %op2 = load <16 x half>, ptr %b
937 %res = call <16 x half> @llvm.minimum.v16f16(<16 x half> %op1, <16 x half> %op2)
938 store <16 x half> %res, ptr %a
; fminimum on <32 x half> (512 bits), no vscale_range: at VL=256 the vector is
; split into two halves (second half addressed via x8=16, lsl #1 for halfword
; scaling); at VL>=512 a single predicated fmin covers all 32 lanes.
942 define void @fmin_v32f16(ptr %a, ptr %b) #0 {
943 ; VBITS_EQ_256-LABEL: fmin_v32f16:
944 ; VBITS_EQ_256: // %bb.0:
945 ; VBITS_EQ_256-NEXT: ptrue p0.h, vl16
946 ; VBITS_EQ_256-NEXT: mov x8, #16 // =0x10
947 ; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
948 ; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0]
949 ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
950 ; VBITS_EQ_256-NEXT: ld1h { z3.h }, p0/z, [x1]
951 ; VBITS_EQ_256-NEXT: fmin z0.h, p0/m, z0.h, z2.h
952 ; VBITS_EQ_256-NEXT: fmin z1.h, p0/m, z1.h, z3.h
953 ; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
954 ; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
955 ; VBITS_EQ_256-NEXT: ret
957 ; VBITS_GE_512-LABEL: fmin_v32f16:
958 ; VBITS_GE_512: // %bb.0:
959 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
960 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
961 ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
962 ; VBITS_GE_512-NEXT: fmin z0.h, p0/m, z0.h, z1.h
963 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
964 ; VBITS_GE_512-NEXT: ret
965 %op1 = load <32 x half>, ptr %a
966 %op2 = load <32 x half>, ptr %b
967 %res = call <32 x half> @llvm.minimum.v32f16(<32 x half> %op1, <32 x half> %op2)
968 store <32 x half> %res, ptr %a
; fminimum on <64 x half>: single predicated fmin when VL >= 1024 bits.
972 define void @fmin_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
973 ; CHECK-LABEL: fmin_v64f16:
975 ; CHECK-NEXT: ptrue p0.h, vl64
976 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
977 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
978 ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
979 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
981 %op1 = load <64 x half>, ptr %a
982 %op2 = load <64 x half>, ptr %b
983 %res = call <64 x half> @llvm.minimum.v64f16(<64 x half> %op1, <64 x half> %op2)
984 store <64 x half> %res, ptr %a
; fminimum on <128 x half>: single predicated fmin when VL >= 2048 bits.
988 define void @fmin_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
989 ; CHECK-LABEL: fmin_v128f16:
991 ; CHECK-NEXT: ptrue p0.h, vl128
992 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
993 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
994 ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
995 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
997 %op1 = load <128 x half>, ptr %a
998 %op2 = load <128 x half>, ptr %b
999 %res = call <128 x half> @llvm.minimum.v128f16(<128 x half> %op1, <128 x half> %op2)
1000 store <128 x half> %res, ptr %a
1004 ; Don't use SVE for 64-bit vectors.
; fminimum on <2 x float> lowers to a single NEON fmin.
1005 define <2 x float> @fmin_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
1006 ; CHECK-LABEL: fmin_v2f32:
1008 ; CHECK-NEXT: fmin v0.2s, v0.2s, v1.2s
1010 %res = call <2 x float> @llvm.minimum.v2f32(<2 x float> %op1, <2 x float> %op2)
1011 ret <2 x float> %res
1014 ; Don't use SVE for 128-bit vectors.
; fminimum on <4 x float> lowers to a single NEON fmin.
1015 define <4 x float> @fmin_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
1016 ; CHECK-LABEL: fmin_v4f32:
1018 ; CHECK-NEXT: fmin v0.4s, v0.4s, v1.4s
1020 %res = call <4 x float> @llvm.minimum.v4f32(<4 x float> %op1, <4 x float> %op2)
1021 ret <4 x float> %res
; fminimum on <8 x float>: 256-bit vector, one predicated SVE fmin at any
; supported VL (vscale_range(2,0) guarantees at least 256 bits).
1024 define void @fmin_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
1025 ; CHECK-LABEL: fmin_v8f32:
1027 ; CHECK-NEXT: ptrue p0.s, vl8
1028 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1029 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
1030 ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
1031 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1033 %op1 = load <8 x float>, ptr %a
1034 %op2 = load <8 x float>, ptr %b
1035 %res = call <8 x float> @llvm.minimum.v8f32(<8 x float> %op1, <8 x float> %op2)
1036 store <8 x float> %res, ptr %a
; fminimum on <16 x float> (512 bits), no vscale_range: split into two halves
; at VL=256 (offset x8=8 words, lsl #2 for word scaling); single predicated
; fmin at VL>=512.
1040 define void @fmin_v16f32(ptr %a, ptr %b) #0 {
1041 ; VBITS_EQ_256-LABEL: fmin_v16f32:
1042 ; VBITS_EQ_256: // %bb.0:
1043 ; VBITS_EQ_256-NEXT: ptrue p0.s, vl8
1044 ; VBITS_EQ_256-NEXT: mov x8, #8 // =0x8
1045 ; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
1046 ; VBITS_EQ_256-NEXT: ld1w { z1.s }, p0/z, [x0]
1047 ; VBITS_EQ_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
1048 ; VBITS_EQ_256-NEXT: ld1w { z3.s }, p0/z, [x1]
1049 ; VBITS_EQ_256-NEXT: fmin z0.s, p0/m, z0.s, z2.s
1050 ; VBITS_EQ_256-NEXT: fmin z1.s, p0/m, z1.s, z3.s
1051 ; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
1052 ; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
1053 ; VBITS_EQ_256-NEXT: ret
1055 ; VBITS_GE_512-LABEL: fmin_v16f32:
1056 ; VBITS_GE_512: // %bb.0:
1057 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
1058 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
1059 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
1060 ; VBITS_GE_512-NEXT: fmin z0.s, p0/m, z0.s, z1.s
1061 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
1062 ; VBITS_GE_512-NEXT: ret
1063 %op1 = load <16 x float>, ptr %a
1064 %op2 = load <16 x float>, ptr %b
1065 %res = call <16 x float> @llvm.minimum.v16f32(<16 x float> %op1, <16 x float> %op2)
1066 store <16 x float> %res, ptr %a
; fminimum on <32 x float>: single predicated fmin when VL >= 1024 bits.
1070 define void @fmin_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
1071 ; CHECK-LABEL: fmin_v32f32:
1073 ; CHECK-NEXT: ptrue p0.s, vl32
1074 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1075 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
1076 ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
1077 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1079 %op1 = load <32 x float>, ptr %a
1080 %op2 = load <32 x float>, ptr %b
1081 %res = call <32 x float> @llvm.minimum.v32f32(<32 x float> %op1, <32 x float> %op2)
1082 store <32 x float> %res, ptr %a
; fminimum on <64 x float>: single predicated fmin when VL >= 2048 bits.
1086 define void @fmin_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
1087 ; CHECK-LABEL: fmin_v64f32:
1089 ; CHECK-NEXT: ptrue p0.s, vl64
1090 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1091 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
1092 ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
1093 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1095 %op1 = load <64 x float>, ptr %a
1096 %op2 = load <64 x float>, ptr %b
1097 %res = call <64 x float> @llvm.minimum.v64f32(<64 x float> %op1, <64 x float> %op2)
1098 store <64 x float> %res, ptr %a
1102 ; Don't use SVE for 64-bit vectors.
; fminimum on <1 x double> lowers to a scalar NEON fmin.
1103 define <1 x double> @fmin_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
1104 ; CHECK-LABEL: fmin_v1f64:
1106 ; CHECK-NEXT: fmin d0, d0, d1
1108 %res = call <1 x double> @llvm.minimum.v1f64(<1 x double> %op1, <1 x double> %op2)
1109 ret <1 x double> %res
1112 ; Don't use SVE for 128-bit vectors.
; fminimum on <2 x double> lowers to a single NEON fmin.
1113 define <2 x double> @fmin_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
1114 ; CHECK-LABEL: fmin_v2f64:
1116 ; CHECK-NEXT: fmin v0.2d, v0.2d, v1.2d
1118 %res = call <2 x double> @llvm.minimum.v2f64(<2 x double> %op1, <2 x double> %op2)
1119 ret <2 x double> %res
; fminimum on <4 x double>: 256-bit vector, one predicated SVE fmin at any
; supported VL (vscale_range(2,0) guarantees at least 256 bits).
1122 define void @fmin_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
1123 ; CHECK-LABEL: fmin_v4f64:
1125 ; CHECK-NEXT: ptrue p0.d, vl4
1126 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1127 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
1128 ; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
1129 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1131 %op1 = load <4 x double>, ptr %a
1132 %op2 = load <4 x double>, ptr %b
1133 %res = call <4 x double> @llvm.minimum.v4f64(<4 x double> %op1, <4 x double> %op2)
1134 store <4 x double> %res, ptr %a
; fminimum on <8 x double> (512 bits), no vscale_range: split into two halves
; at VL=256 (offset x8=4 doublewords, lsl #3 for doubleword scaling); single
; predicated fmin at VL>=512.
1138 define void @fmin_v8f64(ptr %a, ptr %b) #0 {
1139 ; VBITS_EQ_256-LABEL: fmin_v8f64:
1140 ; VBITS_EQ_256: // %bb.0:
1141 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4
1142 ; VBITS_EQ_256-NEXT: mov x8, #4 // =0x4
1143 ; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1144 ; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x0]
1145 ; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
1146 ; VBITS_EQ_256-NEXT: ld1d { z3.d }, p0/z, [x1]
1147 ; VBITS_EQ_256-NEXT: fmin z0.d, p0/m, z0.d, z2.d
1148 ; VBITS_EQ_256-NEXT: fmin z1.d, p0/m, z1.d, z3.d
1149 ; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
1150 ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
1151 ; VBITS_EQ_256-NEXT: ret
1153 ; VBITS_GE_512-LABEL: fmin_v8f64:
1154 ; VBITS_GE_512: // %bb.0:
1155 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
1156 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
1157 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
1158 ; VBITS_GE_512-NEXT: fmin z0.d, p0/m, z0.d, z1.d
1159 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
1160 ; VBITS_GE_512-NEXT: ret
1161 %op1 = load <8 x double>, ptr %a
1162 %op2 = load <8 x double>, ptr %b
1163 %res = call <8 x double> @llvm.minimum.v8f64(<8 x double> %op1, <8 x double> %op2)
1164 store <8 x double> %res, ptr %a
; fminimum on <16 x double>: single predicated fmin when VL >= 1024 bits.
1168 define void @fmin_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
1169 ; CHECK-LABEL: fmin_v16f64:
1171 ; CHECK-NEXT: ptrue p0.d, vl16
1172 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1173 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
1174 ; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
1175 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1177 %op1 = load <16 x double>, ptr %a
1178 %op2 = load <16 x double>, ptr %b
1179 %res = call <16 x double> @llvm.minimum.v16f64(<16 x double> %op1, <16 x double> %op2)
1180 store <16 x double> %res, ptr %a
; fminimum on <32 x double>: single predicated fmin when VL >= 2048 bits.
1184 define void @fmin_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
1185 ; CHECK-LABEL: fmin_v32f64:
1187 ; CHECK-NEXT: ptrue p0.d, vl32
1188 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1189 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
1190 ; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
1191 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1193 %op1 = load <32 x double>, ptr %a
1194 %op2 = load <32 x double>, ptr %b
1195 %res = call <32 x double> @llvm.minimum.v32f64(<32 x double> %op1, <32 x double> %op2)
1196 store <32 x double> %res, ptr %a
; All test functions share #0: SVE must be available for the fixed-length
; vector lowering exercised above.
1200 attributes #0 = { "target-features"="+sve" }
; Intrinsic declarations. minnum/maxnum follow IEEE-754 minNum/maxNum
; semantics; minimum/maximum (used by the fmin_*/fmax_* tests above)
; propagate NaN and treat -0.0 as less than +0.0.

; llvm.minnum — one declaration per tested vector type.
1202 declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)
1203 declare <8 x half> @llvm.minnum.v8f16(<8 x half>, <8 x half>)
1204 declare <16 x half> @llvm.minnum.v16f16(<16 x half>, <16 x half>)
1205 declare <32 x half> @llvm.minnum.v32f16(<32 x half>, <32 x half>)
1206 declare <64 x half> @llvm.minnum.v64f16(<64 x half>, <64 x half>)
1207 declare <128 x half> @llvm.minnum.v128f16(<128 x half>, <128 x half>)
1208 declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
1209 declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
1210 declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>)
1211 declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>)
1212 declare <32 x float> @llvm.minnum.v32f32(<32 x float>, <32 x float>)
1213 declare <64 x float> @llvm.minnum.v64f32(<64 x float>, <64 x float>)
1214 declare <1 x double> @llvm.minnum.v1f64(<1 x double>, <1 x double>)
1215 declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
1216 declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>)
1217 declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>)
1218 declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>)
1219 declare <32 x double> @llvm.minnum.v32f64(<32 x double>, <32 x double>)

; llvm.maxnum.
1221 declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>)
1222 declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>)
1223 declare <16 x half> @llvm.maxnum.v16f16(<16 x half>, <16 x half>)
1224 declare <32 x half> @llvm.maxnum.v32f16(<32 x half>, <32 x half>)
1225 declare <64 x half> @llvm.maxnum.v64f16(<64 x half>, <64 x half>)
1226 declare <128 x half> @llvm.maxnum.v128f16(<128 x half>, <128 x half>)
1227 declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
1228 declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
1229 declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>)
1230 declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>)
1231 declare <32 x float> @llvm.maxnum.v32f32(<32 x float>, <32 x float>)
1232 declare <64 x float> @llvm.maxnum.v64f32(<64 x float>, <64 x float>)
1233 declare <1 x double> @llvm.maxnum.v1f64(<1 x double>, <1 x double>)
1234 declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
1235 declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
1236 declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)
1237 declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>)
1238 declare <32 x double> @llvm.maxnum.v32f64(<32 x double>, <32 x double>)

; llvm.minimum.
1240 declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>)
1241 declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>)
1242 declare <16 x half> @llvm.minimum.v16f16(<16 x half>, <16 x half>)
1243 declare <32 x half> @llvm.minimum.v32f16(<32 x half>, <32 x half>)
1244 declare <64 x half> @llvm.minimum.v64f16(<64 x half>, <64 x half>)
1245 declare <128 x half> @llvm.minimum.v128f16(<128 x half>, <128 x half>)
1246 declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
1247 declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
1248 declare <8 x float> @llvm.minimum.v8f32(<8 x float>, <8 x float>)
1249 declare <16 x float> @llvm.minimum.v16f32(<16 x float>, <16 x float>)
1250 declare <32 x float> @llvm.minimum.v32f32(<32 x float>, <32 x float>)
1251 declare <64 x float> @llvm.minimum.v64f32(<64 x float>, <64 x float>)
1252 declare <1 x double> @llvm.minimum.v1f64(<1 x double>, <1 x double>)
1253 declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
1254 declare <4 x double> @llvm.minimum.v4f64(<4 x double>, <4 x double>)
1255 declare <8 x double> @llvm.minimum.v8f64(<8 x double>, <8 x double>)
1256 declare <16 x double> @llvm.minimum.v16f64(<16 x double>, <16 x double>)
1257 declare <32 x double> @llvm.minimum.v32f64(<32 x double>, <32 x double>)

; llvm.maximum.
1259 declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
1260 declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
1261 declare <16 x half> @llvm.maximum.v16f16(<16 x half>, <16 x half>)
1262 declare <32 x half> @llvm.maximum.v32f16(<32 x half>, <32 x half>)
1263 declare <64 x half> @llvm.maximum.v64f16(<64 x half>, <64 x half>)
1264 declare <128 x half> @llvm.maximum.v128f16(<128 x half>, <128 x half>)
1265 declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
1266 declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
1267 declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>)
1268 declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)
1269 declare <32 x float> @llvm.maximum.v32f32(<32 x float>, <32 x float>)
1270 declare <64 x float> @llvm.maximum.v64f32(<64 x float>, <64 x float>)
1271 declare <1 x double> @llvm.maximum.v1f64(<1 x double>, <1 x double>)
1272 declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
1273 declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>)
1274 declare <8 x double> @llvm.maximum.v8f64(<8 x double>, <8 x double>)
1275 declare <16 x double> @llvm.maximum.v16f64(<16 x double>, <16 x double>)
1276 declare <32 x double> @llvm.maximum.v32f64(<32 x double>, <32 x double>)