1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6 target triple = "aarch64-unknown-linux-gnu"
12 ; Don't use SVE for 64-bit vectors.
define <4 x half> @fmaxnm_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %op1, <4 x half> %op2)
  ret <4 x half> %res
}
22 ; Don't use SVE for 128-bit vectors.
define <8 x half> @fmaxnm_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %op1, <8 x half> %op2)
  ret <8 x half> %res
}
define void @fmaxnm_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  %res = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %op1, <16 x half> %op2)
  store <16 x half> %res, ptr %a
  ret void
}
; Operation split into two halves when the vector is twice the 256-bit width.
define void @fmaxnm_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmaxnm_v32f16:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.h, vl16
; VBITS_EQ_256-NEXT:    mov x8, #16 // =0x10
; VBITS_EQ_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmaxnm z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmaxnm_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %op2 = load <32 x half>, ptr %b
  %res = call <32 x half> @llvm.maxnum.v32f16(<32 x half> %op1, <32 x half> %op2)
  store <32 x half> %res, ptr %a
  ret void
}
define void @fmaxnm_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxnm_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %op2 = load <64 x half>, ptr %b
  %res = call <64 x half> @llvm.maxnum.v64f16(<64 x half> %op1, <64 x half> %op2)
  store <64 x half> %res, ptr %a
  ret void
}
define void @fmaxnm_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxnm_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %op2 = load <128 x half>, ptr %b
  %res = call <128 x half> @llvm.maxnum.v128f16(<128 x half> %op1, <128 x half> %op2)
  store <128 x half> %res, ptr %a
  ret void
}
111 ; Don't use SVE for 64-bit vectors.
define <2 x float> @fmaxnm_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %op1, <2 x float> %op2)
  ret <2 x float> %res
}
121 ; Don't use SVE for 128-bit vectors.
define <4 x float> @fmaxnm_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %op1, <4 x float> %op2)
  ret <4 x float> %res
}
define void @fmaxnm_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %op2 = load <8 x float>, ptr %b
  %res = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %op1, <8 x float> %op2)
  store <8 x float> %res, ptr %a
  ret void
}
; Operation split into two halves when the vector is twice the 256-bit width.
define void @fmaxnm_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmaxnm_v16f32:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.s, vl8
; VBITS_EQ_256-NEXT:    mov x8, #8 // =0x8
; VBITS_EQ_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmaxnm z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmaxnm_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %op2 = load <16 x float>, ptr %b
  %res = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %op1, <16 x float> %op2)
  store <16 x float> %res, ptr %a
  ret void
}
define void @fmaxnm_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxnm_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %op2 = load <32 x float>, ptr %b
  %res = call <32 x float> @llvm.maxnum.v32f32(<32 x float> %op1, <32 x float> %op2)
  store <32 x float> %res, ptr %a
  ret void
}
define void @fmaxnm_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxnm_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %op2 = load <64 x float>, ptr %b
  %res = call <64 x float> @llvm.maxnum.v64f32(<64 x float> %op1, <64 x float> %op2)
  store <64 x float> %res, ptr %a
  ret void
}
210 ; Don't use SVE for 64-bit vectors.
define <1 x double> @fmaxnm_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm d0, d0, d1
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.maxnum.v1f64(<1 x double> %op1, <1 x double> %op2)
  ret <1 x double> %res
}
220 ; Don't use SVE for 128-bit vectors.
define <2 x double> @fmaxnm_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %op1, <2 x double> %op2)
  ret <2 x double> %res
}
define void @fmaxnm_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %op2 = load <4 x double>, ptr %b
  %res = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %op1, <4 x double> %op2)
  store <4 x double> %res, ptr %a
  ret void
}
; Operation split into two halves when the vector is twice the 256-bit width.
define void @fmaxnm_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmaxnm_v8f64:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.d, vl4
; VBITS_EQ_256-NEXT:    mov x8, #4 // =0x4
; VBITS_EQ_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmaxnm z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmaxnm_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %op2 = load <8 x double>, ptr %b
  %res = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %op1, <8 x double> %op2)
  store <8 x double> %res, ptr %a
  ret void
}
define void @fmaxnm_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxnm_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %op2 = load <16 x double>, ptr %b
  %res = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %op1, <16 x double> %op2)
  store <16 x double> %res, ptr %a
  ret void
}
define void @fmaxnm_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxnm_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %op2 = load <32 x double>, ptr %b
  %res = call <32 x double> @llvm.maxnum.v32f64(<32 x double> %op1, <32 x double> %op2)
  store <32 x double> %res, ptr %a
  ret void
}
313 ; Don't use SVE for 64-bit vectors.
define <4 x half> @fminnm_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.minnum.v4f16(<4 x half> %op1, <4 x half> %op2)
  ret <4 x half> %res
}
323 ; Don't use SVE for 128-bit vectors.
define <8 x half> @fminnm_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.minnum.v8f16(<8 x half> %op1, <8 x half> %op2)
  ret <8 x half> %res
}
define void @fminnm_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  %res = call <16 x half> @llvm.minnum.v16f16(<16 x half> %op1, <16 x half> %op2)
  store <16 x half> %res, ptr %a
  ret void
}
; Operation split into two halves when the vector is twice the 256-bit width.
define void @fminnm_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fminnm_v32f16:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.h, vl16
; VBITS_EQ_256-NEXT:    mov x8, #16 // =0x10
; VBITS_EQ_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fminnm z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fminnm_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %op2 = load <32 x half>, ptr %b
  %res = call <32 x half> @llvm.minnum.v32f16(<32 x half> %op1, <32 x half> %op2)
  store <32 x half> %res, ptr %a
  ret void
}
define void @fminnm_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fminnm_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %op2 = load <64 x half>, ptr %b
  %res = call <64 x half> @llvm.minnum.v64f16(<64 x half> %op1, <64 x half> %op2)
  store <64 x half> %res, ptr %a
  ret void
}
define void @fminnm_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fminnm_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %op2 = load <128 x half>, ptr %b
  %res = call <128 x half> @llvm.minnum.v128f16(<128 x half> %op1, <128 x half> %op2)
  store <128 x half> %res, ptr %a
  ret void
}
412 ; Don't use SVE for 64-bit vectors.
define <2 x float> @fminnm_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.minnum.v2f32(<2 x float> %op1, <2 x float> %op2)
  ret <2 x float> %res
}
422 ; Don't use SVE for 128-bit vectors.
define <4 x float> @fminnm_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.minnum.v4f32(<4 x float> %op1, <4 x float> %op2)
  ret <4 x float> %res
}
define void @fminnm_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %op2 = load <8 x float>, ptr %b
  %res = call <8 x float> @llvm.minnum.v8f32(<8 x float> %op1, <8 x float> %op2)
  store <8 x float> %res, ptr %a
  ret void
}
; Operation split into two halves when the vector is twice the 256-bit width.
define void @fminnm_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fminnm_v16f32:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.s, vl8
; VBITS_EQ_256-NEXT:    mov x8, #8 // =0x8
; VBITS_EQ_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fminnm z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fminnm_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %op2 = load <16 x float>, ptr %b
  %res = call <16 x float> @llvm.minnum.v16f32(<16 x float> %op1, <16 x float> %op2)
  store <16 x float> %res, ptr %a
  ret void
}
define void @fminnm_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fminnm_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %op2 = load <32 x float>, ptr %b
  %res = call <32 x float> @llvm.minnum.v32f32(<32 x float> %op1, <32 x float> %op2)
  store <32 x float> %res, ptr %a
  ret void
}
define void @fminnm_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fminnm_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %op2 = load <64 x float>, ptr %b
  %res = call <64 x float> @llvm.minnum.v64f32(<64 x float> %op1, <64 x float> %op2)
  store <64 x float> %res, ptr %a
  ret void
}
511 ; Don't use SVE for 64-bit vectors.
define <1 x double> @fminnm_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm d0, d0, d1
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.minnum.v1f64(<1 x double> %op1, <1 x double> %op2)
  ret <1 x double> %res
}
521 ; Don't use SVE for 128-bit vectors.
define <2 x double> @fminnm_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.minnum.v2f64(<2 x double> %op1, <2 x double> %op2)
  ret <2 x double> %res
}
define void @fminnm_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %op2 = load <4 x double>, ptr %b
  %res = call <4 x double> @llvm.minnum.v4f64(<4 x double> %op1, <4 x double> %op2)
  store <4 x double> %res, ptr %a
  ret void
}
; Operation split into two halves when the vector is twice the 256-bit width.
define void @fminnm_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fminnm_v8f64:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.d, vl4
; VBITS_EQ_256-NEXT:    mov x8, #4 // =0x4
; VBITS_EQ_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fminnm z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fminnm_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %op2 = load <8 x double>, ptr %b
  %res = call <8 x double> @llvm.minnum.v8f64(<8 x double> %op1, <8 x double> %op2)
  store <8 x double> %res, ptr %a
  ret void
}
define void @fminnm_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fminnm_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %op2 = load <16 x double>, ptr %b
  %res = call <16 x double> @llvm.minnum.v16f64(<16 x double> %op1, <16 x double> %op2)
  store <16 x double> %res, ptr %a
  ret void
}
define void @fminnm_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fminnm_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %op2 = load <32 x double>, ptr %b
  %res = call <32 x double> @llvm.minnum.v32f64(<32 x double> %op1, <32 x double> %op2)
  store <32 x double> %res, ptr %a
  ret void
}
614 ; Don't use SVE for 64-bit vectors.
define <4 x half> @fmax_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.maximum.v4f16(<4 x half> %op1, <4 x half> %op2)
  ret <4 x half> %res
}
624 ; Don't use SVE for 128-bit vectors.
define <8 x half> @fmax_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.maximum.v8f16(<8 x half> %op1, <8 x half> %op2)
  ret <8 x half> %res
}
define void @fmax_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  %res = call <16 x half> @llvm.maximum.v16f16(<16 x half> %op1, <16 x half> %op2)
  store <16 x half> %res, ptr %a
  ret void
}
; Operation split into two halves when the vector is twice the 256-bit width.
define void @fmax_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmax_v32f16:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.h, vl16
; VBITS_EQ_256-NEXT:    mov x8, #16 // =0x10
; VBITS_EQ_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmax z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmax_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %op2 = load <32 x half>, ptr %b
  %res = call <32 x half> @llvm.maximum.v32f16(<32 x half> %op1, <32 x half> %op2)
  store <32 x half> %res, ptr %a
  ret void
}
define void @fmax_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmax_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %op2 = load <64 x half>, ptr %b
  %res = call <64 x half> @llvm.maximum.v64f16(<64 x half> %op1, <64 x half> %op2)
  store <64 x half> %res, ptr %a
  ret void
}
define void @fmax_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmax_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %op2 = load <128 x half>, ptr %b
  %res = call <128 x half> @llvm.maximum.v128f16(<128 x half> %op1, <128 x half> %op2)
  store <128 x half> %res, ptr %a
  ret void
}
713 ; Don't use SVE for 64-bit vectors.
define <2 x float> @fmax_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.maximum.v2f32(<2 x float> %op1, <2 x float> %op2)
  ret <2 x float> %res
}
723 ; Don't use SVE for 128-bit vectors.
define <4 x float> @fmax_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.maximum.v4f32(<4 x float> %op1, <4 x float> %op2)
  ret <4 x float> %res
}
define void @fmax_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %op2 = load <8 x float>, ptr %b
  %res = call <8 x float> @llvm.maximum.v8f32(<8 x float> %op1, <8 x float> %op2)
  store <8 x float> %res, ptr %a
  ret void
}
; Operation split into two halves when the vector is twice the 256-bit width.
define void @fmax_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmax_v16f32:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.s, vl8
; VBITS_EQ_256-NEXT:    mov x8, #8 // =0x8
; VBITS_EQ_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmax z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmax_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %op2 = load <16 x float>, ptr %b
  %res = call <16 x float> @llvm.maximum.v16f32(<16 x float> %op1, <16 x float> %op2)
  store <16 x float> %res, ptr %a
  ret void
}
define void @fmax_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmax_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %op2 = load <32 x float>, ptr %b
  %res = call <32 x float> @llvm.maximum.v32f32(<32 x float> %op1, <32 x float> %op2)
  store <32 x float> %res, ptr %a
  ret void
}
define void @fmax_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmax_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %op2 = load <64 x float>, ptr %b
  %res = call <64 x float> @llvm.maximum.v64f32(<64 x float> %op1, <64 x float> %op2)
  store <64 x float> %res, ptr %a
  ret void
}
812 ; Don't use SVE for 64-bit vectors.
define <1 x double> @fmax_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax d0, d0, d1
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.maximum.v1f64(<1 x double> %op1, <1 x double> %op2)
  ret <1 x double> %res
}
822 ; Don't use SVE for 128-bit vectors.
define <2 x double> @fmax_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.maximum.v2f64(<2 x double> %op1, <2 x double> %op2)
  ret <2 x double> %res
}
define void @fmax_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %op2 = load <4 x double>, ptr %b
  %res = call <4 x double> @llvm.maximum.v4f64(<4 x double> %op1, <4 x double> %op2)
  store <4 x double> %res, ptr %a
  ret void
}
; Operation split into two halves when the vector is twice the 256-bit width.
define void @fmax_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmax_v8f64:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.d, vl4
; VBITS_EQ_256-NEXT:    mov x8, #4 // =0x4
; VBITS_EQ_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmax z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmax_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %op2 = load <8 x double>, ptr %b
  %res = call <8 x double> @llvm.maximum.v8f64(<8 x double> %op1, <8 x double> %op2)
  store <8 x double> %res, ptr %a
  ret void
}
define void @fmax_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmax_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %op2 = load <16 x double>, ptr %b
  %res = call <16 x double> @llvm.maximum.v16f64(<16 x double> %op1, <16 x double> %op2)
  store <16 x double> %res, ptr %a
  ret void
}
define void @fmax_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmax_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %op2 = load <32 x double>, ptr %b
  %res = call <32 x double> @llvm.maximum.v32f64(<32 x double> %op1, <32 x double> %op2)
  store <32 x double> %res, ptr %a
  ret void
}
915 ; Don't use SVE for 64-bit vectors.
define <4 x half> @fmin_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.minimum.v4f16(<4 x half> %op1, <4 x half> %op2)
  ret <4 x half> %res
}
925 ; Don't use SVE for 128-bit vectors.
define <8 x half> @fmin_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.minimum.v8f16(<8 x half> %op1, <8 x half> %op2)
  ret <8 x half> %res
}
define void @fmin_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  %res = call <16 x half> @llvm.minimum.v16f16(<16 x half> %op1, <16 x half> %op2)
  store <16 x half> %res, ptr %a
  ret void
}
; <32 x half> (512 bits): at exactly 256-bit SVE the op is split into two vl16
; halves (second half addressed via x8 = 16 elements, lsl #1 for halfword scaling);
; at >= 512-bit SVE a single vl32-predicated fmin suffices.
951 define void @fmin_v32f16(ptr %a, ptr %b) #0 {
952 ; VBITS_EQ_256-LABEL: fmin_v32f16:
953 ; VBITS_EQ_256: // %bb.0:
954 ; VBITS_EQ_256-NEXT: ptrue p0.h, vl16
955 ; VBITS_EQ_256-NEXT: mov x8, #16 // =0x10
956 ; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
957 ; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
958 ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x0]
959 ; VBITS_EQ_256-NEXT: ld1h { z3.h }, p0/z, [x1]
960 ; VBITS_EQ_256-NEXT: fmin z0.h, p0/m, z0.h, z1.h
961 ; VBITS_EQ_256-NEXT: movprfx z1, z2
962 ; VBITS_EQ_256-NEXT: fmin z1.h, p0/m, z1.h, z3.h
963 ; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
964 ; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0]
965 ; VBITS_EQ_256-NEXT: ret
967 ; VBITS_GE_512-LABEL: fmin_v32f16:
968 ; VBITS_GE_512: // %bb.0:
969 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
970 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
971 ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
972 ; VBITS_GE_512-NEXT: fmin z0.h, p0/m, z0.h, z1.h
973 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
974 ; VBITS_GE_512-NEXT: ret
975 %op1 = load <32 x half>, ptr %a
976 %op2 = load <32 x half>, ptr %b
977 %res = call <32 x half> @llvm.minimum.v32f16(<32 x half> %op1, <32 x half> %op2)
978 store <32 x half> %res, ptr %a
; <64 x half> (1024 bits) with vscale_range(8,0): fits one register; single vl64 fmin.
982 define void @fmin_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
983 ; CHECK-LABEL: fmin_v64f16:
985 ; CHECK-NEXT: ptrue p0.h, vl64
986 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
987 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
988 ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
989 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
991 %op1 = load <64 x half>, ptr %a
992 %op2 = load <64 x half>, ptr %b
993 %res = call <64 x half> @llvm.minimum.v64f16(<64 x half> %op1, <64 x half> %op2)
994 store <64 x half> %res, ptr %a
; <128 x half> (2048 bits) with vscale_range(16,0): fits one register; single vl128 fmin.
998 define void @fmin_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
999 ; CHECK-LABEL: fmin_v128f16:
1001 ; CHECK-NEXT: ptrue p0.h, vl128
1002 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
1003 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
1004 ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
1005 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
1007 %op1 = load <128 x half>, ptr %a
1008 %op2 = load <128 x half>, ptr %b
1009 %res = call <128 x half> @llvm.minimum.v128f16(<128 x half> %op1, <128 x half> %op2)
1010 store <128 x half> %res, ptr %a
1014 ; Don't use SVE for 64-bit vectors.
; llvm.minimum on <2 x float> should lower to a single NEON fmin, with no SVE setup.
1015 define <2 x float> @fmin_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
1016 ; CHECK-LABEL: fmin_v2f32:
1018 ; CHECK-NEXT: fmin v0.2s, v0.2s, v1.2s
1020 %res = call <2 x float> @llvm.minimum.v2f32(<2 x float> %op1, <2 x float> %op2)
1021 ret <2 x float> %res
1024 ; Don't use SVE for 128-bit vectors.
; llvm.minimum on <4 x float> should lower to a single NEON fmin, with no SVE setup.
1025 define <4 x float> @fmin_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
1026 ; CHECK-LABEL: fmin_v4f32:
1028 ; CHECK-NEXT: fmin v0.4s, v0.4s, v1.4s
1030 %res = call <4 x float> @llvm.minimum.v4f32(<4 x float> %op1, <4 x float> %op2)
1031 ret <4 x float> %res
; <8 x float> (256 bits): first f32 size that uses SVE; expects one vl8-predicated fmin.
1034 define void @fmin_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
1035 ; CHECK-LABEL: fmin_v8f32:
1037 ; CHECK-NEXT: ptrue p0.s, vl8
1038 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1039 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
1040 ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
1041 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1043 %op1 = load <8 x float>, ptr %a
1044 %op2 = load <8 x float>, ptr %b
1045 %res = call <8 x float> @llvm.minimum.v8f32(<8 x float> %op1, <8 x float> %op2)
1046 store <8 x float> %res, ptr %a
; <16 x float> (512 bits): split into two vl8 halves at exactly 256-bit SVE
; (second half at x8 = 8 elements, lsl #2 for word scaling); single vl16 op
; when VBITS >= 512.
1050 define void @fmin_v16f32(ptr %a, ptr %b) #0 {
1051 ; VBITS_EQ_256-LABEL: fmin_v16f32:
1052 ; VBITS_EQ_256: // %bb.0:
1053 ; VBITS_EQ_256-NEXT: ptrue p0.s, vl8
1054 ; VBITS_EQ_256-NEXT: mov x8, #8 // =0x8
1055 ; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
1056 ; VBITS_EQ_256-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
1057 ; VBITS_EQ_256-NEXT: ld1w { z2.s }, p0/z, [x0]
1058 ; VBITS_EQ_256-NEXT: ld1w { z3.s }, p0/z, [x1]
1059 ; VBITS_EQ_256-NEXT: fmin z0.s, p0/m, z0.s, z1.s
1060 ; VBITS_EQ_256-NEXT: movprfx z1, z2
1061 ; VBITS_EQ_256-NEXT: fmin z1.s, p0/m, z1.s, z3.s
1062 ; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
1063 ; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0]
1064 ; VBITS_EQ_256-NEXT: ret
1066 ; VBITS_GE_512-LABEL: fmin_v16f32:
1067 ; VBITS_GE_512: // %bb.0:
1068 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
1069 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
1070 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
1071 ; VBITS_GE_512-NEXT: fmin z0.s, p0/m, z0.s, z1.s
1072 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
1073 ; VBITS_GE_512-NEXT: ret
1074 %op1 = load <16 x float>, ptr %a
1075 %op2 = load <16 x float>, ptr %b
1076 %res = call <16 x float> @llvm.minimum.v16f32(<16 x float> %op1, <16 x float> %op2)
1077 store <16 x float> %res, ptr %a
; <32 x float> (1024 bits) with vscale_range(8,0): fits one register; single vl32 fmin.
1081 define void @fmin_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
1082 ; CHECK-LABEL: fmin_v32f32:
1084 ; CHECK-NEXT: ptrue p0.s, vl32
1085 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1086 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
1087 ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
1088 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1090 %op1 = load <32 x float>, ptr %a
1091 %op2 = load <32 x float>, ptr %b
1092 %res = call <32 x float> @llvm.minimum.v32f32(<32 x float> %op1, <32 x float> %op2)
1093 store <32 x float> %res, ptr %a
; <64 x float> (2048 bits) with vscale_range(16,0): fits one register; single vl64 fmin.
1097 define void @fmin_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
1098 ; CHECK-LABEL: fmin_v64f32:
1100 ; CHECK-NEXT: ptrue p0.s, vl64
1101 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
1102 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
1103 ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
1104 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1106 %op1 = load <64 x float>, ptr %a
1107 %op2 = load <64 x float>, ptr %b
1108 %res = call <64 x float> @llvm.minimum.v64f32(<64 x float> %op1, <64 x float> %op2)
1109 store <64 x float> %res, ptr %a
1113 ; Don't use SVE for 64-bit vectors.
; Single-element case: expects a scalar FP fmin on d-registers, no vector code at all.
1114 define <1 x double> @fmin_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
1115 ; CHECK-LABEL: fmin_v1f64:
1117 ; CHECK-NEXT: fmin d0, d0, d1
1119 %res = call <1 x double> @llvm.minimum.v1f64(<1 x double> %op1, <1 x double> %op2)
1120 ret <1 x double> %res
1123 ; Don't use SVE for 128-bit vectors.
; llvm.minimum on <2 x double> should lower to a single NEON fmin, with no SVE setup.
1124 define <2 x double> @fmin_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
1125 ; CHECK-LABEL: fmin_v2f64:
1127 ; CHECK-NEXT: fmin v0.2d, v0.2d, v1.2d
1129 %res = call <2 x double> @llvm.minimum.v2f64(<2 x double> %op1, <2 x double> %op2)
1130 ret <2 x double> %res
; <4 x double> (256 bits): first f64 size that uses SVE; expects one vl4-predicated fmin.
1133 define void @fmin_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
1134 ; CHECK-LABEL: fmin_v4f64:
1136 ; CHECK-NEXT: ptrue p0.d, vl4
1137 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1138 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
1139 ; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
1140 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1142 %op1 = load <4 x double>, ptr %a
1143 %op2 = load <4 x double>, ptr %b
1144 %res = call <4 x double> @llvm.minimum.v4f64(<4 x double> %op1, <4 x double> %op2)
1145 store <4 x double> %res, ptr %a
; <8 x double> (512 bits): split into two vl4 halves at exactly 256-bit SVE
; (second half at x8 = 4 elements, lsl #3 for doubleword scaling); single vl8
; op when VBITS >= 512.
1149 define void @fmin_v8f64(ptr %a, ptr %b) #0 {
1150 ; VBITS_EQ_256-LABEL: fmin_v8f64:
1151 ; VBITS_EQ_256: // %bb.0:
1152 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4
1153 ; VBITS_EQ_256-NEXT: mov x8, #4 // =0x4
1154 ; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1155 ; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
1156 ; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x0]
1157 ; VBITS_EQ_256-NEXT: ld1d { z3.d }, p0/z, [x1]
1158 ; VBITS_EQ_256-NEXT: fmin z0.d, p0/m, z0.d, z1.d
1159 ; VBITS_EQ_256-NEXT: movprfx z1, z2
1160 ; VBITS_EQ_256-NEXT: fmin z1.d, p0/m, z1.d, z3.d
1161 ; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
1162 ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0]
1163 ; VBITS_EQ_256-NEXT: ret
1165 ; VBITS_GE_512-LABEL: fmin_v8f64:
1166 ; VBITS_GE_512: // %bb.0:
1167 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
1168 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
1169 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
1170 ; VBITS_GE_512-NEXT: fmin z0.d, p0/m, z0.d, z1.d
1171 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
1172 ; VBITS_GE_512-NEXT: ret
1173 %op1 = load <8 x double>, ptr %a
1174 %op2 = load <8 x double>, ptr %b
1175 %res = call <8 x double> @llvm.minimum.v8f64(<8 x double> %op1, <8 x double> %op2)
1176 store <8 x double> %res, ptr %a
; <16 x double> (1024 bits) with vscale_range(8,0): fits one register; single vl16 fmin.
1180 define void @fmin_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
1181 ; CHECK-LABEL: fmin_v16f64:
1183 ; CHECK-NEXT: ptrue p0.d, vl16
1184 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1185 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
1186 ; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
1187 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1189 %op1 = load <16 x double>, ptr %a
1190 %op2 = load <16 x double>, ptr %b
1191 %res = call <16 x double> @llvm.minimum.v16f64(<16 x double> %op1, <16 x double> %op2)
1192 store <16 x double> %res, ptr %a
; <32 x double> (2048 bits) with vscale_range(16,0): fits one register; single vl32 fmin.
1196 define void @fmin_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
1197 ; CHECK-LABEL: fmin_v32f64:
1199 ; CHECK-NEXT: ptrue p0.d, vl32
1200 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
1201 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
1202 ; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
1203 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
1205 %op1 = load <32 x double>, ptr %a
1206 %op2 = load <32 x double>, ptr %b
1207 %res = call <32 x double> @llvm.minimum.v32f64(<32 x double> %op1, <32 x double> %op2)
1208 store <32 x double> %res, ptr %a
1212 attributes #0 = { "target-features"="+sve" }
1214 declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)
1215 declare <8 x half> @llvm.minnum.v8f16(<8 x half>, <8 x half>)
1216 declare <16 x half> @llvm.minnum.v16f16(<16 x half>, <16 x half>)
1217 declare <32 x half> @llvm.minnum.v32f16(<32 x half>, <32 x half>)
1218 declare <64 x half> @llvm.minnum.v64f16(<64 x half>, <64 x half>)
1219 declare <128 x half> @llvm.minnum.v128f16(<128 x half>, <128 x half>)
1220 declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
1221 declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
1222 declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>)
1223 declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>)
1224 declare <32 x float> @llvm.minnum.v32f32(<32 x float>, <32 x float>)
1225 declare <64 x float> @llvm.minnum.v64f32(<64 x float>, <64 x float>)
1226 declare <1 x double> @llvm.minnum.v1f64(<1 x double>, <1 x double>)
1227 declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
1228 declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>)
1229 declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>)
1230 declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>)
1231 declare <32 x double> @llvm.minnum.v32f64(<32 x double>, <32 x double>)
1233 declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>)
1234 declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>)
1235 declare <16 x half> @llvm.maxnum.v16f16(<16 x half>, <16 x half>)
1236 declare <32 x half> @llvm.maxnum.v32f16(<32 x half>, <32 x half>)
1237 declare <64 x half> @llvm.maxnum.v64f16(<64 x half>, <64 x half>)
1238 declare <128 x half> @llvm.maxnum.v128f16(<128 x half>, <128 x half>)
1239 declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
1240 declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
1241 declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>)
1242 declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>)
1243 declare <32 x float> @llvm.maxnum.v32f32(<32 x float>, <32 x float>)
1244 declare <64 x float> @llvm.maxnum.v64f32(<64 x float>, <64 x float>)
1245 declare <1 x double> @llvm.maxnum.v1f64(<1 x double>, <1 x double>)
1246 declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
1247 declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
1248 declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)
1249 declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>)
1250 declare <32 x double> @llvm.maxnum.v32f64(<32 x double>, <32 x double>)
1252 declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>)
1253 declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>)
1254 declare <16 x half> @llvm.minimum.v16f16(<16 x half>, <16 x half>)
1255 declare <32 x half> @llvm.minimum.v32f16(<32 x half>, <32 x half>)
1256 declare <64 x half> @llvm.minimum.v64f16(<64 x half>, <64 x half>)
1257 declare <128 x half> @llvm.minimum.v128f16(<128 x half>, <128 x half>)
1258 declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
1259 declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
1260 declare <8 x float> @llvm.minimum.v8f32(<8 x float>, <8 x float>)
1261 declare <16 x float> @llvm.minimum.v16f32(<16 x float>, <16 x float>)
1262 declare <32 x float> @llvm.minimum.v32f32(<32 x float>, <32 x float>)
1263 declare <64 x float> @llvm.minimum.v64f32(<64 x float>, <64 x float>)
1264 declare <1 x double> @llvm.minimum.v1f64(<1 x double>, <1 x double>)
1265 declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
1266 declare <4 x double> @llvm.minimum.v4f64(<4 x double>, <4 x double>)
1267 declare <8 x double> @llvm.minimum.v8f64(<8 x double>, <8 x double>)
1268 declare <16 x double> @llvm.minimum.v16f64(<16 x double>, <16 x double>)
1269 declare <32 x double> @llvm.minimum.v32f64(<32 x double>, <32 x double>)
1271 declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
1272 declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
1273 declare <16 x half> @llvm.maximum.v16f16(<16 x half>, <16 x half>)
1274 declare <32 x half> @llvm.maximum.v32f16(<32 x half>, <32 x half>)
1275 declare <64 x half> @llvm.maximum.v64f16(<64 x half>, <64 x half>)
1276 declare <128 x half> @llvm.maximum.v128f16(<128 x half>, <128 x half>)
1277 declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
1278 declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
1279 declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>)
1280 declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)
1281 declare <32 x float> @llvm.maximum.v32f32(<32 x float>, <32 x float>)
1282 declare <64 x float> @llvm.maximum.v64f32(<64 x float>, <64 x float>)
1283 declare <1 x double> @llvm.maximum.v1f64(<1 x double>, <1 x double>)
1284 declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
1285 declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>)
1286 declare <8 x double> @llvm.maximum.v8f64(<8 x double>, <8 x double>)
1287 declare <16 x double> @llvm.maximum.v16f64(<16 x double>, <16 x double>)
1288 declare <32 x double> @llvm.maximum.v32f64(<32 x double>, <32 x double>)