llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
   3 ; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
   4 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
   5
   6 target triple = "aarch64-unknown-linux-gnu"
   7
   8 ;
   9 ; FADDA
  10 ;
  11
  12 ; Ordered fadd reduction (no fast-math flags) of <4 x half>, seeded with %start.
     ; No single instruction NEON support, so one predicated SVE FADDA (vl4) is
     ; expected.
  13 define half @fadda_v4f16(half %start, <4 x half> %a) vscale_range(1,0) #0 {
  14 ; CHECK-LABEL: fadda_v4f16:
  15 ; CHECK:       // %bb.0:
  16 ; CHECK-NEXT:    ptrue p0.h, vl4
  17 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
  18 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
  19 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
  20 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
  21 ; CHECK-NEXT:    ret
  22   %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
  23   ret half %res
  24 }
  25
  26 ; Ordered fadd reduction (no fast-math flags) of <8 x half>, seeded with %start.
     ; No single instruction NEON support, so one predicated SVE FADDA (vl8) is
     ; expected.
  27 define half @fadda_v8f16(half %start, <8 x half> %a) vscale_range(1,0) #0 {
  28 ; CHECK-LABEL: fadda_v8f16:
  29 ; CHECK:       // %bb.0:
  30 ; CHECK-NEXT:    ptrue p0.h, vl8
  31 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
  32 ; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
  33 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
  34 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
  35 ; CHECK-NEXT:    ret
  36   %res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
  37   ret half %res
  38 }
  39
     ; Ordered fadd reduction of a loaded <16 x half>. vscale_range(2,0) means one
     ; SVE register holds the whole vector, so a single vl16 FADDA is expected.
  40 define half @fadda_v16f16(half %start, ptr %a) vscale_range(2,0) #0 {
  41 ; CHECK-LABEL: fadda_v16f16:
  42 ; CHECK:       // %bb.0:
  43 ; CHECK-NEXT:    ptrue p0.h, vl16
  44 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
  45 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
  46 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
  47 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
  48 ; CHECK-NEXT:    ret
  49   %op = load <16 x half>, ptr %a
  50   %res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
  51   ret half %res
  52 }
  53
     ; No vscale_range here, so the output depends on -aarch64-sve-vector-bits-min.
     ; At 256 bits the ordered reduction is two chained vl16 FADDAs (low half
     ; first); at 512 bits or more it is a single vl32 FADDA.
  54 define half @fadda_v32f16(half %start, ptr %a) #0 {
  55 ; VBITS_GE_256-LABEL: fadda_v32f16:
  56 ; VBITS_GE_256:       // %bb.0:
  57 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
  58 ; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 def $z0
  59 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
  60 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
  61 ; VBITS_GE_256-NEXT:    fadda h0, p0, h0, z1.h
  62 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
  63 ; VBITS_GE_256-NEXT:    fadda h0, p0, h0, z1.h
  64 ; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
  65 ; VBITS_GE_256-NEXT:    ret
  66 ;
  67 ; VBITS_GE_512-LABEL: fadda_v32f16:
  68 ; VBITS_GE_512:       // %bb.0:
  69 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
  70 ; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 def $z0
  71 ; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x0]
  72 ; VBITS_GE_512-NEXT:    fadda h0, p0, h0, z1.h
  73 ; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
  74 ; VBITS_GE_512-NEXT:    ret
  75   %op = load <32 x half>, ptr %a
  76   %res = call half @llvm.vector.reduce.fadd.v32f16(half %start, <32 x half> %op)
  77   ret half %res
  78 }
  79
     ; Ordered fadd reduction of a loaded <64 x half>. vscale_range(8,0) means one
     ; SVE register holds the whole vector, so a single vl64 FADDA is expected.
  80 define half @fadda_v64f16(half %start, ptr %a) vscale_range(8,0) #0 {
  81 ; CHECK-LABEL: fadda_v64f16:
  82 ; CHECK:       // %bb.0:
  83 ; CHECK-NEXT:    ptrue p0.h, vl64
  84 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
  85 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
  86 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
  87 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
  88 ; CHECK-NEXT:    ret
  89   %op = load <64 x half>, ptr %a
  90   %res = call half @llvm.vector.reduce.fadd.v64f16(half %start, <64 x half> %op)
  91   ret half %res
  92 }
  93
     ; Ordered fadd reduction of a loaded <128 x half>. vscale_range(16,0) means one
     ; SVE register holds the whole vector, so a single vl128 FADDA is expected.
  94 define half @fadda_v128f16(half %start, ptr %a) vscale_range(16,0) #0 {
  95 ; CHECK-LABEL: fadda_v128f16:
  96 ; CHECK:       // %bb.0:
  97 ; CHECK-NEXT:    ptrue p0.h, vl128
  98 ; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
  99 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
 100 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
 101 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 102 ; CHECK-NEXT:    ret
 103   %op = load <128 x half>, ptr %a
 104   %res = call half @llvm.vector.reduce.fadd.v128f16(half %start, <128 x half> %op)
 105   ret half %res
 106 }
 107
 108 ; Ordered fadd reduction (no fast-math flags) of <2 x float>, seeded with
     ; %start. No single instruction NEON support, so one predicated SVE FADDA
     ; (vl2) is expected.
 109 define float @fadda_v2f32(float %start, <2 x float> %a) vscale_range(1,0) #0 {
 110 ; CHECK-LABEL: fadda_v2f32:
 111 ; CHECK:       // %bb.0:
 112 ; CHECK-NEXT:    ptrue p0.s, vl2
 113 ; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 114 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 115 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
 116 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 117 ; CHECK-NEXT:    ret
 118   %res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
 119   ret float %res
 120 }
 121
 122 ; Ordered fadd reduction (no fast-math flags) of <4 x float>, seeded with
     ; %start. No single instruction NEON support, so one predicated SVE FADDA
     ; (vl4) is expected.
 123 define float @fadda_v4f32(float %start, <4 x float> %a) vscale_range(1,0) #0 {
 124 ; CHECK-LABEL: fadda_v4f32:
 125 ; CHECK:       // %bb.0:
 126 ; CHECK-NEXT:    ptrue p0.s, vl4
 127 ; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 128 ; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 129 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
 130 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 131 ; CHECK-NEXT:    ret
 132   %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
 133   ret float %res
 134 }
 135
     ; Ordered fadd reduction of a loaded <8 x float>. vscale_range(2,0) means one
     ; SVE register holds the whole vector, so a single vl8 FADDA is expected.
 136 define float @fadda_v8f32(float %start, ptr %a) vscale_range(2,0) #0 {
 137 ; CHECK-LABEL: fadda_v8f32:
 138 ; CHECK:       // %bb.0:
 139 ; CHECK-NEXT:    ptrue p0.s, vl8
 140 ; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 141 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 142 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
 143 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 144 ; CHECK-NEXT:    ret
 145   %op = load <8 x float>, ptr %a
 146   %res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
 147   ret float %res
 148 }
 149
     ; No vscale_range here, so the output depends on -aarch64-sve-vector-bits-min.
     ; At 256 bits the ordered reduction is two chained vl8 FADDAs (low half
     ; first); at 512 bits or more it is a single vl16 FADDA.
 150 define float @fadda_v16f32(float %start, ptr %a) #0 {
 151 ; VBITS_GE_256-LABEL: fadda_v16f32:
 152 ; VBITS_GE_256:       // %bb.0:
 153 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 154 ; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 def $z0
 155 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
 156 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 157 ; VBITS_GE_256-NEXT:    fadda s0, p0, s0, z1.s
 158 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
 159 ; VBITS_GE_256-NEXT:    fadda s0, p0, s0, z1.s
 160 ; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 161 ; VBITS_GE_256-NEXT:    ret
 162 ;
 163 ; VBITS_GE_512-LABEL: fadda_v16f32:
 164 ; VBITS_GE_512:       // %bb.0:
 165 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 166 ; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 def $z0
 167 ; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0]
 168 ; VBITS_GE_512-NEXT:    fadda s0, p0, s0, z1.s
 169 ; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 170 ; VBITS_GE_512-NEXT:    ret
 171   %op = load <16 x float>, ptr %a
 172   %res = call float @llvm.vector.reduce.fadd.v16f32(float %start, <16 x float> %op)
 173   ret float %res
 174 }
 175
     ; Ordered fadd reduction of a loaded <32 x float>. vscale_range(8,0) means one
     ; SVE register holds the whole vector, so a single vl32 FADDA is expected.
 176 define float @fadda_v32f32(float %start, ptr %a) vscale_range(8,0) #0 {
 177 ; CHECK-LABEL: fadda_v32f32:
 178 ; CHECK:       // %bb.0:
 179 ; CHECK-NEXT:    ptrue p0.s, vl32
 180 ; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 181 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 182 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
 183 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 184 ; CHECK-NEXT:    ret
 185   %op = load <32 x float>, ptr %a
 186   %res = call float @llvm.vector.reduce.fadd.v32f32(float %start, <32 x float> %op)
 187   ret float %res
 188 }
 189
     ; Ordered fadd reduction of a loaded <64 x float>. vscale_range(16,0) means one
     ; SVE register holds the whole vector, so a single vl64 FADDA is expected.
 190 define float @fadda_v64f32(float %start, ptr %a) vscale_range(16,0) #0 {
 191 ; CHECK-LABEL: fadda_v64f32:
 192 ; CHECK:       // %bb.0:
 193 ; CHECK-NEXT:    ptrue p0.s, vl64
 194 ; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 195 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 196 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
 197 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 198 ; CHECK-NEXT:    ret
 199   %op = load <64 x float>, ptr %a
 200   %res = call float @llvm.vector.reduce.fadd.v64f32(float %start, <64 x float> %op)
 201   ret float %res
 202 }
 203
 204 ; Single element vector: the ordered reduction is just %start + %a, so a scalar
     ; FADD is expected and SVE is not used.
 205 define double @fadda_v1f64(double %start, <1 x double> %a) vscale_range(1,0) #0 {
 206 ; CHECK-LABEL: fadda_v1f64:
 207 ; CHECK:       // %bb.0:
 208 ; CHECK-NEXT:    fadd d0, d0, d1
 209 ; CHECK-NEXT:    ret
 210   %res = call double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
 211   ret double %res
 212 }
 213
 214 ; Ordered fadd reduction (no fast-math flags) of <2 x double>, seeded with
     ; %start. No single instruction NEON support, so one predicated SVE FADDA
     ; (vl2) is expected.
 215 define double @fadda_v2f64(double %start, <2 x double> %a) vscale_range(1,0) #0 {
 216 ; CHECK-LABEL: fadda_v2f64:
 217 ; CHECK:       // %bb.0:
 218 ; CHECK-NEXT:    ptrue p0.d, vl2
 219 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 220 ; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 221 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
 222 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 223 ; CHECK-NEXT:    ret
 224   %res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
 225   ret double %res
 226 }
 227
     ; Ordered fadd reduction of a loaded <4 x double>. vscale_range(2,0) means one
     ; SVE register holds the whole vector, so a single vl4 FADDA is expected.
 228 define double @fadda_v4f64(double %start, ptr %a) vscale_range(2,0) #0 {
 229 ; CHECK-LABEL: fadda_v4f64:
 230 ; CHECK:       // %bb.0:
 231 ; CHECK-NEXT:    ptrue p0.d, vl4
 232 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 233 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
 234 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
 235 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 236 ; CHECK-NEXT:    ret
 237   %op = load <4 x double>, ptr %a
 238   %res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
 239   ret double %res
 240 }
 241
     ; No vscale_range here, so the output depends on -aarch64-sve-vector-bits-min.
     ; At 256 bits the ordered reduction is two chained vl4 FADDAs (low half
     ; first); at 512 bits or more it is a single vl8 FADDA.
 242 define double @fadda_v8f64(double %start, ptr %a) #0 {
 243 ; VBITS_GE_256-LABEL: fadda_v8f64:
 244 ; VBITS_GE_256:       // %bb.0:
 245 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
 246 ; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
 247 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 248 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 249 ; VBITS_GE_256-NEXT:    fadda d0, p0, d0, z1.d
 250 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
 251 ; VBITS_GE_256-NEXT:    fadda d0, p0, d0, z1.d
 252 ; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 253 ; VBITS_GE_256-NEXT:    ret
 254 ;
 255 ; VBITS_GE_512-LABEL: fadda_v8f64:
 256 ; VBITS_GE_512:       // %bb.0:
 257 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 258 ; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
 259 ; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x0]
 260 ; VBITS_GE_512-NEXT:    fadda d0, p0, d0, z1.d
 261 ; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 262 ; VBITS_GE_512-NEXT:    ret
 263   %op = load <8 x double>, ptr %a
 264   %res = call double @llvm.vector.reduce.fadd.v8f64(double %start, <8 x double> %op)
 265   ret double %res
 266 }
 267
     ; Ordered fadd reduction of a loaded <16 x double>. vscale_range(8,0) means one
     ; SVE register holds the whole vector, so a single vl16 FADDA is expected.
 268 define double @fadda_v16f64(double %start, ptr %a) vscale_range(8,0) #0 {
 269 ; CHECK-LABEL: fadda_v16f64:
 270 ; CHECK:       // %bb.0:
 271 ; CHECK-NEXT:    ptrue p0.d, vl16
 272 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 273 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
 274 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
 275 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 276 ; CHECK-NEXT:    ret
 277   %op = load <16 x double>, ptr %a
 278   %res = call double @llvm.vector.reduce.fadd.v16f64(double %start, <16 x double> %op)
 279   ret double %res
 280 }
 281
     ; Ordered fadd reduction of a loaded <32 x double>. vscale_range(16,0) means
     ; one SVE register holds the whole vector, so a single vl32 FADDA is expected.
 282 define double @fadda_v32f64(double %start, ptr %a) vscale_range(16,0) #0 {
 283 ; CHECK-LABEL: fadda_v32f64:
 284 ; CHECK:       // %bb.0:
 285 ; CHECK-NEXT:    ptrue p0.d, vl32
 286 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 287 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
 288 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
 289 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 290 ; CHECK-NEXT:    ret
 291   %op = load <32 x double>, ptr %a
 292   %res = call double @llvm.vector.reduce.fadd.v32f64(double %start, <32 x double> %op)
 293   ret double %res
 294 }
 295
 296 ;
 297 ; FADDV
 298 ;
 299
 300 ; Fast-math fadd reduction of <4 x half>. No single instruction NEON support
     ; for 4 element vectors, so the lanes are summed with SVE FADDV (vl4) and
     ; %start is added afterwards with a scalar FADD.
 301 define half @faddv_v4f16(half %start, <4 x half> %a) vscale_range(2,0) #0 {
 302 ; CHECK-LABEL: faddv_v4f16:
 303 ; CHECK:       // %bb.0:
 304 ; CHECK-NEXT:    ptrue p0.h, vl4
 305 ; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 306 ; CHECK-NEXT:    faddv h1, p0, z1.h
 307 ; CHECK-NEXT:    fadd h0, h0, h1
 308 ; CHECK-NEXT:    ret
 309   %res = call fast half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
 310   ret half %res
 311 }
 312
 313 ; Fast-math fadd reduction of <8 x half>. No single instruction NEON support
     ; for 8 element vectors, so the lanes are summed with SVE FADDV (vl8) and
     ; %start is added afterwards with a scalar FADD.
 314 define half @faddv_v8f16(half %start, <8 x half> %a) vscale_range(2,0) #0 {
 315 ; CHECK-LABEL: faddv_v8f16:
 316 ; CHECK:       // %bb.0:
 317 ; CHECK-NEXT:    ptrue p0.h, vl8
 318 ; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 319 ; CHECK-NEXT:    faddv h1, p0, z1.h
 320 ; CHECK-NEXT:    fadd h0, h0, h1
 321 ; CHECK-NEXT:    ret
 322   %res = call fast half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
 323   ret half %res
 324 }
 325
     ; Fast-math fadd reduction of a loaded <16 x half>: one vl16 FADDV sums the
     ; lanes, then a scalar FADD folds in %start.
 326 define half @faddv_v16f16(half %start, ptr %a) vscale_range(2,0) #0 {
 327 ; CHECK-LABEL: faddv_v16f16:
 328 ; CHECK:       // %bb.0:
 329 ; CHECK-NEXT:    ptrue p0.h, vl16
 330 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
 331 ; CHECK-NEXT:    faddv h1, p0, z1.h
 332 ; CHECK-NEXT:    fadd h0, h0, h1
 333 ; CHECK-NEXT:    ret
 334   %op = load <16 x half>, ptr %a
 335   %res = call fast half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
 336   ret half %res
 337 }
 338
     ; No vscale_range here, so the output depends on -aarch64-sve-vector-bits-min.
     ; At 256 bits the two vl16 halves are added lane-wise before one FADDV; at 512
     ; bits or more a single vl32 FADDV is used. %start is folded in last.
 339 define half @faddv_v32f16(half %start, ptr %a) #0 {
 340 ; VBITS_GE_256-LABEL: faddv_v32f16:
 341 ; VBITS_GE_256:       // %bb.0:
 342 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
 343 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
 344 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
 345 ; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
 346 ; VBITS_GE_256-NEXT:    fadd z1.h, p0/m, z1.h, z2.h
 347 ; VBITS_GE_256-NEXT:    faddv h1, p0, z1.h
 348 ; VBITS_GE_256-NEXT:    fadd h0, h0, h1
 349 ; VBITS_GE_256-NEXT:    ret
 350 ;
 351 ; VBITS_GE_512-LABEL: faddv_v32f16:
 352 ; VBITS_GE_512:       // %bb.0:
 353 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 354 ; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x0]
 355 ; VBITS_GE_512-NEXT:    faddv h1, p0, z1.h
 356 ; VBITS_GE_512-NEXT:    fadd h0, h0, h1
 357 ; VBITS_GE_512-NEXT:    ret
 358   %op = load <32 x half>, ptr %a
 359   %res = call fast half @llvm.vector.reduce.fadd.v32f16(half %start, <32 x half> %op)
 360   ret half %res
 361 }
 362
     ; Fast-math fadd reduction of a loaded <64 x half>: one vl64 FADDV sums the
     ; lanes, then a scalar FADD folds in %start.
 363 define half @faddv_v64f16(half %start, ptr %a) vscale_range(8,0) #0 {
 364 ; CHECK-LABEL: faddv_v64f16:
 365 ; CHECK:       // %bb.0:
 366 ; CHECK-NEXT:    ptrue p0.h, vl64
 367 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
 368 ; CHECK-NEXT:    faddv h1, p0, z1.h
 369 ; CHECK-NEXT:    fadd h0, h0, h1
 370 ; CHECK-NEXT:    ret
 371   %op = load <64 x half>, ptr %a
 372   %res = call fast half @llvm.vector.reduce.fadd.v64f16(half %start, <64 x half> %op)
 373   ret half %res
 374 }
 375
     ; Fast-math fadd reduction of a loaded <128 x half>: one vl128 FADDV sums the
     ; lanes, then a scalar FADD folds in %start.
 376 define half @faddv_v128f16(half %start, ptr %a) vscale_range(16,0) #0 {
 377 ; CHECK-LABEL: faddv_v128f16:
 378 ; CHECK:       // %bb.0:
 379 ; CHECK-NEXT:    ptrue p0.h, vl128
 380 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
 381 ; CHECK-NEXT:    faddv h1, p0, z1.h
 382 ; CHECK-NEXT:    fadd h0, h0, h1
 383 ; CHECK-NEXT:    ret
 384   %op = load <128 x half>, ptr %a
 385   %res = call fast half @llvm.vector.reduce.fadd.v128f16(half %start, <128 x half> %op)
 386   ret half %res
 387 }
 388
 389 ; Don't use SVE for 2 element vectors: a NEON pairwise FADDP sums the two
     ; lanes, then a scalar FADD folds in %start.
 390 define float @faddv_v2f32(float %start, <2 x float> %a) vscale_range(2,0) #0 {
 391 ; CHECK-LABEL: faddv_v2f32:
 392 ; CHECK:       // %bb.0:
 393 ; CHECK-NEXT:    faddp s1, v1.2s
 394 ; CHECK-NEXT:    fadd s0, s0, s1
 395 ; CHECK-NEXT:    ret
 396   %res = call fast float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
 397   ret float %res
 398 }
 399
 400 ; Fast-math fadd reduction of <4 x float>. No single instruction NEON support
     ; for 4 element vectors, so the lanes are summed with SVE FADDV (vl4) and
     ; %start is added afterwards with a scalar FADD.
 401 define float @faddv_v4f32(float %start, <4 x float> %a) vscale_range(2,0) #0 {
 402 ; CHECK-LABEL: faddv_v4f32:
 403 ; CHECK:       // %bb.0:
 404 ; CHECK-NEXT:    ptrue p0.s, vl4
 405 ; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 406 ; CHECK-NEXT:    faddv s1, p0, z1.s
 407 ; CHECK-NEXT:    fadd s0, s0, s1
 408 ; CHECK-NEXT:    ret
 409   %res = call fast float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
 410   ret float %res
 411 }
 412
     ; Fast-math fadd reduction of a loaded <8 x float>: one vl8 FADDV sums the
     ; lanes, then a scalar FADD folds in %start.
 413 define float @faddv_v8f32(float %start, ptr %a) vscale_range(2,0) #0 {
 414 ; CHECK-LABEL: faddv_v8f32:
 415 ; CHECK:       // %bb.0:
 416 ; CHECK-NEXT:    ptrue p0.s, vl8
 417 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 418 ; CHECK-NEXT:    faddv s1, p0, z1.s
 419 ; CHECK-NEXT:    fadd s0, s0, s1
 420 ; CHECK-NEXT:    ret
 421   %op = load <8 x float>, ptr %a
 422   %res = call fast float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
 423   ret float %res
 424 }
 425
     ; No vscale_range here, so the output depends on -aarch64-sve-vector-bits-min.
     ; At 256 bits the two vl8 halves are added lane-wise before one FADDV; at 512
     ; bits or more a single vl16 FADDV is used. %start is folded in last.
 426 define float @faddv_v16f32(float %start, ptr %a) #0 {
 427 ; VBITS_GE_256-LABEL: faddv_v16f32:
 428 ; VBITS_GE_256:       // %bb.0:
 429 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 430 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
 431 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
 432 ; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
 433 ; VBITS_GE_256-NEXT:    fadd z1.s, p0/m, z1.s, z2.s
 434 ; VBITS_GE_256-NEXT:    faddv s1, p0, z1.s
 435 ; VBITS_GE_256-NEXT:    fadd s0, s0, s1
 436 ; VBITS_GE_256-NEXT:    ret
 437 ;
 438 ; VBITS_GE_512-LABEL: faddv_v16f32:
 439 ; VBITS_GE_512:       // %bb.0:
 440 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 441 ; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0]
 442 ; VBITS_GE_512-NEXT:    faddv s1, p0, z1.s
 443 ; VBITS_GE_512-NEXT:    fadd s0, s0, s1
 444 ; VBITS_GE_512-NEXT:    ret
 445   %op = load <16 x float>, ptr %a
 446   %res = call fast float @llvm.vector.reduce.fadd.v16f32(float %start, <16 x float> %op)
 447   ret float %res
 448 }
 449
     ; Fast-math fadd reduction of a loaded <32 x float>: one vl32 FADDV sums the
     ; lanes, then a scalar FADD folds in %start.
 450 define float @faddv_v32f32(float %start, ptr %a) vscale_range(8,0) #0 {
 451 ; CHECK-LABEL: faddv_v32f32:
 452 ; CHECK:       // %bb.0:
 453 ; CHECK-NEXT:    ptrue p0.s, vl32
 454 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 455 ; CHECK-NEXT:    faddv s1, p0, z1.s
 456 ; CHECK-NEXT:    fadd s0, s0, s1
 457 ; CHECK-NEXT:    ret
 458   %op = load <32 x float>, ptr %a
 459   %res = call fast float @llvm.vector.reduce.fadd.v32f32(float %start, <32 x float> %op)
 460   ret float %res
 461 }
 462
     ; Fast-math fadd reduction of a loaded <64 x float>: one vl64 FADDV sums the
     ; lanes, then a scalar FADD folds in %start.
 463 define float @faddv_v64f32(float %start, ptr %a) vscale_range(16,0) #0 {
 464 ; CHECK-LABEL: faddv_v64f32:
 465 ; CHECK:       // %bb.0:
 466 ; CHECK-NEXT:    ptrue p0.s, vl64
 467 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 468 ; CHECK-NEXT:    faddv s1, p0, z1.s
 469 ; CHECK-NEXT:    fadd s0, s0, s1
 470 ; CHECK-NEXT:    ret
 471   %op = load <64 x float>, ptr %a
 472   %res = call fast float @llvm.vector.reduce.fadd.v64f32(float %start, <64 x float> %op)
 473   ret float %res
 474 }
 475
 476 ; Don't use SVE for 1 element vectors: the reduction is just %start + %a, so a
     ; single scalar FADD is expected.
 477 define double @faddv_v1f64(double %start, <1 x double> %a) vscale_range(2,0) #0 {
 478 ; CHECK-LABEL: faddv_v1f64:
 479 ; CHECK:       // %bb.0:
 480 ; CHECK-NEXT:    fadd d0, d0, d1
 481 ; CHECK-NEXT:    ret
 482   %res = call fast double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
 483   ret double %res
 484 }
 485
 486 ; Don't use SVE for 2 element vectors: a NEON pairwise FADDP sums the two
     ; lanes, then a scalar FADD folds in %start.
 487 define double @faddv_v2f64(double %start, <2 x double> %a) vscale_range(2,0) #0 {
 488 ; CHECK-LABEL: faddv_v2f64:
 489 ; CHECK:       // %bb.0:
 490 ; CHECK-NEXT:    faddp d1, v1.2d
 491 ; CHECK-NEXT:    fadd d0, d0, d1
 492 ; CHECK-NEXT:    ret
 493   %res = call fast double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
 494   ret double %res
 495 }
 496
     ; Fast-math fadd reduction of a loaded <4 x double>: one vl4 FADDV sums the
     ; lanes, then a scalar FADD folds in %start.
 497 define double @faddv_v4f64(double %start, ptr %a) vscale_range(2,0) #0 {
 498 ; CHECK-LABEL: faddv_v4f64:
 499 ; CHECK:       // %bb.0:
 500 ; CHECK-NEXT:    ptrue p0.d, vl4
 501 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
 502 ; CHECK-NEXT:    faddv d1, p0, z1.d
 503 ; CHECK-NEXT:    fadd d0, d0, d1
 504 ; CHECK-NEXT:    ret
 505   %op = load <4 x double>, ptr %a
 506   %res = call fast double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
 507   ret double %res
 508 }
 509
     ; No vscale_range here, so the output depends on -aarch64-sve-vector-bits-min.
     ; At 256 bits the two vl4 halves are added lane-wise before one FADDV; at 512
     ; bits or more a single vl8 FADDV is used. %start is folded in last.
 510 define double @faddv_v8f64(double %start, ptr %a) #0 {
 511 ; VBITS_GE_256-LABEL: faddv_v8f64:
 512 ; VBITS_GE_256:       // %bb.0:
 513 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
 514 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 515 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
 516 ; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
 517 ; VBITS_GE_256-NEXT:    fadd z1.d, p0/m, z1.d, z2.d
 518 ; VBITS_GE_256-NEXT:    faddv d1, p0, z1.d
 519 ; VBITS_GE_256-NEXT:    fadd d0, d0, d1
 520 ; VBITS_GE_256-NEXT:    ret
 521 ;
 522 ; VBITS_GE_512-LABEL: faddv_v8f64:
 523 ; VBITS_GE_512:       // %bb.0:
 524 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 525 ; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x0]
 526 ; VBITS_GE_512-NEXT:    faddv d1, p0, z1.d
 527 ; VBITS_GE_512-NEXT:    fadd d0, d0, d1
 528 ; VBITS_GE_512-NEXT:    ret
 529   %op = load <8 x double>, ptr %a
 530   %res = call fast double @llvm.vector.reduce.fadd.v8f64(double %start, <8 x double> %op)
 531   ret double %res
 532 }
 533
     ; Fast-math fadd reduction of a loaded <16 x double>: one vl16 FADDV sums the
     ; lanes, then a scalar FADD folds in %start.
 534 define double @faddv_v16f64(double %start, ptr %a) vscale_range(8,0) #0 {
 535 ; CHECK-LABEL: faddv_v16f64:
 536 ; CHECK:       // %bb.0:
 537 ; CHECK-NEXT:    ptrue p0.d, vl16
 538 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
 539 ; CHECK-NEXT:    faddv d1, p0, z1.d
 540 ; CHECK-NEXT:    fadd d0, d0, d1
 541 ; CHECK-NEXT:    ret
 542   %op = load <16 x double>, ptr %a
 543   %res = call fast double @llvm.vector.reduce.fadd.v16f64(double %start, <16 x double> %op)
 544   ret double %res
 545 }
 546
     ; Fast-math fadd reduction of a loaded <32 x double>: one vl32 FADDV sums the
     ; lanes, then a scalar FADD folds in %start.
 547 define double @faddv_v32f64(double %start, ptr %a) vscale_range(16,0) #0 {
 548 ; CHECK-LABEL: faddv_v32f64:
 549 ; CHECK:       // %bb.0:
 550 ; CHECK-NEXT:    ptrue p0.d, vl32
 551 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
 552 ; CHECK-NEXT:    faddv d1, p0, z1.d
 553 ; CHECK-NEXT:    fadd d0, d0, d1
 554 ; CHECK-NEXT:    ret
 555   %op = load <32 x double>, ptr %a
 556   %res = call fast double @llvm.vector.reduce.fadd.v32f64(double %start, <32 x double> %op)
 557   ret double %res
 558 }
 559
 560 ;
 561 ; FMAXNMV
 562 ;
 563
 564 ; Don't use SVE for 64-bit f16 vectors: the expected output is the NEON
     ; across-lanes FMAXNMV on v0.4h (presumably enabled by the target features in
     ; attribute group #0, which is defined outside this view).
 565 define half @fmaxv_v4f16(<4 x half> %a) vscale_range(2,0) #0 {
 566 ; CHECK-LABEL: fmaxv_v4f16:
 567 ; CHECK:       // %bb.0:
 568 ; CHECK-NEXT:    fmaxnmv h0, v0.4h
 569 ; CHECK-NEXT:    ret
 570   %res = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
 571   ret half %res
 572 }
 573
 574 ; Don't use SVE for 128-bit f16 vectors: the expected output is the NEON
     ; across-lanes FMAXNMV on v0.8h (presumably enabled by the target features in
     ; attribute group #0, which is defined outside this view).
 575 define half @fmaxv_v8f16(<8 x half> %a) vscale_range(2,0) #0 {
 576 ; CHECK-LABEL: fmaxv_v8f16:
 577 ; CHECK:       // %bb.0:
 578 ; CHECK-NEXT:    fmaxnmv h0, v0.8h
 579 ; CHECK-NEXT:    ret
 580   %res = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a)
 581   ret half %res
 582 }
 583
     ; fmax reduction of a loaded <16 x half>: vscale_range(2,0) means one SVE
     ; register holds the vector, so a single vl16 FMAXNMV is expected.
 584 define half @fmaxv_v16f16(ptr %a) vscale_range(2,0) #0 {
 585 ; CHECK-LABEL: fmaxv_v16f16:
 586 ; CHECK:       // %bb.0:
 587 ; CHECK-NEXT:    ptrue p0.h, vl16
 588 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 589 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
 590 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 591 ; CHECK-NEXT:    ret
 592   %op = load <16 x half>, ptr %a
 593   %res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op)
 594   ret half %res
 595 }
 596
     ; No vscale_range here, so the output depends on -aarch64-sve-vector-bits-min.
     ; At 256 bits the two vl16 halves are combined lane-wise with FMAXNM before
     ; one FMAXNMV; at 512 bits or more a single vl32 FMAXNMV is used.
 597 define half @fmaxv_v32f16(ptr %a) #0 {
 598 ; VBITS_GE_256-LABEL: fmaxv_v32f16:
 599 ; VBITS_GE_256:       // %bb.0:
 600 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
 601 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
 602 ; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 603 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 604 ; VBITS_GE_256-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
 605 ; VBITS_GE_256-NEXT:    fmaxnmv h0, p0, z0.h
 606 ; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 607 ; VBITS_GE_256-NEXT:    ret
 608 ;
 609 ; VBITS_GE_512-LABEL: fmaxv_v32f16:
 610 ; VBITS_GE_512:       // %bb.0:
 611 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 612 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 613 ; VBITS_GE_512-NEXT:    fmaxnmv h0, p0, z0.h
 614 ; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 615 ; VBITS_GE_512-NEXT:    ret
 616   %op = load <32 x half>, ptr %a
 617   %res = call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %op)
 618   ret half %res
 619 }
 620
     ; fmax reduction of a loaded <64 x half>: vscale_range(8,0) means one SVE
     ; register holds the vector, so a single vl64 FMAXNMV is expected.
 621 define half @fmaxv_v64f16(ptr %a) vscale_range(8,0) #0 {
 622 ; CHECK-LABEL: fmaxv_v64f16:
 623 ; CHECK:       // %bb.0:
 624 ; CHECK-NEXT:    ptrue p0.h, vl64
 625 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 626 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
 627 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 628 ; CHECK-NEXT:    ret
 629   %op = load <64 x half>, ptr %a
 630   %res = call half @llvm.vector.reduce.fmax.v64f16(<64 x half> %op)
 631   ret half %res
 632 }
 633
     ; fmax reduction of a loaded <128 x half>: vscale_range(16,0) means one SVE
     ; register holds the vector, so a single vl128 FMAXNMV is expected.
 634 define half @fmaxv_v128f16(ptr %a) vscale_range(16,0) #0 {
 635 ; CHECK-LABEL: fmaxv_v128f16:
 636 ; CHECK:       // %bb.0:
 637 ; CHECK-NEXT:    ptrue p0.h, vl128
 638 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 639 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
 640 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 641 ; CHECK-NEXT:    ret
 642   %op = load <128 x half>, ptr %a
 643   %res = call half @llvm.vector.reduce.fmax.v128f16(<128 x half> %op)
 644   ret half %res
 645 }
 646
 647 ; Don't use SVE for 64-bit f32 vectors: a single NEON pairwise FMAXNMP on
     ; v0.2s is expected.
 648 define float @fmaxv_v2f32(<2 x float> %a) vscale_range(2,0) #0 {
 649 ; CHECK-LABEL: fmaxv_v2f32:
 650 ; CHECK:       // %bb.0:
 651 ; CHECK-NEXT:    fmaxnmp s0, v0.2s
 652 ; CHECK-NEXT:    ret
 653   %res = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a)
 654   ret float %res
 655 }
 656
 657 ; Don't use SVE for 128-bit f32 vectors: a single NEON across-lanes FMAXNMV on
     ; v0.4s is expected.
 658 define float @fmaxv_v4f32(<4 x float> %a) vscale_range(2,0) #0 {
 659 ; CHECK-LABEL: fmaxv_v4f32:
 660 ; CHECK:       // %bb.0:
 661 ; CHECK-NEXT:    fmaxnmv s0, v0.4s
 662 ; CHECK-NEXT:    ret
 663   %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
 664   ret float %res
 665 }
 666
     ; fmax reduction of a loaded <8 x float>: vscale_range(2,0) means one SVE
     ; register holds the vector, so a single vl8 FMAXNMV is expected.
 667 define float @fmaxv_v8f32(ptr %a) vscale_range(2,0) #0 {
 668 ; CHECK-LABEL: fmaxv_v8f32:
 669 ; CHECK:       // %bb.0:
 670 ; CHECK-NEXT:    ptrue p0.s, vl8
 671 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 672 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
 673 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 674 ; CHECK-NEXT:    ret
 675   %op = load <8 x float>, ptr %a
 676   %res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op)
 677   ret float %res
 678 }
 679
     ; No vscale_range here, so the output depends on -aarch64-sve-vector-bits-min.
     ; At 256 bits the two vl8 halves are combined lane-wise with FMAXNM before
     ; one FMAXNMV; at 512 bits or more a single vl16 FMAXNMV is used.
 680 define float @fmaxv_v16f32(ptr %a) #0 {
 681 ; VBITS_GE_256-LABEL: fmaxv_v16f32:
 682 ; VBITS_GE_256:       // %bb.0:
 683 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 684 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
 685 ; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 686 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 687 ; VBITS_GE_256-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
 688 ; VBITS_GE_256-NEXT:    fmaxnmv s0, p0, z0.s
 689 ; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 690 ; VBITS_GE_256-NEXT:    ret
 691 ;
 692 ; VBITS_GE_512-LABEL: fmaxv_v16f32:
 693 ; VBITS_GE_512:       // %bb.0:
 694 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 695 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 696 ; VBITS_GE_512-NEXT:    fmaxnmv s0, p0, z0.s
 697 ; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 698 ; VBITS_GE_512-NEXT:    ret
 699   %op = load <16 x float>, ptr %a
 700   %res = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %op)
 701   ret float %res
 702 }
 703
     ; fmax reduction of a loaded <32 x float>: vscale_range(8,0) means one SVE
     ; register holds the vector, so a single vl32 FMAXNMV is expected.
 704 define float @fmaxv_v32f32(ptr %a) vscale_range(8,0) #0 {
 705 ; CHECK-LABEL: fmaxv_v32f32:
 706 ; CHECK:       // %bb.0:
 707 ; CHECK-NEXT:    ptrue p0.s, vl32
 708 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 709 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
 710 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 711 ; CHECK-NEXT:    ret
 712   %op = load <32 x float>, ptr %a
 713   %res = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> %op)
 714   ret float %res
 715 }
 716
     ; fmax reduction of a loaded <64 x float>: vscale_range(16,0) means one SVE
     ; register holds the vector, so a single vl64 FMAXNMV is expected.
 717 define float @fmaxv_v64f32(ptr %a) vscale_range(16,0) #0 {
 718 ; CHECK-LABEL: fmaxv_v64f32:
 719 ; CHECK:       // %bb.0:
 720 ; CHECK-NEXT:    ptrue p0.s, vl64
 721 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 722 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
 723 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 724 ; CHECK-NEXT:    ret
 725   %op = load <64 x float>, ptr %a
 726   %res = call float @llvm.vector.reduce.fmax.v64f32(<64 x float> %op)
 727   ret float %res
 728 }
 729
 730 ; Nothing to do for single element vectors: the reduction result is the input
     ; itself, so only a return is expected.
 731 define double @fmaxv_v1f64(<1 x double> %a) vscale_range(2,0) #0 {
 732 ; CHECK-LABEL: fmaxv_v1f64:
 733 ; CHECK:       // %bb.0:
 734 ; CHECK-NEXT:    ret
 735   %res = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
 736   ret double %res
 737 }
 738
 739 ; Don't use SVE for 128-bit f64 vectors: a single NEON pairwise FMAXNMP on
     ; v0.2d is expected.
 740 define double @fmaxv_v2f64(<2 x double> %a) vscale_range(2,0) #0 {
 741 ; CHECK-LABEL: fmaxv_v2f64:
 742 ; CHECK:       // %bb.0:
 743 ; CHECK-NEXT:    fmaxnmp d0, v0.2d
 744 ; CHECK-NEXT:    ret
 745   %res = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
 746   ret double %res
 747 }
 748
     ; fmax reduction of a loaded <4 x double>: vscale_range(2,0) means one SVE
     ; register holds the vector, so a single vl4 FMAXNMV is expected.
 749 define double @fmaxv_v4f64(ptr %a) vscale_range(2,0) #0 {
 750 ; CHECK-LABEL: fmaxv_v4f64:
 751 ; CHECK:       // %bb.0:
 752 ; CHECK-NEXT:    ptrue p0.d, vl4
 753 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 754 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
 755 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 756 ; CHECK-NEXT:    ret
 757   %op = load <4 x double>, ptr %a
 758   %res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op)
 759   ret double %res
 760 }
 761
     ; No vscale_range here, so the output depends on -aarch64-sve-vector-bits-min.
     ; At 256 bits the two vl4 halves are combined lane-wise with FMAXNM before
     ; one FMAXNMV; at 512 bits or more a single vl8 FMAXNMV is used.
 762 define double @fmaxv_v8f64(ptr %a) #0 {
 763 ; VBITS_GE_256-LABEL: fmaxv_v8f64:
 764 ; VBITS_GE_256:       // %bb.0:
 765 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
 766 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 767 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 768 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 769 ; VBITS_GE_256-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
 770 ; VBITS_GE_256-NEXT:    fmaxnmv d0, p0, z0.d
 771 ; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 772 ; VBITS_GE_256-NEXT:    ret
 773 ;
 774 ; VBITS_GE_512-LABEL: fmaxv_v8f64:
 775 ; VBITS_GE_512:       // %bb.0:
 776 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 777 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 778 ; VBITS_GE_512-NEXT:    fmaxnmv d0, p0, z0.d
 779 ; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 780 ; VBITS_GE_512-NEXT:    ret
 781   %op = load <8 x double>, ptr %a
 782   %res = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %op)
 783   ret double %res
 784 }
 785
     ; fmax reduction of a loaded <16 x double>: vscale_range(8,0) means one SVE
     ; register holds the vector, so a single vl16 FMAXNMV is expected.
 786 define double @fmaxv_v16f64(ptr %a) vscale_range(8,0) #0 {
 787 ; CHECK-LABEL: fmaxv_v16f64:
 788 ; CHECK:       // %bb.0:
 789 ; CHECK-NEXT:    ptrue p0.d, vl16
 790 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 791 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
 792 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 793 ; CHECK-NEXT:    ret
 794   %op = load <16 x double>, ptr %a
 795   %res = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> %op)
 796   ret double %res
 797 }
 798
 799 define double @fmaxv_v32f64(ptr %a) vscale_range(16,0) #0 {
 800 ; CHECK-LABEL: fmaxv_v32f64:
 801 ; CHECK:       // %bb.0:
 802 ; CHECK-NEXT:    ptrue p0.d, vl32
 803 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 804 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
 805 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 806 ; CHECK-NEXT:    ret
 807   %vec = load <32 x double>, ptr %a
 808   %max = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %vec)
 809   ret double %max
 810 }
 811
 812 ;
 813 ; FMINNMV
 814 ;
 815
 816 ; Don't use SVE for 64-bit f16 vectors.
 817 define half @fminv_v4f16(<4 x half> %a) vscale_range(2,0) #0 {
 818 ; CHECK-LABEL: fminv_v4f16:
 819 ; CHECK:       // %bb.0:
 820 ; CHECK-NEXT:    fminnmv h0, v0.4h
 821 ; CHECK-NEXT:    ret
 822   %min = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
 823   ret half %min
 824 }
 825
 826 ; Don't use SVE for 128-bit f16 vectors.
 827 define half @fminv_v8f16(<8 x half> %a) vscale_range(2,0) #0 {
 828 ; CHECK-LABEL: fminv_v8f16:
 829 ; CHECK:       // %bb.0:
 830 ; CHECK-NEXT:    fminnmv h0, v0.8h
 831 ; CHECK-NEXT:    ret
 832   %min = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %a)
 833   ret half %min
 834 }
 835
 836 define half @fminv_v16f16(ptr %a) vscale_range(2,0) #0 {
 837 ; CHECK-LABEL: fminv_v16f16:
 838 ; CHECK:       // %bb.0:
 839 ; CHECK-NEXT:    ptrue p0.h, vl16
 840 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 841 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
 842 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 843 ; CHECK-NEXT:    ret
 844   %vec = load <16 x half>, ptr %a
 845   %min = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %vec)
 846   ret half %min
 847 }
 848
 849 define half @fminv_v32f16(ptr %a) #0 {
 850 ; VBITS_GE_256-LABEL: fminv_v32f16:
 851 ; VBITS_GE_256:       // %bb.0:
 852 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
 853 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
 854 ; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 855 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 856 ; VBITS_GE_256-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
 857 ; VBITS_GE_256-NEXT:    fminnmv h0, p0, z0.h
 858 ; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 859 ; VBITS_GE_256-NEXT:    ret
 860 ;
 861 ; VBITS_GE_512-LABEL: fminv_v32f16:
 862 ; VBITS_GE_512:       // %bb.0:
 863 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 864 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 865 ; VBITS_GE_512-NEXT:    fminnmv h0, p0, z0.h
 866 ; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 867 ; VBITS_GE_512-NEXT:    ret
 868   %vec = load <32 x half>, ptr %a
 869   %min = call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %vec)
 870   ret half %min
 871 }
 872
 873 define half @fminv_v64f16(ptr %a) vscale_range(8,0) #0 {
 874 ; CHECK-LABEL: fminv_v64f16:
 875 ; CHECK:       // %bb.0:
 876 ; CHECK-NEXT:    ptrue p0.h, vl64
 877 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 878 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
 879 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 880 ; CHECK-NEXT:    ret
 881   %vec = load <64 x half>, ptr %a
 882   %min = call half @llvm.vector.reduce.fmin.v64f16(<64 x half> %vec)
 883   ret half %min
 884 }
 885
 886 define half @fminv_v128f16(ptr %a) vscale_range(16,0) #0 {
 887 ; CHECK-LABEL: fminv_v128f16:
 888 ; CHECK:       // %bb.0:
 889 ; CHECK-NEXT:    ptrue p0.h, vl128
 890 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 891 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
 892 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 893 ; CHECK-NEXT:    ret
 894   %vec = load <128 x half>, ptr %a
 895   %min = call half @llvm.vector.reduce.fmin.v128f16(<128 x half> %vec)
 896   ret half %min
 897 }
 898
 899 ; Don't use SVE for 64-bit f32 vectors.
 900 define float @fminv_v2f32(<2 x float> %a) vscale_range(2,0) #0 {
 901 ; CHECK-LABEL: fminv_v2f32:
 902 ; CHECK:       // %bb.0:
 903 ; CHECK-NEXT:    fminnmp s0, v0.2s
 904 ; CHECK-NEXT:    ret
 905   %min = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a)
 906   ret float %min
 907 }
 908
 909 ; Don't use SVE for 128-bit f32 vectors.
 910 define float @fminv_v4f32(<4 x float> %a) vscale_range(2,0) #0 {
 911 ; CHECK-LABEL: fminv_v4f32:
 912 ; CHECK:       // %bb.0:
 913 ; CHECK-NEXT:    fminnmv s0, v0.4s
 914 ; CHECK-NEXT:    ret
 915   %min = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
 916   ret float %min
 917 }
 918
 919 define float @fminv_v8f32(ptr %a) vscale_range(2,0) #0 {
 920 ; CHECK-LABEL: fminv_v8f32:
 921 ; CHECK:       // %bb.0:
 922 ; CHECK-NEXT:    ptrue p0.s, vl8
 923 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 924 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
 925 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 926 ; CHECK-NEXT:    ret
 927   %vec = load <8 x float>, ptr %a
 928   %min = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %vec)
 929   ret float %min
 930 }
 931
 932 define float @fminv_v16f32(ptr %a) #0 {
 933 ; VBITS_GE_256-LABEL: fminv_v16f32:
 934 ; VBITS_GE_256:       // %bb.0:
 935 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 936 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
 937 ; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 938 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 939 ; VBITS_GE_256-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
 940 ; VBITS_GE_256-NEXT:    fminnmv s0, p0, z0.s
 941 ; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 942 ; VBITS_GE_256-NEXT:    ret
 943 ;
 944 ; VBITS_GE_512-LABEL: fminv_v16f32:
 945 ; VBITS_GE_512:       // %bb.0:
 946 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 947 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 948 ; VBITS_GE_512-NEXT:    fminnmv s0, p0, z0.s
 949 ; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 950 ; VBITS_GE_512-NEXT:    ret
 951   %vec = load <16 x float>, ptr %a
 952   %min = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %vec)
 953   ret float %min
 954 }
 955
 956 define float @fminv_v32f32(ptr %a) vscale_range(8,0) #0 {
 957 ; CHECK-LABEL: fminv_v32f32:
 958 ; CHECK:       // %bb.0:
 959 ; CHECK-NEXT:    ptrue p0.s, vl32
 960 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 961 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
 962 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 963 ; CHECK-NEXT:    ret
 964   %vec = load <32 x float>, ptr %a
 965   %min = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> %vec)
 966   ret float %min
 967 }
 968
 969 define float @fminv_v64f32(ptr %a) vscale_range(16,0) #0 {
 970 ; CHECK-LABEL: fminv_v64f32:
 971 ; CHECK:       // %bb.0:
 972 ; CHECK-NEXT:    ptrue p0.s, vl64
 973 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 974 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
 975 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 976 ; CHECK-NEXT:    ret
 977   %vec = load <64 x float>, ptr %a
 978   %min = call float @llvm.vector.reduce.fmin.v64f32(<64 x float> %vec)
 979   ret float %min
 980 }
 981
 982 ; Nothing to do for single element vectors.
 983 define double @fminv_v1f64(<1 x double> %a) vscale_range(2,0) #0 {
 984 ; CHECK-LABEL: fminv_v1f64:
 985 ; CHECK:       // %bb.0:
 986 ; CHECK-NEXT:    ret
 987   %min = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
 988   ret double %min
 989 }
 990
 991 ; Don't use SVE for 128-bit f64 vectors.
 992 define double @fminv_v2f64(<2 x double> %a) vscale_range(2,0) #0 {
 993 ; CHECK-LABEL: fminv_v2f64:
 994 ; CHECK:       // %bb.0:
 995 ; CHECK-NEXT:    fminnmp d0, v0.2d
 996 ; CHECK-NEXT:    ret
 997   %min = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
 998   ret double %min
 999 }
1000
1001 define double @fminv_v4f64(ptr %a) vscale_range(2,0) #0 {
1002 ; CHECK-LABEL: fminv_v4f64:
1003 ; CHECK:       // %bb.0:
1004 ; CHECK-NEXT:    ptrue p0.d, vl4
1005 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1006 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
1007 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1008 ; CHECK-NEXT:    ret
1009   %vec = load <4 x double>, ptr %a
1010   %min = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %vec)
1011   ret double %min
1012 }
1013
1014 define double @fminv_v8f64(ptr %a) #0 {
1015 ; VBITS_GE_256-LABEL: fminv_v8f64:
1016 ; VBITS_GE_256:       // %bb.0:
1017 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
1018 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
1019 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1020 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
1021 ; VBITS_GE_256-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
1022 ; VBITS_GE_256-NEXT:    fminnmv d0, p0, z0.d
1023 ; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
1024 ; VBITS_GE_256-NEXT:    ret
1025 ;
1026 ; VBITS_GE_512-LABEL: fminv_v8f64:
1027 ; VBITS_GE_512:       // %bb.0:
1028 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
1029 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
1030 ; VBITS_GE_512-NEXT:    fminnmv d0, p0, z0.d
1031 ; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
1032 ; VBITS_GE_512-NEXT:    ret
1033   %vec = load <8 x double>, ptr %a
1034   %min = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %vec)
1035   ret double %min
1036 }
1037
1038 define double @fminv_v16f64(ptr %a) vscale_range(8,0) #0 {
1039 ; CHECK-LABEL: fminv_v16f64:
1040 ; CHECK:       // %bb.0:
1041 ; CHECK-NEXT:    ptrue p0.d, vl16
1042 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1043 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
1044 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1045 ; CHECK-NEXT:    ret
1046   %vec = load <16 x double>, ptr %a
1047   %min = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> %vec)
1048   ret double %min
1049 }
1050
1051 define double @fminv_v32f64(ptr %a) vscale_range(16,0) #0 {
1052 ; CHECK-LABEL: fminv_v32f64:
1053 ; CHECK:       // %bb.0:
1054 ; CHECK-NEXT:    ptrue p0.d, vl32
1055 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1056 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
1057 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1058 ; CHECK-NEXT:    ret
1059   %vec = load <32 x double>, ptr %a
1060   %min = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %vec)
1061   ret double %min
1062 }
1063
1064 ;
1065 ; FMAXV
1066 ;
1067
1068 define half @fmaximumv_v4f16(<4 x half> %a) vscale_range(2,0) #0 {
1069 ; CHECK-LABEL: fmaximumv_v4f16:
1070 ; CHECK:       // %bb.0:
1071 ; CHECK-NEXT:    fmaxv h0, v0.4h
1072 ; CHECK-NEXT:    ret
1073   %max = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a)
1074   ret half %max
1075 }
1076
1077 define half @fmaximumv_v8f16(<8 x half> %a) vscale_range(2,0) #0 {
1078 ; CHECK-LABEL: fmaximumv_v8f16:
1079 ; CHECK:       // %bb.0:
1080 ; CHECK-NEXT:    fmaxv h0, v0.8h
1081 ; CHECK-NEXT:    ret
1082   %max = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> %a)
1083   ret half %max
1084 }
1085
1086 define half @fmaximumv_v16f16(ptr %a) vscale_range(2,0) #0 {
1087 ; CHECK-LABEL: fmaximumv_v16f16:
1088 ; CHECK:       // %bb.0:
1089 ; CHECK-NEXT:    ptrue p0.h, vl16
1090 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1091 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
1092 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1093 ; CHECK-NEXT:    ret
1094   %vec = load <16 x half>, ptr %a
1095   %max = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> %vec)
1096   ret half %max
1097 }
1098
1099 define half @fmaximumv_v32f16(ptr %a) #0 {
1100 ; VBITS_GE_256-LABEL: fmaximumv_v32f16:
1101 ; VBITS_GE_256:       // %bb.0:
1102 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
1103 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
1104 ; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
1105 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
1106 ; VBITS_GE_256-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
1107 ; VBITS_GE_256-NEXT:    fmaxv h0, p0, z0.h
1108 ; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
1109 ; VBITS_GE_256-NEXT:    ret
1110 ;
1111 ; VBITS_GE_512-LABEL: fmaximumv_v32f16:
1112 ; VBITS_GE_512:       // %bb.0:
1113 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
1114 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
1115 ; VBITS_GE_512-NEXT:    fmaxv h0, p0, z0.h
1116 ; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
1117 ; VBITS_GE_512-NEXT:    ret
1118   %vec = load <32 x half>, ptr %a
1119   %max = call half @llvm.vector.reduce.fmaximum.v32f16(<32 x half> %vec)
1120   ret half %max
1121 }
1122
1123 define half @fmaximumv_v64f16(ptr %a) vscale_range(8,0) #0 {
1124 ; CHECK-LABEL: fmaximumv_v64f16:
1125 ; CHECK:       // %bb.0:
1126 ; CHECK-NEXT:    ptrue p0.h, vl64
1127 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1128 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
1129 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1130 ; CHECK-NEXT:    ret
1131   %vec = load <64 x half>, ptr %a
1132   %max = call half @llvm.vector.reduce.fmaximum.v64f16(<64 x half> %vec)
1133   ret half %max
1134 }
1135
1136 define half @fmaximumv_v128f16(ptr %a) vscale_range(16,0) #0 {
1137 ; CHECK-LABEL: fmaximumv_v128f16:
1138 ; CHECK:       // %bb.0:
1139 ; CHECK-NEXT:    ptrue p0.h, vl128
1140 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1141 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
1142 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1143 ; CHECK-NEXT:    ret
1144   %vec = load <128 x half>, ptr %a
1145   %max = call half @llvm.vector.reduce.fmaximum.v128f16(<128 x half> %vec)
1146   ret half %max
1147 }
1148
1149 ; Don't use SVE for 64-bit f32 vectors.
1150 define float @fmaximumv_v2f32(<2 x float> %a) vscale_range(2,0) #0 {
1151 ; CHECK-LABEL: fmaximumv_v2f32:
1152 ; CHECK:       // %bb.0:
1153 ; CHECK-NEXT:    fmaxp s0, v0.2s
1154 ; CHECK-NEXT:    ret
1155   %max = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> %a)
1156   ret float %max
1157 }
1158
1159 ; Don't use SVE for 128-bit f32 vectors.
1160 define float @fmaximumv_v4f32(<4 x float> %a) vscale_range(2,0) #0 {
1161 ; CHECK-LABEL: fmaximumv_v4f32:
1162 ; CHECK:       // %bb.0:
1163 ; CHECK-NEXT:    fmaxv s0, v0.4s
1164 ; CHECK-NEXT:    ret
1165   %max = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %a)
1166   ret float %max
1167 }
1168
1169 define float @fmaximumv_v8f32(ptr %a) vscale_range(2,0) #0 {
1170 ; CHECK-LABEL: fmaximumv_v8f32:
1171 ; CHECK:       // %bb.0:
1172 ; CHECK-NEXT:    ptrue p0.s, vl8
1173 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
1174 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
1175 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1176 ; CHECK-NEXT:    ret
1177   %vec = load <8 x float>, ptr %a
1178   %max = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %vec)
1179   ret float %max
1180 }
1181
1182 define float @fmaximumv_v16f32(ptr %a) #0 {
1183 ; VBITS_GE_256-LABEL: fmaximumv_v16f32:
1184 ; VBITS_GE_256:       // %bb.0:
1185 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
1186 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
1187 ; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
1188 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
1189 ; VBITS_GE_256-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
1190 ; VBITS_GE_256-NEXT:    fmaxv s0, p0, z0.s
1191 ; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
1192 ; VBITS_GE_256-NEXT:    ret
1193 ;
1194 ; VBITS_GE_512-LABEL: fmaximumv_v16f32:
1195 ; VBITS_GE_512:       // %bb.0:
1196 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
1197 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
1198 ; VBITS_GE_512-NEXT:    fmaxv s0, p0, z0.s
1199 ; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
1200 ; VBITS_GE_512-NEXT:    ret
1201   %vec = load <16 x float>, ptr %a
1202   %max = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %vec)
1203   ret float %max
1204 }
1205
1206 define float @fmaximumv_v32f32(ptr %a) vscale_range(8,0) #0 {
1207 ; CHECK-LABEL: fmaximumv_v32f32:
1208 ; CHECK:       // %bb.0:
1209 ; CHECK-NEXT:    ptrue p0.s, vl32
1210 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
1211 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
1212 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1213 ; CHECK-NEXT:    ret
1214   %vec = load <32 x float>, ptr %a
1215   %max = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> %vec)
1216   ret float %max
1217 }
1218
1219 define float @fmaximumv_v64f32(ptr %a) vscale_range(16,0) #0 {
1220 ; CHECK-LABEL: fmaximumv_v64f32:
1221 ; CHECK:       // %bb.0:
1222 ; CHECK-NEXT:    ptrue p0.s, vl64
1223 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
1224 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
1225 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1226 ; CHECK-NEXT:    ret
1227   %vec = load <64 x float>, ptr %a
1228   %max = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> %vec)
1229   ret float %max
1230 }
1231
1232 ; Nothing to do for single element vectors.
1233 define double @fmaximumv_v1f64(<1 x double> %a) vscale_range(2,0) #0 {
1234 ; CHECK-LABEL: fmaximumv_v1f64:
1235 ; CHECK:       // %bb.0:
1236 ; CHECK-NEXT:    ret
1237   %max = call double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a)
1238   ret double %max
1239 }
1240
1241 ; Don't use SVE for 128-bit f64 vectors.
1242 define double @fmaximumv_v2f64(<2 x double> %a) vscale_range(2,0) #0 {
1243 ; CHECK-LABEL: fmaximumv_v2f64:
1244 ; CHECK:       // %bb.0:
1245 ; CHECK-NEXT:    fmaxp d0, v0.2d
1246 ; CHECK-NEXT:    ret
1247   %max = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %a)
1248   ret double %max
1249 }
1250
1251 define double @fmaximumv_v4f64(ptr %a) vscale_range(2,0) #0 {
1252 ; CHECK-LABEL: fmaximumv_v4f64:
1253 ; CHECK:       // %bb.0:
1254 ; CHECK-NEXT:    ptrue p0.d, vl4
1255 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1256 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
1257 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1258 ; CHECK-NEXT:    ret
1259   %vec = load <4 x double>, ptr %a
1260   %max = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %vec)
1261   ret double %max
1262 }
1263
1264 define double @fmaximumv_v8f64(ptr %a) #0 {
1265 ; VBITS_GE_256-LABEL: fmaximumv_v8f64:
1266 ; VBITS_GE_256:       // %bb.0:
1267 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
1268 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
1269 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1270 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
1271 ; VBITS_GE_256-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
1272 ; VBITS_GE_256-NEXT:    fmaxv d0, p0, z0.d
1273 ; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
1274 ; VBITS_GE_256-NEXT:    ret
1275 ;
1276 ; VBITS_GE_512-LABEL: fmaximumv_v8f64:
1277 ; VBITS_GE_512:       // %bb.0:
1278 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
1279 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
1280 ; VBITS_GE_512-NEXT:    fmaxv d0, p0, z0.d
1281 ; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
1282 ; VBITS_GE_512-NEXT:    ret
1283   %vec = load <8 x double>, ptr %a
1284   %max = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> %vec)
1285   ret double %max
1286 }
1287
1288 define double @fmaximumv_v16f64(ptr %a) vscale_range(8,0) #0 {
1289 ; CHECK-LABEL: fmaximumv_v16f64:
1290 ; CHECK:       // %bb.0:
1291 ; CHECK-NEXT:    ptrue p0.d, vl16
1292 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1293 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
1294 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1295 ; CHECK-NEXT:    ret
1296   %vec = load <16 x double>, ptr %a
1297   %max = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> %vec)
1298   ret double %max
1299 }
1300
1301 define double @fmaximumv_v32f64(ptr %a) vscale_range(16,0) #0 {
1302 ; CHECK-LABEL: fmaximumv_v32f64:
1303 ; CHECK:       // %bb.0:
1304 ; CHECK-NEXT:    ptrue p0.d, vl32
1305 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1306 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
1307 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1308 ; CHECK-NEXT:    ret
1309   %vec = load <32 x double>, ptr %a
1310   %max = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> %vec)
1311   ret double %max
1312 }
1313
1314 ;
1315 ; FMINV
1316 ;
1317
1318 define half @fminimumv_v4f16(<4 x half> %a) vscale_range(2,0) #0 {
1319 ; CHECK-LABEL: fminimumv_v4f16:
1320 ; CHECK:       // %bb.0:
1321 ; CHECK-NEXT:    fminv h0, v0.4h
1322 ; CHECK-NEXT:    ret
1323   %min = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> %a)
1324   ret half %min
1325 }
1326
1327 define half @fminimumv_v8f16(<8 x half> %a) vscale_range(2,0) #0 {
1328 ; CHECK-LABEL: fminimumv_v8f16:
1329 ; CHECK:       // %bb.0:
1330 ; CHECK-NEXT:    fminv h0, v0.8h
1331 ; CHECK-NEXT:    ret
1332   %min = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> %a)
1333   ret half %min
1334 }
1335
1336 define half @fminimumv_v16f16(ptr %a) vscale_range(2,0) #0 {
1337 ; CHECK-LABEL: fminimumv_v16f16:
1338 ; CHECK:       // %bb.0:
1339 ; CHECK-NEXT:    ptrue p0.h, vl16
1340 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1341 ; CHECK-NEXT:    fminv h0, p0, z0.h
1342 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1343 ; CHECK-NEXT:    ret
1344   %vec = load <16 x half>, ptr %a
1345   %min = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> %vec)
1346   ret half %min
1347 }
1348
1349 define half @fminimumv_v32f16(ptr %a) #0 {
1350 ; VBITS_GE_256-LABEL: fminimumv_v32f16:
1351 ; VBITS_GE_256:       // %bb.0:
1352 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
1353 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
1354 ; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
1355 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
1356 ; VBITS_GE_256-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
1357 ; VBITS_GE_256-NEXT:    fminv h0, p0, z0.h
1358 ; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
1359 ; VBITS_GE_256-NEXT:    ret
1360 ;
1361 ; VBITS_GE_512-LABEL: fminimumv_v32f16:
1362 ; VBITS_GE_512:       // %bb.0:
1363 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
1364 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
1365 ; VBITS_GE_512-NEXT:    fminv h0, p0, z0.h
1366 ; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
1367 ; VBITS_GE_512-NEXT:    ret
1368   %vec = load <32 x half>, ptr %a
1369   %min = call half @llvm.vector.reduce.fminimum.v32f16(<32 x half> %vec)
1370   ret half %min
1371 }
1372
1373 define half @fminimumv_v64f16(ptr %a) vscale_range(8,0) #0 {
1374 ; CHECK-LABEL: fminimumv_v64f16:
1375 ; CHECK:       // %bb.0:
1376 ; CHECK-NEXT:    ptrue p0.h, vl64
1377 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1378 ; CHECK-NEXT:    fminv h0, p0, z0.h
1379 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1380 ; CHECK-NEXT:    ret
1381   %vec = load <64 x half>, ptr %a
1382   %min = call half @llvm.vector.reduce.fminimum.v64f16(<64 x half> %vec)
1383   ret half %min
1384 }
1385
1386 define half @fminimumv_v128f16(ptr %a) vscale_range(16,0) #0 {
1387 ; CHECK-LABEL: fminimumv_v128f16:
1388 ; CHECK:       // %bb.0:
1389 ; CHECK-NEXT:    ptrue p0.h, vl128
1390 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
1391 ; CHECK-NEXT:    fminv h0, p0, z0.h
1392 ; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
1393 ; CHECK-NEXT:    ret
1394   %vec = load <128 x half>, ptr %a
1395   %min = call half @llvm.vector.reduce.fminimum.v128f16(<128 x half> %vec)
1396   ret half %min
1397 }
1398
1399 ; Don't use SVE for 64-bit f32 vectors.
1400 define float @fminimumv_v2f32(<2 x float> %a) vscale_range(2,0) #0 {
1401 ; CHECK-LABEL: fminimumv_v2f32:
1402 ; CHECK:       // %bb.0:
1403 ; CHECK-NEXT:    fminp s0, v0.2s
1404 ; CHECK-NEXT:    ret
1405   %min = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> %a)
1406   ret float %min
1407 }
1408
1409 ; Don't use SVE for 128-bit f32 vectors.
1410 define float @fminimumv_v4f32(<4 x float> %a) vscale_range(2,0) #0 {
1411 ; CHECK-LABEL: fminimumv_v4f32:
1412 ; CHECK:       // %bb.0:
1413 ; CHECK-NEXT:    fminv s0, v0.4s
1414 ; CHECK-NEXT:    ret
1415   %min = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %a)
1416   ret float %min
1417 }
1418
1419 define float @fminimumv_v8f32(ptr %a) vscale_range(2,0) #0 {
1420 ; CHECK-LABEL: fminimumv_v8f32:
1421 ; CHECK:       // %bb.0:
1422 ; CHECK-NEXT:    ptrue p0.s, vl8
1423 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
1424 ; CHECK-NEXT:    fminv s0, p0, z0.s
1425 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1426 ; CHECK-NEXT:    ret
1427   %vec = load <8 x float>, ptr %a
1428   %min = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %vec)
1429   ret float %min
1430 }
1431
1432 define float @fminimumv_v16f32(ptr %a) #0 {
1433 ; VBITS_GE_256-LABEL: fminimumv_v16f32:
1434 ; VBITS_GE_256:       // %bb.0:
1435 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
1436 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
1437 ; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
1438 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
1439 ; VBITS_GE_256-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
1440 ; VBITS_GE_256-NEXT:    fminv s0, p0, z0.s
1441 ; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
1442 ; VBITS_GE_256-NEXT:    ret
1443 ;
1444 ; VBITS_GE_512-LABEL: fminimumv_v16f32:
1445 ; VBITS_GE_512:       // %bb.0:
1446 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
1447 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
1448 ; VBITS_GE_512-NEXT:    fminv s0, p0, z0.s
1449 ; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
1450 ; VBITS_GE_512-NEXT:    ret
1451   %vec = load <16 x float>, ptr %a
1452   %min = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> %vec)
1453   ret float %min
1454 }
1455
1456 define float @fminimumv_v32f32(ptr %a) vscale_range(8,0) #0 {
1457 ; CHECK-LABEL: fminimumv_v32f32:
1458 ; CHECK:       // %bb.0:
1459 ; CHECK-NEXT:    ptrue p0.s, vl32
1460 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
1461 ; CHECK-NEXT:    fminv s0, p0, z0.s
1462 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1463 ; CHECK-NEXT:    ret
1464   %vec = load <32 x float>, ptr %a
1465   %min = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> %vec)
1466   ret float %min
1467 }
1468
1469 define float @fminimumv_v64f32(ptr %a) vscale_range(16,0) #0 {
1470 ; CHECK-LABEL: fminimumv_v64f32:
1471 ; CHECK:       // %bb.0:
1472 ; CHECK-NEXT:    ptrue p0.s, vl64
1473 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
1474 ; CHECK-NEXT:    fminv s0, p0, z0.s
1475 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
1476 ; CHECK-NEXT:    ret
1477   %vec = load <64 x float>, ptr %a
1478   %min = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> %vec)
1479   ret float %min
1480 }
1481
1482 ; Nothing to do for single element vectors.
1483 define double @fminimumv_v1f64(<1 x double> %a) vscale_range(2,0) #0 {
1484 ; CHECK-LABEL: fminimumv_v1f64:
1485 ; CHECK:       // %bb.0:
1486 ; CHECK-NEXT:    ret
1487   %min = call double @llvm.vector.reduce.fminimum.v1f64(<1 x double> %a)
1488   ret double %min
1489 }
1490
1491 ; Don't use SVE for 128-bit f64 vectors.
1492 define double @fminimumv_v2f64(<2 x double> %a) vscale_range(2,0) #0 {
1493 ; CHECK-LABEL: fminimumv_v2f64:
1494 ; CHECK:       // %bb.0:
1495 ; CHECK-NEXT:    fminp d0, v0.2d
1496 ; CHECK-NEXT:    ret
1497   %min = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %a)
1498   ret double %min
1499 }
1500
1501 define double @fminimumv_v4f64(ptr %a) vscale_range(2,0) #0 {
1502 ; CHECK-LABEL: fminimumv_v4f64:
1503 ; CHECK:       // %bb.0:
1504 ; CHECK-NEXT:    ptrue p0.d, vl4
1505 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1506 ; CHECK-NEXT:    fminv d0, p0, z0.d
1507 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1508 ; CHECK-NEXT:    ret
1509   %vec = load <4 x double>, ptr %a
1510   %min = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %vec)
1511   ret double %min
1512 }
1513
1514 define double @fminimumv_v8f64(ptr %a) #0 {
1515 ; VBITS_GE_256-LABEL: fminimumv_v8f64:
1516 ; VBITS_GE_256:       // %bb.0:
1517 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
1518 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
1519 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
1520 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
1521 ; VBITS_GE_256-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
1522 ; VBITS_GE_256-NEXT:    fminv d0, p0, z0.d
1523 ; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
1524 ; VBITS_GE_256-NEXT:    ret
1525 ;
1526 ; VBITS_GE_512-LABEL: fminimumv_v8f64:
1527 ; VBITS_GE_512:       // %bb.0:
1528 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
1529 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
1530 ; VBITS_GE_512-NEXT:    fminv d0, p0, z0.d
1531 ; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
1532 ; VBITS_GE_512-NEXT:    ret
1533   %vec = load <8 x double>, ptr %a
1534   %min = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> %vec)
1535   ret double %min
1536 }
1537
1538 define double @fminimumv_v16f64(ptr %a) vscale_range(8,0) #0 {
1539 ; CHECK-LABEL: fminimumv_v16f64:
1540 ; CHECK:       // %bb.0:
1541 ; CHECK-NEXT:    ptrue p0.d, vl16
1542 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1543 ; CHECK-NEXT:    fminv d0, p0, z0.d
1544 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1545 ; CHECK-NEXT:    ret
1546   %vec = load <16 x double>, ptr %a
1547   %min = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> %vec)
1548   ret double %min
1549 }
1550
1551 define double @fminimumv_v32f64(ptr %a) vscale_range(16,0) #0 {
1552 ; CHECK-LABEL: fminimumv_v32f64:
1553 ; CHECK:       // %bb.0:
1554 ; CHECK-NEXT:    ptrue p0.d, vl32
1555 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
1556 ; CHECK-NEXT:    fminv d0, p0, z0.d
1557 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1558 ; CHECK-NEXT:    ret
1559   %vec = load <32 x double>, ptr %a
1560   %min = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> %vec)
1561   ret double %min
1562 }
1563
1564 attributes #0 = { "target-features"="+sve" }
1565
1566 declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
1567 declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
1568 declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
1569 declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>)
1570 declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>)
1571 declare half @llvm.vector.reduce.fadd.v128f16(half, <128 x half>)
1572
1573 declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
1574 declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
1575 declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
1576 declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
1577 declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>)
1578 declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>)
1579
1580 declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>)
1581 declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
1582 declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
1583 declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>)
1584 declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>)
1585 declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>)
1586
; llvm.vector.reduce.fmax declarations: each variant takes only the vector to
; reduce (no start value) and returns a scalar of the element type.
; half element type, 4 to 128 elements.
1587 declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
1588 declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
1589 declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
1590 declare half @llvm.vector.reduce.fmax.v32f16(<32 x half>)
1591 declare half @llvm.vector.reduce.fmax.v64f16(<64 x half>)
1592 declare half @llvm.vector.reduce.fmax.v128f16(<128 x half>)
1593
; float element type, 2 to 64 elements.
1594 declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
1595 declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
1596 declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
1597 declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
1598 declare float @llvm.vector.reduce.fmax.v32f32(<32 x float>)
1599 declare float @llvm.vector.reduce.fmax.v64f32(<64 x float>)
1600
; double element type, 1 to 32 elements.
1601 declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
1602 declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
1603 declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
1604 declare double @llvm.vector.reduce.fmax.v8f64(<8 x double>)
1605 declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>)
1606 declare double @llvm.vector.reduce.fmax.v32f64(<32 x double>)
1607
; llvm.vector.reduce.fmin declarations: each variant takes only the vector to
; reduce (no start value) and returns a scalar of the element type.
; half element type, 4 to 128 elements.
1608 declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
1609 declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
1610 declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
1611 declare half @llvm.vector.reduce.fmin.v32f16(<32 x half>)
1612 declare half @llvm.vector.reduce.fmin.v64f16(<64 x half>)
1613 declare half @llvm.vector.reduce.fmin.v128f16(<128 x half>)
1614
; float element type, 2 to 64 elements.
1615 declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
1616 declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
1617 declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
1618 declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>)
1619 declare float @llvm.vector.reduce.fmin.v32f32(<32 x float>)
1620 declare float @llvm.vector.reduce.fmin.v64f32(<64 x float>)
1621
; double element type, 1 to 32 elements.
1622 declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>)
1623 declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
1624 declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
1625 declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>)
1626 declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>)
1627 declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>)
1628
; llvm.vector.reduce.fmaximum declarations: each variant takes only the vector
; to reduce (no start value) and returns a scalar of the element type.
; This is a separate intrinsic from llvm.vector.reduce.fmax; consult the LLVM
; LangRef for its NaN and signed-zero semantics.
; half element type, 4 to 128 elements.
1629 declare half @llvm.vector.reduce.fmaximum.v4f16(<4 x half>)
1630 declare half @llvm.vector.reduce.fmaximum.v8f16(<8 x half>)
1631 declare half @llvm.vector.reduce.fmaximum.v16f16(<16 x half>)
1632 declare half @llvm.vector.reduce.fmaximum.v32f16(<32 x half>)
1633 declare half @llvm.vector.reduce.fmaximum.v64f16(<64 x half>)
1634 declare half @llvm.vector.reduce.fmaximum.v128f16(<128 x half>)
1635
; float element type, 2 to 64 elements.
1636 declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
1637 declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
1638 declare float @llvm.vector.reduce.fmaximum.v8f32(<8 x float>)
1639 declare float @llvm.vector.reduce.fmaximum.v16f32(<16 x float>)
1640 declare float @llvm.vector.reduce.fmaximum.v32f32(<32 x float>)
1641 declare float @llvm.vector.reduce.fmaximum.v64f32(<64 x float>)
1642
; double element type, 1 to 32 elements.
1643 declare double @llvm.vector.reduce.fmaximum.v1f64(<1 x double>)
1644 declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
1645 declare double @llvm.vector.reduce.fmaximum.v4f64(<4 x double>)
1646 declare double @llvm.vector.reduce.fmaximum.v8f64(<8 x double>)
1647 declare double @llvm.vector.reduce.fmaximum.v16f64(<16 x double>)
1648 declare double @llvm.vector.reduce.fmaximum.v32f64(<32 x double>)
1649
; llvm.vector.reduce.fminimum declarations: each variant takes only the vector
; to reduce (no start value) and returns a scalar of the element type.
; This is a separate intrinsic from llvm.vector.reduce.fmin; consult the LLVM
; LangRef for its NaN and signed-zero semantics.
; half element type, 4 to 128 elements.
1650 declare half @llvm.vector.reduce.fminimum.v4f16(<4 x half>)
1651 declare half @llvm.vector.reduce.fminimum.v8f16(<8 x half>)
1652 declare half @llvm.vector.reduce.fminimum.v16f16(<16 x half>)
1653 declare half @llvm.vector.reduce.fminimum.v32f16(<32 x half>)
1654 declare half @llvm.vector.reduce.fminimum.v64f16(<64 x half>)
1655 declare half @llvm.vector.reduce.fminimum.v128f16(<128 x half>)
1656
; float element type, 2 to 64 elements.
1657 declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>)
1658 declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)
1659 declare float @llvm.vector.reduce.fminimum.v8f32(<8 x float>)
1660 declare float @llvm.vector.reduce.fminimum.v16f32(<16 x float>)
1661 declare float @llvm.vector.reduce.fminimum.v32f32(<32 x float>)
1662 declare float @llvm.vector.reduce.fminimum.v64f32(<64 x float>)
1663
; double element type, 1 to 32 elements.
1664 declare double @llvm.vector.reduce.fminimum.v1f64(<1 x double>)
1665 declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>)
1666 declare double @llvm.vector.reduce.fminimum.v4f64(<4 x double>)
1667 declare double @llvm.vector.reduce.fminimum.v8f64(<8 x double>)
1668 declare double @llvm.vector.reduce.fminimum.v16f64(<16 x double>)
1669 declare double @llvm.vector.reduce.fminimum.v32f64(<32 x double>)