llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll

   1 ; RUN: llc -aarch64-sve-vector-bits-min=128  -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
   2 ; RUN: llc -aarch64-sve-vector-bits-min=256  -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_EQ_256
   3 ; RUN: llc -aarch64-sve-vector-bits-min=384  -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK
   4 ; RUN: llc -aarch64-sve-vector-bits-min=512  -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
   5 ; RUN: llc -aarch64-sve-vector-bits-min=640  -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
   6 ; RUN: llc -aarch64-sve-vector-bits-min=768  -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
   7 ; RUN: llc -aarch64-sve-vector-bits-min=896  -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
   8 ; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
   9 ; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  10 ; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  11 ; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  12 ; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  13 ; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  14 ; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  15 ; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  16 ; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
  17
     ; The run lines above sweep the minimum SVE vector length from 128 to 2048
     ; bits in 128-bit steps. The 128-bit run enables only the negative check
     ; below (no ptrue may be emitted). From 256 bits up the common checks apply.
     ; The width-specific prefixes are cumulative: a run at 1024 bits also enables
     ; the 512-bit checks, and the 2048-bit run enables all of them. Only the
     ; 256-bit run enables the 256-bit-specific checks, and the 384-bit run gets
     ; the common checks alone.
     ; NOTE(review): every run line defines VBYTES, but no use of it is visible in
     ; this part of the file - confirm it is still referenced somewhere.
  18 target triple = "aarch64-unknown-linux-gnu"
  19
  20 ; Don't use SVE when its registers are no bigger than NEON.
  21 ; NO_SVE-NOT: ptrue
  22
  23 ;
  24 ; ANDV
  25 ;
  26
  27 ; No single instruction NEON ANDV support. Use SVE.
     ; The by-value <8 x i8> argument is expected to be AND-reduced straight from
     ; z0 under an 8-lane byte predicate, with the scalar result moved to w0.
  28 define i8 @andv_v8i8(<8 x i8> %a) #0 {
  29 ; CHECK-LABEL: andv_v8i8:
  30 ; CHECK: ptrue [[PG:p[0-9]+]].b, vl8
  31 ; CHECK: andv b[[REDUCE:[0-9]+]], [[PG]], z0.b
  32 ; CHECK: fmov w0, s[[REDUCE]]
  33 ; CHECK: ret
  34   %res = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a)
  35   ret i8 %res
  36 }
  37
  38 ; No single instruction NEON ANDV support. Use SVE.
     ; The by-value <16 x i8> argument is expected to be AND-reduced straight from
     ; z0 under a 16-lane byte predicate, with the scalar result moved to w0.
  39 define i8 @andv_v16i8(<16 x i8> %a) #0 {
  40 ; CHECK-LABEL: andv_v16i8:
  41 ; CHECK: ptrue [[PG:p[0-9]+]].b, vl16
  42 ; CHECK: andv b[[REDUCE:[0-9]+]], [[PG]], z0.b
  43 ; CHECK: fmov w0, s[[REDUCE]]
  44 ; CHECK: ret
  45   %res = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a)
  46   ret i8 %res
  47 }
  48
  49 define i8 @andv_v32i8(<32 x i8>* %a) #0 {
     ; Loads <32 x i8> from %a and AND-reduces it. Every configuration of 256 bits
     ; and up must follow the 32-lane predicate setup immediately with the
     ; predicated load, the andv, the move to w0 and the return.
  50 ; CHECK-LABEL: andv_v32i8:
  51 ; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
  52 ; CHECK-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
  53 ; CHECK-NEXT: andv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
  54 ; CHECK-NEXT: fmov w0, s[[REDUCE]]
  55 ; CHECK-NEXT: ret
  56   %op = load <32 x i8>, <32 x i8>* %a
  57   %res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %op)
  58   ret i8 %res
  59 }
  60
  61 define i8 @andv_v64i8(<64 x i8>* %a) #0 {
     ; Loads <64 x i8> from %a and AND-reduces it. Configurations of 512 bits and
     ; up use one 64-lane load feeding a single andv. The 256-bit configuration
     ; has to split the vector: two 32-lane loads (the second at register offset
     ; 32) are combined with a bitwise `and` before a single andv. The 384-bit
     ; configuration only has its label matched.
  62 ; CHECK-LABEL: andv_v64i8:
  63 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
  64 ; VBITS_GE_512-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
  65 ; VBITS_GE_512-NEXT: andv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
  66 ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
  67 ; VBITS_GE_512-NEXT: ret
  68
  69 ; Ensure sensible type legalisation.
  70 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
  71 ; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
  72 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
  73 ; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
  74 ; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d
  75 ; VBITS_EQ_256-DAG: andv b[[REDUCE:[0-9]+]], [[PG]], [[AND]].b
  76 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
  77 ; VBITS_EQ_256-NEXT: ret
  78
  79   %op = load <64 x i8>, <64 x i8>* %a
  80   %res = call i8 @llvm.vector.reduce.and.v64i8(<64 x i8> %op)
  81   ret i8 %res
  82 }
  83
  84 define i8 @andv_v128i8(<128 x i8>* %a) #0 {
     ; Loads <128 x i8> from %a and AND-reduces it. The instruction sequence is
     ; only verified for configurations of 1024 bits and up; narrower ones just
     ; match the label.
  85 ; CHECK-LABEL: andv_v128i8:
  86 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
  87 ; VBITS_GE_1024-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
  88 ; VBITS_GE_1024-NEXT: andv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
  89 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
  90 ; VBITS_GE_1024-NEXT: ret
  91   %op = load <128 x i8>, <128 x i8>* %a
  92   %res = call i8 @llvm.vector.reduce.and.v128i8(<128 x i8> %op)
  93   ret i8 %res
  94 }
  95
  96 define i8 @andv_v256i8(<256 x i8>* %a) #0 {
     ; Loads <256 x i8> from %a and AND-reduces it. The instruction sequence is
     ; only verified for the 2048-bit configuration; narrower ones just match the
     ; label.
  97 ; CHECK-LABEL: andv_v256i8:
  98 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
  99 ; VBITS_GE_2048-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
 100 ; VBITS_GE_2048-NEXT: andv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
 101 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 102 ; VBITS_GE_2048-NEXT: ret
 103   %op = load <256 x i8>, <256 x i8>* %a
 104   %res = call i8 @llvm.vector.reduce.and.v256i8(<256 x i8> %op)
 105   ret i8 %res
 106 }
 107
 108 ; No single instruction NEON ANDV support. Use SVE.
     ; The by-value <4 x i16> argument is expected to be AND-reduced straight from
     ; z0 under a 4-lane halfword predicate, with the scalar result moved to w0.
 109 define i16 @andv_v4i16(<4 x i16> %a) #0 {
 110 ; CHECK-LABEL: andv_v4i16:
 111 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl4
 112 ; CHECK: andv h[[REDUCE:[0-9]+]], [[PG]], z0.h
 113 ; CHECK: fmov w0, s[[REDUCE]]
 114 ; CHECK: ret
 115   %res = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a)
 116   ret i16 %res
 117 }
 118
 119 ; No single instruction NEON ANDV support. Use SVE.
     ; The by-value <8 x i16> argument is expected to be AND-reduced straight from
     ; z0 under an 8-lane halfword predicate, with the scalar result moved to w0.
 120 define i16 @andv_v8i16(<8 x i16> %a) #0 {
 121 ; CHECK-LABEL: andv_v8i16:
 122 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl8
 123 ; CHECK: andv h[[REDUCE:[0-9]+]], [[PG]], z0.h
 124 ; CHECK: fmov w0, s[[REDUCE]]
 125 ; CHECK: ret
 126   %res = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a)
 127   ret i16 %res
 128 }
 129
 130 define i16 @andv_v16i16(<16 x i16>* %a) #0 {
     ; Loads <16 x i16> from %a and AND-reduces it. Every configuration of 256
     ; bits and up must follow the 16-lane predicate setup immediately with the
     ; predicated load, the andv, the move to w0 and the return.
 131 ; CHECK-LABEL: andv_v16i16:
 132 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
 133 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 134 ; CHECK-NEXT: andv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
 135 ; CHECK-NEXT: fmov w0, s[[REDUCE]]
 136 ; CHECK-NEXT: ret
 137   %op = load <16 x i16>, <16 x i16>* %a
 138   %res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %op)
 139   ret i16 %res
 140 }
 141
 142 define i16 @andv_v32i16(<32 x i16>* %a) #0 {
     ; Loads <32 x i16> from %a and AND-reduces it. Configurations of 512 bits and
     ; up use one 32-lane load feeding a single andv. The 256-bit configuration
     ; has to split the vector: two 16-lane loads (the second indexed by 16
     ; halfword elements) are combined with a bitwise `and` before a single andv.
     ; The 384-bit configuration only has its label matched.
 143 ; CHECK-LABEL: andv_v32i16:
 144 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
 145 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 146 ; VBITS_GE_512-NEXT: andv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
 147 ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
 148 ; VBITS_GE_512-NEXT: ret
 149
 150 ; Ensure sensible type legalisation.
 151 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
 152 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 153 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
 154 ; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 155 ; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d
 156 ; VBITS_EQ_256-DAG: andv h[[REDUCE:[0-9]+]], [[PG]], [[AND]].h
 157 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 158 ; VBITS_EQ_256-NEXT: ret
 159   %op = load <32 x i16>, <32 x i16>* %a
 160   %res = call i16 @llvm.vector.reduce.and.v32i16(<32 x i16> %op)
 161   ret i16 %res
 162 }
 163
 164 define i16 @andv_v64i16(<64 x i16>* %a) #0 {
     ; Loads <64 x i16> from %a and AND-reduces it. The instruction sequence is
     ; only verified for configurations of 1024 bits and up; narrower ones just
     ; match the label.
 165 ; CHECK-LABEL: andv_v64i16:
 166 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
 167 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 168 ; VBITS_GE_1024-NEXT: andv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
 169 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 170 ; VBITS_GE_1024-NEXT: ret
 171   %op = load <64 x i16>, <64 x i16>* %a
 172   %res = call i16 @llvm.vector.reduce.and.v64i16(<64 x i16> %op)
 173   ret i16 %res
 174 }
 175
 176 define i16 @andv_v128i16(<128 x i16>* %a) #0 {
     ; Loads <128 x i16> from %a and AND-reduces it. The instruction sequence is
     ; only verified for the 2048-bit configuration; narrower ones just match the
     ; label.
 177 ; CHECK-LABEL: andv_v128i16:
 178 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
 179 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 180 ; VBITS_GE_2048-NEXT: andv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
 181 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 182 ; VBITS_GE_2048-NEXT: ret
 183   %op = load <128 x i16>, <128 x i16>* %a
 184   %res = call i16 @llvm.vector.reduce.and.v128i16(<128 x i16> %op)
 185   ret i16 %res
 186 }
 187
 188 ; No single instruction NEON ANDV support. Use SVE.
     ; The by-value <2 x i32> argument is expected to be AND-reduced straight from
     ; z0 under a 2-lane word predicate, with the scalar result moved to w0.
 189 define i32 @andv_v2i32(<2 x i32> %a) #0 {
 190 ; CHECK-LABEL: andv_v2i32:
 191 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl2
 192 ; CHECK: andv [[REDUCE:s[0-9]+]], [[PG]], z0.s
 193 ; CHECK: fmov w0, [[REDUCE]]
 194 ; CHECK: ret
 195   %res = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a)
 196   ret i32 %res
 197 }
 198
 199 ; No single instruction NEON ANDV support. Use SVE.
     ; The by-value <4 x i32> argument is expected to be AND-reduced straight from
     ; z0 under a 4-lane word predicate, with the scalar result moved to w0.
 200 define i32 @andv_v4i32(<4 x i32> %a) #0 {
 201 ; CHECK-LABEL: andv_v4i32:
 202 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl4
 203 ; CHECK: andv [[REDUCE:s[0-9]+]], [[PG]], z0.s
 204 ; CHECK: fmov w0, [[REDUCE]]
 205 ; CHECK: ret
 206   %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
 207   ret i32 %res
 208 }
 209
 210 define i32 @andv_v8i32(<8 x i32>* %a) #0 {
     ; Loads <8 x i32> from %a and AND-reduces it. Every configuration of 256 bits
     ; and up must follow the 8-lane predicate setup immediately with the
     ; predicated load, the andv, the move to w0 and the return.
 211 ; CHECK-LABEL: andv_v8i32:
 212 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
 213 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 214 ; CHECK-NEXT: andv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
 215 ; CHECK-NEXT: fmov w0, [[REDUCE]]
 216 ; CHECK-NEXT: ret
 217   %op = load <8 x i32>, <8 x i32>* %a
 218   %res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %op)
 219   ret i32 %res
 220 }
 221
 222 define i32 @andv_v16i32(<16 x i32>* %a) #0 {
     ; Loads <16 x i32> from %a and AND-reduces it. Configurations of 512 bits and
     ; up use one 16-lane load feeding a single andv. The 256-bit configuration
     ; has to split the vector: two 8-lane loads (the second indexed by 8 word
     ; elements) are combined with a bitwise `and` before a single andv. The
     ; 384-bit configuration only has its label matched.
 223 ; CHECK-LABEL: andv_v16i32:
 224 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
 225 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 226 ; VBITS_GE_512-NEXT: andv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
 227 ; VBITS_GE_512-NEXT: fmov w0, [[REDUCE]]
 228 ; VBITS_GE_512-NEXT: ret
 229
 230 ; Ensure sensible type legalisation.
 231 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
 232 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 233 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
 234 ; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 235 ; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d
 236 ; VBITS_EQ_256-DAG: andv [[REDUCE:s[0-9]+]], [[PG]], [[AND]].s
 237 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 238 ; VBITS_EQ_256-NEXT: ret
 239   %op = load <16 x i32>, <16 x i32>* %a
 240   %res = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %op)
 241   ret i32 %res
 242 }
 243
 244 define i32 @andv_v32i32(<32 x i32>* %a) #0 {
     ; Loads <32 x i32> from %a and AND-reduces it. The instruction sequence is
     ; only verified for configurations of 1024 bits and up; narrower ones just
     ; match the label.
 245 ; CHECK-LABEL: andv_v32i32:
 246 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
 247 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 248 ; VBITS_GE_1024-NEXT: andv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
 249 ; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]]
 250 ; VBITS_GE_1024-NEXT: ret
 251   %op = load <32 x i32>, <32 x i32>* %a
 252   %res = call i32 @llvm.vector.reduce.and.v32i32(<32 x i32> %op)
 253   ret i32 %res
 254 }
 255
 256 define i32 @andv_v64i32(<64 x i32>* %a) #0 {
     ; Loads <64 x i32> from %a and AND-reduces it. The instruction sequence is
     ; only verified for the 2048-bit configuration; narrower ones just match the
     ; label.
 257 ; CHECK-LABEL: andv_v64i32:
 258 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
 259 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 260 ; VBITS_GE_2048-NEXT: andv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
 261 ; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]]
 262 ; VBITS_GE_2048-NEXT: ret
 263   %op = load <64 x i32>, <64 x i32>* %a
 264   %res = call i32 @llvm.vector.reduce.and.v64i32(<64 x i32> %op)
 265   ret i32 %res
 266 }
 267
 268 ; Nothing to do for single element vectors.
     ; The AND reduction of <1 x i64> is expected to collapse to a plain move of
     ; d0 into x0, with no reduction instruction required by the checks.
 269 define i64 @andv_v1i64(<1 x i64> %a) #0 {
 270 ; CHECK-LABEL: andv_v1i64:
 271 ; CHECK: fmov x0, d0
 272 ; CHECK: ret
 273   %res = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %a)
 274   ret i64 %res
 275 }
 276
 277 ; Use SVE for 128-bit vectors
     ; The by-value <2 x i64> argument is expected to be AND-reduced straight from
     ; z0 under a 2-lane doubleword predicate, with the result moved to x0.
 278 define i64 @andv_v2i64(<2 x i64> %a) #0 {
 279 ; CHECK-LABEL: andv_v2i64:
 280 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl2
 281 ; CHECK: andv [[REDUCE:d[0-9]+]], [[PG]], z0.d
 282 ; CHECK: fmov x0, [[REDUCE]]
 283 ; CHECK: ret
 284   %res = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a)
 285   ret i64 %res
 286 }
 287
 288 define i64 @andv_v4i64(<4 x i64>* %a) #0 {
     ; Loads <4 x i64> from %a and AND-reduces it. Every configuration of 256 bits
     ; and up must follow the 4-lane predicate setup immediately with the
     ; predicated load, the andv, the move to x0 and the return.
 289 ; CHECK-LABEL: andv_v4i64:
 290 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
 291 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 292 ; CHECK-NEXT: andv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
 293 ; CHECK-NEXT: fmov x0, [[REDUCE]]
 294 ; CHECK-NEXT: ret
 295   %op = load <4 x i64>, <4 x i64>* %a
 296   %res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %op)
 297   ret i64 %res
 298 }
 299
 300 define i64 @andv_v8i64(<8 x i64>* %a) #0 {
     ; Loads <8 x i64> from %a and AND-reduces it. Configurations of 512 bits and
     ; up use one 8-lane load feeding a single andv. The 256-bit configuration has
     ; to split the vector: two 4-lane loads (the second indexed by 4 doubleword
     ; elements) are combined with a bitwise `and` before a single andv. The
     ; 384-bit configuration only has its label matched.
 301 ; CHECK-LABEL: andv_v8i64:
 302 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
 303 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 304 ; VBITS_GE_512-NEXT: andv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
 305 ; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]]
 306 ; VBITS_GE_512-NEXT: ret
 307
 308 ; Ensure sensible type legalisation.
 309 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
 310 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 311 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
 312 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 313 ; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d
 314 ; VBITS_EQ_256-DAG: andv [[REDUCE:d[0-9]+]], [[PG]], [[AND]].d
 315 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 316 ; VBITS_EQ_256-NEXT: ret
 317   %op = load <8 x i64>, <8 x i64>* %a
 318   %res = call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %op)
 319   ret i64 %res
 320 }
 321
 322 define i64 @andv_v16i64(<16 x i64>* %a) #0 {
     ; Loads <16 x i64> from %a and AND-reduces it. The instruction sequence is
     ; only verified for configurations of 1024 bits and up; narrower ones just
     ; match the label.
 323 ; CHECK-LABEL: andv_v16i64:
 324 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
 325 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 326 ; VBITS_GE_1024-NEXT: andv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
 327 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 328 ; VBITS_GE_1024-NEXT: ret
 329   %op = load <16 x i64>, <16 x i64>* %a
 330   %res = call i64 @llvm.vector.reduce.and.v16i64(<16 x i64> %op)
 331   ret i64 %res
 332 }
 333
 334 define i64 @andv_v32i64(<32 x i64>* %a) #0 {
     ; Loads <32 x i64> from %a and AND-reduces it. The instruction sequence is
     ; only verified for the 2048-bit configuration; narrower ones just match the
     ; label.
 335 ; CHECK-LABEL: andv_v32i64:
 336 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
 337 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 338 ; VBITS_GE_2048-NEXT: andv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
 339 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 340 ; VBITS_GE_2048-NEXT: ret
 341   %op = load <32 x i64>, <32 x i64>* %a
 342   %res = call i64 @llvm.vector.reduce.and.v32i64(<32 x i64> %op)
 343   ret i64 %res
 344 }
 345
 346 ;
 347 ; EORV
 348 ;
 349
 350 ; No single instruction NEON EORV support. Use SVE.
     ; The by-value <8 x i8> argument is expected to be XOR-reduced straight from
     ; z0 under an 8-lane byte predicate, with the scalar result moved to w0.
 351 define i8 @eorv_v8i8(<8 x i8> %a) #0 {
 352 ; CHECK-LABEL: eorv_v8i8:
 353 ; CHECK: ptrue [[PG:p[0-9]+]].b, vl8
 354 ; CHECK: eorv b[[REDUCE:[0-9]+]], [[PG]], z0.b
 355 ; CHECK: fmov w0, s[[REDUCE]]
 356 ; CHECK: ret
 357   %res = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a)
 358   ret i8 %res
 359 }
 360
 361 ; No single instruction NEON EORV support. Use SVE.
     ; The by-value <16 x i8> argument is expected to be XOR-reduced straight from
     ; z0 under a 16-lane byte predicate, with the scalar result moved to w0.
 362 define i8 @eorv_v16i8(<16 x i8> %a) #0 {
 363 ; CHECK-LABEL: eorv_v16i8:
 364 ; CHECK: ptrue [[PG:p[0-9]+]].b, vl16
 365 ; CHECK: eorv b[[REDUCE:[0-9]+]], [[PG]], z0.b
 366 ; CHECK: fmov w0, s[[REDUCE]]
 367 ; CHECK: ret
 368   %res = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a)
 369   ret i8 %res
 370 }
 371
 372 define i8 @eorv_v32i8(<32 x i8>* %a) #0 {
     ; Loads <32 x i8> from %a and XOR-reduces it. Every configuration of 256 bits
     ; and up must follow the 32-lane predicate setup immediately with the
     ; predicated load, the eorv, the move to w0 and the return.
 373 ; CHECK-LABEL: eorv_v32i8:
 374 ; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
 375 ; CHECK-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
 376 ; CHECK-NEXT: eorv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
 377 ; CHECK-NEXT: fmov w0, s[[REDUCE]]
 378 ; CHECK-NEXT: ret
 379   %op = load <32 x i8>, <32 x i8>* %a
 380   %res = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %op)
 381   ret i8 %res
 382 }
 383
 384 define i8 @eorv_v64i8(<64 x i8>* %a) #0 {
     ; Loads <64 x i8> from %a and XOR-reduces it. Configurations of 512 bits and
     ; up use one 64-lane load feeding a single eorv. The 256-bit configuration
     ; has to split the vector: two 32-lane loads (the second at register offset
     ; 32) are combined with a bitwise `eor` before a single eorv. The 384-bit
     ; configuration only has its label matched.
 385 ; CHECK-LABEL: eorv_v64i8:
 386 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
 387 ; VBITS_GE_512-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
 388 ; VBITS_GE_512-NEXT: eorv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
 389 ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
 390 ; VBITS_GE_512-NEXT: ret
 391
 392 ; Ensure sensible type legalisation.
 393 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
 394 ; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 395 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
 396 ; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 397 ; VBITS_EQ_256-DAG: eor [[EOR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 398 ; VBITS_EQ_256-DAG: eorv b[[REDUCE:[0-9]+]], [[PG]], [[EOR]].b
 399 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 400 ; VBITS_EQ_256-NEXT: ret
 401
 402   %op = load <64 x i8>, <64 x i8>* %a
 403   %res = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> %op)
 404   ret i8 %res
 405 }
 406
 407 define i8 @eorv_v128i8(<128 x i8>* %a) #0 {
     ; Loads <128 x i8> from %a and XOR-reduces it. The instruction sequence is
     ; only verified for configurations of 1024 bits and up; narrower ones just
     ; match the label.
 408 ; CHECK-LABEL: eorv_v128i8:
 409 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
 410 ; VBITS_GE_1024-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
 411 ; VBITS_GE_1024-NEXT: eorv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
 412 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 413 ; VBITS_GE_1024-NEXT: ret
 414   %op = load <128 x i8>, <128 x i8>* %a
 415   %res = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> %op)
 416   ret i8 %res
 417 }
 418
 419 define i8 @eorv_v256i8(<256 x i8>* %a) #0 {
     ; Loads <256 x i8> from %a and XOR-reduces it. The instruction sequence is
     ; only verified for the 2048-bit configuration; narrower ones just match the
     ; label.
 420 ; CHECK-LABEL: eorv_v256i8:
 421 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
 422 ; VBITS_GE_2048-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
 423 ; VBITS_GE_2048-NEXT: eorv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
 424 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 425 ; VBITS_GE_2048-NEXT: ret
 426   %op = load <256 x i8>, <256 x i8>* %a
 427   %res = call i8 @llvm.vector.reduce.xor.v256i8(<256 x i8> %op)
 428   ret i8 %res
 429 }
 430
 431 ; No single instruction NEON EORV support. Use SVE.
     ; The by-value <4 x i16> argument is expected to be XOR-reduced straight from
     ; z0 under a 4-lane halfword predicate, with the scalar result moved to w0.
 432 define i16 @eorv_v4i16(<4 x i16> %a) #0 {
 433 ; CHECK-LABEL: eorv_v4i16:
 434 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl4
 435 ; CHECK: eorv h[[REDUCE:[0-9]+]], [[PG]], z0.h
 436 ; CHECK: fmov w0, s[[REDUCE]]
 437 ; CHECK: ret
 438   %res = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %a)
 439   ret i16 %res
 440 }
 441
 442 ; No single instruction NEON EORV support. Use SVE.
     ; The by-value <8 x i16> argument is expected to be XOR-reduced straight from
     ; z0 under an 8-lane halfword predicate, with the scalar result moved to w0.
 443 define i16 @eorv_v8i16(<8 x i16> %a) #0 {
 444 ; CHECK-LABEL: eorv_v8i16:
 445 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl8
 446 ; CHECK: eorv h[[REDUCE:[0-9]+]], [[PG]], z0.h
 447 ; CHECK: fmov w0, s[[REDUCE]]
 448 ; CHECK: ret
 449   %res = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %a)
 450   ret i16 %res
 451 }
 452
 453 define i16 @eorv_v16i16(<16 x i16>* %a) #0 {
     ; Loads <16 x i16> from %a and XOR-reduces it. Every configuration of 256
     ; bits and up must follow the 16-lane predicate setup immediately with the
     ; predicated load, the eorv, the move to w0 and the return.
 454 ; CHECK-LABEL: eorv_v16i16:
 455 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
 456 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 457 ; CHECK-NEXT: eorv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
 458 ; CHECK-NEXT: fmov w0, s[[REDUCE]]
 459 ; CHECK-NEXT: ret
 460   %op = load <16 x i16>, <16 x i16>* %a
 461   %res = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %op)
 462   ret i16 %res
 463 }
 464
 465 define i16 @eorv_v32i16(<32 x i16>* %a) #0 {
     ; Loads <32 x i16> from %a and XOR-reduces it. Configurations of 512 bits and
     ; up use one 32-lane load feeding a single eorv. The 256-bit configuration
     ; has to split the vector: two 16-lane loads (the second indexed by 16
     ; halfword elements) are combined with a bitwise `eor` before a single eorv.
     ; The 384-bit configuration only has its label matched.
 466 ; CHECK-LABEL: eorv_v32i16:
 467 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
 468 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 469 ; VBITS_GE_512-NEXT: eorv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
 470 ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
 471 ; VBITS_GE_512-NEXT: ret
 472
 473 ; Ensure sensible type legalisation.
 474 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
 475 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 476 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
 477 ; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 478 ; VBITS_EQ_256-DAG: eor [[EOR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 479 ; VBITS_EQ_256-DAG: eorv h[[REDUCE:[0-9]+]], [[PG]], [[EOR]].h
 480 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 481 ; VBITS_EQ_256-NEXT: ret
 482   %op = load <32 x i16>, <32 x i16>* %a
 483   %res = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> %op)
 484   ret i16 %res
 485 }
 486
 487 define i16 @eorv_v64i16(<64 x i16>* %a) #0 {
     ; Loads <64 x i16> from %a and XOR-reduces it. The instruction sequence is
     ; only verified for configurations of 1024 bits and up; narrower ones just
     ; match the label.
 488 ; CHECK-LABEL: eorv_v64i16:
 489 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
 490 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 491 ; VBITS_GE_1024-NEXT: eorv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
 492 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 493 ; VBITS_GE_1024-NEXT: ret
 494   %op = load <64 x i16>, <64 x i16>* %a
 495   %res = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> %op)
 496   ret i16 %res
 497 }
 498
 499 define i16 @eorv_v128i16(<128 x i16>* %a) #0 {
     ; Loads <128 x i16> from %a and XOR-reduces it. The instruction sequence is
     ; only verified for the 2048-bit configuration; narrower ones just match the
     ; label.
 500 ; CHECK-LABEL: eorv_v128i16:
 501 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
 502 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 503 ; VBITS_GE_2048-NEXT: eorv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
 504 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 505 ; VBITS_GE_2048-NEXT: ret
 506   %op = load <128 x i16>, <128 x i16>* %a
 507   %res = call i16 @llvm.vector.reduce.xor.v128i16(<128 x i16> %op)
 508   ret i16 %res
 509 }
 510
 511 ; No single instruction NEON EORV support. Use SVE.
     ; The by-value <2 x i32> argument is expected to be XOR-reduced straight from
     ; z0 under a 2-lane word predicate, with the scalar result moved to w0.
 512 define i32 @eorv_v2i32(<2 x i32> %a) #0 {
 513 ; CHECK-LABEL: eorv_v2i32:
 514 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl2
 515 ; CHECK: eorv [[REDUCE:s[0-9]+]], [[PG]], z0.s
 516 ; CHECK: fmov w0, [[REDUCE]]
 517 ; CHECK: ret
 518   %res = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a)
 519   ret i32 %res
 520 }
 521
 522 ; No single instruction NEON EORV support. Use SVE.
     ; The by-value <4 x i32> argument is expected to be XOR-reduced straight from
     ; z0 under a 4-lane word predicate, with the scalar result moved to w0.
 523 define i32 @eorv_v4i32(<4 x i32> %a) #0 {
 524 ; CHECK-LABEL: eorv_v4i32:
 525 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl4
 526 ; CHECK: eorv [[REDUCE:s[0-9]+]], [[PG]], z0.s
 527 ; CHECK: fmov w0, [[REDUCE]]
 528 ; CHECK: ret
 529   %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)
 530   ret i32 %res
 531 }
 532
 533 define i32 @eorv_v8i32(<8 x i32>* %a) #0 {
     ; Loads <8 x i32> from %a and XOR-reduces it. Every configuration of 256 bits
     ; and up must follow the 8-lane predicate setup immediately with the
     ; predicated load, the eorv, the move to w0 and the return.
 534 ; CHECK-LABEL: eorv_v8i32:
 535 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
 536 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 537 ; CHECK-NEXT: eorv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
 538 ; CHECK-NEXT: fmov w0, [[REDUCE]]
 539 ; CHECK-NEXT: ret
 540   %op = load <8 x i32>, <8 x i32>* %a
 541   %res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %op)
 542   ret i32 %res
 543 }
 544
 545 define i32 @eorv_v16i32(<16 x i32>* %a) #0 {
     ; Loads <16 x i32> from %a and XOR-reduces it. Configurations of 512 bits and
     ; up use one 16-lane load feeding a single eorv. The 256-bit configuration
     ; has to split the vector: two 8-lane loads (the second indexed by 8 word
     ; elements) are combined with a bitwise `eor` before a single eorv. The
     ; 384-bit configuration only has its label matched.
 546 ; CHECK-LABEL: eorv_v16i32:
 547 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
 548 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 549 ; VBITS_GE_512-NEXT: eorv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
 550 ; VBITS_GE_512-NEXT: fmov w0, [[REDUCE]]
 551 ; VBITS_GE_512-NEXT: ret
 552
 553 ; Ensure sensible type legalisation.
 554 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
 555 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 556 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
 557 ; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 558 ; VBITS_EQ_256-DAG: eor [[EOR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 559 ; VBITS_EQ_256-DAG: eorv [[REDUCE:s[0-9]+]], [[PG]], [[EOR]].s
 560 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 561 ; VBITS_EQ_256-NEXT: ret
 562   %op = load <16 x i32>, <16 x i32>* %a
 563   %res = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %op)
 564   ret i32 %res
 565 }
 566
 567 define i32 @eorv_v32i32(<32 x i32>* %a) #0 {
     ; Loads <32 x i32> from %a and XOR-reduces it. The instruction sequence is
     ; only verified for configurations of 1024 bits and up; narrower ones just
     ; match the label.
 568 ; CHECK-LABEL: eorv_v32i32:
 569 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
 570 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 571 ; VBITS_GE_1024-NEXT: eorv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
 572 ; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]]
 573 ; VBITS_GE_1024-NEXT: ret
 574   %op = load <32 x i32>, <32 x i32>* %a
 575   %res = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> %op)
 576   ret i32 %res
 577 }
 578
 579 define i32 @eorv_v64i32(<64 x i32>* %a) #0 {
     ; Loads <64 x i32> from %a and XOR-reduces it. The instruction sequence is
     ; only verified for the 2048-bit configuration; narrower ones just match the
     ; label.
 580 ; CHECK-LABEL: eorv_v64i32:
 581 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
 582 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 583 ; VBITS_GE_2048-NEXT: eorv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
 584 ; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]]
 585 ; VBITS_GE_2048-NEXT: ret
 586   %op = load <64 x i32>, <64 x i32>* %a
 587   %res = call i32 @llvm.vector.reduce.xor.v64i32(<64 x i32> %op)
 588   ret i32 %res
 589 }
 590
 591 ; Nothing to do for single element vectors.
     ; The XOR reduction of <1 x i64> is expected to collapse to a plain move of
     ; d0 into x0, with no reduction instruction required by the checks.
 592 define i64 @eorv_v1i64(<1 x i64> %a) #0 {
 593 ; CHECK-LABEL: eorv_v1i64:
 594 ; CHECK: fmov x0, d0
 595 ; CHECK: ret
 596   %res = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %a)
 597   ret i64 %res
 598 }
 599
 600 ; Use SVE for 128-bit vectors
     ; The by-value <2 x i64> argument is expected to be XOR-reduced straight from
     ; z0 under a 2-lane doubleword predicate, with the result moved to x0.
 601 define i64 @eorv_v2i64(<2 x i64> %a) #0 {
 602 ; CHECK-LABEL: eorv_v2i64:
 603 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl2
 604 ; CHECK: eorv [[REDUCE:d[0-9]+]], [[PG]], z0.d
 605 ; CHECK: fmov x0, [[REDUCE]]
 606 ; CHECK: ret
 607   %res = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %a)
 608   ret i64 %res
 609 }
 610
 611 define i64 @eorv_v4i64(<4 x i64>* %a) #0 {
     ; Loads <4 x i64> from %a and XOR-reduces it. Every configuration of 256 bits
     ; and up must follow the 4-lane predicate setup immediately with the
     ; predicated load, the eorv, the move to x0 and the return.
 612 ; CHECK-LABEL: eorv_v4i64:
 613 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
 614 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 615 ; CHECK-NEXT: eorv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
 616 ; CHECK-NEXT: fmov x0, [[REDUCE]]
 617 ; CHECK-NEXT: ret
 618   %op = load <4 x i64>, <4 x i64>* %a
 619   %res = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %op)
 620   ret i64 %res
 621 }
 622
 623 define i64 @eorv_v8i64(<8 x i64>* %a) #0 {
     ; Loads <8 x i64> from %a and XOR-reduces it. Configurations of 512 bits and
     ; up use one 8-lane load feeding a single eorv. The 256-bit configuration has
     ; to split the vector: two 4-lane loads (the second indexed by 4 doubleword
     ; elements) are combined with a bitwise `eor` before a single eorv. The
     ; 384-bit configuration only has its label matched.
 624 ; CHECK-LABEL: eorv_v8i64:
 625 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
 626 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 627 ; VBITS_GE_512-NEXT: eorv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
 628 ; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]]
 629 ; VBITS_GE_512-NEXT: ret
 630
 631 ; Ensure sensible type legalisation.
 632 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
 633 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 634 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
 635 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 636 ; VBITS_EQ_256-DAG: eor [[EOR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 637 ; VBITS_EQ_256-DAG: eorv [[REDUCE:d[0-9]+]], [[PG]], [[EOR]].d
 638 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 639 ; VBITS_EQ_256-NEXT: ret
 640   %op = load <8 x i64>, <8 x i64>* %a
 641   %res = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> %op)
 642   ret i64 %res
 643 }
 644
 645 define i64 @eorv_v16i64(<16 x i64>* %a) #0 {
     ; Loads <16 x i64> from %a and XOR-reduces it. The instruction sequence is
     ; only verified for configurations of 1024 bits and up; narrower ones just
     ; match the label.
 646 ; CHECK-LABEL: eorv_v16i64:
 647 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
 648 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 649 ; VBITS_GE_1024-NEXT: eorv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
 650 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 651 ; VBITS_GE_1024-NEXT: ret
 652   %op = load <16 x i64>, <16 x i64>* %a
 653   %res = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> %op)
 654   ret i64 %res
 655 }
 656
 657 define i64 @eorv_v32i64(<32 x i64>* %a) #0 {
     ; Loads <32 x i64> from %a and XOR-reduces it. The instruction sequence is
     ; only verified for the 2048-bit configuration; narrower ones just match the
     ; label.
 658 ; CHECK-LABEL: eorv_v32i64:
 659 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
 660 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 661 ; VBITS_GE_2048-NEXT: eorv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
 662 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 663 ; VBITS_GE_2048-NEXT: ret
 664   %op = load <32 x i64>, <32 x i64>* %a
 665   %res = call i64 @llvm.vector.reduce.xor.v32i64(<32 x i64> %op)
 666   ret i64 %res
 667 }
 668
 669 ;
 670 ; ORV
 671 ;
 672
 673 ; No single instruction NEON ORV support. Use SVE.
     ; The by-value <8 x i8> argument is expected to be OR-reduced straight from
     ; z0 under an 8-lane byte predicate, with the scalar result moved to w0.
 674 define i8 @orv_v8i8(<8 x i8> %a) #0 {
 675 ; CHECK-LABEL: orv_v8i8:
 676 ; CHECK: ptrue [[PG:p[0-9]+]].b, vl8
 677 ; CHECK: orv b[[REDUCE:[0-9]+]], [[PG]], z0.b
 678 ; CHECK: fmov w0, s[[REDUCE]]
 679 ; CHECK: ret
 680   %res = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a)
 681   ret i8 %res
 682 }
 683
 684 ; No single instruction NEON ORV support. Use SVE.
     ; The by-value <16 x i8> argument is expected to be OR-reduced straight from
     ; z0 under a 16-lane byte predicate, with the scalar result moved to w0.
 685 define i8 @orv_v16i8(<16 x i8> %a) #0 {
 686 ; CHECK-LABEL: orv_v16i8:
 687 ; CHECK: ptrue [[PG:p[0-9]+]].b, vl16
 688 ; CHECK: orv b[[REDUCE:[0-9]+]], [[PG]], z0.b
 689 ; CHECK: fmov w0, s[[REDUCE]]
 690 ; CHECK: ret
 691   %res = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a)
 692   ret i8 %res
 693 }
 694
 695 define i8 @orv_v32i8(<32 x i8>* %a) #0 {
     ; Loads <32 x i8> from %a and OR-reduces it. Every configuration of 256 bits
     ; and up must follow the 32-lane predicate setup immediately with the
     ; predicated load, the orv, the move to w0 and the return.
 696 ; CHECK-LABEL: orv_v32i8:
 697 ; CHECK: ptrue [[PG:p[0-9]+]].b, vl32
 698 ; CHECK-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
 699 ; CHECK-NEXT: orv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
 700 ; CHECK-NEXT: fmov w0, s[[REDUCE]]
 701 ; CHECK-NEXT: ret
 702   %op = load <32 x i8>, <32 x i8>* %a
 703   %res = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %op)
 704   ret i8 %res
 705 }
 706
 707 define i8 @orv_v64i8(<64 x i8>* %a) #0 {
     ; Loads <64 x i8> from %a and OR-reduces it. Configurations of 512 bits and
     ; up use one 64-lane load feeding a single orv. The 256-bit configuration has
     ; to split the vector: two 32-lane loads (the second at register offset 32)
     ; are combined with a bitwise `orr` before a single orv. The 384-bit
     ; configuration only has its label matched.
 708 ; CHECK-LABEL: orv_v64i8:
 709 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].b, vl64
 710 ; VBITS_GE_512-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
 711 ; VBITS_GE_512-NEXT: orv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
 712 ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
 713 ; VBITS_GE_512-NEXT: ret
 714
 715 ; Ensure sensible type legalisation.
 716 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
 717 ; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
 718 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
 719 ; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
 720 ; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 721 ; VBITS_EQ_256-DAG: orv b[[REDUCE:[0-9]+]], [[PG]], [[OR]].b
 722 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 723 ; VBITS_EQ_256-NEXT: ret
 724
 725   %op = load <64 x i8>, <64 x i8>* %a
 726   %res = call i8 @llvm.vector.reduce.or.v64i8(<64 x i8> %op)
 727   ret i8 %res
 728 }
 729
 730 define i8 @orv_v128i8(<128 x i8>* %a) #0 {
     ; Loads <128 x i8> from %a and OR-reduces it. The instruction sequence is
     ; only verified for configurations of 1024 bits and up; narrower ones just
     ; match the label.
 731 ; CHECK-LABEL: orv_v128i8:
 732 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].b, vl128
 733 ; VBITS_GE_1024-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
 734 ; VBITS_GE_1024-NEXT: orv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
 735 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 736 ; VBITS_GE_1024-NEXT: ret
 737   %op = load <128 x i8>, <128 x i8>* %a
 738   %res = call i8 @llvm.vector.reduce.or.v128i8(<128 x i8> %op)
 739   ret i8 %res
 740 }
 741
 742 define i8 @orv_v256i8(<256 x i8>* %a) #0 {
     ; Loads <256 x i8> from %a and OR-reduces it. The instruction sequence is
     ; only verified for the 2048-bit configuration; narrower ones just match the
     ; label.
 743 ; CHECK-LABEL: orv_v256i8:
 744 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].b, vl256
 745 ; VBITS_GE_2048-NEXT: ld1b { [[OP:z[0-9]+]].b }, [[PG]]/z, [x0]
 746 ; VBITS_GE_2048-NEXT: orv b[[REDUCE:[0-9]+]], [[PG]], [[OP]].b
 747 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 748 ; VBITS_GE_2048-NEXT: ret
 749   %op = load <256 x i8>, <256 x i8>* %a
 750   %res = call i8 @llvm.vector.reduce.or.v256i8(<256 x i8> %op)
 751   ret i8 %res
 752 }
 753
 754 ; No single instruction NEON ORV support. Use SVE.
     ; The by-value <4 x i16> argument is expected to be OR-reduced straight from
     ; z0 under a 4-lane halfword predicate, with the scalar result moved to w0.
 755 define i16 @orv_v4i16(<4 x i16> %a) #0 {
 756 ; CHECK-LABEL: orv_v4i16:
 757 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl4
 758 ; CHECK: orv h[[REDUCE:[0-9]+]], [[PG]], z0.h
 759 ; CHECK: fmov w0, s[[REDUCE]]
 760 ; CHECK: ret
 761   %res = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a)
 762   ret i16 %res
 763 }
 764
 765 ; No single instruction NEON ORV support. Use SVE.
     ; The by-value <8 x i16> argument is expected to be OR-reduced straight from
     ; z0 under an 8-lane halfword predicate, with the scalar result moved to w0.
 766 define i16 @orv_v8i16(<8 x i16> %a) #0 {
 767 ; CHECK-LABEL: orv_v8i16:
 768 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl8
 769 ; CHECK: orv h[[REDUCE:[0-9]+]], [[PG]], z0.h
 770 ; CHECK: fmov w0, s[[REDUCE]]
 771 ; CHECK: ret
 772   %res = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a)
 773   ret i16 %res
 774 }
 775
 776 define i16 @orv_v16i16(<16 x i16>* %a) #0 {
     ; Loads <16 x i16> from %a and OR-reduces it. Every configuration of 256 bits
     ; and up must follow the 16-lane predicate setup immediately with the
     ; predicated load, the orv, the move to w0 and the return.
 777 ; CHECK-LABEL: orv_v16i16:
 778 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
 779 ; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 780 ; CHECK-NEXT: orv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
 781 ; CHECK-NEXT: fmov w0, s[[REDUCE]]
 782 ; CHECK-NEXT: ret
 783   %op = load <16 x i16>, <16 x i16>* %a
 784   %res = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %op)
 785   ret i16 %res
 786 }
 787
 788 define i16 @orv_v32i16(<32 x i16>* %a) #0 { ; OR-reduce the 512-bit <32 x i16> vector stored at %a to a single i16.
 789 ; CHECK-LABEL: orv_v32i16:
 790 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
 791 ; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 792 ; VBITS_GE_512-NEXT: orv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
 793 ; VBITS_GE_512-NEXT: fmov w0, s[[REDUCE]]
 794 ; VBITS_GE_512-NEXT: ret
 795
 796 ; Ensure sensible type legalisation: with 256-bit vectors the two halves are loaded separately, combined with ORR, then reduced by a single ORV.
 797 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
 798 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 799 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
 800 ; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
 801 ; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 802 ; VBITS_EQ_256-DAG: orv h[[REDUCE:[0-9]+]], [[PG]], [[OR]].h
 803 ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]]
 804 ; VBITS_EQ_256-NEXT: ret
 805   %op = load <32 x i16>, <32 x i16>* %a ; one full-width load at >= 512 bits, two 16-lane loads at 256 bits
 806   %res = call i16 @llvm.vector.reduce.or.v32i16(<32 x i16> %op) ; a single ORV is expected in both configurations
 807   ret i16 %res
 808 }
 809
 810 define i16 @orv_v64i16(<64 x i16>* %a) #0 { ; OR-reduce the 1024-bit <64 x i16> vector stored at %a to a single i16.
 811 ; CHECK-LABEL: orv_v64i16:
 812 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
 813 ; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 814 ; VBITS_GE_1024-NEXT: orv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
 815 ; VBITS_GE_1024-NEXT: fmov w0, s[[REDUCE]]
 816 ; VBITS_GE_1024-NEXT: ret
 817   %op = load <64 x i16>, <64 x i16>* %a ; codegen is only verified for configurations with >= 1024-bit SVE vectors
 818   %res = call i16 @llvm.vector.reduce.or.v64i16(<64 x i16> %op) ; expected lowering is one predicated ORV over 64 halfword lanes
 819   ret i16 %res
 820 }
 821
 822 define i16 @orv_v128i16(<128 x i16>* %a) #0 { ; OR-reduce the 2048-bit <128 x i16> vector stored at %a to a single i16.
 823 ; CHECK-LABEL: orv_v128i16:
 824 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
 825 ; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
 826 ; VBITS_GE_2048-NEXT: orv h[[REDUCE:[0-9]+]], [[PG]], [[OP]].h
 827 ; VBITS_GE_2048-NEXT: fmov w0, s[[REDUCE]]
 828 ; VBITS_GE_2048-NEXT: ret
 829   %op = load <128 x i16>, <128 x i16>* %a ; NOTE(review): the VBITS_GE_2048 prefix is presumably enabled by a 2048-bit configuration further down the header - confirm
 830   %res = call i16 @llvm.vector.reduce.or.v128i16(<128 x i16> %op) ; expected lowering is one predicated ORV over 128 halfword lanes
 831   ret i16 %res
 832 }
 833
 834 ; NEON has no single-instruction OR reduction, so SVE ORV is expected even for this 64-bit vector.
 835 define i32 @orv_v2i32(<2 x i32> %a) #0 { ; OR-reduce a <2 x i32> argument passed by value.
 836 ; CHECK-LABEL: orv_v2i32:
 837 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl2
 838 ; CHECK: orv [[REDUCE:s[0-9]+]], [[PG]], z0.s
 839 ; CHECK: fmov w0, [[REDUCE]]
 840 ; CHECK: ret
 841   %res = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a) ; the checks expect ORV to read the argument directly from z0 under a 2-lane predicate
 842   ret i32 %res
 843 }
 844
 845 ; NEON has no single-instruction OR reduction, so SVE ORV is expected even for this 128-bit vector.
 846 define i32 @orv_v4i32(<4 x i32> %a) #0 { ; OR-reduce a <4 x i32> argument passed by value.
 847 ; CHECK-LABEL: orv_v4i32:
 848 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl4
 849 ; CHECK: orv [[REDUCE:s[0-9]+]], [[PG]], z0.s
 850 ; CHECK: fmov w0, [[REDUCE]]
 851 ; CHECK: ret
 852   %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a) ; the checks expect ORV to read the argument directly from z0 under a 4-lane predicate
 853   ret i32 %res
 854 }
 855
 856 define i32 @orv_v8i32(<8 x i32>* %a) #0 { ; OR-reduce the 256-bit <8 x i32> vector stored at %a to a single i32.
 857 ; CHECK-LABEL: orv_v8i32:
 858 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
 859 ; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 860 ; CHECK-NEXT: orv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
 861 ; CHECK-NEXT: fmov w0, [[REDUCE]]
 862 ; CHECK-NEXT: ret
 863   %op = load <8 x i32>, <8 x i32>* %a ; verified under the common prefix, i.e. for every configuration with >= 256-bit SVE vectors
 864   %res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %op) ; expected lowering is one predicated ORV over 8 word lanes
 865   ret i32 %res
 866 }
 867
 868 define i32 @orv_v16i32(<16 x i32>* %a) #0 { ; OR-reduce the 512-bit <16 x i32> vector stored at %a to a single i32.
 869 ; CHECK-LABEL: orv_v16i32:
 870 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
 871 ; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 872 ; VBITS_GE_512-NEXT: orv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
 873 ; VBITS_GE_512-NEXT: fmov w0, [[REDUCE]]
 874 ; VBITS_GE_512-NEXT: ret
 875
 876 ; Ensure sensible type legalisation: with 256-bit vectors the two halves are loaded separately, combined with ORR, then reduced by a single ORV.
 877 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
 878 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 879 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
 880 ; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
 881 ; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 882 ; VBITS_EQ_256-DAG: orv [[REDUCE:s[0-9]+]], [[PG]], [[OR]].s
 883 ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]]
 884 ; VBITS_EQ_256-NEXT: ret
 885   %op = load <16 x i32>, <16 x i32>* %a ; one full-width load at >= 512 bits, two 8-lane loads at 256 bits
 886   %res = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %op) ; a single ORV is expected in both configurations
 887   ret i32 %res
 888 }
 889
 890 define i32 @orv_v32i32(<32 x i32>* %a) #0 { ; OR-reduce the 1024-bit <32 x i32> vector stored at %a to a single i32.
 891 ; CHECK-LABEL: orv_v32i32:
 892 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
 893 ; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 894 ; VBITS_GE_1024-NEXT: orv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
 895 ; VBITS_GE_1024-NEXT: fmov w0, [[REDUCE]]
 896 ; VBITS_GE_1024-NEXT: ret
 897   %op = load <32 x i32>, <32 x i32>* %a ; codegen is only verified for configurations with >= 1024-bit SVE vectors
 898   %res = call i32 @llvm.vector.reduce.or.v32i32(<32 x i32> %op) ; expected lowering is one predicated ORV over 32 word lanes
 899   ret i32 %res
 900 }
 901
 902 define i32 @orv_v64i32(<64 x i32>* %a) #0 { ; OR-reduce the 2048-bit <64 x i32> vector stored at %a to a single i32.
 903 ; CHECK-LABEL: orv_v64i32:
 904 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
 905 ; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
 906 ; VBITS_GE_2048-NEXT: orv [[REDUCE:s[0-9]+]], [[PG]], [[OP]].s
 907 ; VBITS_GE_2048-NEXT: fmov w0, [[REDUCE]]
 908 ; VBITS_GE_2048-NEXT: ret
 909   %op = load <64 x i32>, <64 x i32>* %a ; NOTE(review): the VBITS_GE_2048 prefix is presumably enabled by a 2048-bit configuration further down the header - confirm
 910   %res = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> %op) ; expected lowering is one predicated ORV over 64 word lanes
 911   ret i32 %res
 912 }
 913
 914 ; Nothing to reduce for a single-element vector: the lone element is simply moved from d0 to x0.
 915 define i64 @orv_v1i64(<1 x i64> %a) #0 { ; degenerate OR reduction of a <1 x i64> argument passed by value.
 916 ; CHECK-LABEL: orv_v1i64:
 917 ; CHECK: fmov x0, d0
 918 ; CHECK: ret
 919   %res = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %a) ; the checks list only an fmov and a ret, no predicate setup or ORV
 920   ret i64 %res
 921 }
 922
 923 ; Use SVE ORV for 128-bit vectors as well, with the predicate limited to 2 doubleword lanes.
 924 define i64 @orv_v2i64(<2 x i64> %a) #0 { ; OR-reduce a <2 x i64> argument passed by value.
 925 ; CHECK-LABEL: orv_v2i64:
 926 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl2
 927 ; CHECK: orv [[REDUCE:d[0-9]+]], [[PG]], z0.d
 928 ; CHECK: fmov x0, [[REDUCE]]
 929 ; CHECK: ret
 930   %res = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a) ; the checks expect ORV to read the argument directly from z0
 931   ret i64 %res
 932 }
 933
 934 define i64 @orv_v4i64(<4 x i64>* %a) #0 { ; OR-reduce the 256-bit <4 x i64> vector stored at %a to a single i64.
 935 ; CHECK-LABEL: orv_v4i64:
 936 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
 937 ; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 938 ; CHECK-NEXT: orv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
 939 ; CHECK-NEXT: fmov x0, [[REDUCE]]
 940 ; CHECK-NEXT: ret
 941   %op = load <4 x i64>, <4 x i64>* %a ; verified under the common prefix, i.e. for every configuration with >= 256-bit SVE vectors
 942   %res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %op) ; expected lowering is one predicated ORV over 4 doubleword lanes
 943   ret i64 %res
 944 }
 945
 946 define i64 @orv_v8i64(<8 x i64>* %a) #0 { ; OR-reduce the 512-bit <8 x i64> vector stored at %a to a single i64.
 947 ; CHECK-LABEL: orv_v8i64:
 948 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
 949 ; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 950 ; VBITS_GE_512-NEXT: orv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
 951 ; VBITS_GE_512-NEXT: fmov x0, [[REDUCE]]
 952 ; VBITS_GE_512-NEXT: ret
 953
 954 ; Ensure sensible type legalisation: with 256-bit vectors the two halves are loaded separately, combined with ORR, then reduced by a single ORV.
 955 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
 956 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 957 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
 958 ; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
 959 ; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d
 960 ; VBITS_EQ_256-DAG: orv [[REDUCE:d[0-9]+]], [[PG]], [[OR]].d
 961 ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
 962 ; VBITS_EQ_256-NEXT: ret
 963   %op = load <8 x i64>, <8 x i64>* %a ; one full-width load at >= 512 bits, two 4-lane loads at 256 bits
 964   %res = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %op) ; a single ORV is expected in both configurations
 965   ret i64 %res
 966 }
 967
 968 define i64 @orv_v16i64(<16 x i64>* %a) #0 { ; OR-reduce the 1024-bit <16 x i64> vector stored at %a to a single i64.
 969 ; CHECK-LABEL: orv_v16i64:
 970 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
 971 ; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 972 ; VBITS_GE_1024-NEXT: orv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
 973 ; VBITS_GE_1024-NEXT: fmov x0, [[REDUCE]]
 974 ; VBITS_GE_1024-NEXT: ret
 975   %op = load <16 x i64>, <16 x i64>* %a ; codegen is only verified for configurations with >= 1024-bit SVE vectors
 976   %res = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> %op) ; expected lowering is one predicated ORV over 16 doubleword lanes
 977   ret i64 %res
 978 }
 979
 980 define i64 @orv_v32i64(<32 x i64>* %a) #0 { ; OR-reduce the 2048-bit <32 x i64> vector stored at %a to a single i64.
 981 ; CHECK-LABEL: orv_v32i64:
 982 ; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
 983 ; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
 984 ; VBITS_GE_2048-NEXT: orv [[REDUCE:d[0-9]+]], [[PG]], [[OP]].d
 985 ; VBITS_GE_2048-NEXT: fmov x0, [[REDUCE]]
 986 ; VBITS_GE_2048-NEXT: ret
 987   %op = load <32 x i64>, <32 x i64>* %a ; NOTE(review): the VBITS_GE_2048 prefix is presumably enabled by a 2048-bit configuration further down the header - confirm
 988   %res = call i64 @llvm.vector.reduce.or.v32i64(<32 x i64> %op) ; expected lowering is one predicated ORV over 32 doubleword lanes
 989   ret i64 %res
 990 }
 991
 992 attributes #0 = { "target-features"="+sve" } ; attribute group referenced as #0 by the test functions; turns on the SVE target feature for them
 993
 994 declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>) ; AND-reduction intrinsics, i8 elements, 8 to 256 lanes. NOTE(review): callers are outside this chunk, presumably the AND tests earlier in the file
 995 declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
 996 declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
 997 declare i8 @llvm.vector.reduce.and.v64i8(<64 x i8>)
 998 declare i8 @llvm.vector.reduce.and.v128i8(<128 x i8>)
 999 declare i8 @llvm.vector.reduce.and.v256i8(<256 x i8>)
1000
1001 declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>) ; i16 elements, 4 to 128 lanes
1002 declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
1003 declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
1004 declare i16 @llvm.vector.reduce.and.v32i16(<32 x i16>)
1005 declare i16 @llvm.vector.reduce.and.v64i16(<64 x i16>)
1006 declare i16 @llvm.vector.reduce.and.v128i16(<128 x i16>)
1007
1008 declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>) ; i32 elements, 2 to 64 lanes
1009 declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
1010 declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
1011 declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32>)
1012 declare i32 @llvm.vector.reduce.and.v32i32(<32 x i32>)
1013 declare i32 @llvm.vector.reduce.and.v64i32(<64 x i32>)
1014
1015 declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>) ; i64 elements, 1 to 32 lanes
1016 declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
1017 declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
1018 declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64>)
1019 declare i64 @llvm.vector.reduce.and.v16i64(<16 x i64>)
1020 declare i64 @llvm.vector.reduce.and.v32i64(<32 x i64>)
1021
1022 declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>) ; OR-reduction intrinsics called by the orv_* test functions; i8 elements, 8 to 256 lanes
1023 declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
1024 declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
1025 declare i8 @llvm.vector.reduce.or.v64i8(<64 x i8>)
1026 declare i8 @llvm.vector.reduce.or.v128i8(<128 x i8>)
1027 declare i8 @llvm.vector.reduce.or.v256i8(<256 x i8>)
1028
1029 declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>) ; i16 elements, 4 to 128 lanes
1030 declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
1031 declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
1032 declare i16 @llvm.vector.reduce.or.v32i16(<32 x i16>)
1033 declare i16 @llvm.vector.reduce.or.v64i16(<64 x i16>)
1034 declare i16 @llvm.vector.reduce.or.v128i16(<128 x i16>)
1035
1036 declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>) ; i32 elements, 2 to 64 lanes
1037 declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
1038 declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
1039 declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>)
1040 declare i32 @llvm.vector.reduce.or.v32i32(<32 x i32>)
1041 declare i32 @llvm.vector.reduce.or.v64i32(<64 x i32>)
1042
1043 declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>) ; i64 elements, 1 to 32 lanes
1044 declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
1045 declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
1046 declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>)
1047 declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>)
1048 declare i64 @llvm.vector.reduce.or.v32i64(<32 x i64>)
1049
1050 declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>) ; XOR-reduction intrinsics, i8 elements, 8 to 256 lanes. NOTE(review): callers are outside this chunk, presumably the XOR tests earlier in the file
1051 declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
1052 declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
1053 declare i8 @llvm.vector.reduce.xor.v64i8(<64 x i8>)
1054 declare i8 @llvm.vector.reduce.xor.v128i8(<128 x i8>)
1055 declare i8 @llvm.vector.reduce.xor.v256i8(<256 x i8>)
1056
1057 declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>) ; i16 elements, 4 to 128 lanes
1058 declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
1059 declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
1060 declare i16 @llvm.vector.reduce.xor.v32i16(<32 x i16>)
1061 declare i16 @llvm.vector.reduce.xor.v64i16(<64 x i16>)
1062 declare i16 @llvm.vector.reduce.xor.v128i16(<128 x i16>)
1063
1064 declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>) ; i32 elements, 2 to 64 lanes
1065 declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
1066 declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
1067 declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>)
1068 declare i32 @llvm.vector.reduce.xor.v32i32(<32 x i32>)
1069 declare i32 @llvm.vector.reduce.xor.v64i32(<64 x i32>)
1070
1071 declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>) ; i64 elements, 1 to 32 lanes
1072 declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
1073 declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
1074 declare i64 @llvm.vector.reduce.xor.v8i64(<8 x i64>)
1075 declare i64 @llvm.vector.reduce.xor.v16i64(<16 x i64>)
1076 declare i64 @llvm.vector.reduce.xor.v32i64(<32 x i64>)