llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll

   1 ; RUN: llc -aarch64-sve-vector-bits-min=128  < %s | FileCheck %s -check-prefix=NO_SVE
   2 ; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
   3 ; RUN: llc -aarch64-sve-vector-bits-min=384  < %s | FileCheck %s -check-prefixes=CHECK
   4 ; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
   5 ; RUN: llc -aarch64-sve-vector-bits-min=640  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
   6 ; RUN: llc -aarch64-sve-vector-bits-min=768  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
   7 ; RUN: llc -aarch64-sve-vector-bits-min=896  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
   8 ; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
   9 ; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  10 ; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  11 ; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  12 ; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  13 ; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  14 ; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  15 ; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
  16 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
  17
     ; FileCheck prefixes selected by the run lines above:
     ;   NO_SVE        - the 128-bit minimum only.
     ;   CHECK         - every minimum from 256 to 2048 bits.
     ;   VBITS_EQ_256  - the 256-bit minimum only (the 384-bit run uses CHECK alone).
     ;   VBITS_GE_512  - minimums of 512 bits and above.
     ;   VBITS_GE_1024 - minimums of 1024 bits and above.
     ;   VBITS_GE_2048 - the 2048-bit minimum only.
  18 target triple = "aarch64-unknown-linux-gnu"
  19
  20 ; Don't use SVE when its registers are no bigger than NEON: the 128-bit run
     ; must not contain any ptrue instruction.
  21 ; NO_SVE-NOT: ptrue
  22
  23 ;
  24 ; DUP (integer)
  25 ;
  26
  27 ; Don't use SVE for 64-bit vectors: splatting %a into <8 x i8> is expected to
     ; be a single NEON dup of w0 into v0.8b, followed directly by ret.
  28 define <8 x i8> @splat_v8i8(i8 %a) #0 {
  29 ; CHECK-LABEL: splat_v8i8:
  30 ; CHECK: dup v0.8b, w0
  31 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
  32   %insert = insertelement <8 x i8> undef, i8 %a, i64 0
  33   %splat = shufflevector <8 x i8> %insert, <8 x i8> undef, <8 x i32> zeroinitializer
  34   ret <8 x i8> %splat
  35 }
  36
  37 ; Don't use SVE for 128-bit vectors: splatting %a into <16 x i8> is expected
     ; to be a single NEON dup of w0 into v0.16b, followed directly by ret.
  38 define <16 x i8> @splat_v16i8(i8 %a) #0 {
  39 ; CHECK-LABEL: splat_v16i8:
  40 ; CHECK: dup v0.16b, w0
  41 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
  42   %insert = insertelement <16 x i8> undef, i8 %a, i64 0
  43   %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
  44   ret <16 x i8> %splat
  45 }
  46
  47 define void @splat_v32i8(i8 %a, <32 x i8>* %b) #0 {
     ; Splat %a across <32 x i8> and store it to %b. Runs using the CHECK prefix
     ; expect mov z.b, w0 and ptrue vl32 (in either order), then one st1b to [x1].
  48 ; CHECK-LABEL: splat_v32i8:
  49 ; CHECK-DAG: mov [[RES:z[0-9]+]].b, w0
  50 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].b, vl32
  51 ; CHECK-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
  52 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
  53   %insert = insertelement <32 x i8> undef, i8 %a, i64 0
  54   %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
  55   store <32 x i8> %splat, <32 x i8>* %b
  56   ret void
  57 }
  58
  59 define void @splat_v64i8(i8 %a, <64 x i8>* %b) #0 {
     ; Splat %a across <64 x i8> and store it to %b. VBITS_GE_512 runs expect one
     ; vl64 st1b to [x1]. The VBITS_EQ_256 run expects the store split into two
     ; vl32 st1b instructions, the second indexed from x1 by a register set to #32.
  60 ; CHECK-LABEL: splat_v64i8:
  61 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].b, w0
  62 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].b, vl64
  63 ; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
  64 ; VBITS_GE_512-NEXT: ret
  65
  66 ; Ensure sensible type legalisation.
     ; Note the offset is matched as a w-register write but used as the x-register
     ; of the same number in the second store.
  67 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].b, w0
  68 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32
  69 ; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
  70 ; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1]
  71 ; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1, x[[NUMELTS]]]
  72 ; VBITS_EQ_256-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
  73   %insert = insertelement <64 x i8> undef, i8 %a, i64 0
  74   %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
  75   store <64 x i8> %splat, <64 x i8>* %b
  76   ret void
  77 }
  78
  79 define void @splat_v128i8(i8 %a, <128 x i8>* %b) #0 {
     ; Splat %a across <128 x i8> and store it to %b. Only VBITS_GE_1024 runs check
     ; the body (mov z.b, w0 / ptrue vl128, then one st1b to [x1]); the other
     ; runs using the CHECK prefix match just the label.
  80 ; CHECK-LABEL: splat_v128i8:
  81 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].b, w0
  82 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].b, vl128
  83 ; VBITS_GE_1024-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
  84 ; VBITS_GE_1024-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
  85   %insert = insertelement <128 x i8> undef, i8 %a, i64 0
  86   %splat = shufflevector <128 x i8> %insert, <128 x i8> undef, <128 x i32> zeroinitializer
  87   store <128 x i8> %splat, <128 x i8>* %b
  88   ret void
  89 }
  90
  91 define void @splat_v256i8(i8 %a, <256 x i8>* %b) #0 {
     ; Splat %a across <256 x i8> and store it to %b. Only the VBITS_GE_2048 run
     ; checks the body (mov z.b, w0 / ptrue vl256, then one st1b to [x1]); the
     ; other runs using the CHECK prefix match just the label.
  92 ; CHECK-LABEL: splat_v256i8:
  93 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].b, w0
  94 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].b, vl256
  95 ; VBITS_GE_2048-NEXT: st1b { [[RES]].b }, [[PG]], [x1]
  96 ; VBITS_GE_2048-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
  97   %insert = insertelement <256 x i8> undef, i8 %a, i64 0
  98   %splat = shufflevector <256 x i8> %insert, <256 x i8> undef, <256 x i32> zeroinitializer
  99   store <256 x i8> %splat, <256 x i8>* %b
 100   ret void
 101 }
 102
 103 ; Don't use SVE for 64-bit vectors: splatting %a into <4 x i16> is expected to
     ; be a single NEON dup of w0 into v0.4h, followed directly by ret.
 104 define <4 x i16> @splat_v4i16(i16 %a) #0 {
 105 ; CHECK-LABEL: splat_v4i16:
 106 ; CHECK: dup v0.4h, w0
 107 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 108   %insert = insertelement <4 x i16> undef, i16 %a, i64 0
 109   %splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer
 110   ret <4 x i16> %splat
 111 }
 112
 113 ; Don't use SVE for 128-bit vectors: splatting %a into <8 x i16> is expected
     ; to be a single NEON dup of w0 into v0.8h, followed directly by ret.
 114 define <8 x i16> @splat_v8i16(i16 %a) #0 {
 115 ; CHECK-LABEL: splat_v8i16:
 116 ; CHECK: dup v0.8h, w0
 117 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 118   %insert = insertelement <8 x i16> undef, i16 %a, i64 0
 119   %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
 120   ret <8 x i16> %splat
 121 }
 122
 123 define void @splat_v16i16(i16 %a, <16 x i16>* %b) #0 {
     ; Splat %a across <16 x i16> and store it to %b. Runs using the CHECK prefix
     ; expect mov z.h, w0 and ptrue vl16 (in either order), then one st1h to [x1].
 124 ; CHECK-LABEL: splat_v16i16:
 125 ; CHECK-DAG: mov [[RES:z[0-9]+]].h, w0
 126 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].h, vl16
 127 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
 128 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 129   %insert = insertelement <16 x i16> undef, i16 %a, i64 0
 130   %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
 131   store <16 x i16> %splat, <16 x i16>* %b
 132   ret void
 133 }
 134
 135 define void @splat_v32i16(i16 %a, <32 x i16>* %b) #0 {
     ; Splat %a across <32 x i16> and store it to %b. VBITS_GE_512 runs expect one
     ; vl32 st1h to [x1]. The VBITS_EQ_256 run expects the store split into two
     ; vl16 st1h instructions, the second indexed from x1 by a register set to
     ; #16 and scaled with lsl #1.
 136 ; CHECK-LABEL: splat_v32i16:
 137 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, w0
 138 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
 139 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
 140 ; VBITS_GE_512-NEXT: ret
 141
 142 ; Ensure sensible type legalisation.
 143 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, w0
 144 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
 145 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 146 ; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x1]
 147 ; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1]
 148 ; VBITS_EQ_256-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 149   %insert = insertelement <32 x i16> undef, i16 %a, i64 0
 150   %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
 151   store <32 x i16> %splat, <32 x i16>* %b
 152   ret void
 153 }
 154
 155 define void @splat_v64i16(i16 %a, <64 x i16>* %b) #0 {
     ; Splat %a across <64 x i16> and store it to %b. Only VBITS_GE_1024 runs check
     ; the body (mov z.h, w0 / ptrue vl64, then one st1h to [x1]); the other runs
     ; using the CHECK prefix match just the label.
 156 ; CHECK-LABEL: splat_v64i16:
 157 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].h, w0
 158 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].h, vl64
 159 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
 160 ; VBITS_GE_1024-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 161   %insert = insertelement <64 x i16> undef, i16 %a, i64 0
 162   %splat = shufflevector <64 x i16> %insert, <64 x i16> undef, <64 x i32> zeroinitializer
 163   store <64 x i16> %splat, <64 x i16>* %b
 164   ret void
 165 }
 166
 167 define void @splat_v128i16(i16 %a, <128 x i16>* %b) #0 {
     ; Splat %a across <128 x i16> and store it to %b. Only the VBITS_GE_2048 run
     ; checks the body (mov z.h, w0 / ptrue vl128, then one st1h to [x1]); the
     ; other runs using the CHECK prefix match just the label.
 168 ; CHECK-LABEL: splat_v128i16:
 169 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].h, w0
 170 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].h, vl128
 171 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x1]
 172 ; VBITS_GE_2048-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 173   %insert = insertelement <128 x i16> undef, i16 %a, i64 0
 174   %splat = shufflevector <128 x i16> %insert, <128 x i16> undef, <128 x i32> zeroinitializer
 175   store <128 x i16> %splat, <128 x i16>* %b
 176   ret void
 177 }
 178
 179 ; Don't use SVE for 64-bit vectors: splatting %a into <2 x i32> is expected to
     ; be a single NEON dup of w0 into v0.2s, followed directly by ret.
 180 define <2 x i32> @splat_v2i32(i32 %a) #0 {
 181 ; CHECK-LABEL: splat_v2i32:
 182 ; CHECK: dup v0.2s, w0
 183 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 184   %insert = insertelement <2 x i32> undef, i32 %a, i64 0
 185   %splat = shufflevector <2 x i32> %insert, <2 x i32> undef, <2 x i32> zeroinitializer
 186   ret <2 x i32> %splat
 187 }
 188
 189 ; Don't use SVE for 128-bit vectors: splatting %a into <4 x i32> is expected
     ; to be a single NEON dup of w0 into v0.4s, followed directly by ret.
 190 define <4 x i32> @splat_v4i32(i32 %a) #0 {
 191 ; CHECK-LABEL: splat_v4i32:
 192 ; CHECK: dup v0.4s, w0
 193 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 194   %insert = insertelement <4 x i32> undef, i32 %a, i64 0
 195   %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
 196   ret <4 x i32> %splat
 197 }
 198
 199 define void @splat_v8i32(i32 %a, <8 x i32>* %b) #0 {
     ; Splat %a across <8 x i32> and store it to %b. Runs using the CHECK prefix
     ; expect mov z.s, w0 and ptrue vl8 (in either order), then one st1w to [x1].
 200 ; CHECK-LABEL: splat_v8i32:
 201 ; CHECK-DAG: mov [[RES:z[0-9]+]].s, w0
 202 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl8
 203 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
 204 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 205   %insert = insertelement <8 x i32> undef, i32 %a, i64 0
 206   %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
 207   store <8 x i32> %splat, <8 x i32>* %b
 208   ret void
 209 }
 210
 211 define void @splat_v16i32(i32 %a, <16 x i32>* %b) #0 {
     ; Splat %a across <16 x i32> and store it to %b. VBITS_GE_512 runs expect one
     ; vl16 st1w to [x1]. The VBITS_EQ_256 run expects the store split into two
     ; vl8 st1w instructions, the second indexed from x1 by a register set to #8
     ; and scaled with lsl #2.
 212 ; CHECK-LABEL: splat_v16i32:
 213 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, w0
 214 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
 215 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
 216 ; VBITS_GE_512-NEXT: ret
 217
 218 ; Ensure sensible type legalisation.
 219 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, w0
 220 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
 221 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 222 ; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x1]
 223 ; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2]
 224 ; VBITS_EQ_256-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 225   %insert = insertelement <16 x i32> undef, i32 %a, i64 0
 226   %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
 227   store <16 x i32> %splat, <16 x i32>* %b
 228   ret void
 229 }
 230
 231 define void @splat_v32i32(i32 %a, <32 x i32>* %b) #0 {
     ; Splat %a across <32 x i32> and store it to %b. Only VBITS_GE_1024 runs check
     ; the body (mov z.s, w0 / ptrue vl32, then one st1w to [x1]); the other runs
     ; using the CHECK prefix match just the label.
 232 ; CHECK-LABEL: splat_v32i32:
 233 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].s, w0
 234 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].s, vl32
 235 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
 236 ; VBITS_GE_1024-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 237   %insert = insertelement <32 x i32> undef, i32 %a, i64 0
 238   %splat = shufflevector <32 x i32> %insert, <32 x i32> undef, <32 x i32> zeroinitializer
 239   store <32 x i32> %splat, <32 x i32>* %b
 240   ret void
 241 }
 242
 243 define void @splat_v64i32(i32 %a, <64 x i32>* %b) #0 {
     ; Splat %a across <64 x i32> and store it to %b. Only the VBITS_GE_2048 run
     ; checks the body (mov z.s, w0 / ptrue vl64, then one st1w to [x1]); the
     ; other runs using the CHECK prefix match just the label.
 244 ; CHECK-LABEL: splat_v64i32:
 245 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].s, w0
 246 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].s, vl64
 247 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x1]
 248 ; VBITS_GE_2048-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 249   %insert = insertelement <64 x i32> undef, i32 %a, i64 0
 250   %splat = shufflevector <64 x i32> %insert, <64 x i32> undef, <64 x i32> zeroinitializer
 251   store <64 x i32> %splat, <64 x i32>* %b
 252   ret void
 253 }
 254
 255 ; Don't use SVE for 64-bit vectors: the one-element <1 x i64> splat of %a is
     ; expected to be a single fmov of x0 into d0, followed directly by ret.
 256 define <1 x i64> @splat_v1i64(i64 %a) #0 {
 257 ; CHECK-LABEL: splat_v1i64:
 258 ; CHECK: fmov d0, x0
 259 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 260   %insert = insertelement <1 x i64> undef, i64 %a, i64 0
 261   %splat = shufflevector <1 x i64> %insert, <1 x i64> undef, <1 x i32> zeroinitializer
 262   ret <1 x i64> %splat
 263 }
 264
 265 ; Don't use SVE for 128-bit vectors: splatting %a into <2 x i64> is expected
     ; to be a single NEON dup of x0 into v0.2d, followed directly by ret.
 266 define <2 x i64> @splat_v2i64(i64 %a) #0 {
 267 ; CHECK-LABEL: splat_v2i64:
 268 ; CHECK: dup v0.2d, x0
 269 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 270   %insert = insertelement <2 x i64> undef, i64 %a, i64 0
 271   %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
 272   ret <2 x i64> %splat
 273 }
 274
 275 define void @splat_v4i64(i64 %a, <4 x i64>* %b) #0 {
     ; Splat %a across <4 x i64> and store it to %b. Runs using the CHECK prefix
     ; expect mov z.d, x0 and ptrue vl4 (in either order), then one st1d to [x1].
 276 ; CHECK-LABEL: splat_v4i64:
 277 ; CHECK-DAG: mov [[RES:z[0-9]+]].d, x0
 278 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].d, vl4
 279 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
 280 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 281   %insert = insertelement <4 x i64> undef, i64 %a, i64 0
 282   %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
 283   store <4 x i64> %splat, <4 x i64>* %b
 284   ret void
 285 }
 286
 287 define void @splat_v8i64(i64 %a, <8 x i64>* %b) #0 {
     ; Splat %a across <8 x i64> and store it to %b. VBITS_GE_512 runs expect one
     ; vl8 st1d to [x1]. The VBITS_EQ_256 run expects the store split into two
     ; vl4 st1d instructions, the second indexed from x1 by a register set to #4
     ; and scaled with lsl #3.
 288 ; CHECK-LABEL: splat_v8i64:
 289 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, x0
 290 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
 291 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
 292 ; VBITS_GE_512-NEXT: ret
 293
 294 ; Ensure sensible type legalisation.
 295 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, x0
 296 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
 297 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 298 ; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x1]
 299 ; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3]
 300 ; VBITS_EQ_256-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 301   %insert = insertelement <8 x i64> undef, i64 %a, i64 0
 302   %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
 303   store <8 x i64> %splat, <8 x i64>* %b
 304   ret void
 305 }
 306
 307 define void @splat_v16i64(i64 %a, <16 x i64>* %b) #0 {
     ; Splat %a across <16 x i64> and store it to %b. Only VBITS_GE_1024 runs check
     ; the body (mov z.d, x0 / ptrue vl16, then one st1d to [x1]); the other runs
     ; using the CHECK prefix match just the label.
 308 ; CHECK-LABEL: splat_v16i64:
 309 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].d, x0
 310 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].d, vl16
 311 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
 312 ; VBITS_GE_1024-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 313   %insert = insertelement <16 x i64> undef, i64 %a, i64 0
 314   %splat = shufflevector <16 x i64> %insert, <16 x i64> undef, <16 x i32> zeroinitializer
 315   store <16 x i64> %splat, <16 x i64>* %b
 316   ret void
 317 }
 318
 319 define void @splat_v32i64(i64 %a, <32 x i64>* %b) #0 {
     ; Splat %a across <32 x i64> and store it to %b. Only the VBITS_GE_2048 run
     ; checks the body (mov z.d, x0 / ptrue vl32, then one st1d to [x1]); the
     ; other runs using the CHECK prefix match just the label.
 320 ; CHECK-LABEL: splat_v32i64:
 321 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].d, x0
 322 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].d, vl32
 323 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x1]
 324 ; VBITS_GE_2048-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 325   %insert = insertelement <32 x i64> undef, i64 %a, i64 0
 326   %splat = shufflevector <32 x i64> %insert, <32 x i64> undef, <32 x i32> zeroinitializer
 327   store <32 x i64> %splat, <32 x i64>* %b
 328   ret void
 329 }
 330
 331 ;
 332 ; DUP (floating-point)
 333 ;
 334
 335 ; Don't use SVE for 64-bit vectors: splatting %a into <4 x half> is expected
     ; to be a single NEON dup of lane h[0] of v0 into v0.4h, then ret.
 336 define <4 x half> @splat_v4f16(half %a) #0 {
 337 ; CHECK-LABEL: splat_v4f16:
 338 ; CHECK: dup v0.4h, v0.h[0]
 339 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 340   %insert = insertelement <4 x half> undef, half %a, i64 0
 341   %splat = shufflevector <4 x half> %insert, <4 x half> undef, <4 x i32> zeroinitializer
 342   ret <4 x half> %splat
 343 }
 344
 345 ; Don't use SVE for 128-bit vectors: splatting %a into <8 x half> is expected
     ; to be a single NEON dup of lane h[0] of v0 into v0.8h, then ret.
 346 define <8 x half> @splat_v8f16(half %a) #0 {
 347 ; CHECK-LABEL: splat_v8f16:
 348 ; CHECK: dup v0.8h, v0.h[0]
 349 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 350   %insert = insertelement <8 x half> undef, half %a, i64 0
 351   %splat = shufflevector <8 x half> %insert, <8 x half> undef, <8 x i32> zeroinitializer
 352   ret <8 x half> %splat
 353 }
 354
 355 define void @splat_v16f16(half %a, <16 x half>* %b) #0 {
     ; Splat %a across <16 x half> and store it to %b. Runs using the CHECK prefix
     ; expect mov z.h, h0 and ptrue vl16 (in either order), then one st1h to [x0].
 356 ; CHECK-LABEL: splat_v16f16:
 357 ; CHECK-DAG: mov [[RES:z[0-9]+]].h, h0
 358 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].h, vl16
 359 ; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 360 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 361   %insert = insertelement <16 x half> undef, half %a, i64 0
 362   %splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer
 363   store <16 x half> %splat, <16 x half>* %b
 364   ret void
 365 }
 366
 367 define void @splat_v32f16(half %a, <32 x half>* %b) #0 {
     ; Splat %a across <32 x half> and store it to %b. VBITS_GE_512 runs expect one
     ; vl32 st1h to [x0]. The VBITS_EQ_256 run expects the store split into two
     ; vl16 st1h instructions, the second indexed from x0 by a register set to
     ; #16 and scaled with lsl #1.
 368 ; CHECK-LABEL: splat_v32f16:
 369 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, h0
 370 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
 371 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 372 ; VBITS_GE_512-NEXT: ret
 373
 374 ; Ensure sensible type legalisation.
 375 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, h0
 376 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
 377 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
 378 ; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x0]
 379 ; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
 380 ; VBITS_EQ_256-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 381   %insert = insertelement <32 x half> undef, half %a, i64 0
 382   %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
 383   store <32 x half> %splat, <32 x half>* %b
 384   ret void
 385 }
 386
 387 define void @splat_v64f16(half %a, <64 x half>* %b) #0 {
     ; Splat %a across <64 x half> and store it to %b. Only VBITS_GE_1024 runs
     ; check the body (mov z.h, h0 / ptrue vl64, then one st1h to [x0]); the other
     ; runs using the CHECK prefix match just the label.
 388 ; CHECK-LABEL: splat_v64f16:
 389 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].h, h0
 390 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].h, vl64
 391 ; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 392 ; VBITS_GE_1024-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 393   %insert = insertelement <64 x half> undef, half %a, i64 0
 394   %splat = shufflevector <64 x half> %insert, <64 x half> undef, <64 x i32> zeroinitializer
 395   store <64 x half> %splat, <64 x half>* %b
 396   ret void
 397 }
 398
 399 define void @splat_v128f16(half %a, <128 x half>* %b) #0 {
     ; Splat %a across <128 x half> and store it to %b. Only the VBITS_GE_2048 run
     ; checks the body (mov z.h, h0 / ptrue vl128, then one st1h to [x0]); the
     ; other runs using the CHECK prefix match just the label.
 400 ; CHECK-LABEL: splat_v128f16:
 401 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].h, h0
 402 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].h, vl128
 403 ; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 404 ; VBITS_GE_2048-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 405   %insert = insertelement <128 x half> undef, half %a, i64 0
 406   %splat = shufflevector <128 x half> %insert, <128 x half> undef, <128 x i32> zeroinitializer
 407   store <128 x half> %splat, <128 x half>* %b
 408   ret void
 409 }
 410
 411 ; Don't use SVE for 64-bit vectors: splatting %a into <2 x float> is expected
     ; to be a single NEON dup of lane s[0] of v0 into v0.2s, then ret.
     ; The %op2 parameter is not used by the body.
 412 define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) #0 {
 413 ; CHECK-LABEL: splat_v2f32:
 414 ; CHECK: dup v0.2s, v0.s[0]
 415 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 416   %insert = insertelement <2 x float> undef, float %a, i64 0
 417   %splat = shufflevector <2 x float> %insert, <2 x float> undef, <2 x i32> zeroinitializer
 418   ret <2 x float> %splat
 419 }
 420
 421 ; Don't use SVE for 128-bit vectors: splatting %a into <4 x float> is expected
     ; to be a single NEON dup of lane s[0] of v0 into v0.4s, then ret.
     ; The %op2 parameter is not used by the body.
 422 define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) #0 {
 423 ; CHECK-LABEL: splat_v4f32:
 424 ; CHECK: dup v0.4s, v0.s[0]
 425 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 426   %insert = insertelement <4 x float> undef, float %a, i64 0
 427   %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
 428   ret <4 x float> %splat
 429 }
 430
 431 define void @splat_v8f32(float %a, <8 x float>* %b) #0 {
     ; Splat %a across <8 x float> and store it to %b. Runs using the CHECK prefix
     ; expect mov z.s, s0 and ptrue vl8 (in either order), then one st1w to [x0].
 432 ; CHECK-LABEL: splat_v8f32:
 433 ; CHECK-DAG: mov [[RES:z[0-9]+]].s, s0
 434 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl8
 435 ; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 436 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 437   %insert = insertelement <8 x float> undef, float %a, i64 0
 438   %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer
 439   store <8 x float> %splat, <8 x float>* %b
 440   ret void
 441 }
 442
 443 define void @splat_v16f32(float %a, <16 x float>* %b) #0 {
     ; Splat %a across <16 x float> and store it to %b. VBITS_GE_512 runs expect
     ; one vl16 st1w to [x0]. The VBITS_EQ_256 run expects the store split into
     ; two vl8 st1w instructions, the second indexed from x0 by a register set to
     ; #8 and scaled with lsl #2.
 444 ; CHECK-LABEL: splat_v16f32:
 445 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, s0
 446 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
 447 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 448 ; VBITS_GE_512-NEXT: ret
 449
 450 ; Ensure sensible type legalisation.
 451 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, s0
 452 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
 453 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
 454 ; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x0]
 455 ; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
 456 ; VBITS_EQ_256-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 457   %insert = insertelement <16 x float> undef, float %a, i64 0
 458   %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
 459   store <16 x float> %splat, <16 x float>* %b
 460   ret void
 461 }
 462
 463 define void @splat_v32f32(float %a, <32 x float>* %b) #0 {
     ; Splat %a across <32 x float> and store it to %b. Only VBITS_GE_1024 runs
     ; check the body (mov z.s, s0 / ptrue vl32, then one st1w to [x0]); the other
     ; runs using the CHECK prefix match just the label.
 464 ; CHECK-LABEL: splat_v32f32:
 465 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].s, s0
 466 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].s, vl32
 467 ; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 468 ; VBITS_GE_1024-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 469   %insert = insertelement <32 x float> undef, float %a, i64 0
 470   %splat = shufflevector <32 x float> %insert, <32 x float> undef, <32 x i32> zeroinitializer
 471   store <32 x float> %splat, <32 x float>* %b
 472   ret void
 473 }
 474
 475 define void @splat_v64f32(float %a, <64 x float>* %b) #0 {
     ; Splat %a across <64 x float> and store it to %b. Only the VBITS_GE_2048 run
     ; checks the body (mov z.s, s0 / ptrue vl64, then one st1w to [x0]); the
     ; other runs using the CHECK prefix match just the label.
 476 ; CHECK-LABEL: splat_v64f32:
 477 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].s, s0
 478 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].s, vl64
 479 ; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 480 ; VBITS_GE_2048-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 481   %insert = insertelement <64 x float> undef, float %a, i64 0
 482   %splat = shufflevector <64 x float> %insert, <64 x float> undef, <64 x i32> zeroinitializer
 483   store <64 x float> %splat, <64 x float>* %b
 484   ret void
 485 }
 486
 487 ; Don't use SVE for 64-bit vectors: the checks expect ret directly after the
     ; %bb.0 marker, i.e. no instruction at all for the one-element splat
     ; (presumably because %a already arrives in d0 - confirm against the ABI).
     ; The %op2 parameter is not used by the body.
 488 define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) #0 {
 489 ; CHECK-LABEL: splat_v1f64:
 490 ; CHECK: // %bb.0:
 491 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 492   %insert = insertelement <1 x double> undef, double %a, i64 0
 493   %splat = shufflevector <1 x double> %insert, <1 x double> undef, <1 x i32> zeroinitializer
 494   ret <1 x double> %splat
 495 }
 496
 497 ; Don't use SVE for 128-bit vectors: splatting %a into <2 x double> is
     ; expected to be a single NEON dup of lane d[0] of v0 into v0.2d, then ret.
     ; The %op2 parameter is not used by the body.
 498 define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) #0 {
 499 ; CHECK-LABEL: splat_v2f64:
 500 ; CHECK: dup v0.2d, v0.d[0]
 501 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 502   %insert = insertelement <2 x double> undef, double %a, i64 0
 503   %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer
 504   ret <2 x double> %splat
 505 }
 506
 507 define void @splat_v4f64(double %a, <4 x double>* %b) #0 {
     ; Splat %a across <4 x double> and store it to %b. Runs using the CHECK prefix
     ; expect mov z.d, d0 and ptrue vl4 (in either order), then one st1d to [x0].
 508 ; CHECK-LABEL: splat_v4f64:
 509 ; CHECK-DAG: mov [[RES:z[0-9]+]].d, d0
 510 ; CHECK-DAG: ptrue [[PG:p[0-9]+]].d, vl4
 511 ; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 512 ; CHECK-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 513   %insert = insertelement <4 x double> undef, double %a, i64 0
 514   %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer
 515   store <4 x double> %splat, <4 x double>* %b
 516   ret void
 517 }
 518
 519 define void @splat_v8f64(double %a, <8 x double>* %b) #0 {
     ; Splat %a across <8 x double> and store it to %b. VBITS_GE_512 runs expect
     ; one vl8 st1d to [x0]. The VBITS_EQ_256 run expects the store split into two
     ; vl4 st1d instructions, the second indexed from x0 by a register set to #4
     ; and scaled with lsl #3.
 520 ; CHECK-LABEL: splat_v8f64:
 521 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, d0
 522 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
 523 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 524 ; VBITS_GE_512-NEXT: ret
 525
 526 ; Ensure sensible type legalisation.
 527 ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, d0
 528 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
 529 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
 530 ; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x0]
 531 ; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
 532 ; VBITS_EQ_256-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 533   %insert = insertelement <8 x double> undef, double %a, i64 0
 534   %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
 535   store <8 x double> %splat, <8 x double>* %b
 536   ret void
 537 }
 538
 539 define void @splat_v16f64(double %a, <16 x double>* %b) #0 {
     ; Splat %a across <16 x double> and store it to %b. Only VBITS_GE_1024 runs
     ; check the body (mov z.d, d0 / ptrue vl16, then one st1d to [x0]); the other
     ; runs using the CHECK prefix match just the label.
 540 ; CHECK-LABEL: splat_v16f64:
 541 ; VBITS_GE_1024-DAG: mov [[RES:z[0-9]+]].d, d0
 542 ; VBITS_GE_1024-DAG: ptrue [[PG:p[0-9]+]].d, vl16
 543 ; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 544 ; VBITS_GE_1024-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 545   %insert = insertelement <16 x double> undef, double %a, i64 0
 546   %splat = shufflevector <16 x double> %insert, <16 x double> undef, <16 x i32> zeroinitializer
 547   store <16 x double> %splat, <16 x double>* %b
 548   ret void
 549 }
 550
 551 define void @splat_v32f64(double %a, <32 x double>* %b) #0 {
     ; Splat %a across <32 x double> and store it to %b. Only the VBITS_GE_2048 run
     ; checks the body (mov z.d, d0 / ptrue vl32, then one st1d to [x0]); the
     ; other runs using the CHECK prefix match just the label.
 552 ; CHECK-LABEL: splat_v32f64:
 553 ; VBITS_GE_2048-DAG: mov [[RES:z[0-9]+]].d, d0
 554 ; VBITS_GE_2048-DAG: ptrue [[PG:p[0-9]+]].d, vl32
 555 ; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 556 ; VBITS_GE_2048-NEXT: ret
       ; Splat idiom: insert %a into lane 0, then shuffle with an all-zero mask.
 557   %insert = insertelement <32 x double> undef, double %a, i64 0
 558   %splat = shufflevector <32 x double> %insert, <32 x double> undef, <32 x i32> zeroinitializer
 559   store <32 x double> %splat, <32 x double>* %b
 560   ret void
 561 }
 562
 563 ;
 564 ; DUP (integer immediate)
 565 ;
 566
 567 define void @splat_imm_v64i8(<64 x i8>* %a) #0 {
     ; Store a <64 x i8> splat of the constant 1 to %a. Only VBITS_GE_512 runs
     ; check the body: mov z.b, #1 and ptrue vl64 (in either order), then one
     ; st1b to [x0].
 568 ; CHECK-LABEL: splat_imm_v64i8:
 569 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].b, #1
 570 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].b, vl64
 571 ; VBITS_GE_512-NEXT: st1b { [[RES]].b }, [[PG]], [x0]
 572 ; VBITS_GE_512-NEXT: ret
       ; Splat idiom: insert the constant into lane 0, then shuffle with an all-zero mask.
 573   %insert = insertelement <64 x i8> undef, i8 1, i64 0
 574   %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
 575   store <64 x i8> %splat, <64 x i8>* %a
 576   ret void
 577 }
 578
 579 define void @splat_imm_v32i16(<32 x i16>* %a) #0 {
     ; Store a <32 x i16> splat of the constant 2 to %a. Only VBITS_GE_512 runs
     ; check the body: mov z.h, #2 and ptrue vl32 (in either order), then one
     ; st1h to [x0].
 580 ; CHECK-LABEL: splat_imm_v32i16:
 581 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].h, #2
 582 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
 583 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 584 ; VBITS_GE_512-NEXT: ret
       ; Splat idiom: insert the constant into lane 0, then shuffle with an all-zero mask.
 585   %insert = insertelement <32 x i16> undef, i16 2, i64 0
 586   %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
 587   store <32 x i16> %splat, <32 x i16>* %a
 588   ret void
 589 }
 590
 591 define void @splat_imm_v16i32(<16 x i32>* %a) #0 {
     ; Store a <16 x i32> splat of the constant 3 to %a. Only VBITS_GE_512 runs
     ; check the body: mov z.s, #3 and ptrue vl16 (in either order), then one
     ; st1w to [x0].
 592 ; CHECK-LABEL: splat_imm_v16i32:
 593 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].s, #3
 594 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
 595 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 596 ; VBITS_GE_512-NEXT: ret
       ; Splat idiom: insert the constant into lane 0, then shuffle with an all-zero mask.
 597   %insert = insertelement <16 x i32> undef, i32 3, i64 0
 598   %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
 599   store <16 x i32> %splat, <16 x i32>* %a
 600   ret void
 601 }
 602
 603 define void @splat_imm_v8i64(<8 x i64>* %a) #0 {
     ; Store a <8 x i64> splat of the constant 4 to %a. Only VBITS_GE_512 runs
     ; check the body: mov z.d, #4 and ptrue vl8 (in either order), then one
     ; st1d to [x0].
 604 ; CHECK-LABEL: splat_imm_v8i64:
 605 ; VBITS_GE_512-DAG: mov [[RES:z[0-9]+]].d, #4
 606 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
 607 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 608 ; VBITS_GE_512-NEXT: ret
       ; Splat idiom: insert the constant into lane 0, then shuffle with an all-zero mask.
 609   %insert = insertelement <8 x i64> undef, i64 4, i64 0
 610   %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
 611   store <8 x i64> %splat, <8 x i64>* %a
 612   ret void
 613 }
 614
 615 ;
 616 ; DUP (floating-point immediate)
 617 ;
 618
 619 define void @splat_imm_v32f16(<32 x half>* %a) #0 {
     ; Store a <32 x half> splat of the constant 5.0 to %a. Only VBITS_GE_512 runs
     ; check the body: fmov z.h, #5.00000000 and ptrue vl32 (in either order),
     ; then one st1h to [x0].
 620 ; CHECK-LABEL: splat_imm_v32f16:
 621 ; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].h, #5.00000000
 622 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].h, vl32
 623 ; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
 624 ; VBITS_GE_512-NEXT: ret
       ; Splat idiom: insert the constant into lane 0, then shuffle with an all-zero mask.
 625   %insert = insertelement <32 x half> undef, half 5.0, i64 0
 626   %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
 627   store <32 x half> %splat, <32 x half>* %a
 628   ret void
 629 }
 630
 631 define void @splat_imm_v16f32(<16 x float>* %a) #0 {
     ; Store a <16 x float> splat of the constant 6.0 to %a. Only VBITS_GE_512
     ; runs check the body: fmov z.s, #6.00000000 and ptrue vl16 (in either
     ; order), then one st1w to [x0].
 632 ; CHECK-LABEL: splat_imm_v16f32:
 633 ; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].s, #6.00000000
 634 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].s, vl16
 635 ; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
 636 ; VBITS_GE_512-NEXT: ret
       ; Splat idiom: insert the constant into lane 0, then shuffle with an all-zero mask.
 637   %insert = insertelement <16 x float> undef, float 6.0, i64 0
 638   %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
 639   store <16 x float> %splat, <16 x float>* %a
 640   ret void
 641 }
 642
 643 define void @splat_imm_v8f64(<8 x double>* %a) #0 {
     ; Store a <8 x double> splat of the constant 7.0 to %a. Only VBITS_GE_512
     ; runs check the body: fmov z.d, #7.00000000 and ptrue vl8 (in either
     ; order), then one st1d to [x0].
 644 ; CHECK-LABEL: splat_imm_v8f64:
 645 ; VBITS_GE_512-DAG: fmov [[RES:z[0-9]+]].d, #7.00000000
 646 ; VBITS_GE_512-DAG: ptrue [[PG:p[0-9]+]].d, vl8
 647 ; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
 648 ; VBITS_GE_512-NEXT: ret
       ; Splat idiom: insert the constant into lane 0, then shuffle with an all-zero mask.
 649   %insert = insertelement <8 x double> undef, double 7.0, i64 0
 650   %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
 651   store <8 x double> %splat, <8 x double>* %a
 652   ret void
 653 }
 654 attributes #0 = { "target-features"="+sve" }
     ; Attribute group #0 adds the +sve target feature to every function that
     ; references it.