llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

   1 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s
   2
   3 ;
   4 ; ST2B
   5 ;
   6
   7 define void @st2b_i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i1> %pg, i8* %base) {
   8 ; llvm.aarch64.sve.st2 on two nxv16i8 vectors: the expected code is an st2b of
   9 ; { z0.b, z1.b } under p0 at [x0], immediately followed by ret.
  10 ; CHECK-LABEL: st2b_i8:
  11 ; CHECK: st2b { z0.b, z1.b }, p0, [x0]
  12 ; CHECK-NEXT: ret
  13   call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1,
  14       <vscale x 16 x i1> %pg, i8* %base)
  15   ret void
  16 }
  17
  18 ;
  19 ; ST2H
  20 ;
  21
  22 define void @st2h_i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i1> %pg, i16* %base) {
  23 ; llvm.aarch64.sve.st2 on two nxv8i16 vectors: the expected code is an st2h of
  24 ; { z0.h, z1.h } under p0 at [x0], immediately followed by ret.
  25 ; CHECK-LABEL: st2h_i16:
  26 ; CHECK: st2h { z0.h, z1.h }, p0, [x0]
  27 ; CHECK-NEXT: ret
  28   call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1,
  29       <vscale x 8 x i1> %pg, i16* %base)
  30   ret void
  31 }
  32
  33 define void @st2h_f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x i1> %pg, half* %base) {
  34 ; llvm.aarch64.sve.st2 on two nxv8f16 vectors: the expected code is an st2h of
  35 ; { z0.h, z1.h } under p0 at [x0], immediately followed by ret.
  36 ; CHECK-LABEL: st2h_f16:
  37 ; CHECK: st2h { z0.h, z1.h }, p0, [x0]
  38 ; CHECK-NEXT: ret
  39   call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1,
  40       <vscale x 8 x i1> %pg, half* %base)
  41   ret void
  42 }
  43
  44 define void @st2h_bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x i1> %pg, bfloat* %base) #0 {
  45 ; llvm.aarch64.sve.st2 on two nxv8bf16 vectors (function carries attribute group #0):
  46 ; the expected code is an st2h of { z0.h, z1.h } under p0 at [x0], immediately followed by ret.
  47 ; CHECK-LABEL: st2h_bf16:
  48 ; CHECK: st2h { z0.h, z1.h }, p0, [x0]
  49 ; CHECK-NEXT: ret
  50   call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1,
  51       <vscale x 8 x i1> %pg, bfloat* %base)
  52   ret void
  53 }
  54
  55 ;
  56 ; ST2W
  57 ;
  58
  59 define void @st2w_i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i1> %pg, i32* %base) {
  60 ; llvm.aarch64.sve.st2 on two nxv4i32 vectors: the expected code is an st2w of
  61 ; { z0.s, z1.s } under p0 at [x0], immediately followed by ret.
  62 ; CHECK-LABEL: st2w_i32:
  63 ; CHECK: st2w { z0.s, z1.s }, p0, [x0]
  64 ; CHECK-NEXT: ret
  65   call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
  66       <vscale x 4 x i1> %pg, i32* %base)
  67   ret void
  68 }
  69
  70 define void @st2w_f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x i1> %pg, float* %base) {
  71 ; llvm.aarch64.sve.st2 on two nxv4f32 vectors: the expected code is an st2w of
  72 ; { z0.s, z1.s } under p0 at [x0], immediately followed by ret.
  73 ; CHECK-LABEL: st2w_f32:
  74 ; CHECK: st2w { z0.s, z1.s }, p0, [x0]
  75 ; CHECK-NEXT: ret
  76   call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1,
  77       <vscale x 4 x i1> %pg, float* %base)
  78   ret void
  79 }
  80
  81 ;
  82 ; ST2D
  83 ;
  84
  85 define void @st2d_i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i1> %pg, i64* %base) {
  86 ; llvm.aarch64.sve.st2 on two nxv2i64 vectors: the expected code is an st2d of
  87 ; { z0.d, z1.d } under p0 at [x0], immediately followed by ret.
  88 ; CHECK-LABEL: st2d_i64:
  89 ; CHECK: st2d { z0.d, z1.d }, p0, [x0]
  90 ; CHECK-NEXT: ret
  91   call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1,
  92       <vscale x 2 x i1> %pg, i64* %base)
  93   ret void
  94 }
  95
  96 define void @st2d_f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x i1> %pg, double* %base) {
  97 ; llvm.aarch64.sve.st2 on two nxv2f64 vectors: the expected code is an st2d of
  98 ; { z0.d, z1.d } under p0 at [x0], immediately followed by ret.
  99 ; CHECK-LABEL: st2d_f64:
 100 ; CHECK: st2d { z0.d, z1.d }, p0, [x0]
 101 ; CHECK-NEXT: ret
 102   call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1,
 103       <vscale x 2 x i1> %pg, double* %base)
 104   ret void
 105 }
 106
 107 define void @st2d_ptr(<vscale x 2 x i8*> %z0, <vscale x 2 x i8*> %z1, <vscale x 2 x i1> %pg, i8** %base) {
 108 ; llvm.aarch64.sve.st2 on two vectors of i8* (nxv2p0i8): the expected code is an st2d of
 109 ; { z0.d, z1.d } under p0 at [x0], immediately followed by ret.
 110 ; CHECK-LABEL: st2d_ptr:
 111 ; CHECK: st2d { z0.d, z1.d }, p0, [x0]
 112 ; CHECK-NEXT: ret
 113   call void @llvm.aarch64.sve.st2.nxv2p0i8(<vscale x 2 x i8*> %z0, <vscale x 2 x i8*> %z1,
 114       <vscale x 2 x i1> %pg, i8** %base)
 115   ret void
 116 }
 117
 118 ;
 119 ; ST3B
 120 ;
 121
 122 define void @st3b_i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i1> %pg, i8* %base) {
 123 ; llvm.aarch64.sve.st3 on three nxv16i8 vectors.
 124 ; Expected code: an st3b of { z0.b, z1.b, z2.b } under p0 at [x0],
 125 ; immediately followed by ret.
 126 ; CHECK-LABEL: st3b_i8:
 127 ; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0]
 128 ; CHECK-NEXT: ret
 129   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2,
 130       <vscale x 16 x i1> %pg, i8* %base)
 131   ret void
 132 }
 133
 134 ;
 135 ; ST3H
 136 ;
 137
 138 define void @st3h_i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i1> %pg, i16* %base) {
 139 ; llvm.aarch64.sve.st3 on three nxv8i16 vectors.
 140 ; Expected code: an st3h of { z0.h, z1.h, z2.h } under p0 at [x0],
 141 ; immediately followed by ret.
 142 ; CHECK-LABEL: st3h_i16:
 143 ; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0]
 144 ; CHECK-NEXT: ret
 145   call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2,
 146       <vscale x 8 x i1> %pg, i16* %base)
 147   ret void
 148 }
 149
 150 define void @st3h_f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x i1> %pg, half* %base) {
 151 ; llvm.aarch64.sve.st3 on three nxv8f16 vectors.
 152 ; Expected code: an st3h of { z0.h, z1.h, z2.h } under p0 at [x0],
 153 ; immediately followed by ret.
 154 ; CHECK-LABEL: st3h_f16:
 155 ; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0]
 156 ; CHECK-NEXT: ret
 157   call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2,
 158       <vscale x 8 x i1> %pg, half* %base)
 159   ret void
 160 }
 161
 162 define void @st3h_bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x i1> %pg, bfloat* %base) #0 {
 163 ; llvm.aarch64.sve.st3 on three nxv8bf16 vectors; this function carries attribute group #0.
 164 ; Expected code: an st3h of { z0.h, z1.h, z2.h } under p0 at [x0],
 165 ; immediately followed by ret.
 166 ; CHECK-LABEL: st3h_bf16:
 167 ; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0]
 168 ; CHECK-NEXT: ret
 169   call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2,
 170       <vscale x 8 x i1> %pg, bfloat* %base)
 171   ret void
 172 }
 173
 174 ;
 175 ; ST3W
 176 ;
 177
 178 define void @st3w_i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i1> %pg, i32* %base) {
 179 ; llvm.aarch64.sve.st3 on three nxv4i32 vectors.
 180 ; Expected code: an st3w of { z0.s, z1.s, z2.s } under p0 at [x0],
 181 ; immediately followed by ret.
 182 ; CHECK-LABEL: st3w_i32:
 183 ; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0]
 184 ; CHECK-NEXT: ret
 185   call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2,
 186       <vscale x 4 x i1> %pg, i32* %base)
 187   ret void
 188 }
 189
 190 define void @st3w_f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x i1> %pg, float* %base) {
 191 ; llvm.aarch64.sve.st3 on three nxv4f32 vectors.
 192 ; Expected code: an st3w of { z0.s, z1.s, z2.s } under p0 at [x0],
 193 ; immediately followed by ret.
 194 ; CHECK-LABEL: st3w_f32:
 195 ; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0]
 196 ; CHECK-NEXT: ret
 197   call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2,
 198       <vscale x 4 x i1> %pg, float* %base)
 199   ret void
 200 }
 201
 202 ;
 203 ; ST3D
 204 ;
 205
 206 define void @st3d_i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i1> %pg, i64* %base) {
 207 ; llvm.aarch64.sve.st3 on three nxv2i64 vectors.
 208 ; Expected code: an st3d of { z0.d, z1.d, z2.d } under p0 at [x0],
 209 ; immediately followed by ret.
 210 ; CHECK-LABEL: st3d_i64:
 211 ; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
 212 ; CHECK-NEXT: ret
 213   call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2,
 214       <vscale x 2 x i1> %pg, i64* %base)
 215   ret void
 216 }
 217
 218 define void @st3d_f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x i1> %pg, double* %base) {
 219 ; llvm.aarch64.sve.st3 on three nxv2f64 vectors.
 220 ; Expected code: an st3d of { z0.d, z1.d, z2.d } under p0 at [x0],
 221 ; immediately followed by ret.
 222 ; CHECK-LABEL: st3d_f64:
 223 ; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
 224 ; CHECK-NEXT: ret
 225   call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2,
 226       <vscale x 2 x i1> %pg, double* %base)
 227   ret void
 228 }
 229
 230 define void @st3d_ptr(<vscale x 2 x i8*> %z0, <vscale x 2 x i8*> %z1, <vscale x 2 x i8*> %z2, <vscale x 2 x i1> %pg, i8** %base) {
 231 ; llvm.aarch64.sve.st3 on three vectors of i8* (nxv2p0i8).
 232 ; Expected code: an st3d of { z0.d, z1.d, z2.d } under p0 at [x0],
 233 ; immediately followed by ret.
 234 ; CHECK-LABEL: st3d_ptr:
 235 ; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0]
 236 ; CHECK-NEXT: ret
 237   call void @llvm.aarch64.sve.st3.nxv2p0i8(<vscale x 2 x i8*> %z0, <vscale x 2 x i8*> %z1, <vscale x 2 x i8*> %z2,
 238       <vscale x 2 x i1> %pg, i8** %base)
 239   ret void
 240 }
 241
 242 ;
 243 ; ST4B
 244 ;
 245
 246 define void @st4b_i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i1> %pg, i8* %base) {
 247 ; llvm.aarch64.sve.st4 on four nxv16i8 vectors.
 248 ; Expected code: an st4b of { z0.b, z1.b, z2.b, z3.b } under p0 at [x0],
 249 ; immediately followed by ret.
 250 ; CHECK-LABEL: st4b_i8:
 251 ; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0]
 252 ; CHECK-NEXT: ret
 253   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1,
 254       <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3,
 255       <vscale x 16 x i1> %pg, i8* %base)
 256   ret void
 257 }
 258
 259 ;
 260 ; ST4H
 261 ;
 262
 263 define void @st4h_i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3, <vscale x 8 x i1> %pg, i16* %base) {
 264 ; llvm.aarch64.sve.st4 on four nxv8i16 vectors.
 265 ; Expected code: an st4h of { z0.h, z1.h, z2.h, z3.h } under p0 at [x0],
 266 ; immediately followed by ret.
 267 ; CHECK-LABEL: st4h_i16:
 268 ; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
 269 ; CHECK-NEXT: ret
 270   call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1,
 271       <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3,
 272       <vscale x 8 x i1> %pg, i16* %base)
 273   ret void
 274 }
 275
 276 define void @st4h_f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3, <vscale x 8 x i1> %pg, half* %base) {
 277 ; llvm.aarch64.sve.st4 on four nxv8f16 vectors.
 278 ; Expected code: an st4h of { z0.h, z1.h, z2.h, z3.h } under p0 at [x0],
 279 ; immediately followed by ret.
 280 ; CHECK-LABEL: st4h_f16:
 281 ; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
 282 ; CHECK-NEXT: ret
 283   call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1,
 284       <vscale x 8 x half> %z2, <vscale x 8 x half> %z3,
 285       <vscale x 8 x i1> %pg, half* %base)
 286   ret void
 287 }
 288
 289 define void @st4h_bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3, <vscale x 8 x i1> %pg, bfloat* %base) #0 {
 290 ; llvm.aarch64.sve.st4 on four nxv8bf16 vectors; this function carries attribute group #0.
 291 ; Expected code: an st4h of { z0.h, z1.h, z2.h, z3.h } under p0 at [x0],
 292 ; immediately followed by ret.
 293 ; CHECK-LABEL: st4h_bf16:
 294 ; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0]
 295 ; CHECK-NEXT: ret
 296   call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1,
 297       <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3,
 298       <vscale x 8 x i1> %pg, bfloat* %base)
 299   ret void
 300 }
 301
 302 ;
 303 ; ST4W
 304 ;
 305
 306 define void @st4w_i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i1> %pg, i32* %base) {
 307 ; llvm.aarch64.sve.st4 on four nxv4i32 vectors.
 308 ; Expected code: an st4w of { z0.s, z1.s, z2.s, z3.s } under p0 at [x0],
 309 ; immediately followed by ret.
 310 ; CHECK-LABEL: st4w_i32:
 311 ; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
 312 ; CHECK-NEXT: ret
 313   call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
 314       <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
 315       <vscale x 4 x i1> %pg, i32* %base)
 316   ret void
 317 }
 318
 319 define void @st4w_f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3, <vscale x 4 x i1> %pg, float* %base) {
 320 ; llvm.aarch64.sve.st4 on four nxv4f32 vectors.
 321 ; Expected code: an st4w of { z0.s, z1.s, z2.s, z3.s } under p0 at [x0],
 322 ; immediately followed by ret.
 323 ; CHECK-LABEL: st4w_f32:
 324 ; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0]
 325 ; CHECK-NEXT: ret
 326   call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1,
 327       <vscale x 4 x float> %z2, <vscale x 4 x float> %z3,
 328       <vscale x 4 x i1> %pg, float* %base)
 329   ret void
 330 }
 331
 332 ;
 333 ; ST4D
 334 ;
 335
 336 define void @st4d_i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3, <vscale x 2 x i1> %pg, i64* %base) {
 337 ; llvm.aarch64.sve.st4 on four nxv2i64 vectors.
 338 ; Expected code: an st4d of { z0.d, z1.d, z2.d, z3.d } under p0 at [x0],
 339 ; immediately followed by ret.
 340 ; CHECK-LABEL: st4d_i64:
 341 ; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
 342 ; CHECK-NEXT: ret
 343   call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1,
 344       <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3,
 345       <vscale x 2 x i1> %pg, i64* %base)
 346   ret void
 347 }
 348
 349 define void @st4d_f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3, <vscale x 2 x i1> %pg, double* %base) {
 350 ; llvm.aarch64.sve.st4 on four nxv2f64 vectors.
 351 ; Expected code: an st4d of { z0.d, z1.d, z2.d, z3.d } under p0 at [x0],
 352 ; immediately followed by ret.
 353 ; CHECK-LABEL: st4d_f64:
 354 ; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
 355 ; CHECK-NEXT: ret
 356   call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1,
 357       <vscale x 2 x double> %z2, <vscale x 2 x double> %z3,
 358       <vscale x 2 x i1> %pg, double* %base)
 359   ret void
 360 }
 361
 362 define void @st4d_ptr(<vscale x 2 x i8*> %z0, <vscale x 2 x i8*> %z1, <vscale x 2 x i8*> %z2, <vscale x 2 x i8*> %z3, <vscale x 2 x i1> %pg, i8** %base) {
 363 ; llvm.aarch64.sve.st4 on four vectors of i8* (nxv2p0i8).
 364 ; Expected code: an st4d of { z0.d, z1.d, z2.d, z3.d } under p0 at [x0],
 365 ; immediately followed by ret.
 366 ; CHECK-LABEL: st4d_ptr:
 367 ; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0]
 368 ; CHECK-NEXT: ret
 369   call void @llvm.aarch64.sve.st4.nxv2p0i8(<vscale x 2 x i8*> %z0, <vscale x 2 x i8*> %z1,
 370       <vscale x 2 x i8*> %z2, <vscale x 2 x i8*> %z3,
 371       <vscale x 2 x i1> %pg, i8** %base)
 372   ret void
 373 }
 374 ;
 375 ; STNT1B
 376 ;
 377
 378 define void @stnt1b_i8(<vscale x 16 x i8> %z0, <vscale x 16 x i1> %pg, i8* %base) {
 379 ; llvm.aarch64.sve.stnt1 on one nxv16i8 vector: the expected code is
 380 ; stnt1b { z0.b } under p0 at [x0], immediately followed by ret.
 381 ; CHECK-LABEL: stnt1b_i8:
 382 ; CHECK: stnt1b { z0.b }, p0, [x0]
 383 ; CHECK-NEXT: ret
 384   call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i1> %pg, i8* %base)
 385   ret void
 386 }
 387
 388 ;
 389 ; STNT1H
 390 ;
 391
 392 define void @stnt1h_i16(<vscale x 8 x i16> %z0, <vscale x 8 x i1> %pg, i16* %base) {
 393 ; llvm.aarch64.sve.stnt1 on one nxv8i16 vector: the expected code is
 394 ; stnt1h { z0.h } under p0 at [x0], immediately followed by ret.
 395 ; CHECK-LABEL: stnt1h_i16:
 396 ; CHECK: stnt1h { z0.h }, p0, [x0]
 397 ; CHECK-NEXT: ret
 398   call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i1> %pg, i16* %base)
 399   ret void
 400 }
 401
 402 define void @stnt1h_f16(<vscale x 8 x half> %z0, <vscale x 8 x i1> %pg, half* %base) {
 403 ; llvm.aarch64.sve.stnt1 on one nxv8f16 vector: the expected code is
 404 ; stnt1h { z0.h } under p0 at [x0], immediately followed by ret.
 405 ; CHECK-LABEL: stnt1h_f16:
 406 ; CHECK: stnt1h { z0.h }, p0, [x0]
 407 ; CHECK-NEXT: ret
 408   call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x i1> %pg, half* %base)
 409   ret void
 410 }
 411
 412 define void @stnt1h_bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x i1> %pg, bfloat* %base) #0 {
 413 ; llvm.aarch64.sve.stnt1 on one nxv8bf16 vector (function carries attribute group #0):
 414 ; the expected code is stnt1h { z0.h } under p0 at [x0], immediately followed by ret.
 415 ; CHECK-LABEL: stnt1h_bf16:
 416 ; CHECK: stnt1h { z0.h }, p0, [x0]
 417 ; CHECK-NEXT: ret
 418   call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x i1> %pg, bfloat* %base)
 419   ret void
 420 }
 421
 422 ;
 423 ; STNT1W
 424 ;
 425
 426 define void @stnt1w_i32(<vscale x 4 x i32> %z0, <vscale x 4 x i1> %pg, i32* %base) {
 427 ; llvm.aarch64.sve.stnt1 on one nxv4i32 vector: the expected code is
 428 ; stnt1w { z0.s } under p0 at [x0], immediately followed by ret.
 429 ; CHECK-LABEL: stnt1w_i32:
 430 ; CHECK: stnt1w { z0.s }, p0, [x0]
 431 ; CHECK-NEXT: ret
 432   call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i1> %pg, i32* %base)
 433   ret void
 434 }
 435
 436 define void @stnt1w_f32(<vscale x 4 x float> %z0, <vscale x 4 x i1> %pg, float* %base) {
 437 ; llvm.aarch64.sve.stnt1 on one nxv4f32 vector: the expected code is
 438 ; stnt1w { z0.s } under p0 at [x0], immediately followed by ret.
 439 ; CHECK-LABEL: stnt1w_f32:
 440 ; CHECK: stnt1w { z0.s }, p0, [x0]
 441 ; CHECK-NEXT: ret
 442   call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x i1> %pg, float* %base)
 443   ret void
 444 }
 445
 446 ;
 447 ; STNT1D
 448 ;
 449
 450 define void @stnt1d_i64(<vscale x 2 x i64> %z0, <vscale x 2 x i1> %pg, i64* %base) {
 451 ; llvm.aarch64.sve.stnt1 on one nxv2i64 vector: the expected code is
 452 ; stnt1d { z0.d } under p0 at [x0], immediately followed by ret.
 453 ; CHECK-LABEL: stnt1d_i64:
 454 ; CHECK: stnt1d { z0.d }, p0, [x0]
 455 ; CHECK-NEXT: ret
 456   call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i1> %pg, i64* %base)
 457   ret void
 458 }
 459
 460 define void @stnt1d_f64(<vscale x 2 x double> %z0, <vscale x 2 x i1> %pg, double* %base) {
 461 ; llvm.aarch64.sve.stnt1 on one nxv2f64 vector: the expected code is
 462 ; stnt1d { z0.d } under p0 at [x0], immediately followed by ret.
 463 ; CHECK-LABEL: stnt1d_f64:
 464 ; CHECK: stnt1d { z0.d }, p0, [x0]
 465 ; CHECK-NEXT: ret
 466   call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x i1> %pg, double* %base)
 467   ret void
 468 }
 469
 470
 471 ; Plain stores of tuple values built by llvm.aarch64.sve.tuple.create{2,3,4}
 472
 473 define void @store_i64_tuple3(<vscale x 6 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3) {
 474 ; CHECK-LABEL: store_i64_tuple3:
 475 ; CHECK:      st1d { z2.d }, p0, [x0, #2, mul vl]
 476 ; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl]
 477 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
 478   %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3) ; combine %in1..%in3 into one nxv6i64 tuple
 479   store <vscale x 6 x i64> %tuple, <vscale x 6 x i64>* %out ; expected as three st1d, one per part, highest "mul vl" offset first
 480   ret void
 481 }
 482
 483 define void @store_i64_tuple4(<vscale x 8 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4) {
 484 ; CHECK-LABEL: store_i64_tuple4:
 485 ; CHECK:      st1d { z3.d }, p0, [x0, #3, mul vl]
 486 ; CHECK-NEXT: st1d { z2.d }, p0, [x0, #2, mul vl]
 487 ; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl]
 488 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
 489   %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4) ; combine %in1..%in4 into one nxv8i64 tuple
 490   store <vscale x 8 x i64> %tuple, <vscale x 8 x i64>* %out ; expected as four st1d, one per part, highest "mul vl" offset first
 491   ret void
 492 }
 493
 494 define void @store_i16_tuple2(<vscale x 16 x i16>* %out, <vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2) {
 495 ; CHECK-LABEL: store_i16_tuple2:
 496 ; CHECK:      st1h { z1.h }, p0, [x0, #1, mul vl]
 497 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
 498   %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2) ; combine %in1 and %in2 into one nxv16i16 tuple
 499   store <vscale x 16 x i16> %tuple, <vscale x 16 x i16>* %out ; expected as two st1h, one per part, highest "mul vl" offset first
 500   ret void
 501 }
 502
 503 define void @store_i16_tuple3(<vscale x 24 x i16>* %out, <vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2, <vscale x 8 x i16> %in3) {
 504 ; CHECK-LABEL: store_i16_tuple3:
 505 ; CHECK:      st1h { z2.h }, p0, [x0, #2, mul vl]
 506 ; CHECK-NEXT: st1h { z1.h }, p0, [x0, #1, mul vl]
 507 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
 508   %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2, <vscale x 8 x i16> %in3) ; combine %in1..%in3 into one nxv24i16 tuple
 509   store <vscale x 24 x i16> %tuple, <vscale x 24 x i16>* %out ; expected as three st1h, one per part, highest "mul vl" offset first
 510   ret void
 511 }
 512
 513 define void @store_f32_tuple3(<vscale x 12 x float>* %out, <vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3) {
 514 ; CHECK-LABEL: store_f32_tuple3:
 515 ; CHECK:      st1w { z2.s }, p0, [x0, #2, mul vl]
 516 ; CHECK-NEXT: st1w { z1.s }, p0, [x0, #1, mul vl]
 517 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
 518   %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3) ; combine %in1..%in3 into one nxv12f32 tuple
 519   store <vscale x 12 x float> %tuple, <vscale x 12 x float>* %out ; expected as three st1w, one per part, highest "mul vl" offset first
 520   ret void
 521 }
 522
 523 define void @store_f32_tuple4(<vscale x 16 x float>* %out, <vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3, <vscale x 4 x float> %in4) {
 524 ; CHECK-LABEL: store_f32_tuple4:
 525 ; CHECK:      st1w { z3.s }, p0, [x0, #3, mul vl]
 526 ; CHECK-NEXT: st1w { z2.s }, p0, [x0, #2, mul vl]
 527 ; CHECK-NEXT: st1w { z1.s }, p0, [x0, #1, mul vl]
 528 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
 529   %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3, <vscale x 4 x float> %in4) ; combine %in1..%in4 into one nxv16f32 tuple
 530   store <vscale x 16 x float> %tuple, <vscale x 16 x float>* %out ; expected as four st1w, one per part, highest "mul vl" offset first
 531   ret void
 532 }
 533 ; Declarations of the two-vector st2 intrinsics called by the st2* tests, one per element type. NOTE(review): only the nxv2p0i8 form marks its pointer nocapture - confirm that is intentional.
 534 declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
 535 declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
 536 declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
 537 declare void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
 538 declare void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
 539 declare void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
 540 declare void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
 541 declare void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
 542 declare void @llvm.aarch64.sve.st2.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
 543 ; Declarations of the three-vector st3 intrinsics called by the st3* tests: three data vectors, then the predicate, then the pointer.
 544 declare void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
 545 declare void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
 546 declare void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
 547 declare void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
 548 declare void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
 549 declare void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
 550 declare void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
 551 declare void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
 552 declare void @llvm.aarch64.sve.st3.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
 553 ; Declarations of the four-vector st4 intrinsics called by the st4* tests: four data vectors, then the predicate, then the pointer.
 554 declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
 555 declare void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
 556 declare void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
 557 declare void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
 558 declare void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, half*)
 559 declare void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
 560 declare void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, float*)
 561 declare void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, double*)
 562 declare void @llvm.aarch64.sve.st4.nxv2p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i8*>, <vscale x 2 x i1>, i8** nocapture)
 563 ; Declarations of the stnt1 intrinsics called by the stnt1* tests: one data vector, then the predicate, then the pointer.
 564 declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
 565 declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
 566 declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
 567 declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
 568 declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half*)
 569 declare void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)
 570 declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*)
 571 declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*)
 572 ; Declarations of the tuple.create2/3/4 intrinsics used by the store_*_tuple* tests; each returns one wide scalable vector built from its 2, 3 or 4 operands.
 573 declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 574 declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 575
 576 declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
 577 declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 578
 579 declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
 580 declare <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
 581
 582 ; Attribute group #0 is attached to the *_bf16 test functions: the bfloat variants require +bf16 in addition to +sve.
 583 attributes #0 = { "target-features"="+sve,+bf16" }