llvm/test/Analysis/CostModel/RISCV/scalable-scatter.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+experimental-zvfh < %s | FileCheck %s --check-prefixes=CHECK,GENERIC
   3 ; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+experimental-zvfh -riscv-v-vector-bits-max=256 < %s | FileCheck %s --check-prefixes=CHECK,MAX256
   4 ; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,UNSUPPORTED
   5
   6 define void @masked_scatter_aligned() {
   7 ; GENERIC-LABEL: 'masked_scatter_aligned'
   8 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef)
   9 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 8, <vscale x 4 x i1> undef)
  10 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 8, <vscale x 2 x i1> undef)
  11 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> undef, <vscale x 1 x double*> undef, i32 8, <vscale x 1 x i1> undef)
  12 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16384 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float> undef, <vscale x 16 x float*> undef, i32 4, <vscale x 16 x i1> undef)
  13 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 4, <vscale x 8 x i1> undef)
  14 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 4, <vscale x 4 x i1> undef)
  15 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 4, <vscale x 2 x i1> undef)
  16 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> undef, <vscale x 1 x float*> undef, i32 4, <vscale x 1 x i1> undef)
  17 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32768 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half> undef, <vscale x 32 x half*> undef, i32 2, <vscale x 32 x i1> undef)
  18 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16384 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half> undef, <vscale x 16 x half*> undef, i32 2, <vscale x 16 x i1> undef)
  19 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> undef, <vscale x 8 x half*> undef, i32 2, <vscale x 8 x i1> undef)
  20 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> undef, <vscale x 4 x half*> undef, i32 2, <vscale x 4 x i1> undef)
  21 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> undef, <vscale x 2 x half*> undef, i32 2, <vscale x 2 x i1> undef)
  22 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> undef, <vscale x 1 x half*> undef, i32 2, <vscale x 1 x i1> undef)
  23 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> undef, <vscale x 8 x i64*> undef, i32 8, <vscale x 8 x i1> undef)
  24 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> undef, <vscale x 4 x i64*> undef, i32 8, <vscale x 4 x i1> undef)
  25 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> undef, <vscale x 2 x i64*> undef, i32 8, <vscale x 2 x i1> undef)
  26 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 8, <vscale x 1 x i1> undef)
  27 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16384 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32> undef, <vscale x 16 x i32*> undef, i32 4, <vscale x 16 x i1> undef)
  28 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 4, <vscale x 8 x i1> undef)
  29 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 4, <vscale x 4 x i1> undef)
  30 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> undef, <vscale x 2 x i32*> undef, i32 4, <vscale x 2 x i1> undef)
  31 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> undef, <vscale x 1 x i32*> undef, i32 4, <vscale x 1 x i1> undef)
  32 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32768 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16> undef, <vscale x 32 x i16*> undef, i32 2, <vscale x 32 x i1> undef)
  33 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16384 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 2, <vscale x 16 x i1> undef)
  34 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 2, <vscale x 8 x i1> undef)
  35 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 2, <vscale x 4 x i1> undef)
  36 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 2, <vscale x 2 x i1> undef)
  37 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 2, <vscale x 1 x i1> undef)
  38 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 65536 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0i8(<vscale x 64 x i8> undef, <vscale x 64 x i8*> undef, i32 1, <vscale x 64 x i1> undef)
  39 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 32768 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0i8(<vscale x 32 x i8> undef, <vscale x 32 x i8*> undef, i32 1, <vscale x 32 x i1> undef)
  40 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 16384 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0i8(<vscale x 16 x i8> undef, <vscale x 16 x i8*> undef, i32 1, <vscale x 16 x i1> undef)
  41 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> undef, <vscale x 8 x i8*> undef, i32 1, <vscale x 8 x i1> undef)
  42 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> undef, <vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef)
  43 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> undef, <vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef)
  44 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> undef, <vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef)
  45 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 8192 for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef)
  46 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 4096 for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef)
  47 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 2048 for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef)
  48 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 1024 for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef)
  49 ; GENERIC-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  50 ;
  51 ; MAX256-LABEL: 'masked_scatter_aligned'
  52 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef)
  53 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 8, <vscale x 4 x i1> undef)
  54 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 8, <vscale x 2 x i1> undef)
  55 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> undef, <vscale x 1 x double*> undef, i32 8, <vscale x 1 x i1> undef)
  56 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float> undef, <vscale x 16 x float*> undef, i32 4, <vscale x 16 x i1> undef)
  57 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 4, <vscale x 8 x i1> undef)
  58 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 4, <vscale x 4 x i1> undef)
  59 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 4, <vscale x 2 x i1> undef)
  60 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> undef, <vscale x 1 x float*> undef, i32 4, <vscale x 1 x i1> undef)
  61 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half> undef, <vscale x 32 x half*> undef, i32 2, <vscale x 32 x i1> undef)
  62 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half> undef, <vscale x 16 x half*> undef, i32 2, <vscale x 16 x i1> undef)
  63 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> undef, <vscale x 8 x half*> undef, i32 2, <vscale x 8 x i1> undef)
  64 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> undef, <vscale x 4 x half*> undef, i32 2, <vscale x 4 x i1> undef)
  65 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> undef, <vscale x 2 x half*> undef, i32 2, <vscale x 2 x i1> undef)
  66 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> undef, <vscale x 1 x half*> undef, i32 2, <vscale x 1 x i1> undef)
  67 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> undef, <vscale x 8 x i64*> undef, i32 8, <vscale x 8 x i1> undef)
  68 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> undef, <vscale x 4 x i64*> undef, i32 8, <vscale x 4 x i1> undef)
  69 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> undef, <vscale x 2 x i64*> undef, i32 8, <vscale x 2 x i1> undef)
  70 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 8, <vscale x 1 x i1> undef)
  71 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32> undef, <vscale x 16 x i32*> undef, i32 4, <vscale x 16 x i1> undef)
  72 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 4, <vscale x 8 x i1> undef)
  73 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 4, <vscale x 4 x i1> undef)
  74 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> undef, <vscale x 2 x i32*> undef, i32 4, <vscale x 2 x i1> undef)
  75 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> undef, <vscale x 1 x i32*> undef, i32 4, <vscale x 1 x i1> undef)
  76 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16> undef, <vscale x 32 x i16*> undef, i32 2, <vscale x 32 x i1> undef)
  77 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 2, <vscale x 16 x i1> undef)
  78 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 2, <vscale x 8 x i1> undef)
  79 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 2, <vscale x 4 x i1> undef)
  80 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 2, <vscale x 2 x i1> undef)
  81 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 2, <vscale x 1 x i1> undef)
  82 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0i8(<vscale x 64 x i8> undef, <vscale x 64 x i8*> undef, i32 1, <vscale x 64 x i1> undef)
  83 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0i8(<vscale x 32 x i8> undef, <vscale x 32 x i8*> undef, i32 1, <vscale x 32 x i1> undef)
  84 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0i8(<vscale x 16 x i8> undef, <vscale x 16 x i8*> undef, i32 1, <vscale x 16 x i1> undef)
  85 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> undef, <vscale x 8 x i8*> undef, i32 1, <vscale x 8 x i1> undef)
  86 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> undef, <vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef)
  87 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> undef, <vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef)
  88 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> undef, <vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef)
  89 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef)
  90 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef)
  91 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef)
  92 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef)
  93 ; MAX256-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  94 ;
  95 ; UNSUPPORTED-LABEL: 'masked_scatter_aligned'
  96 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef)
  97 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 8, <vscale x 4 x i1> undef)
  98 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 8, <vscale x 2 x i1> undef)
  99 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> undef, <vscale x 1 x double*> undef, i32 8, <vscale x 1 x i1> undef)
 100 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float> undef, <vscale x 16 x float*> undef, i32 4, <vscale x 16 x i1> undef)
 101 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 4, <vscale x 8 x i1> undef)
 102 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 4, <vscale x 4 x i1> undef)
 103 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 4, <vscale x 2 x i1> undef)
 104 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> undef, <vscale x 1 x float*> undef, i32 4, <vscale x 1 x i1> undef)
 105 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half> undef, <vscale x 32 x half*> undef, i32 2, <vscale x 32 x i1> undef)
 106 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half> undef, <vscale x 16 x half*> undef, i32 2, <vscale x 16 x i1> undef)
 107 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> undef, <vscale x 8 x half*> undef, i32 2, <vscale x 8 x i1> undef)
 108 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> undef, <vscale x 4 x half*> undef, i32 2, <vscale x 4 x i1> undef)
 109 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> undef, <vscale x 2 x half*> undef, i32 2, <vscale x 2 x i1> undef)
 110 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> undef, <vscale x 1 x half*> undef, i32 2, <vscale x 1 x i1> undef)
 111 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> undef, <vscale x 8 x i64*> undef, i32 8, <vscale x 8 x i1> undef)
 112 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> undef, <vscale x 4 x i64*> undef, i32 8, <vscale x 4 x i1> undef)
 113 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> undef, <vscale x 2 x i64*> undef, i32 8, <vscale x 2 x i1> undef)
 114 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 8, <vscale x 1 x i1> undef)
 115 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32> undef, <vscale x 16 x i32*> undef, i32 4, <vscale x 16 x i1> undef)
 116 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 4, <vscale x 8 x i1> undef)
 117 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 4, <vscale x 4 x i1> undef)
 118 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> undef, <vscale x 2 x i32*> undef, i32 4, <vscale x 2 x i1> undef)
 119 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> undef, <vscale x 1 x i32*> undef, i32 4, <vscale x 1 x i1> undef)
 120 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16> undef, <vscale x 32 x i16*> undef, i32 2, <vscale x 32 x i1> undef)
 121 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 2, <vscale x 16 x i1> undef)
 122 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 2, <vscale x 8 x i1> undef)
 123 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 2, <vscale x 4 x i1> undef)
 124 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 2, <vscale x 2 x i1> undef)
 125 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 2, <vscale x 1 x i1> undef)
 126 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv64i8.nxv64p0i8(<vscale x 64 x i8> undef, <vscale x 64 x i8*> undef, i32 1, <vscale x 64 x i1> undef)
 127 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i8.nxv32p0i8(<vscale x 32 x i8> undef, <vscale x 32 x i8*> undef, i32 1, <vscale x 32 x i1> undef)
 128 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i8.nxv16p0i8(<vscale x 16 x i8> undef, <vscale x 16 x i8*> undef, i32 1, <vscale x 16 x i1> undef)
 129 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> undef, <vscale x 8 x i8*> undef, i32 1, <vscale x 8 x i1> undef)
 130 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> undef, <vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef)
 131 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> undef, <vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef)
 132 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> undef, <vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef)
 133 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef)
 134 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef)
 135 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef)
 136 ; UNSUPPORTED-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef)
 137 ; UNSUPPORTED-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 138 ;
       ; Purpose: record the cost-model result for llvm.masked.scatter on scalable
       ; vectors of every element type below, with all operands undef. The i32
       ; alignment operand is 8 for double/i64/pointer elements, 4 for float/i32,
       ; 2 for half/i16 and 1 for i8.
       ;
       ; Expectations visible in the assertions above, per RUN configuration:
       ;   - vector extension enabled, no vector-bits-max: 1024 x the minimum
       ;     element count (nxv1 -> 1024 ... nxv64 -> 65536);
       ;   - vector extension enabled, -riscv-v-vector-bits-max=256: 4 x the
       ;     minimum element count (nxv1 -> 4 ... nxv64 -> 256);
       ;   - no -mattr at all: every scatter is reported as an invalid cost.
       ; The assertions are autogenerated; regenerate them with the script named
       ; in the NOTE line rather than editing them by hand.

       ; double elements, alignment 8
 139   call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 8, <vscale x 8 x i1> undef)
 140   call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 8, <vscale x 4 x i1> undef)
 141   call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 8, <vscale x 2 x i1> undef)
 142   call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> undef, <vscale x 1 x double*> undef, i32 8, <vscale x 1 x i1> undef)
 143
       ; float elements, alignment 4
 144   call void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float> undef, <vscale x 16 x float*> undef, i32 4, <vscale x 16 x i1> undef)
 145   call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 4, <vscale x 8 x i1> undef)
 146   call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 4, <vscale x 4 x i1> undef)
 147   call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 4, <vscale x 2 x i1> undef)
 148   call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> undef, <vscale x 1 x float*> undef, i32 4, <vscale x 1 x i1> undef)
 149
       ; half elements, alignment 2
 150   call void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half> undef, <vscale x 32 x half*> undef, i32 2, <vscale x 32 x i1> undef)
 151   call void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half> undef, <vscale x 16 x half*> undef, i32 2, <vscale x 16 x i1> undef)
 152   call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> undef, <vscale x 8 x half*> undef, i32 2, <vscale x 8 x i1> undef)
 153   call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> undef, <vscale x 4 x half*> undef, i32 2, <vscale x 4 x i1> undef)
 154   call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> undef, <vscale x 2 x half*> undef, i32 2, <vscale x 2 x i1> undef)
 155   call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> undef, <vscale x 1 x half*> undef, i32 2, <vscale x 1 x i1> undef)
 156
       ; i64 elements, alignment 8
 157   call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> undef, <vscale x 8 x i64*> undef, i32 8, <vscale x 8 x i1> undef)
 158   call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> undef, <vscale x 4 x i64*> undef, i32 8, <vscale x 4 x i1> undef)
 159   call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> undef, <vscale x 2 x i64*> undef, i32 8, <vscale x 2 x i1> undef)
 160   call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 8, <vscale x 1 x i1> undef)
 161
       ; i32 elements, alignment 4
 162   call void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32> undef, <vscale x 16 x i32*> undef, i32 4, <vscale x 16 x i1> undef)
 163   call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 4, <vscale x 8 x i1> undef)
 164   call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 4, <vscale x 4 x i1> undef)
 165   call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> undef, <vscale x 2 x i32*> undef, i32 4, <vscale x 2 x i1> undef)
 166   call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> undef, <vscale x 1 x i32*> undef, i32 4, <vscale x 1 x i1> undef)
 167
       ; i16 elements, alignment 2
 168   call void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16> undef, <vscale x 32 x i16*> undef, i32 2, <vscale x 32 x i1> undef)
 169   call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 2, <vscale x 16 x i1> undef)
 170   call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 2, <vscale x 8 x i1> undef)
 171   call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 2, <vscale x 4 x i1> undef)
 172   call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 2, <vscale x 2 x i1> undef)
 173   call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 2, <vscale x 1 x i1> undef)
 174
       ; i8 elements, alignment 1
 175   call void @llvm.masked.scatter.nxv64i8.nxv64p0i8(<vscale x 64 x i8> undef, <vscale x 64 x i8*> undef, i32 1, <vscale x 64 x i1> undef)
 176   call void @llvm.masked.scatter.nxv32i8.nxv32p0i8(<vscale x 32 x i8> undef, <vscale x 32 x i8*> undef, i32 1, <vscale x 32 x i1> undef)
 177   call void @llvm.masked.scatter.nxv16i8.nxv16p0i8(<vscale x 16 x i8> undef, <vscale x 16 x i8*> undef, i32 1, <vscale x 16 x i1> undef)
 178   call void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8> undef, <vscale x 8 x i8*> undef, i32 1, <vscale x 8 x i1> undef)
 179   call void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8> undef, <vscale x 4 x i8*> undef, i32 1, <vscale x 4 x i1> undef)
 180   call void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> undef, <vscale x 2 x i8*> undef, i32 1, <vscale x 2 x i1> undef)
 181   call void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8> undef, <vscale x 1 x i8*> undef, i32 1, <vscale x 1 x i1> undef)
 182
       ; pointer (i8*) elements, alignment 8
 183   call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 8, <vscale x 8 x i1> undef)
 184   call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 8, <vscale x 4 x i1> undef)
 185   call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 8, <vscale x 2 x i1> undef)
 186   call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 8, <vscale x 1 x i1> undef)
 187
 188   ret void
 189 }
 190
 ; Cost-model test: scalable masked scatters whose alignment operand (the i32
 ; third argument) is smaller than the element size -- 2 for f64/f32, and 1 for
 ; f16, i64, i32, i16 and pointer elements. Every scatter is expected to be
 ; reported as "Invalid cost". The assertions use the plain CHECK prefix, which
 ; all three RUN lines at the top of the file enable, so the same result is
 ; expected with and without the vector extension.
 ; There are no i8 cases here: the smallest alignment used is 1, which already
 ; equals the size of an i8 element.
 191 define void @masked_scatter_unaligned() {
 192 ; CHECK-LABEL: 'masked_scatter_unaligned'
 193 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 2, <vscale x 8 x i1> undef)
 194 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 2, <vscale x 4 x i1> undef)
 195 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 2, <vscale x 2 x i1> undef)
 196 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> undef, <vscale x 1 x double*> undef, i32 2, <vscale x 1 x i1> undef)
 197 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float> undef, <vscale x 16 x float*> undef, i32 2, <vscale x 16 x i1> undef)
 198 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 2, <vscale x 8 x i1> undef)
 199 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 2, <vscale x 4 x i1> undef)
 200 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 2, <vscale x 2 x i1> undef)
 201 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> undef, <vscale x 1 x float*> undef, i32 2, <vscale x 1 x i1> undef)
 202 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half> undef, <vscale x 32 x half*> undef, i32 1, <vscale x 32 x i1> undef)
 203 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half> undef, <vscale x 16 x half*> undef, i32 1, <vscale x 16 x i1> undef)
 204 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> undef, <vscale x 8 x half*> undef, i32 1, <vscale x 8 x i1> undef)
 205 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> undef, <vscale x 4 x half*> undef, i32 1, <vscale x 4 x i1> undef)
 206 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> undef, <vscale x 2 x half*> undef, i32 1, <vscale x 2 x i1> undef)
 207 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> undef, <vscale x 1 x half*> undef, i32 1, <vscale x 1 x i1> undef)
 208 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> undef, <vscale x 8 x i64*> undef, i32 1, <vscale x 8 x i1> undef)
 209 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> undef, <vscale x 4 x i64*> undef, i32 1, <vscale x 4 x i1> undef)
 210 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> undef, <vscale x 2 x i64*> undef, i32 1, <vscale x 2 x i1> undef)
 211 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 1, <vscale x 1 x i1> undef)
 212 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32> undef, <vscale x 16 x i32*> undef, i32 1, <vscale x 16 x i1> undef)
 213 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 1, <vscale x 8 x i1> undef)
 214 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 1, <vscale x 4 x i1> undef)
 215 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> undef, <vscale x 2 x i32*> undef, i32 1, <vscale x 2 x i1> undef)
 216 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> undef, <vscale x 1 x i32*> undef, i32 1, <vscale x 1 x i1> undef)
 217 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16> undef, <vscale x 32 x i16*> undef, i32 1, <vscale x 32 x i1> undef)
 218 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef)
 219 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef)
 220 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef)
 221 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 1, <vscale x 2 x i1> undef)
 222 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 1, <vscale x 1 x i1> undef)
 223 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 1, <vscale x 8 x i1> undef)
 224 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 1, <vscale x 4 x i1> undef)
 225 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 1, <vscale x 2 x i1> undef)
 226 ; CHECK-NEXT:  Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 1, <vscale x 1 x i1> undef)
 227 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 228 ;
 229   call void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> undef, <vscale x 8 x double*> undef, i32 2, <vscale x 8 x i1> undef)
 230   call void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double> undef, <vscale x 4 x double*> undef, i32 2, <vscale x 4 x i1> undef)
 231   call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> undef, <vscale x 2 x double*> undef, i32 2, <vscale x 2 x i1> undef)
 232   call void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double> undef, <vscale x 1 x double*> undef, i32 2, <vscale x 1 x i1> undef)
 233
 234   call void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float> undef, <vscale x 16 x float*> undef, i32 2, <vscale x 16 x i1> undef)
 235   call void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float> undef, <vscale x 8 x float*> undef, i32 2, <vscale x 8 x i1> undef)
 236   call void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float> undef, <vscale x 4 x float*> undef, i32 2, <vscale x 4 x i1> undef)
 237   call void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float> undef, <vscale x 2 x float*> undef, i32 2, <vscale x 2 x i1> undef)
 238   call void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float> undef, <vscale x 1 x float*> undef, i32 2, <vscale x 1 x i1> undef)
 239
 240   call void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half> undef, <vscale x 32 x half*> undef, i32 1, <vscale x 32 x i1> undef)
 241   call void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half> undef, <vscale x 16 x half*> undef, i32 1, <vscale x 16 x i1> undef)
 242   call void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half> undef, <vscale x 8 x half*> undef, i32 1, <vscale x 8 x i1> undef)
 243   call void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half> undef, <vscale x 4 x half*> undef, i32 1, <vscale x 4 x i1> undef)
 244   call void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half> undef, <vscale x 2 x half*> undef, i32 1, <vscale x 2 x i1> undef)
 245   call void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half> undef, <vscale x 1 x half*> undef, i32 1, <vscale x 1 x i1> undef)
 246
 247   call void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64> undef, <vscale x 8 x i64*> undef, i32 1, <vscale x 8 x i1> undef)
 248   call void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64> undef, <vscale x 4 x i64*> undef, i32 1, <vscale x 4 x i1> undef)
 249   call void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64> undef, <vscale x 2 x i64*> undef, i32 1, <vscale x 2 x i1> undef)
 250   call void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64> undef, <vscale x 1 x i64*> undef, i32 1, <vscale x 1 x i1> undef)
 251
 252   call void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32> undef, <vscale x 16 x i32*> undef, i32 1, <vscale x 16 x i1> undef)
 253   call void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32> undef, <vscale x 8 x i32*> undef, i32 1, <vscale x 8 x i1> undef)
 254   call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> undef, <vscale x 4 x i32*> undef, i32 1, <vscale x 4 x i1> undef)
 255   call void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32> undef, <vscale x 2 x i32*> undef, i32 1, <vscale x 2 x i1> undef)
 256   call void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32> undef, <vscale x 1 x i32*> undef, i32 1, <vscale x 1 x i1> undef)
 257
 258   call void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16> undef, <vscale x 32 x i16*> undef, i32 1, <vscale x 32 x i1> undef)
 259   call void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16> undef, <vscale x 16 x i16*> undef, i32 1, <vscale x 16 x i1> undef)
 260   call void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16> undef, <vscale x 8 x i16*> undef, i32 1, <vscale x 8 x i1> undef)
 261   call void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16> undef, <vscale x 4 x i16*> undef, i32 1, <vscale x 4 x i1> undef)
 262   call void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16> undef, <vscale x 2 x i16*> undef, i32 1, <vscale x 2 x i1> undef)
 263   call void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16> undef, <vscale x 1 x i16*> undef, i32 1, <vscale x 1 x i1> undef)
 264
 265   call void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*> undef, <vscale x 8 x i8**> undef, i32 1, <vscale x 8 x i1> undef)
 266   call void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*> undef, <vscale x 4 x i8**> undef, i32 1, <vscale x 4 x i1> undef)
 267   call void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*> undef, <vscale x 2 x i8**> undef, i32 1, <vscale x 2 x i1> undef)
 268   call void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*> undef, <vscale x 1 x i8**> undef, i32 1, <vscale x 1 x i1> undef)
 269
 270   ret void
 271 }
 272
 ; Declarations of the llvm.masked.scatter overloads used by the test functions
 ; above, grouped by stored element type (f64, f32, f16, i64, i32, i16, i8 and
 ; i8* elements). Each overload takes, in order: the scalable vector of values
 ; to store, the vector of destination pointers, an i32 alignment, and an i1
 ; mask vector. The name suffix spells the value vector type followed by the
 ; pointer vector type. The i8 overloads are not called from
 ; masked_scatter_unaligned.
 273 declare void @llvm.masked.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double>, <vscale x 8 x double*>, i32, <vscale x 8 x i1>)
 274 declare void @llvm.masked.scatter.nxv4f64.nxv4p0f64(<vscale x 4 x double>, <vscale x 4 x double*>, i32, <vscale x 4 x i1>)
 275 declare void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double>, <vscale x 2 x double*>, i32, <vscale x 2 x i1>)
 276 declare void @llvm.masked.scatter.nxv1f64.nxv1p0f64(<vscale x 1 x double>, <vscale x 1 x double*>, i32, <vscale x 1 x i1>)
 277
 278 declare void @llvm.masked.scatter.nxv16f32.nxv16p0f32(<vscale x 16 x float>, <vscale x 16 x float*>, i32, <vscale x 16 x i1>)
 279 declare void @llvm.masked.scatter.nxv8f32.nxv8p0f32(<vscale x 8 x float>, <vscale x 8 x float*>, i32, <vscale x 8 x i1>)
 280 declare void @llvm.masked.scatter.nxv4f32.nxv4p0f32(<vscale x 4 x float>, <vscale x 4 x float*>, i32, <vscale x 4 x i1>)
 281 declare void @llvm.masked.scatter.nxv2f32.nxv2p0f32(<vscale x 2 x float>, <vscale x 2 x float*>, i32, <vscale x 2 x i1>)
 282 declare void @llvm.masked.scatter.nxv1f32.nxv1p0f32(<vscale x 1 x float>, <vscale x 1 x float*>, i32, <vscale x 1 x i1>)
 283
 284 declare void @llvm.masked.scatter.nxv32f16.nxv32p0f16(<vscale x 32 x half>, <vscale x 32 x half*>, i32, <vscale x 32 x i1>)
 285 declare void @llvm.masked.scatter.nxv16f16.nxv16p0f16(<vscale x 16 x half>, <vscale x 16 x half*>, i32, <vscale x 16 x i1>)
 286 declare void @llvm.masked.scatter.nxv8f16.nxv8p0f16(<vscale x 8 x half>, <vscale x 8 x half*>, i32, <vscale x 8 x i1>)
 287 declare void @llvm.masked.scatter.nxv4f16.nxv4p0f16(<vscale x 4 x half>, <vscale x 4 x half*>, i32, <vscale x 4 x i1>)
 288 declare void @llvm.masked.scatter.nxv2f16.nxv2p0f16(<vscale x 2 x half>, <vscale x 2 x half*>, i32, <vscale x 2 x i1>)
 289 declare void @llvm.masked.scatter.nxv1f16.nxv1p0f16(<vscale x 1 x half>, <vscale x 1 x half*>, i32, <vscale x 1 x i1>)
 290
 291 declare void @llvm.masked.scatter.nxv8i64.nxv8p0i64(<vscale x 8 x i64>, <vscale x 8 x i64*>, i32, <vscale x 8 x i1>)
 292 declare void @llvm.masked.scatter.nxv4i64.nxv4p0i64(<vscale x 4 x i64>, <vscale x 4 x i64*>, i32, <vscale x 4 x i1>)
 293 declare void @llvm.masked.scatter.nxv2i64.nxv2p0i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
 294 declare void @llvm.masked.scatter.nxv1i64.nxv1p0i64(<vscale x 1 x i64>, <vscale x 1 x i64*>, i32, <vscale x 1 x i1>)
 295
 296 declare void @llvm.masked.scatter.nxv16i32.nxv16p0i32(<vscale x 16 x i32>, <vscale x 16 x i32*>, i32, <vscale x 16 x i1>)
 297 declare void @llvm.masked.scatter.nxv8i32.nxv8p0i32(<vscale x 8 x i32>, <vscale x 8 x i32*>, i32, <vscale x 8 x i1>)
 298 declare void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
 299 declare void @llvm.masked.scatter.nxv2i32.nxv2p0i32(<vscale x 2 x i32>, <vscale x 2 x i32*>, i32, <vscale x 2 x i1>)
 300 declare void @llvm.masked.scatter.nxv1i32.nxv1p0i32(<vscale x 1 x i32>, <vscale x 1 x i32*>, i32, <vscale x 1 x i1>)
 301
 302 declare void @llvm.masked.scatter.nxv32i16.nxv32p0i16(<vscale x 32 x i16>, <vscale x 32 x i16*>, i32, <vscale x 32 x i1>)
 303 declare void @llvm.masked.scatter.nxv16i16.nxv16p0i16(<vscale x 16 x i16>, <vscale x 16 x i16*>, i32, <vscale x 16 x i1>)
 304 declare void @llvm.masked.scatter.nxv8i16.nxv8p0i16(<vscale x 8 x i16>, <vscale x 8 x i16*>, i32, <vscale x 8 x i1>)
 305 declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32, <vscale x 4 x i1>)
 306 declare void @llvm.masked.scatter.nxv2i16.nxv2p0i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
 307 declare void @llvm.masked.scatter.nxv1i16.nxv1p0i16(<vscale x 1 x i16>, <vscale x 1 x i16*>, i32, <vscale x 1 x i1>)
 308
 309 declare void @llvm.masked.scatter.nxv64i8.nxv64p0i8(<vscale x 64 x i8>, <vscale x 64 x i8*>, i32, <vscale x 64 x i1>)
 310 declare void @llvm.masked.scatter.nxv32i8.nxv32p0i8(<vscale x 32 x i8>, <vscale x 32 x i8*>, i32, <vscale x 32 x i1>)
 311 declare void @llvm.masked.scatter.nxv16i8.nxv16p0i8(<vscale x 16 x i8>, <vscale x 16 x i8*>, i32, <vscale x 16 x i1>)
 312 declare void @llvm.masked.scatter.nxv8i8.nxv8p0i8(<vscale x 8 x i8>, <vscale x 8 x i8*>, i32, <vscale x 8 x i1>)
 313 declare void @llvm.masked.scatter.nxv4i8.nxv4p0i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)
 314 declare void @llvm.masked.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
 315 declare void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, i32, <vscale x 1 x i1>)
 316
 317 declare void @llvm.masked.scatter.nxv8p0i8.nxv8p0p0i8(<vscale x 8 x i8*>, <vscale x 8 x i8**>, i32, <vscale x 8 x i1>)
 318 declare void @llvm.masked.scatter.nxv4p0i8.nxv4p0p0i8(<vscale x 4 x i8*>, <vscale x 4 x i8**>, i32, <vscale x 4 x i1>)
 319 declare void @llvm.masked.scatter.nxv2p0i8.nxv2p0p0i8(<vscale x 2 x i8*>, <vscale x 2 x i8**>, i32, <vscale x 2 x i1>)
 320 declare void @llvm.masked.scatter.nxv1p0i8.nxv1p0p0i8(<vscale x 1 x i8*>, <vscale x 1 x i8**>, i32, <vscale x 1 x i1>)
 321