llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-vector-base-scalar-offset.ll

   1 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
   2
   3 ;
   4 ; LD1B, LD1H, LD1W, LD1D: vector base + scalar offset (index)
   5 ;   e.g. ld1b { z0.d }, p0/z, [x0, z0.d]
   6 ;
   7
   8 ; LD1B
   9 define <vscale x 4 x i32> @gld1b_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
  10 ; CHECK-LABEL: gld1b_s_scalar_offset:
  11 ; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
  12 ; CHECK-NEXT: ret
  13   %gathered = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(
  14       <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, ; predicate, 32-bit vector bases (expected address form uses uxtw)
  15       i64 %offset)                                      ; single i64 offset
  16   %extended = zext <vscale x 4 x i8> %gathered to <vscale x 4 x i32> ; zero-extension pairs with the unsigned ld1b expected above
  17   ret <vscale x 4 x i32> %extended
  18 }
  19
  20 define <vscale x 2 x i64> @gld1b_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
  21 ; CHECK-LABEL: gld1b_d_scalar_offset:
  22 ; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d]
  23 ; CHECK-NEXT: ret
  24   %gathered = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(
  25       <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, ; predicate, 64-bit vector bases
  26       i64 %offset)                                      ; single i64 offset
  27   %extended = zext <vscale x 2 x i8> %gathered to <vscale x 2 x i64> ; zero-extension pairs with the unsigned ld1b expected above
  28   ret <vscale x 2 x i64> %extended
  29 }
  30
  31 ; LD1H
  32 define <vscale x 4 x i32> @gld1h_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
  33 ; CHECK-LABEL: gld1h_s_scalar_offset:
  34 ; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
  35 ; CHECK-NEXT: ret
  36   %gathered = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(
  37       <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, ; predicate, 32-bit vector bases (expected address form uses uxtw)
  38       i64 %offset)                                      ; single i64 offset
  39   %extended = zext <vscale x 4 x i16> %gathered to <vscale x 4 x i32> ; zero-extension pairs with the unsigned ld1h expected above
  40   ret <vscale x 4 x i32> %extended
  41 }
  42
  43 define <vscale x 2 x i64> @gld1h_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
  44 ; CHECK-LABEL: gld1h_d_scalar_offset:
  45 ; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d]
  46 ; CHECK-NEXT: ret
  47   %gathered = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(
  48       <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, ; predicate, 64-bit vector bases
  49       i64 %offset)                                      ; single i64 offset
  50   %extended = zext <vscale x 2 x i16> %gathered to <vscale x 2 x i64> ; zero-extension pairs with the unsigned ld1h expected above
  51   ret <vscale x 2 x i64> %extended
  52 }
  53
  54 ; LD1W
  55 define <vscale x 4 x i32> @gld1w_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
  56 ; CHECK-LABEL: gld1w_s_scalar_offset:
  57 ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
  58 ; CHECK-NEXT: ret
  59   %gathered = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(
  60       <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, ; predicate, 32-bit vector bases (expected address form uses uxtw)
  61       i64 %offset)                                      ; single i64 offset
  62   ret <vscale x 4 x i32> %gathered ; i32 elements already fill the result type, so no extension is applied
  63 }
  64
  65 define <vscale x 2 x i64> @gld1w_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
  66 ; CHECK-LABEL: gld1w_d_scalar_offset:
  67 ; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d]
  68 ; CHECK-NEXT: ret
  69   %gathered = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(
  70       <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, ; predicate, 64-bit vector bases
  71       i64 %offset)                                      ; single i64 offset
  72   %extended = zext <vscale x 2 x i32> %gathered to <vscale x 2 x i64> ; zero-extension pairs with the unsigned ld1w expected above
  73   ret <vscale x 2 x i64> %extended
  74 }
  75
  76 define <vscale x 4 x float> @gld1w_s_scalar_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
  77 ; CHECK-LABEL: gld1w_s_scalar_offset_float:
  78 ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
  79 ; CHECK-NEXT: ret
  80   %gathered = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(
  81       <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, ; predicate, 32-bit vector bases (expected address form uses uxtw)
  82       i64 %offset)                                      ; single i64 offset
  83   ret <vscale x 4 x float> %gathered ; float elements are returned as loaded; same ld1w expected as for the i32 variant
  84 }
  85
  86 ; LD1D
  87 define <vscale x 2 x i64> @gld1d_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
  88 ; CHECK-LABEL: gld1d_d_scalar_offset:
  89 ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d]
  90 ; CHECK-NEXT: ret
  91   %gathered = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(
  92       <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, ; predicate, 64-bit vector bases
  93       i64 %offset)                                      ; single i64 offset
  94   ret <vscale x 2 x i64> %gathered ; i64 elements already fill the result type, so no extension is applied
  95 }
  96
  97 define <vscale x 2 x double> @gld1d_d_scalar_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
  98 ; CHECK-LABEL: gld1d_d_scalar_offset_double:
  99 ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d]
 100 ; CHECK-NEXT: ret
 101   %gathered = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(
 102       <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, ; predicate, 64-bit vector bases
 103       i64 %offset)                                      ; single i64 offset
 104   ret <vscale x 2 x double> %gathered ; double elements are returned as loaded; same ld1d expected as for the i64 variant
 105 }
 106
 107 ; LD1SB, LD1SH, LD1SW: vector base + scalar offset (index)
 108 ;   e.g. ld1sb { z0.d }, p0/z, [x0, z0.d]
 109 ;
 110
 111 ; LD1SB
 112 define <vscale x 4 x i32> @gld1sb_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 113 ; CHECK-LABEL: gld1sb_s_scalar_offset:
 114 ; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
 115 ; CHECK-NEXT: ret
 116   %gathered = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(
 117       <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, ; predicate, 32-bit vector bases (expected address form uses uxtw)
 118       i64 %offset)                                      ; single i64 offset
 119   %extended = sext <vscale x 4 x i8> %gathered to <vscale x 4 x i32> ; sign-extension pairs with the signed ld1sb expected above
 120   ret <vscale x 4 x i32> %extended
 121 }
 122
 123 define <vscale x 2 x i64> @gld1sb_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 124 ; CHECK-LABEL: gld1sb_d_scalar_offset:
 125 ; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d]
 126 ; CHECK-NEXT: ret
 127   %gathered = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(
 128       <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, ; predicate, 64-bit vector bases
 129       i64 %offset)                                      ; single i64 offset
 130   %extended = sext <vscale x 2 x i8> %gathered to <vscale x 2 x i64> ; sign-extension pairs with the signed ld1sb expected above
 131   ret <vscale x 2 x i64> %extended
 132 }
 133
 134 ; LD1SH
 135 define <vscale x 4 x i32> @gld1sh_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
 136 ; CHECK-LABEL: gld1sh_s_scalar_offset:
 137 ; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
 138 ; CHECK-NEXT: ret
 139   %gathered = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(
 140       <vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, ; predicate, 32-bit vector bases (expected address form uses uxtw)
 141       i64 %offset)                                      ; single i64 offset
 142   %extended = sext <vscale x 4 x i16> %gathered to <vscale x 4 x i32> ; sign-extension pairs with the signed ld1sh expected above
 143   ret <vscale x 4 x i32> %extended
 144 }
 145
 146 define <vscale x 2 x i64> @gld1sh_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 147 ; CHECK-LABEL: gld1sh_d_scalar_offset:
 148 ; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d]
 149 ; CHECK-NEXT: ret
 150   %gathered = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(
 151       <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, ; predicate, 64-bit vector bases
 152       i64 %offset)                                      ; single i64 offset
 153   %extended = sext <vscale x 2 x i16> %gathered to <vscale x 2 x i64> ; sign-extension pairs with the signed ld1sh expected above
 154   ret <vscale x 2 x i64> %extended
 155 }
 156
 157 ; LD1SW
 158 define <vscale x 2 x i64> @gld1sw_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
 159 ; CHECK-LABEL: gld1sw_d_scalar_offset:
 160 ; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d]
 161 ; CHECK-NEXT: ret
 162   %gathered = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(
 163       <vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, ; predicate, 64-bit vector bases
 164       i64 %offset)                                      ; single i64 offset
 165   %extended = sext <vscale x 2 x i32> %gathered to <vscale x 2 x i64> ; sign-extension pairs with the signed ld1sw expected above
 166   ret <vscale x 2 x i64> %extended
 167 }
 168
 169 ; LD1B/LD1SB: i8 elements (the tests in this file zext the result for LD1B and sext it for LD1SB)
 170 declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
 171 declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
 172
 173 ; LD1H/LD1SH: i16 elements (the tests in this file zext the result for LD1H and sext it for LD1SH)
 174 declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
 175 declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
 176
 177 ; LD1W/LD1SW: i32 elements (the nxv2i32 result is zext'd for LD1W and sext'd for LD1SW; nxv4i32 is returned as loaded)
 178 declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
 179 declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
 180 ; LD1W: float elements
 181 declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
 182
 183 ; LD1D: i64 elements
 184 declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
 185 ; LD1D: double elements
 186 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)