llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-32bit-scaled-offsets.ll

   1 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
   2
   3 ;
   4 ; LD1H, LD1W, LD1D: base + 32-bit scaled offset, sign (sxtw) or zero (uxtw)
   5 ; extended to 64 bits
   6 ;   e.g. ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
   7 ;
   8
   9 ; LD1H
  10 define <vscale x 4 x i32> @gld1h_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
  11 ; CHECK-LABEL: gld1h_s_uxtw_index:
  12 ; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
  13 ; CHECK-NEXT:   ret
  14   %gathered = call <vscale x 4 x i16>
  15       @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(
  16           <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) ; i16 gather, uxtw-indexed
  17   %widened = zext <vscale x 4 x i16> %gathered to <vscale x 4 x i32> ; zero-extension: the unsigned ld1h form is expected
  18   ret <vscale x 4 x i32> %widened
  19 }
  20
  21 define <vscale x 4 x i32> @gld1h_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
  22 ; CHECK-LABEL: gld1h_s_sxtw_index:
  23 ; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
  24 ; CHECK-NEXT: ret
  25   %gathered = call <vscale x 4 x i16>
  26       @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(
  27           <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) ; i16 gather, sxtw-indexed
  28   %widened = zext <vscale x 4 x i16> %gathered to <vscale x 4 x i32> ; zero-extension: the unsigned ld1h form is expected
  29   ret <vscale x 4 x i32> %widened
  30 }
  31
  32 define <vscale x 2 x i64> @gld1h_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
  33 ; CHECK-LABEL: gld1h_d_uxtw_index:
  34 ; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
  35 ; CHECK-NEXT: ret
  36   %gathered = call <vscale x 2 x i16>
  37       @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(
  38           <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) ; i16 gather, uxtw-indexed
  39   %widened = zext <vscale x 2 x i16> %gathered to <vscale x 2 x i64> ; zero-extension: the unsigned ld1h form is expected
  40   ret <vscale x 2 x i64> %widened
  41 }
  42
  43 define <vscale x 2 x i64> @gld1h_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
  44 ; CHECK-LABEL: gld1h_d_sxtw_index:
  45 ; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
  46 ; CHECK-NEXT: ret
  47   %gathered = call <vscale x 2 x i16>
  48       @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(
  49           <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) ; i16 gather, sxtw-indexed
  50   %widened = zext <vscale x 2 x i16> %gathered to <vscale x 2 x i64> ; zero-extension: the unsigned ld1h form is expected
  51   ret <vscale x 2 x i64> %widened
  52 }
  53
  54 ; LD1W
  55 define <vscale x 4 x i32> @gld1w_s_uxtw_index(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
  56 ; CHECK-LABEL: gld1w_s_uxtw_index:
  57 ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
  58 ; CHECK-NEXT:   ret
  59   %gathered = call <vscale x 4 x i32>
  60       @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(
  61           <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) ; i32 gather, uxtw-indexed
  62   ret <vscale x 4 x i32> %gathered ; returned as loaded, no extension step
  63 }
  64
  65 define <vscale x 4 x i32> @gld1w_s_sxtw_index(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
  66 ; CHECK-LABEL: gld1w_s_sxtw_index:
  67 ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
  68 ; CHECK-NEXT: ret
  69   %gathered = call <vscale x 4 x i32>
  70       @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(
  71           <vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) ; i32 gather, sxtw-indexed
  72   ret <vscale x 4 x i32> %gathered ; returned as loaded, no extension step
  73 }
  74
  75 define <vscale x 2 x i64> @gld1w_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
  76 ; CHECK-LABEL: gld1w_d_uxtw_index:
  77 ; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
  78 ; CHECK-NEXT: ret
  79   %gathered = call <vscale x 2 x i32>
  80       @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(
  81           <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) ; i32 gather, uxtw-indexed
  82   %widened = zext <vscale x 2 x i32> %gathered to <vscale x 2 x i64> ; zero-extension: the unsigned ld1w form is expected
  83   ret <vscale x 2 x i64> %widened
  84 }
  85
  86 define <vscale x 2 x i64> @gld1w_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
  87 ; CHECK-LABEL: gld1w_d_sxtw_index:
  88 ; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
  89 ; CHECK-NEXT: ret
  90   %gathered = call <vscale x 2 x i32>
  91       @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(
  92           <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) ; i32 gather, sxtw-indexed
  93   %widened = zext <vscale x 2 x i32> %gathered to <vscale x 2 x i64> ; zero-extension: the unsigned ld1w form is expected
  94   ret <vscale x 2 x i64> %widened
  95 }
  96
  97 define <vscale x 4 x float> @gld1w_s_uxtw_index_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
  98 ; CHECK-LABEL: gld1w_s_uxtw_index_float:
  99 ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
 100 ; CHECK-NEXT:   ret
 101   %gathered = call <vscale x 4 x float>
 102       @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(
 103           <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) ; float gather, uxtw-indexed
 104   ret <vscale x 4 x float> %gathered ; same ld1w form expected as for the i32 variant
 105 }
 106
 107 define <vscale x 4 x float> @gld1w_s_sxtw_index_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
 108 ; CHECK-LABEL: gld1w_s_sxtw_index_float:
 109 ; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
 110 ; CHECK-NEXT:   ret
 111   %gathered = call <vscale x 4 x float>
 112       @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(
 113           <vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) ; float gather, sxtw-indexed
 114   ret <vscale x 4 x float> %gathered ; same ld1w form expected as for the i32 variant
 115 }
 116
 117 ; LD1D
 118 define <vscale x 2 x i64> @gld1d_s_uxtw_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
 119 ; CHECK-LABEL: gld1d_s_uxtw_index:
 120 ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
 121 ; CHECK-NEXT:   ret
 122   %gathered = call <vscale x 2 x i64>
 123       @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64(
 124           <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) ; i64 gather, uxtw-indexed
 125   ret <vscale x 2 x i64> %gathered ; NOTE(review): the "_s_" in this name has no counterpart in gld1d_sxtw_index; the expected form uses .d elements
 126 }
 127
 128 define <vscale x 2 x i64> @gld1d_sxtw_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
 129 ; CHECK-LABEL: gld1d_sxtw_index:
 130 ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
 131 ; CHECK-NEXT:   ret
 132   %gathered = call <vscale x 2 x i64>
 133       @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64(
 134           <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) ; i64 gather, sxtw-indexed
 135   ret <vscale x 2 x i64> %gathered ; returned as loaded, no extension step
 136 }
 137
 138 define <vscale x 2 x double> @gld1d_uxtw_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
 139 ; CHECK-LABEL: gld1d_uxtw_index_double:
 140 ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
 141 ; CHECK-NEXT:   ret
 142   %gathered = call <vscale x 2 x double>
 143       @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64(
 144           <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) ; double gather, uxtw-indexed
 145   ret <vscale x 2 x double> %gathered ; same ld1d form expected as for the i64 variant
 146 }
 147
 148 define <vscale x 2 x double> @gld1d_sxtw_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
 149 ; CHECK-LABEL: gld1d_sxtw_index_double:
 150 ; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
 151 ; CHECK-NEXT:   ret
 152   %gathered = call <vscale x 2 x double>
 153       @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64(
 154           <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) ; double gather, sxtw-indexed
 155   ret <vscale x 2 x double> %gathered ; same ld1d form expected as for the i64 variant
 156 }
 157
 158 ;
 159 ; LD1SH, LD1SW: base + 32-bit scaled offset, sign (sxtw) or zero (uxtw)
 160 ; extended to 64 bits
 161 ;   e.g. ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
 162 ;
 163
 164 ; LD1SH
 165 define <vscale x 4 x i32> @gld1sh_s_uxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 166 ; CHECK-LABEL: gld1sh_s_uxtw_index:
 167 ; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1]
 168 ; CHECK-NEXT:   ret
 169   %gathered = call <vscale x 4 x i16>
 170       @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(
 171           <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) ; i16 gather, uxtw-indexed
 172   %widened = sext <vscale x 4 x i16> %gathered to <vscale x 4 x i32> ; sign-extension: the signed ld1sh form is expected
 173   ret <vscale x 4 x i32> %widened
 174 }
 175
 176 define <vscale x 4 x i32> @gld1sh_s_sxtw_index(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
 177 ; CHECK-LABEL: gld1sh_s_sxtw_index:
 178 ; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1]
 179 ; CHECK-NEXT: ret
 180   %gathered = call <vscale x 4 x i16>
 181       @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(
 182           <vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) ; i16 gather, sxtw-indexed
 183   %widened = sext <vscale x 4 x i16> %gathered to <vscale x 4 x i32> ; sign-extension: the signed ld1sh form is expected
 184   ret <vscale x 4 x i32> %widened
 185 }
 186
 187 define <vscale x 2 x i64> @gld1sh_d_uxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 188 ; CHECK-LABEL: gld1sh_d_uxtw_index:
 189 ; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
 190 ; CHECK-NEXT: ret
 191   %gathered = call <vscale x 2 x i16>
 192       @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(
 193           <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) ; i16 gather, uxtw-indexed
 194   %widened = sext <vscale x 2 x i16> %gathered to <vscale x 2 x i64> ; sign-extension: the signed ld1sh form is expected
 195   ret <vscale x 2 x i64> %widened
 196 }
 197
 198 define <vscale x 2 x i64> @gld1sh_d_sxtw_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
 199 ; CHECK-LABEL: gld1sh_d_sxtw_index:
 200 ; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
 201 ; CHECK-NEXT: ret
 202   %gathered = call <vscale x 2 x i16>
 203       @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(
 204           <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) ; i16 gather, sxtw-indexed
 205   %widened = sext <vscale x 2 x i16> %gathered to <vscale x 2 x i64> ; sign-extension: the signed ld1sh form is expected
 206   ret <vscale x 2 x i64> %widened
 207 }
 208
 209 ; LD1SW
 210 define <vscale x 2 x i64> @gld1sw_d_uxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 211 ; CHECK-LABEL: gld1sw_d_uxtw_index:
 212 ; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
 213 ; CHECK-NEXT: ret
 214   %gathered = call <vscale x 2 x i32>
 215       @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(
 216           <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) ; i32 gather, uxtw-indexed
 217   %widened = sext <vscale x 2 x i32> %gathered to <vscale x 2 x i64> ; sign-extension: the signed ld1sw form is expected
 218   ret <vscale x 2 x i64> %widened
 219 }
 220
 221 define <vscale x 2 x i64> @gld1sw_d_sxtw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
 222 ; CHECK-LABEL: gld1sw_d_sxtw_index:
 223 ; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
 224 ; CHECK-NEXT: ret
 225   %gathered = call <vscale x 2 x i32>
 226       @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(
 227           <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) ; i32 gather, sxtw-indexed
 228   %widened = sext <vscale x 2 x i32> %gathered to <vscale x 2 x i64> ; sign-extension: the signed ld1sw form is expected
 229   ret <vscale x 2 x i64> %widened
 230 }
 231
 232
 233 ; 16-bit element gathers: used by the LD1H (zext) and LD1SH (sext) tests
 234 declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
 235 declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
 236
 237 declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
 238 declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
 239
 240 ; 32-bit element gathers: used by the LD1W tests and, with sext, the LD1SW tests
 241 declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
 242 declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
 243
 244 declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
 245 declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
 246
 247 declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
 248 declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
 249
 250 ; 64-bit element gathers: used by the LD1D tests (integer and double)
 251 declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
 252 declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
 253
 254 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i32>)
 255 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i32>)