llvm/test/CodeGen/AArch64/sve-intrinsics-gather-loads-64bit-scaled-offset.ll

   1 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
   2
   3 ;
   4 ; LD1H, LD1W, LD1D: base + 64-bit scaled offset
   5 ;   e.g. ld1h z0.d, p0/z, [x0, z0.d, lsl #1]
   6 ;
   7
   8 define <vscale x 2 x i64> @gld1h_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) { ; label pattern ends in ':' so it cannot also match gld1h_index_sxtw / gld1h_index_uxtw
   9 ; CHECK-LABEL: gld1h_index:
  10 ; CHECK:            ld1h        { z0.d }, p0/z, [x0, z0.d, lsl #1]
  11 ; CHECK-NEXT:   ret
  12   ; Gather i16 elements from %base using the 64-bit indices in %b; the zext result is expected from a plain ld1h.
  13   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(
  14       <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b)
  15   %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  16   ret <vscale x 2 x i64> %res
  17 }
  18
  19 define <vscale x 2 x i64> @gld1w_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) { ; label pattern ends in ':' so it cannot also match gld1w_index_sxtw / gld1w_index_uxtw
  20 ; CHECK-LABEL: gld1w_index:
  21 ; CHECK:            ld1w        { z0.d }, p0/z, [x0, z0.d, lsl #2]
  22 ; CHECK-NEXT:   ret
  23   ; Gather i32 elements from %base using the 64-bit indices in %b; the zext result is expected from a plain ld1w.
  24   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(
  25       <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b)
  26   %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  27   ret <vscale x 2 x i64> %res
  28 }
  29
  30 define <vscale x 2 x i64> @gld1d_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) { ; label pattern ends in ':' so it cannot also match gld1d_index_double / _sxtw / _uxtw
  31 ; CHECK-LABEL: gld1d_index:
  32 ; CHECK:            ld1d        { z0.d }, p0/z, [x0, z0.d, lsl #3]
  33 ; CHECK-NEXT:   ret
  34   ; Gather i64 elements from %base using the 64-bit indices in %b; no extension of the result is needed.
  35   %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(
  36       <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b)
  37   ret <vscale x 2 x i64> %load
  38 }
  39
  40 define <vscale x 2 x double> @gld1d_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) { ; label pattern ends in ':' so it cannot also match gld1d_index_double_sxtw / _uxtw
  41 ; CHECK-LABEL: gld1d_index_double:
  42 ; CHECK:            ld1d        { z0.d }, p0/z, [x0, z0.d, lsl #3]
  43 ; CHECK-NEXT:   ret
  44   ; Floating-point variant: gather double elements from %base using the 64-bit indices in %b.
  45   %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(
  46       <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b)
  47   ret <vscale x 2 x double> %load
  48 }
  49
  50 ;
  51 ; LD1SH, LD1SW: base + 64-bit scaled offset
  52 ;   e.g. ld1sh z0.d, p0/z, [x0, z0.d, lsl #1]
  53 ;
  54
  55 define <vscale x 2 x i64> @gld1sh_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) { ; label pattern ends in ':' so it cannot also match gld1sh_index_sxtw / gld1sh_index_uxtw
  56 ; CHECK-LABEL: gld1sh_index:
  57 ; CHECK:            ld1sh       { z0.d }, p0/z, [x0, z0.d, lsl #1]
  58 ; CHECK-NEXT:   ret
  59   ; Gather i16 elements from %base using the 64-bit indices in %b; the sext is expected to fold into ld1sh.
  60   %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(
  61       <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b)
  62   %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  63   ret <vscale x 2 x i64> %res
  64 }
  65
  66 define <vscale x 2 x i64> @gld1sw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) { ; label pattern ends in ':' so it cannot also match gld1sw_index_sxtw / gld1sw_index_uxtw
  67 ; CHECK-LABEL: gld1sw_index:
  68 ; CHECK:            ld1sw       { z0.d }, p0/z, [x0, z0.d, lsl #2]
  69 ; CHECK-NEXT:   ret
  70   ; Gather i32 elements from %base using the 64-bit indices in %b; the sext is expected to fold into ld1sw.
  71   %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(
  72       <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b)
  73   %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  74   ret <vscale x 2 x i64> %res
  75 }
  76
  77 ;
  78 ; LD1H, LD1W, LD1D: base + 64-bit sxtw'd scaled offset
  79 ;   e.g. ld1h z0.d, p0/z, [x0, z0.d, sxtw #1]
  80 ;
  81
  82 define <vscale x 2 x i64> @gld1h_index_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
  83 ; CHECK-LABEL: gld1h_index_sxtw
  84 ; CHECK:            ld1h        { z0.d }, p0/z, [x0, z0.d, sxtw #1]
  85 ; CHECK-NEXT:   ret
  86   ; The offsets pass through the sxtw intrinsic; the expected ld1h absorbs it as the `sxtw #1` addressing mode.
  87   %offs = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(
  88       <vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  89   ; Gather i16 elements from %base with the extended offsets, then zero-extend each element to i64.
  90   %gather = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(
  91       <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %offs)
  92   %wide = zext <vscale x 2 x i16> %gather to <vscale x 2 x i64>
  93   ret <vscale x 2 x i64> %wide
  94 }
  95
  96 define <vscale x 2 x i64> @gld1w_index_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
  97 ; CHECK-LABEL: gld1w_index_sxtw
  98 ; CHECK:            ld1w        { z0.d }, p0/z, [x0, z0.d, sxtw #2]
  99 ; CHECK-NEXT:   ret
 100   ; The offsets pass through the sxtw intrinsic; the expected ld1w absorbs it as the `sxtw #2` addressing mode.
 101   %offs = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(
 102       <vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
 103   ; Gather i32 elements from %base with the extended offsets, then zero-extend each element to i64.
 104   %gather = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(
 105       <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offs)
 106   %wide = zext <vscale x 2 x i32> %gather to <vscale x 2 x i64>
 107   ret <vscale x 2 x i64> %wide
 108 }
 109
 110 define <vscale x 2 x i64> @gld1d_index_sxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
 111 ; CHECK-LABEL: gld1d_index_sxtw
 112 ; CHECK:            ld1d        { z0.d }, p0/z, [x0, z0.d, sxtw #3]
 113 ; CHECK-NEXT:   ret
 114   ; The offsets pass through the sxtw intrinsic; the expected ld1d absorbs it as the `sxtw #3` addressing mode.
 115   %offs = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(
 116       <vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
 117   ; Gather i64 elements from %base with the extended offsets; the result is returned as loaded.
 118   %gather = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(
 119       <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %offs)
 120   ret <vscale x 2 x i64> %gather
 121 }
 122
 123 define <vscale x 2 x double> @gld1d_index_double_sxtw(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
 124 ; CHECK-LABEL: gld1d_index_double_sxtw
 125 ; CHECK:            ld1d        { z0.d }, p0/z, [x0, z0.d, sxtw #3]
 126 ; CHECK-NEXT:   ret
 127   ; The offsets pass through the sxtw intrinsic; the expected ld1d absorbs it as the `sxtw #3` addressing mode.
 128   %offs = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(
 129       <vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
 130   ; Floating-point variant: gather double elements from %base with the extended offsets.
 131   %gather = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(
 132       <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %offs)
 133   ret <vscale x 2 x double> %gather
 134 }
 135
 136 ;
 137 ; LD1SH, LD1SW: base + 64-bit sxtw'd scaled offset
 138 ;   e.g. ld1sh z0.d, p0/z, [x0, z0.d, sxtw #1]
 139 ;
 140
 141 define <vscale x 2 x i64> @gld1sh_index_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 142 ; CHECK-LABEL: gld1sh_index_sxtw
 143 ; CHECK:            ld1sh       { z0.d }, p0/z, [x0, z0.d, sxtw #1]
 144 ; CHECK-NEXT:   ret
 145   ; The offsets pass through the sxtw intrinsic; the expected ld1sh absorbs it as the `sxtw #1` addressing mode.
 146   %offs = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(
 147       <vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
 148   ; Gather i16 elements from %base with the extended offsets, then sign-extend each element to i64.
 149   %gather = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(
 150       <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %offs)
 151   %wide = sext <vscale x 2 x i16> %gather to <vscale x 2 x i64>
 152   ret <vscale x 2 x i64> %wide
 153 }
 154
 155 define <vscale x 2 x i64> @gld1sw_index_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
 156 ; CHECK-LABEL: gld1sw_index_sxtw
 157 ; CHECK:            ld1sw       { z0.d }, p0/z, [x0, z0.d, sxtw #2]
 158 ; CHECK-NEXT:   ret
 159   ; The offsets pass through the sxtw intrinsic; the expected ld1sw absorbs it as the `sxtw #2` addressing mode.
 160   %offs = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(
 161       <vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
 162   ; Gather i32 elements from %base with the extended offsets, then sign-extend each element to i64.
 163   %gather = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(
 164       <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offs)
 165   %wide = sext <vscale x 2 x i32> %gather to <vscale x 2 x i64>
 166   ret <vscale x 2 x i64> %wide
 167 }
 168
 169 ;
 170 ; LD1H, LD1W, LD1D: base + 64-bit uxtw'd scaled offset
 171 ;   e.g. ld1h z0.d, p0/z, [x0, z0.d, uxtw #1]
 172 ;
 173
 174 define <vscale x 2 x i64> @gld1h_index_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 175 ; CHECK-LABEL: gld1h_index_uxtw
 176 ; CHECK:            ld1h        { z0.d }, p0/z, [x0, z0.d, uxtw #1]
 177 ; CHECK-NEXT:   ret
 178   ; The offsets pass through the uxtw intrinsic; the expected ld1h absorbs it as the `uxtw #1` addressing mode.
 179   %offs = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(
 180       <vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
 181   ; Gather i16 elements from %base with the extended offsets, then zero-extend each element to i64.
 182   %gather = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(
 183       <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %offs)
 184   %wide = zext <vscale x 2 x i16> %gather to <vscale x 2 x i64>
 185   ret <vscale x 2 x i64> %wide
 186 }
 187
 188 define <vscale x 2 x i64> @gld1w_index_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
 189 ; CHECK-LABEL: gld1w_index_uxtw
 190 ; CHECK:            ld1w        { z0.d }, p0/z, [x0, z0.d, uxtw #2]
 191 ; CHECK-NEXT:   ret
 192   ; The offsets pass through the uxtw intrinsic; the expected ld1w absorbs it as the `uxtw #2` addressing mode.
 193   %offs = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(
 194       <vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
 195   ; Gather i32 elements from %base with the extended offsets, then zero-extend each element to i64.
 196   %gather = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(
 197       <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offs)
 198   %wide = zext <vscale x 2 x i32> %gather to <vscale x 2 x i64>
 199   ret <vscale x 2 x i64> %wide
 200 }
 201
 202 define <vscale x 2 x i64> @gld1d_index_uxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
 203 ; CHECK-LABEL: gld1d_index_uxtw
 204 ; CHECK:            ld1d        { z0.d }, p0/z, [x0, z0.d, uxtw #3]
 205 ; CHECK-NEXT:   ret
 206   ; The offsets pass through the uxtw intrinsic; the expected ld1d absorbs it as the `uxtw #3` addressing mode.
 207   %offs = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(
 208       <vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
 209   ; Gather i64 elements from %base with the extended offsets; the result is returned as loaded.
 210   %gather = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(
 211       <vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %offs)
 212   ret <vscale x 2 x i64> %gather
 213 }
 214
 215 define <vscale x 2 x double> @gld1d_index_double_uxtw(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
 216 ; CHECK-LABEL: gld1d_index_double_uxtw
 217 ; CHECK:            ld1d        { z0.d }, p0/z, [x0, z0.d, uxtw #3]
 218 ; CHECK-NEXT:   ret
 219   ; The offsets pass through the uxtw intrinsic; the expected ld1d absorbs it as the `uxtw #3` addressing mode.
 220   %offs = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(
 221       <vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
 222   ; Floating-point variant: gather double elements from %base with the extended offsets.
 223   %gather = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(
 224       <vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %offs)
 225   ret <vscale x 2 x double> %gather
 226 }
 227
 228 ;
 229 ; LD1SH, LD1SW: base + 64-bit uxtw'd scaled offset
 230 ;   e.g. ld1sh z0.d, p0/z, [x0, z0.d, uxtw #1]
 231 ;
 232
 233 define <vscale x 2 x i64> @gld1sh_index_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
 234 ; CHECK-LABEL: gld1sh_index_uxtw
 235 ; CHECK:            ld1sh       { z0.d }, p0/z, [x0, z0.d, uxtw #1]
 236 ; CHECK-NEXT:   ret
 237   ; The offsets pass through the uxtw intrinsic; the expected ld1sh absorbs it as the `uxtw #1` addressing mode.
 238   %offs = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(
 239       <vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
 240   ; Gather i16 elements from %base with the extended offsets, then sign-extend each element to i64.
 241   %gather = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(
 242       <vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %offs)
 243   %wide = sext <vscale x 2 x i16> %gather to <vscale x 2 x i64>
 244   ret <vscale x 2 x i64> %wide
 245 }
 246
 247 define <vscale x 2 x i64> @gld1sw_index_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
 248 ; CHECK-LABEL: gld1sw_index_uxtw
 249 ; CHECK:            ld1sw       { z0.d }, p0/z, [x0, z0.d, uxtw #2]
 250 ; CHECK-NEXT:   ret
 251   ; The offsets pass through the uxtw intrinsic; the expected ld1sw absorbs it as the `uxtw #2` addressing mode.
 252   %offs = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(
 253       <vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
 254   ; Gather i32 elements from %base with the extended offsets, then sign-extend each element to i64.
 255   %gather = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(
 256       <vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %offs)
 257   %wide = sext <vscale x 2 x i32> %gather to <vscale x 2 x i64>
 258   ret <vscale x 2 x i64> %wide
 259 }
 260
 261 declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i64>) ; i16 gather; called above as (%pg, %base, index vector)
 262 declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i64>) ; i32 gather, same operand order
 263 declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i64>) ; i64 gather, same operand order
 264 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i64>) ; double gather, same operand order
 265
 266 declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>) ; used by the *_sxtw tests; called above as (undef, %pg, %b)
 267 declare <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>) ; used by the *_uxtw tests; called above as (undef, %pg, %b)