; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LDFF1B, LDFF1W, LDFF1H, LDFF1D: vector base + scalar offset (index)
;   e.g. ldff1b { z0.d }, p0/z, [x0, z0.d]
;
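; Note: the per-lane base addresses live in the Z register operand and the
; scalar byte offset is passed in x0, so these intrinsics select the
; register-offset gather forms with the operand roles swapped; for the 32-bit
; (.s) cases the base elements are zero-extended via uxtw, as the checks below
; show. In C with the ACLE, such loads would typically come from intrinsics
; like svldff1ub_gather_u32base_offset_u32(pg, bases, offset) (the exact ACLE
; spelling is an assumption here, shown for illustration only).
;
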
define <vscale x 4 x i32> @gldff1b_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1b_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1b_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1b_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x i32> @gldff1h_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1h_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1h_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1h_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x i32> @gldff1w_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1w_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @gldff1w_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1w_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @gldff1w_s_scalar_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1w_s_scalar_offset_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
  ret <vscale x 4 x float> %load
}

define <vscale x 2 x i64> @gldff1d_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1d_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gldff1d_d_scalar_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1d_d_scalar_offset_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  ret <vscale x 2 x double> %load
}

;
; LDFF1SB, LDFF1SW, LDFF1SH: vector base + scalar offset (index)
;   e.g. ldff1sb { z0.d }, p0/z, [x0, z0.d]
;
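; Note: these tests reuse the same gather intrinsics as the section above;
; sign-extending the narrow result (sext instead of zext) is what selects the
; ldff1sb/ldff1sh/ldff1sw forms.
;
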
define <vscale x 4 x i32> @gldff1sb_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1sb_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sb_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1sb_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x i32> @gldff1sh_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1sh_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sh_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1sh_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1sw_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1sw_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

declare <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

declare <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

declare <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)