; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LD1B, LD1W, LD1H, LD1D: base + 32-bit unscaled offset, sign (sxtw) or zero
; (uxtw) extended to 64 bits.
;   e.g. ld1h { z0.d }, p0/z, [x0, z0.d, uxtw]
;
; LD1B
define <vscale x 4 x i32> @gld1b_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1b_s_uxtw:
; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gld1b_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1b_s_sxtw:
; CHECK: ld1b { z0.s }, p0/z, [x0, z0.s, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1b_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1b_d_uxtw:
; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8(<vscale x 2 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1b_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1b_d_sxtw:
; CHECK: ld1b { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8(<vscale x 2 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
; LD1H
define <vscale x 4 x i32> @gld1h_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1h_s_uxtw:
; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gld1h_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1h_s_sxtw:
; CHECK: ld1h { z0.s }, p0/z, [x0, z0.s, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1h_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1h_d_uxtw:
; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1h_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1h_d_sxtw:
; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
; LD1W
define <vscale x 4 x i32> @gld1w_s_uxtw(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_uxtw:
; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @gld1w_s_sxtw(<vscale x 4 x i1> %pg, i32* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_sxtw:
; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @gld1w_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1w_d_uxtw:
; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1w_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1w_d_sxtw:
; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @gld1w_s_uxtw_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_uxtw_float:
; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32(<vscale x 4 x i1> %pg,
                                                                               float* %base,
                                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @gld1w_s_sxtw_float(<vscale x 4 x i1> %pg, float* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_sxtw_float:
; CHECK: ld1w { z0.s }, p0/z, [x0, z0.s, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32(<vscale x 4 x i1> %pg,
                                                                               float* %base,
                                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %load
}
; LD1D
define <vscale x 2 x i64> @gld1d_d_uxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_d_uxtw:
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i64(<vscale x 2 x i1> %pg,
                                                                             i64* %base,
                                                                             <vscale x 2 x i32> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @gld1d_d_sxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_d_sxtw:
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i64(<vscale x 2 x i1> %pg,
                                                                             i64* %base,
                                                                             <vscale x 2 x i32> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_d_uxtw_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_d_uxtw_double:
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                double* %base,
                                                                                <vscale x 2 x i32> %b)
  ret <vscale x 2 x double> %load
}

define <vscale x 2 x double> @gld1d_d_sxtw_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_d_sxtw_double:
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                double* %base,
                                                                                <vscale x 2 x i32> %b)
  ret <vscale x 2 x double> %load
}

;
; LD1SB, LD1SW, LD1SH: base + 32-bit unscaled offset, sign (sxtw) or zero
; (uxtw) extended to 64 bits.
;   e.g. ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
;
; LD1SB
define <vscale x 4 x i32> @gld1sb_s_uxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1sb_s_uxtw:
; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gld1sb_s_sxtw(<vscale x 4 x i1> %pg, i8* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1sb_s_sxtw:
; CHECK: ld1sb { z0.s }, p0/z, [x0, z0.s, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1sb_d_uxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sb_d_uxtw:
; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8(<vscale x 2 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sb_d_sxtw(<vscale x 2 x i1> %pg, i8* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sb_d_sxtw:
; CHECK: ld1sb { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8(<vscale x 2 x i1> %pg,
                                                                           i8* %base,
                                                                           <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
; LD1SH
define <vscale x 4 x i32> @gld1sh_s_uxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1sh_s_uxtw:
; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gld1sh_s_sxtw(<vscale x 4 x i1> %pg, i16* %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1sh_s_sxtw:
; CHECK: ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1sh_d_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sh_d_uxtw:
; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sh_d_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sh_d_sxtw:
; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}
; LD1SW
define <vscale x 2 x i64> @gld1sw_d_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sw_d_uxtw:
; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_d_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sw_d_sxtw:
; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i8(<vscale x 2 x i1>, i8*, <vscale x 2 x i32>)
declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8(<vscale x 4 x i1>, i8*, <vscale x 4 x i32>)
declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i8(<vscale x 2 x i1>, i8*, <vscale x 2 x i32>)

declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i32>)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16(<vscale x 4 x i1>, i16*, <vscale x 4 x i32>)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32(<vscale x 4 x i1>, i32*, <vscale x 4 x i32>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i32>)

declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32(<vscale x 4 x i1>, float*, <vscale x 4 x i32>)

declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i32>)

declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i32>)