test/CodeGen/AArch64/arm64-ld1.ll

   1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -asm-verbose=false | FileCheck %s
   2
   3 %struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> } ; aggregate of two <8 x i8> vectors
   4 %struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> } ; aggregate of three <8 x i8> vectors
   5 %struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>, <8 x i8>,  <8 x i8> } ; aggregate of four <8 x i8> vectors
   6
   7 define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
   8 ; CHECK-LABEL: ld2_8b
   9 ; The two loaded vectors must land in the ABI result registers (v0, v1),
  10 ; and the address must be taken from the ABI argument register (x0).
  11 ; CHECK: ld2.8b { v0, v1 }, [x0]
  12 ; CHECK-NEXT: ret
  13   %vecs = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
  14   ret %struct.__neon_int8x8x2_t %vecs
  15 }
  16
  17 define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
  18 ; CHECK-LABEL: ld3_8b
  19 ; ABI check: the three result vectors are in v0-v2 and the address is in x0.
  20 ; CHECK: ld3.8b { v0, v1, v2 }, [x0]
  21 ; CHECK-NEXT: ret
  22   %vecs = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
  23   ret %struct.__neon_int8x8x3_t %vecs
  24 }
  25
  26 define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind {
  27 ; CHECK-LABEL: ld4_8b
  28 ; ABI check: the four result vectors are in v0-v3 and the address is in x0.
  29 ; CHECK: ld4.8b { v0, v1, v2, v3 }, [x0]
  30 ; CHECK-NEXT: ret
  31   %vecs = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
  32   ret %struct.__neon_int8x8x4_t %vecs
  33 }
  34
  35 declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) nounwind readonly
  36 declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*) nounwind readonly
  37 declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*) nounwind readonly
  38
  39 %struct.__neon_int8x16x2_t = type { <16 x i8>,  <16 x i8> } ; aggregate of two <16 x i8> vectors
  40 %struct.__neon_int8x16x3_t = type { <16 x i8>,  <16 x i8>,  <16 x i8> } ; aggregate of three <16 x i8> vectors
  41 %struct.__neon_int8x16x4_t = type { <16 x i8>,  <16 x i8>, <16 x i8>,  <16 x i8> } ; aggregate of four <16 x i8> vectors
  42
  43 define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
  44 ; CHECK-LABEL: ld2_16b
  45 ; ABI check: both result vectors are in v0-v1 and the address is in x0.
  46 ; CHECK: ld2.16b { v0, v1 }, [x0]
  47 ; CHECK-NEXT: ret
  48   %vecs = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
  49   ret %struct.__neon_int8x16x2_t %vecs
  50 }
  51
  52 define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
  53 ; CHECK-LABEL: ld3_16b
  54 ; ABI check: the three result vectors are in v0-v2 and the address is in x0.
  55 ; CHECK: ld3.16b { v0, v1, v2 }, [x0]
  56 ; CHECK-NEXT: ret
  57   %vecs = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
  58   ret %struct.__neon_int8x16x3_t %vecs
  59 }
  60
  61 define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind {
  62 ; CHECK-LABEL: ld4_16b
  63 ; ABI check: the four result vectors are in v0-v3 and the address is in x0.
  64 ; CHECK: ld4.16b { v0, v1, v2, v3 }, [x0]
  65 ; CHECK-NEXT: ret
  66   %vecs = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
  67   ret %struct.__neon_int8x16x4_t %vecs
  68 }
  69
  70 declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) nounwind readonly
  71 declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*) nounwind readonly
  72 declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*) nounwind readonly
  73
  74 %struct.__neon_int16x4x2_t = type { <4 x i16>,  <4 x i16> } ; aggregate of two <4 x i16> vectors
  75 %struct.__neon_int16x4x3_t = type { <4 x i16>,  <4 x i16>,  <4 x i16> } ; aggregate of three <4 x i16> vectors
  76 %struct.__neon_int16x4x4_t = type { <4 x i16>,  <4 x i16>, <4 x i16>,  <4 x i16> } ; aggregate of four <4 x i16> vectors
  77
  78 define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
  79 ; CHECK-LABEL: ld2_4h
  80 ; ABI check: both result vectors are in v0-v1 and the address is in x0.
  81 ; CHECK: ld2.4h { v0, v1 }, [x0]
  82 ; CHECK-NEXT: ret
  83   %vecs = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
  84   ret %struct.__neon_int16x4x2_t %vecs
  85 }
  86
  87 define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
  88 ; CHECK-LABEL: ld3_4h
  89 ; ABI check: the three result vectors are in v0-v2 and the address is in x0.
  90 ; CHECK: ld3.4h { v0, v1, v2 }, [x0]
  91 ; CHECK-NEXT: ret
  92   %vecs = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
  93   ret %struct.__neon_int16x4x3_t %vecs
  94 }
  95
  96 define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind {
  97 ; CHECK-LABEL: ld4_4h
  98 ; ABI check: the four result vectors are in v0-v3 and the address is in x0.
  99 ; CHECK: ld4.4h { v0, v1, v2, v3 }, [x0]
 100 ; CHECK-NEXT: ret
 101   %vecs = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
 102   ret %struct.__neon_int16x4x4_t %vecs
 103 }
 104
 105 declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*) nounwind readonly
 106 declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*) nounwind readonly
 107 declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*) nounwind readonly
 108
 109 %struct.__neon_int16x8x2_t = type { <8 x i16>,  <8 x i16> } ; aggregate of two <8 x i16> vectors
 110 %struct.__neon_int16x8x3_t = type { <8 x i16>,  <8 x i16>,  <8 x i16> } ; aggregate of three <8 x i16> vectors
 111 %struct.__neon_int16x8x4_t = type { <8 x i16>,  <8 x i16>, <8 x i16>,  <8 x i16> } ; aggregate of four <8 x i16> vectors
 112
 113 define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
 114 ; CHECK-LABEL: ld2_8h
 115 ; ABI check: both result vectors are in v0-v1 and the address is in x0.
 116 ; CHECK: ld2.8h { v0, v1 }, [x0]
 117 ; CHECK-NEXT: ret
 118   %vecs = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
 119   ret %struct.__neon_int16x8x2_t %vecs
 120 }
 121
 122 define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
 123 ; CHECK-LABEL: ld3_8h
 124 ; ABI check: the three result vectors are in v0-v2 and the address is in x0.
 125 ; CHECK: ld3.8h { v0, v1, v2 }, [x0]
 126 ; CHECK-NEXT: ret
 127   %vecs = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
 128   ret %struct.__neon_int16x8x3_t %vecs
 129 }
 130
 131 define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind {
 132 ; CHECK-LABEL: ld4_8h
 133 ; ABI check: the four result vectors are in v0-v3 and the address is in x0.
 134 ; CHECK: ld4.8h { v0, v1, v2, v3 }, [x0]
 135 ; CHECK-NEXT: ret
 136   %vecs = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
 137   ret %struct.__neon_int16x8x4_t %vecs
 138 }
 139
 140 declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*) nounwind readonly
 141 declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*) nounwind readonly
 142 declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*) nounwind readonly
 143
 144 %struct.__neon_int32x2x2_t = type { <2 x i32>,  <2 x i32> } ; aggregate of two <2 x i32> vectors
 145 %struct.__neon_int32x2x3_t = type { <2 x i32>,  <2 x i32>,  <2 x i32> } ; aggregate of three <2 x i32> vectors
 146 %struct.__neon_int32x2x4_t = type { <2 x i32>,  <2 x i32>, <2 x i32>,  <2 x i32> } ; aggregate of four <2 x i32> vectors
 147
 148 define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
 149 ; CHECK-LABEL: ld2_2s
 150 ; ABI check: both result vectors are in v0-v1 and the address is in x0.
 151 ; CHECK: ld2.2s { v0, v1 }, [x0]
 152 ; CHECK-NEXT: ret
 153   %vecs = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
 154   ret %struct.__neon_int32x2x2_t %vecs
 155 }
 156
 157 define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
 158 ; CHECK-LABEL: ld3_2s
 159 ; ABI check: the three result vectors are in v0-v2 and the address is in x0.
 160 ; CHECK: ld3.2s { v0, v1, v2 }, [x0]
 161 ; CHECK-NEXT: ret
 162   %vecs = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
 163   ret %struct.__neon_int32x2x3_t %vecs
 164 }
 165
 166 define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind {
 167 ; CHECK-LABEL: ld4_2s
 168 ; ABI check: the four result vectors are in v0-v3 and the address is in x0.
 169 ; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0]
 170 ; CHECK-NEXT: ret
 171   %vecs = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
 172   ret %struct.__neon_int32x2x4_t %vecs
 173 }
 174
 175 declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*) nounwind readonly
 176 declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*) nounwind readonly
 177 declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*) nounwind readonly
 178
 179 %struct.__neon_int32x4x2_t = type { <4 x i32>,  <4 x i32> } ; aggregate of two <4 x i32> vectors
 180 %struct.__neon_int32x4x3_t = type { <4 x i32>,  <4 x i32>,  <4 x i32> } ; aggregate of three <4 x i32> vectors
 181 %struct.__neon_int32x4x4_t = type { <4 x i32>,  <4 x i32>, <4 x i32>,  <4 x i32> } ; aggregate of four <4 x i32> vectors
 182
 183 define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
 184 ; CHECK-LABEL: ld2_4s
 185 ; ABI check: both result vectors are in v0-v1 and the address is in x0.
 186 ; CHECK: ld2.4s { v0, v1 }, [x0]
 187 ; CHECK-NEXT: ret
 188   %vecs = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
 189   ret %struct.__neon_int32x4x2_t %vecs
 190 }
 191
 192 define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
 193 ; CHECK-LABEL: ld3_4s
 194 ; ABI check: the three result vectors are in v0-v2 and the address is in x0.
 195 ; CHECK: ld3.4s { v0, v1, v2 }, [x0]
 196 ; CHECK-NEXT: ret
 197   %vecs = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
 198   ret %struct.__neon_int32x4x3_t %vecs
 199 }
 200
 201 define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind {
 202 ; CHECK-LABEL: ld4_4s
 203 ; ABI check: the four result vectors are in v0-v3 and the address is in x0.
 204 ; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0]
 205 ; CHECK-NEXT: ret
 206   %vecs = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
 207   ret %struct.__neon_int32x4x4_t %vecs
 208 }
 209
 210 declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*) nounwind readonly
 211 declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*) nounwind readonly
 212 declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*) nounwind readonly
 213
 214 %struct.__neon_int64x2x2_t = type { <2 x i64>,  <2 x i64> } ; aggregate of two <2 x i64> vectors
 215 %struct.__neon_int64x2x3_t = type { <2 x i64>,  <2 x i64>,  <2 x i64> } ; aggregate of three <2 x i64> vectors
 216 %struct.__neon_int64x2x4_t = type { <2 x i64>,  <2 x i64>, <2 x i64>,  <2 x i64> } ; aggregate of four <2 x i64> vectors
 217
 218 define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
 219 ; CHECK-LABEL: ld2_2d
 220 ; ABI check: both result vectors are in v0-v1 and the address is in x0.
 221 ; CHECK: ld2.2d { v0, v1 }, [x0]
 222 ; CHECK-NEXT: ret
 223   %vecs = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
 224   ret %struct.__neon_int64x2x2_t %vecs
 225 }
 226
 227 define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
 228 ; CHECK-LABEL: ld3_2d
 229 ; ABI check: the three result vectors are in v0-v2 and the address is in x0.
 230 ; CHECK: ld3.2d { v0, v1, v2 }, [x0]
 231 ; CHECK-NEXT: ret
 232   %vecs = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
 233   ret %struct.__neon_int64x2x3_t %vecs
 234 }
 235
 236 define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind {
 237 ; CHECK-LABEL: ld4_2d
 238 ; ABI check: the four result vectors are in v0-v3 and the address is in x0.
 239 ; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0]
 240 ; CHECK-NEXT: ret
 241   %vecs = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
 242   ret %struct.__neon_int64x2x4_t %vecs
 243 }
 244
 245 declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*) nounwind readonly
 246 declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*) nounwind readonly
 247 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*) nounwind readonly
 248
 249 %struct.__neon_int64x1x2_t = type { <1 x i64>,  <1 x i64> } ; aggregate of two <1 x i64> vectors
 250 %struct.__neon_int64x1x3_t = type { <1 x i64>,  <1 x i64>, <1 x i64> } ; aggregate of three <1 x i64> vectors
 251 %struct.__neon_int64x1x4_t = type { <1 x i64>,  <1 x i64>, <1 x i64>, <1 x i64> } ; aggregate of four <1 x i64> vectors
 252
 253
 254 define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
 255 ; CHECK-LABEL: ld2_1di64
 256 ; The ld2 intrinsic on <1 x i64> is expected to emit ld1.1d with a two-register list: results in v0-v1, address in x0.
 257 ; CHECK: ld1.1d { v0, v1 }, [x0]
 258 ; CHECK-NEXT: ret
 259   %vecs = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
 260   ret %struct.__neon_int64x1x2_t %vecs
 261 }
 262
 263 define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
 264 ; CHECK-LABEL: ld3_1di64
 265 ; The ld3 intrinsic on <1 x i64> is expected to emit ld1.1d with a three-register list: results in v0-v2, address in x0.
 266 ; CHECK: ld1.1d { v0, v1, v2 }, [x0]
 267 ; CHECK-NEXT: ret
 268   %vecs = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
 269   ret %struct.__neon_int64x1x3_t %vecs
 270 }
 271
 272 define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind {
 273 ; CHECK-LABEL: ld4_1di64
 274 ; The ld4 intrinsic on <1 x i64> is expected to emit ld1.1d with a four-register list: results in v0-v3, address in x0.
 275 ; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
 276 ; CHECK-NEXT: ret
 277   %vecs = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
 278   ret %struct.__neon_int64x1x4_t %vecs
 279 }
 280
 281
 282 declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*) nounwind readonly
 283 declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*) nounwind readonly
 284 declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*) nounwind readonly
 285
 286 %struct.__neon_float64x1x2_t = type { <1 x double>,  <1 x double> } ; aggregate of two <1 x double> vectors
 287 %struct.__neon_float64x1x3_t = type { <1 x double>,  <1 x double>, <1 x double> } ; aggregate of three <1 x double> vectors
 288 %struct.__neon_float64x1x4_t = type { <1 x double>,  <1 x double>, <1 x double>, <1 x double> } ; aggregate of four <1 x double> vectors
 289
 290
 291 define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
 292 ; CHECK-LABEL: ld2_1df64
 293 ; The ld2 intrinsic on <1 x double> is expected to emit ld1.1d with a two-register list: results in v0-v1, address in x0.
 294 ; CHECK: ld1.1d { v0, v1 }, [x0]
 295 ; CHECK-NEXT: ret
 296   %vecs = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
 297   ret %struct.__neon_float64x1x2_t %vecs
 298 }
 299
 300 define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
 301 ; CHECK-LABEL: ld3_1df64
 302 ; The ld3 intrinsic on <1 x double> is expected to emit ld1.1d with a three-register list: results in v0-v2, address in x0.
 303 ; CHECK: ld1.1d { v0, v1, v2 }, [x0]
 304 ; CHECK-NEXT: ret
 305   %vecs = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
 306   ret %struct.__neon_float64x1x3_t %vecs
 307 }
 308
 309 define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind {
 310 ; CHECK-LABEL: ld4_1df64
 311 ; The ld4 intrinsic on <1 x double> is expected to emit ld1.1d with a four-register list: results in v0-v3, address in x0.
 312 ; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
 313 ; CHECK-NEXT: ret
 314   %vecs = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
 315   ret %struct.__neon_float64x1x4_t %vecs
 316 }
 317
 318 declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double*) nounwind readonly
 319 declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double*) nounwind readonly
 320 declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double*) nounwind readonly
 321
 322
 323 define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, i8* %A) nounwind {
 324 ; ABI check: the vector operands and results use v0-v1, the address is in x0, and lane 1 is the i64 lane operand.
 325 ; CHECK-LABEL: ld2lane_16b
 326 ; CHECK: ld2.b { v0, v1 }[1], [x0]
 327 ; CHECK-NEXT: ret
 328         %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, i64 1, i8* %A)
 329         ret %struct.__neon_int8x16x2_t  %tmp2
 330 }
 331
 332 define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i8* %A) nounwind {
 333 ; ABI check: the vector operands and results use v0-v2, the address is in x0, and lane 1 is the i64 lane operand.
 334 ; CHECK-LABEL: ld3lane_16b
 335 ; CHECK: ld3.b { v0, v1, v2 }[1], [x0]
 336 ; CHECK-NEXT: ret
 337         %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, i8* %A)
 338         ret %struct.__neon_int8x16x3_t  %tmp2
 339 }
 340
 341 define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i8* %A) nounwind {
 342 ; ABI check: the vector operands and results use v0-v3, the address is in x0, and lane 1 is the i64 lane operand.
 343 ; CHECK-LABEL: ld4lane_16b
 344 ; CHECK: ld4.b { v0, v1, v2, v3 }[1], [x0]
 345 ; CHECK-NEXT: ret
 346         %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, i8* %A)
 347         ret %struct.__neon_int8x16x4_t  %tmp2
 348 }
 349
 350 declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
 351 declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
 352 declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
 353
 354 define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, i16* %A) nounwind {
 355 ; ABI check: the vector operands and results use v0-v1, the address is in x0, and lane 1 is the i64 lane operand.
 356 ; CHECK-LABEL: ld2lane_8h
 357 ; CHECK: ld2.h { v0, v1 }[1], [x0]
 358 ; CHECK-NEXT: ret
 359         %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, i64 1, i16* %A)
 360         ret %struct.__neon_int16x8x2_t  %tmp2
 361 }
 362
 363 define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i16* %A) nounwind {
 364 ; ABI check: the vector operands and results use v0-v2, the address is in x0, and lane 1 is the i64 lane operand.
 365 ; CHECK-LABEL: ld3lane_8h
 366 ; CHECK: ld3.h { v0, v1, v2 }[1], [x0]
 367 ; CHECK-NEXT: ret
 368         %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, i16* %A)
 369         ret %struct.__neon_int16x8x3_t  %tmp2
 370 }
 371
 372 define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i16* %A) nounwind {
 373 ; ABI check: the vector operands and results use v0-v3, the address is in x0, and lane 1 is the i64 lane operand.
 374 ; CHECK-LABEL: ld4lane_8h
 375 ; CHECK: ld4.h { v0, v1, v2, v3 }[1], [x0]
 376 ; CHECK-NEXT: ret
 377         %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, i16* %A)
 378         ret %struct.__neon_int16x8x4_t  %tmp2
 379 }
 380
 381 declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
 382 declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
 383 declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
 384
 385 define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, i32* %A) nounwind {
 386 ; ABI check: the vector operands and results use v0-v1, the address is in x0, and lane 1 is the i64 lane operand.
 387 ; CHECK-LABEL: ld2lane_4s
 388 ; CHECK: ld2.s { v0, v1 }[1], [x0]
 389 ; CHECK-NEXT: ret
 390         %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, i64 1, i32* %A)
 391         ret %struct.__neon_int32x4x2_t  %tmp2
 392 }
 393
 394 define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i32* %A) nounwind {
 395 ; ABI check: the vector operands and results use v0-v2, the address is in x0, and lane 1 is the i64 lane operand.
 396 ; CHECK-LABEL: ld3lane_4s
 397 ; CHECK: ld3.s { v0, v1, v2 }[1], [x0]
 398 ; CHECK-NEXT: ret
 399         %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, i32* %A)
 400         ret %struct.__neon_int32x4x3_t  %tmp2
 401 }
 402
 403 define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i32* %A) nounwind {
 404 ; ABI check: the vector operands and results use v0-v3, the address is in x0, and lane 1 is the i64 lane operand.
 405 ; CHECK-LABEL: ld4lane_4s
 406 ; CHECK: ld4.s { v0, v1, v2, v3 }[1], [x0]
 407 ; CHECK-NEXT: ret
 408         %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, i32* %A)
 409         ret %struct.__neon_int32x4x4_t  %tmp2
 410 }
 411
 412 declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
 413 declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
 414 declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
 415
 416 define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, i64* %A) nounwind {
 417 ; ABI check: the vector operands and results use v0-v1, the address is in x0, and lane 1 is the i64 lane operand.
 418 ; CHECK-LABEL: ld2lane_2d
 419 ; CHECK: ld2.d { v0, v1 }[1], [x0]
 420 ; CHECK-NEXT: ret
 421         %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, i64 1, i64* %A)
 422         ret %struct.__neon_int64x2x2_t  %tmp2
 423 }
 424
 425 define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64* %A) nounwind {
 426 ; ABI check: the vector operands and results use v0-v2, the address is in x0, and lane 1 is the i64 lane operand.
 427 ; CHECK-LABEL: ld3lane_2d
 428 ; CHECK: ld3.d { v0, v1, v2 }[1], [x0]
 429 ; CHECK-NEXT: ret
 430         %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, i64* %A)
 431         ret %struct.__neon_int64x2x3_t  %tmp2
 432 }
 433
 434 define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64* %A) nounwind {
 435 ; ABI check: the vector operands and results use v0-v3, the address is in x0, and lane 1 is the i64 lane operand.
 436 ; CHECK-LABEL: ld4lane_2d
 437 ; CHECK: ld4.d { v0, v1, v2, v3 }[1], [x0]
 438 ; CHECK-NEXT: ret
 439         %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, i64* %A)
 440         ret %struct.__neon_int64x2x4_t  %tmp2
 441 }
 442
 443 declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
 444 declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
 445 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
 446
 447 define <8 x i8> @ld1r_8b(i8* %bar) {
 448 ; CHECK-LABEL: ld1r_8b
 449 ; The scalar load inserted into all 8 lanes should select one ld1r: result in v0, address in x0.
 450 ; CHECK: ld1r.8b { v0 }, [x0]
 451 ; CHECK-NEXT: ret
 452   %tmp1 = load i8, i8* %bar
 453   %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
 454   %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
 455   %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
 456   %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
 457   %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
 458   %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
 459   %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
 460   %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
 461   ret <8 x i8> %tmp9
 462 }
 463
 464 define <16 x i8> @ld1r_16b(i8* %bar) {
 465 ; CHECK-LABEL: ld1r_16b
 466 ; The scalar load inserted into all 16 lanes should select one ld1r: result in v0, address in x0.
 467 ; CHECK: ld1r.16b { v0 }, [x0]
 468 ; CHECK-NEXT: ret
 469   %tmp1 = load i8, i8* %bar
 470   %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
 471   %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
 472   %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
 473   %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
 474   %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
 475   %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
 476   %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
 477   %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
 478   %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
 479   %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
 480   %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
 481   %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
 482   %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
 483   %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
 484   %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
 485   %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
 486   ret <16 x i8> %tmp17
 487 }
 488
 489 define <4 x i16> @ld1r_4h(i16* %bar) {
 490 ; CHECK-LABEL: ld1r_4h
 491 ; The scalar load inserted into all 4 lanes should select one ld1r: result in v0, address in x0.
 492 ; CHECK: ld1r.4h { v0 }, [x0]
 493 ; CHECK-NEXT: ret
 494   %tmp1 = load i16, i16* %bar
 495   %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
 496   %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
 497   %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
 498   %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
 499   ret <4 x i16> %tmp5
 500 }
 501
 502 define <8 x i16> @ld1r_8h(i16* %bar) {
 503 ; CHECK-LABEL: ld1r_8h
 504 ; The scalar load inserted into all 8 lanes should select one ld1r: result in v0, address in x0.
 505 ; CHECK: ld1r.8h { v0 }, [x0]
 506 ; CHECK-NEXT: ret
 507   %tmp1 = load i16, i16* %bar
 508   %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
 509   %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
 510   %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
 511   %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
 512   %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
 513   %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
 514   %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
 515   %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
 516   ret <8 x i16> %tmp9
 517 }
 518
 519 define <2 x i32> @ld1r_2s(i32* %bar) {
 520 ; CHECK-LABEL: ld1r_2s
 521 ; The scalar load inserted into both lanes should select one ld1r: result in v0, address in x0.
 522 ; CHECK: ld1r.2s { v0 }, [x0]
 523 ; CHECK-NEXT: ret
 524   %tmp1 = load i32, i32* %bar
 525   %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
 526   %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
 527   ret <2 x i32> %tmp3
 528 }
 529
 530 define <4 x i32> @ld1r_4s(i32* %bar) {
 531 ; CHECK-LABEL: ld1r_4s
 532 ; The scalar load inserted into all 4 lanes should select one ld1r: result in v0, address in x0.
 533 ; CHECK: ld1r.4s { v0 }, [x0]
 534 ; CHECK-NEXT: ret
 535   %tmp1 = load i32, i32* %bar
 536   %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
 537   %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
 538   %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
 539   %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
 540   ret <4 x i32> %tmp5
 541 }
 542
 543 define <2 x i64> @ld1r_2d(i64* %bar) {
 544 ; CHECK-LABEL: ld1r_2d
 545 ; The scalar load inserted into both lanes should select one ld1r: result in v0, address in x0.
 546 ; CHECK: ld1r.2d { v0 }, [x0]
 547 ; CHECK-NEXT: ret
 548   %tmp1 = load i64, i64* %bar
 549   %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
 550   %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
 551   ret <2 x i64> %tmp3
 552 }
 553
 554 define %struct.__neon_int8x8x2_t @ld2r_8b(i8* %A) nounwind {
 555 ; CHECK-LABEL: ld2r_8b
 556 ; ABI check: both ld2r result vectors are in v0-v1 and the address is in x0.
 557 ; CHECK: ld2r.8b { v0, v1 }, [x0]
 558 ; CHECK-NEXT: ret
 559         %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
 560         ret %struct.__neon_int8x8x2_t  %tmp2
 561 }
 562
 563 define %struct.__neon_int8x8x3_t @ld3r_8b(i8* %A) nounwind {
 564 ; CHECK-LABEL: ld3r_8b
 565 ; ABI check: the three ld3r result vectors are in v0-v2 and the address is in x0.
 566 ; CHECK: ld3r.8b { v0, v1, v2 }, [x0]
 567 ; CHECK-NEXT: ret
 568         %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
 569         ret %struct.__neon_int8x8x3_t  %tmp2
 570 }
 571
 572 define %struct.__neon_int8x8x4_t @ld4r_8b(i8* %A) nounwind {
 573 ; CHECK-LABEL: ld4r_8b
 574 ; ABI check: the four ld4r result vectors are in v0-v3 and the address is in x0.
 575 ; CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0]
 576 ; CHECK-NEXT: ret
 577         %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
 578         ret %struct.__neon_int8x8x4_t  %tmp2
 579 }
 580
 581 declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly
 582 declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly
 583 declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly
 584
 585 define %struct.__neon_int8x16x2_t @ld2r_16b(i8* %A) nounwind {
 586 ; CHECK-LABEL: ld2r_16b
 587 ; ABI check: both ld2r result vectors are in v0-v1 and the address is in x0.
 588 ; CHECK: ld2r.16b { v0, v1 }, [x0]
 589 ; CHECK-NEXT: ret
 590         %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
 591         ret %struct.__neon_int8x16x2_t  %tmp2
 592 }
 593
 594 define %struct.__neon_int8x16x3_t @ld3r_16b(i8* %A) nounwind {
 595 ; CHECK-LABEL: ld3r_16b
 596 ; ABI check: the three ld3r result vectors are in v0-v2 and the address is in x0.
 597 ; CHECK: ld3r.16b { v0, v1, v2 }, [x0]
 598 ; CHECK-NEXT: ret
 599         %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
 600         ret %struct.__neon_int8x16x3_t  %tmp2
 601 }
 602
 603 define %struct.__neon_int8x16x4_t @ld4r_16b(i8* %A) nounwind {
 604 ; CHECK-LABEL: ld4r_16b
 605 ; ABI check: the four ld4r result vectors are in v0-v3 and the address is in x0.
 606 ; CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0]
 607 ; CHECK-NEXT: ret
 608         %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
 609         ret %struct.__neon_int8x16x4_t  %tmp2
 610 }
 611
 612 declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly
 613 declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly
 614 declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly
 615
 616 define %struct.__neon_int16x4x2_t @ld2r_4h(i16* %A) nounwind {
 617 ; CHECK-LABEL: ld2r_4h
 618 ; ABI check: both ld2r result vectors are in v0-v1 and the address is in x0.
 619 ; CHECK: ld2r.4h { v0, v1 }, [x0]
 620 ; CHECK-NEXT: ret
 621         %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
 622         ret %struct.__neon_int16x4x2_t  %tmp2
 623 }
 624
 625 define %struct.__neon_int16x4x3_t @ld3r_4h(i16* %A) nounwind {
 626 ; CHECK-LABEL: ld3r_4h
 627 ; ABI check: the three ld3r result vectors are in v0-v2 and the address is in x0.
 628 ; CHECK: ld3r.4h { v0, v1, v2 }, [x0]
 629 ; CHECK-NEXT: ret
 630         %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
 631         ret %struct.__neon_int16x4x3_t  %tmp2
 632 }
 633
 634 define %struct.__neon_int16x4x4_t @ld4r_4h(i16* %A) nounwind {
 635 ; CHECK-LABEL: ld4r_4h
 636 ; ABI check: the four ld4r result vectors are in v0-v3 and the address is in x0.
 637 ; CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0]
 638 ; CHECK-NEXT: ret
 639         %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
 640         ret %struct.__neon_int16x4x4_t  %tmp2
 641 }
 642
 643 declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly
 644 declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly
 645 declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly
 646
 647 define %struct.__neon_int16x8x2_t @ld2r_8h(i16* %A) nounwind {
 648 ; CHECK-LABEL: ld2r_8h:
 649 ; The ABI fixes the operands: both results in v0-v1, the address in x0.
 650 ; CHECK: ld2r.8h { v0, v1 }, [x0]
 651 ; CHECK-NEXT: ret
 652   %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
 653   ret %struct.__neon_int16x8x2_t  %tmp2
 654 }
 655
 656 define %struct.__neon_int16x8x3_t @ld3r_8h(i16* %A) nounwind {
 657 ; CHECK-LABEL: ld3r_8h:
 658 ; The ABI fixes the operands: the three results in v0-v2, the address in x0.
 659 ; CHECK: ld3r.8h { v0, v1, v2 }, [x0]
 660 ; CHECK-NEXT: ret
 661   %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
 662   ret %struct.__neon_int16x8x3_t  %tmp2
 663 }
 664
 665 define %struct.__neon_int16x8x4_t @ld4r_8h(i16* %A) nounwind {
 666 ; CHECK-LABEL: ld4r_8h:
 667 ; The ABI fixes the operands: the four results in v0-v3, the address in x0.
 668 ; CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0]
 669 ; CHECK-NEXT: ret
 670   %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
 671   ret %struct.__neon_int16x8x4_t  %tmp2
 672 }
 673
 674 declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly
 675 declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly
 676 declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly
 677
 678 define %struct.__neon_int32x2x2_t @ld2r_2s(i32* %A) nounwind {
 679 ; CHECK-LABEL: ld2r_2s:
 680 ; The ABI fixes the operands: both results in v0-v1, the address in x0.
 681 ; CHECK: ld2r.2s { v0, v1 }, [x0]
 682 ; CHECK-NEXT: ret
 683         %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
 684         ret %struct.__neon_int32x2x2_t  %tmp2
 685 }
 686
 687 define %struct.__neon_int32x2x3_t @ld3r_2s(i32* %A) nounwind {
 688 ; CHECK-LABEL: ld3r_2s:
 689 ; The ABI fixes the operands: the three results in v0-v2, the address in x0.
 690 ; CHECK: ld3r.2s { v0, v1, v2 }, [x0]
 691 ; CHECK-NEXT: ret
 692         %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
 693         ret %struct.__neon_int32x2x3_t  %tmp2
 694 }
 695
 696 define %struct.__neon_int32x2x4_t @ld4r_2s(i32* %A) nounwind {
 697 ; CHECK-LABEL: ld4r_2s:
 698 ; The ABI fixes the operands: the four results in v0-v3, the address in x0.
 699 ; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0]
 700 ; CHECK-NEXT: ret
 701         %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
 702         ret %struct.__neon_int32x2x4_t  %tmp2
 703 }
 704
 705 declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly
 706 declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly
 707 declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly
 708
 709 define %struct.__neon_int32x4x2_t @ld2r_4s(i32* %A) nounwind {
 710 ; CHECK-LABEL: ld2r_4s:
 711 ; The ABI fixes the operands: both results in v0-v1, the address in x0.
 712 ; CHECK: ld2r.4s { v0, v1 }, [x0]
 713 ; CHECK-NEXT: ret
 714         %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
 715         ret %struct.__neon_int32x4x2_t  %tmp2
 716 }
 717
 718 define %struct.__neon_int32x4x3_t @ld3r_4s(i32* %A) nounwind {
 719 ; CHECK-LABEL: ld3r_4s:
 720 ; The ABI fixes the operands: the three results in v0-v2, the address in x0.
 721 ; CHECK: ld3r.4s { v0, v1, v2 }, [x0]
 722 ; CHECK-NEXT: ret
 723         %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
 724         ret %struct.__neon_int32x4x3_t  %tmp2
 725 }
 726
 727 define %struct.__neon_int32x4x4_t @ld4r_4s(i32* %A) nounwind {
 728 ; CHECK-LABEL: ld4r_4s:
 729 ; The ABI fixes the operands: the four results in v0-v3, the address in x0.
 730 ; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0]
 731 ; CHECK-NEXT: ret
 732         %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
 733         ret %struct.__neon_int32x4x4_t  %tmp2
 734 }
 735
 736 declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
 737 declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
 738 declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly
 739
 740 define %struct.__neon_int64x1x2_t @ld2r_1d(i64* %A) nounwind {
 741 ; CHECK-LABEL: ld2r_1d:
 742 ; The ABI fixes the operands: both results in v0-v1, the address in x0.
 743 ; CHECK: ld2r.1d { v0, v1 }, [x0]
 744 ; CHECK-NEXT: ret
 745         %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
 746         ret %struct.__neon_int64x1x2_t  %tmp2
 747 }
 748
 749 define %struct.__neon_int64x1x3_t @ld3r_1d(i64* %A) nounwind {
 750 ; CHECK-LABEL: ld3r_1d:
 751 ; The ABI fixes the operands: the three results in v0-v2, the address in x0.
 752 ; CHECK: ld3r.1d { v0, v1, v2 }, [x0]
 753 ; CHECK-NEXT: ret
 754         %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
 755         ret %struct.__neon_int64x1x3_t  %tmp2
 756 }
 757
 758 define %struct.__neon_int64x1x4_t @ld4r_1d(i64* %A) nounwind {
 759 ; CHECK-LABEL: ld4r_1d:
 760 ; The ABI fixes the operands: the four results in v0-v3, the address in x0.
 761 ; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0]
 762 ; CHECK-NEXT: ret
 763         %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
 764         ret %struct.__neon_int64x1x4_t  %tmp2
 765 }
 766
 767 declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly
 768 declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly
 769 declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly
 770
 771 define %struct.__neon_int64x2x2_t @ld2r_2d(i64* %A) nounwind {
 772 ; CHECK-LABEL: ld2r_2d:
 773 ; The ABI fixes the operands: both results in v0-v1, the address in x0.
 774 ; CHECK: ld2r.2d { v0, v1 }, [x0]
 775 ; CHECK-NEXT: ret
 776         %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
 777         ret %struct.__neon_int64x2x2_t  %tmp2
 778 }
 779
 780 define %struct.__neon_int64x2x3_t @ld3r_2d(i64* %A) nounwind {
 781 ; CHECK-LABEL: ld3r_2d:
 782 ; The ABI fixes the operands: the three results in v0-v2, the address in x0.
 783 ; CHECK: ld3r.2d { v0, v1, v2 }, [x0]
 784 ; CHECK-NEXT: ret
 785         %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
 786         ret %struct.__neon_int64x2x3_t  %tmp2
 787 }
 788
 789 define %struct.__neon_int64x2x4_t @ld4r_2d(i64* %A) nounwind {
 790 ; CHECK-LABEL: ld4r_2d:
 791 ; The ABI fixes the operands: the four results in v0-v3, the address in x0.
 792 ; CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0]
 793 ; CHECK-NEXT: ret
 794         %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
 795         ret %struct.__neon_int64x2x4_t  %tmp2
 796 }
 797
 798 declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly
 799 declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly
 800 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly
 801
 802 define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) {
 803 ; CHECK-LABEL: ld1_16b:
 804 ; The ABI fixes the operands: the vector is passed and returned in v0, the address is in x0.
 805 ; CHECK: ld1.b { v0 }[0], [x0]
 806 ; CHECK-NEXT: ret
 807   %tmp1 = load i8, i8* %bar
 808   %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
 809   ret <16 x i8> %tmp2
 810 }
 811
 812 define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) {
 813 ; CHECK-LABEL: ld1_8h:
 814 ; The ABI fixes the operands: the vector is passed and returned in v0, the address is in x0.
 815 ; CHECK: ld1.h { v0 }[0], [x0]
 816 ; CHECK-NEXT: ret
 817   %tmp1 = load i16, i16* %bar
 818   %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
 819   ret <8 x i16> %tmp2
 820 }
 821
 822 define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) {
 823 ; CHECK-LABEL: ld1_4s:
 824 ; The ABI fixes the operands: the vector is passed and returned in v0, the address is in x0.
 825 ; CHECK: ld1.s { v0 }[0], [x0]
 826 ; CHECK-NEXT: ret
 827   %tmp1 = load i32, i32* %bar
 828   %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
 829   ret <4 x i32> %tmp2
 830 }
 831
 832 define <4 x float> @ld1_4s_float(<4 x float> %V, float* %bar) {
 833 ; CHECK-LABEL: ld1_4s_float:
 834 ; Lane 0 of v0 must be loaded straight from the address in x0, as the ABI dictates.
 835 ; CHECK: ld1.s { v0 }[0], [x0]
 836 ; CHECK-NEXT: ret
 837   %elt = load float, float* %bar
 838   %vec = insertelement <4 x float> %V, float %elt, i32 0
 839   ret <4 x float> %vec
 840 }
 841
 842 define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) {
 843 ; CHECK-LABEL: ld1_2d:
 844 ; The ABI fixes the operands: the vector is passed and returned in v0, the address is in x0.
 845 ; CHECK: ld1.d { v0 }[0], [x0]
 846 ; CHECK-NEXT: ret
 847   %tmp1 = load i64, i64* %bar
 848   %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
 849   ret <2 x i64> %tmp2
 850 }
 851
 852 define <2 x double> @ld1_2d_double(<2 x double> %V, double* %bar) {
 853 ; CHECK-LABEL: ld1_2d_double:
 854 ; Lane 0 of v0 must be loaded straight from the address in x0, as the ABI dictates.
 855 ; CHECK: ld1.d { v0 }[0], [x0]
 856 ; CHECK-NEXT: ret
 857   %elt = load double, double* %bar
 858   %vec = insertelement <2 x double> %V, double %elt, i32 0
 859   ret <2 x double> %vec
 860 }
 861
 862 define <1 x i64> @ld1_1d(<1 x i64>* %p) {
 863 ; CHECK-LABEL: ld1_1d:
 864 ; A whole <1 x i64> load is expected to be a plain ldr of a d register from x0.
 865 ; CHECK: ldr [[REG:d[0-9]+]], [x0]
 866 ; CHECK-NEXT: ret
 867   %tmp = load <1 x i64>, <1 x i64>* %p, align 8
 868   ret <1 x i64> %tmp
 869 }
 870
 871 define <8 x i8> @ld1_8b(<8 x i8> %V, i8* %bar) {
 872 ; CHECK-LABEL: ld1_8b:
 873 ; The ABI fixes the operands: the vector is passed and returned in v0, the address is in x0.
 874 ; CHECK: ld1.b { v0 }[0], [x0]
 875 ; CHECK-NEXT: ret
 876   %tmp1 = load i8, i8* %bar
 877   %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
 878   ret <8 x i8> %tmp2
 879 }
 880
 881 define <4 x i16> @ld1_4h(<4 x i16> %V, i16* %bar) {
 882 ; CHECK-LABEL: ld1_4h:
 883 ; The ABI fixes the operands: the vector is passed and returned in v0, the address is in x0.
 884 ; CHECK: ld1.h { v0 }[0], [x0]
 885 ; CHECK-NEXT: ret
 886   %tmp1 = load i16, i16* %bar
 887   %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
 888   ret <4 x i16> %tmp2
 889 }
 890
 891 define <2 x i32> @ld1_2s(<2 x i32> %V, i32* %bar) {
 892 ; CHECK-LABEL: ld1_2s:
 893 ; Lane 0 of v0 must be loaded straight from the address in x0, as the ABI dictates.
 894 ; CHECK: ld1.s { v0 }[0], [x0]
 895 ; CHECK-NEXT: ret
 896   %elt = load i32, i32* %bar
 897   %vec = insertelement <2 x i32> %V, i32 %elt, i32 0
 898   ret <2 x i32> %vec
 899 }
 900
 901 define <2 x float> @ld1_2s_float(<2 x float> %V, float* %bar) {
 902 ; CHECK-LABEL: ld1_2s_float:
 903 ; Lane 0 of v0 must be loaded straight from the address in x0, as the ABI dictates.
 904 ; CHECK: ld1.s { v0 }[0], [x0]
 905 ; CHECK-NEXT: ret
 906   %elt = load float, float* %bar
 907   %vec = insertelement <2 x float> %V, float %elt, i32 0
 908   ret <2 x float> %vec
 909 }
 910
 911
 912 ; Regression test for rdar://13098923: vld1_dup_u32 (an i32 load splatted across <2 x i32>) must select ld1r.2s
 913 define void @ld1r_2s_from_dup(i8* nocapture %a, i8* nocapture %b, i16* nocapture %diff) nounwind ssp {
 914 entry:
 915 ; CHECK-LABEL: ld1r_2s_from_dup:
 916 ; CHECK: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0]
 917 ; CHECK-NEXT: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1]
 918 ; CHECK-NEXT: ushll.8h [[ARG1]], [[ARG1]], #0
 919 ; CHECK-NEXT: ushll.8h [[ARG2]], [[ARG2]], #0
 920 ; CHECK-NEXT: sub.4h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]]
 921 ; CHECK-NEXT: str d[[RESREGNUM]], [x2]
 922 ; CHECK-NEXT: ret
 923   %tmp = bitcast i8* %a to i32*
 924   %tmp1 = load i32, i32* %tmp, align 4
 925   %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
 926   %lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
 927   %tmp3 = bitcast <2 x i32> %lane to <8 x i8>
 928   %tmp4 = bitcast i8* %b to i32*
 929   %tmp5 = load i32, i32* %tmp4, align 4
 930   %tmp6 = insertelement <2 x i32> undef, i32 %tmp5, i32 0
 931   %lane1 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> zeroinitializer
 932   %tmp7 = bitcast <2 x i32> %lane1 to <8 x i8>
 933   %vmovl.i.i = zext <8 x i8> %tmp3 to <8 x i16>
 934   %vmovl.i4.i = zext <8 x i8> %tmp7 to <8 x i16>
 935   %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i4.i
 936   %tmp8 = bitcast <8 x i16> %sub.i to <2 x i64>
 937   %shuffle.i = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
 938   %tmp9 = bitcast <1 x i64> %shuffle.i to <4 x i16>
 939   %tmp10 = bitcast i16* %diff to <4 x i16>*
 940   store <4 x i16> %tmp9, <4 x i16>* %tmp10, align 8
 941   ret void
 942 }
 943
 944 ; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal
 945 define <4 x float> @ld1r_4s_float(float* nocapture %x) {
 946 entry:
 947 ; CHECK-LABEL: ld1r_4s_float:
 948 ; Inserting the same loaded scalar into every lane must become one ld1r into v0 from x0.
 949 ; CHECK: ld1r.4s { v0 }, [x0]
 950 ; CHECK-NEXT: ret
 951   %tmp = load float, float* %x, align 4
 952   %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
 953   %tmp2 = insertelement <4 x float> %tmp1, float %tmp, i32 1
 954   %tmp3 = insertelement <4 x float> %tmp2, float %tmp, i32 2
 955   %tmp4 = insertelement <4 x float> %tmp3, float %tmp, i32 3
 956   ret <4 x float> %tmp4
 957 }
 958
 959 define <2 x float> @ld1r_2s_float(float* nocapture %x) {
 960 entry:
 961 ; CHECK-LABEL: ld1r_2s_float:
 962 ; Inserting the same loaded scalar into every lane must become one ld1r into v0 from x0.
 963 ; CHECK: ld1r.2s { v0 }, [x0]
 964 ; CHECK-NEXT: ret
 965   %tmp = load float, float* %x, align 4
 966   %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
 967   %tmp2 = insertelement <2 x float> %tmp1, float %tmp, i32 1
 968   ret <2 x float> %tmp2
 969 }
 970
 971 define <2 x double> @ld1r_2d_double(double* nocapture %x) {
 972 entry:
 973 ; CHECK-LABEL: ld1r_2d_double:
 974 ; Inserting the same loaded scalar into every lane must become one ld1r into v0 from x0.
 975 ; CHECK: ld1r.2d { v0 }, [x0]
 976 ; CHECK-NEXT: ret
 977   %tmp = load double, double* %x, align 4
 978   %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
 979   %tmp2 = insertelement <2 x double> %tmp1, double %tmp, i32 1
 980   ret <2 x double> %tmp2
 981 }
 982
 983 define <1 x double> @ld1r_1d_double(double* nocapture %x) {
 984 entry:
 985 ; CHECK-LABEL: ld1r_1d_double:
 986 ; With a single lane there is nothing to replicate: expect a plain ldr into d0 from x0.
 987 ; CHECK: ldr d0, [x0]
 988 ; CHECK-NEXT: ret
 989   %tmp = load double, double* %x, align 4
 990   %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
 991   ret <1 x double> %tmp1
 992 }
 993
 994 define <4 x float> @ld1r_4s_float_shuff(float* nocapture %x) {
 995 entry:
 996 ; CHECK-LABEL: ld1r_4s_float_shuff:
 997 ; A loaded scalar splatted with a zero-mask shuffle must become one ld1r into v0 from x0.
 998 ; CHECK: ld1r.4s { v0 }, [x0]
 999 ; CHECK-NEXT: ret
1000   %tmp = load float, float* %x, align 4
1001   %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
1002   %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
1003   ret <4 x float> %lane
1004 }
1005
1006 define <2 x float> @ld1r_2s_float_shuff(float* nocapture %x) {
1007 entry:
1008 ; CHECK-LABEL: ld1r_2s_float_shuff:
1009 ; A loaded scalar splatted with a zero-mask shuffle must become one ld1r into v0 from x0.
1010 ; CHECK: ld1r.2s { v0 }, [x0]
1011 ; CHECK-NEXT: ret
1012   %tmp = load float, float* %x, align 4
1013   %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
1014   %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
1015   ret <2 x float> %lane
1016 }
1017
1018 define <2 x double> @ld1r_2d_double_shuff(double* nocapture %x) {
1019 entry:
1020 ; CHECK-LABEL: ld1r_2d_double_shuff:
1021 ; A loaded scalar splatted with a zero-mask shuffle must become one ld1r into v0 from x0.
1022 ; CHECK: ld1r.2d { v0 }, [x0]
1023 ; CHECK-NEXT: ret
1024   %tmp = load double, double* %x, align 4
1025   %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
1026   %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer
1027   ret <2 x double> %lane
1028 }
1029
1030 define <1 x double> @ld1r_1d_double_shuff(double* nocapture %x) {
1031 entry:
1032 ; CHECK-LABEL: ld1r_1d_double_shuff:
1033 ; With a single lane there is nothing to replicate: expect a plain ldr into d0 from x0.
1034 ; CHECK: ldr d0, [x0]
1035 ; CHECK-NEXT: ret
1036   %tmp = load double, double* %x, align 4
1037   %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
1038   %lane = shufflevector <1 x double> %tmp1, <1 x double> undef, <1 x i32> zeroinitializer
1039   ret <1 x double> %lane
1040 }
1041
1042 %struct.__neon_float32x2x2_t = type { <2 x float>,  <2 x float> }
1043 %struct.__neon_float32x2x3_t = type { <2 x float>,  <2 x float>,  <2 x float> }
1044 %struct.__neon_float32x2x4_t = type { <2 x float>,  <2 x float>, <2 x float>,  <2 x float> }
1045
1046 declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*) nounwind readonly
1047 declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*) nounwind readonly
1048 declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*) nounwind readonly
1049 declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*) nounwind readonly
1050 declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*) nounwind readonly
1051 declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*) nounwind readonly
1052
1053 define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(i8* %addr) {
1054 ; CHECK-LABEL: ld1_x2_v8i8:
1055 ; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1056   %pair = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %addr)
1057   ret %struct.__neon_int8x8x2_t %pair
1058 }
1059
1060 define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(i16* %addr) {
1061 ; CHECK-LABEL: ld1_x2_v4i16:
1062 ; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1063   %pair = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %addr)
1064   ret %struct.__neon_int16x4x2_t %pair
1065 }
1066
1067 define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(i32* %addr) {
1068 ; CHECK-LABEL: ld1_x2_v2i32:
1069 ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1070   %pair = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %addr)
1071   ret %struct.__neon_int32x2x2_t %pair
1072 }
1073
1074 define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(float* %addr) {
1075 ; CHECK-LABEL: ld1_x2_v2f32:
1076 ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1077   %pair = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %addr)
1078   ret %struct.__neon_float32x2x2_t %pair
1079 }
1080
1081 define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(i64* %addr) {
1082 ; CHECK-LABEL: ld1_x2_v1i64:
1083 ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1084   %pair = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %addr)
1085   ret %struct.__neon_int64x1x2_t %pair
1086 }
1087
1088 define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(double* %addr) {
1089 ; CHECK-LABEL: ld1_x2_v1f64:
1090 ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1091   %pair = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %addr)
1092   ret %struct.__neon_float64x1x2_t %pair
1093 }
1094
1095
1096 %struct.__neon_float32x4x2_t = type { <4 x float>,  <4 x float> }
1097 %struct.__neon_float32x4x3_t = type { <4 x float>,  <4 x float>,  <4 x float> }
1098 %struct.__neon_float32x4x4_t = type { <4 x float>,  <4 x float>, <4 x float>,  <4 x float> }
1099
1100 %struct.__neon_float64x2x2_t = type { <2 x double>,  <2 x double> }
1101 %struct.__neon_float64x2x3_t = type { <2 x double>,  <2 x double>,  <2 x double> }
1102 %struct.__neon_float64x2x4_t = type { <2 x double>,  <2 x double>, <2 x double>,  <2 x double> }
1103
1104 declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*) nounwind readonly
1105 declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*) nounwind readonly
1106 declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*) nounwind readonly
1107 declare %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*) nounwind readonly
1108 declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*) nounwind readonly
1109 declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*) nounwind readonly
1110
1111 define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(i8* %addr) {
1112 ; CHECK-LABEL: ld1_x2_v16i8:
1113 ; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1114   %pair = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %addr)
1115   ret %struct.__neon_int8x16x2_t %pair
1116 }
1117
1118 define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(i16* %addr) {
1119 ; CHECK-LABEL: ld1_x2_v8i16:
1120 ; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1121   %pair = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %addr)
1122   ret %struct.__neon_int16x8x2_t %pair
1123 }
1124
1125 define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(i32* %addr) {
1126 ; CHECK-LABEL: ld1_x2_v4i32:
1127 ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1128   %pair = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %addr)
1129   ret %struct.__neon_int32x4x2_t %pair
1130 }
1131
1132 define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(float* %addr) {
1133 ; CHECK-LABEL: ld1_x2_v4f32:
1134 ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1135   %pair = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %addr)
1136   ret %struct.__neon_float32x4x2_t %pair
1137 }
1138
1139 define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(i64* %addr) {
1140 ; CHECK-LABEL: ld1_x2_v2i64:
1141 ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1142   %pair = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %addr)
1143   ret %struct.__neon_int64x2x2_t %pair
1144 }
1145
1146 define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(double* %addr) {
1147 ; CHECK-LABEL: ld1_x2_v2f64:
1148 ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1149   %pair = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %addr)
1150   ret %struct.__neon_float64x2x2_t %pair
1151 }
1152
1153 declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*) nounwind readonly
1154 declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*) nounwind readonly
1155 declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*) nounwind readonly
1156 declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*) nounwind readonly
1157 declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*) nounwind readonly
1158 declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*) nounwind readonly
1159
1160 define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(i8* %addr) {
1161 ; CHECK-LABEL: ld1_x3_v8i8:
1162 ; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1163   %triple = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %addr)
1164   ret %struct.__neon_int8x8x3_t %triple
1165 }
1166
1167 define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(i16* %addr) {
1168 ; CHECK-LABEL: ld1_x3_v4i16:
1169 ; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1170   %triple = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %addr)
1171   ret %struct.__neon_int16x4x3_t %triple
1172 }
1173
1174 define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(i32* %addr) {
1175 ; CHECK-LABEL: ld1_x3_v2i32:
1176 ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1177   %triple = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %addr)
1178   ret %struct.__neon_int32x2x3_t %triple
1179 }
1180
1181 define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(float* %addr) {
1182 ; CHECK-LABEL: ld1_x3_v2f32:
1183 ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1184   %triple = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %addr)
1185   ret %struct.__neon_float32x2x3_t %triple
1186 }
1187
1188 define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(i64* %addr) {
1189 ; CHECK-LABEL: ld1_x3_v1i64:
1190 ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1191   %triple = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %addr)
1192   ret %struct.__neon_int64x1x3_t %triple
1193 }
1194
1195 define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(double* %addr) {
1196 ; CHECK-LABEL: ld1_x3_v1f64:
1197 ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1198   %triple = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %addr)
1199   ret %struct.__neon_float64x1x3_t %triple
1200 }
1201
1202 declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*) nounwind readonly
1203 declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*) nounwind readonly
1204 declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*) nounwind readonly
1205 declare %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*) nounwind readonly
1206 declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*) nounwind readonly
1207 declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*) nounwind readonly
1208
1209 define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(i8* %addr) {
1210 ; CHECK-LABEL: ld1_x3_v16i8:
1211 ; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1212   %triple = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %addr)
1213   ret %struct.__neon_int8x16x3_t %triple
1214 }
1215
1216 define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(i16* %addr) {
1217 ; CHECK-LABEL: ld1_x3_v8i16:
1218 ; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1219   %triple = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %addr)
1220   ret %struct.__neon_int16x8x3_t %triple
1221 }
1222
1223 define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(i32* %addr) {
1224 ; CHECK-LABEL: ld1_x3_v4i32:
1225 ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1226   %triple = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %addr)
1227   ret %struct.__neon_int32x4x3_t %triple
1228 }
1229
1230 define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(float* %addr) {
1231 ; CHECK-LABEL: ld1_x3_v4f32:
1232 ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1233   %triple = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %addr)
1234   ret %struct.__neon_float32x4x3_t %triple
1235 }
1236
1237 define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(i64* %addr) {
1238 ; CHECK-LABEL: ld1_x3_v2i64:
1239 ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1240   %triple = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %addr)
1241   ret %struct.__neon_int64x2x3_t %triple
1242 }
1243
1244 define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(double* %addr) {
1245 ; CHECK-LABEL: ld1_x3_v2f64:
1246 ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1247   %triple = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %addr)
1248   ret %struct.__neon_float64x2x3_t %triple
1249 }
1250
1251 declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*) nounwind readonly
1252 declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*) nounwind readonly
1253 declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*) nounwind readonly
1254 declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*) nounwind readonly
1255 declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*) nounwind readonly
1256 declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*) nounwind readonly
1257
1258 define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(i8* %addr) {
1259 ; CHECK-LABEL: ld1_x4_v8i8:
1260 ; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1261   %quad = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %addr)
1262   ret %struct.__neon_int8x8x4_t %quad
1263 }
1264
1265 define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(i16* %addr) {
1266 ; CHECK-LABEL: ld1_x4_v4i16:
1267 ; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1268   %quad = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %addr)
1269   ret %struct.__neon_int16x4x4_t %quad
1270 }
1271
1272 define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(i32* %addr) {
1273 ; CHECK-LABEL: ld1_x4_v2i32:
1274 ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1275   %quad = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %addr)
1276   ret %struct.__neon_int32x2x4_t %quad
1277 }
1278
1279 define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(float* %addr) {
1280 ; CHECK-LABEL: ld1_x4_v2f32:
1281 ; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1282   %quad = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %addr)
1283   ret %struct.__neon_float32x2x4_t %quad
1284 }
1285
1286 define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(i64* %addr) {
1287 ; CHECK-LABEL: ld1_x4_v1i64:
1288 ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1289   %quad = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %addr)
1290   ret %struct.__neon_int64x1x4_t %quad
1291 }
1292
1293 define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(double* %addr) {
1294 ; CHECK-LABEL: ld1_x4_v1f64:
1295 ; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1296   %quad = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %addr)
1297   ret %struct.__neon_float64x1x4_t %quad
1298 }
1299
1300 declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*) nounwind readonly
1301 declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*) nounwind readonly
1302 declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*) nounwind readonly
1303 declare %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*) nounwind readonly
1304 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*) nounwind readonly
1305 declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*) nounwind readonly
1306
1307 define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(i8* %addr) {
1308 ; CHECK-LABEL: ld1_x4_v16i8:
1309 ; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1310   %quad = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %addr)
1311   ret %struct.__neon_int8x16x4_t %quad
1312 }
1313
1314 define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(i16* %addr) {
1315 ; CHECK-LABEL: ld1_x4_v8i16:
1316 ; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1317   %quad = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %addr)
1318   ret %struct.__neon_int16x8x4_t %quad
1319 }
1320
1321 define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(i32* %addr) {
1322 ; CHECK-LABEL: ld1_x4_v4i32:
1323 ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1324   %quad = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %addr)
1325   ret %struct.__neon_int32x4x4_t %quad
1326 }
1327
1328 define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(float* %addr) {
1329 ; CHECK-LABEL: ld1_x4_v4f32:
1330 ; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1331   %quad = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %addr)
1332   ret %struct.__neon_float32x4x4_t %quad
1333 }
1334
1335 define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(i64* %addr) {
1336 ; CHECK-LABEL: ld1_x4_v2i64:
1337 ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1338   %quad = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %addr)
1339   ret %struct.__neon_int64x2x4_t %quad
1340 }
1341
1342 define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(double* %addr) {
1343 ; CHECK-LABEL: ld1_x4_v2f64:
1344 ; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1345   %quad = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %addr)
1346   ret %struct.__neon_float64x2x4_t %quad
1347 }