; test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll

   1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
   2
   3
   4 %struct.uint8x16x2_t = type { [2 x <16 x i8>] }
   5 %struct.poly8x16x2_t = type { [2 x <16 x i8>] }
   6 %struct.uint8x16x3_t = type { [3 x <16 x i8>] }
   7 %struct.int8x16x2_t = type { [2 x <16 x i8>] }
   8 %struct.int16x8x2_t = type { [2 x <8 x i16>] }
   9 %struct.int32x4x2_t = type { [2 x <4 x i32>] }
  10 %struct.int64x2x2_t = type { [2 x <2 x i64>] }
  11 %struct.float32x4x2_t = type { [2 x <4 x float>] }
  12 %struct.float64x2x2_t = type { [2 x <2 x double>] }
  13 %struct.int8x8x2_t = type { [2 x <8 x i8>] }
  14 %struct.int16x4x2_t = type { [2 x <4 x i16>] }
  15 %struct.int32x2x2_t = type { [2 x <2 x i32>] }
  16 %struct.int64x1x2_t = type { [2 x <1 x i64>] }
  17 %struct.float32x2x2_t = type { [2 x <2 x float>] }
  18 %struct.float64x1x2_t = type { [2 x <1 x double>] }
  19 %struct.int8x16x3_t = type { [3 x <16 x i8>] }
  20 %struct.int16x8x3_t = type { [3 x <8 x i16>] }
  21 %struct.int32x4x3_t = type { [3 x <4 x i32>] }
  22 %struct.int64x2x3_t = type { [3 x <2 x i64>] }
  23 %struct.float32x4x3_t = type { [3 x <4 x float>] }
  24 %struct.float64x2x3_t = type { [3 x <2 x double>] }
  25 %struct.int8x8x3_t = type { [3 x <8 x i8>] }
  26 %struct.int16x4x3_t = type { [3 x <4 x i16>] }
  27 %struct.int32x2x3_t = type { [3 x <2 x i32>] }
  28 %struct.int64x1x3_t = type { [3 x <1 x i64>] }
  29 %struct.float32x2x3_t = type { [3 x <2 x float>] }
  30 %struct.float64x1x3_t = type { [3 x <1 x double>] }
  31 %struct.int8x16x4_t = type { [4 x <16 x i8>] }
  32 %struct.int16x8x4_t = type { [4 x <8 x i16>] }
  33 %struct.int32x4x4_t = type { [4 x <4 x i32>] }
  34 %struct.int64x2x4_t = type { [4 x <2 x i64>] }
  35 %struct.float32x4x4_t = type { [4 x <4 x float>] }
  36 %struct.float64x2x4_t = type { [4 x <2 x double>] }
  37 %struct.int8x8x4_t = type { [4 x <8 x i8>] }
  38 %struct.int16x4x4_t = type { [4 x <4 x i16>] }
  39 %struct.int32x2x4_t = type { [4 x <2 x i32>] }
  40 %struct.int64x1x4_t = type { [4 x <1 x i64>] }
  41 %struct.float32x2x4_t = type { [4 x <2 x float>] }
  42 %struct.float64x1x4_t = type { [4 x <1 x double>] }
  43
  44 define <16 x i8> @test_ld_from_poll_v16i8(<16 x i8> %a) {
  45 ; CHECK-LABEL: test_ld_from_poll_v16i8:
  46 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
  47 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
  48 entry:
  49   %b = add <16 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 2, i8 13, i8 14, i8 15, i8 16>
  50   ret <16 x i8> %b
  51 }
  52
  53 define <8 x i16> @test_ld_from_poll_v8i16(<8 x i16> %a) {
  54 ; CHECK-LABEL: test_ld_from_poll_v8i16:
  55 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
  56 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
  57 entry:
  58   %b = add <8 x i16> %a, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  59   ret <8 x i16> %b
  60 }
  61
  62 define <4 x i32> @test_ld_from_poll_v4i32(<4 x i32> %a) {
  63 ; CHECK-LABEL: test_ld_from_poll_v4i32:
  64 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
  65 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
  66 entry:
  67   %b = add <4 x i32> %a, <i32 1, i32 2, i32 3, i32 4>
  68   ret <4 x i32> %b
  69 }
  70
  71 define <2 x i64> @test_ld_from_poll_v2i64(<2 x i64> %a) {
  72 ; CHECK-LABEL: test_ld_from_poll_v2i64:
  73 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
  74 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
  75 entry:
  76   %b = add <2 x i64> %a, <i64 1, i64 2>
  77   ret <2 x i64> %b
  78 }
  79
  80 define <4 x float> @test_ld_from_poll_v4f32(<4 x float> %a) {
  81 ; CHECK-LABEL: test_ld_from_poll_v4f32:
  82 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
  83 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
  84 entry:
  85   %b = fadd <4 x float> %a, <float 1.0, float 2.0, float 3.0, float 4.0>
  86   ret <4 x float> %b
  87 }
  88
  89 define <2 x double> @test_ld_from_poll_v2f64(<2 x double> %a) {
  90 ; CHECK-LABEL: test_ld_from_poll_v2f64:
  91 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
  92 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
  93 entry:
  94   %b = fadd <2 x double> %a, <double 1.0, double 2.0>
  95   ret <2 x double> %b
  96 }
  97
  98 define <8 x i8> @test_ld_from_poll_v8i8(<8 x i8> %a) {
  99 ; CHECK-LABEL: test_ld_from_poll_v8i8:
 100 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
 101 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
 102 entry:
 103   %b = add <8 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
 104   ret <8 x i8> %b
 105 }
 106
 107 define <4 x i16> @test_ld_from_poll_v4i16(<4 x i16> %a) {
 108 ; CHECK-LABEL: test_ld_from_poll_v4i16:
 109 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
 110 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
 111 entry:
 112   %b = add <4 x i16> %a, <i16 1, i16 2, i16 3, i16 4>
 113   ret <4 x i16> %b
 114 }
 115
 116 define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) {
 117 ; CHECK-LABEL: test_ld_from_poll_v2i32:
 118 ; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}}
 119 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.{{[A-Z0-9_]+}}]
 120 entry:
 121   %b = add <2 x i32> %a, <i32 1, i32 2>
 122   ret <2 x i32> %b
 123 }
 124
 125 define <16 x i8> @test_vld1q_dup_s8(i8* %a) {
 126 ; CHECK-LABEL: test_vld1q_dup_s8:
 127 ; CHECK: ld1r {{{ ?v[0-9]+.16b ?}}}, [x0]
 128 entry:
 129   %0 = load i8, i8* %a, align 1
 130   %1 = insertelement <16 x i8> undef, i8 %0, i32 0
 131   %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
 132   ret <16 x i8> %lane
 133 }
 134
 135 define <8 x i16> @test_vld1q_dup_s16(i16* %a) {
 136 ; CHECK-LABEL: test_vld1q_dup_s16:
 137 ; CHECK: ld1r {{{ ?v[0-9]+.8h ?}}}, [x0]
 138 entry:
 139   %0 = load i16, i16* %a, align 2
 140   %1 = insertelement <8 x i16> undef, i16 %0, i32 0
 141   %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
 142   ret <8 x i16> %lane
 143 }
 144
 145 define <4 x i32> @test_vld1q_dup_s32(i32* %a) {
 146 ; CHECK-LABEL: test_vld1q_dup_s32:
 147 ; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0]
 148 entry:
 149   %0 = load i32, i32* %a, align 4
 150   %1 = insertelement <4 x i32> undef, i32 %0, i32 0
 151   %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
 152   ret <4 x i32> %lane
 153 }
 154
 155 define <2 x i64> @test_vld1q_dup_s64(i64* %a) {
 156 ; CHECK-LABEL: test_vld1q_dup_s64:
 157 ; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0]
 158 entry:
 159   %0 = load i64, i64* %a, align 8
 160   %1 = insertelement <2 x i64> undef, i64 %0, i32 0
 161   %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
 162   ret <2 x i64> %lane
 163 }
 164
 165 define <4 x float> @test_vld1q_dup_f32(float* %a) {
 166 ; CHECK-LABEL: test_vld1q_dup_f32:
 167 ; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0]
 168 entry:
 169   %0 = load float, float* %a, align 4
 170   %1 = insertelement <4 x float> undef, float %0, i32 0
 171   %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
 172   ret <4 x float> %lane
 173 }
 174
 175 define <2 x double> @test_vld1q_dup_f64(double* %a) {
 176 ; CHECK-LABEL: test_vld1q_dup_f64:
 177 ; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0]
 178 entry:
 179   %0 = load double, double* %a, align 8
 180   %1 = insertelement <2 x double> undef, double %0, i32 0
 181   %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
 182   ret <2 x double> %lane
 183 }
 184
 185 define <8 x i8> @test_vld1_dup_s8(i8* %a) {
 186 ; CHECK-LABEL: test_vld1_dup_s8:
 187 ; CHECK: ld1r {{{ ?v[0-9]+.8b ?}}}, [x0]
 188 entry:
 189   %0 = load i8, i8* %a, align 1
 190   %1 = insertelement <8 x i8> undef, i8 %0, i32 0
 191   %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
 192   ret <8 x i8> %lane
 193 }
 194
 195 define <4 x i16> @test_vld1_dup_s16(i16* %a) {
 196 ; CHECK-LABEL: test_vld1_dup_s16:
 197 ; CHECK: ld1r {{{ ?v[0-9]+.4h ?}}}, [x0]
 198 entry:
 199   %0 = load i16, i16* %a, align 2
 200   %1 = insertelement <4 x i16> undef, i16 %0, i32 0
 201   %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
 202   ret <4 x i16> %lane
 203 }
 204
 205 define <2 x i32> @test_vld1_dup_s32(i32* %a) {
 206 ; CHECK-LABEL: test_vld1_dup_s32:
 207 ; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0]
 208 entry:
 209   %0 = load i32, i32* %a, align 4
 210   %1 = insertelement <2 x i32> undef, i32 %0, i32 0
 211   %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
 212   ret <2 x i32> %lane
 213 }
 214
 215 define <1 x i64> @test_vld1_dup_s64(i64* %a) {
 216 ; CHECK-LABEL: test_vld1_dup_s64:
 217 ; CHECK: ldr {{d[0-9]+}}, [x0]
 218 entry:
 219   %0 = load i64, i64* %a, align 8
 220   %1 = insertelement <1 x i64> undef, i64 %0, i32 0
 221   ret <1 x i64> %1
 222 }
 223
 224 define <2 x float> @test_vld1_dup_f32(float* %a) {
 225 ; CHECK-LABEL: test_vld1_dup_f32:
 226 ; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0]
 227 entry:
 228   %0 = load float, float* %a, align 4
 229   %1 = insertelement <2 x float> undef, float %0, i32 0
 230   %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
 231   ret <2 x float> %lane
 232 }
 233
 234 define <1 x double> @test_vld1_dup_f64(double* %a) {
 235 ; CHECK-LABEL: test_vld1_dup_f64:
 236 ; CHECK: ldr {{d[0-9]+}}, [x0]
 237 entry:
 238   %0 = load double, double* %a, align 8
 239   %1 = insertelement <1 x double> undef, double %0, i32 0
 240   ret <1 x double> %1
 241 }
 242
 243 define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 {
 244 ; As there is a store operation depending on %1, LD1R pattern can't be selected.
 245 ; So LDR and FMOV should be emitted.
 246 ; CHECK-LABEL: testDUP.v1i64:
 247 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
 248 ; CHECK-DAG: fmov {{d[0-9]+}}, {{x[0-9]+}}
 249 ; CHECK-DAG: str {{x[0-9]+}}, [{{x[0-9]+}}]
 250   %1 = load i64, i64* %a, align 8
 251   store i64 %1, i64* %b, align 8
 252   %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0
 253   ret <1 x i64> %vecinit.i
 254 }
 255
 256 define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 {
 257 ; As there is a store operation depending on %1, LD1R pattern can't be selected.
 258 ; So LDR and FMOV should be emitted.
 259 ; CHECK-LABEL: testDUP.v1f64:
 260 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}]
 261 ; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}]
 262   %1 = load double, double* %a, align 8
 263   store double %1, double* %b, align 8
 264   %vecinit.i = insertelement <1 x double> undef, double %1, i32 0
 265   ret <1 x double> %vecinit.i
 266 }
 267
 268 define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) {
 269 ; CHECK-LABEL: test_vld1q_lane_s8:
 270 ; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
 271 entry:
 272   %0 = load i8, i8* %a, align 1
 273   %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15
 274   ret <16 x i8> %vld1_lane
 275 }
 276
 277 define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) {
 278 ; CHECK-LABEL: test_vld1q_lane_s16:
 279 ; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
 280 entry:
 281   %0 = load i16, i16* %a, align 2
 282   %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7
 283   ret <8 x i16> %vld1_lane
 284 }
 285
 286 define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) {
 287 ; CHECK-LABEL: test_vld1q_lane_s32:
 288 ; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
 289 entry:
 290   %0 = load i32, i32* %a, align 4
 291   %vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3
 292   ret <4 x i32> %vld1_lane
 293 }
 294
 295 define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) {
 296 ; CHECK-LABEL: test_vld1q_lane_s64:
 297 ; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
 298 entry:
 299   %0 = load i64, i64* %a, align 8
 300   %vld1_lane = insertelement <2 x i64> %b, i64 %0, i32 1
 301   ret <2 x i64> %vld1_lane
 302 }
 303
 304 define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) {
 305 ; CHECK-LABEL: test_vld1q_lane_f32:
 306 ; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
 307 entry:
 308   %0 = load float, float* %a, align 4
 309   %vld1_lane = insertelement <4 x float> %b, float %0, i32 3
 310   ret <4 x float> %vld1_lane
 311 }
 312
 313 define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) {
 314 ; CHECK-LABEL: test_vld1q_lane_f64:
 315 ; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
 316 entry:
 317   %0 = load double, double* %a, align 8
 318   %vld1_lane = insertelement <2 x double> %b, double %0, i32 1
 319   ret <2 x double> %vld1_lane
 320 }
 321
 322 define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) {
 323 ; CHECK-LABEL: test_vld1_lane_s8:
 324 ; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
 325 entry:
 326   %0 = load i8, i8* %a, align 1
 327   %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7
 328   ret <8 x i8> %vld1_lane
 329 }
 330
 331 define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) {
 332 ; CHECK-LABEL: test_vld1_lane_s16:
 333 ; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
 334 entry:
 335   %0 = load i16, i16* %a, align 2
 336   %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3
 337   ret <4 x i16> %vld1_lane
 338 }
 339
 340 define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) {
 341 ; CHECK-LABEL: test_vld1_lane_s32:
 342 ; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
 343 entry:
 344   %0 = load i32, i32* %a, align 4
 345   %vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1
 346   ret <2 x i32> %vld1_lane
 347 }
 348
 349 define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) {
 350 ; CHECK-LABEL: test_vld1_lane_s64:
 351 ; CHECK: ldr {{d[0-9]+}}, [x0]
 352 entry:
 353   %0 = load i64, i64* %a, align 8
 354   %vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0
 355   ret <1 x i64> %vld1_lane
 356 }
 357
 358 define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) {
 359 ; CHECK-LABEL: test_vld1_lane_f32:
 360 ; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
 361 entry:
 362   %0 = load float, float* %a, align 4
 363   %vld1_lane = insertelement <2 x float> %b, float %0, i32 1
 364   ret <2 x float> %vld1_lane
 365 }
 366
 367 define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) {
 368 ; CHECK-LABEL: test_vld1_lane_f64:
 369 ; CHECK: ldr {{d[0-9]+}}, [x0]
 370 entry:
 371   %0 = load double, double* %a, align 8
 372   %vld1_lane = insertelement <1 x double> undef, double %0, i32 0
 373   ret <1 x double> %vld1_lane
 374 }
 375
 376 define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) {
 377 ; CHECK-LABEL: test_vst1q_lane_s8:
 378 ; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
 379 entry:
 380   %0 = extractelement <16 x i8> %b, i32 15
 381   store i8 %0, i8* %a, align 1
 382   ret void
 383 }
 384
 385 define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) {
 386 ; CHECK-LABEL: test_vst1q_lane_s16:
 387 ; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
 388 entry:
 389   %0 = extractelement <8 x i16> %b, i32 7
 390   store i16 %0, i16* %a, align 2
 391   ret void
 392 }
 393
 394 define void @test_vst1q_lane0_s16(i16* %a, <8 x i16> %b) {
 395 ; CHECK-LABEL: test_vst1q_lane0_s16:
 396 ; CHECK: str {{h[0-9]+}}, [x0]
 397 entry:
 398   %0 = extractelement <8 x i16> %b, i32 0
 399   store i16 %0, i16* %a, align 2
 400   ret void
 401 }
 402
 403 define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) {
 404 ; CHECK-LABEL: test_vst1q_lane_s32:
 405 ; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
 406 entry:
 407   %0 = extractelement <4 x i32> %b, i32 3
 408   store i32 %0, i32* %a, align 4
 409   ret void
 410 }
 411
 412 define void @test_vst1q_lane0_s32(i32* %a, <4 x i32> %b) {
 413 ; CHECK-LABEL: test_vst1q_lane0_s32:
 414 ; CHECK: str {{s[0-9]+}}, [x0]
 415 entry:
 416   %0 = extractelement <4 x i32> %b, i32 0
 417   store i32 %0, i32* %a, align 4
 418   ret void
 419 }
 420
 421 define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) {
 422 ; CHECK-LABEL: test_vst1q_lane_s64:
 423 ; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
 424 entry:
 425   %0 = extractelement <2 x i64> %b, i32 1
 426   store i64 %0, i64* %a, align 8
 427   ret void
 428 }
 429
 430 define void @test_vst1q_lane0_s64(i64* %a, <2 x i64> %b) {
 431 ; CHECK-LABEL: test_vst1q_lane0_s64:
 432 ; CHECK: str {{d[0-9]+}}, [x0]
 433 entry:
 434   %0 = extractelement <2 x i64> %b, i32 0
 435   store i64 %0, i64* %a, align 8
 436   ret void
 437 }
 438
 439 define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) {
 440 ; CHECK-LABEL: test_vst1q_lane_f32:
 441 ; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
 442 entry:
 443   %0 = extractelement <4 x float> %b, i32 3
 444   store float %0, float* %a, align 4
 445   ret void
 446 }
 447
 448 define void @test_vst1q_lane0_f32(float* %a, <4 x float> %b) {
 449 ; CHECK-LABEL: test_vst1q_lane0_f32:
 450 ; CHECK: str {{s[0-9]+}}, [x0]
 451 entry:
 452   %0 = extractelement <4 x float> %b, i32 0
 453   store float %0, float* %a, align 4
 454   ret void
 455 }
 456
 457 define void @test_vst1q_lane_f64(double* %a, <2 x double> %b) {
 458 ; CHECK-LABEL: test_vst1q_lane_f64:
 459 ; CHECK: st1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
 460 entry:
 461   %0 = extractelement <2 x double> %b, i32 1
 462   store double %0, double* %a, align 8
 463   ret void
 464 }
 465
 466 define void @test_vst1q_lane0_f64(double* %a, <2 x double> %b) {
 467 ; CHECK-LABEL: test_vst1q_lane0_f64:
 468 ; CHECK: str {{d[0-9]+}}, [x0]
 469 entry:
 470   %0 = extractelement <2 x double> %b, i32 0
 471   store double %0, double* %a, align 8
 472   ret void
 473 }
 474
 475 define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) {
 476 ; CHECK-LABEL: test_vst1_lane_s8:
 477 ; CHECK: st1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
 478 entry:
 479   %0 = extractelement <8 x i8> %b, i32 7
 480   store i8 %0, i8* %a, align 1
 481   ret void
 482 }
 483
 484 define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) {
 485 ; CHECK-LABEL: test_vst1_lane_s16:
 486 ; CHECK: st1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
 487 entry:
 488   %0 = extractelement <4 x i16> %b, i32 3
 489   store i16 %0, i16* %a, align 2
 490   ret void
 491 }
 492
 493 define void @test_vst1_lane0_s16(i16* %a, <4 x i16> %b) {
 494 ; CHECK-LABEL: test_vst1_lane0_s16:
 495 ; CHECK: str {{h[0-9]+}}, [x0]
 496 entry:
 497   %0 = extractelement <4 x i16> %b, i32 0
 498   store i16 %0, i16* %a, align 2
 499   ret void
 500 }
 501
 502 define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) {
 503 ; CHECK-LABEL: test_vst1_lane_s32:
 504 ; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
 505 entry:
 506   %0 = extractelement <2 x i32> %b, i32 1
 507   store i32 %0, i32* %a, align 4
 508   ret void
 509 }
 510
 511 define void @test_vst1_lane0_s32(i32* %a, <2 x i32> %b) {
 512 ; CHECK-LABEL: test_vst1_lane0_s32:
 513 ; CHECK: str {{s[0-9]+}}, [x0]
 514 entry:
 515   %0 = extractelement <2 x i32> %b, i32 0
 516   store i32 %0, i32* %a, align 4
 517   ret void
 518 }
 519
 520 define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) {
 521 ; CHECK-LABEL: test_vst1_lane_s64:
 522 ; CHECK: str {{d[0-9]+}}, [x0]
 523 entry:
 524   %0 = extractelement <1 x i64> %b, i32 0
 525   store i64 %0, i64* %a, align 8
 526   ret void
 527 }
 528
 529 define void @test_vst1_lane_f32(float* %a, <2 x float> %b) {
 530 ; CHECK-LABEL: test_vst1_lane_f32:
 531 ; CHECK: st1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
 532 entry:
 533   %0 = extractelement <2 x float> %b, i32 1
 534   store float %0, float* %a, align 4
 535   ret void
 536 }
 537
 538 define void @test_vst1_lane0_f32(float* %a, <2 x float> %b) {
 539 ; CHECK-LABEL: test_vst1_lane0_f32:
 540 ; CHECK: str {{s[0-9]+}}, [x0]
 541 entry:
 542   %0 = extractelement <2 x float> %b, i32 0
 543   store float %0, float* %a, align 4
 544   ret void
 545 }
 546
 547 define void @test_vst1_lane_f64(double* %a, <1 x double> %b) {
 548 ; CHECK-LABEL: test_vst1_lane_f64:
 549 ; CHECK: str {{d[0-9]+}}, [x0]
 550 entry:
 551   %0 = extractelement <1 x double> %b, i32 0
 552   store double %0, double* %a, align 8
 553   ret void
 554 }