// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
// RUN: -S -disable-O0-optnone \
// RUN: -flax-vector-conversions=none -emit-llvm -o - %s \
// RUN: | opt -S -passes=mem2reg \
// RUN: | FileCheck %s

// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_neon.h>

// CHECK-LABEL: @test_vadd_s8(
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
  return vadd_s8(v1, v2);
}

// CHECK-LABEL: @test_vadd_s16(
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
  return vadd_s16(v1, v2);
}

// CHECK-LABEL: @test_vadd_s32(
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
  return vadd_s32(v1, v2);
}

// CHECK-LABEL: @test_vadd_s64(
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
  return vadd_s64(v1, v2);
}

// CHECK-LABEL: @test_vadd_f32(
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
  return vadd_f32(v1, v2);
}

// CHECK-LABEL: @test_vadd_u8(
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vadd_u8(v1, v2);
}

// CHECK-LABEL: @test_vadd_u16(
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vadd_u16(v1, v2);
}

// CHECK-LABEL: @test_vadd_u32(
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vadd_u32(v1, v2);
}

// CHECK-LABEL: @test_vadd_u64(
// CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
  return vadd_u64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s8(
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s16(
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s32(
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_s64(
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
  return vaddq_s64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_f32(
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
  return vaddq_f32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_f64(
// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[ADD_I]]
float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
  return vaddq_f64(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u8(
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u16(
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u32(
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vaddq_u32(v1, v2);
}

// CHECK-LABEL: @test_vaddq_u64(
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vaddq_u64(v1, v2);
}

// CHECK-LABEL: @test_vsub_s8(
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
  return vsub_s8(v1, v2);
}

// CHECK-LABEL: @test_vsub_s16(
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[SUB_I]]
int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
  return vsub_s16(v1, v2);
}

// CHECK-LABEL: @test_vsub_s32(
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
  return vsub_s32(v1, v2);
}

// CHECK-LABEL: @test_vsub_s64(
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[SUB_I]]
int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
  return vsub_s64(v1, v2);
}

// CHECK-LABEL: @test_vsub_f32(
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
  return vsub_f32(v1, v2);
}

// CHECK-LABEL: @test_vsub_u8(
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vsub_u8(v1, v2);
}

// CHECK-LABEL: @test_vsub_u16(
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vsub_u16(v1, v2);
}

// CHECK-LABEL: @test_vsub_u32(
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vsub_u32(v1, v2);
}

// CHECK-LABEL: @test_vsub_u64(
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
// CHECK: ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
  return vsub_u64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s8(
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vsubq_s8(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s16(
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vsubq_s16(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s32(
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vsubq_s32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_s64(
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
  return vsubq_s64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_f32(
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
  return vsubq_f32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_f64(
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[SUB_I]]
float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
  return vsubq_f64(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u8(
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vsubq_u8(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u16(
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vsubq_u16(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u32(
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vsubq_u32(v1, v2);
}

// CHECK-LABEL: @test_vsubq_u64(
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
// CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vsubq_u64(v1, v2);
}

// CHECK-LABEL: @test_vmul_s8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
  return vmul_s8(v1, v2);
}

// CHECK-LABEL: @test_vmul_s16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
  return vmul_s16(v1, v2);
}

// CHECK-LABEL: @test_vmul_s32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
  return vmul_s32(v1, v2);
}

// CHECK-LABEL: @test_vmul_f32(
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
  return vmul_f32(v1, v2);
}

// CHECK-LABEL: @test_vmul_u8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
// CHECK: ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
  return vmul_u8(v1, v2);
}

// CHECK-LABEL: @test_vmul_u16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
// CHECK: ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
  return vmul_u16(v1, v2);
}

// CHECK-LABEL: @test_vmul_u32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
// CHECK: ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
  return vmul_u32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
  return vmulq_s8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
  return vmulq_s16(v1, v2);
}

// CHECK-LABEL: @test_vmulq_s32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
  return vmulq_s32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
// CHECK: ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vmulq_u8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
// CHECK: ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vmulq_u16(v1, v2);
}

// CHECK-LABEL: @test_vmulq_u32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
// CHECK: ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vmulq_u32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_f32(
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
  return vmulq_f32(v1, v2);
}

// CHECK-LABEL: @test_vmulq_f64(
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[MUL_I]]
float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
  return vmulq_f64(v1, v2);
}

// CHECK-LABEL: @test_vmul_p8(
// CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
  return vmul_p8(v1, v2);
}

// CHECK-LABEL: @test_vmulq_p8(
// CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vmulq_p8(v1, v2);
}

// CHECK-LABEL: @test_vmla_s8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmla_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_s16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return (int8x8_t)vmla_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_s32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmla_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_f32(
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
// CHECK: ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmla_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmla_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmla_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmla_u32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmla_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlaq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlaq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_s32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlaq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_f32(
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
// CHECK: ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlaq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlaq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlaq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_u32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlaq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlaq_f64(
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
// CHECK: ret <2 x double> [[ADD_I]]
float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlaq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_s8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vmls_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_s16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return (int8x8_t)vmls_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_s32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vmls_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_f32(
// CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
// CHECK: ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vmls_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u8(
// CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
// CHECK: ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vmls_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u16(
// CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
// CHECK: ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vmls_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmls_u32(
// CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
// CHECK: ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vmls_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vmlsq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vmlsq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_s32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vmlsq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_f32(
// CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
// CHECK: ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vmlsq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u8(
// CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
// CHECK: ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vmlsq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u16(
// CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vmlsq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_u32(
// CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vmlsq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vmlsq_f64(
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
// CHECK: ret <2 x double> [[SUB_I]]
float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vmlsq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vfma_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1)
// CHECK: ret <2 x float> [[TMP3]]
float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfma_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmaq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1)
// CHECK: ret <4 x float> [[TMP3]]
float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmaq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmaq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1)
// CHECK: ret <2 x double> [[TMP3]]
float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmaq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vfms_f32(
// CHECK: [[SUB_I:%.*]] = fneg <2 x float> %v2
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1)
// CHECK: ret <2 x float> [[TMP3]]
float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vfms_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmsq_f32(
// CHECK: [[SUB_I:%.*]] = fneg <4 x float> %v2
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1)
// CHECK: ret <4 x float> [[TMP3]]
float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vfmsq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vfmsq_f64(
// CHECK: [[SUB_I:%.*]] = fneg <2 x double> %v2
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1)
// CHECK: ret <2 x double> [[TMP3]]
float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vfmsq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vdivq_f64(
// CHECK: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
// CHECK: ret <2 x double> [[DIV_I]]
float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
  return vdivq_f64(v1, v2);
}

// CHECK-LABEL: @test_vdivq_f32(
// CHECK: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
// CHECK: ret <4 x float> [[DIV_I]]
float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
  return vdivq_f32(v1, v2);
}

// CHECK-LABEL: @test_vdiv_f32(
// CHECK: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
// CHECK: ret <2 x float> [[DIV_I]]
float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
  return vdiv_f32(v1, v2);
}

// CHECK-LABEL: @test_vaba_s8(
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vaba_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return vaba_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vaba_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u8(
// CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
// CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vaba_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
// CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vaba_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vaba_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
// CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vaba_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s8(
// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vabaq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vabaq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vabaq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u8(
// CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
// CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
// CHECK: ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vabaq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vabaq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vabaq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
  return vabaq_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vabd_s8(
// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VABD_I]]
int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
  return vabd_s8(v1, v2);
}

// CHECK-LABEL: @test_vabd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: ret <4 x i16> [[VABD2_I]]
int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
  return vabd_s16(v1, v2);
}

// CHECK-LABEL: @test_vabd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: ret <2 x i32> [[VABD2_I]]
int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
  return vabd_s32(v1, v2);
}

// CHECK-LABEL: @test_vabd_u8(
// CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
// CHECK: ret <8 x i8> [[VABD_I]]
uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vabd_u8(v1, v2);
}

// CHECK-LABEL: @test_vabd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
// CHECK: ret <4 x i16> [[VABD2_I]]
uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vabd_u16(v1, v2);
}

// CHECK-LABEL: @test_vabd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
// CHECK: ret <2 x i32> [[VABD2_I]]
uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vabd_u32(v1, v2);
}

// CHECK-LABEL: @test_vabd_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK: ret <2 x float> [[VABD2_I]]
float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
  return vabd_f32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s8(
// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VABD_I]]
int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
  return vabdq_s8(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: ret <8 x i16> [[VABD2_I]]
int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
  return vabdq_s16(v1, v2);
}

// CHECK-LABEL: @test_vabdq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: ret <4 x i32> [[VABD2_I]]
int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
  return vabdq_s32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u8(
// CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VABD_I]]
uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vabdq_u8(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: ret <8 x i16> [[VABD2_I]]
uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vabdq_u16(v1, v2);
}

// CHECK-LABEL: @test_vabdq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: ret <4 x i32> [[VABD2_I]]
uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vabdq_u32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK: ret <4 x float> [[VABD2_I]]
float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
  return vabdq_f32(v1, v2);
}

// CHECK-LABEL: @test_vabdq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK: ret <2 x double> [[VABD2_I]]
float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
  return vabdq_f64(v1, v2);
}

// CHECK-LABEL: @test_vbsl_s8(
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <8 x i8> [[VBSL2_I]]
int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
  return vbsl_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8>
// CHECK: ret <8 x i8> [[TMP4]]
int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
  return (int8x8_t)vbsl_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i32> [[VBSL5_I]]
int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
  return vbsl_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <1 x i64> [[VBSL5_I]]
int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) {
  return vbsl_s64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u8(
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <8 x i8> [[VBSL2_I]]
uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
  return vbsl_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i16> [[VBSL5_I]]
uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
  return vbsl_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i32> [[VBSL5_I]]
uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
  return vbsl_u32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <1 x i64> [[VBSL5_I]]
uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
  return vbsl_u64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_f32(
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, [[VBSL1_I]]
// CHECK: [[TMP4:%.*]] = xor <2 x i32> %v1, <i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
// CHECK: ret <2 x float> [[TMP5]]
float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) {
  return vbsl_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8>
// CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, <i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
// CHECK: ret <1 x double> [[TMP4]]
float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
  return vbsl_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_p8(
// CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <8 x i8> [[VBSL2_I]]
poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
  return vbsl_p8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbsl_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i16> [[VBSL5_I]]
poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
  return vbsl_p16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s8(
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
  return vbslq_s8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
  return vbslq_s16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i32> [[VBSL5_I]]
int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vbslq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i64> [[VBSL5_I]]
int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
  return vbslq_s64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u8(
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
  return vbslq_u8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
  return vbslq_u16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <4 x i32> [[VBSL5_I]]
int32x4_t test_vbslq_u32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
  return vbslq_s32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <2 x i64> [[VBSL5_I]]
uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
  return vbslq_u64(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
// CHECK: ret <4 x float> [[TMP4]]
float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
  return vbslq_f32(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_p8(
// CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
// CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
// CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
// CHECK: ret <16 x i8> [[VBSL2_I]]
poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
  return vbslq_p8(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
// CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
// CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
// CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: ret <8 x i16> [[VBSL5_I]]
poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
  return vbslq_p16(v1, v2, v3);
}

// CHECK-LABEL: @test_vbslq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
// CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]]
// CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]]
// CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
// CHECK: ret <2 x double> [[TMP4]]
float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
  return vbslq_f64(v1, v2, v3);
}

// CHECK-LABEL: @test_vrecps_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK: ret <2 x float> [[VRECPS_V2_I]]
float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
  return vrecps_f32(v1, v2);
}

// CHECK-LABEL: @test_vrecpsq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VRECPSQ_V2_I]]
float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrecpsq_f32(v1, v2);
}

// CHECK-LABEL: @test_vrecpsq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x double> [[VRECPSQ_V2_I]]
float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrecpsq_f64(v1, v2);
}

// CHECK-LABEL: @test_vrsqrts_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2)
// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VRSQRTS_V2_I]]
float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
  return vrsqrts_f32(v1, v2);
}

// CHECK-LABEL: @test_vrsqrtsq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2)
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x float> [[VRSQRTSQ_V2_I]]
float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
  return vrsqrtsq_f32(v1, v2);
}

// CHECK-LABEL: @test_vrsqrtsq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2)
// CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x double> [[VRSQRTSQ_V2_I]]
float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
  return vrsqrtsq_f64(v1, v2);
}

1327 // CHECK-LABEL: @test_vcage_f32(
1328 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1329 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1330 // CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
1331 // CHECK: ret <2 x i32> [[VCAGE_V2_I]]
1332 uint32x2_t
test_vcage_f32(float32x2_t v1
, float32x2_t v2
) {
1333 return vcage_f32(v1
, v2
);
1336 // CHECK-LABEL: @test_vcage_f64(
1337 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1338 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1339 // CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
1340 // CHECK: ret <1 x i64> [[VCAGE_V2_I]]
1341 uint64x1_t
test_vcage_f64(float64x1_t a
, float64x1_t b
) {
1342 return vcage_f64(a
, b
);
1345 // CHECK-LABEL: @test_vcageq_f32(
1346 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1347 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1348 // CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
1349 // CHECK: ret <4 x i32> [[VCAGEQ_V2_I]]
1350 uint32x4_t
test_vcageq_f32(float32x4_t v1
, float32x4_t v2
) {
1351 return vcageq_f32(v1
, v2
);
1354 // CHECK-LABEL: @test_vcageq_f64(
1355 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1356 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1357 // CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
1358 // CHECK: ret <2 x i64> [[VCAGEQ_V2_I]]
uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
  return vcageq_f64(v1, v2);
}
1363 // CHECK-LABEL: @test_vcagt_f32(
1364 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1365 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1366 // CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
1367 // CHECK: ret <2 x i32> [[VCAGT_V2_I]]
uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
  return vcagt_f32(v1, v2);
}
1372 // CHECK-LABEL: @test_vcagt_f64(
1373 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1374 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1375 // CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
1376 // CHECK: ret <1 x i64> [[VCAGT_V2_I]]
uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
  return vcagt_f64(a, b);
}
1381 // CHECK-LABEL: @test_vcagtq_f32(
1382 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1383 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1384 // CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
1385 // CHECK: ret <4 x i32> [[VCAGTQ_V2_I]]
uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcagtq_f32(v1, v2);
}
1390 // CHECK-LABEL: @test_vcagtq_f64(
1391 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1392 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1393 // CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
1394 // CHECK: ret <2 x i64> [[VCAGTQ_V2_I]]
uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcagtq_f64(v1, v2);
}
1399 // CHECK-LABEL: @test_vcale_f32(
1400 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1401 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1402 // CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
1403 // CHECK: ret <2 x i32> [[VCALE_V2_I]]
uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
  return vcale_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
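// Illustrative sketch, not part of the FileCheck-verified tests: there is no
// separate "facle" call in the IR above; vcale_f32(v1, v2) is emitted as facge
// with swapped operands, i.e. it matches vcage_f32(v2, v1).
// The helper name cale_via_cage_f32 is hypothetical.
static inline uint32x2_t cale_via_cage_f32(float32x2_t v1, float32x2_t v2) {
  return vcage_f32(v2, v1);  // |v1| <= |v2| expressed as |v2| >= |v1|
}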
1409 // CHECK-LABEL: @test_vcale_f64(
1410 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1411 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1412 // CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
1413 // CHECK: ret <1 x i64> [[VCALE_V2_I]]
uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
  return vcale_f64(a, b);
}
1418 // CHECK-LABEL: @test_vcaleq_f32(
1419 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1420 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1421 // CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
1422 // CHECK: ret <4 x i32> [[VCALEQ_V2_I]]
uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaleq_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1428 // CHECK-LABEL: @test_vcaleq_f64(
1429 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1430 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1431 // CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
1432 // CHECK: ret <2 x i64> [[VCALEQ_V2_I]]
uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaleq_f64(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1438 // CHECK-LABEL: @test_vcalt_f32(
1439 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
1440 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
1441 // CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
1442 // CHECK: ret <2 x i32> [[VCALT_V2_I]]
uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
  return vcalt_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1448 // CHECK-LABEL: @test_vcalt_f64(
1449 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
1450 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
1451 // CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
1452 // CHECK: ret <1 x i64> [[VCALT_V2_I]]
uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
  return vcalt_f64(a, b);
}
1457 // CHECK-LABEL: @test_vcaltq_f32(
1458 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
1459 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
1460 // CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
1461 // CHECK: ret <4 x i32> [[VCALTQ_V2_I]]
uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcaltq_f32(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1467 // CHECK-LABEL: @test_vcaltq_f64(
1468 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
1469 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
1470 // CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
1471 // CHECK: ret <2 x i64> [[VCALTQ_V2_I]]
uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcaltq_f64(v1, v2);
  // Using registers other than v0, v1 is possible, but would be odd.
}
1477 // CHECK-LABEL: @test_vtst_s8(
1478 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1479 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1480 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1481 // CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
  return vtst_s8(v1, v2);
}
1486 // CHECK-LABEL: @test_vtst_s16(
1487 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1488 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1489 // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1490 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1491 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1492 // CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
  return vtst_s16(v1, v2);
}
1497 // CHECK-LABEL: @test_vtst_s32(
1498 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1499 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1500 // CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2
1501 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
1502 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
1503 // CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
  return vtst_s32(v1, v2);
}
1508 // CHECK-LABEL: @test_vtst_u8(
1509 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1510 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1511 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1512 // CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
  return vtst_u8(v1, v2);
}
1517 // CHECK-LABEL: @test_vtst_u16(
1518 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1519 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1520 // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1521 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1522 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1523 // CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
  return vtst_u16(v1, v2);
}
1528 // CHECK-LABEL: @test_vtst_u32(
1529 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
1530 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
1531 // CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2
1532 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
1533 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
1534 // CHECK: ret <2 x i32> [[VTST_I]]
uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
  return vtst_u32(v1, v2);
}
1539 // CHECK-LABEL: @test_vtstq_s8(
1540 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1541 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1542 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1543 // CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
  return vtstq_s8(v1, v2);
}
1548 // CHECK-LABEL: @test_vtstq_s16(
1549 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1550 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1551 // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1552 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1553 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1554 // CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
  return vtstq_s16(v1, v2);
}
1559 // CHECK-LABEL: @test_vtstq_s32(
1560 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1561 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1562 // CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2
1563 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1564 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1565 // CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
  return vtstq_s32(v1, v2);
}
1570 // CHECK-LABEL: @test_vtstq_u8(
1571 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1572 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1573 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1574 // CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vtstq_u8(v1, v2);
}
1579 // CHECK-LABEL: @test_vtstq_u16(
1580 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1581 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1582 // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1583 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1584 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1585 // CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vtstq_u16(v1, v2);
}
1590 // CHECK-LABEL: @test_vtstq_u32(
1591 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
1592 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
1593 // CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2
1594 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1595 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1596 // CHECK: ret <4 x i32> [[VTST_I]]
uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vtstq_u32(v1, v2);
}
1601 // CHECK-LABEL: @test_vtstq_s64(
1602 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1603 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1604 // CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2
1605 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1606 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1607 // CHECK: ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
  return vtstq_s64(v1, v2);
}
1612 // CHECK-LABEL: @test_vtstq_u64(
1613 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
1614 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
1615 // CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2
1616 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1617 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1618 // CHECK: ret <2 x i64> [[VTST_I]]
uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vtstq_u64(v1, v2);
}
1623 // CHECK-LABEL: @test_vtst_p8(
1624 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2
1625 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
1626 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
1627 // CHECK: ret <8 x i8> [[VTST_I]]
uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
  return vtst_p8(v1, v2);
}
1632 // CHECK-LABEL: @test_vtst_p16(
1633 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
1634 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
1635 // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2
1636 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
1637 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
1638 // CHECK: ret <4 x i16> [[VTST_I]]
uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
  return vtst_p16(v1, v2);
}
1643 // CHECK-LABEL: @test_vtstq_p8(
1644 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2
1645 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
1646 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
1647 // CHECK: ret <16 x i8> [[VTST_I]]
uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vtstq_p8(v1, v2);
}
1652 // CHECK-LABEL: @test_vtstq_p16(
1653 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
1654 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
1655 // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2
1656 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
1657 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
1658 // CHECK: ret <8 x i16> [[VTST_I]]
uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
  return vtstq_p16(v1, v2);
}
1663 // CHECK-LABEL: @test_vtst_s64(
1664 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1665 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1666 // CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b
1667 // CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
1668 // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
1669 // CHECK: ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
  return vtst_s64(a, b);
}
1674 // CHECK-LABEL: @test_vtst_u64(
1675 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
1676 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
1677 // CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b
1678 // CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
1679 // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
1680 // CHECK: ret <1 x i64> [[VTST_I]]
uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
  return vtst_u64(a, b);
}
1685 // CHECK-LABEL: @test_vceq_s8(
1686 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1687 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1688 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
  return vceq_s8(v1, v2);
}
1693 // CHECK-LABEL: @test_vceq_s16(
1694 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
1695 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1696 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
  return vceq_s16(v1, v2);
}
1701 // CHECK-LABEL: @test_vceq_s32(
1702 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
1703 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1704 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
  return vceq_s32(v1, v2);
}
1709 // CHECK-LABEL: @test_vceq_s64(
1710 // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
1711 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1712 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
  return vceq_s64(a, b);
}
1717 // CHECK-LABEL: @test_vceq_u64(
1718 // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
1719 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1720 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
  return vceq_u64(a, b);
}
1725 // CHECK-LABEL: @test_vceq_f32(
1726 // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
1727 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1728 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
  return vceq_f32(v1, v2);
}
1733 // CHECK-LABEL: @test_vceq_f64(
1734 // CHECK: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
1735 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1736 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
  return vceq_f64(a, b);
}
1741 // CHECK-LABEL: @test_vceq_u8(
1742 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1743 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1744 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
  return vceq_u8(v1, v2);
}
1749 // CHECK-LABEL: @test_vceq_u16(
1750 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
1751 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1752 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
  return vceq_u16(v1, v2);
}
1757 // CHECK-LABEL: @test_vceq_u32(
1758 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
1759 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1760 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
  return vceq_u32(v1, v2);
}
1765 // CHECK-LABEL: @test_vceq_p8(
1766 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
1767 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1768 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
  return vceq_p8(v1, v2);
}
1773 // CHECK-LABEL: @test_vceqq_s8(
1774 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1775 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1776 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
  return vceqq_s8(v1, v2);
}
1781 // CHECK-LABEL: @test_vceqq_s16(
1782 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
1783 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1784 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
  return vceqq_s16(v1, v2);
}
1789 // CHECK-LABEL: @test_vceqq_s32(
1790 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
1791 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1792 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
  return vceqq_s32(v1, v2);
}
1797 // CHECK-LABEL: @test_vceqq_f32(
1798 // CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
1799 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1800 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
  return vceqq_f32(v1, v2);
}
1805 // CHECK-LABEL: @test_vceqq_u8(
1806 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1807 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1808 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vceqq_u8(v1, v2);
}
1813 // CHECK-LABEL: @test_vceqq_u16(
1814 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
1815 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1816 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vceqq_u16(v1, v2);
}
1821 // CHECK-LABEL: @test_vceqq_u32(
1822 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
1823 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1824 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vceqq_u32(v1, v2);
}
1829 // CHECK-LABEL: @test_vceqq_p8(
1830 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
1831 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1832 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
  return vceqq_p8(v1, v2);
}
1837 // CHECK-LABEL: @test_vceqq_s64(
1838 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
1839 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1840 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
  return vceqq_s64(v1, v2);
}
1845 // CHECK-LABEL: @test_vceqq_u64(
1846 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
1847 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1848 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vceqq_u64(v1, v2);
}
1853 // CHECK-LABEL: @test_vceqq_f64(
1854 // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
1855 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
1856 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
  return vceqq_f64(v1, v2);
}
1861 // CHECK-LABEL: @test_vcge_s8(
1862 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
1863 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1864 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
  return vcge_s8(v1, v2);
}
1869 // CHECK-LABEL: @test_vcge_s16(
1870 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
1871 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1872 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
  return vcge_s16(v1, v2);
}
1877 // CHECK-LABEL: @test_vcge_s32(
1878 // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
1879 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1880 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
  return vcge_s32(v1, v2);
}
1885 // CHECK-LABEL: @test_vcge_s64(
1886 // CHECK: [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
1887 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1888 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
  return vcge_s64(a, b);
}
1893 // CHECK-LABEL: @test_vcge_u64(
1894 // CHECK: [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
1895 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1896 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
  return vcge_u64(a, b);
}
1901 // CHECK-LABEL: @test_vcge_f32(
1902 // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
1903 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1904 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
  return vcge_f32(v1, v2);
}
1909 // CHECK-LABEL: @test_vcge_f64(
1910 // CHECK: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
1911 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
1912 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
  return vcge_f64(a, b);
}
1917 // CHECK-LABEL: @test_vcge_u8(
1918 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
1919 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
1920 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcge_u8(v1, v2);
}
1925 // CHECK-LABEL: @test_vcge_u16(
1926 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
1927 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
1928 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcge_u16(v1, v2);
}
1933 // CHECK-LABEL: @test_vcge_u32(
1934 // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
1935 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
1936 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcge_u32(v1, v2);
}
1941 // CHECK-LABEL: @test_vcgeq_s8(
1942 // CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
1943 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1944 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgeq_s8(v1, v2);
}
1949 // CHECK-LABEL: @test_vcgeq_s16(
1950 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
1951 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1952 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgeq_s16(v1, v2);
}
1957 // CHECK-LABEL: @test_vcgeq_s32(
1958 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
1959 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1960 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgeq_s32(v1, v2);
}
1965 // CHECK-LABEL: @test_vcgeq_f32(
1966 // CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
1967 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1968 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgeq_f32(v1, v2);
}
1973 // CHECK-LABEL: @test_vcgeq_u8(
1974 // CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
1975 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
1976 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgeq_u8(v1, v2);
}
1981 // CHECK-LABEL: @test_vcgeq_u16(
1982 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
1983 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1984 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgeq_u16(v1, v2);
}
1989 // CHECK-LABEL: @test_vcgeq_u32(
1990 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
1991 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1992 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgeq_u32(v1, v2);
}
1997 // CHECK-LABEL: @test_vcgeq_s64(
1998 // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
1999 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2000 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgeq_s64(v1, v2);
}
2005 // CHECK-LABEL: @test_vcgeq_u64(
2006 // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
2007 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2008 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgeq_u64(v1, v2);
}
2013 // CHECK-LABEL: @test_vcgeq_f64(
2014 // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
2015 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2016 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgeq_f64(v1, v2);
}
2021 // CHECK-LABEL: @test_vcle_s8(
2022 // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
2023 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2024 // CHECK: ret <8 x i8> [[SEXT_I]]
// Notes about vcle:
// LE condition predicate implemented as GE, so check reversed operands.
// Using registers other than v0, v1 is possible, but would be odd.
uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
  return vcle_s8(v1, v2);
}
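// Illustrative sketch, not part of the FileCheck-verified tests: since LE is
// selected as GE with reversed operands, vcle_s8(v1, v2) produces the same mask
// as vcge_s8(v2, v1). The helper name cle_via_cge_s8 is hypothetical.
static inline uint8x8_t cle_via_cge_s8(int8x8_t v1, int8x8_t v2) {
  return vcge_s8(v2, v1);  // v1 <= v2 expressed as v2 >= v1
}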
2032 // CHECK-LABEL: @test_vcle_s16(
2033 // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
2034 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2035 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
  return vcle_s16(v1, v2);
}
2040 // CHECK-LABEL: @test_vcle_s32(
2041 // CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
2042 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2043 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
  return vcle_s32(v1, v2);
}
2048 // CHECK-LABEL: @test_vcle_s64(
2049 // CHECK: [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
2050 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2051 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
  return vcle_s64(a, b);
}
2056 // CHECK-LABEL: @test_vcle_u64(
2057 // CHECK: [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
2058 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2059 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
  return vcle_u64(a, b);
}
2064 // CHECK-LABEL: @test_vcle_f32(
2065 // CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
2066 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2067 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
  return vcle_f32(v1, v2);
}
2072 // CHECK-LABEL: @test_vcle_f64(
2073 // CHECK: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
2074 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2075 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
  return vcle_f64(a, b);
}
2080 // CHECK-LABEL: @test_vcle_u8(
2081 // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
2082 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2083 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcle_u8(v1, v2);
}
2088 // CHECK-LABEL: @test_vcle_u16(
2089 // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
2090 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2091 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcle_u16(v1, v2);
}
2096 // CHECK-LABEL: @test_vcle_u32(
2097 // CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
2098 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2099 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcle_u32(v1, v2);
}
2104 // CHECK-LABEL: @test_vcleq_s8(
2105 // CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
2106 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2107 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
  return vcleq_s8(v1, v2);
}
2112 // CHECK-LABEL: @test_vcleq_s16(
2113 // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
2114 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2115 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
  return vcleq_s16(v1, v2);
}
2120 // CHECK-LABEL: @test_vcleq_s32(
2121 // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
2122 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2123 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
  return vcleq_s32(v1, v2);
}
2128 // CHECK-LABEL: @test_vcleq_f32(
2129 // CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
2130 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2131 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
  return vcleq_f32(v1, v2);
}
2136 // CHECK-LABEL: @test_vcleq_u8(
2137 // CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
2138 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2139 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcleq_u8(v1, v2);
}
2144 // CHECK-LABEL: @test_vcleq_u16(
2145 // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
2146 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2147 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcleq_u16(v1, v2);
}
2152 // CHECK-LABEL: @test_vcleq_u32(
2153 // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
2154 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2155 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcleq_u32(v1, v2);
}
2160 // CHECK-LABEL: @test_vcleq_s64(
2161 // CHECK: [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
2162 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2163 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
  return vcleq_s64(v1, v2);
}
2168 // CHECK-LABEL: @test_vcleq_u64(
2169 // CHECK: [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
2170 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2171 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcleq_u64(v1, v2);
}
2176 // CHECK-LABEL: @test_vcleq_f64(
2177 // CHECK: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
2178 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2179 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
  return vcleq_f64(v1, v2);
}
2184 // CHECK-LABEL: @test_vcgt_s8(
2185 // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
2186 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2187 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
  return vcgt_s8(v1, v2);
}
2192 // CHECK-LABEL: @test_vcgt_s16(
2193 // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
2194 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2195 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
  return vcgt_s16(v1, v2);
}
2200 // CHECK-LABEL: @test_vcgt_s32(
2201 // CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
2202 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2203 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
  return vcgt_s32(v1, v2);
}
2208 // CHECK-LABEL: @test_vcgt_s64(
2209 // CHECK: [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
2210 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2211 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
  return vcgt_s64(a, b);
}
2216 // CHECK-LABEL: @test_vcgt_u64(
2217 // CHECK: [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
2218 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2219 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
  return vcgt_u64(a, b);
}
2224 // CHECK-LABEL: @test_vcgt_f32(
2225 // CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
2226 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2227 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
  return vcgt_f32(v1, v2);
}
2232 // CHECK-LABEL: @test_vcgt_f64(
2233 // CHECK: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
2234 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2235 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
  return vcgt_f64(a, b);
}
2240 // CHECK-LABEL: @test_vcgt_u8(
2241 // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
2242 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2243 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vcgt_u8(v1, v2);
}
2248 // CHECK-LABEL: @test_vcgt_u16(
2249 // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
2250 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2251 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vcgt_u16(v1, v2);
}
2256 // CHECK-LABEL: @test_vcgt_u32(
2257 // CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
2258 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2259 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vcgt_u32(v1, v2);
}
2264 // CHECK-LABEL: @test_vcgtq_s8(
2265 // CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
2266 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2267 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
  return vcgtq_s8(v1, v2);
}
2272 // CHECK-LABEL: @test_vcgtq_s16(
2273 // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
2274 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2275 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
  return vcgtq_s16(v1, v2);
}
2280 // CHECK-LABEL: @test_vcgtq_s32(
2281 // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
2282 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2283 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
  return vcgtq_s32(v1, v2);
}
2288 // CHECK-LABEL: @test_vcgtq_f32(
2289 // CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
2290 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2291 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
  return vcgtq_f32(v1, v2);
}
2296 // CHECK-LABEL: @test_vcgtq_u8(
2297 // CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
2298 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2299 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcgtq_u8(v1, v2);
}
2304 // CHECK-LABEL: @test_vcgtq_u16(
2305 // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
2306 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2307 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcgtq_u16(v1, v2);
}
2312 // CHECK-LABEL: @test_vcgtq_u32(
2313 // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
2314 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2315 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcgtq_u32(v1, v2);
}
2320 // CHECK-LABEL: @test_vcgtq_s64(
2321 // CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
2322 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2323 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
  return vcgtq_s64(v1, v2);
}
2328 // CHECK-LABEL: @test_vcgtq_u64(
2329 // CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
2330 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2331 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcgtq_u64(v1, v2);
}
2336 // CHECK-LABEL: @test_vcgtq_f64(
2337 // CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
2338 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2339 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
  return vcgtq_f64(v1, v2);
}
2344 // CHECK-LABEL: @test_vclt_s8(
2345 // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
2346 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2347 // CHECK: ret <8 x i8> [[SEXT_I]]
// Notes about vclt:
// LT condition predicate implemented as GT, so check reversed operands.
// Using registers other than v0, v1 is possible, but would be odd.
uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
  return vclt_s8(v1, v2);
}
2355 // CHECK-LABEL: @test_vclt_s16(
2356 // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
2357 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2358 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
  return vclt_s16(v1, v2);
}
2363 // CHECK-LABEL: @test_vclt_s32(
2364 // CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
2365 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2366 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
  return vclt_s32(v1, v2);
}
2371 // CHECK-LABEL: @test_vclt_s64(
2372 // CHECK: [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
2373 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2374 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
  return vclt_s64(a, b);
}
2379 // CHECK-LABEL: @test_vclt_u64(
2380 // CHECK: [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
2381 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2382 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
  return vclt_u64(a, b);
}
2387 // CHECK-LABEL: @test_vclt_f32(
2388 // CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
2389 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2390 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
  return vclt_f32(v1, v2);
}
2395 // CHECK-LABEL: @test_vclt_f64(
2396 // CHECK: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
2397 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
2398 // CHECK: ret <1 x i64> [[SEXT_I]]
uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
  return vclt_f64(a, b);
}
2403 // CHECK-LABEL: @test_vclt_u8(
2404 // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
2405 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
2406 // CHECK: ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
  return vclt_u8(v1, v2);
}
2411 // CHECK-LABEL: @test_vclt_u16(
2412 // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
2413 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
2414 // CHECK: ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
  return vclt_u16(v1, v2);
}
2419 // CHECK-LABEL: @test_vclt_u32(
2420 // CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
2421 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
2422 // CHECK: ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
  return vclt_u32(v1, v2);
}
2427 // CHECK-LABEL: @test_vcltq_s8(
2428 // CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
2429 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2430 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
  return vcltq_s8(v1, v2);
}
2435 // CHECK-LABEL: @test_vcltq_s16(
2436 // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
2437 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2438 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
  return vcltq_s16(v1, v2);
}
2443 // CHECK-LABEL: @test_vcltq_s32(
2444 // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
2445 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2446 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
  return vcltq_s32(v1, v2);
}
2451 // CHECK-LABEL: @test_vcltq_f32(
2452 // CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
2453 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2454 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
  return vcltq_f32(v1, v2);
}
2459 // CHECK-LABEL: @test_vcltq_u8(
2460 // CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
2461 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
2462 // CHECK: ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vcltq_u8(v1, v2);
}
2467 // CHECK-LABEL: @test_vcltq_u16(
2468 // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
2469 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
2470 // CHECK: ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vcltq_u16(v1, v2);
}
2475 // CHECK-LABEL: @test_vcltq_u32(
2476 // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
2477 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
2478 // CHECK: ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vcltq_u32(v1, v2);
}
2483 // CHECK-LABEL: @test_vcltq_s64(
2484 // CHECK: [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
2485 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2486 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
  return vcltq_s64(v1, v2);
}
2491 // CHECK-LABEL: @test_vcltq_u64(
2492 // CHECK: [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
2493 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2494 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
  return vcltq_u64(v1, v2);
}
2499 // CHECK-LABEL: @test_vcltq_f64(
2500 // CHECK: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
2501 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
2502 // CHECK: ret <2 x i64> [[SEXT_I]]
uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
  return vcltq_f64(v1, v2);
}
2507 // CHECK-LABEL: @test_vhadd_s8(
2508 // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2509 // CHECK: ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vhadd_s8(v1, v2);
}
2514 // CHECK-LABEL: @test_vhadd_s16(
2515 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2516 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2517 // CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2518 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
2519 // CHECK: ret <4 x i16> [[VHADD_V2_I]]
int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
  return vhadd_s16(v1, v2);
}
2524 // CHECK-LABEL: @test_vhadd_s32(
2525 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2526 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2527 // CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2528 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
2529 // CHECK: ret <2 x i32> [[VHADD_V2_I]]
int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
  return vhadd_s32(v1, v2);
}
2534 // CHECK-LABEL: @test_vhadd_u8(
2535 // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2536 // CHECK: ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhadd_u8(v1, v2);
}
2541 // CHECK-LABEL: @test_vhadd_u16(
2542 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2543 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2544 // CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2545 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
2546 // CHECK: ret <4 x i16> [[VHADD_V2_I]]
uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhadd_u16(v1, v2);
}
2551 // CHECK-LABEL: @test_vhadd_u32(
2552 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2553 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2554 // CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2555 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
2556 // CHECK: ret <2 x i32> [[VHADD_V2_I]]
uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhadd_u32(v1, v2);
}
2561 // CHECK-LABEL: @test_vhaddq_s8(
2562 // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
2563 // CHECK: ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vhaddq_s8(v1, v2);
}
2568 // CHECK-LABEL: @test_vhaddq_s16(
2569 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
2570 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
2571 // CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
2572 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
2573 // CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vhaddq_s16(v1, v2);
}
2578 // CHECK-LABEL: @test_vhaddq_s32(
2579 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
2580 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
2581 // CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
2582 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
2583 // CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vhaddq_s32(v1, v2);
}
2588 // CHECK-LABEL: @test_vhaddq_u8(
2589 // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
2590 // CHECK: ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhaddq_u8(v1, v2);
}
2595 // CHECK-LABEL: @test_vhaddq_u16(
2596 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
2597 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
2598 // CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
2599 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
2600 // CHECK: ret <8 x i16> [[VHADDQ_V2_I]]
uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhaddq_u16(v1, v2);
}
2605 // CHECK-LABEL: @test_vhaddq_u32(
2606 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
2607 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
2608 // CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
2609 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
2610 // CHECK: ret <4 x i32> [[VHADDQ_V2_I]]
uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhaddq_u32(v1, v2);
}
2615 // CHECK-LABEL: @test_vhsub_s8(
2616 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2617 // CHECK: ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
  return vhsub_s8(v1, v2);
}
2622 // CHECK-LABEL: @test_vhsub_s16(
2623 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2624 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2625 // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2626 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
2627 // CHECK: ret <4 x i16> [[VHSUB_V2_I]]
int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
  return vhsub_s16(v1, v2);
}
2632 // CHECK-LABEL: @test_vhsub_s32(
2633 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2634 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2635 // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2636 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
2637 // CHECK: ret <2 x i32> [[VHSUB_V2_I]]
int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
  return vhsub_s32(v1, v2);
}
2642 // CHECK-LABEL: @test_vhsub_u8(
2643 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2644 // CHECK: ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
  return vhsub_u8(v1, v2);
}
2649 // CHECK-LABEL: @test_vhsub_u16(
2650 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2651 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2652 // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2653 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
2654 // CHECK: ret <4 x i16> [[VHSUB_V2_I]]
uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
  return vhsub_u16(v1, v2);
}
2659 // CHECK-LABEL: @test_vhsub_u32(
2660 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2661 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2662 // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2663 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
2664 // CHECK: ret <2 x i32> [[VHSUB_V2_I]]
uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
  return vhsub_u32(v1, v2);
}
2669 // CHECK-LABEL: @test_vhsubq_s8(
2670 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
2671 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
  return vhsubq_s8(v1, v2);
}
2676 // CHECK-LABEL: @test_vhsubq_s16(
2677 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
2678 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
2679 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
2680 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
2681 // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
  return vhsubq_s16(v1, v2);
}
2686 // CHECK-LABEL: @test_vhsubq_s32(
2687 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
2688 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
2689 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
2690 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
2691 // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
  return vhsubq_s32(v1, v2);
}
2696 // CHECK-LABEL: @test_vhsubq_u8(
2697 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2)
2698 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vhsubq_u8(v1, v2);
}
2703 // CHECK-LABEL: @test_vhsubq_u16(
2704 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
2705 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
2706 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2)
2707 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
2708 // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]]
uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vhsubq_u16(v1, v2);
}
2713 // CHECK-LABEL: @test_vhsubq_u32(
2714 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
2715 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
2716 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2)
2717 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
2718 // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]]
uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vhsubq_u32(v1, v2);
}
2723 // CHECK-LABEL: @test_vrhadd_s8(
2724 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2725 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
  return vrhadd_s8(v1, v2);
}
2730 // CHECK-LABEL: @test_vrhadd_s16(
2731 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2732 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2733 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2734 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
2735 // CHECK: ret <4 x i16> [[VRHADD_V2_I]]
2736 int16x4_t
test_vrhadd_s16(int16x4_t v1
, int16x4_t v2
) {
2737 return vrhadd_s16(v1
, v2
);
2740 // CHECK-LABEL: @test_vrhadd_s32(
2741 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2742 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2743 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2744 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
2745 // CHECK: ret <2 x i32> [[VRHADD_V2_I]]
2746 int32x2_t
test_vrhadd_s32(int32x2_t v1
, int32x2_t v2
) {
2747 return vrhadd_s32(v1
, v2
);
2750 // CHECK-LABEL: @test_vrhadd_u8(
2751 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2)
2752 // CHECK: ret <8 x i8> [[VRHADD_V_I]]
2753 uint8x8_t
test_vrhadd_u8(uint8x8_t v1
, uint8x8_t v2
) {
2754 return vrhadd_u8(v1
, v2
);
2757 // CHECK-LABEL: @test_vrhadd_u16(
2758 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
2759 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
2760 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2)
2761 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
2762 // CHECK: ret <4 x i16> [[VRHADD_V2_I]]
2763 uint16x4_t
test_vrhadd_u16(uint16x4_t v1
, uint16x4_t v2
) {
2764 return vrhadd_u16(v1
, v2
);
2767 // CHECK-LABEL: @test_vrhadd_u32(
2768 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
2769 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
2770 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2)
2771 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
2772 // CHECK: ret <2 x i32> [[VRHADD_V2_I]]
2773 uint32x2_t
test_vrhadd_u32(uint32x2_t v1
, uint32x2_t v2
) {
2774 return vrhadd_u32(v1
, v2
);
// CHECK-LABEL: @test_vrhaddq_s8(
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
  return vrhaddq_s8(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
  return vrhaddq_s16(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
  return vrhaddq_s32(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u8(
// CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2)
// CHECK: ret <16 x i8> [[VRHADDQ_V_I]]
uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
  return vrhaddq_u8(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]]
uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
  return vrhaddq_u16(v1, v2);
}

// CHECK-LABEL: @test_vrhaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2)
// CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]]
uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
  return vrhaddq_u32(v1, v2);
}

// CHECK-LABEL: @test_vqadd_s8(
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: @test_vqadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQADD_V2_I]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: @test_vqadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQADD_V2_I]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: @test_vqadd_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQADD_V2_I]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: @test_vqadd_u8(
// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: @test_vqadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQADD_V2_I]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: @test_vqadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQADD_V2_I]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: @test_vqadd_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQADD_V2_I]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}

// CHECK-LABEL: @test_vqaddq_s8(
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vqaddq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vqaddq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vqaddq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vqaddq_u8(
// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vqaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQADDQ_V2_I]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vqaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQADDQ_V2_I]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vqaddq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQADDQ_V2_I]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}

// CHECK-LABEL: @test_vqsub_s8(
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: @test_vqsub_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: @test_vqsub_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: @test_vqsub_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: @test_vqsub_u8(
// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: @test_vqsub_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSUB_V2_I]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: @test_vqsub_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSUB_V2_I]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: @test_vqsub_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSUB_V2_I]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}

// CHECK-LABEL: @test_vqsubq_s8(
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: @test_vqsubq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: @test_vqsubq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: @test_vqsubq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}

// CHECK-LABEL: @test_vqsubq_u8(
// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}

// CHECK-LABEL: @test_vqsubq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}

// CHECK-LABEL: @test_vqsubq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}

// CHECK-LABEL: @test_vqsubq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}

// CHECK-LABEL: @test_vshl_s8(
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSHL_V_I]]
int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
  return vshl_s8(a, b);
}

// CHECK-LABEL: @test_vshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VSHL_V2_I]]
int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
  return vshl_s16(a, b);
}

// CHECK-LABEL: @test_vshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VSHL_V2_I]]
int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
  return vshl_s32(a, b);
}

// CHECK-LABEL: @test_vshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VSHL_V2_I]]
int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
  return vshl_s64(a, b);
}

// CHECK-LABEL: @test_vshl_u8(
// CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSHL_V_I]]
uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
  return vshl_u8(a, b);
}

// CHECK-LABEL: @test_vshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VSHL_V2_I]]
uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
  return vshl_u16(a, b);
}

// CHECK-LABEL: @test_vshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VSHL_V2_I]]
uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
  return vshl_u32(a, b);
}

// CHECK-LABEL: @test_vshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VSHL_V2_I]]
uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
  return vshl_u64(a, b);
}

// CHECK-LABEL: @test_vshlq_s8(
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: @test_vshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: @test_vshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: @test_vshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: @test_vshlq_u8(
// CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: @test_vshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VSHLQ_V2_I]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: @test_vshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VSHLQ_V2_I]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: @test_vshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VSHLQ_V2_I]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}

// CHECK-LABEL: @test_vqshl_s8(
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: @test_vqshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: @test_vqshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: @test_vqshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: @test_vqshl_u8(
// CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: @test_vqshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQSHL_V2_I]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}

// CHECK-LABEL: @test_vqshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQSHL_V2_I]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}

// CHECK-LABEL: @test_vqshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQSHL_V2_I]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}

// CHECK-LABEL: @test_vqshlq_s8(
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqshlq_u8(
// CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}

// CHECK-LABEL: @test_vrshl_s8(
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: @test_vrshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRSHL_V2_I]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: @test_vrshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRSHL_V2_I]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: @test_vrshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VRSHL_V2_I]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: @test_vrshl_u8(
// CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: @test_vrshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VRSHL_V2_I]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: @test_vrshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VRSHL_V2_I]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}

// CHECK-LABEL: @test_vrshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VRSHL_V2_I]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}

// CHECK-LABEL: @test_vrshlq_s8(
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vrshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vrshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vrshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vrshlq_u8(
// CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vrshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vrshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vrshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}

// CHECK-LABEL: @test_vqrshl_s8(
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: @test_vqrshl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: @test_vqrshl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: @test_vqrshl_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: @test_vqrshl_u8(
// CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: @test_vqrshl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRSHL_V2_I]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: @test_vqrshl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRSHL_V2_I]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: @test_vqrshl_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQRSHL_V2_I]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s8(
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u8(
// CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}

// CHECK-LABEL: @test_vsli_n_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
// CHECK: ret <1 x i64> [[VSLI_N2]]
poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsli_n_p64(a, b, 0);
}

// CHECK-LABEL: @test_vsliq_n_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
// CHECK: ret <2 x i64> [[VSLI_N2]]
poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsliq_n_p64(a, b, 0);
}

// CHECK-LABEL: @test_vmax_s8(
// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VMAX_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}

// CHECK-LABEL: @test_vmax_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VMAX2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}

// CHECK-LABEL: @test_vmax_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VMAX2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}

// CHECK-LABEL: @test_vmax_u8(
// CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VMAX_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}

// CHECK-LABEL: @test_vmax_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VMAX2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}

// CHECK-LABEL: @test_vmax_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VMAX2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}

// CHECK-LABEL: @test_vmax_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VMAX2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}

// CHECK-LABEL: @test_vmaxq_s8(
// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMAX_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}

// CHECK-LABEL: @test_vmaxq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VMAX2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}

// CHECK-LABEL: @test_vmaxq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VMAX2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}

// CHECK-LABEL: @test_vmaxq_u8(
// CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMAX_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}

// CHECK-LABEL: @test_vmaxq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VMAX2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: @test_vmaxq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VMAX2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: @test_vmaxq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VMAX2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}

// CHECK-LABEL: @test_vmaxq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VMAX2_I]]
float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
  return vmaxq_f64(a, b);
}

// CHECK-LABEL: @test_vmin_s8(
// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VMIN_I]]
int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
  return vmin_s8(a, b);
}

// CHECK-LABEL: @test_vmin_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VMIN2_I]]
int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
  return vmin_s16(a, b);
}

// CHECK-LABEL: @test_vmin_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VMIN2_I]]
int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
  return vmin_s32(a, b);
}

// CHECK-LABEL: @test_vmin_u8(
// CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VMIN_I]]
uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
  return vmin_u8(a, b);
}

// CHECK-LABEL: @test_vmin_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VMIN2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: @test_vmin_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VMIN2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: @test_vmin_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VMIN2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}

// CHECK-LABEL: @test_vminq_s8(
// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMIN_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: @test_vminq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VMIN2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: @test_vminq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VMIN2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: @test_vminq_u8(
// CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VMIN_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: @test_vminq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VMIN2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: @test_vminq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VMIN2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: @test_vminq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VMIN2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}

// CHECK-LABEL: @test_vminq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VMIN2_I]]
float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
  return vminq_f64(a, b);
}


// CHECK-LABEL: @test_vmaxnm_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VMAXNM2_I]]
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
  return vmaxnm_f32(a, b);
}

// CHECK-LABEL: @test_vmaxnmq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VMAXNM2_I]]
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
  return vmaxnmq_f32(a, b);
}

// CHECK-LABEL: @test_vmaxnmq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VMAXNM2_I]]
float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
  return vmaxnmq_f64(a, b);
}

// CHECK-LABEL: @test_vminnm_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VMINNM2_I]]
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
  return vminnm_f32(a, b);
}

// CHECK-LABEL: @test_vminnmq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VMINNM2_I]]
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
  return vminnmq_f32(a, b);
}

// CHECK-LABEL: @test_vminnmq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VMINNM2_I]]
float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
  return vminnmq_f64(a, b);
}
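
// Illustrative sketch, not one of the checked tests: the *nm intrinsics follow the
// IEEE-754 maxNum/minNum rules, so a lane that is a quiet NaN in only one operand
// yields the other operand's value, while plain vmax/vmin propagate the NaN.
// The helper names below are hypothetical and only contrast the two forms.
static inline float32x2_t example_max_propagating_nan(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);    // a NaN in either input lane produces a NaN result lane
}
static inline float32x2_t example_max_ignoring_nan(float32x2_t a, float32x2_t b) {
  return vmaxnm_f32(a, b);  // a single quiet-NaN operand is replaced by the numeric one
}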
4049 // CHECK-LABEL: @test_vpmax_s8(
4050 // CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
4051 // CHECK: ret <8 x i8> [[VPMAX_I]]
4052 int8x8_t
test_vpmax_s8(int8x8_t a
, int8x8_t b
) {
4053 return vpmax_s8(a
, b
);
4056 // CHECK-LABEL: @test_vpmax_s16(
4057 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4058 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4059 // CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
4060 // CHECK: ret <4 x i16> [[VPMAX2_I]]
4061 int16x4_t
test_vpmax_s16(int16x4_t a
, int16x4_t b
) {
4062 return vpmax_s16(a
, b
);
4065 // CHECK-LABEL: @test_vpmax_s32(
4066 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4067 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4068 // CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
4069 // CHECK: ret <2 x i32> [[VPMAX2_I]]
4070 int32x2_t
test_vpmax_s32(int32x2_t a
, int32x2_t b
) {
4071 return vpmax_s32(a
, b
);
4074 // CHECK-LABEL: @test_vpmax_u8(
4075 // CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
4076 // CHECK: ret <8 x i8> [[VPMAX_I]]
4077 uint8x8_t
test_vpmax_u8(uint8x8_t a
, uint8x8_t b
) {
4078 return vpmax_u8(a
, b
);
4081 // CHECK-LABEL: @test_vpmax_u16(
4082 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4083 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4084 // CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
4085 // CHECK: ret <4 x i16> [[VPMAX2_I]]
4086 uint16x4_t
test_vpmax_u16(uint16x4_t a
, uint16x4_t b
) {
4087 return vpmax_u16(a
, b
);
4090 // CHECK-LABEL: @test_vpmax_u32(
4091 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4092 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4093 // CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
4094 // CHECK: ret <2 x i32> [[VPMAX2_I]]
4095 uint32x2_t
test_vpmax_u32(uint32x2_t a
, uint32x2_t b
) {
4096 return vpmax_u32(a
, b
);
4099 // CHECK-LABEL: @test_vpmax_f32(
4100 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4101 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4102 // CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b)
4103 // CHECK: ret <2 x float> [[VPMAX2_I]]
4104 float32x2_t
test_vpmax_f32(float32x2_t a
, float32x2_t b
) {
4105 return vpmax_f32(a
, b
);
4108 // CHECK-LABEL: @test_vpmaxq_s8(
4109 // CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
4110 // CHECK: ret <16 x i8> [[VPMAX_I]]
4111 int8x16_t
test_vpmaxq_s8(int8x16_t a
, int8x16_t b
) {
4112 return vpmaxq_s8(a
, b
);
4115 // CHECK-LABEL: @test_vpmaxq_s16(
4116 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4117 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4118 // CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
4119 // CHECK: ret <8 x i16> [[VPMAX2_I]]
4120 int16x8_t
test_vpmaxq_s16(int16x8_t a
, int16x8_t b
) {
4121 return vpmaxq_s16(a
, b
);
4124 // CHECK-LABEL: @test_vpmaxq_s32(
4125 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4126 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4127 // CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
4128 // CHECK: ret <4 x i32> [[VPMAX2_I]]
4129 int32x4_t
test_vpmaxq_s32(int32x4_t a
, int32x4_t b
) {
4130 return vpmaxq_s32(a
, b
);
4133 // CHECK-LABEL: @test_vpmaxq_u8(
4134 // CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
4135 // CHECK: ret <16 x i8> [[VPMAX_I]]
4136 uint8x16_t
test_vpmaxq_u8(uint8x16_t a
, uint8x16_t b
) {
4137 return vpmaxq_u8(a
, b
);
4140 // CHECK-LABEL: @test_vpmaxq_u16(
4141 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4142 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4143 // CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
4144 // CHECK: ret <8 x i16> [[VPMAX2_I]]
4145 uint16x8_t
test_vpmaxq_u16(uint16x8_t a
, uint16x8_t b
) {
4146 return vpmaxq_u16(a
, b
);
4149 // CHECK-LABEL: @test_vpmaxq_u32(
4150 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4151 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4152 // CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
4153 // CHECK: ret <4 x i32> [[VPMAX2_I]]
4154 uint32x4_t
test_vpmaxq_u32(uint32x4_t a
, uint32x4_t b
) {
4155 return vpmaxq_u32(a
, b
);
4158 // CHECK-LABEL: @test_vpmaxq_f32(
4159 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4160 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4161 // CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b)
4162 // CHECK: ret <4 x float> [[VPMAX2_I]]
4163 float32x4_t
test_vpmaxq_f32(float32x4_t a
, float32x4_t b
) {
4164 return vpmaxq_f32(a
, b
);
4167 // CHECK-LABEL: @test_vpmaxq_f64(
4168 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4169 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4170 // CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b)
4171 // CHECK: ret <2 x double> [[VPMAX2_I]]
4172 float64x2_t
test_vpmaxq_f64(float64x2_t a
, float64x2_t b
) {
4173 return vpmaxq_f64(a
, b
);
4176 // CHECK-LABEL: @test_vpmin_s8(
4177 // CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b)
4178 // CHECK: ret <8 x i8> [[VPMIN_I]]
4179 int8x8_t
test_vpmin_s8(int8x8_t a
, int8x8_t b
) {
4180 return vpmin_s8(a
, b
);
4183 // CHECK-LABEL: @test_vpmin_s16(
4184 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4185 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4186 // CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b)
4187 // CHECK: ret <4 x i16> [[VPMIN2_I]]
4188 int16x4_t
test_vpmin_s16(int16x4_t a
, int16x4_t b
) {
4189 return vpmin_s16(a
, b
);
4192 // CHECK-LABEL: @test_vpmin_s32(
4193 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4194 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4195 // CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b)
4196 // CHECK: ret <2 x i32> [[VPMIN2_I]]
4197 int32x2_t
test_vpmin_s32(int32x2_t a
, int32x2_t b
) {
4198 return vpmin_s32(a
, b
);
4201 // CHECK-LABEL: @test_vpmin_u8(
4202 // CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b)
4203 // CHECK: ret <8 x i8> [[VPMIN_I]]
4204 uint8x8_t
test_vpmin_u8(uint8x8_t a
, uint8x8_t b
) {
4205 return vpmin_u8(a
, b
);
4208 // CHECK-LABEL: @test_vpmin_u16(
4209 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4210 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4211 // CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %a, <4 x i16> %b)
4212 // CHECK: ret <4 x i16> [[VPMIN2_I]]
4213 uint16x4_t
test_vpmin_u16(uint16x4_t a
, uint16x4_t b
) {
4214 return vpmin_u16(a
, b
);
4217 // CHECK-LABEL: @test_vpmin_u32(
4218 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4219 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4220 // CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b)
4221 // CHECK: ret <2 x i32> [[VPMIN2_I]]
4222 uint32x2_t
test_vpmin_u32(uint32x2_t a
, uint32x2_t b
) {
4223 return vpmin_u32(a
, b
);
4226 // CHECK-LABEL: @test_vpmin_f32(
4227 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4228 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4229 // CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b)
4230 // CHECK: ret <2 x float> [[VPMIN2_I]]
4231 float32x2_t
test_vpmin_f32(float32x2_t a
, float32x2_t b
) {
4232 return vpmin_f32(a
, b
);
4235 // CHECK-LABEL: @test_vpminq_s8(
4236 // CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b)
4237 // CHECK: ret <16 x i8> [[VPMIN_I]]
4238 int8x16_t
test_vpminq_s8(int8x16_t a
, int8x16_t b
) {
4239 return vpminq_s8(a
, b
);
4242 // CHECK-LABEL: @test_vpminq_s16(
4243 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4244 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4245 // CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b)
4246 // CHECK: ret <8 x i16> [[VPMIN2_I]]
4247 int16x8_t
test_vpminq_s16(int16x8_t a
, int16x8_t b
) {
4248 return vpminq_s16(a
, b
);
4251 // CHECK-LABEL: @test_vpminq_s32(
4252 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4253 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4254 // CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b)
4255 // CHECK: ret <4 x i32> [[VPMIN2_I]]
4256 int32x4_t
test_vpminq_s32(int32x4_t a
, int32x4_t b
) {
4257 return vpminq_s32(a
, b
);
4260 // CHECK-LABEL: @test_vpminq_u8(
4261 // CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b)
4262 // CHECK: ret <16 x i8> [[VPMIN_I]]
4263 uint8x16_t
test_vpminq_u8(uint8x16_t a
, uint8x16_t b
) {
4264 return vpminq_u8(a
, b
);
4267 // CHECK-LABEL: @test_vpminq_u16(
4268 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4269 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4270 // CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b)
4271 // CHECK: ret <8 x i16> [[VPMIN2_I]]
4272 uint16x8_t
test_vpminq_u16(uint16x8_t a
, uint16x8_t b
) {
4273 return vpminq_u16(a
, b
);
4276 // CHECK-LABEL: @test_vpminq_u32(
4277 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4278 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4279 // CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b)
4280 // CHECK: ret <4 x i32> [[VPMIN2_I]]
4281 uint32x4_t
test_vpminq_u32(uint32x4_t a
, uint32x4_t b
) {
4282 return vpminq_u32(a
, b
);
4285 // CHECK-LABEL: @test_vpminq_f32(
4286 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4287 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4288 // CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b)
4289 // CHECK: ret <4 x float> [[VPMIN2_I]]
4290 float32x4_t
test_vpminq_f32(float32x4_t a
, float32x4_t b
) {
4291 return vpminq_f32(a
, b
);
4294 // CHECK-LABEL: @test_vpminq_f64(
4295 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4296 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4297 // CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b)
4298 // CHECK: ret <2 x double> [[VPMIN2_I]]
4299 float64x2_t
test_vpminq_f64(float64x2_t a
, float64x2_t b
) {
4300 return vpminq_f64(a
, b
);
4303 // CHECK-LABEL: @test_vpmaxnm_f32(
4304 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4305 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4306 // CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b)
4307 // CHECK: ret <2 x float> [[VPMAXNM2_I]]
4308 float32x2_t
test_vpmaxnm_f32(float32x2_t a
, float32x2_t b
) {
4309 return vpmaxnm_f32(a
, b
);
4312 // CHECK-LABEL: @test_vpmaxnmq_f32(
4313 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4314 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4315 // CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b)
4316 // CHECK: ret <4 x float> [[VPMAXNM2_I]]
4317 float32x4_t
test_vpmaxnmq_f32(float32x4_t a
, float32x4_t b
) {
4318 return vpmaxnmq_f32(a
, b
);
4321 // CHECK-LABEL: @test_vpmaxnmq_f64(
4322 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4323 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4324 // CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b)
4325 // CHECK: ret <2 x double> [[VPMAXNM2_I]]
4326 float64x2_t
test_vpmaxnmq_f64(float64x2_t a
, float64x2_t b
) {
4327 return vpmaxnmq_f64(a
, b
);
4330 // CHECK-LABEL: @test_vpminnm_f32(
4331 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
4332 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
4333 // CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b)
4334 // CHECK: ret <2 x float> [[VPMINNM2_I]]
4335 float32x2_t
test_vpminnm_f32(float32x2_t a
, float32x2_t b
) {
4336 return vpminnm_f32(a
, b
);
4339 // CHECK-LABEL: @test_vpminnmq_f32(
4340 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4341 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4342 // CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b)
4343 // CHECK: ret <4 x float> [[VPMINNM2_I]]
4344 float32x4_t
test_vpminnmq_f32(float32x4_t a
, float32x4_t b
) {
4345 return vpminnmq_f32(a
, b
);
4348 // CHECK-LABEL: @test_vpminnmq_f64(
4349 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4350 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4351 // CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b)
4352 // CHECK: ret <2 x double> [[VPMINNM2_I]]
4353 float64x2_t
test_vpminnmq_f64(float64x2_t a
, float64x2_t b
) {
4354 return vpminnmq_f64(a
, b
);

// CHECK-LABEL: @test_vpadd_s8(
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}

// CHECK-LABEL: @test_vpadd_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPADD_V2_I]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}

// CHECK-LABEL: @test_vpadd_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPADD_V2_I]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}

// CHECK-LABEL: @test_vpadd_u8(
// CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}

// CHECK-LABEL: @test_vpadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VPADD_V2_I]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}

// CHECK-LABEL: @test_vpadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VPADD_V2_I]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}

// CHECK-LABEL: @test_vpadd_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK: ret <2 x float> [[VPADD_V2_I]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}
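
// Illustrative sketch, not one of the checked tests: the pairwise add exercised above
// is the usual building block for a horizontal reduction. The helper name is
// hypothetical; vget_low_f32, vget_high_f32 and vget_lane_f32 are the standard
// <arm_neon.h> accessors.
static inline float example_horizontal_sum_f32(float32x4_t v) {
  // First step gives {v0+v1, v2+v3}; the second collapses that pair into the total.
  float32x2_t partial = vpadd_f32(vget_low_f32(v), vget_high_f32(v));
  float32x2_t total = vpadd_f32(partial, partial);
  return vget_lane_f32(total, 0);
}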
4421 // CHECK-LABEL: @test_vpaddq_s8(
4422 // CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
4423 // CHECK: ret <16 x i8> [[VPADDQ_V_I]]
4424 int8x16_t
test_vpaddq_s8(int8x16_t a
, int8x16_t b
) {
4425 return vpaddq_s8(a
, b
);
4428 // CHECK-LABEL: @test_vpaddq_s16(
4429 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4430 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4431 // CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
4432 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
4433 // CHECK: ret <8 x i16> [[VPADDQ_V2_I]]
4434 int16x8_t
test_vpaddq_s16(int16x8_t a
, int16x8_t b
) {
4435 return vpaddq_s16(a
, b
);
4438 // CHECK-LABEL: @test_vpaddq_s32(
4439 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4440 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4441 // CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
4442 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
4443 // CHECK: ret <4 x i32> [[VPADDQ_V2_I]]
4444 int32x4_t
test_vpaddq_s32(int32x4_t a
, int32x4_t b
) {
4445 return vpaddq_s32(a
, b
);
4448 // CHECK-LABEL: @test_vpaddq_u8(
4449 // CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
4450 // CHECK: ret <16 x i8> [[VPADDQ_V_I]]
4451 uint8x16_t
test_vpaddq_u8(uint8x16_t a
, uint8x16_t b
) {
4452 return vpaddq_u8(a
, b
);
4455 // CHECK-LABEL: @test_vpaddq_u16(
4456 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4457 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4458 // CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
4459 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
4460 // CHECK: ret <8 x i16> [[VPADDQ_V2_I]]
4461 uint16x8_t
test_vpaddq_u16(uint16x8_t a
, uint16x8_t b
) {
4462 return vpaddq_u16(a
, b
);
4465 // CHECK-LABEL: @test_vpaddq_u32(
4466 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4467 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4468 // CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
4469 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
4470 // CHECK: ret <4 x i32> [[VPADDQ_V2_I]]
4471 uint32x4_t
test_vpaddq_u32(uint32x4_t a
, uint32x4_t b
) {
4472 return vpaddq_u32(a
, b
);
4475 // CHECK-LABEL: @test_vpaddq_f32(
4476 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
4477 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
4478 // CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)
4479 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
4480 // CHECK: ret <4 x float> [[VPADDQ_V2_I]]
4481 float32x4_t
test_vpaddq_f32(float32x4_t a
, float32x4_t b
) {
4482 return vpaddq_f32(a
, b
);
4485 // CHECK-LABEL: @test_vpaddq_f64(
4486 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
4487 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
4488 // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b)
4489 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
4490 // CHECK: ret <2 x double> [[VPADDQ_V2_I]]
4491 float64x2_t
test_vpaddq_f64(float64x2_t a
, float64x2_t b
) {
4492 return vpaddq_f64(a
, b
);

// CHECK-LABEL: @test_vqdmulh_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQDMULH_V2_I]]
int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulh_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQDMULH_V2_I]]
int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_s32(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]]
int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqdmulhq_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]]
int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqdmulhq_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulh_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulh_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqrdmulhq_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqrdmulhq_s32(a, b);
}
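
// Illustrative sketch, not one of the checked tests: vqrdmulh_s16 is the usual
// Q15 fixed-point multiply, a saturating rounding doubling multiply that keeps the
// high half. The helper name and the Q15 framing are assumptions for illustration.
static inline int16x4_t example_q15_multiply(int16x4_t a, int16x4_t b) {
  // Per lane: saturate((2*a*b + 0x8000) >> 16), i.e. an a*b Q15 product with rounding.
  return vqrdmulh_s16(a, b);
}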

// CHECK-LABEL: @test_vmulx_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK: ret <2 x float> [[VMULX2_I]]
float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
  return vmulx_f32(a, b);
}

// CHECK-LABEL: @test_vmulxq_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK: ret <4 x float> [[VMULX2_I]]
float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
  return vmulxq_f32(a, b);
}

// CHECK-LABEL: @test_vmulxq_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %b)
// CHECK: ret <2 x double> [[VMULX2_I]]
float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
  return vmulxq_f64(a, b);
}

// CHECK-LABEL: @test_vshl_n_s8(
// CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
  return vshl_n_s8(a, 3);
}

// CHECK-LABEL: @test_vshl_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
// CHECK: ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 3);
}

// CHECK-LABEL: @test_vshl_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
// CHECK: ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 3);
}
4627 // CHECK-LABEL: @test_vshlq_n_s8(
4628 // CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4629 // CHECK: ret <16 x i8> [[VSHL_N]]
4630 int8x16_t
test_vshlq_n_s8(int8x16_t a
) {
4631 return vshlq_n_s8(a
, 3);
4634 // CHECK-LABEL: @test_vshlq_n_s16(
4635 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4636 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4637 // CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4638 // CHECK: ret <8 x i16> [[VSHL_N]]
4639 int16x8_t
test_vshlq_n_s16(int16x8_t a
) {
4640 return vshlq_n_s16(a
, 3);
4643 // CHECK-LABEL: @test_vshlq_n_s32(
4644 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4645 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4646 // CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4647 // CHECK: ret <4 x i32> [[VSHL_N]]
4648 int32x4_t
test_vshlq_n_s32(int32x4_t a
) {
4649 return vshlq_n_s32(a
, 3);
4652 // CHECK-LABEL: @test_vshlq_n_s64(
4653 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4654 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4655 // CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
4656 // CHECK: ret <2 x i64> [[VSHL_N]]
4657 int64x2_t
test_vshlq_n_s64(int64x2_t a
) {
4658 return vshlq_n_s64(a
, 3);
4661 // CHECK-LABEL: @test_vshl_n_u8(
4662 // CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4663 // CHECK: ret <8 x i8> [[VSHL_N]]
4664 uint8x8_t
test_vshl_n_u8(uint8x8_t a
) {
4665 return vshl_n_u8(a
, 3);
4668 // CHECK-LABEL: @test_vshl_n_u16(
4669 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4670 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4671 // CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4672 // CHECK: ret <4 x i16> [[VSHL_N]]
4673 uint16x4_t
test_vshl_n_u16(uint16x4_t a
) {
4674 return vshl_n_u16(a
, 3);
4677 // CHECK-LABEL: @test_vshl_n_u32(
4678 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4679 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4680 // CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 3, i32 3>
4681 // CHECK: ret <2 x i32> [[VSHL_N]]
4682 uint32x2_t
test_vshl_n_u32(uint32x2_t a
) {
4683 return vshl_n_u32(a
, 3);
4686 // CHECK-LABEL: @test_vshlq_n_u8(
4687 // CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4688 // CHECK: ret <16 x i8> [[VSHL_N]]
4689 uint8x16_t
test_vshlq_n_u8(uint8x16_t a
) {
4690 return vshlq_n_u8(a
, 3);
4693 // CHECK-LABEL: @test_vshlq_n_u16(
4694 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4695 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4696 // CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4697 // CHECK: ret <8 x i16> [[VSHL_N]]
4698 uint16x8_t
test_vshlq_n_u16(uint16x8_t a
) {
4699 return vshlq_n_u16(a
, 3);
4702 // CHECK-LABEL: @test_vshlq_n_u32(
4703 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4704 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4705 // CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4706 // CHECK: ret <4 x i32> [[VSHL_N]]
4707 uint32x4_t
test_vshlq_n_u32(uint32x4_t a
) {
4708 return vshlq_n_u32(a
, 3);
4711 // CHECK-LABEL: @test_vshlq_n_u64(
4712 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4713 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4714 // CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 3, i64 3>
4715 // CHECK: ret <2 x i64> [[VSHL_N]]
4716 uint64x2_t
test_vshlq_n_u64(uint64x2_t a
) {
4717 return vshlq_n_u64(a
, 3);

// CHECK-LABEL: @test_vshr_n_s8(
// CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
  return vshr_n_s8(a, 3);
}
4727 // CHECK-LABEL: @test_vshr_n_s16(
4728 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4729 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4730 // CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4731 // CHECK: ret <4 x i16> [[VSHR_N]]
4732 int16x4_t
test_vshr_n_s16(int16x4_t a
) {
4733 return vshr_n_s16(a
, 3);
4736 // CHECK-LABEL: @test_vshr_n_s32(
4737 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4738 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4739 // CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 3, i32 3>
4740 // CHECK: ret <2 x i32> [[VSHR_N]]
4741 int32x2_t
test_vshr_n_s32(int32x2_t a
) {
4742 return vshr_n_s32(a
, 3);
4745 // CHECK-LABEL: @test_vshrq_n_s8(
4746 // CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4747 // CHECK: ret <16 x i8> [[VSHR_N]]
4748 int8x16_t
test_vshrq_n_s8(int8x16_t a
) {
4749 return vshrq_n_s8(a
, 3);
4752 // CHECK-LABEL: @test_vshrq_n_s16(
4753 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4754 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4755 // CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4756 // CHECK: ret <8 x i16> [[VSHR_N]]
4757 int16x8_t
test_vshrq_n_s16(int16x8_t a
) {
4758 return vshrq_n_s16(a
, 3);
4761 // CHECK-LABEL: @test_vshrq_n_s32(
4762 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4763 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4764 // CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4765 // CHECK: ret <4 x i32> [[VSHR_N]]
4766 int32x4_t
test_vshrq_n_s32(int32x4_t a
) {
4767 return vshrq_n_s32(a
, 3);
4770 // CHECK-LABEL: @test_vshrq_n_s64(
4771 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4772 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4773 // CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 3, i64 3>
4774 // CHECK: ret <2 x i64> [[VSHR_N]]
4775 int64x2_t
test_vshrq_n_s64(int64x2_t a
) {
4776 return vshrq_n_s64(a
, 3);
4779 // CHECK-LABEL: @test_vshr_n_u8(
4780 // CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4781 // CHECK: ret <8 x i8> [[VSHR_N]]
4782 uint8x8_t
test_vshr_n_u8(uint8x8_t a
) {
4783 return vshr_n_u8(a
, 3);
4786 // CHECK-LABEL: @test_vshr_n_u16(
4787 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4788 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4789 // CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3>
4790 // CHECK: ret <4 x i16> [[VSHR_N]]
4791 uint16x4_t
test_vshr_n_u16(uint16x4_t a
) {
4792 return vshr_n_u16(a
, 3);
4795 // CHECK-LABEL: @test_vshr_n_u32(
4796 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4797 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4798 // CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 3, i32 3>
4799 // CHECK: ret <2 x i32> [[VSHR_N]]
4800 uint32x2_t
test_vshr_n_u32(uint32x2_t a
) {
4801 return vshr_n_u32(a
, 3);
4804 // CHECK-LABEL: @test_vshrq_n_u8(
4805 // CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4806 // CHECK: ret <16 x i8> [[VSHR_N]]
4807 uint8x16_t
test_vshrq_n_u8(uint8x16_t a
) {
4808 return vshrq_n_u8(a
, 3);
4811 // CHECK-LABEL: @test_vshrq_n_u16(
4812 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4813 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4814 // CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4815 // CHECK: ret <8 x i16> [[VSHR_N]]
4816 uint16x8_t
test_vshrq_n_u16(uint16x8_t a
) {
4817 return vshrq_n_u16(a
, 3);
4820 // CHECK-LABEL: @test_vshrq_n_u32(
4821 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4822 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4823 // CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 3, i32 3, i32 3, i32 3>
4824 // CHECK: ret <4 x i32> [[VSHR_N]]
4825 uint32x4_t
test_vshrq_n_u32(uint32x4_t a
) {
4826 return vshrq_n_u32(a
, 3);
4829 // CHECK-LABEL: @test_vshrq_n_u64(
4830 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4831 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4832 // CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 3, i64 3>
4833 // CHECK: ret <2 x i64> [[VSHR_N]]
4834 uint64x2_t
test_vshrq_n_u64(uint64x2_t a
) {
4835 return vshrq_n_u64(a
, 3);

// CHECK-LABEL: @test_vsra_n_s8(
// CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 3);
}
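
// Illustrative sketch, not one of the checked tests: vsra_n folds the shift and the
// accumulate into one operation, so per lane it matches acc + (delta >> 3) with an
// arithmetic shift for the signed variants. The helper name is hypothetical.
static inline int8x8_t example_scaled_accumulate(int8x8_t acc, int8x8_t delta) {
  // Same result as vadd_s8(acc, vshr_n_s8(delta, 3)), expressed as a single SSRA.
  return vsra_n_s8(acc, delta, 3);
}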
4846 // CHECK-LABEL: @test_vsra_n_s16(
4847 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4848 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4849 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4850 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4851 // CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
4852 // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
4853 // CHECK: ret <4 x i16> [[TMP4]]
4854 int16x4_t
test_vsra_n_s16(int16x4_t a
, int16x4_t b
) {
4855 return vsra_n_s16(a
, b
, 3);
4858 // CHECK-LABEL: @test_vsra_n_s32(
4859 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4860 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4861 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4862 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4863 // CHECK: [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 3, i32 3>
4864 // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
4865 // CHECK: ret <2 x i32> [[TMP4]]
4866 int32x2_t
test_vsra_n_s32(int32x2_t a
, int32x2_t b
) {
4867 return vsra_n_s32(a
, b
, 3);
4870 // CHECK-LABEL: @test_vsraq_n_s8(
4871 // CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4872 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
4873 // CHECK: ret <16 x i8> [[TMP0]]
4874 int8x16_t
test_vsraq_n_s8(int8x16_t a
, int8x16_t b
) {
4875 return vsraq_n_s8(a
, b
, 3);
4878 // CHECK-LABEL: @test_vsraq_n_s16(
4879 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4880 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4881 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4882 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4883 // CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4884 // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
4885 // CHECK: ret <8 x i16> [[TMP4]]
4886 int16x8_t
test_vsraq_n_s16(int16x8_t a
, int16x8_t b
) {
4887 return vsraq_n_s16(a
, b
, 3);
4890 // CHECK-LABEL: @test_vsraq_n_s32(
4891 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4892 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4893 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4894 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4895 // CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
4896 // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
4897 // CHECK: ret <4 x i32> [[TMP4]]
4898 int32x4_t
test_vsraq_n_s32(int32x4_t a
, int32x4_t b
) {
4899 return vsraq_n_s32(a
, b
, 3);
4902 // CHECK-LABEL: @test_vsraq_n_s64(
4903 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4904 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
4905 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4906 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
4907 // CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 3, i64 3>
4908 // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
4909 // CHECK: ret <2 x i64> [[TMP4]]
4910 int64x2_t
test_vsraq_n_s64(int64x2_t a
, int64x2_t b
) {
4911 return vsraq_n_s64(a
, b
, 3);
4914 // CHECK-LABEL: @test_vsra_n_u8(
4915 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4916 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
4917 // CHECK: ret <8 x i8> [[TMP0]]
4918 uint8x8_t
test_vsra_n_u8(uint8x8_t a
, uint8x8_t b
) {
4919 return vsra_n_u8(a
, b
, 3);
4922 // CHECK-LABEL: @test_vsra_n_u16(
4923 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4924 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4925 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
4926 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4927 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3>
4928 // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
4929 // CHECK: ret <4 x i16> [[TMP4]]
4930 uint16x4_t
test_vsra_n_u16(uint16x4_t a
, uint16x4_t b
) {
4931 return vsra_n_u16(a
, b
, 3);
4934 // CHECK-LABEL: @test_vsra_n_u32(
4935 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
4936 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4937 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
4938 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4939 // CHECK: [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 3, i32 3>
4940 // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
4941 // CHECK: ret <2 x i32> [[TMP4]]
4942 uint32x2_t
test_vsra_n_u32(uint32x2_t a
, uint32x2_t b
) {
4943 return vsra_n_u32(a
, b
, 3);
4946 // CHECK-LABEL: @test_vsraq_n_u8(
4947 // CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
4948 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
4949 // CHECK: ret <16 x i8> [[TMP0]]
4950 uint8x16_t
test_vsraq_n_u8(uint8x16_t a
, uint8x16_t b
) {
4951 return vsraq_n_u8(a
, b
, 3);
4954 // CHECK-LABEL: @test_vsraq_n_u16(
4955 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
4956 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
4957 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
4958 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
4959 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
4960 // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
4961 // CHECK: ret <8 x i16> [[TMP4]]
4962 uint16x8_t
test_vsraq_n_u16(uint16x8_t a
, uint16x8_t b
) {
4963 return vsraq_n_u16(a
, b
, 3);
4966 // CHECK-LABEL: @test_vsraq_n_u32(
4967 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
4968 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
4969 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
4970 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
4971 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
4972 // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
4973 // CHECK: ret <4 x i32> [[TMP4]]
4974 uint32x4_t
test_vsraq_n_u32(uint32x4_t a
, uint32x4_t b
) {
4975 return vsraq_n_u32(a
, b
, 3);
4978 // CHECK-LABEL: @test_vsraq_n_u64(
4979 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
4980 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
4981 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
4982 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
4983 // CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 3, i64 3>
4984 // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
4985 // CHECK: ret <2 x i64> [[TMP4]]
4986 uint64x2_t
test_vsraq_n_u64(uint64x2_t a
, uint64x2_t b
) {
4987 return vsraq_n_u64(a
, b
, 3);

// CHECK-LABEL: @test_vrshr_n_s8(
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
  return vrshr_n_s8(a, 3);
}
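
// Illustrative sketch, not one of the checked tests: the rounding shift adds half of
// the discarded range before shifting, so per lane it behaves roughly like
// (a + 4) >> 3 for a shift of 3, computed without intermediate overflow, instead of
// the truncating shift used by vshr_n. The helper name is hypothetical.
static inline int8x8_t example_rounded_downscale(int8x8_t a) {
  return vrshr_n_s8(a, 3);
}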
4997 // CHECK-LABEL: @test_vrshr_n_s16(
4998 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
4999 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5000 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5001 // CHECK: ret <4 x i16> [[VRSHR_N1]]
5002 int16x4_t
test_vrshr_n_s16(int16x4_t a
) {
5003 return vrshr_n_s16(a
, 3);
5006 // CHECK-LABEL: @test_vrshr_n_s32(
5007 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5008 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5009 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5010 // CHECK: ret <2 x i32> [[VRSHR_N1]]
5011 int32x2_t
test_vrshr_n_s32(int32x2_t a
) {
5012 return vrshr_n_s32(a
, 3);
5015 // CHECK-LABEL: @test_vrshrq_n_s8(
5016 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5017 // CHECK: ret <16 x i8> [[VRSHR_N]]
5018 int8x16_t
test_vrshrq_n_s8(int8x16_t a
) {
5019 return vrshrq_n_s8(a
, 3);
5022 // CHECK-LABEL: @test_vrshrq_n_s16(
5023 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5024 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5025 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5026 // CHECK: ret <8 x i16> [[VRSHR_N1]]
5027 int16x8_t
test_vrshrq_n_s16(int16x8_t a
) {
5028 return vrshrq_n_s16(a
, 3);
5031 // CHECK-LABEL: @test_vrshrq_n_s32(
5032 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5033 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5034 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5035 // CHECK: ret <4 x i32> [[VRSHR_N1]]
5036 int32x4_t
test_vrshrq_n_s32(int32x4_t a
) {
5037 return vrshrq_n_s32(a
, 3);
5040 // CHECK-LABEL: @test_vrshrq_n_s64(
5041 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5042 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5043 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5044 // CHECK: ret <2 x i64> [[VRSHR_N1]]
5045 int64x2_t
test_vrshrq_n_s64(int64x2_t a
) {
5046 return vrshrq_n_s64(a
, 3);
5049 // CHECK-LABEL: @test_vrshr_n_u8(
5050 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5051 // CHECK: ret <8 x i8> [[VRSHR_N]]
5052 uint8x8_t
test_vrshr_n_u8(uint8x8_t a
) {
5053 return vrshr_n_u8(a
, 3);
5056 // CHECK-LABEL: @test_vrshr_n_u16(
5057 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5058 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5059 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5060 // CHECK: ret <4 x i16> [[VRSHR_N1]]
5061 uint16x4_t
test_vrshr_n_u16(uint16x4_t a
) {
5062 return vrshr_n_u16(a
, 3);
5065 // CHECK-LABEL: @test_vrshr_n_u32(
5066 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5067 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5068 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5069 // CHECK: ret <2 x i32> [[VRSHR_N1]]
5070 uint32x2_t
test_vrshr_n_u32(uint32x2_t a
) {
5071 return vrshr_n_u32(a
, 3);
5074 // CHECK-LABEL: @test_vrshrq_n_u8(
5075 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5076 // CHECK: ret <16 x i8> [[VRSHR_N]]
5077 uint8x16_t
test_vrshrq_n_u8(uint8x16_t a
) {
5078 return vrshrq_n_u8(a
, 3);
5081 // CHECK-LABEL: @test_vrshrq_n_u16(
5082 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5083 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5084 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5085 // CHECK: ret <8 x i16> [[VRSHR_N1]]
5086 uint16x8_t
test_vrshrq_n_u16(uint16x8_t a
) {
5087 return vrshrq_n_u16(a
, 3);
5090 // CHECK-LABEL: @test_vrshrq_n_u32(
5091 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5092 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5093 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5094 // CHECK: ret <4 x i32> [[VRSHR_N1]]
5095 uint32x4_t
test_vrshrq_n_u32(uint32x4_t a
) {
5096 return vrshrq_n_u32(a
, 3);
5099 // CHECK-LABEL: @test_vrshrq_n_u64(
5100 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5101 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5102 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5103 // CHECK: ret <2 x i64> [[VRSHR_N1]]
5104 uint64x2_t
test_vrshrq_n_u64(uint64x2_t a
) {
5105 return vrshrq_n_u64(a
, 3);

// CHECK-LABEL: @test_vrsra_n_s8(
// CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
// CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
  return vrsra_n_s8(a, b, 3);
}
5116 // CHECK-LABEL: @test_vrsra_n_s16(
5117 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5118 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5119 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5120 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5121 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5122 // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5123 // CHECK: ret <4 x i16> [[TMP3]]
5124 int16x4_t
test_vrsra_n_s16(int16x4_t a
, int16x4_t b
) {
5125 return vrsra_n_s16(a
, b
, 3);
5128 // CHECK-LABEL: @test_vrsra_n_s32(
5129 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5130 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5131 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5132 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5133 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5134 // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5135 // CHECK: ret <2 x i32> [[TMP3]]
5136 int32x2_t
test_vrsra_n_s32(int32x2_t a
, int32x2_t b
) {
5137 return vrsra_n_s32(a
, b
, 3);
5140 // CHECK-LABEL: @test_vrsraq_n_s8(
5141 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5142 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5143 // CHECK: ret <16 x i8> [[TMP0]]
5144 int8x16_t
test_vrsraq_n_s8(int8x16_t a
, int8x16_t b
) {
5145 return vrsraq_n_s8(a
, b
, 3);
5148 // CHECK-LABEL: @test_vrsraq_n_s16(
5149 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5150 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5151 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5152 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5153 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5154 // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5155 // CHECK: ret <8 x i16> [[TMP3]]
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_s16(a, b, 3);
}
5160 // CHECK-LABEL: @test_vrsraq_n_s32(
5161 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5162 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5163 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5164 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5165 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5166 // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5167 // CHECK: ret <4 x i32> [[TMP3]]
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_s32(a, b, 3);
}
5172 // CHECK-LABEL: @test_vrsraq_n_s64(
5173 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5174 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5175 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5176 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5177 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5178 // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5179 // CHECK: ret <2 x i64> [[TMP3]]
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 3);
}
5184 // CHECK-LABEL: @test_vrsra_n_u8(
5185 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5186 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]]
5187 // CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vrsra_n_u8(a, b, 3);
}
5192 // CHECK-LABEL: @test_vrsra_n_u16(
5193 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5194 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5195 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5196 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>)
5197 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5198 // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
5199 // CHECK: ret <4 x i16> [[TMP3]]
uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vrsra_n_u16(a, b, 3);
}
5204 // CHECK-LABEL: @test_vrsra_n_u32(
5205 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5206 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5207 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5208 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -3, i32 -3>)
5209 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5210 // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
5211 // CHECK: ret <2 x i32> [[TMP3]]
uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vrsra_n_u32(a, b, 3);
}
5216 // CHECK-LABEL: @test_vrsraq_n_u8(
5217 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> <i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3, i8 -3>)
5218 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]]
5219 // CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vrsraq_n_u8(a, b, 3);
}
5224 // CHECK-LABEL: @test_vrsraq_n_u16(
5225 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5226 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5227 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5228 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>)
5229 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5230 // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
5231 // CHECK: ret <8 x i16> [[TMP3]]
uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vrsraq_n_u16(a, b, 3);
}
5236 // CHECK-LABEL: @test_vrsraq_n_u32(
5237 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5238 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5239 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5240 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>)
5241 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5242 // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
5243 // CHECK: ret <4 x i32> [[TMP3]]
uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vrsraq_n_u32(a, b, 3);
}
5248 // CHECK-LABEL: @test_vrsraq_n_u64(
5249 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5250 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5251 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5252 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -3, i64 -3>)
5253 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5254 // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
5255 // CHECK: ret <2 x i64> [[TMP3]]
uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vrsraq_n_u64(a, b, 3);
}
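// The vsri_n/vsriq_n (shift right and insert) tests only verify that the
// @llvm.aarch64.neon.vsri.* intrinsic is emitted with the immediate shift,
// after bitcasting the operands through <8 x i8>/<16 x i8> where needed.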
5260 // CHECK-LABEL: @test_vsri_n_s8(
5261 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5262 // CHECK: ret <8 x i8> [[VSRI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 3);
}
5267 // CHECK-LABEL: @test_vsri_n_s16(
5268 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5269 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5270 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5271 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5272 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5273 // CHECK: ret <4 x i16> [[VSRI_N2]]
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 3);
}
5278 // CHECK-LABEL: @test_vsri_n_s32(
5279 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5280 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5281 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5282 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5283 // CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
5284 // CHECK: ret <2 x i32> [[VSRI_N2]]
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 3);
}
5289 // CHECK-LABEL: @test_vsriq_n_s8(
5290 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5291 // CHECK: ret <16 x i8> [[VSRI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 3);
}
5296 // CHECK-LABEL: @test_vsriq_n_s16(
5297 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5298 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5299 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5300 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5301 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
5302 // CHECK: ret <8 x i16> [[VSRI_N2]]
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 3);
}
5307 // CHECK-LABEL: @test_vsriq_n_s32(
5308 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5309 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5310 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5311 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5312 // CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
5313 // CHECK: ret <4 x i32> [[VSRI_N2]]
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 3);
}
5318 // CHECK-LABEL: @test_vsriq_n_s64(
5319 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5320 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5321 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5322 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5323 // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
5324 // CHECK: ret <2 x i64> [[VSRI_N2]]
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 3);
}
5329 // CHECK-LABEL: @test_vsri_n_u8(
5330 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5331 // CHECK: ret <8 x i8> [[VSRI_N]]
uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsri_n_u8(a, b, 3);
}
5336 // CHECK-LABEL: @test_vsri_n_u16(
5337 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5338 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5339 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5340 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5341 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
5342 // CHECK: ret <4 x i16> [[VSRI_N2]]
uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsri_n_u16(a, b, 3);
}
5347 // CHECK-LABEL: @test_vsri_n_u32(
5348 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5349 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5350 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5351 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5352 // CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
5353 // CHECK: ret <2 x i32> [[VSRI_N2]]
uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsri_n_u32(a, b, 3);
}
5358 // CHECK-LABEL: @test_vsriq_n_u8(
5359 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5360 // CHECK: ret <16 x i8> [[VSRI_N]]
uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsriq_n_u8(a, b, 3);
}
5365 // CHECK-LABEL: @test_vsriq_n_u16(
5366 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5367 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5368 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5369 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5370 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
5371 // CHECK: ret <8 x i16> [[VSRI_N2]]
uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsriq_n_u16(a, b, 3);
}
5376 // CHECK-LABEL: @test_vsriq_n_u32(
5377 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5378 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5379 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5380 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5381 // CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
5382 // CHECK: ret <4 x i32> [[VSRI_N2]]
uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsriq_n_u32(a, b, 3);
}
5387 // CHECK-LABEL: @test_vsriq_n_u64(
5388 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5389 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5390 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5391 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5392 // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
5393 // CHECK: ret <2 x i64> [[VSRI_N2]]
uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsriq_n_u64(a, b, 3);
}
5398 // CHECK-LABEL: @test_vsri_n_p8(
5399 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5400 // CHECK: ret <8 x i8> [[VSRI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 3);
}
5405 // CHECK-LABEL: @test_vsri_n_p16(
5406 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5407 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5408 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5409 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5410 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
5411 // CHECK: ret <4 x i16> [[VSRI_N2]]
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 15);
}
5416 // CHECK-LABEL: @test_vsriq_n_p8(
5417 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5418 // CHECK: ret <16 x i8> [[VSRI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 3);
}
5423 // CHECK-LABEL: @test_vsriq_n_p16(
5424 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5425 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5426 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5427 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5428 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
5429 // CHECK: ret <8 x i16> [[VSRI_N2]]
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 15);
}
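// The vsli_n/vsliq_n (shift left and insert) tests mirror the vsri tests
// above, selecting @llvm.aarch64.neon.vsli.* instead.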
5434 // CHECK-LABEL: @test_vsli_n_s8(
5435 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5436 // CHECK: ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 3);
}
5441 // CHECK-LABEL: @test_vsli_n_s16(
5442 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5443 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5444 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5445 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5446 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
5447 // CHECK: ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 3);
}
5452 // CHECK-LABEL: @test_vsli_n_s32(
5453 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5454 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5455 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5456 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5457 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
5458 // CHECK: ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 3);
}
5463 // CHECK-LABEL: @test_vsliq_n_s8(
5464 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5465 // CHECK: ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 3);
}
5470 // CHECK-LABEL: @test_vsliq_n_s16(
5471 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5472 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5473 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5474 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5475 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
5476 // CHECK: ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 3);
}
5481 // CHECK-LABEL: @test_vsliq_n_s32(
5482 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5483 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5484 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5485 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5486 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
5487 // CHECK: ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 3);
}
5492 // CHECK-LABEL: @test_vsliq_n_s64(
5493 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5494 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5495 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5496 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5497 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
5498 // CHECK: ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 3);
}
5503 // CHECK-LABEL: @test_vsli_n_u8(
5504 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5505 // CHECK: ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 3);
}
5510 // CHECK-LABEL: @test_vsli_n_u16(
5511 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5512 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5513 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5514 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5515 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
5516 // CHECK: ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 3);
}
5521 // CHECK-LABEL: @test_vsli_n_u32(
5522 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5523 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
5524 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5525 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
5526 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
5527 // CHECK: ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 3);
}
5532 // CHECK-LABEL: @test_vsliq_n_u8(
5533 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5534 // CHECK: ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsliq_n_u8(a, b, 3);
}
5539 // CHECK-LABEL: @test_vsliq_n_u16(
5540 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5541 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5542 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5543 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5544 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
5545 // CHECK: ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 3);
}
5550 // CHECK-LABEL: @test_vsliq_n_u32(
5551 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5552 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5553 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5554 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
5555 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
5556 // CHECK: ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 3);
}
5561 // CHECK-LABEL: @test_vsliq_n_u64(
5562 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5563 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5564 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5565 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
5566 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
5567 // CHECK: ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 3);
}
5572 // CHECK-LABEL: @test_vsli_n_p8(
5573 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
5574 // CHECK: ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 3);
}
5579 // CHECK-LABEL: @test_vsli_n_p16(
5580 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5581 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
5582 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5583 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
5584 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
5585 // CHECK: ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 15);
}
5590 // CHECK-LABEL: @test_vsliq_n_p8(
5591 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
5592 // CHECK: ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 3);
}
5597 // CHECK-LABEL: @test_vsliq_n_p16(
5598 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5599 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5600 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5601 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
5602 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
5603 // CHECK: ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 15);
}
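// vqshlu_n/vqshluq_n (signed saturating shift left, unsigned result) should
// lower to @llvm.aarch64.neon.sqshlu.* with a positive splat shift amount.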
5608 // CHECK-LABEL: @test_vqshlu_n_s8(
5609 // CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
5610 // CHECK: ret <8 x i8> [[VQSHLU_N]]
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 3);
}
5615 // CHECK-LABEL: @test_vqshlu_n_s16(
5616 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
5617 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
5618 // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
5619 // CHECK: ret <4 x i16> [[VQSHLU_N1]]
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 3);
}
5624 // CHECK-LABEL: @test_vqshlu_n_s32(
5625 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
5626 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
5627 // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 3, i32 3>)
5628 // CHECK: ret <2 x i32> [[VQSHLU_N1]]
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 3);
}
5633 // CHECK-LABEL: @test_vqshluq_n_s8(
5634 // CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
5635 // CHECK: ret <16 x i8> [[VQSHLU_N]]
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 3);
}
5640 // CHECK-LABEL: @test_vqshluq_n_s16(
5641 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5642 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5643 // CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
5644 // CHECK: ret <8 x i16> [[VQSHLU_N1]]
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 3);
}
5649 // CHECK-LABEL: @test_vqshluq_n_s32(
5650 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5651 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5652 // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
5653 // CHECK: ret <4 x i32> [[VQSHLU_N1]]
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 3);
}
5658 // CHECK-LABEL: @test_vqshluq_n_s64(
5659 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5660 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5661 // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 3, i64 3>)
5662 // CHECK: ret <2 x i64> [[VQSHLU_N1]]
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 3);
}
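// The narrowing shifts below (vshrn_n and the _high variants) expand to a
// plain ashr/lshr by the immediate followed by a trunc; the _high forms add
// a shufflevector that concatenates the narrowed result onto the low half.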
5667 // CHECK-LABEL: @test_vshrn_n_s16(
5668 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5669 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5670 // CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5671 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5672 // CHECK: ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 3);
}
5677 // CHECK-LABEL: @test_vshrn_n_s32(
5678 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5679 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5680 // CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
5681 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5682 // CHECK: ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 9);
}
5687 // CHECK-LABEL: @test_vshrn_n_s64(
5688 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5689 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5690 // CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
5691 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5692 // CHECK: ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 19);
}
5697 // CHECK-LABEL: @test_vshrn_n_u16(
5698 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5699 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5700 // CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5701 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5702 // CHECK: ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 3);
}
5707 // CHECK-LABEL: @test_vshrn_n_u32(
5708 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5709 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5710 // CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
5711 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5712 // CHECK: ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 9);
}
5717 // CHECK-LABEL: @test_vshrn_n_u64(
5718 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5719 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5720 // CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
5721 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5722 // CHECK: ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 19);
}
5727 // CHECK-LABEL: @test_vshrn_high_n_s16(
5728 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5729 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5730 // CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5731 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5732 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5733 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vshrn_high_n_s16(a, b, 3);
}
5738 // CHECK-LABEL: @test_vshrn_high_n_s32(
5739 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5740 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5741 // CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
5742 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5743 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5744 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vshrn_high_n_s32(a, b, 9);
}
5749 // CHECK-LABEL: @test_vshrn_high_n_s64(
5750 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5751 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5752 // CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
5753 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5754 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5755 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vshrn_high_n_s64(a, b, 19);
}
5760 // CHECK-LABEL: @test_vshrn_high_n_u16(
5761 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5762 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5763 // CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
5764 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
5765 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5766 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vshrn_high_n_u16(a, b, 3);
}
5771 // CHECK-LABEL: @test_vshrn_high_n_u32(
5772 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5773 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5774 // CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
5775 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
5776 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5777 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vshrn_high_n_u32(a, b, 9);
}
5782 // CHECK-LABEL: @test_vshrn_high_n_u64(
5783 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5784 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5785 // CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
5786 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
5787 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5788 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vshrn_high_n_u64(a, b, 19);
}
5793 // CHECK-LABEL: @test_vqshrun_n_s16(
5794 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5795 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5796 // CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
5797 // CHECK: ret <8 x i8> [[VQSHRUN_N1]]
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 3);
}
5802 // CHECK-LABEL: @test_vqshrun_n_s32(
5803 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5804 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5805 // CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
5806 // CHECK: ret <4 x i16> [[VQSHRUN_N1]]
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 9);
}
5811 // CHECK-LABEL: @test_vqshrun_n_s64(
5812 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5813 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5814 // CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
5815 // CHECK: ret <2 x i32> [[VQSHRUN_N1]]
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 19);
}
5820 // CHECK-LABEL: @test_vqshrun_high_n_s16(
5821 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5822 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5823 // CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
5824 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5825 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrun_high_n_s16(a, b, 3);
}
5830 // CHECK-LABEL: @test_vqshrun_high_n_s32(
5831 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5832 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5833 // CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
5834 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5835 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrun_high_n_s32(a, b, 9);
}
5840 // CHECK-LABEL: @test_vqshrun_high_n_s64(
5841 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5842 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5843 // CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
5844 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5845 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrun_high_n_s64(a, b, 19);
}
5850 // CHECK-LABEL: @test_vrshrn_n_s16(
5851 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5852 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5853 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
5854 // CHECK: ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 3);
}
5859 // CHECK-LABEL: @test_vrshrn_n_s32(
5860 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5861 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5862 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
5863 // CHECK: ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 9);
}
5868 // CHECK-LABEL: @test_vrshrn_n_s64(
5869 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5870 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5871 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
5872 // CHECK: ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 19);
}
5877 // CHECK-LABEL: @test_vrshrn_n_u16(
5878 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5879 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5880 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
5881 // CHECK: ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 3);
}
5886 // CHECK-LABEL: @test_vrshrn_n_u32(
5887 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5888 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5889 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
5890 // CHECK: ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 9);
}
5895 // CHECK-LABEL: @test_vrshrn_n_u64(
5896 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5897 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5898 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
5899 // CHECK: ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 19);
}
5904 // CHECK-LABEL: @test_vrshrn_high_n_s16(
5905 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5906 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5907 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
5908 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5909 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vrshrn_high_n_s16(a, b, 3);
}
5914 // CHECK-LABEL: @test_vrshrn_high_n_s32(
5915 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5916 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5917 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
5918 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5919 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vrshrn_high_n_s32(a, b, 9);
}
5924 // CHECK-LABEL: @test_vrshrn_high_n_s64(
5925 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5926 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5927 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
5928 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5929 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vrshrn_high_n_s64(a, b, 19);
}
5934 // CHECK-LABEL: @test_vrshrn_high_n_u16(
5935 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5936 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5937 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
5938 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5939 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vrshrn_high_n_u16(a, b, 3);
}
5944 // CHECK-LABEL: @test_vrshrn_high_n_u32(
5945 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
5946 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5947 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
5948 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5949 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vrshrn_high_n_u32(a, b, 9);
}
5954 // CHECK-LABEL: @test_vrshrn_high_n_u64(
5955 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
5956 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5957 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
5958 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5959 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vrshrn_high_n_u64(a, b, 19);
}
5964 // CHECK-LABEL: @test_vqrshrun_n_s16(
5965 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
5966 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5967 // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
5968 // CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 3);
}
5973 // CHECK-LABEL: @test_vqrshrun_n_s32(
5974 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
5975 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
5976 // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
5977 // CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 9);
}
5982 // CHECK-LABEL: @test_vqrshrun_n_s64(
5983 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
5984 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
5985 // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
5986 // CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 19);
}
5991 // CHECK-LABEL: @test_vqrshrun_high_n_s16(
5992 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
5993 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
5994 // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
5995 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5996 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrun_high_n_s16(a, b, 3);
}
6001 // CHECK-LABEL: @test_vqrshrun_high_n_s32(
6002 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6003 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6004 // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
6005 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6006 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrun_high_n_s32(a, b, 9);
}
6011 // CHECK-LABEL: @test_vqrshrun_high_n_s64(
6012 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6013 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6014 // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
6015 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6016 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrun_high_n_s64(a, b, 19);
}
6021 // CHECK-LABEL: @test_vqshrn_n_s16(
6022 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6023 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6024 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6025 // CHECK: ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 3);
}
6030 // CHECK-LABEL: @test_vqshrn_n_s32(
6031 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6032 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6033 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6034 // CHECK: ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 9);
}
6039 // CHECK-LABEL: @test_vqshrn_n_s64(
6040 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6041 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6042 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6043 // CHECK: ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 19);
}
6048 // CHECK-LABEL: @test_vqshrn_n_u16(
6049 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6050 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6051 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6052 // CHECK: ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 3);
}
6057 // CHECK-LABEL: @test_vqshrn_n_u32(
6058 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6059 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6060 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6061 // CHECK: ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 9);
}
6066 // CHECK-LABEL: @test_vqshrn_n_u64(
6067 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6068 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6069 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6070 // CHECK: ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 19);
}
6075 // CHECK-LABEL: @test_vqshrn_high_n_s16(
6076 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6077 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6078 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6079 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6080 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqshrn_high_n_s16(a, b, 3);
}
6085 // CHECK-LABEL: @test_vqshrn_high_n_s32(
6086 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6087 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6088 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6089 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6090 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqshrn_high_n_s32(a, b, 9);
}
6095 // CHECK-LABEL: @test_vqshrn_high_n_s64(
6096 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6097 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6098 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6099 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6100 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqshrn_high_n_s64(a, b, 19);
}
6105 // CHECK-LABEL: @test_vqshrn_high_n_u16(
6106 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6107 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6108 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
6109 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6110 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqshrn_high_n_u16(a, b, 3);
}
6115 // CHECK-LABEL: @test_vqshrn_high_n_u32(
6116 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6117 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6118 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
6119 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6120 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqshrn_high_n_u32(a, b, 9);
}
6125 // CHECK-LABEL: @test_vqshrn_high_n_u64(
6126 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6127 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6128 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
6129 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6130 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqshrn_high_n_u64(a, b, 19);
}
6135 // CHECK-LABEL: @test_vqrshrn_n_s16(
6136 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6137 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6138 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6139 // CHECK: ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 3);
}
6144 // CHECK-LABEL: @test_vqrshrn_n_s32(
6145 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6146 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6147 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6148 // CHECK: ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 9);
}
6153 // CHECK-LABEL: @test_vqrshrn_n_s64(
6154 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6155 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6156 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6157 // CHECK: ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 19);
}
6162 // CHECK-LABEL: @test_vqrshrn_n_u16(
6163 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
6164 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6165 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6166 // CHECK: ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 3);
}
6171 // CHECK-LABEL: @test_vqrshrn_n_u32(
6172 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6173 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6174 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6175 // CHECK: ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 9);
}
6180 // CHECK-LABEL: @test_vqrshrn_n_u64(
6181 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6182 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6183 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6184 // CHECK: ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 19);
}
6189 // CHECK-LABEL: @test_vqrshrn_high_n_s16(
6190 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6191 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6192 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6193 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6194 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
  return vqrshrn_high_n_s16(a, b, 3);
}
6199 // CHECK-LABEL: @test_vqrshrn_high_n_s32(
6200 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6201 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6202 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6203 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6204 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
  return vqrshrn_high_n_s32(a, b, 9);
}
6209 // CHECK-LABEL: @test_vqrshrn_high_n_s64(
6210 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6211 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6212 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6213 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6214 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
  return vqrshrn_high_n_s64(a, b, 19);
}
6219 // CHECK-LABEL: @test_vqrshrn_high_n_u16(
6220 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
6221 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
6222 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
6223 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6224 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
  return vqrshrn_high_n_u16(a, b, 3);
}
6229 // CHECK-LABEL: @test_vqrshrn_high_n_u32(
6230 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
6231 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6232 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
6233 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
6234 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
  return vqrshrn_high_n_u32(a, b, 9);
}

6239 // CHECK-LABEL: @test_vqrshrn_high_n_u64(
6240 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
6241 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6242 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
6243 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6244 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
  return vqrshrn_high_n_u64(a, b, 19);
}

6249 // CHECK-LABEL: @test_vshll_n_s8(
6250 // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
6251 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6252 // CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 3);
}

6257 // CHECK-LABEL: @test_vshll_n_s16(
6258 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6259 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6260 // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
6261 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6262 // CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 9);
}

6267 // CHECK-LABEL: @test_vshll_n_s32(
6268 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6269 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6270 // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
6271 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6272 // CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 19);
}

6277 // CHECK-LABEL: @test_vshll_n_u8(
6278 // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
6279 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6280 // CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 3);
}

6285 // CHECK-LABEL: @test_vshll_n_u16(
6286 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6287 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6288 // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
6289 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6290 // CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 9);
}

6295 // CHECK-LABEL: @test_vshll_n_u32(
6296 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6297 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6298 // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
6299 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6300 // CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 19);
}

6305 // CHECK-LABEL: @test_vshll_high_n_s8(
6306 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6307 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
6308 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6309 // CHECK: ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_high_n_s8(int8x16_t a) {
  return vshll_high_n_s8(a, 3);
}

6314 // CHECK-LABEL: @test_vshll_high_n_s16(
6315 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6316 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
6317 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6318 // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
6319 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6320 // CHECK: ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_high_n_s16(int16x8_t a) {
  return vshll_high_n_s16(a, 9);
}

6325 // CHECK-LABEL: @test_vshll_high_n_s32(
6326 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6327 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
6328 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6329 // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
6330 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6331 // CHECK: ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_high_n_s32(int32x4_t a) {
  return vshll_high_n_s32(a, 19);
}

6336 // CHECK-LABEL: @test_vshll_high_n_u8(
6337 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6338 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
6339 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
6340 // CHECK: ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
  return vshll_high_n_u8(a, 3);
}

6345 // CHECK-LABEL: @test_vshll_high_n_u16(
6346 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6347 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
6348 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
6349 // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
6350 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
6351 // CHECK: ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
  return vshll_high_n_u16(a, 9);
}

6356 // CHECK-LABEL: @test_vshll_high_n_u32(
6357 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6358 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
6359 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6360 // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
6361 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
6362 // CHECK: ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
  return vshll_high_n_u32(a, 19);
}

6367 // CHECK-LABEL: @test_vmovl_s8(
6368 // CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
6369 // CHECK: ret <8 x i16> [[VMOVL_I]]
int16x8_t test_vmovl_s8(int8x8_t a) {
  return vmovl_s8(a);
}

6374 // CHECK-LABEL: @test_vmovl_s16(
6375 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6376 // CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
6377 // CHECK: ret <4 x i32> [[VMOVL_I]]
int32x4_t test_vmovl_s16(int16x4_t a) {
  return vmovl_s16(a);
}

6382 // CHECK-LABEL: @test_vmovl_s32(
6383 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6384 // CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
6385 // CHECK: ret <2 x i64> [[VMOVL_I]]
int64x2_t test_vmovl_s32(int32x2_t a) {
  return vmovl_s32(a);
}

6390 // CHECK-LABEL: @test_vmovl_u8(
6391 // CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
6392 // CHECK: ret <8 x i16> [[VMOVL_I]]
uint16x8_t test_vmovl_u8(uint8x8_t a) {
  return vmovl_u8(a);
}

6397 // CHECK-LABEL: @test_vmovl_u16(
6398 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6399 // CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
6400 // CHECK: ret <4 x i32> [[VMOVL_I]]
uint32x4_t test_vmovl_u16(uint16x4_t a) {
  return vmovl_u16(a);
}

6405 // CHECK-LABEL: @test_vmovl_u32(
6406 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6407 // CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
6408 // CHECK: ret <2 x i64> [[VMOVL_I]]
uint64x2_t test_vmovl_u32(uint32x2_t a) {
  return vmovl_u32(a);
}

6413 // CHECK-LABEL: @test_vmovl_high_s8(
6414 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6415 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
6416 // CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vmovl_high_s8(int8x16_t a) {
  return vmovl_high_s8(a);
}

6421 // CHECK-LABEL: @test_vmovl_high_s16(
6422 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6423 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
6424 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
6425 // CHECK: ret <4 x i32> [[TMP1]]
int32x4_t test_vmovl_high_s16(int16x8_t a) {
  return vmovl_high_s16(a);
}

6430 // CHECK-LABEL: @test_vmovl_high_s32(
6431 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6432 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
6433 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
6434 // CHECK: ret <2 x i64> [[TMP1]]
int64x2_t test_vmovl_high_s32(int32x4_t a) {
  return vmovl_high_s32(a);
}

6439 // CHECK-LABEL: @test_vmovl_high_u8(
6440 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6441 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
6442 // CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
  return vmovl_high_u8(a);
}

6447 // CHECK-LABEL: @test_vmovl_high_u16(
6448 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6449 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
6450 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
6451 // CHECK: ret <4 x i32> [[TMP1]]
uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
  return vmovl_high_u16(a);
}

6456 // CHECK-LABEL: @test_vmovl_high_u32(
6457 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6458 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
6459 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
6460 // CHECK: ret <2 x i64> [[TMP1]]
uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
  return vmovl_high_u32(a);
}

6465 // CHECK-LABEL: @test_vcvt_n_f32_s32(
6466 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6467 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6468 // CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
6469 // CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
  return vcvt_n_f32_s32(a, 31);
}

6474 // CHECK-LABEL: @test_vcvtq_n_f32_s32(
6475 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6476 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6477 // CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
6478 // CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
  return vcvtq_n_f32_s32(a, 31);
}

6483 // CHECK-LABEL: @test_vcvtq_n_f64_s64(
6484 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6485 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6486 // CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
6487 // CHECK: ret <2 x double> [[VCVT_N1]]
float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
  return vcvtq_n_f64_s64(a, 50);
}

6492 // CHECK-LABEL: @test_vcvt_n_f32_u32(
6493 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6494 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
6495 // CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
6496 // CHECK: ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
  return vcvt_n_f32_u32(a, 31);
}

6501 // CHECK-LABEL: @test_vcvtq_n_f32_u32(
6502 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
6503 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
6504 // CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
6505 // CHECK: ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
  return vcvtq_n_f32_u32(a, 31);
}

6510 // CHECK-LABEL: @test_vcvtq_n_f64_u64(
6511 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
6512 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
6513 // CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
6514 // CHECK: ret <2 x double> [[VCVT_N1]]
float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
  return vcvtq_n_f64_u64(a, 50);
}

6519 // CHECK-LABEL: @test_vcvt_n_s32_f32(
6520 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
6521 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
6522 // CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
6523 // CHECK: ret <2 x i32> [[VCVT_N1]]
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
  return vcvt_n_s32_f32(a, 31);
}

6528 // CHECK-LABEL: @test_vcvtq_n_s32_f32(
6529 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
6530 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
6531 // CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
6532 // CHECK: ret <4 x i32> [[VCVT_N1]]
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
  return vcvtq_n_s32_f32(a, 31);
}

6537 // CHECK-LABEL: @test_vcvtq_n_s64_f64(
6538 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
6539 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
6540 // CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
6541 // CHECK: ret <2 x i64> [[VCVT_N1]]
int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
  return vcvtq_n_s64_f64(a, 50);
}

6546 // CHECK-LABEL: @test_vcvt_n_u32_f32(
6547 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
6548 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
6549 // CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
6550 // CHECK: ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 31);
}

6555 // CHECK-LABEL: @test_vcvtq_n_u32_f32(
6556 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
6557 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
6558 // CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
6559 // CHECK: ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 31);
}

6564 // CHECK-LABEL: @test_vcvtq_n_u64_f64(
6565 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
6566 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
6567 // CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
6568 // CHECK: ret <2 x i64> [[VCVT_N1]]
uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
  return vcvtq_n_u64_f64(a, 50);
}

6573 // CHECK-LABEL: @test_vaddl_s8(
6574 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
6575 // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
6576 // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6577 // CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

6582 // CHECK-LABEL: @test_vaddl_s16(
6583 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6584 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
6585 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6586 // CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
6587 // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6588 // CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

6593 // CHECK-LABEL: @test_vaddl_s32(
6594 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6595 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
6596 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6597 // CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
6598 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6599 // CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}

6604 // CHECK-LABEL: @test_vaddl_u8(
6605 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
6606 // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
6607 // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6608 // CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

6613 // CHECK-LABEL: @test_vaddl_u16(
6614 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6615 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
6616 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6617 // CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
6618 // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6619 // CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

6624 // CHECK-LABEL: @test_vaddl_u32(
6625 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6626 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
6627 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6628 // CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
6629 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6630 // CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}

6635 // CHECK-LABEL: @test_vaddl_high_s8(
6636 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6637 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6638 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6639 // CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
6640 // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
6641 // CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
  return vaddl_high_s8(a, b);
}

6646 // CHECK-LABEL: @test_vaddl_high_s16(
6647 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6648 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6649 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6650 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6651 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
6652 // CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
6653 // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
6654 // CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
  return vaddl_high_s16(a, b);
}

6659 // CHECK-LABEL: @test_vaddl_high_s32(
6660 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6661 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6662 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6663 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6664 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
6665 // CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
6666 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
6667 // CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
  return vaddl_high_s32(a, b);
}

6672 // CHECK-LABEL: @test_vaddl_high_u8(
6673 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6674 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6675 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6676 // CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
6677 // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
6678 // CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vaddl_high_u8(a, b);
}

6683 // CHECK-LABEL: @test_vaddl_high_u16(
6684 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6685 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6686 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6687 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6688 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
6689 // CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
6690 // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
6691 // CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vaddl_high_u16(a, b);
}

6696 // CHECK-LABEL: @test_vaddl_high_u32(
6697 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6698 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6699 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6700 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6701 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
6702 // CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
6703 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
6704 // CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vaddl_high_u32(a, b);
}

6709 // CHECK-LABEL: @test_vaddw_s8(
6710 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
6711 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
6712 // CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  return vaddw_s8(a, b);
}

6717 // CHECK-LABEL: @test_vaddw_s16(
6718 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6719 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
6720 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
6721 // CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  return vaddw_s16(a, b);
}

6726 // CHECK-LABEL: @test_vaddw_s32(
6727 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6728 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
6729 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
6730 // CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  return vaddw_s32(a, b);
}

6735 // CHECK-LABEL: @test_vaddw_u8(
6736 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
6737 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
6738 // CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  return vaddw_u8(a, b);
}

6743 // CHECK-LABEL: @test_vaddw_u16(
6744 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6745 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
6746 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
6747 // CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  return vaddw_u16(a, b);
}

6752 // CHECK-LABEL: @test_vaddw_u32(
6753 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6754 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
6755 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
6756 // CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}

6761 // CHECK-LABEL: @test_vaddw_high_s8(
6762 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6763 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6764 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
6765 // CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
  return vaddw_high_s8(a, b);
}

6770 // CHECK-LABEL: @test_vaddw_high_s16(
6771 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6772 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6773 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6774 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
6775 // CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
  return vaddw_high_s16(a, b);
}

6780 // CHECK-LABEL: @test_vaddw_high_s32(
6781 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6782 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6783 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6784 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
6785 // CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
  return vaddw_high_s32(a, b);
}

6790 // CHECK-LABEL: @test_vaddw_high_u8(
6791 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6792 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6793 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
6794 // CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vaddw_high_u8(a, b);
}

6799 // CHECK-LABEL: @test_vaddw_high_u16(
6800 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6801 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6802 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6803 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
6804 // CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vaddw_high_u16(a, b);
}

6809 // CHECK-LABEL: @test_vaddw_high_u32(
6810 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6811 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6812 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6813 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
6814 // CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vaddw_high_u32(a, b);
}

6819 // CHECK-LABEL: @test_vsubl_s8(
6820 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
6821 // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
6822 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6823 // CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
  return vsubl_s8(a, b);
}

6828 // CHECK-LABEL: @test_vsubl_s16(
6829 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6830 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
6831 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6832 // CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
6833 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6834 // CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
  return vsubl_s16(a, b);
}

6839 // CHECK-LABEL: @test_vsubl_s32(
6840 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6841 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
6842 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6843 // CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
6844 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6845 // CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
  return vsubl_s32(a, b);
}

6850 // CHECK-LABEL: @test_vsubl_u8(
6851 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
6852 // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
6853 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6854 // CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
  return vsubl_u8(a, b);
}

6859 // CHECK-LABEL: @test_vsubl_u16(
6860 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6861 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
6862 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6863 // CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
6864 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6865 // CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
  return vsubl_u16(a, b);
}

6870 // CHECK-LABEL: @test_vsubl_u32(
6871 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6872 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
6873 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6874 // CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
6875 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
6876 // CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
  return vsubl_u32(a, b);
}

6881 // CHECK-LABEL: @test_vsubl_high_s8(
6882 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6883 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6884 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6885 // CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
6886 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
6887 // CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
  return vsubl_high_s8(a, b);
}

6892 // CHECK-LABEL: @test_vsubl_high_s16(
6893 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6894 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6895 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6896 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6897 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
6898 // CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
6899 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
6900 // CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
  return vsubl_high_s16(a, b);
}

6905 // CHECK-LABEL: @test_vsubl_high_s32(
6906 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6907 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6908 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6909 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6910 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
6911 // CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
6912 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
6913 // CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
  return vsubl_high_s32(a, b);
}

6918 // CHECK-LABEL: @test_vsubl_high_u8(
6919 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6920 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
6921 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6922 // CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
6923 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
6924 // CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vsubl_high_u8(a, b);
}

6929 // CHECK-LABEL: @test_vsubl_high_u16(
6930 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6931 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
6932 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
6933 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6934 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
6935 // CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
6936 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
6937 // CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vsubl_high_u16(a, b);
}

6942 // CHECK-LABEL: @test_vsubl_high_u32(
6943 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
6944 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
6945 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
6946 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
6947 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
6948 // CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
6949 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
6950 // CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vsubl_high_u32(a, b);
}

6955 // CHECK-LABEL: @test_vsubw_s8(
6956 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
6957 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
6958 // CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  return vsubw_s8(a, b);
}

6963 // CHECK-LABEL: @test_vsubw_s16(
6964 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6965 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
6966 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
6967 // CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  return vsubw_s16(a, b);
}

6972 // CHECK-LABEL: @test_vsubw_s32(
6973 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6974 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
6975 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
6976 // CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  return vsubw_s32(a, b);
}

6981 // CHECK-LABEL: @test_vsubw_u8(
6982 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
6983 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
6984 // CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  return vsubw_u8(a, b);
}

6989 // CHECK-LABEL: @test_vsubw_u16(
6990 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6991 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
6992 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
6993 // CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  return vsubw_u16(a, b);
}

6998 // CHECK-LABEL: @test_vsubw_u32(
6999 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7000 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
7001 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
7002 // CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  return vsubw_u32(a, b);
}

7007 // CHECK-LABEL: @test_vsubw_high_s8(
7008 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7009 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7010 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
7011 // CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
  return vsubw_high_s8(a, b);
}

7016 // CHECK-LABEL: @test_vsubw_high_s16(
7017 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7018 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7019 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
7020 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
7021 // CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
  return vsubw_high_s16(a, b);
}

7026 // CHECK-LABEL: @test_vsubw_high_s32(
7027 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7028 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7029 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
7030 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
7031 // CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
  return vsubw_high_s32(a, b);
}

7036 // CHECK-LABEL: @test_vsubw_high_u8(
7037 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7038 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
7039 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
7040 // CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
  return vsubw_high_u8(a, b);
}

7045 // CHECK-LABEL: @test_vsubw_high_u16(
7046 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7047 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
7048 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
7049 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
7050 // CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
  return vsubw_high_u16(a, b);
}

7055 // CHECK-LABEL: @test_vsubw_high_u32(
7056 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7057 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
7058 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
7059 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
7060 // CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
  return vsubw_high_u32(a, b);
}

7065 // CHECK-LABEL: @test_vaddhn_s16(
7066 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7067 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7068 // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
7069 // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7070 // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
7071 // CHECK: ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

7076 // CHECK-LABEL: @test_vaddhn_s32(
7077 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7078 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7079 // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
7080 // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
7081 // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
7082 // CHECK: ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

7087 // CHECK-LABEL: @test_vaddhn_s64(
7088 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7089 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7090 // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
7091 // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
7092 // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
7093 // CHECK: ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

7098 // CHECK-LABEL: @test_vaddhn_u16(
7099 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7100 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7101 // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
7102 // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7103 // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
7104 // CHECK: ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}

7109 // CHECK-LABEL: @test_vaddhn_u32(
7110 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7111 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7112 // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
7113 // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
7114 // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
7115 // CHECK: ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

7120 // CHECK-LABEL: @test_vaddhn_u64(
7121 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7122 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7123 // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
7124 // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
7125 // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
7126 // CHECK: ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}

7131 // CHECK-LABEL: @test_vaddhn_high_s16(
7132 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7133 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7134 // CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
7135 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7136 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
7137 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7138 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vaddhn_high_s16(r, a, b);
}

7143 // CHECK-LABEL: @test_vaddhn_high_s32(
7144 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7145 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7146 // CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
7147 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
7148 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
7149 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7150 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vaddhn_high_s32(r, a, b);
}

7155 // CHECK-LABEL: @test_vaddhn_high_s64(
7156 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7157 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7158 // CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
7159 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
7160 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
7161 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7162 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vaddhn_high_s64(r, a, b);
}

7167 // CHECK-LABEL: @test_vaddhn_high_u16(
7168 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7169 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7170 // CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
7171 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7172 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
7173 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7174 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vaddhn_high_u16(r, a, b);
}

7179 // CHECK-LABEL: @test_vaddhn_high_u32(
7180 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7181 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7182 // CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
7183 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
7184 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
7185 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7186 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vaddhn_high_u32(r, a, b);
}

7191 // CHECK-LABEL: @test_vaddhn_high_u64(
7192 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7193 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7194 // CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
7195 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
7196 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
7197 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7198 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vaddhn_high_u64(r, a, b);
}

7203 // CHECK-LABEL: @test_vraddhn_s16(
7204 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7205 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7206 // CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7207 // CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

7212 // CHECK-LABEL: @test_vraddhn_s32(
7213 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7214 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7215 // CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7216 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
7217 // CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

7222 // CHECK-LABEL: @test_vraddhn_s64(
7223 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7224 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7225 // CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7226 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
7227 // CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

7232 // CHECK-LABEL: @test_vraddhn_u16(
7233 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7234 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7235 // CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7236 // CHECK: ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

7241 // CHECK-LABEL: @test_vraddhn_u32(
7242 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7243 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7244 // CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7245 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
7246 // CHECK: ret <4 x i16> [[VRADDHN_V2_I]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

7251 // CHECK-LABEL: @test_vraddhn_u64(
7252 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7253 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7254 // CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7255 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
7256 // CHECK: ret <2 x i32> [[VRADDHN_V2_I]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}

7261 // CHECK-LABEL: @test_vraddhn_high_s16(
7262 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7263 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7264 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7265 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7266 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vraddhn_high_s16(r, a, b);
}

7271 // CHECK-LABEL: @test_vraddhn_high_s32(
7272 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7273 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7274 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7275 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
7276 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7277 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vraddhn_high_s32(r, a, b);
}

7282 // CHECK-LABEL: @test_vraddhn_high_s64(
7283 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7284 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7285 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7286 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
7287 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7288 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vraddhn_high_s64(r, a, b);
}

7293 // CHECK-LABEL: @test_vraddhn_high_u16(
7294 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7295 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7296 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7297 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7298 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vraddhn_high_u16(r, a, b);
}

7303 // CHECK-LABEL: @test_vraddhn_high_u32(
7304 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7305 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7306 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7307 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
7308 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7309 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vraddhn_high_u32(r, a, b);
}

7314 // CHECK-LABEL: @test_vraddhn_high_u64(
7315 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7316 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7317 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7318 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
7319 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7320 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vraddhn_high_u64(r, a, b);
}

7325 // CHECK-LABEL: @test_vsubhn_s16(
7326 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7327 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7328 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
7329 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7330 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
7331 // CHECK: ret <8 x i8> [[VSUBHN2_I]]
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}

7336 // CHECK-LABEL: @test_vsubhn_s32(
7337 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7338 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7339 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
7340 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
7341 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
7342 // CHECK: ret <4 x i16> [[VSUBHN2_I]]
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}

7347 // CHECK-LABEL: @test_vsubhn_s64(
7348 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7349 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7350 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
7351 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
7352 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
7353 // CHECK: ret <2 x i32> [[VSUBHN2_I]]
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}

7358 // CHECK-LABEL: @test_vsubhn_u16(
7359 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7360 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7361 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
7362 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7363 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
7364 // CHECK: ret <8 x i8> [[VSUBHN2_I]]
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}
7369 // CHECK-LABEL: @test_vsubhn_u32(
7370 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7371 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7372 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
7373 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
7374 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
7375 // CHECK: ret <4 x i16> [[VSUBHN2_I]]
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}
7380 // CHECK-LABEL: @test_vsubhn_u64(
7381 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7382 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7383 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
7384 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
7385 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
7386 // CHECK: ret <2 x i32> [[VSUBHN2_I]]
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}
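// vsubhn_high_*: same narrowing subtract, but the narrow result is concatenated
// onto the existing low half r to form a 128-bit vector (the trailing shufflevector).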
7391 // CHECK-LABEL: @test_vsubhn_high_s16(
7392 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7393 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7394 // CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
7395 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7396 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
7397 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7398 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vsubhn_high_s16(r, a, b);
}
7403 // CHECK-LABEL: @test_vsubhn_high_s32(
7404 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7405 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7406 // CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
7407 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
7408 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
7409 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7410 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vsubhn_high_s32(r, a, b);
}
7415 // CHECK-LABEL: @test_vsubhn_high_s64(
7416 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7417 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7418 // CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
7419 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
7420 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
7421 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7422 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vsubhn_high_s64(r, a, b);
}
7427 // CHECK-LABEL: @test_vsubhn_high_u16(
7428 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7429 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7430 // CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
7431 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7432 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
7433 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7434 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vsubhn_high_u16(r, a, b);
}
7439 // CHECK-LABEL: @test_vsubhn_high_u32(
7440 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7441 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7442 // CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
7443 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
7444 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
7445 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7446 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vsubhn_high_u32(r, a, b);
}
7451 // CHECK-LABEL: @test_vsubhn_high_u64(
7452 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7453 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7454 // CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
7455 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
7456 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
7457 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7458 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vsubhn_high_u64(r, a, b);
}
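// vrsubhn_*: rounding variant of vsubhn_*; a rounding constant of 1 << (shift - 1)
// is added before the high half is taken, so lowering goes through the
// llvm.aarch64.neon.rsubhn.* intrinsics instead of an explicit sub/lshr/trunc.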
7463 // CHECK-LABEL: @test_vrsubhn_s16(
7464 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7465 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7466 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7467 // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}
7472 // CHECK-LABEL: @test_vrsubhn_s32(
7473 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7474 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7475 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7476 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
7477 // CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}
7482 // CHECK-LABEL: @test_vrsubhn_s64(
7483 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7484 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7485 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7486 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
7487 // CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}
7492 // CHECK-LABEL: @test_vrsubhn_u16(
7493 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7494 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7495 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7496 // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}
7501 // CHECK-LABEL: @test_vrsubhn_u32(
7502 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7503 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7504 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7505 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
7506 // CHECK: ret <4 x i16> [[VRSUBHN_V2_I]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}
7511 // CHECK-LABEL: @test_vrsubhn_u64(
7512 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7513 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7514 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7515 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
7516 // CHECK: ret <2 x i32> [[VRSUBHN_V2_I]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}
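// vrsubhn_high_*: rounding subtract-and-narrow written into the upper half of a
// 128-bit result; the rsubhn value is shuffled together with r.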
7521 // CHECK-LABEL: @test_vrsubhn_high_s16(
7522 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7523 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7524 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7525 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7526 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
  return vrsubhn_high_s16(r, a, b);
}
7531 // CHECK-LABEL: @test_vrsubhn_high_s32(
7532 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7533 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7534 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7535 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
7536 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7537 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
  return vrsubhn_high_s32(r, a, b);
}
7542 // CHECK-LABEL: @test_vrsubhn_high_s64(
7543 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7544 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7545 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7546 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
7547 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7548 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
  return vrsubhn_high_s64(r, a, b);
}
7553 // CHECK-LABEL: @test_vrsubhn_high_u16(
7554 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
7555 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
7556 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
7557 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7558 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]]
uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
  return vrsubhn_high_u16(r, a, b);
}
7563 // CHECK-LABEL: @test_vrsubhn_high_u32(
7564 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
7565 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
7566 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
7567 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
7568 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
7569 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]]
uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
  return vrsubhn_high_u32(r, a, b);
}
7574 // CHECK-LABEL: @test_vrsubhn_high_u64(
7575 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
7576 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
7577 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
7578 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
7579 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
7580 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]]
uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
  return vrsubhn_high_u64(r, a, b);
}
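// vabdl_*: widening absolute difference (sabd/uabd followed by zext). The zero
// extension is correct even for the signed forms because the absolute difference
// always fits in the unsigned narrow element type.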
7585 // CHECK-LABEL: @test_vabdl_s8(
7586 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
7587 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
7588 // CHECK: ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}
7593 // CHECK-LABEL: @test_vabdl_s16(
7594 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7595 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7596 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
7597 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
7598 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
7599 // CHECK: ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}
7604 // CHECK-LABEL: @test_vabdl_s32(
7605 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7606 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7607 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
7608 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
7609 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
7610 // CHECK: ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}
7615 // CHECK-LABEL: @test_vabdl_u8(
7616 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
7617 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
7618 // CHECK: ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}
7623 // CHECK-LABEL: @test_vabdl_u16(
7624 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7625 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7626 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
7627 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
7628 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
7629 // CHECK: ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}
7634 // CHECK-LABEL: @test_vabdl_u32(
7635 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7636 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7637 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
7638 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
7639 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
7640 // CHECK: ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}
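// vabal_*: absolute-difference-and-accumulate long; the widened absolute
// difference of b and c is added to the accumulator a.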
7645 // CHECK-LABEL: @test_vabal_s8(
7646 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
7647 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
7648 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
7649 // CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}
7654 // CHECK-LABEL: @test_vabal_s16(
7655 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7656 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7657 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
7658 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
7659 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
7660 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
7661 // CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}
7666 // CHECK-LABEL: @test_vabal_s32(
7667 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7668 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7669 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
7670 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
7671 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
7672 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
7673 // CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}
7678 // CHECK-LABEL: @test_vabal_u8(
7679 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
7680 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
7681 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
7682 // CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}
7687 // CHECK-LABEL: @test_vabal_u16(
7688 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7689 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7690 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
7691 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
7692 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
7693 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
7694 // CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}
7699 // CHECK-LABEL: @test_vabal_u32(
7700 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7701 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7702 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
7703 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
7704 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
7705 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
7706 // CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}
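// vabdl_high_*: as vabdl_*, but operating on the upper halves of the 128-bit
// inputs (extracted by the leading shufflevectors).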
7711 // CHECK-LABEL: @test_vabdl_high_s8(
7712 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7713 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7714 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7715 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
7716 // CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
  return vabdl_high_s8(a, b);
}
7721 // CHECK-LABEL: @test_vabdl_high_s16(
7722 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7723 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7724 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7725 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7726 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7727 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
7728 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
7729 // CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
  return vabdl_high_s16(a, b);
}
7734 // CHECK-LABEL: @test_vabdl_high_s32(
7735 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7736 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7737 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7738 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7739 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7740 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
7741 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
7742 // CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
  return vabdl_high_s32(a, b);
}
7747 // CHECK-LABEL: @test_vabdl_high_u8(
7748 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7749 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7750 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7751 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
7752 // CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vabdl_high_u8(a, b);
}
7757 // CHECK-LABEL: @test_vabdl_high_u16(
7758 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7759 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7760 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7761 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7762 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7763 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
7764 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
7765 // CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vabdl_high_u16(a, b);
}
7770 // CHECK-LABEL: @test_vabdl_high_u32(
7771 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7772 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7773 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7774 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7775 // CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7776 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
7777 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
7778 // CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vabdl_high_u32(a, b);
}
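// vabal_high_*: accumulate the widened absolute difference of the upper halves
// of b and c into a.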
7783 // CHECK-LABEL: @test_vabal_high_s8(
7784 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7785 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7786 // CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7787 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
7788 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
7789 // CHECK: ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vabal_high_s8(a, b, c);
}
7794 // CHECK-LABEL: @test_vabal_high_s16(
7795 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7796 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7797 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7798 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7799 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7800 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
7801 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
7802 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
7803 // CHECK: ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vabal_high_s16(a, b, c);
}
7808 // CHECK-LABEL: @test_vabal_high_s32(
7809 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7810 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
7811 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7812 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7813 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7814 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
7815 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
7816 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
7817 // CHECK: ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vabal_high_s32(a, b, c);
}
7822 // CHECK-LABEL: @test_vabal_high_u8(
7823 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7824 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7825 // CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7826 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
7827 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
7828 // CHECK: ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vabal_high_u8(a, b, c);
}
7833 // CHECK-LABEL: @test_vabal_high_u16(
7834 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7835 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7836 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7837 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7838 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7839 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
7840 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
7841 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
7842 // CHECK: ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vabal_high_u16(a, b, c);
}
7847 // CHECK-LABEL: @test_vabal_high_u32(
7848 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7849 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
7850 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7851 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7852 // CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7853 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
7854 // CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
7855 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
7856 // CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vabal_high_u32(a, b, c);
}
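// vmull_*: widening multiply, lowered to the smull/umull intrinsics.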
7861 // CHECK-LABEL: @test_vmull_s8(
7862 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
7863 // CHECK: ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}
7868 // CHECK-LABEL: @test_vmull_s16(
7869 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7870 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7871 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
7872 // CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}
7877 // CHECK-LABEL: @test_vmull_s32(
7878 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7879 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7880 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
7881 // CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}
7886 // CHECK-LABEL: @test_vmull_u8(
7887 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
7888 // CHECK: ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}
7893 // CHECK-LABEL: @test_vmull_u16(
7894 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
7895 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7896 // CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
7897 // CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}
7902 // CHECK-LABEL: @test_vmull_u32(
7903 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
7904 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7905 // CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
7906 // CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}
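// vmull_high_*: widening multiply of the upper halves of the 128-bit inputs.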
7911 // CHECK-LABEL: @test_vmull_high_s8(
7912 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7913 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7914 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7915 // CHECK: ret <8 x i16> [[VMULL_I_I]]
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
  return vmull_high_s8(a, b);
}
7920 // CHECK-LABEL: @test_vmull_high_s16(
7921 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7922 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7923 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7924 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7925 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7926 // CHECK: ret <4 x i32> [[VMULL2_I_I]]
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
  return vmull_high_s16(a, b);
}
7931 // CHECK-LABEL: @test_vmull_high_s32(
7932 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7933 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7934 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7935 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7936 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7937 // CHECK: ret <2 x i64> [[VMULL2_I_I]]
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
  return vmull_high_s32(a, b);
}
7942 // CHECK-LABEL: @test_vmull_high_u8(
7943 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7944 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
7945 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
7946 // CHECK: ret <8 x i16> [[VMULL_I_I]]
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
  return vmull_high_u8(a, b);
}
7951 // CHECK-LABEL: @test_vmull_high_u16(
7952 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7953 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
7954 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
7955 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
7956 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
7957 // CHECK: ret <4 x i32> [[VMULL2_I_I]]
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
  return vmull_high_u16(a, b);
}
7962 // CHECK-LABEL: @test_vmull_high_u32(
7963 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
7964 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
7965 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
7966 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
7967 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
7968 // CHECK: ret <2 x i64> [[VMULL2_I_I]]
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
  return vmull_high_u32(a, b);
}
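// vmlal_*: multiply-accumulate long; a widening multiply of b and c followed by
// an ordinary vector add into the accumulator a.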
7973 // CHECK-LABEL: @test_vmlal_s8(
7974 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
7975 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
7976 // CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}
7981 // CHECK-LABEL: @test_vmlal_s16(
7982 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
7983 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
7984 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
7985 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
7986 // CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}
7991 // CHECK-LABEL: @test_vmlal_s32(
7992 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
7993 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
7994 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
7995 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
7996 // CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}
8001 // CHECK-LABEL: @test_vmlal_u8(
8002 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
8003 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
8004 // CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}
8009 // CHECK-LABEL: @test_vmlal_u16(
8010 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8011 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8012 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
8013 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
8014 // CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}
8019 // CHECK-LABEL: @test_vmlal_u32(
8020 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8021 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8022 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
8023 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
8024 // CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}
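// vmlal_high_*: multiply-accumulate long using the upper halves of b and c.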
8029 // CHECK-LABEL: @test_vmlal_high_s8(
8030 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8031 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8032 // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8033 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
8034 // CHECK: ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlal_high_s8(a, b, c);
}
8039 // CHECK-LABEL: @test_vmlal_high_s16(
8040 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8041 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8042 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8043 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8044 // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8045 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
8046 // CHECK: ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlal_high_s16(a, b, c);
}
8051 // CHECK-LABEL: @test_vmlal_high_s32(
8052 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8053 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8054 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8055 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8056 // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8057 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
8058 // CHECK: ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlal_high_s32(a, b, c);
}
8063 // CHECK-LABEL: @test_vmlal_high_u8(
8064 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8065 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8066 // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8067 // CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
8068 // CHECK: ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlal_high_u8(a, b, c);
}
8073 // CHECK-LABEL: @test_vmlal_high_u16(
8074 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8075 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8076 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8077 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8078 // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8079 // CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
8080 // CHECK: ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlal_high_u16(a, b, c);
}
8085 // CHECK-LABEL: @test_vmlal_high_u32(
8086 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8087 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8088 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8089 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8090 // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8091 // CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
8092 // CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlal_high_u32(a, b, c);
}
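// vmlsl_*: multiply-subtract long; the widening product of b and c is subtracted
// from the accumulator a.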
8097 // CHECK-LABEL: @test_vmlsl_s8(
8098 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
8099 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
8100 // CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}
8105 // CHECK-LABEL: @test_vmlsl_s16(
8106 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8107 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8108 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
8109 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
8110 // CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}
8115 // CHECK-LABEL: @test_vmlsl_s32(
8116 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8117 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8118 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
8119 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
8120 // CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}
8125 // CHECK-LABEL: @test_vmlsl_u8(
8126 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
8127 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
8128 // CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}
8133 // CHECK-LABEL: @test_vmlsl_u16(
8134 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8135 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8136 // CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
8137 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
8138 // CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}
8143 // CHECK-LABEL: @test_vmlsl_u32(
8144 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8145 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8146 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
8147 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
8148 // CHECK: ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}
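// vmlsl_high_*: multiply-subtract long using the upper halves of b and c.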
8153 // CHECK-LABEL: @test_vmlsl_high_s8(
8154 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8155 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8156 // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8157 // CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
8158 // CHECK: ret <8 x i16> [[SUB_I_I]]
int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlsl_high_s8(a, b, c);
}
8163 // CHECK-LABEL: @test_vmlsl_high_s16(
8164 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8165 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8166 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8167 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8168 // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8169 // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
8170 // CHECK: ret <4 x i32> [[SUB_I_I]]
int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlsl_high_s16(a, b, c);
}
8175 // CHECK-LABEL: @test_vmlsl_high_s32(
8176 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8177 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8178 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8179 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8180 // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8181 // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
8182 // CHECK: ret <2 x i64> [[SUB_I_I]]
int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlsl_high_s32(a, b, c);
}
8187 // CHECK-LABEL: @test_vmlsl_high_u8(
8188 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8189 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8190 // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8191 // CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
8192 // CHECK: ret <8 x i16> [[SUB_I_I]]
uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsl_high_u8(a, b, c);
}
8197 // CHECK-LABEL: @test_vmlsl_high_u16(
8198 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8199 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8200 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8201 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8202 // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8203 // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
8204 // CHECK: ret <4 x i32> [[SUB_I_I]]
uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsl_high_u16(a, b, c);
}
8209 // CHECK-LABEL: @test_vmlsl_high_u32(
8210 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8211 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8212 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8213 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8214 // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8215 // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
8216 // CHECK: ret <2 x i64> [[SUB_I_I]]
uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsl_high_u32(a, b, c);
}
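// vqdmull_*: saturating doubling multiply long, per lane roughly
// sat(2 * a[i] * b[i]) in the double-width type; only signed 16- and 32-bit
// element forms exist, lowered to the sqdmull intrinsics.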
8221 // CHECK-LABEL: @test_vqdmull_s16(
8222 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
8223 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8224 // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
8225 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
8226 // CHECK: ret <4 x i32> [[VQDMULL_V2_I]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}
8231 // CHECK-LABEL: @test_vqdmull_s32(
8232 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
8233 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8234 // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
8235 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
8236 // CHECK: ret <2 x i64> [[VQDMULL_V2_I]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}
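// vqdmlal_* / vqdmlsl_*: the sqdmull product of b and c is combined with a
// through the saturating sqadd/sqsub intrinsics.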
8241 // CHECK-LABEL: @test_vqdmlal_s16(
8242 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8243 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8244 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8245 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
8246 // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
8247 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}
8252 // CHECK-LABEL: @test_vqdmlal_s32(
8253 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8254 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8255 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8256 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
8257 // CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
8258 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}
8263 // CHECK-LABEL: @test_vqdmlsl_s16(
8264 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8265 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
8266 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
8267 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
8268 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
8269 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}
8274 // CHECK-LABEL: @test_vqdmlsl_s32(
8275 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8276 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
8277 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
8278 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
8279 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
8280 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}
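// _high variants of the saturating doubling multiplies: identical lowering,
// applied to the upper halves of the 128-bit inputs.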
8285 // CHECK-LABEL: @test_vqdmull_high_s16(
8286 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8287 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8288 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8289 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8290 // CHECK: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8291 // CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
8292 // CHECK: ret <4 x i32> [[VQDMULL_V2_I_I]]
int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
  return vqdmull_high_s16(a, b);
}
8297 // CHECK-LABEL: @test_vqdmull_high_s32(
8298 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
8299 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8300 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8301 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8302 // CHECK: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8303 // CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
8304 // CHECK: ret <2 x i64> [[VQDMULL_V2_I_I]]
int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
  return vqdmull_high_s32(a, b);
}
8309 // CHECK-LABEL: @test_vqdmlal_high_s16(
8310 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8311 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8312 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8313 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8314 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8315 // CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8316 // CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
8317 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I_I]]
int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vqdmlal_high_s16(a, b, c);
}
8322 // CHECK-LABEL: @test_vqdmlal_high_s32(
8323 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8324 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8325 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8326 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8327 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8328 // CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8329 // CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
8330 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I_I]]
8331 int64x2_t
test_vqdmlal_high_s32(int64x2_t a
, int32x4_t b
, int32x4_t c
) {
8332 return vqdmlal_high_s32(a
, b
, c
);
8335 // CHECK-LABEL: @test_vqdmlsl_high_s16(
8336 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8337 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8338 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
8339 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
8340 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
8341 // CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
8342 // CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
8343 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I_I]]
8344 int32x4_t
test_vqdmlsl_high_s16(int32x4_t a
, int16x8_t b
, int16x8_t c
) {
8345 return vqdmlsl_high_s16(a
, b
, c
);
8348 // CHECK-LABEL: @test_vqdmlsl_high_s32(
8349 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
8350 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
8351 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
8352 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
8353 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
8354 // CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
8355 // CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
8356 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I_I]]
8357 int64x2_t
test_vqdmlsl_high_s32(int64x2_t a
, int32x4_t b
, int32x4_t c
) {
8358 return vqdmlsl_high_s32(a
, b
, c
);
8361 // CHECK-LABEL: @test_vmull_p8(
8362 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b)
8363 // CHECK: ret <8 x i16> [[VMULL_I]]
8364 poly16x8_t
test_vmull_p8(poly8x8_t a
, poly8x8_t b
) {
8365 return vmull_p8(a
, b
);
8368 // CHECK-LABEL: @test_vmull_high_p8(
8369 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8370 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
8371 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
8372 // CHECK: ret <8 x i16> [[VMULL_I_I]]
8373 poly16x8_t
test_vmull_high_p8(poly8x16_t a
, poly8x16_t b
) {
8374 return vmull_high_p8(a
, b
);
// CHECK-LABEL: @test_vaddd_s64(
// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK: ret i64 [[VADDD_I]]
int64_t test_vaddd_s64(int64_t a, int64_t b) {
  return vaddd_s64(a, b);
}

// CHECK-LABEL: @test_vaddd_u64(
// CHECK: [[VADDD_I:%.*]] = add i64 %a, %b
// CHECK: ret i64 [[VADDD_I]]
uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
  return vaddd_u64(a, b);
}

// CHECK-LABEL: @test_vsubd_s64(
// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK: ret i64 [[VSUBD_I]]
int64_t test_vsubd_s64(int64_t a, int64_t b) {
  return vsubd_s64(a, b);
}

// CHECK-LABEL: @test_vsubd_u64(
// CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b
// CHECK: ret i64 [[VSUBD_I]]
uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
  return vsubd_u64(a, b);
}

// CHECK-LABEL: @test_vqaddb_s8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
// CHECK: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqaddb_s8(int8_t a, int8_t b) {
  return vqaddb_s8(a, b);
}

// CHECK-LABEL: @test_vqaddh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
// CHECK: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqaddh_s16(int16_t a, int16_t b) {
  return vqaddh_s16(a, b);
}

// CHECK-LABEL: @test_vqadds_s32(
// CHECK: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQADDS_S32_I]]
int32_t test_vqadds_s32(int32_t a, int32_t b) {
  return vqadds_s32(a, b);
}

// CHECK-LABEL: @test_vqaddd_s64(
// CHECK: [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQADDD_S64_I]]
int64_t test_vqaddd_s64(int64_t a, int64_t b) {
  return vqaddd_s64(a, b);
}

// CHECK-LABEL: @test_vqaddb_u8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
// CHECK: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
  return vqaddb_u8(a, b);
}

// CHECK-LABEL: @test_vqaddh_u16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
// CHECK: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
  return vqaddh_u16(a, b);
}

// CHECK-LABEL: @test_vqadds_u32(
// CHECK: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQADDS_U32_I]]
uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
  return vqadds_u32(a, b);
}

// CHECK-LABEL: @test_vqaddd_u64(
// CHECK: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQADDD_U64_I]]
uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
  return vqaddd_u64(a, b);
}

// CHECK-LABEL: @test_vqsubb_s8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
// CHECK: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqsubb_s8(int8_t a, int8_t b) {
  return vqsubb_s8(a, b);
}

// CHECK-LABEL: @test_vqsubh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
// CHECK: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqsubh_s16(int16_t a, int16_t b) {
  return vqsubh_s16(a, b);
}

// CHECK-LABEL: @test_vqsubs_s32(
// CHECK: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQSUBS_S32_I]]
int32_t test_vqsubs_s32(int32_t a, int32_t b) {
  return vqsubs_s32(a, b);
}

// CHECK-LABEL: @test_vqsubd_s64(
// CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQSUBD_S64_I]]
int64_t test_vqsubd_s64(int64_t a, int64_t b) {
  return vqsubd_s64(a, b);
}

// CHECK-LABEL: @test_vqsubb_u8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
// CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
  return vqsubb_u8(a, b);
}

// CHECK-LABEL: @test_vqsubh_u16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
// CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
  return vqsubh_u16(a, b);
}

// CHECK-LABEL: @test_vqsubs_u32(
// CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQSUBS_U32_I]]
uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
  return vqsubs_u32(a, b);
}

// CHECK-LABEL: @test_vqsubd_u64(
// CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQSUBD_U64_I]]
uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
  return vqsubd_u64(a, b);
}

// CHECK-LABEL: @test_vshld_s64(
// CHECK: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VSHLD_S64_I]]
int64_t test_vshld_s64(int64_t a, int64_t b) {
  return vshld_s64(a, b);
}

// CHECK-LABEL: @test_vshld_u64(
// CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VSHLD_U64_I]]
uint64_t test_vshld_u64(uint64_t a, int64_t b) {
  return vshld_u64(a, b);
}

// CHECK-LABEL: @test_vqshlb_s8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
// CHECK: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqshlb_s8(int8_t a, int8_t b) {
  return vqshlb_s8(a, b);
}

// CHECK-LABEL: @test_vqshlh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
// CHECK: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqshlh_s16(int16_t a, int16_t b) {
  return vqshlh_s16(a, b);
}

// CHECK-LABEL: @test_vqshls_s32(
// CHECK: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQSHLS_S32_I]]
int32_t test_vqshls_s32(int32_t a, int32_t b) {
  return vqshls_s32(a, b);
}

// CHECK-LABEL: @test_vqshld_s64(
// CHECK: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQSHLD_S64_I]]
int64_t test_vqshld_s64(int64_t a, int64_t b) {
  return vqshld_s64(a, b);
}

// CHECK-LABEL: @test_vqshlb_u8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
// CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqshlb_u8(uint8_t a, int8_t b) {
  return vqshlb_u8(a, b);
}

// CHECK-LABEL: @test_vqshlh_u16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
// CHECK: [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqshlh_u16(uint16_t a, int16_t b) {
  return vqshlh_u16(a, b);
}

// CHECK-LABEL: @test_vqshls_u32(
// CHECK: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQSHLS_U32_I]]
uint32_t test_vqshls_u32(uint32_t a, int32_t b) {
  return vqshls_u32(a, b);
}

// CHECK-LABEL: @test_vqshld_u64(
// CHECK: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQSHLD_U64_I]]
uint64_t test_vqshld_u64(uint64_t a, int64_t b) {
  return vqshld_u64(a, b);
}

// CHECK-LABEL: @test_vrshld_s64(
// CHECK: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VRSHLD_S64_I]]
int64_t test_vrshld_s64(int64_t a, int64_t b) {
  return vrshld_s64(a, b);
}

// CHECK-LABEL: @test_vrshld_u64(
// CHECK: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VRSHLD_U64_I]]
uint64_t test_vrshld_u64(uint64_t a, int64_t b) {
  return vrshld_u64(a, b);
}

// CHECK-LABEL: @test_vqrshlb_s8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
// CHECK: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
  return vqrshlb_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
// CHECK: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
  return vqrshlh_s16(a, b);
}

// CHECK-LABEL: @test_vqrshls_s32(
// CHECK: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQRSHLS_S32_I]]
int32_t test_vqrshls_s32(int32_t a, int32_t b) {
  return vqrshls_s32(a, b);
}

// CHECK-LABEL: @test_vqrshld_s64(
// CHECK: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQRSHLD_S64_I]]
int64_t test_vqrshld_s64(int64_t a, int64_t b) {
  return vqrshld_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlb_u8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
// CHECK: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
// CHECK: ret i8 [[TMP2]]
uint8_t test_vqrshlb_u8(uint8_t a, int8_t b) {
  return vqrshlb_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlh_u16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
// CHECK: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
uint16_t test_vqrshlh_u16(uint16_t a, int16_t b) {
  return vqrshlh_u16(a, b);
}

// CHECK-LABEL: @test_vqrshls_u32(
// CHECK: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQRSHLS_U32_I]]
uint32_t test_vqrshls_u32(uint32_t a, int32_t b) {
  return vqrshls_u32(a, b);
}

// CHECK-LABEL: @test_vqrshld_u64(
// CHECK: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b)
// CHECK: ret i64 [[VQRSHLD_U64_I]]
uint64_t test_vqrshld_u64(uint64_t a, int64_t b) {
  return vqrshld_u64(a, b);
}

// CHECK-LABEL: @test_vpaddd_s64(
// CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK: ret i64 [[VPADDD_S64_I]]
int64_t test_vpaddd_s64(int64x2_t a) {
  return vpaddd_s64(a);
}

// CHECK-LABEL: @test_vpadds_f32(
// CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0
// CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1
// CHECK: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
// CHECK: ret float [[VPADDD_I]]
float32_t test_vpadds_f32(float32x2_t a) {
  return vpadds_f32(a);
}

// CHECK-LABEL: @test_vpaddd_f64(
// CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0
// CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1
// CHECK: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
// CHECK: ret double [[VPADDD_I]]
float64_t test_vpaddd_f64(float64x2_t a) {
  return vpaddd_f64(a);
}

// CHECK-LABEL: @test_vpmaxnms_f32(
// CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VPMAXNMS_F32_I]]
float32_t test_vpmaxnms_f32(float32x2_t a) {
  return vpmaxnms_f32(a);
}

// CHECK-LABEL: @test_vpmaxnmqd_f64(
// CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VPMAXNMQD_F64_I]]
float64_t test_vpmaxnmqd_f64(float64x2_t a) {
  return vpmaxnmqd_f64(a);
}

// CHECK-LABEL: @test_vpmaxs_f32(
// CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VPMAXS_F32_I]]
float32_t test_vpmaxs_f32(float32x2_t a) {
  return vpmaxs_f32(a);
}

// CHECK-LABEL: @test_vpmaxqd_f64(
// CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VPMAXQD_F64_I]]
float64_t test_vpmaxqd_f64(float64x2_t a) {
  return vpmaxqd_f64(a);
}

// CHECK-LABEL: @test_vpminnms_f32(
// CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VPMINNMS_F32_I]]
float32_t test_vpminnms_f32(float32x2_t a) {
  return vpminnms_f32(a);
}

// CHECK-LABEL: @test_vpminnmqd_f64(
// CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VPMINNMQD_F64_I]]
float64_t test_vpminnmqd_f64(float64x2_t a) {
  return vpminnmqd_f64(a);
}

// CHECK-LABEL: @test_vpmins_f32(
// CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VPMINS_F32_I]]
float32_t test_vpmins_f32(float32x2_t a) {
  return vpmins_f32(a);
}

// CHECK-LABEL: @test_vpminqd_f64(
// CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VPMINQD_F64_I]]
float64_t test_vpminqd_f64(float64x2_t a) {
  return vpminqd_f64(a);
}

// CHECK-LABEL: @test_vqdmulhh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
  return vqdmulhh_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulhs_s32(
// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
  return vqdmulhs_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhh_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
// CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
// CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
// CHECK: ret i16 [[TMP2]]
int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
  return vqrdmulhh_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhs_s32(
// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b)
// CHECK: ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
  return vqrdmulhs_s32(a, b);
}

// CHECK-LABEL: @test_vmulxs_f32(
// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b)
// CHECK: ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_f32(float32_t a, float32_t b) {
  return vmulxs_f32(a, b);
}

// CHECK-LABEL: @test_vmulxd_f64(
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b)
// CHECK: ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_f64(float64_t a, float64_t b) {
  return vmulxd_f64(a, b);
}

// CHECK-LABEL: @test_vmulx_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VMULX2_I]]
float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
  return vmulx_f64(a, b);
}

// CHECK-LABEL: @test_vrecpss_f32(
// CHECK: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b)
// CHECK: ret float [[VRECPS_I]]
float32_t test_vrecpss_f32(float32_t a, float32_t b) {
  return vrecpss_f32(a, b);
}

// CHECK-LABEL: @test_vrecpsd_f64(
// CHECK: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b)
// CHECK: ret double [[VRECPS_I]]
float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
  return vrecpsd_f64(a, b);
}

// CHECK-LABEL: @test_vrsqrtss_f32(
// CHECK: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b)
// CHECK: ret float [[VRSQRTSS_F32_I]]
float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
  return vrsqrtss_f32(a, b);
}

// CHECK-LABEL: @test_vrsqrtsd_f64(
// CHECK: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b)
// CHECK: ret double [[VRSQRTSD_F64_I]]
float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
  return vrsqrtsd_f64(a, b);
}

// CHECK-LABEL: @test_vcvts_f32_s32(
// CHECK: [[TMP0:%.*]] = sitofp i32 %a to float
// CHECK: ret float [[TMP0]]
float32_t test_vcvts_f32_s32(int32_t a) {
  return vcvts_f32_s32(a);
}

// CHECK-LABEL: @test_vcvtd_f64_s64(
// CHECK: [[TMP0:%.*]] = sitofp i64 %a to double
// CHECK: ret double [[TMP0]]
float64_t test_vcvtd_f64_s64(int64_t a) {
  return vcvtd_f64_s64(a);
}

// CHECK-LABEL: @test_vcvts_f32_u32(
// CHECK: [[TMP0:%.*]] = uitofp i32 %a to float
// CHECK: ret float [[TMP0]]
float32_t test_vcvts_f32_u32(uint32_t a) {
  return vcvts_f32_u32(a);
}

// CHECK-LABEL: @test_vcvtd_f64_u64(
// CHECK: [[TMP0:%.*]] = uitofp i64 %a to double
// CHECK: ret double [[TMP0]]
float64_t test_vcvtd_f64_u64(uint64_t a) {
  return vcvtd_f64_u64(a);
}

// CHECK-LABEL: @test_vrecpes_f32(
// CHECK: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a)
// CHECK: ret float [[VRECPES_F32_I]]
float32_t test_vrecpes_f32(float32_t a) {
  return vrecpes_f32(a);
}

// CHECK-LABEL: @test_vrecped_f64(
// CHECK: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a)
// CHECK: ret double [[VRECPED_F64_I]]
float64_t test_vrecped_f64(float64_t a) {
  return vrecped_f64(a);
}

// CHECK-LABEL: @test_vrecpxs_f32(
// CHECK: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a)
// CHECK: ret float [[VRECPXS_F32_I]]
float32_t test_vrecpxs_f32(float32_t a) {
  return vrecpxs_f32(a);
}

// CHECK-LABEL: @test_vrecpxd_f64(
// CHECK: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a)
// CHECK: ret double [[VRECPXD_F64_I]]
float64_t test_vrecpxd_f64(float64_t a) {
  return vrecpxd_f64(a);
}

// CHECK-LABEL: @test_vrsqrte_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a)
// CHECK: ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: @test_vrsqrteq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a)
// CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}

// CHECK-LABEL: @test_vrsqrtes_f32(
// CHECK: [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a)
// CHECK: ret float [[VRSQRTES_F32_I]]
float32_t test_vrsqrtes_f32(float32_t a) {
  return vrsqrtes_f32(a);
}

// CHECK-LABEL: @test_vrsqrted_f64(
// CHECK: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a)
// CHECK: ret double [[VRSQRTED_F64_I]]
float64_t test_vrsqrted_f64(float64_t a) {
  return vrsqrted_f64(a);
}

// CHECK-LABEL: @test_vld1q_u8(
// CHECK: [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1
// CHECK: ret <16 x i8> [[TMP1]]
uint8x16_t test_vld1q_u8(uint8_t const *a) {
  return vld1q_u8(a);
}

// CHECK-LABEL: @test_vld1q_u16(
// CHECK: [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2
// CHECK: ret <8 x i16> [[TMP2]]
uint16x8_t test_vld1q_u16(uint16_t const *a) {
  return vld1q_u16(a);
}

// CHECK-LABEL: @test_vld1q_u32(
// CHECK: [[TMP2:%.*]] = load <4 x i32>, ptr %a, align 4
// CHECK: ret <4 x i32> [[TMP2]]
uint32x4_t test_vld1q_u32(uint32_t const *a) {
  return vld1q_u32(a);
}

// CHECK-LABEL: @test_vld1q_u64(
// CHECK: [[TMP2:%.*]] = load <2 x i64>, ptr %a, align 8
// CHECK: ret <2 x i64> [[TMP2]]
uint64x2_t test_vld1q_u64(uint64_t const *a) {
  return vld1q_u64(a);
}

// CHECK-LABEL: @test_vld1q_s8(
// CHECK: [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1
// CHECK: ret <16 x i8> [[TMP1]]
int8x16_t test_vld1q_s8(int8_t const *a) {
  return vld1q_s8(a);
}

// CHECK-LABEL: @test_vld1q_s16(
// CHECK: [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2
// CHECK: ret <8 x i16> [[TMP2]]
int16x8_t test_vld1q_s16(int16_t const *a) {
  return vld1q_s16(a);
}

// CHECK-LABEL: @test_vld1q_s32(
// CHECK: [[TMP2:%.*]] = load <4 x i32>, ptr %a, align 4
// CHECK: ret <4 x i32> [[TMP2]]
int32x4_t test_vld1q_s32(int32_t const *a) {
  return vld1q_s32(a);
}

// CHECK-LABEL: @test_vld1q_s64(
// CHECK: [[TMP2:%.*]] = load <2 x i64>, ptr %a, align 8
// CHECK: ret <2 x i64> [[TMP2]]
int64x2_t test_vld1q_s64(int64_t const *a) {
  return vld1q_s64(a);
}

// CHECK-LABEL: @test_vld1q_f16(
// CHECK: [[TMP2:%.*]] = load <8 x half>, ptr %a, align 2
// CHECK: ret <8 x half> [[TMP2]]
float16x8_t test_vld1q_f16(float16_t const *a) {
  return vld1q_f16(a);
}

// CHECK-LABEL: @test_vld1q_f32(
// CHECK: [[TMP2:%.*]] = load <4 x float>, ptr %a, align 4
// CHECK: ret <4 x float> [[TMP2]]
float32x4_t test_vld1q_f32(float32_t const *a) {
  return vld1q_f32(a);
}

// CHECK-LABEL: @test_vld1q_f64(
// CHECK: [[TMP2:%.*]] = load <2 x double>, ptr %a, align 8
// CHECK: ret <2 x double> [[TMP2]]
float64x2_t test_vld1q_f64(float64_t const *a) {
  return vld1q_f64(a);
}

// CHECK-LABEL: @test_vld1q_p8(
// CHECK: [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1
// CHECK: ret <16 x i8> [[TMP1]]
poly8x16_t test_vld1q_p8(poly8_t const *a) {
  return vld1q_p8(a);
}

// CHECK-LABEL: @test_vld1q_p16(
// CHECK: [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2
// CHECK: ret <8 x i16> [[TMP2]]
poly16x8_t test_vld1q_p16(poly16_t const *a) {
  return vld1q_p16(a);
}

// CHECK-LABEL: @test_vld1_u8(
// CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
// CHECK: ret <8 x i8> [[TMP1]]
uint8x8_t test_vld1_u8(uint8_t const *a) {
  return vld1_u8(a);
}

// CHECK-LABEL: @test_vld1_u16(
// CHECK: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2
// CHECK: ret <4 x i16> [[TMP2]]
uint16x4_t test_vld1_u16(uint16_t const *a) {
  return vld1_u16(a);
}

// CHECK-LABEL: @test_vld1_u32(
// CHECK: [[TMP2:%.*]] = load <2 x i32>, ptr %a, align 4
// CHECK: ret <2 x i32> [[TMP2]]
uint32x2_t test_vld1_u32(uint32_t const *a) {
  return vld1_u32(a);
}

// CHECK-LABEL: @test_vld1_u64(
// CHECK: [[TMP2:%.*]] = load <1 x i64>, ptr %a, align 8
// CHECK: ret <1 x i64> [[TMP2]]
uint64x1_t test_vld1_u64(uint64_t const *a) {
  return vld1_u64(a);
}

// CHECK-LABEL: @test_vld1_s8(
// CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
// CHECK: ret <8 x i8> [[TMP1]]
int8x8_t test_vld1_s8(int8_t const *a) {
  return vld1_s8(a);
}

// CHECK-LABEL: @test_vld1_s16(
// CHECK: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2
// CHECK: ret <4 x i16> [[TMP2]]
int16x4_t test_vld1_s16(int16_t const *a) {
  return vld1_s16(a);
}

// CHECK-LABEL: @test_vld1_s32(
// CHECK: [[TMP2:%.*]] = load <2 x i32>, ptr %a, align 4
// CHECK: ret <2 x i32> [[TMP2]]
int32x2_t test_vld1_s32(int32_t const *a) {
  return vld1_s32(a);
}

// CHECK-LABEL: @test_vld1_s64(
// CHECK: [[TMP2:%.*]] = load <1 x i64>, ptr %a, align 8
// CHECK: ret <1 x i64> [[TMP2]]
int64x1_t test_vld1_s64(int64_t const *a) {
  return vld1_s64(a);
}

// CHECK-LABEL: @test_vld1_f16(
// CHECK: [[TMP2:%.*]] = load <4 x half>, ptr %a, align 2
// CHECK: ret <4 x half> [[TMP2]]
float16x4_t test_vld1_f16(float16_t const *a) {
  return vld1_f16(a);
}

// CHECK-LABEL: @test_vld1_f32(
// CHECK: [[TMP2:%.*]] = load <2 x float>, ptr %a, align 4
// CHECK: ret <2 x float> [[TMP2]]
float32x2_t test_vld1_f32(float32_t const *a) {
  return vld1_f32(a);
}

// CHECK-LABEL: @test_vld1_f64(
// CHECK: [[TMP2:%.*]] = load <1 x double>, ptr %a, align 8
// CHECK: ret <1 x double> [[TMP2]]
float64x1_t test_vld1_f64(float64_t const *a) {
  return vld1_f64(a);
}

// CHECK-LABEL: @test_vld1_p8(
// CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
// CHECK: ret <8 x i8> [[TMP1]]
poly8x8_t test_vld1_p8(poly8_t const *a) {
  return vld1_p8(a);
}

// CHECK-LABEL: @test_vld1_p16(
// CHECK: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2
// CHECK: ret <4 x i16> [[TMP2]]
poly16x4_t test_vld1_p16(poly16_t const *a) {
  return vld1_p16(a);
}

// CHECK-LABEL: @test_vld1_u8_void(
// CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
// CHECK: ret <8 x i8> [[TMP1]]
uint8x8_t test_vld1_u8_void(void *a) {
  return vld1_u8(a);
}

// CHECK-LABEL: @test_vld1_u16_void(
// CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1
// CHECK: ret <4 x i16> [[TMP1]]
uint16x4_t test_vld1_u16_void(void *a) {
  return vld1_u16(a);
}

// CHECK-LABEL: @test_vld1_u32_void(
// CHECK: [[TMP1:%.*]] = load <2 x i32>, ptr %a, align 1
// CHECK: ret <2 x i32> [[TMP1]]
uint32x2_t test_vld1_u32_void(void *a) {
  return vld1_u32(a);
}

// CHECK-LABEL: @test_vld1_u64_void(
// CHECK: [[TMP1:%.*]] = load <1 x i64>, ptr %a, align 1
// CHECK: ret <1 x i64> [[TMP1]]
uint64x1_t test_vld1_u64_void(void *a) {
  return vld1_u64(a);
}

// CHECK-LABEL: @test_vld1_s8_void(
// CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
// CHECK: ret <8 x i8> [[TMP1]]
int8x8_t test_vld1_s8_void(void *a) {
  return vld1_s8(a);
}

// CHECK-LABEL: @test_vld1_s16_void(
// CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1
// CHECK: ret <4 x i16> [[TMP1]]
int16x4_t test_vld1_s16_void(void *a) {
  return vld1_s16(a);
}

// CHECK-LABEL: @test_vld1_s32_void(
// CHECK: [[TMP1:%.*]] = load <2 x i32>, ptr %a, align 1
// CHECK: ret <2 x i32> [[TMP1]]
int32x2_t test_vld1_s32_void(void *a) {
  return vld1_s32(a);
}

// CHECK-LABEL: @test_vld1_s64_void(
// CHECK: [[TMP1:%.*]] = load <1 x i64>, ptr %a, align 1
// CHECK: ret <1 x i64> [[TMP1]]
int64x1_t test_vld1_s64_void(void *a) {
  return vld1_s64(a);
}

// CHECK-LABEL: @test_vld1_f16_void(
// CHECK: [[TMP1:%.*]] = load <4 x half>, ptr %a, align 1
// CHECK: ret <4 x half> [[TMP1]]
float16x4_t test_vld1_f16_void(void *a) {
  return vld1_f16(a);
}

// CHECK-LABEL: @test_vld1_f32_void(
// CHECK: [[TMP1:%.*]] = load <2 x float>, ptr %a, align 1
// CHECK: ret <2 x float> [[TMP1]]
float32x2_t test_vld1_f32_void(void *a) {
  return vld1_f32(a);
}

// CHECK-LABEL: @test_vld1_f64_void(
// CHECK: [[TMP1:%.*]] = load <1 x double>, ptr %a, align 1
// CHECK: ret <1 x double> [[TMP1]]
float64x1_t test_vld1_f64_void(void *a) {
  return vld1_f64(a);
}

// CHECK-LABEL: @test_vld1_p8_void(
// CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
// CHECK: ret <8 x i8> [[TMP1]]
poly8x8_t test_vld1_p8_void(void *a) {
  return vld1_p8(a);
}

// CHECK-LABEL: @test_vld1_p16_void(
// CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1
// CHECK: ret <4 x i16> [[TMP1]]
poly16x4_t test_vld1_p16_void(void *a) {
  return vld1_p16(a);
}

// CHECK-LABEL: @test_vld2q_u8(
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a)
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.uint8x16x2_t [[TMP5]]
uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
  return vld2q_u8(a);
}

// CHECK-LABEL: @test_vld2q_u16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a)
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.uint16x8x2_t [[TMP6]]
uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
  return vld2q_u16(a);
}

// CHECK-LABEL: @test_vld2q_u32(
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %a)
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.uint32x4x2_t [[TMP6]]
uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
  return vld2q_u32(a);
}

// CHECK-LABEL: @test_vld2q_u64(
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %a)
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.uint64x2x2_t [[TMP6]]
uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
  return vld2q_u64(a);
}

// CHECK-LABEL: @test_vld2q_s8(
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a)
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.int8x16x2_t [[TMP5]]
int8x16x2_t test_vld2q_s8(int8_t const *a) {
  return vld2q_s8(a);
}

// CHECK-LABEL: @test_vld2q_s16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a)
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.int16x8x2_t [[TMP6]]
int16x8x2_t test_vld2q_s16(int16_t const *a) {
  return vld2q_s16(a);
}

// CHECK-LABEL: @test_vld2q_s32(
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %a)
// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.int32x4x2_t [[TMP6]]
int32x4x2_t test_vld2q_s32(int32_t const *a) {
  return vld2q_s32(a);
}

// CHECK-LABEL: @test_vld2q_s64(
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %a)
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.int64x2x2_t [[TMP6]]
int64x2x2_t test_vld2q_s64(int64_t const *a) {
  return vld2q_s64(a);
}

// CHECK-LABEL: @test_vld2q_f16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0(ptr %a)
// CHECK: store { <8 x half>, <8 x half> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.float16x8x2_t [[TMP6]]
float16x8x2_t test_vld2q_f16(float16_t const *a) {
  return vld2q_f16(a);
}

// CHECK-LABEL: @test_vld2q_f32(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %a)
// CHECK: store { <4 x float>, <4 x float> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.float32x4x2_t [[TMP6]]
float32x4x2_t test_vld2q_f32(float32_t const *a) {
  return vld2q_f32(a);
}

// CHECK-LABEL: @test_vld2q_f64(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr %a)
// CHECK: store { <2 x double>, <2 x double> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.float64x2x2_t [[TMP6]]
float64x2x2_t test_vld2q_f64(float64_t const *a) {
  return vld2q_f64(a);
}

// CHECK-LABEL: @test_vld2q_p8(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a)
// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.poly8x16x2_t [[TMP5]]
poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
  return vld2q_p8(a);
}

// CHECK-LABEL: @test_vld2q_p16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a)
// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.poly16x8x2_t [[TMP6]]
poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
  return vld2q_p16(a);
}

// CHECK-LABEL: @test_vld2_u8(
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.uint8x8x2_t [[TMP5]]
uint8x8x2_t test_vld2_u8(uint8_t const *a) {
  return vld2_u8(a);
}

// CHECK-LABEL: @test_vld2_u16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.uint16x4x2_t [[TMP6]]
uint16x4x2_t test_vld2_u16(uint16_t const *a) {
  return vld2_u16(a);
}

// CHECK-LABEL: @test_vld2_u32(
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %a)
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.uint32x2x2_t [[TMP6]]
uint32x2x2_t test_vld2_u32(uint32_t const *a) {
  return vld2_u32(a);
}

// CHECK-LABEL: @test_vld2_u64(
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %a)
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.uint64x1x2_t [[TMP6]]
uint64x1x2_t test_vld2_u64(uint64_t const *a) {
  return vld2_u64(a);
}

// CHECK-LABEL: @test_vld2_s8(
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.int8x8x2_t [[TMP5]]
int8x8x2_t test_vld2_s8(int8_t const *a) {
  return vld2_s8(a);
}

// CHECK-LABEL: @test_vld2_s16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.int16x4x2_t [[TMP6]]
int16x4x2_t test_vld2_s16(int16_t const *a) {
  return vld2_s16(a);
}

// CHECK-LABEL: @test_vld2_s32(
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %a)
// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.int32x2x2_t [[TMP6]]
int32x2x2_t test_vld2_s32(int32_t const *a) {
  return vld2_s32(a);
}

// CHECK-LABEL: @test_vld2_s64(
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %a)
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.int64x1x2_t [[TMP6]]
int64x1x2_t test_vld2_s64(int64_t const *a) {
  return vld2_s64(a);
}

// CHECK-LABEL: @test_vld2_f16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0(ptr %a)
// CHECK: store { <4 x half>, <4 x half> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.float16x4x2_t [[TMP6]]
float16x4x2_t test_vld2_f16(float16_t const *a) {
  return vld2_f16(a);
}

// CHECK-LABEL: @test_vld2_f32(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr %a)
// CHECK: store { <2 x float>, <2 x float> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.float32x2x2_t [[TMP6]]
float32x2x2_t test_vld2_f32(float32_t const *a) {
  return vld2_f32(a);
}

// CHECK-LABEL: @test_vld2_f64(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr %a)
// CHECK: store { <1 x double>, <1 x double> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld2_f64(float64_t const *a) {
  return vld2_f64(a);
}

// CHECK-LABEL: @test_vld2_p8(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.poly8x8x2_t [[TMP5]]
poly8x8x2_t test_vld2_p8(poly8_t const *a) {
  return vld2_p8(a);
}

// CHECK-LABEL: @test_vld2_p16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.poly16x4x2_t [[TMP6]]
poly16x4x2_t test_vld2_p16(poly16_t const *a) {
  return vld2_p16(a);
}

9542 // CHECK-LABEL: @test_vld3q_u8(
9543 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
9544 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
9545 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a)
9546 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
9547 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9548 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x3_t, ptr [[RETVAL]], align 16
9549 // CHECK: ret %struct.uint8x16x3_t [[TMP5]]
9550 uint8x16x3_t
test_vld3q_u8(uint8_t const *a
) {
9554 // CHECK-LABEL: @test_vld3q_u16(
9555 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
9556 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
9557 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a)
9558 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
9559 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9560 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, ptr [[RETVAL]], align 16
9561 // CHECK: ret %struct.uint16x8x3_t [[TMP6]]
9562 uint16x8x3_t
test_vld3q_u16(uint16_t const *a
) {
9563 return vld3q_u16(a
);
9566 // CHECK-LABEL: @test_vld3q_u32(
9567 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
9568 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
9569 // CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %a)
9570 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]]
9571 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9572 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, ptr [[RETVAL]], align 16
9573 // CHECK: ret %struct.uint32x4x3_t [[TMP6]]
9574 uint32x4x3_t
test_vld3q_u32(uint32_t const *a
) {
9575 return vld3q_u32(a
);
9578 // CHECK-LABEL: @test_vld3q_u64(
9579 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
9580 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
9581 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %a)
9582 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]]
9583 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9584 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, ptr [[RETVAL]], align 16
9585 // CHECK: ret %struct.uint64x2x3_t [[TMP6]]
9586 uint64x2x3_t
test_vld3q_u64(uint64_t const *a
) {
9587 return vld3q_u64(a
);
9590 // CHECK-LABEL: @test_vld3q_s8(
9591 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
9592 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
9593 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a)
9594 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
9595 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9596 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x3_t, ptr [[RETVAL]], align 16
9597 // CHECK: ret %struct.int8x16x3_t [[TMP5]]
9598 int8x16x3_t
test_vld3q_s8(int8_t const *a
) {
9602 // CHECK-LABEL: @test_vld3q_s16(
9603 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
9604 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
9605 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a)
9606 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
9607 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9608 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, ptr [[RETVAL]], align 16
9609 // CHECK: ret %struct.int16x8x3_t [[TMP6]]
9610 int16x8x3_t
test_vld3q_s16(int16_t const *a
) {
9611 return vld3q_s16(a
);
9614 // CHECK-LABEL: @test_vld3q_s32(
9615 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
9616 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
9617 // CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %a)
9618 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]]
9619 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9620 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, ptr [[RETVAL]], align 16
9621 // CHECK: ret %struct.int32x4x3_t [[TMP6]]
9622 int32x4x3_t
test_vld3q_s32(int32_t const *a
) {
9623 return vld3q_s32(a
);
9626 // CHECK-LABEL: @test_vld3q_s64(
9627 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
9628 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
9629 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %a)
9630 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]]
9631 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9632 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, ptr [[RETVAL]], align 16
9633 // CHECK: ret %struct.int64x2x3_t [[TMP6]]
int64x2x3_t test_vld3q_s64(int64_t const *a) {
  return vld3q_s64(a);
}
9638 // CHECK-LABEL: @test_vld3q_f16(
9639 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
9640 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
9641 // CHECK: [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0(ptr %a)
9642 // CHECK: store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], ptr [[__RET]]
9643 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9644 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, ptr [[RETVAL]], align 16
9645 // CHECK: ret %struct.float16x8x3_t [[TMP6]]
float16x8x3_t test_vld3q_f16(float16_t const *a) {
  return vld3q_f16(a);
}
9650 // CHECK-LABEL: @test_vld3q_f32(
9651 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
9652 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
9653 // CHECK: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr %a)
9654 // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], ptr [[__RET]]
9655 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9656 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, ptr [[RETVAL]], align 16
9657 // CHECK: ret %struct.float32x4x3_t [[TMP6]]
float32x4x3_t test_vld3q_f32(float32_t const *a) {
  return vld3q_f32(a);
}
9662 // CHECK-LABEL: @test_vld3q_f64(
9663 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
9664 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
9665 // CHECK: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0(ptr %a)
9666 // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], ptr [[__RET]]
9667 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9668 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16
9669 // CHECK: ret %struct.float64x2x3_t [[TMP6]]
float64x2x3_t test_vld3q_f64(float64_t const *a) {
  return vld3q_f64(a);
}
9674 // CHECK-LABEL: @test_vld3q_p8(
9675 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
9676 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
9677 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a)
9678 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
9679 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9680 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x3_t, ptr [[RETVAL]], align 16
9681 // CHECK: ret %struct.poly8x16x3_t [[TMP5]]
poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
  return vld3q_p8(a);
}
9686 // CHECK-LABEL: @test_vld3q_p16(
9687 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
9688 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
9689 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a)
9690 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
9691 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
9692 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, ptr [[RETVAL]], align 16
9693 // CHECK: ret %struct.poly16x8x3_t [[TMP6]]
poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
  return vld3q_p16(a);
}
9698 // CHECK-LABEL: @test_vld3_u8(
9699 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
9700 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
9701 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a)
9702 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
9703 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9704 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x3_t, ptr [[RETVAL]], align 8
9705 // CHECK: ret %struct.uint8x8x3_t [[TMP5]]
uint8x8x3_t test_vld3_u8(uint8_t const *a) {
  return vld3_u8(a);
}
9710 // CHECK-LABEL: @test_vld3_u16(
9711 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
9712 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
9713 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a)
9714 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
9715 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9716 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, ptr [[RETVAL]], align 8
9717 // CHECK: ret %struct.uint16x4x3_t [[TMP6]]
uint16x4x3_t test_vld3_u16(uint16_t const *a) {
  return vld3_u16(a);
}
9722 // CHECK-LABEL: @test_vld3_u32(
9723 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
9724 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
9725 // CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %a)
9726 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]]
9727 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9728 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, ptr [[RETVAL]], align 8
9729 // CHECK: ret %struct.uint32x2x3_t [[TMP6]]
uint32x2x3_t test_vld3_u32(uint32_t const *a) {
  return vld3_u32(a);
}
9734 // CHECK-LABEL: @test_vld3_u64(
9735 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
9736 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
9737 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %a)
9738 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]]
9739 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9740 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, ptr [[RETVAL]], align 8
9741 // CHECK: ret %struct.uint64x1x3_t [[TMP6]]
uint64x1x3_t test_vld3_u64(uint64_t const *a) {
  return vld3_u64(a);
}
9746 // CHECK-LABEL: @test_vld3_s8(
9747 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
9748 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
9749 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a)
9750 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
9751 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9752 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x3_t, ptr [[RETVAL]], align 8
9753 // CHECK: ret %struct.int8x8x3_t [[TMP5]]
int8x8x3_t test_vld3_s8(int8_t const *a) {
  return vld3_s8(a);
}
9758 // CHECK-LABEL: @test_vld3_s16(
9759 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
9760 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
9761 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a)
9762 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
9763 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9764 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, ptr [[RETVAL]], align 8
9765 // CHECK: ret %struct.int16x4x3_t [[TMP6]]
int16x4x3_t test_vld3_s16(int16_t const *a) {
  return vld3_s16(a);
}
9770 // CHECK-LABEL: @test_vld3_s32(
9771 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
9772 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
9773 // CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %a)
9774 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]]
9775 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9776 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, ptr [[RETVAL]], align 8
9777 // CHECK: ret %struct.int32x2x3_t [[TMP6]]
int32x2x3_t test_vld3_s32(int32_t const *a) {
  return vld3_s32(a);
}
9782 // CHECK-LABEL: @test_vld3_s64(
9783 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
9784 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
9785 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %a)
9786 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]]
9787 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9788 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, ptr [[RETVAL]], align 8
9789 // CHECK: ret %struct.int64x1x3_t [[TMP6]]
int64x1x3_t test_vld3_s64(int64_t const *a) {
  return vld3_s64(a);
}
9794 // CHECK-LABEL: @test_vld3_f16(
9795 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
9796 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
9797 // CHECK: [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0(ptr %a)
9798 // CHECK: store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], ptr [[__RET]]
9799 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9800 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, ptr [[RETVAL]], align 8
9801 // CHECK: ret %struct.float16x4x3_t [[TMP6]]
float16x4x3_t test_vld3_f16(float16_t const *a) {
  return vld3_f16(a);
}
9806 // CHECK-LABEL: @test_vld3_f32(
9807 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
9808 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
9809 // CHECK: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0(ptr %a)
9810 // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], ptr [[__RET]]
9811 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9812 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, ptr [[RETVAL]], align 8
9813 // CHECK: ret %struct.float32x2x3_t [[TMP6]]
float32x2x3_t test_vld3_f32(float32_t const *a) {
  return vld3_f32(a);
}
9818 // CHECK-LABEL: @test_vld3_f64(
9819 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
9820 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
9821 // CHECK: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr %a)
9822 // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], ptr [[__RET]]
9823 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9824 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8
9825 // CHECK: ret %struct.float64x1x3_t [[TMP6]]
float64x1x3_t test_vld3_f64(float64_t const *a) {
  return vld3_f64(a);
}
9830 // CHECK-LABEL: @test_vld3_p8(
9831 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
9832 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
9833 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a)
9834 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
9835 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9836 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x3_t, ptr [[RETVAL]], align 8
9837 // CHECK: ret %struct.poly8x8x3_t [[TMP5]]
poly8x8x3_t test_vld3_p8(poly8_t const *a) {
  return vld3_p8(a);
}
9842 // CHECK-LABEL: @test_vld3_p16(
9843 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
9844 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
9845 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a)
9846 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
9847 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
9848 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, ptr [[RETVAL]], align 8
9849 // CHECK: ret %struct.poly16x4x3_t [[TMP6]]
poly16x4x3_t test_vld3_p16(poly16_t const *a) {
  return vld3_p16(a);
}
9854 // CHECK-LABEL: @test_vld4q_u8(
9855 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
9856 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
9857 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a)
9858 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
9859 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9860 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x4_t, ptr [[RETVAL]], align 16
9861 // CHECK: ret %struct.uint8x16x4_t [[TMP5]]
uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
  return vld4q_u8(a);
}
9866 // CHECK-LABEL: @test_vld4q_u16(
9867 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
9868 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
9869 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a)
9870 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
9871 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9872 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, ptr [[RETVAL]], align 16
9873 // CHECK: ret %struct.uint16x8x4_t [[TMP6]]
uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
  return vld4q_u16(a);
}
9878 // CHECK-LABEL: @test_vld4q_u32(
9879 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
9880 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
9881 // CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %a)
9882 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]]
9883 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9884 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, ptr [[RETVAL]], align 16
9885 // CHECK: ret %struct.uint32x4x4_t [[TMP6]]
uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
  return vld4q_u32(a);
}
9890 // CHECK-LABEL: @test_vld4q_u64(
9891 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
9892 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
9893 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %a)
9894 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
9895 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9896 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, ptr [[RETVAL]], align 16
9897 // CHECK: ret %struct.uint64x2x4_t [[TMP6]]
uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
  return vld4q_u64(a);
}
9902 // CHECK-LABEL: @test_vld4q_s8(
9903 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
9904 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
9905 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a)
9906 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
9907 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9908 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x4_t, ptr [[RETVAL]], align 16
9909 // CHECK: ret %struct.int8x16x4_t [[TMP5]]
int8x16x4_t test_vld4q_s8(int8_t const *a) {
  return vld4q_s8(a);
}
9914 // CHECK-LABEL: @test_vld4q_s16(
9915 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
9916 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
9917 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a)
9918 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
9919 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9920 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, ptr [[RETVAL]], align 16
9921 // CHECK: ret %struct.int16x8x4_t [[TMP6]]
int16x8x4_t test_vld4q_s16(int16_t const *a) {
  return vld4q_s16(a);
}
9926 // CHECK-LABEL: @test_vld4q_s32(
9927 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
9928 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
9929 // CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %a)
9930 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]]
9931 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9932 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, ptr [[RETVAL]], align 16
9933 // CHECK: ret %struct.int32x4x4_t [[TMP6]]
int32x4x4_t test_vld4q_s32(int32_t const *a) {
  return vld4q_s32(a);
}
9938 // CHECK-LABEL: @test_vld4q_s64(
9939 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
9940 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
9941 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %a)
9942 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
9943 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9944 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, ptr [[RETVAL]], align 16
9945 // CHECK: ret %struct.int64x2x4_t [[TMP6]]
int64x2x4_t test_vld4q_s64(int64_t const *a) {
  return vld4q_s64(a);
}
9950 // CHECK-LABEL: @test_vld4q_f16(
9951 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
9952 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
9953 // CHECK: [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0(ptr %a)
9954 // CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], ptr [[__RET]]
9955 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9956 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, ptr [[RETVAL]], align 16
9957 // CHECK: ret %struct.float16x8x4_t [[TMP6]]
float16x8x4_t test_vld4q_f16(float16_t const *a) {
  return vld4q_f16(a);
}
9962 // CHECK-LABEL: @test_vld4q_f32(
9963 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
9964 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
9965 // CHECK: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr %a)
9966 // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], ptr [[__RET]]
9967 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9968 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, ptr [[RETVAL]], align 16
9969 // CHECK: ret %struct.float32x4x4_t [[TMP6]]
float32x4x4_t test_vld4q_f32(float32_t const *a) {
  return vld4q_f32(a);
}
9974 // CHECK-LABEL: @test_vld4q_f64(
9975 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
9976 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
9977 // CHECK: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0(ptr %a)
9978 // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], ptr [[__RET]]
9979 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9980 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16
9981 // CHECK: ret %struct.float64x2x4_t [[TMP6]]
float64x2x4_t test_vld4q_f64(float64_t const *a) {
  return vld4q_f64(a);
}
9986 // CHECK-LABEL: @test_vld4q_p8(
9987 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
9988 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
9989 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a)
9990 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
9991 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
9992 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x4_t, ptr [[RETVAL]], align 16
9993 // CHECK: ret %struct.poly8x16x4_t [[TMP5]]
poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
  return vld4q_p8(a);
}
9998 // CHECK-LABEL: @test_vld4q_p16(
9999 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
10000 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
10001 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a)
10002 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
10003 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
10004 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, ptr [[RETVAL]], align 16
10005 // CHECK: ret %struct.poly16x8x4_t [[TMP6]]
poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
  return vld4q_p16(a);
}
10010 // CHECK-LABEL: @test_vld4_u8(
10011 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
10012 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
10013 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
10014 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
10015 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10016 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x4_t, ptr [[RETVAL]], align 8
10017 // CHECK: ret %struct.uint8x8x4_t [[TMP5]]
uint8x8x4_t test_vld4_u8(uint8_t const *a) {
  return vld4_u8(a);
}
10022 // CHECK-LABEL: @test_vld4_u16(
10023 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
10024 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
10025 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
10026 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
10027 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10028 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, ptr [[RETVAL]], align 8
10029 // CHECK: ret %struct.uint16x4x4_t [[TMP6]]
uint16x4x4_t test_vld4_u16(uint16_t const *a) {
  return vld4_u16(a);
}
10034 // CHECK-LABEL: @test_vld4_u32(
10035 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
10036 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
10037 // CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %a)
10038 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]]
10039 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10040 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, ptr [[RETVAL]], align 8
10041 // CHECK: ret %struct.uint32x2x4_t [[TMP6]]
uint32x2x4_t test_vld4_u32(uint32_t const *a) {
  return vld4_u32(a);
}
10046 // CHECK-LABEL: @test_vld4_u64(
10047 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
10048 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
10049 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %a)
10050 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
10051 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10052 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, ptr [[RETVAL]], align 8
10053 // CHECK: ret %struct.uint64x1x4_t [[TMP6]]
uint64x1x4_t test_vld4_u64(uint64_t const *a) {
  return vld4_u64(a);
}
10058 // CHECK-LABEL: @test_vld4_s8(
10059 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
10060 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
10061 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
10062 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
10063 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10064 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x4_t, ptr [[RETVAL]], align 8
10065 // CHECK: ret %struct.int8x8x4_t [[TMP5]]
int8x8x4_t test_vld4_s8(int8_t const *a) {
  return vld4_s8(a);
}
10070 // CHECK-LABEL: @test_vld4_s16(
10071 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
10072 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
10073 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
10074 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
10075 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10076 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, ptr [[RETVAL]], align 8
10077 // CHECK: ret %struct.int16x4x4_t [[TMP6]]
int16x4x4_t test_vld4_s16(int16_t const *a) {
  return vld4_s16(a);
}
10082 // CHECK-LABEL: @test_vld4_s32(
10083 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
10084 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
10085 // CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %a)
10086 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]]
10087 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10088 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, ptr [[RETVAL]], align 8
10089 // CHECK: ret %struct.int32x2x4_t [[TMP6]]
int32x2x4_t test_vld4_s32(int32_t const *a) {
  return vld4_s32(a);
}
10094 // CHECK-LABEL: @test_vld4_s64(
10095 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
10096 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
10097 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %a)
10098 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
10099 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10100 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, ptr [[RETVAL]], align 8
10101 // CHECK: ret %struct.int64x1x4_t [[TMP6]]
int64x1x4_t test_vld4_s64(int64_t const *a) {
  return vld4_s64(a);
}
10106 // CHECK-LABEL: @test_vld4_f16(
10107 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
10108 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
10109 // CHECK: [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0(ptr %a)
10110 // CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], ptr [[__RET]]
10111 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10112 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, ptr [[RETVAL]], align 8
10113 // CHECK: ret %struct.float16x4x4_t [[TMP6]]
float16x4x4_t test_vld4_f16(float16_t const *a) {
  return vld4_f16(a);
}
10118 // CHECK-LABEL: @test_vld4_f32(
10119 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
10120 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
10121 // CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0(ptr %a)
10122 // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], ptr [[__RET]]
10123 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10124 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, ptr [[RETVAL]], align 8
10125 // CHECK: ret %struct.float32x2x4_t [[TMP6]]
float32x2x4_t test_vld4_f32(float32_t const *a) {
  return vld4_f32(a);
}
10130 // CHECK-LABEL: @test_vld4_f64(
10131 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
10132 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
10133 // CHECK: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr %a)
10134 // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], ptr [[__RET]]
10135 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10136 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8
10137 // CHECK: ret %struct.float64x1x4_t [[TMP6]]
float64x1x4_t test_vld4_f64(float64_t const *a) {
  return vld4_f64(a);
}
10142 // CHECK-LABEL: @test_vld4_p8(
10143 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
10144 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
10145 // CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
10146 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
10147 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10148 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x4_t, ptr [[RETVAL]], align 8
10149 // CHECK: ret %struct.poly8x8x4_t [[TMP5]]
poly8x8x4_t test_vld4_p8(poly8_t const *a) {
  return vld4_p8(a);
}
10154 // CHECK-LABEL: @test_vld4_p16(
10155 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
10156 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
10157 // CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
10158 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
10159 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
10160 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, ptr [[RETVAL]], align 8
10161 // CHECK: ret %struct.poly16x4x4_t [[TMP6]]
poly16x4x4_t test_vld4_p16(poly16_t const *a) {
  return vld4_p16(a);
}
10166 // CHECK-LABEL: @test_vst1q_u8(
10167 // CHECK: store <16 x i8> %b, ptr %a
void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
  vst1q_u8(a, b);
}
10173 // CHECK-LABEL: @test_vst1q_u16(
10174 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10175 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10176 // CHECK: store <8 x i16> [[TMP3]], ptr %a
void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
  vst1q_u16(a, b);
}
10182 // CHECK-LABEL: @test_vst1q_u32(
10183 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
10184 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
10185 // CHECK: store <4 x i32> [[TMP3]], ptr %a
void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
  vst1q_u32(a, b);
}
10191 // CHECK-LABEL: @test_vst1q_u64(
10192 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
10193 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
10194 // CHECK: store <2 x i64> [[TMP3]], ptr %a
void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
  vst1q_u64(a, b);
}
10200 // CHECK-LABEL: @test_vst1q_s8(
10201 // CHECK: store <16 x i8> %b, ptr %a
void test_vst1q_s8(int8_t *a, int8x16_t b) {
  vst1q_s8(a, b);
}
10207 // CHECK-LABEL: @test_vst1q_s16(
10208 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10209 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10210 // CHECK: store <8 x i16> [[TMP3]], ptr %a
void test_vst1q_s16(int16_t *a, int16x8_t b) {
  vst1q_s16(a, b);
}
10216 // CHECK-LABEL: @test_vst1q_s32(
10217 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
10218 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
10219 // CHECK: store <4 x i32> [[TMP3]], ptr %a
void test_vst1q_s32(int32_t *a, int32x4_t b) {
  vst1q_s32(a, b);
}
10225 // CHECK-LABEL: @test_vst1q_s64(
10226 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
10227 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
10228 // CHECK: store <2 x i64> [[TMP3]], ptr %a
void test_vst1q_s64(int64_t *a, int64x2_t b) {
  vst1q_s64(a, b);
}
10234 // CHECK-LABEL: @test_vst1q_f16(
10235 // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
10236 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
10237 // CHECK: store <8 x half> [[TMP3]], ptr %a
void test_vst1q_f16(float16_t *a, float16x8_t b) {
  vst1q_f16(a, b);
}
10243 // CHECK-LABEL: @test_vst1q_f32(
10244 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
10245 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
10246 // CHECK: store <4 x float> [[TMP3]], ptr %a
void test_vst1q_f32(float32_t *a, float32x4_t b) {
  vst1q_f32(a, b);
}
10252 // CHECK-LABEL: @test_vst1q_f64(
10253 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
10254 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
10255 // CHECK: store <2 x double> [[TMP3]], ptr %a
void test_vst1q_f64(float64_t *a, float64x2_t b) {
  vst1q_f64(a, b);
}
10261 // CHECK-LABEL: @test_vst1q_p8(
10262 // CHECK: store <16 x i8> %b, ptr %a
void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
  vst1q_p8(a, b);
}
10268 // CHECK-LABEL: @test_vst1q_p16(
10269 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10270 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
10271 // CHECK: store <8 x i16> [[TMP3]], ptr %a
void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
  vst1q_p16(a, b);
}
10277 // CHECK-LABEL: @test_vst1_u8(
10278 // CHECK: store <8 x i8> %b, ptr %a
void test_vst1_u8(uint8_t *a, uint8x8_t b) {
  vst1_u8(a, b);
}
10284 // CHECK-LABEL: @test_vst1_u16(
10285 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10286 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10287 // CHECK: store <4 x i16> [[TMP3]], ptr %a
void test_vst1_u16(uint16_t *a, uint16x4_t b) {
  vst1_u16(a, b);
}
10293 // CHECK-LABEL: @test_vst1_u32(
10294 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10295 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
10296 // CHECK: store <2 x i32> [[TMP3]], ptr %a
void test_vst1_u32(uint32_t *a, uint32x2_t b) {
  vst1_u32(a, b);
}
10302 // CHECK-LABEL: @test_vst1_u64(
10303 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
10304 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
10305 // CHECK: store <1 x i64> [[TMP3]], ptr %a
void test_vst1_u64(uint64_t *a, uint64x1_t b) {
  vst1_u64(a, b);
}
10311 // CHECK-LABEL: @test_vst1_s8(
10312 // CHECK: store <8 x i8> %b, ptr %a
void test_vst1_s8(int8_t *a, int8x8_t b) {
  vst1_s8(a, b);
}
10318 // CHECK-LABEL: @test_vst1_s16(
10319 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10320 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10321 // CHECK: store <4 x i16> [[TMP3]], ptr %a
void test_vst1_s16(int16_t *a, int16x4_t b) {
  vst1_s16(a, b);
}
10327 // CHECK-LABEL: @test_vst1_s32(
10328 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10329 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
10330 // CHECK: store <2 x i32> [[TMP3]], ptr %a
void test_vst1_s32(int32_t *a, int32x2_t b) {
  vst1_s32(a, b);
}
10336 // CHECK-LABEL: @test_vst1_s64(
10337 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
10338 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
10339 // CHECK: store <1 x i64> [[TMP3]], ptr %a
void test_vst1_s64(int64_t *a, int64x1_t b) {
  vst1_s64(a, b);
}
10345 // CHECK-LABEL: @test_vst1_f16(
10346 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
10347 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
10348 // CHECK: store <4 x half> [[TMP3]], ptr %a
void test_vst1_f16(float16_t *a, float16x4_t b) {
  vst1_f16(a, b);
}
10354 // CHECK-LABEL: @test_vst1_f32(
10355 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
10356 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
10357 // CHECK: store <2 x float> [[TMP3]], ptr %a
void test_vst1_f32(float32_t *a, float32x2_t b) {
  vst1_f32(a, b);
}
10363 // CHECK-LABEL: @test_vst1_f64(
10364 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
10365 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
10366 // CHECK: store <1 x double> [[TMP3]], ptr %a
void test_vst1_f64(float64_t *a, float64x1_t b) {
  vst1_f64(a, b);
}
10372 // CHECK-LABEL: @test_vst1_p8(
10373 // CHECK: store <8 x i8> %b, ptr %a
void test_vst1_p8(poly8_t *a, poly8x8_t b) {
  vst1_p8(a, b);
}
10379 // CHECK-LABEL: @test_vst1_p16(
10380 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10381 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
10382 // CHECK: store <4 x i16> [[TMP3]], ptr %a
void test_vst1_p16(poly16_t *a, poly16x4_t b) {
  vst1_p16(a, b);
}
10388 // CHECK-LABEL: @test_vst2q_u8(
10389 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
10390 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
10391 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, ptr [[B]], i32 0, i32 0
10392 // CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10393 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10394 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
10395 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
10396 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
10397 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
10398 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
10399 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
10400 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
  vst2q_u8(a, b);
}
10406 // CHECK-LABEL: @test_vst2q_u16(
10407 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
10408 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
10409 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0
10410 // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10411 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10412 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
10413 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
10414 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
10415 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10416 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
10417 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
10418 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
10419 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10420 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10421 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10422 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
  vst2q_u16(a, b);
}
10428 // CHECK-LABEL: @test_vst2q_u32(
10429 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
10430 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
10431 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0
10432 // CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10433 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10434 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
10435 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
10436 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
10437 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
10438 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
10439 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
10440 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
10441 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
10442 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
10443 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
10444 // CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a)
void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
  vst2q_u32(a, b);
}
10450 // CHECK-LABEL: @test_vst2q_u64(
10451 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
10452 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
10453 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, ptr [[B]], i32 0, i32 0
10454 // CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10455 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10456 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
10457 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
10458 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
10459 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
10460 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
10461 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
10462 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
10463 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
10464 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
10465 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
10466 // CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
  vst2q_u64(a, b);
}
10472 // CHECK-LABEL: @test_vst2q_s8(
10473 // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
10474 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
10475 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, ptr [[B]], i32 0, i32 0
10476 // CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10477 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10478 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
10479 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
10480 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
10481 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
10482 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
10483 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
10484 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
  vst2q_s8(a, b);
}
10490 // CHECK-LABEL: @test_vst2q_s16(
10491 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
10492 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
10493 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0
10494 // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10495 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10496 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
10497 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
10498 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
10499 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10500 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
10501 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
10502 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
10503 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10504 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10505 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10506 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
  vst2q_s16(a, b);
}
10512 // CHECK-LABEL: @test_vst2q_s32(
10513 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
10514 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
10515 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0
10516 // CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10517 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10518 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
10519 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
10520 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
10521 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
10522 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
10523 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
10524 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
10525 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
10526 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
10527 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
10528 // CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a)
void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
  vst2q_s32(a, b);
}
10534 // CHECK-LABEL: @test_vst2q_s64(
10535 // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
10536 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
10537 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, ptr [[B]], i32 0, i32 0
10538 // CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10539 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10540 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
10541 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
10542 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
10543 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
10544 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
10545 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
10546 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
10547 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
10548 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
10549 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
10550 // CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
  vst2q_s64(a, b);
}
10556 // CHECK-LABEL: @test_vst2q_f16(
10557 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
10558 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
10559 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0
10560 // CHECK: store [2 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10561 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10562 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
10563 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i64 0, i64 0
10564 // CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
10565 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
10566 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
10567 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
10568 // CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
10569 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
10570 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
10571 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
10572 // CHECK: call void @llvm.aarch64.neon.st2.v8f16.p0(<8 x half> [[TMP7]], <8 x half> [[TMP8]], ptr %a)
void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
  vst2q_f16(a, b);
}
10578 // CHECK-LABEL: @test_vst2q_f32(
10579 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
10580 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
10581 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0
10582 // CHECK: store [2 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10583 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10584 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
10585 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i64 0, i64 0
10586 // CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
10587 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
10588 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
10589 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
10590 // CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
10591 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
10592 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
10593 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
10594 // CHECK: call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> [[TMP7]], <4 x float> [[TMP8]], ptr %a)
void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
  vst2q_f32(a, b);
}
10600 // CHECK-LABEL: @test_vst2q_f64(
10601 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
10602 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
10603 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0
10604 // CHECK: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10605 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10606 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
10607 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0
10608 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
10609 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
10610 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
10611 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
10612 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
10613 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
10614 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
10615 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
10616 // CHECK: call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr %a)
void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
  vst2q_f64(a, b);
}
10622 // CHECK-LABEL: @test_vst2q_p8(
10623 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
10624 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
10625 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, ptr [[B]], i32 0, i32 0
10626 // CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10627 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10628 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
10629 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
10630 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
10631 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
10632 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
10633 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
10634 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}
10640 // CHECK-LABEL: @test_vst2q_p16(
10641 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
10642 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
10643 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0
10644 // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10645 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
10646 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
10647 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
10648 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
10649 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10650 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
10651 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
10652 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
10653 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10654 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10655 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10656 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
  vst2q_p16(a, b);
}
10662 // CHECK-LABEL: @test_vst2_u8(
10663 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
10664 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
10665 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
10666 // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10667 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10668 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
10669 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
10670 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
10671 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
10672 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
10673 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
10674 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
  vst2_u8(a, b);
}
10680 // CHECK-LABEL: @test_vst2_u16(
10681 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
10682 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
10683 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0
10684 // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10685 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10686 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
10687 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
10688 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
10689 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
10690 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
10691 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
10692 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
10693 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
10694 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
10695 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
10696 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
  vst2_u16(a, b);
}
10702 // CHECK-LABEL: @test_vst2_u32(
10703 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
10704 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
10705 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0
10706 // CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10707 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10708 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
10709 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
10710 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
10711 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
10712 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
10713 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
10714 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
10715 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
10716 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
10717 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
10718 // CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a)
void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
  vst2_u32(a, b);
}
10724 // CHECK-LABEL: @test_vst2_u64(
10725 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
10726 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
10727 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, ptr [[B]], i32 0, i32 0
10728 // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10729 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10730 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
10731 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
10732 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
10733 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
10734 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
10735 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
10736 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
10737 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
10738 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
10739 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
10740 // CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
  vst2_u64(a, b);
}
10746 // CHECK-LABEL: @test_vst2_s8(
10747 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
10748 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
10749 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
10750 // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10751 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10752 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
10753 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
10754 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
10755 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
10756 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
10757 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
10758 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
void test_vst2_s8(int8_t *a, int8x8x2_t b) {
  vst2_s8(a, b);
}
10764 // CHECK-LABEL: @test_vst2_s16(
10765 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
10766 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
10767 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0
10768 // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10769 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10770 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
10771 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
10772 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
10773 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
10774 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
10775 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
10776 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
10777 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
10778 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
10779 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
10780 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
void test_vst2_s16(int16_t *a, int16x4x2_t b) {
  vst2_s16(a, b);
}
10786 // CHECK-LABEL: @test_vst2_s32(
10787 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
10788 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
10789 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0
10790 // CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10791 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10792 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
10793 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
10794 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
10795 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
10796 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
10797 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
10798 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
10799 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
10800 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
10801 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
10802 // CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a)
void test_vst2_s32(int32_t *a, int32x2x2_t b) {
  vst2_s32(a, b);
}
10808 // CHECK-LABEL: @test_vst2_s64(
10809 // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
10810 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
10811 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, ptr [[B]], i32 0, i32 0
10812 // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10813 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10814 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
10815 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
10816 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
10817 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
10818 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
10819 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
10820 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
10821 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
10822 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
10823 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
10824 // CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
void test_vst2_s64(int64_t *a, int64x1x2_t b) {
  vst2_s64(a, b);
}
10830 // CHECK-LABEL: @test_vst2_f16(
10831 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
10832 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
10833 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0
10834 // CHECK: store [2 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10835 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10836 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
10837 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i64 0, i64 0
10838 // CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
10839 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
10840 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
10841 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
10842 // CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
10843 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
10844 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
10845 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
10846 // CHECK: call void @llvm.aarch64.neon.st2.v4f16.p0(<4 x half> [[TMP7]], <4 x half> [[TMP8]], ptr %a)
void test_vst2_f16(float16_t *a, float16x4x2_t b) {
  vst2_f16(a, b);
}
10852 // CHECK-LABEL: @test_vst2_f32(
10853 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
10854 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
10855 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0
10856 // CHECK: store [2 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10857 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10858 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
10859 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i64 0, i64 0
10860 // CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
10861 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
10862 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
10863 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
10864 // CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
10865 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
10866 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
10867 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
10868 // CHECK: call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> [[TMP7]], <2 x float> [[TMP8]], ptr %a)
void test_vst2_f32(float32_t *a, float32x2x2_t b) {
  vst2_f32(a, b);
}
10874 // CHECK-LABEL: @test_vst2_f64(
10875 // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
10876 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
10877 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0
10878 // CHECK: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10879 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10880 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
10881 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0
10882 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
10883 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
10884 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
10885 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
10886 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
10887 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
10888 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
10889 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
10890 // CHECK: call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr %a)
void test_vst2_f64(float64_t *a, float64x1x2_t b) {
  vst2_f64(a, b);
}
10896 // CHECK-LABEL: @test_vst2_p8(
10897 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
10898 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
10899 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
10900 // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10901 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10902 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
10903 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
10904 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
10905 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
10906 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
10907 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
10908 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
  vst2_p8(a, b);
}
10914 // CHECK-LABEL: @test_vst2_p16(
10915 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
10916 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
10917 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0
10918 // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
10919 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
10920 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
10921 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
10922 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
10923 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
10924 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
10925 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
10926 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
10927 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
10928 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
10929 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
10930 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
  vst2_p16(a, b);
}
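// The tests that follow exercise the three-register vst3/vst3q variants. The
// lowering mirrors vst2, but with a [3 x <...>] coerce array, a 48-byte
// (Q forms) or 24-byte (D forms) memcpy, and a call to llvm.aarch64.neon.st3
// taking three vector operands.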
10936 // CHECK-LABEL: @test_vst3q_u8(
10937 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
10938 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
10939 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[B]], i32 0, i32 0
10940 // CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10941 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
10942 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
10943 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
10944 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
10945 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
10946 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
10947 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
10948 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
10949 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
10950 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
10951 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
  vst3q_u8(a, b);
}
10957 // CHECK-LABEL: @test_vst3q_u16(
10958 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
10959 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
10960 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0
10961 // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10962 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
10963 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
10964 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
10965 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
10966 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
10967 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
10968 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
10969 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
10970 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
10971 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
10972 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
10973 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
10974 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
10975 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
10976 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
10977 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
10978 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
  vst3q_u16(a, b);
}
10984 // CHECK-LABEL: @test_vst3q_u32(
10985 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
10986 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
10987 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0
10988 // CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
10989 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
10990 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
10991 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
10992 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
10993 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
10994 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
10995 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
10996 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
10997 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
10998 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
10999 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
11000 // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
11001 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11002 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11003 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11004 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11005 // CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a)
void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
  vst3q_u32(a, b);
}
11011 // CHECK-LABEL: @test_vst3q_u64(
11012 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
11013 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
11014 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, ptr [[B]], i32 0, i32 0
11015 // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11016 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11017 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
11018 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
11019 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
11020 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11021 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
11022 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
11023 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
11024 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11025 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
11026 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
11027 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
11028 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11029 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11030 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11031 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11032 // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
  vst3q_u64(a, b);
}
11038 // CHECK-LABEL: @test_vst3q_s8(
11039 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
11040 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
11041 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[B]], i32 0, i32 0
11042 // CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11043 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11044 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
11045 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
11046 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
11047 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
11048 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
11049 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
11050 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
11051 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
11052 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
11053 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
  vst3q_s8(a, b);
}
11059 // CHECK-LABEL: @test_vst3q_s16(
11060 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
11061 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
11062 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0
11063 // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11064 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11065 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
11066 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
11067 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
11068 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11069 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
11070 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
11071 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
11072 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11073 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
11074 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
11075 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
11076 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11077 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11078 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11079 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11080 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
  vst3q_s16(a, b);
}
11086 // CHECK-LABEL: @test_vst3q_s32(
11087 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
11088 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
11089 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0
11090 // CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11091 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11092 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
11093 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
11094 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
11095 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11096 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
11097 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
11098 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
11099 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11100 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
11101 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
11102 // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
11103 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11104 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11105 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11106 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11107 // CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a)
void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
  vst3q_s32(a, b);
}
11113 // CHECK-LABEL: @test_vst3q_s64(
11114 // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
11115 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
11116 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, ptr [[B]], i32 0, i32 0
11117 // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11118 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11119 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
11120 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
11121 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
11122 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11123 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
11124 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
11125 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
11126 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11127 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
11128 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
11129 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
11130 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11131 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11132 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11133 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11134 // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
  vst3q_s64(a, b);
}
11140 // CHECK-LABEL: @test_vst3q_f16(
11141 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
11142 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
11143 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0
11144 // CHECK: store [3 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11145 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11146 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
11147 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i64 0, i64 0
11148 // CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
11149 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
11150 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
11151 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
11152 // CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
11153 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
11154 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
11155 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i64 0, i64 2
11156 // CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
11157 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
11158 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
11159 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
11160 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
11161 // CHECK: call void @llvm.aarch64.neon.st3.v8f16.p0(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], ptr %a)
void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
  vst3q_f16(a, b);
}
11167 // CHECK-LABEL: @test_vst3q_f32(
11168 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
11169 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
11170 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0
11171 // CHECK: store [3 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11172 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11173 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
11174 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i64 0, i64 0
11175 // CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
11176 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11177 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
11178 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
11179 // CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
11180 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
11181 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
11182 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i64 0, i64 2
11183 // CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
11184 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
11185 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
11186 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
11187 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
11188 // CHECK: call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], ptr %a)
void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
  vst3q_f32(a, b);
}
11194 // CHECK-LABEL: @test_vst3q_f64(
11195 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
11196 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
11197 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0
11198 // CHECK: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11199 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11200 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
11201 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0
11202 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
11203 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
11204 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
11205 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
11206 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
11207 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
11208 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
11209 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
11210 // CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
11211 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
11212 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
11213 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
11214 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
11215 // CHECK: call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr %a)
void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
  vst3q_f64(a, b);
}
11221 // CHECK-LABEL: @test_vst3q_p8(
11222 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
11223 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
11224 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[B]], i32 0, i32 0
11225 // CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11226 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11227 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
11228 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
11229 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
11230 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
11231 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
11232 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
11233 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
11234 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
11235 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
11236 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
  vst3q_p8(a, b);
}
11242 // CHECK-LABEL: @test_vst3q_p16(
11243 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
11244 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
11245 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0
11246 // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11247 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
11248 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
11249 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
11250 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
11251 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11252 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
11253 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
11254 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
11255 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11256 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
11257 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
11258 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
11259 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11260 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11261 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11262 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11263 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
  vst3q_p16(a, b);
}
11269 // CHECK-LABEL: @test_vst3_u8(
11270 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
11271 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
11272 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
11273 // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11274 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11275 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
11276 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
11277 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
11278 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
11279 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
11280 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
11281 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
11282 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
11283 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
11284 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
  vst3_u8(a, b);
}
11290 // CHECK-LABEL: @test_vst3_u16(
11291 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
11292 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
11293 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0
11294 // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11295 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11296 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
11297 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
11298 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
11299 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11300 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
11301 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
11302 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
11303 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11304 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
11305 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
11306 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
11307 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
11308 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11309 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11310 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
11311 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
  vst3_u16(a, b);
}
11317 // CHECK-LABEL: @test_vst3_u32(
11318 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
11319 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
11320 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0
11321 // CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11322 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11323 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
11324 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
11325 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
11326 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
11327 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
11328 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
11329 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
11330 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
11331 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
11332 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
11333 // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
11334 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
11335 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
11336 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
11337 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
11338 // CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a)
void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
  vst3_u32(a, b);
}
11344 // CHECK-LABEL: @test_vst3_u64(
11345 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
11346 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
11347 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[B]], i32 0, i32 0
11348 // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11349 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11350 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
11351 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
11352 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
11353 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
11354 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
11355 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
11356 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
11357 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
11358 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
11359 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
11360 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
11361 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
11362 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
11363 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
11364 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
11365 // CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
  vst3_u64(a, b);
}
11371 // CHECK-LABEL: @test_vst3_s8(
11372 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
11373 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
11374 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
11375 // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11376 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11377 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
11378 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
11379 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
11380 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
11381 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
11382 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
11383 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
11384 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
11385 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
11386 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
void test_vst3_s8(int8_t *a, int8x8x3_t b) {
  vst3_s8(a, b);
}
11392 // CHECK-LABEL: @test_vst3_s16(
11393 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
11394 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
11395 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0
11396 // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11397 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11398 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
11399 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
11400 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
11401 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11402 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
11403 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
11404 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
11405 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11406 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
11407 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
11408 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
11409 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
11410 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11411 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11412 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
11413 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
void test_vst3_s16(int16_t *a, int16x4x3_t b) {
  vst3_s16(a, b);
}
11419 // CHECK-LABEL: @test_vst3_s32(
11420 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
11421 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
11422 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0
11423 // CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11424 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11425 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
11426 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
11427 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
11428 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
11429 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
11430 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
11431 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
11432 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
11433 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
11434 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
11435 // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
11436 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
11437 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
11438 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
11439 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
11440 // CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a)
void test_vst3_s32(int32_t *a, int32x2x3_t b) {
  vst3_s32(a, b);
}
11446 // CHECK-LABEL: @test_vst3_s64(
11447 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
11448 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
11449 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[B]], i32 0, i32 0
11450 // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11451 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11452 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
11453 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
11454 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
11455 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
11456 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
11457 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
11458 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
11459 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
11460 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
11461 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
11462 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
11463 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
11464 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
11465 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
11466 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
11467 // CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
void test_vst3_s64(int64_t *a, int64x1x3_t b) {
  vst3_s64(a, b);
}
11473 // CHECK-LABEL: @test_vst3_f16(
11474 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
11475 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
11476 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0
11477 // CHECK: store [3 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11478 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11479 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
11480 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i64 0, i64 0
11481 // CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
11482 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
11483 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
11484 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
11485 // CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
11486 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
11487 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
11488 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
11489 // CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
11490 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
11491 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
11492 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
11493 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
11494 // CHECK: call void @llvm.aarch64.neon.st3.v4f16.p0(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], ptr %a)
void test_vst3_f16(float16_t *a, float16x4x3_t b) {
  vst3_f16(a, b);
}

11500 // CHECK-LABEL: @test_vst3_f32(
11501 // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
11502 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
11503 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0
11504 // CHECK: store [3 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11505 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11506 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
11507 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i64 0, i64 0
11508 // CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
11509 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
11510 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
11511 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
11512 // CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
11513 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
11514 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
11515 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
11516 // CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
11517 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
11518 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
11519 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
11520 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
11521 // CHECK: call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], ptr %a)
void test_vst3_f32(float32_t *a, float32x2x3_t b) {
  vst3_f32(a, b);
}

11527 // CHECK-LABEL: @test_vst3_f64(
11528 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
11529 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
11530 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0
11531 // CHECK: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11532 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11533 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
11534 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0
11535 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
11536 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
11537 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
11538 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
11539 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
11540 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
11541 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
11542 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
11543 // CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
11544 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
11545 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
11546 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
11547 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
11548 // CHECK: call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr %a)
void test_vst3_f64(float64_t *a, float64x1x3_t b) {
  vst3_f64(a, b);
}

11554 // CHECK-LABEL: @test_vst3_p8(
11555 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
11556 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
11557 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
11558 // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11559 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11560 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
11561 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
11562 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
11563 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
11564 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
11565 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
11566 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
11567 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
11568 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
11569 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
  vst3_p8(a, b);
}

11575 // CHECK-LABEL: @test_vst3_p16(
11576 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
11577 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
11578 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0
11579 // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11580 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
11581 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
11582 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
11583 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
11584 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
11585 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
11586 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
11587 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
11588 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
11589 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
11590 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
11591 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
11592 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
11593 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
11594 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
11595 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
11596 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
  vst3_p16(a, b);
}

11602 // CHECK-LABEL: @test_vst4q_u8(
11603 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
11604 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
11605 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[B]], i32 0, i32 0
11606 // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11607 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11608 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
11609 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
11610 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
11611 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
11612 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
11613 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
11614 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
11615 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
11616 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
11617 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
11618 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
11619 // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
11620 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}

11626 // CHECK-LABEL: @test_vst4q_u16(
11627 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
11628 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
11629 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0
11630 // CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11631 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11632 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
11633 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
11634 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
11635 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11636 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
11637 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
11638 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
11639 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11640 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
11641 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
11642 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
11643 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11644 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
11645 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
11646 // CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
11647 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
11648 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11649 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11650 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11651 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
11652 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}

11658 // CHECK-LABEL: @test_vst4q_u32(
11659 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
11660 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
11661 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0
11662 // CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11663 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11664 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
11665 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
11666 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
11667 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11668 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
11669 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
11670 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
11671 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11672 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
11673 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
11674 // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
11675 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11676 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
11677 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3
11678 // CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
11679 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
11680 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11681 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11682 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11683 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
11684 // CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a)
void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}

11690 // CHECK-LABEL: @test_vst4q_u64(
11691 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
11692 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
11693 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[B]], i32 0, i32 0
11694 // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11695 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11696 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
11697 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
11698 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
11699 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11700 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
11701 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
11702 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
11703 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11704 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
11705 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
11706 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
11707 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11708 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
11709 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
11710 // CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
11711 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
11712 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11713 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11714 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11715 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
11716 // CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
  vst4q_u64(a, b);
}

11722 // CHECK-LABEL: @test_vst4q_s8(
11723 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
11724 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
11725 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[B]], i32 0, i32 0
11726 // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11727 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11728 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
11729 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
11730 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
11731 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
11732 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
11733 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
11734 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
11735 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
11736 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
11737 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
11738 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
11739 // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
11740 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
  vst4q_s8(a, b);
}

11746 // CHECK-LABEL: @test_vst4q_s16(
11747 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
11748 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
11749 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0
11750 // CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11751 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11752 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
11753 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
11754 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
11755 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11756 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
11757 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
11758 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
11759 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11760 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
11761 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
11762 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
11763 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11764 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
11765 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
11766 // CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
11767 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
11768 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11769 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11770 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11771 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
11772 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
  vst4q_s16(a, b);
}

11778 // CHECK-LABEL: @test_vst4q_s32(
11779 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
11780 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
11781 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0
11782 // CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11783 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11784 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
11785 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
11786 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
11787 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
11788 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
11789 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
11790 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
11791 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
11792 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
11793 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
11794 // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
11795 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
11796 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
11797 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3
11798 // CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
11799 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
11800 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
11801 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
11802 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
11803 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
11804 // CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a)
void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
  vst4q_s32(a, b);
}

11810 // CHECK-LABEL: @test_vst4q_s64(
11811 // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
11812 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
11813 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[B]], i32 0, i32 0
11814 // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11815 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11816 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
11817 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
11818 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
11819 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
11820 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
11821 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
11822 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
11823 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
11824 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
11825 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
11826 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
11827 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
11828 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
11829 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
11830 // CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
11831 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
11832 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
11833 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
11834 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
11835 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
11836 // CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
  vst4q_s64(a, b);
}

11842 // CHECK-LABEL: @test_vst4q_f16(
11843 // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
11844 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
11845 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
11846 // CHECK: store [4 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11847 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11848 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
11849 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i64 0, i64 0
11850 // CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
11851 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
11852 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
11853 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
11854 // CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
11855 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
11856 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
11857 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i64 0, i64 2
11858 // CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
11859 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
11860 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
11861 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i64 0, i64 3
11862 // CHECK: [[TMP9:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16
11863 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
11864 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
11865 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
11866 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
11867 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
11868 // CHECK: call void @llvm.aarch64.neon.st4.v8f16.p0(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], ptr %a)
void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
  vst4q_f16(a, b);
}

11874 // CHECK-LABEL: @test_vst4q_f32(
11875 // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
11876 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
11877 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
11878 // CHECK: store [4 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11879 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11880 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
11881 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i64 0, i64 0
11882 // CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
11883 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
11884 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
11885 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
11886 // CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
11887 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
11888 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
11889 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i64 0, i64 2
11890 // CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
11891 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
11892 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
11893 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i64 0, i64 3
11894 // CHECK: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16
11895 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
11896 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
11897 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
11898 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
11899 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
11900 // CHECK: call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], ptr %a)
void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
  vst4q_f32(a, b);
}

11906 // CHECK-LABEL: @test_vst4q_f64(
11907 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
11908 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
11909 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0
11910 // CHECK: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11911 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11912 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
11913 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0
11914 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
11915 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
11916 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
11917 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
11918 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
11919 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
11920 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
11921 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
11922 // CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
11923 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
11924 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
11925 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3
11926 // CHECK: [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16
11927 // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
11928 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
11929 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
11930 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
11931 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
11932 // CHECK: call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], ptr %a)
void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
  vst4q_f64(a, b);
}

11938 // CHECK-LABEL: @test_vst4q_p8(
11939 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
11940 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
11941 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[B]], i32 0, i32 0
11942 // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11943 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11944 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
11945 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
11946 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
11947 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
11948 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
11949 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
11950 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
11951 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
11952 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
11953 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
11954 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
11955 // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
11956 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}

11962 // CHECK-LABEL: @test_vst4q_p16(
11963 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
11964 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
11965 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
11966 // CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
11967 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
11968 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
11969 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
11970 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
11971 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
11972 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
11973 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
11974 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
11975 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
11976 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
11977 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
11978 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
11979 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
11980 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
11981 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
11982 // CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
11983 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
11984 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
11985 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
11986 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
11987 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
11988 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}

11994 // CHECK-LABEL: @test_vst4_u8(
11995 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
11996 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
11997 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
11998 // CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
11999 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12000 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
12001 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
12002 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
12003 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
12004 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
12005 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
12006 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
12007 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
12008 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
12009 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
12010 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
12011 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
12012 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
  vst4_u8(a, b);
}

12018 // CHECK-LABEL: @test_vst4_u16(
12019 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
12020 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
12021 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
12022 // CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12023 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12024 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
12025 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
12026 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
12027 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12028 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
12029 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
12030 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
12031 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12032 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
12033 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
12034 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
12035 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12036 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
12037 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
12038 // CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
12039 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12040 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12041 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12042 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12043 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12044 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
  vst4_u16(a, b);
}

12050 // CHECK-LABEL: @test_vst4_u32(
12051 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
12052 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
12053 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
12054 // CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12055 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12056 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
12057 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
12058 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
12059 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12060 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
12061 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
12062 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
12063 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12064 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
12065 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
12066 // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
12067 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12068 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
12069 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
12070 // CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
12071 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
12072 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12073 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12074 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12075 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
12076 // CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a)
void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
  vst4_u32(a, b);
}

12082 // CHECK-LABEL: @test_vst4_u64(
12083 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
12084 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
12085 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[B]], i32 0, i32 0
12086 // CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12087 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12088 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
12089 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
12090 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
12091 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12092 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
12093 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
12094 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
12095 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12096 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
12097 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
12098 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
12099 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12100 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
12101 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
12102 // CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
12103 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12104 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12105 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12106 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12107 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12108 // CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
  vst4_u64(a, b);
}

12114 // CHECK-LABEL: @test_vst4_s8(
12115 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
12116 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
12117 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
12118 // CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12119 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12120 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
12121 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
12122 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
12123 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
12124 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
12125 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
12126 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
12127 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
12128 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
12129 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
12130 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
12131 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
12132 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
void test_vst4_s8(int8_t *a, int8x8x4_t b) {
  vst4_s8(a, b);
}

12138 // CHECK-LABEL: @test_vst4_s16(
12139 // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
12140 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
12141 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
12142 // CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12143 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12144 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
12145 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
12146 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
12147 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12148 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
12149 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
12150 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
12151 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12152 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
12153 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
12154 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
12155 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12156 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
12157 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
12158 // CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
12159 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12160 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12161 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12162 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12163 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12164 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
void test_vst4_s16(int16_t *a, int16x4x4_t b) {
  vst4_s16(a, b);
}

12170 // CHECK-LABEL: @test_vst4_s32(
12171 // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
12172 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
12173 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
12174 // CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12175 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12176 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
12177 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
12178 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
12179 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
12180 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
12181 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
12182 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
12183 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
12184 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
12185 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
12186 // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
12187 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
12188 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
12189 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
12190 // CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
12191 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
12192 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
12193 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
12194 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
12195 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
12196 // CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a)
void test_vst4_s32(int32_t *a, int32x2x4_t b) {
  vst4_s32(a, b);
}

12202 // CHECK-LABEL: @test_vst4_s64(
12203 // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
12204 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
12205 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[B]], i32 0, i32 0
12206 // CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12207 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12208 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
12209 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
12210 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
12211 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12212 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
12213 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
12214 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
12215 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12216 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
12217 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
12218 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
12219 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12220 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
12221 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
12222 // CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
12223 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12224 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12225 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12226 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12227 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12228 // CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
void test_vst4_s64(int64_t *a, int64x1x4_t b) {
  vst4_s64(a, b);
}

12234 // CHECK-LABEL: @test_vst4_f16(
12235 // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
12236 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
12237 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
12238 // CHECK: store [4 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12239 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12240 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
12241 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i64 0, i64 0
12242 // CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
12243 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
12244 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
12245 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
12246 // CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
12247 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
12248 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
12249 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
12250 // CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
12251 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
12252 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
12253 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i64 0, i64 3
12254 // CHECK: [[TMP9:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
12255 // CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
12256 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
12257 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
12258 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
12259 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
12260 // CHECK: call void @llvm.aarch64.neon.st4.v4f16.p0(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], ptr %a)
void test_vst4_f16(float16_t *a, float16x4x4_t b) {
  vst4_f16(a, b);
}

12266 // CHECK-LABEL: @test_vst4_f32(
12267 // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
12268 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
12269 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
12270 // CHECK: store [4 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12271 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12272 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
12273 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i64 0, i64 0
12274 // CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
12275 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
12276 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
12277 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
12278 // CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
12279 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
12280 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
12281 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
12282 // CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
12283 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
12284 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
12285 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i64 0, i64 3
12286 // CHECK: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
12287 // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
12288 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
12289 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
12290 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
12291 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
12292 // CHECK: call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], ptr %a)
void test_vst4_f32(float32_t *a, float32x2x4_t b) {
  vst4_f32(a, b);
}
12298 // CHECK-LABEL: @test_vst4_f64(
12299 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
12300 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
12301 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0
12302 // CHECK: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12303 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12304 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12305 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0
12306 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
12307 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12308 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12309 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
12310 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
12311 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12312 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12313 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
12314 // CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
12315 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
12316 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12317 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3
12318 // CHECK: [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8
12319 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
12320 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12321 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12322 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
12323 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
12324 // CHECK: call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], ptr %a)
void test_vst4_f64(float64_t *a, float64x1x4_t b) {
  vst4_f64(a, b);
}
12330 // CHECK-LABEL: @test_vst4_p8(
12331 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
12332 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
12333 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
12334 // CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12335 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12336 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
12337 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
12338 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
12339 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
12340 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
12341 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
12342 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
12343 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
12344 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
12345 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
12346 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
12347 // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
12348 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
  vst4_p8(a, b);
}
12354 // CHECK-LABEL: @test_vst4_p16(
12355 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
12356 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
12357 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
12358 // CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12359 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12360 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
12361 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
12362 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
12363 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
12364 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
12365 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
12366 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
12367 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
12368 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
12369 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
12370 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
12371 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
12372 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
12373 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
12374 // CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
12375 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
12376 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
12377 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
12378 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
12379 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
12380 // CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
  vst4_p16(a, b);
}
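// The vld1*_xN tests below check that loading N consecutive vectors lowers to
// a single @llvm.aarch64.neon.ld1xN call whose aggregate result is stored to a
// local temporary, memcpy'd into the return slot, and returned as the
// multi-vector struct value.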
12386 // CHECK-LABEL: @test_vld1q_f64_x2(
12387 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
12388 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
12389 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %a)
12390 // CHECK: store { <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
12391 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
12392 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16
12393 // CHECK: ret %struct.float64x2x2_t [[TMP6]]
float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
  return vld1q_f64_x2(a);
}
12398 // CHECK-LABEL: @test_vld1q_p64_x2(
12399 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
12400 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
12401 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %a)
12402 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
12403 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
12404 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, ptr [[RETVAL]], align 16
12405 // CHECK: ret %struct.poly64x2x2_t [[TMP6]]
poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
  return vld1q_p64_x2(a);
}
12410 // CHECK-LABEL: @test_vld1_f64_x2(
12411 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
12412 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
12413 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %a)
12414 // CHECK: store { <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
12415 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
12416 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8
12417 // CHECK: ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
  return vld1_f64_x2(a);
}
12422 // CHECK-LABEL: @test_vld1_p64_x2(
12423 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
12424 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
12425 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %a)
12426 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
12427 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
12428 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, ptr [[RETVAL]], align 8
12429 // CHECK: ret %struct.poly64x1x2_t [[TMP6]]
poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
  return vld1_p64_x2(a);
}
12434 // CHECK-LABEL: @test_vld1q_f64_x3(
12435 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
12436 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
12437 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %a)
12438 // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
12439 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
12440 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16
12441 // CHECK: ret %struct.float64x2x3_t [[TMP6]]
float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
  return vld1q_f64_x3(a);
}
12446 // CHECK-LABEL: @test_vld1q_p64_x3(
12447 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
12448 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
12449 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %a)
12450 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
12451 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
12452 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, ptr [[RETVAL]], align 16
12453 // CHECK: ret %struct.poly64x2x3_t [[TMP6]]
poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
  return vld1q_p64_x3(a);
}
12458 // CHECK-LABEL: @test_vld1_f64_x3(
12459 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
12460 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
12461 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %a)
12462 // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
12463 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
12464 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8
12465 // CHECK: ret %struct.float64x1x3_t [[TMP6]]
float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
  return vld1_f64_x3(a);
}
12470 // CHECK-LABEL: @test_vld1_p64_x3(
12471 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
12472 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
12473 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %a)
12474 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
12475 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
12476 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, ptr [[RETVAL]], align 8
12477 // CHECK: ret %struct.poly64x1x3_t [[TMP6]]
poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
  return vld1_p64_x3(a);
}
12482 // CHECK-LABEL: @test_vld1q_f64_x4(
12483 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
12484 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
12485 // CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %a)
12486 // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
12487 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
12488 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16
12489 // CHECK: ret %struct.float64x2x4_t [[TMP6]]
float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
  return vld1q_f64_x4(a);
}
12494 // CHECK-LABEL: @test_vld1q_p64_x4(
12495 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
12496 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
12497 // CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %a)
12498 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
12499 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
12500 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, ptr [[RETVAL]], align 16
12501 // CHECK: ret %struct.poly64x2x4_t [[TMP6]]
poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
  return vld1q_p64_x4(a);
}
12506 // CHECK-LABEL: @test_vld1_f64_x4(
12507 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
12508 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
12509 // CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %a)
12510 // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
12511 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
12512 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8
12513 // CHECK: ret %struct.float64x1x4_t [[TMP6]]
float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
  return vld1_f64_x4(a);
}
12518 // CHECK-LABEL: @test_vld1_p64_x4(
12519 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
12520 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
12521 // CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %a)
12522 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
12523 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
12524 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, ptr [[RETVAL]], align 8
12525 // CHECK: ret %struct.poly64x1x4_t [[TMP6]]
poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
  return vld1_p64_x4(a);
}
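// The vst1*_xN tests mirror the loads above: the multi-vector struct argument
// arrives as an array coerce, is copied into a local temporary, each element
// is loaded (bitcast through a byte vector for non-i8 element types), and a
// single @llvm.aarch64.neon.st1xN intrinsic performs the store.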
12530 // CHECK-LABEL: @test_vst1q_f64_x2(
12531 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
12532 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
12533 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0
12534 // CHECK: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
12535 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
12536 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
12537 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0
12538 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
12539 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12540 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
12541 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
12542 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
12543 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12544 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12545 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12546 // CHECK: call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr %a)
void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
  vst1q_f64_x2(a, b);
}
12552 // CHECK-LABEL: @test_vst1q_p64_x2(
12553 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
12554 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
12555 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, ptr [[B]], i32 0, i32 0
12556 // CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
12557 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
12558 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
12559 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
12560 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
12561 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12562 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
12563 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
12564 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
12565 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12566 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12567 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12568 // CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
  vst1q_p64_x2(a, b);
}
12574 // CHECK-LABEL: @test_vst1_f64_x2(
12575 // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
12576 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
12577 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0
12578 // CHECK: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12579 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
12580 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
12581 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0
12582 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
12583 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12584 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
12585 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
12586 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
12587 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12588 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12589 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12590 // CHECK: call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr %a)
void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
  vst1_f64_x2(a, b);
}
12596 // CHECK-LABEL: @test_vst1_p64_x2(
12597 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
12598 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
12599 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, ptr [[B]], i32 0, i32 0
12600 // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12601 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
12602 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
12603 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
12604 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
12605 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12606 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
12607 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
12608 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
12609 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12610 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12611 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12612 // CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
  vst1_p64_x2(a, b);
}
12618 // CHECK-LABEL: @test_vst1q_f64_x3(
12619 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
12620 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
12621 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0
12622 // CHECK: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
12623 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
12624 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
12625 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0
12626 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
12627 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12628 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
12629 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
12630 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
12631 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12632 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
12633 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
12634 // CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
12635 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
12636 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12637 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12638 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
12639 // CHECK: call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr %a)
void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
  vst1q_f64_x3(a, b);
}
12645 // CHECK-LABEL: @test_vst1q_p64_x3(
12646 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
12647 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
12648 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr [[B]], i32 0, i32 0
12649 // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
12650 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false)
12651 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
12652 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
12653 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
12654 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12655 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
12656 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
12657 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
12658 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12659 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
12660 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
12661 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
12662 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12663 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12664 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12665 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12666 // CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
  vst1q_p64_x3(a, b);
}
12672 // CHECK-LABEL: @test_vst1_f64_x3(
12673 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
12674 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
12675 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0
12676 // CHECK: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12677 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
12678 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
12679 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0
12680 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
12681 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12682 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
12683 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
12684 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
12685 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12686 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
12687 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
12688 // CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
12689 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
12690 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12691 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12692 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
12693 // CHECK: call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr %a)
void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
  vst1_f64_x3(a, b);
}
12699 // CHECK-LABEL: @test_vst1_p64_x3(
12700 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
12701 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
12702 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr [[B]], i32 0, i32 0
12703 // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12704 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false)
12705 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
12706 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
12707 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
12708 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12709 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
12710 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
12711 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
12712 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12713 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
12714 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
12715 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
12716 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12717 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12718 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12719 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12720 // CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
  vst1_p64_x3(a, b);
}
12726 // CHECK-LABEL: @test_vst1q_f64_x4(
12727 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
12728 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
12729 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0
12730 // CHECK: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
12731 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
12732 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
12733 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0
12734 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
12735 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
12736 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
12737 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
12738 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
12739 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
12740 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
12741 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
12742 // CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
12743 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
12744 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
12745 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3
12746 // CHECK: [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16
12747 // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
12748 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
12749 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
12750 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
12751 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
12752 // CHECK: call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], ptr %a)
void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
  vst1q_f64_x4(a, b);
}
12758 // CHECK-LABEL: @test_vst1q_p64_x4(
12759 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
12760 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
12761 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[B]], i32 0, i32 0
12762 // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
12763 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
12764 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
12765 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
12766 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
12767 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
12768 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
12769 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
12770 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
12771 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
12772 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
12773 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
12774 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
12775 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
12776 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
12777 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
12778 // CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
12779 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
12780 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
12781 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
12782 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
12783 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
12784 // CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
  vst1q_p64_x4(a, b);
}
12790 // CHECK-LABEL: @test_vst1_f64_x4(
12791 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
12792 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
12793 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0
12794 // CHECK: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12795 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12796 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12797 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0
12798 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
12799 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
12800 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12801 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
12802 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
12803 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
12804 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12805 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
12806 // CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
12807 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
12808 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
12809 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3
12810 // CHECK: [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8
12811 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
12812 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
12813 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
12814 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
12815 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
12816 // CHECK: call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], ptr %a)
void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) {
  vst1_f64_x4(a, b);
}
12822 // CHECK-LABEL: @test_vst1_p64_x4(
12823 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
12824 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
12825 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[B]], i32 0, i32 0
12826 // CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
12827 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
12828 // CHECK: [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
12829 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
12830 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
12831 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
12832 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
12833 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
12834 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
12835 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
12836 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
12837 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
12838 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
12839 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
12840 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
12841 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
12842 // CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
12843 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
12844 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
12845 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
12846 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
12847 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
12848 // CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) {
  vst1_p64_x4(a, b);
}
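// Scalar integer comparison intrinsics (vceqd, vcged, vcgtd, vcled, vcltd and
// their compare-against-zero forms) are expected to lower to a plain icmp
// whose i1 result is sign-extended to i64, giving an all-ones mask when the
// comparison holds and zero otherwise.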
12854 // CHECK-LABEL: @test_vceqd_s64(
12855 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
12856 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12857 // CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vceqd_s64(int64_t a, int64_t b) {
  return (uint64_t)vceqd_s64(a, b);
}
12862 // CHECK-LABEL: @test_vceqd_u64(
12863 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b
12864 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12865 // CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
  return (int64_t)vceqd_u64(a, b);
}
12870 // CHECK-LABEL: @test_vceqzd_s64(
12871 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
12872 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
12873 // CHECK: ret i64 [[VCEQZ_I]]
uint64_t test_vceqzd_s64(int64_t a) {
  return (uint64_t)vceqzd_s64(a);
}
12878 // CHECK-LABEL: @test_vceqzd_u64(
12879 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0
12880 // CHECK: [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
12881 // CHECK: ret i64 [[VCEQZD_I]]
int64_t test_vceqzd_u64(int64_t a) {
  return (int64_t)vceqzd_u64(a);
}
12886 // CHECK-LABEL: @test_vcged_s64(
12887 // CHECK: [[TMP0:%.*]] = icmp sge i64 %a, %b
12888 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12889 // CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcged_s64(int64_t a, int64_t b) {
  return (uint64_t)vcged_s64(a, b);
}
12894 // CHECK-LABEL: @test_vcged_u64(
12895 // CHECK: [[TMP0:%.*]] = icmp uge i64 %a, %b
12896 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12897 // CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcged_u64(a, b);
}
12902 // CHECK-LABEL: @test_vcgezd_s64(
12903 // CHECK: [[TMP0:%.*]] = icmp sge i64 %a, 0
12904 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
12905 // CHECK: ret i64 [[VCGEZ_I]]
uint64_t test_vcgezd_s64(int64_t a) {
  return (uint64_t)vcgezd_s64(a);
}
12910 // CHECK-LABEL: @test_vcgtd_s64(
12911 // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, %b
12912 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12913 // CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcgtd_s64(int64_t a, int64_t b) {
  return (uint64_t)vcgtd_s64(a, b);
}
12918 // CHECK-LABEL: @test_vcgtd_u64(
12919 // CHECK: [[TMP0:%.*]] = icmp ugt i64 %a, %b
12920 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12921 // CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcgtd_u64(a, b);
}
12926 // CHECK-LABEL: @test_vcgtzd_s64(
12927 // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, 0
12928 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
12929 // CHECK: ret i64 [[VCGTZ_I]]
uint64_t test_vcgtzd_s64(int64_t a) {
  return (uint64_t)vcgtzd_s64(a);
}
12934 // CHECK-LABEL: @test_vcled_s64(
12935 // CHECK: [[TMP0:%.*]] = icmp sle i64 %a, %b
12936 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12937 // CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcled_s64(int64_t a, int64_t b) {
  return (uint64_t)vcled_s64(a, b);
}
12942 // CHECK-LABEL: @test_vcled_u64(
12943 // CHECK: [[TMP0:%.*]] = icmp ule i64 %a, %b
12944 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12945 // CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcled_u64(a, b);
}
12950 // CHECK-LABEL: @test_vclezd_s64(
12951 // CHECK: [[TMP0:%.*]] = icmp sle i64 %a, 0
12952 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
12953 // CHECK: ret i64 [[VCLEZ_I]]
uint64_t test_vclezd_s64(int64_t a) {
  return (uint64_t)vclezd_s64(a);
}
12958 // CHECK-LABEL: @test_vcltd_s64(
12959 // CHECK: [[TMP0:%.*]] = icmp slt i64 %a, %b
12960 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12961 // CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcltd_s64(int64_t a, int64_t b) {
  return (uint64_t)vcltd_s64(a, b);
}
12966 // CHECK-LABEL: @test_vcltd_u64(
12967 // CHECK: [[TMP0:%.*]] = icmp ult i64 %a, %b
12968 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
12969 // CHECK: ret i64 [[VCEQD_I]]
uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vcltd_u64(a, b);
}
12974 // CHECK-LABEL: @test_vcltzd_s64(
12975 // CHECK: [[TMP0:%.*]] = icmp slt i64 %a, 0
12976 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
12977 // CHECK: ret i64 [[VCLTZ_I]]
uint64_t test_vcltzd_s64(int64_t a) {
  return (uint64_t)vcltzd_s64(a);
}
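// vtstd tests: the scalar bit-test intrinsics lower to an 'and' of the two
// operands, a compare against zero, and the usual sext-to-mask.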
12982 // CHECK-LABEL: @test_vtstd_s64(
12983 // CHECK: [[TMP0:%.*]] = and i64 %a, %b
12984 // CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
12985 // CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
12986 // CHECK: ret i64 [[VTSTD_I]]
uint64_t test_vtstd_s64(int64_t a, int64_t b) {
  return (uint64_t)vtstd_s64(a, b);
}
12991 // CHECK-LABEL: @test_vtstd_u64(
12992 // CHECK: [[TMP0:%.*]] = and i64 %a, %b
12993 // CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
12994 // CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
12995 // CHECK: ret i64 [[VTSTD_I]]
uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vtstd_u64(a, b);
}
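// Absolute value and negation: vabsd uses @llvm.aarch64.neon.abs.i64 and vnegd
// lowers to a plain subtraction from zero, while the saturating vqabs/vqneg
// forms use the aarch64.neon.sqabs/sqneg intrinsics; the 8- and 16-bit
// variants operate on lane 0 of a vector via insertelement/extractelement.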
13000 // CHECK-LABEL: @test_vabsd_s64(
13001 // CHECK: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a)
13002 // CHECK: ret i64 [[VABSD_S64_I]]
int64_t test_vabsd_s64(int64_t a) {
  return (int64_t)vabsd_s64(a);
}
13007 // CHECK-LABEL: @test_vqabsb_s8(
13008 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
13009 // CHECK: [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]])
13010 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
13011 // CHECK: ret i8 [[TMP1]]
int8_t test_vqabsb_s8(int8_t a) {
  return (int8_t)vqabsb_s8(a);
}
13016 // CHECK-LABEL: @test_vqabsh_s16(
13017 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
13018 // CHECK: [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]])
13019 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
13020 // CHECK: ret i16 [[TMP1]]
int16_t test_vqabsh_s16(int16_t a) {
  return (int16_t)vqabsh_s16(a);
}
13025 // CHECK-LABEL: @test_vqabss_s32(
13026 // CHECK: [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
13027 // CHECK: ret i32 [[VQABSS_S32_I]]
int32_t test_vqabss_s32(int32_t a) {
  return (int32_t)vqabss_s32(a);
}
13032 // CHECK-LABEL: @test_vqabsd_s64(
13033 // CHECK: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a)
13034 // CHECK: ret i64 [[VQABSD_S64_I]]
int64_t test_vqabsd_s64(int64_t a) {
  return (int64_t)vqabsd_s64(a);
}
13039 // CHECK-LABEL: @test_vnegd_s64(
13040 // CHECK: [[VNEGD_I:%.*]] = sub i64 0, %a
13041 // CHECK: ret i64 [[VNEGD_I]]
int64_t test_vnegd_s64(int64_t a) {
  return (int64_t)vnegd_s64(a);
}
13046 // CHECK-LABEL: @test_vqnegb_s8(
13047 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
13048 // CHECK: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]])
13049 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
13050 // CHECK: ret i8 [[TMP1]]
int8_t test_vqnegb_s8(int8_t a) {
  return (int8_t)vqnegb_s8(a);
}
13055 // CHECK-LABEL: @test_vqnegh_s16(
13056 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
13057 // CHECK: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]])
13058 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
13059 // CHECK: ret i16 [[TMP1]]
int16_t test_vqnegh_s16(int16_t a) {
  return (int16_t)vqnegh_s16(a);
}
13064 // CHECK-LABEL: @test_vqnegs_s32(
13065 // CHECK: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a)
13066 // CHECK: ret i32 [[VQNEGS_S32_I]]
int32_t test_vqnegs_s32(int32_t a) {
  return (int32_t)vqnegs_s32(a);
}
13071 // CHECK-LABEL: @test_vqnegd_s64(
13072 // CHECK: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a)
13073 // CHECK: ret i64 [[VQNEGD_S64_I]]
int64_t test_vqnegd_s64(int64_t a) {
  return (int64_t)vqnegd_s64(a);
}
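// Saturating accumulate of the opposite signedness: vuqadd* adds an unsigned
// value into a signed accumulator (aarch64.neon.suqadd) and vsqadd* adds a
// signed value into an unsigned accumulator (aarch64.neon.usqadd); the 8- and
// 16-bit forms again go through lane 0 of a vector.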
13078 // CHECK-LABEL: @test_vuqaddb_s8(
13079 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
13080 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
13081 // CHECK: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
13082 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
13083 // CHECK: ret i8 [[TMP2]]
int8_t test_vuqaddb_s8(int8_t a, uint8_t b) {
  return (int8_t)vuqaddb_s8(a, b);
}
13088 // CHECK-LABEL: @test_vuqaddh_s16(
13089 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
13090 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
13091 // CHECK: [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13092 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
13093 // CHECK: ret i16 [[TMP2]]
int16_t test_vuqaddh_s16(int16_t a, uint16_t b) {
  return (int16_t)vuqaddh_s16(a, b);
}
13098 // CHECK-LABEL: @test_vuqadds_s32(
13099 // CHECK: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b)
13100 // CHECK: ret i32 [[VUQADDS_S32_I]]
int32_t test_vuqadds_s32(int32_t a, uint32_t b) {
  return (int32_t)vuqadds_s32(a, b);
}
13105 // CHECK-LABEL: @test_vuqaddd_s64(
13106 // CHECK: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b)
13107 // CHECK: ret i64 [[VUQADDD_S64_I]]
int64_t test_vuqaddd_s64(int64_t a, uint64_t b) {
  return (int64_t)vuqaddd_s64(a, b);
}
13112 // CHECK-LABEL: @test_vsqaddb_u8(
13113 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
13114 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
13115 // CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
13116 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
13117 // CHECK: ret i8 [[TMP2]]
uint8_t test_vsqaddb_u8(uint8_t a, int8_t b) {
  return (uint8_t)vsqaddb_u8(a, b);
}
13122 // CHECK-LABEL: @test_vsqaddh_u16(
13123 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
13124 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
13125 // CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13126 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
13127 // CHECK: ret i16 [[TMP2]]
uint16_t test_vsqaddh_u16(uint16_t a, int16_t b) {
  return (uint16_t)vsqaddh_u16(a, b);
}
13132 // CHECK-LABEL: @test_vsqadds_u32(
13133 // CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b)
13134 // CHECK: ret i32 [[VSQADDS_U32_I]]
uint32_t test_vsqadds_u32(uint32_t a, int32_t b) {
  return (uint32_t)vsqadds_u32(a, b);
}
13139 // CHECK-LABEL: @test_vsqaddd_u64(
13140 // CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b)
13141 // CHECK: ret i64 [[VSQADDD_U64_I]]
uint64_t test_vsqaddd_u64(uint64_t a, int64_t b) {
  return (uint64_t)vsqaddd_u64(a, b);
}
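// Saturating doubling multiply-accumulate: the 16-bit forms build lane-0
// vectors, call sqdmull, extract the scalar product and combine it with the
// accumulator via sqadd/sqsub; the 32-bit forms use the sqdmulls.scalar
// intrinsic directly.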
13146 // CHECK-LABEL: @test_vqdmlalh_s16(
13147 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
13148 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %c, i64 0
13149 // CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13150 // CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
13151 // CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]])
13152 // CHECK: ret i32 [[VQDMLXL1_I]]
int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
  return (int32_t)vqdmlalh_s16(a, b, c);
}
13157 // CHECK-LABEL: @test_vqdmlals_s32(
13158 // CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
13159 // CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]])
13160 // CHECK: ret i64 [[VQDMLXL1_I]]
int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlals_s32(a, b, c);
}
13165 // CHECK-LABEL: @test_vqdmlslh_s16(
13166 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
13167 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %c, i64 0
13168 // CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13169 // CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
13170 // CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]])
13171 // CHECK: ret i32 [[VQDMLXL1_I]]
int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
  return (int32_t)vqdmlslh_s16(a, b, c);
}
13176 // CHECK-LABEL: @test_vqdmlsls_s32(
13177 // CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
13178 // CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]])
13179 // CHECK: ret i64 [[VQDMLXL1_I]]
int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
  return (int64_t)vqdmlsls_s32(a, b, c);
}
13184 // CHECK-LABEL: @test_vqdmullh_s16(
13185 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
13186 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
13187 // CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
13188 // CHECK: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
13189 // CHECK: ret i32 [[TMP2]]
int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
  return (int32_t)vqdmullh_s16(a, b);
}
13194 // CHECK-LABEL: @test_vqdmulls_s32(
13195 // CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b)
13196 // CHECK: ret i64 [[VQDMULLS_S32_I]]
int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
  return (int64_t)vqdmulls_s32(a, b);
}
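// Saturating narrowing moves: vqmovun* (signed source, unsigned result) maps
// to sqxtun, vqmovn* to sqxtn, and the unsigned forms to uqxtn; 64-bit sources
// narrow through the dedicated scalar intrinsics, the rest through lane-0
// vector operations.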
13201 // CHECK-LABEL: @test_vqmovunh_s16(
13202 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
13203 // CHECK: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]])
13204 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
13205 // CHECK: ret i8 [[TMP1]]
uint8_t test_vqmovunh_s16(int16_t a) {
  return (uint8_t)vqmovunh_s16(a);
}
13210 // CHECK-LABEL: @test_vqmovuns_s32(
13211 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
13212 // CHECK: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]])
13213 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
13214 // CHECK: ret i16 [[TMP1]]
uint16_t test_vqmovuns_s32(int32_t a) {
  return (uint16_t)vqmovuns_s32(a);
}
13219 // CHECK-LABEL: @test_vqmovund_s64(
13220 // CHECK: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a)
13221 // CHECK: ret i32 [[VQMOVUND_S64_I]]
uint32_t test_vqmovund_s64(int64_t a) {
  return (uint32_t)vqmovund_s64(a);
}
13226 // CHECK-LABEL: @test_vqmovnh_s16(
13227 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
13228 // CHECK: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]])
13229 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
13230 // CHECK: ret i8 [[TMP1]]
13231 int8_t test_vqmovnh_s16(int16_t a
) {
13232 return (int8_t)vqmovnh_s16(a
);
13235 // CHECK-LABEL: @test_vqmovns_s32(
13236 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
13237 // CHECK: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]])
13238 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
13239 // CHECK: ret i16 [[TMP1]]
13240 int16_t test_vqmovns_s32(int32_t a
) {
13241 return (int16_t)vqmovns_s32(a
);
13244 // CHECK-LABEL: @test_vqmovnd_s64(
13245 // CHECK: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a)
13246 // CHECK: ret i32 [[VQMOVND_S64_I]]
13247 int32_t test_vqmovnd_s64(int64_t a
) {
13248 return (int32_t)vqmovnd_s64(a
);
13251 // CHECK-LABEL: @test_vqmovnh_u16(
13252 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
13253 // CHECK: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]])
13254 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
13255 // CHECK: ret i8 [[TMP1]]
13256 int8_t test_vqmovnh_u16(int16_t a
) {
13257 return (int8_t)vqmovnh_u16(a
);
13260 // CHECK-LABEL: @test_vqmovns_u32(
13261 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
13262 // CHECK: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]])
13263 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
13264 // CHECK: ret i16 [[TMP1]]
13265 int16_t test_vqmovns_u32(int32_t a
) {
13266 return (int16_t)vqmovns_u32(a
);
13269 // CHECK-LABEL: @test_vqmovnd_u64(
13270 // CHECK: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a)
13271 // CHECK: ret i32 [[VQMOVND_U64_I]]
13272 int32_t test_vqmovnd_u64(int64_t a
) {
13273 return (int32_t)vqmovnd_u64(a
);
// CHECK-LABEL: @test_vceqs_f32(
// CHECK: [[TMP0:%.*]] = fcmp oeq float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vceqs_f32(float32_t a, float32_t b) {
  return (uint32_t)vceqs_f32(a, b);
}

// CHECK-LABEL: @test_vceqd_f64(
// CHECK: [[TMP0:%.*]] = fcmp oeq double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vceqd_f64(float64_t a, float64_t b) {
  return (uint64_t)vceqd_f64(a, b);
}

// CHECK-LABEL: @test_vceqzs_f32(
// CHECK: [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCEQZ_I]]
uint32_t test_vceqzs_f32(float32_t a) {
  return (uint32_t)vceqzs_f32(a);
}

// CHECK-LABEL: @test_vceqzd_f64(
// CHECK: [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
// CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCEQZ_I]]
uint64_t test_vceqzd_f64(float64_t a) {
  return (uint64_t)vceqzd_f64(a);
}

// CHECK-LABEL: @test_vcges_f32(
// CHECK: [[TMP0:%.*]] = fcmp oge float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcges_f32(float32_t a, float32_t b) {
  return (uint32_t)vcges_f32(a, b);
}

// CHECK-LABEL: @test_vcged_f64(
// CHECK: [[TMP0:%.*]] = fcmp oge double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcged_f64(a, b);
}

// CHECK-LABEL: @test_vcgezs_f32(
// CHECK: [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCGEZ_I]]
uint32_t test_vcgezs_f32(float32_t a) {
  return (uint32_t)vcgezs_f32(a);
}

// CHECK-LABEL: @test_vcgezd_f64(
// CHECK: [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
// CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGEZ_I]]
uint64_t test_vcgezd_f64(float64_t a) {
  return (uint64_t)vcgezd_f64(a);
}

// CHECK-LABEL: @test_vcgts_f32(
// CHECK: [[TMP0:%.*]] = fcmp ogt float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcgts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcgts_f32(a, b);
}

// CHECK-LABEL: @test_vcgtd_f64(
// CHECK: [[TMP0:%.*]] = fcmp ogt double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcgtd_f64(a, b);
}

// CHECK-LABEL: @test_vcgtzs_f32(
// CHECK: [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCGTZ_I]]
uint32_t test_vcgtzs_f32(float32_t a) {
  return (uint32_t)vcgtzs_f32(a);
}

// CHECK-LABEL: @test_vcgtzd_f64(
// CHECK: [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
// CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCGTZ_I]]
uint64_t test_vcgtzd_f64(float64_t a) {
  return (uint64_t)vcgtzd_f64(a);
}

// CHECK-LABEL: @test_vcles_f32(
// CHECK: [[TMP0:%.*]] = fcmp ole float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vcles_f32(float32_t a, float32_t b) {
  return (uint32_t)vcles_f32(a, b);
}

// CHECK-LABEL: @test_vcled_f64(
// CHECK: [[TMP0:%.*]] = fcmp ole double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcled_f64(a, b);
}

// CHECK-LABEL: @test_vclezs_f32(
// CHECK: [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCLEZ_I]]
uint32_t test_vclezs_f32(float32_t a) {
  return (uint32_t)vclezs_f32(a);
}

// CHECK-LABEL: @test_vclezd_f64(
// CHECK: [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
// CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLEZ_I]]
uint64_t test_vclezd_f64(float64_t a) {
  return (uint64_t)vclezd_f64(a);
}

// CHECK-LABEL: @test_vclts_f32(
// CHECK: [[TMP0:%.*]] = fcmp olt float %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCMPD_I]]
uint32_t test_vclts_f32(float32_t a, float32_t b) {
  return (uint32_t)vclts_f32(a, b);
}

// CHECK-LABEL: @test_vcltd_f64(
// CHECK: [[TMP0:%.*]] = fcmp olt double %a, %b
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCMPD_I]]
uint64_t test_vcltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcltd_f64(a, b);
}

// CHECK-LABEL: @test_vcltzs_f32(
// CHECK: [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
// CHECK: ret i32 [[VCLTZ_I]]
uint32_t test_vcltzs_f32(float32_t a) {
  return (uint32_t)vcltzs_f32(a);
}

// CHECK-LABEL: @test_vcltzd_f64(
// CHECK: [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
// CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
// CHECK: ret i64 [[VCLTZ_I]]
uint64_t test_vcltzd_f64(float64_t a) {
  return (uint64_t)vcltzd_f64(a);
}

// CHECK-LABEL: @test_vcages_f32(
// CHECK: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b)
// CHECK: ret i32 [[VCAGES_F32_I]]
uint32_t test_vcages_f32(float32_t a, float32_t b) {
  return (uint32_t)vcages_f32(a, b);
}

// CHECK-LABEL: @test_vcaged_f64(
// CHECK: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b)
// CHECK: ret i64 [[VCAGED_F64_I]]
uint64_t test_vcaged_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaged_f64(a, b);
}

// CHECK-LABEL: @test_vcagts_f32(
// CHECK: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b)
// CHECK: ret i32 [[VCAGTS_F32_I]]
uint32_t test_vcagts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcagts_f32(a, b);
}

// CHECK-LABEL: @test_vcagtd_f64(
// CHECK: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b)
// CHECK: ret i64 [[VCAGTD_F64_I]]
uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcagtd_f64(a, b);
}

// CHECK-LABEL: @test_vcales_f32(
// CHECK: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a)
// CHECK: ret i32 [[VCALES_F32_I]]
uint32_t test_vcales_f32(float32_t a, float32_t b) {
  return (uint32_t)vcales_f32(a, b);
}

// CHECK-LABEL: @test_vcaled_f64(
// CHECK: [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a)
// CHECK: ret i64 [[VCALED_F64_I]]
uint64_t test_vcaled_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaled_f64(a, b);
}

// CHECK-LABEL: @test_vcalts_f32(
// CHECK: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a)
// CHECK: ret i32 [[VCALTS_F32_I]]
uint32_t test_vcalts_f32(float32_t a, float32_t b) {
  return (uint32_t)vcalts_f32(a, b);
}

// CHECK-LABEL: @test_vcaltd_f64(
// CHECK: [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a)
// CHECK: ret i64 [[VCALTD_F64_I]]
uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
  return (uint64_t)vcaltd_f64(a, b);
}

// CHECK-LABEL: @test_vshrd_n_s64(
// CHECK: [[SHRD_N:%.*]] = ashr i64 %a, 1
// CHECK: ret i64 [[SHRD_N]]
int64_t test_vshrd_n_s64(int64_t a) {
  return (int64_t)vshrd_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshrd_n_u64(
// CHECK: ret i64 0
uint64_t test_vshrd_n_u64(uint64_t a) {
  return (uint64_t)vshrd_n_u64(a, 64);
}

// CHECK-LABEL: @test_vshrd_n_u64_2(
// CHECK: ret i64 0
uint64_t test_vshrd_n_u64_2() {
  uint64_t a = UINT64_C(0xf000000000000000);
  return vshrd_n_u64(a, 64);
}

// CHECK-LABEL: @test_vshr_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}

// CHECK-LABEL: @test_vrshrd_n_s64(
// CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63)
// CHECK: ret i64 [[VRSHR_N]]
int64_t test_vrshrd_n_s64(int64_t a) {
  return (int64_t)vrshrd_n_s64(a, 63);
}

// CHECK-LABEL: @test_vrshr_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshrd_n_u64(
// CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63)
// CHECK: ret i64 [[VRSHR_N]]
uint64_t test_vrshrd_n_u64(uint64_t a) {
  return (uint64_t)vrshrd_n_u64(a, 63);
}

// CHECK-LABEL: @test_vrshr_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}

// CHECK-LABEL: @test_vsrad_n_s64(
// CHECK: [[SHRD_N:%.*]] = ashr i64 %b, 63
// CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK: ret i64 [[TMP0]]
int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsra_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsrad_n_u64(
// CHECK: [[SHRD_N:%.*]] = lshr i64 %b, 63
// CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]]
// CHECK: ret i64 [[TMP0]]
uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vsrad_n_u64_2(
// CHECK: ret i64 %a
uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {
  return (uint64_t)vsrad_n_u64(a, b, 64);
}

// CHECK-LABEL: @test_vsra_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK: ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vrsrad_n_s64(
// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63)
// CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK: ret i64 [[TMP1]]
int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
  return (int64_t)vrsrad_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vrsra_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <1 x i64> [[TMP3]]
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
  return vrsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vrsrad_n_u64(
// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63)
// CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]]
// CHECK: ret i64 [[TMP1]]
uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vrsrad_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vrsra_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
// CHECK: ret <1 x i64> [[TMP3]]
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vrsra_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vshld_n_s64(
// CHECK: [[SHLD_N:%.*]] = shl i64 %a, 1
// CHECK: ret i64 [[SHLD_N]]
int64_t test_vshld_n_s64(int64_t a) {
  return (int64_t)vshld_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshld_n_u64(
// CHECK: [[SHLD_N:%.*]] = shl i64 %a, 63
// CHECK: ret i64 [[SHLD_N]]
uint64_t test_vshld_n_u64(uint64_t a) {
  return (uint64_t)vshld_n_u64(a, 63);
}

// CHECK-LABEL: @test_vshl_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK: ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}

// CHECK-LABEL: @test_vqshlb_n_s8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
// CHECK: [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshlb_n_s8(int8_t a) {
  return (int8_t)vqshlb_n_s8(a, 7);
}

// CHECK-LABEL: @test_vqshlh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
// CHECK: [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshlh_n_s16(int16_t a) {
  return (int16_t)vqshlh_n_s16(a, 15);
}

// CHECK-LABEL: @test_vqshls_n_s32(
// CHECK: [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLS_N_S32]]
int32_t test_vqshls_n_s32(int32_t a) {
  return (int32_t)vqshls_n_s32(a, 31);
}

// CHECK-LABEL: @test_vqshld_n_s64(
// CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHL_N]]
int64_t test_vqshld_n_s64(int64_t a) {
  return (int64_t)vqshld_n_s64(a, 63);
}

// CHECK-LABEL: @test_vqshl_n_s8(
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK: ret <8 x i8> [[VQSHL_N]]
int8x8_t test_vqshl_n_s8(int8x8_t a) {
  return vqshl_n_s8(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s8(
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK: ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
int16x4_t test_vqshl_n_s16(int16x4_t a) {
  return vqshl_n_s16(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
int32x2_t test_vqshl_n_s32(int32x2_t a) {
  return vqshl_n_s32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_u8(
// CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
// CHECK: ret <8 x i8> [[VQSHL_N]]
uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
  return vqshl_n_u8(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u8(
// CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
// CHECK: ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
// CHECK: ret <4 x i16> [[VQSHL_N1]]
uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
  return vqshl_n_u16(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
// CHECK: ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
// CHECK: ret <2 x i32> [[VQSHL_N1]]
uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
  return vqshl_n_u32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
// CHECK: ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 0);
}

// CHECK-LABEL: @test_vqshlq_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
// CHECK: ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 0);
}

// CHECK-LABEL: @test_vqshl_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
int64x1_t test_vqshl_n_s64(int64x1_t a) {
  return vqshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshlb_n_u8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
// CHECK: [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqshlb_n_u8(uint8_t a) {
  return (uint8_t)vqshlb_n_u8(a, 7);
}

// CHECK-LABEL: @test_vqshlh_n_u16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
// CHECK: [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqshlh_n_u16(uint16_t a) {
  return (uint16_t)vqshlh_n_u16(a, 15);
}

// CHECK-LABEL: @test_vqshls_n_u32(
// CHECK: [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLS_N_U32]]
uint32_t test_vqshls_n_u32(uint32_t a) {
  return (uint32_t)vqshls_n_u32(a, 31);
}

// CHECK-LABEL: @test_vqshld_n_u64(
// CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHL_N]]
uint64_t test_vqshld_n_u64(uint64_t a) {
  return (uint64_t)vqshld_n_u64(a, 63);
}

// CHECK-LABEL: @test_vqshl_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}

// CHECK-LABEL: @test_vqshlub_n_s8(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
// CHECK: [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshlub_n_s8(int8_t a) {
  return (int8_t)vqshlub_n_s8(a, 7);
}

// CHECK-LABEL: @test_vqshluh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
// CHECK: [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshluh_n_s16(int16_t a) {
  return (int16_t)vqshluh_n_s16(a, 15);
}

// CHECK-LABEL: @test_vqshlus_n_s32(
// CHECK: [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
// CHECK: ret i32 [[VQSHLUS_N_S32]]
int32_t test_vqshlus_n_s32(int32_t a) {
  return (int32_t)vqshlus_n_s32(a, 31);
}

// CHECK-LABEL: @test_vqshlud_n_s64(
// CHECK: [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63)
// CHECK: ret i64 [[VQSHLU_N]]
int64_t test_vqshlud_n_s64(int64_t a) {
  return (int64_t)vqshlud_n_s64(a, 63);
}

// CHECK-LABEL: @test_vqshlu_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK: ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}

// CHECK-LABEL: @test_vsrid_n_s64(
// CHECK: [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
// CHECK: [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
// CHECK: ret i64 [[VSRID_N_S643]]
int64_t test_vsrid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vsrid_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsri_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSRI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsrid_n_u64(
// CHECK: [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
// CHECK: [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
// CHECK: ret i64 [[VSRID_N_U643]]
uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vsrid_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vsri_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSRI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vslid_n_s64(
// CHECK: [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
// CHECK: [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
// CHECK: ret i64 [[VSLID_N_S643]]
int64_t test_vslid_n_s64(int64_t a, int64_t b) {
  return (int64_t)vslid_n_s64(a, b, 63);
}

// CHECK-LABEL: @test_vsli_n_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vslid_n_u64(
// CHECK: [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK: [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
// CHECK: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
// CHECK: [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
// CHECK: ret i64 [[VSLID_N_U643]]
uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
  return (uint64_t)vslid_n_u64(a, b, 63);
}

// CHECK-LABEL: @test_vsli_n_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
// CHECK: ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vqshrnh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
// CHECK: [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshrnh_n_s16(int16_t a) {
  return (int8_t)vqshrnh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqshrns_n_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
// CHECK: [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshrns_n_s32(int32_t a) {
  return (int16_t)vqshrns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqshrnd_n_s64(
// CHECK: [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRND_N_S64]]
int32_t test_vqshrnd_n_s64(int64_t a) {
  return (int32_t)vqshrnd_n_s64(a, 32);
}

// CHECK-LABEL: @test_vqshrnh_n_u16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
// CHECK: [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqshrnh_n_u16(a, 8);
}

// CHECK-LABEL: @test_vqshrns_n_u32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
// CHECK: [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqshrns_n_u32(uint32_t a) {
  return (uint16_t)vqshrns_n_u32(a, 16);
}

// CHECK-LABEL: @test_vqshrnd_n_u64(
// CHECK: [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRND_N_U64]]
uint32_t test_vqshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqshrnd_n_u64(a, 32);
}

// CHECK-LABEL: @test_vqrshrnh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
// CHECK: [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqrshrnh_n_s16(int16_t a) {
  return (int8_t)vqrshrnh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqrshrns_n_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
// CHECK: [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqrshrns_n_s32(int32_t a) {
  return (int16_t)vqrshrns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqrshrnd_n_s64(
// CHECK: [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRND_N_S64]]
int32_t test_vqrshrnd_n_s64(int64_t a) {
  return (int32_t)vqrshrnd_n_s64(a, 32);
}

// CHECK-LABEL: @test_vqrshrnh_n_u16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
// CHECK: [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
// CHECK: ret i8 [[TMP1]]
uint8_t test_vqrshrnh_n_u16(uint16_t a) {
  return (uint8_t)vqrshrnh_n_u16(a, 8);
}

// CHECK-LABEL: @test_vqrshrns_n_u32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
// CHECK: [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
// CHECK: ret i16 [[TMP1]]
uint16_t test_vqrshrns_n_u32(uint32_t a) {
  return (uint16_t)vqrshrns_n_u32(a, 16);
}

// CHECK-LABEL: @test_vqrshrnd_n_u64(
// CHECK: [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRND_N_U64]]
uint32_t test_vqrshrnd_n_u64(uint64_t a) {
  return (uint32_t)vqrshrnd_n_u64(a, 32);
}

// CHECK-LABEL: @test_vqshrunh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
// CHECK: [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqshrunh_n_s16(int16_t a) {
  return (int8_t)vqshrunh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqshruns_n_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
// CHECK: [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqshruns_n_s32(int32_t a) {
  return (int16_t)vqshruns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqshrund_n_s64(
// CHECK: [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQSHRUND_N_S64]]
int32_t test_vqshrund_n_s64(int64_t a) {
  return (int32_t)vqshrund_n_s64(a, 32);
}

// CHECK-LABEL: @test_vqrshrunh_n_s16(
// CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
// CHECK: [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
// CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
// CHECK: ret i8 [[TMP1]]
int8_t test_vqrshrunh_n_s16(int16_t a) {
  return (int8_t)vqrshrunh_n_s16(a, 8);
}

// CHECK-LABEL: @test_vqrshruns_n_s32(
// CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
// CHECK: [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
// CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
// CHECK: ret i16 [[TMP1]]
int16_t test_vqrshruns_n_s32(int32_t a) {
  return (int16_t)vqrshruns_n_s32(a, 16);
}

// CHECK-LABEL: @test_vqrshrund_n_s64(
// CHECK: [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32)
// CHECK: ret i32 [[VQRSHRUND_N_S64]]
int32_t test_vqrshrund_n_s64(int64_t a) {
  return (int32_t)vqrshrund_n_s64(a, 32);
}

// CHECK-LABEL: @test_vcvts_n_f32_s32(
// CHECK: [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1)
// CHECK: ret float [[VCVTS_N_F32_S32]]
float32_t test_vcvts_n_f32_s32(int32_t a) {
  return vcvts_n_f32_s32(a, 1);
}

// CHECK-LABEL: @test_vcvtd_n_f64_s64(
// CHECK: [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1)
// CHECK: ret double [[VCVTD_N_F64_S64]]
float64_t test_vcvtd_n_f64_s64(int64_t a) {
  return vcvtd_n_f64_s64(a, 1);
}

// CHECK-LABEL: @test_vcvts_n_f32_u32(
// CHECK: [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32)
// CHECK: ret float [[VCVTS_N_F32_U32]]
float32_t test_vcvts_n_f32_u32(uint32_t a) {
  return vcvts_n_f32_u32(a, 32);
}

// CHECK-LABEL: @test_vcvtd_n_f64_u64(
// CHECK: [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64)
// CHECK: ret double [[VCVTD_N_F64_U64]]
float64_t test_vcvtd_n_f64_u64(uint64_t a) {
  return vcvtd_n_f64_u64(a, 64);
}

// CHECK-LABEL: @test_vcvts_n_s32_f32(
// CHECK: [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1)
// CHECK: ret i32 [[VCVTS_N_S32_F32]]
int32_t test_vcvts_n_s32_f32(float32_t a) {
  return (int32_t)vcvts_n_s32_f32(a, 1);
}

// CHECK-LABEL: @test_vcvtd_n_s64_f64(
// CHECK: [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1)
// CHECK: ret i64 [[VCVTD_N_S64_F64]]
int64_t test_vcvtd_n_s64_f64(float64_t a) {
  return (int64_t)vcvtd_n_s64_f64(a, 1);
}

// CHECK-LABEL: @test_vcvts_n_u32_f32(
// CHECK: [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32)
// CHECK: ret i32 [[VCVTS_N_U32_F32]]
uint32_t test_vcvts_n_u32_f32(float32_t a) {
  return (uint32_t)vcvts_n_u32_f32(a, 32);
}

// CHECK-LABEL: @test_vcvtd_n_u64_f64(
// CHECK: [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64)
// CHECK: ret i64 [[VCVTD_N_U64_F64]]
uint64_t test_vcvtd_n_u64_f64(float64_t a) {
  return (uint64_t)vcvtd_n_u64_f64(a, 64);
}

// CHECK-LABEL: @test_vreinterpret_s8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
  return vreinterpret_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
  return vreinterpret_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
  return vreinterpret_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u8(
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
  return vreinterpret_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
  return vreinterpret_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
  return vreinterpret_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
  return vreinterpret_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
  return vreinterpret_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
  return vreinterpret_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_f64(float64x1_t a) {
  return vreinterpret_s8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p8(
// CHECK: ret <8 x i8> %a
int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
  return vreinterpret_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
  return vreinterpret_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) {
  return vreinterpret_s8_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
  return vreinterpret_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
  return vreinterpret_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
  return vreinterpret_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
  return vreinterpret_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u16(
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
  return vreinterpret_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
  return vreinterpret_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
  return vreinterpret_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
  return vreinterpret_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
  return vreinterpret_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_f64(float64x1_t a) {
  return vreinterpret_s16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
  return vreinterpret_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p16(
// CHECK: ret <4 x i16> %a
int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
  return vreinterpret_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) {
  return vreinterpret_s16_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
  return vreinterpret_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
  return vreinterpret_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
  return vreinterpret_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
  return vreinterpret_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
  return vreinterpret_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u32(
// CHECK: ret <2 x i32> %a
int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
  return vreinterpret_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
  return vreinterpret_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
  return vreinterpret_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
  return vreinterpret_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_f64(float64x1_t a) {
  return vreinterpret_s32_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
  return vreinterpret_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
  return vreinterpret_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
// CHECK: ret <2 x i32> [[TMP0]]
int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) {
  return vreinterpret_s32_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
  return vreinterpret_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
  return vreinterpret_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
  return vreinterpret_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
  return vreinterpret_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
  return vreinterpret_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
  return vreinterpret_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_u64(
// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
  return vreinterpret_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
  return vreinterpret_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
  return vreinterpret_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_f64(float64x1_t a) {
  return vreinterpret_s64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
  return vreinterpret_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
  return vreinterpret_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_s64_p64(
// CHECK: ret <1 x i64> %a
int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) {
  return vreinterpret_s64_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s8(
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
  return vreinterpret_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
  return vreinterpret_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
  return vreinterpret_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
  return vreinterpret_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
  return vreinterpret_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
  return vreinterpret_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
  return vreinterpret_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
  return vreinterpret_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
  return vreinterpret_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) {
  return vreinterpret_u8_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p8(
// CHECK: ret <8 x i8> %a
uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
  return vreinterpret_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
  return vreinterpret_u8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_u8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: ret <8 x i8> [[TMP0]]
uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) {
  return vreinterpret_u8_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
  return vreinterpret_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s16(
// CHECK: ret <4 x i16> %a
uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
  return vreinterpret_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
  return vreinterpret_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
  return vreinterpret_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
  return vreinterpret_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
  return vreinterpret_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
  return vreinterpret_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_u16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
  return vreinterpret_u16_f16(a);
}
14712 // CHECK-LABEL: @test_vreinterpret_u16_f32(
14713 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
14714 // CHECK: ret <4 x i16> [[TMP0]]
14715 uint16x4_t
test_vreinterpret_u16_f32(float32x2_t a
) {
14716 return vreinterpret_u16_f32(a
);
14719 // CHECK-LABEL: @test_vreinterpret_u16_f64(
14720 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
14721 // CHECK: ret <4 x i16> [[TMP0]]
14722 uint16x4_t
test_vreinterpret_u16_f64(float64x1_t a
) {
14723 return vreinterpret_u16_f64(a
);
14726 // CHECK-LABEL: @test_vreinterpret_u16_p8(
14727 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
14728 // CHECK: ret <4 x i16> [[TMP0]]
14729 uint16x4_t
test_vreinterpret_u16_p8(poly8x8_t a
) {
14730 return vreinterpret_u16_p8(a
);
14733 // CHECK-LABEL: @test_vreinterpret_u16_p16(
14734 // CHECK: ret <4 x i16> %a
14735 uint16x4_t
test_vreinterpret_u16_p16(poly16x4_t a
) {
14736 return vreinterpret_u16_p16(a
);
14739 // CHECK-LABEL: @test_vreinterpret_u16_p64(
14740 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
14741 // CHECK: ret <4 x i16> [[TMP0]]
14742 uint16x4_t
test_vreinterpret_u16_p64(poly64x1_t a
) {
14743 return vreinterpret_u16_p64(a
);
14746 // CHECK-LABEL: @test_vreinterpret_u32_s8(
14747 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
14748 // CHECK: ret <2 x i32> [[TMP0]]
14749 uint32x2_t
test_vreinterpret_u32_s8(int8x8_t a
) {
14750 return vreinterpret_u32_s8(a
);
14753 // CHECK-LABEL: @test_vreinterpret_u32_s16(
14754 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
14755 // CHECK: ret <2 x i32> [[TMP0]]
14756 uint32x2_t
test_vreinterpret_u32_s16(int16x4_t a
) {
14757 return vreinterpret_u32_s16(a
);
14760 // CHECK-LABEL: @test_vreinterpret_u32_s32(
14761 // CHECK: ret <2 x i32> %a
14762 uint32x2_t
test_vreinterpret_u32_s32(int32x2_t a
) {
14763 return vreinterpret_u32_s32(a
);
14766 // CHECK-LABEL: @test_vreinterpret_u32_s64(
14767 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
14768 // CHECK: ret <2 x i32> [[TMP0]]
14769 uint32x2_t
test_vreinterpret_u32_s64(int64x1_t a
) {
14770 return vreinterpret_u32_s64(a
);
14773 // CHECK-LABEL: @test_vreinterpret_u32_u8(
14774 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
14775 // CHECK: ret <2 x i32> [[TMP0]]
14776 uint32x2_t
test_vreinterpret_u32_u8(uint8x8_t a
) {
14777 return vreinterpret_u32_u8(a
);
14780 // CHECK-LABEL: @test_vreinterpret_u32_u16(
14781 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
14782 // CHECK: ret <2 x i32> [[TMP0]]
14783 uint32x2_t
test_vreinterpret_u32_u16(uint16x4_t a
) {
14784 return vreinterpret_u32_u16(a
);
14787 // CHECK-LABEL: @test_vreinterpret_u32_u64(
14788 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
14789 // CHECK: ret <2 x i32> [[TMP0]]
14790 uint32x2_t
test_vreinterpret_u32_u64(uint64x1_t a
) {
14791 return vreinterpret_u32_u64(a
);
14794 // CHECK-LABEL: @test_vreinterpret_u32_f16(
14795 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
14796 // CHECK: ret <2 x i32> [[TMP0]]
14797 uint32x2_t
test_vreinterpret_u32_f16(float16x4_t a
) {
14798 return vreinterpret_u32_f16(a
);
14801 // CHECK-LABEL: @test_vreinterpret_u32_f32(
14802 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
14803 // CHECK: ret <2 x i32> [[TMP0]]
14804 uint32x2_t
test_vreinterpret_u32_f32(float32x2_t a
) {
14805 return vreinterpret_u32_f32(a
);
14808 // CHECK-LABEL: @test_vreinterpret_u32_f64(
14809 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
14810 // CHECK: ret <2 x i32> [[TMP0]]
14811 uint32x2_t
test_vreinterpret_u32_f64(float64x1_t a
) {
14812 return vreinterpret_u32_f64(a
);
14815 // CHECK-LABEL: @test_vreinterpret_u32_p8(
14816 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
14817 // CHECK: ret <2 x i32> [[TMP0]]
14818 uint32x2_t
test_vreinterpret_u32_p8(poly8x8_t a
) {
14819 return vreinterpret_u32_p8(a
);
14822 // CHECK-LABEL: @test_vreinterpret_u32_p16(
14823 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
14824 // CHECK: ret <2 x i32> [[TMP0]]
14825 uint32x2_t
test_vreinterpret_u32_p16(poly16x4_t a
) {
14826 return vreinterpret_u32_p16(a
);
14829 // CHECK-LABEL: @test_vreinterpret_u32_p64(
14830 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
14831 // CHECK: ret <2 x i32> [[TMP0]]
14832 uint32x2_t
test_vreinterpret_u32_p64(poly64x1_t a
) {
14833 return vreinterpret_u32_p64(a
);
14836 // CHECK-LABEL: @test_vreinterpret_u64_s8(
14837 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
14838 // CHECK: ret <1 x i64> [[TMP0]]
14839 uint64x1_t
test_vreinterpret_u64_s8(int8x8_t a
) {
14840 return vreinterpret_u64_s8(a
);
14843 // CHECK-LABEL: @test_vreinterpret_u64_s16(
14844 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
14845 // CHECK: ret <1 x i64> [[TMP0]]
14846 uint64x1_t
test_vreinterpret_u64_s16(int16x4_t a
) {
14847 return vreinterpret_u64_s16(a
);
14850 // CHECK-LABEL: @test_vreinterpret_u64_s32(
14851 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
14852 // CHECK: ret <1 x i64> [[TMP0]]
14853 uint64x1_t
test_vreinterpret_u64_s32(int32x2_t a
) {
14854 return vreinterpret_u64_s32(a
);
14857 // CHECK-LABEL: @test_vreinterpret_u64_s64(
14858 // CHECK: ret <1 x i64> %a
14859 uint64x1_t
test_vreinterpret_u64_s64(int64x1_t a
) {
14860 return vreinterpret_u64_s64(a
);
14863 // CHECK-LABEL: @test_vreinterpret_u64_u8(
14864 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
14865 // CHECK: ret <1 x i64> [[TMP0]]
14866 uint64x1_t
test_vreinterpret_u64_u8(uint8x8_t a
) {
14867 return vreinterpret_u64_u8(a
);
14870 // CHECK-LABEL: @test_vreinterpret_u64_u16(
14871 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
14872 // CHECK: ret <1 x i64> [[TMP0]]
14873 uint64x1_t
test_vreinterpret_u64_u16(uint16x4_t a
) {
14874 return vreinterpret_u64_u16(a
);
14877 // CHECK-LABEL: @test_vreinterpret_u64_u32(
14878 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
14879 // CHECK: ret <1 x i64> [[TMP0]]
14880 uint64x1_t
test_vreinterpret_u64_u32(uint32x2_t a
) {
14881 return vreinterpret_u64_u32(a
);
14884 // CHECK-LABEL: @test_vreinterpret_u64_f16(
14885 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
14886 // CHECK: ret <1 x i64> [[TMP0]]
14887 uint64x1_t
test_vreinterpret_u64_f16(float16x4_t a
) {
14888 return vreinterpret_u64_f16(a
);
14891 // CHECK-LABEL: @test_vreinterpret_u64_f32(
14892 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
14893 // CHECK: ret <1 x i64> [[TMP0]]
14894 uint64x1_t
test_vreinterpret_u64_f32(float32x2_t a
) {
14895 return vreinterpret_u64_f32(a
);
14898 // CHECK-LABEL: @test_vreinterpret_u64_f64(
14899 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
14900 // CHECK: ret <1 x i64> [[TMP0]]
14901 uint64x1_t
test_vreinterpret_u64_f64(float64x1_t a
) {
14902 return vreinterpret_u64_f64(a
);
14905 // CHECK-LABEL: @test_vreinterpret_u64_p8(
14906 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
14907 // CHECK: ret <1 x i64> [[TMP0]]
14908 uint64x1_t
test_vreinterpret_u64_p8(poly8x8_t a
) {
14909 return vreinterpret_u64_p8(a
);
14912 // CHECK-LABEL: @test_vreinterpret_u64_p16(
14913 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
14914 // CHECK: ret <1 x i64> [[TMP0]]
14915 uint64x1_t
test_vreinterpret_u64_p16(poly16x4_t a
) {
14916 return vreinterpret_u64_p16(a
);
14919 // CHECK-LABEL: @test_vreinterpret_u64_p64(
14920 // CHECK: ret <1 x i64> %a
14921 uint64x1_t
test_vreinterpret_u64_p64(poly64x1_t a
) {
14922 return vreinterpret_u64_p64(a
);
14925 // CHECK-LABEL: @test_vreinterpret_f16_s8(
14926 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
14927 // CHECK: ret <4 x half> [[TMP0]]
14928 float16x4_t
test_vreinterpret_f16_s8(int8x8_t a
) {
14929 return vreinterpret_f16_s8(a
);
14932 // CHECK-LABEL: @test_vreinterpret_f16_s16(
14933 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
14934 // CHECK: ret <4 x half> [[TMP0]]
14935 float16x4_t
test_vreinterpret_f16_s16(int16x4_t a
) {
14936 return vreinterpret_f16_s16(a
);
14939 // CHECK-LABEL: @test_vreinterpret_f16_s32(
14940 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
14941 // CHECK: ret <4 x half> [[TMP0]]
14942 float16x4_t
test_vreinterpret_f16_s32(int32x2_t a
) {
14943 return vreinterpret_f16_s32(a
);
14946 // CHECK-LABEL: @test_vreinterpret_f16_s64(
14947 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
14948 // CHECK: ret <4 x half> [[TMP0]]
14949 float16x4_t
test_vreinterpret_f16_s64(int64x1_t a
) {
14950 return vreinterpret_f16_s64(a
);
14953 // CHECK-LABEL: @test_vreinterpret_f16_u8(
14954 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
14955 // CHECK: ret <4 x half> [[TMP0]]
14956 float16x4_t
test_vreinterpret_f16_u8(uint8x8_t a
) {
14957 return vreinterpret_f16_u8(a
);
14960 // CHECK-LABEL: @test_vreinterpret_f16_u16(
14961 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
14962 // CHECK: ret <4 x half> [[TMP0]]
14963 float16x4_t
test_vreinterpret_f16_u16(uint16x4_t a
) {
14964 return vreinterpret_f16_u16(a
);
14967 // CHECK-LABEL: @test_vreinterpret_f16_u32(
14968 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
14969 // CHECK: ret <4 x half> [[TMP0]]
14970 float16x4_t
test_vreinterpret_f16_u32(uint32x2_t a
) {
14971 return vreinterpret_f16_u32(a
);
14974 // CHECK-LABEL: @test_vreinterpret_f16_u64(
14975 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
14976 // CHECK: ret <4 x half> [[TMP0]]
14977 float16x4_t
test_vreinterpret_f16_u64(uint64x1_t a
) {
14978 return vreinterpret_f16_u64(a
);
14981 // CHECK-LABEL: @test_vreinterpret_f16_f32(
14982 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
14983 // CHECK: ret <4 x half> [[TMP0]]
14984 float16x4_t
test_vreinterpret_f16_f32(float32x2_t a
) {
14985 return vreinterpret_f16_f32(a
);
14988 // CHECK-LABEL: @test_vreinterpret_f16_f64(
14989 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
14990 // CHECK: ret <4 x half> [[TMP0]]
14991 float16x4_t
test_vreinterpret_f16_f64(float64x1_t a
) {
14992 return vreinterpret_f16_f64(a
);
14995 // CHECK-LABEL: @test_vreinterpret_f16_p8(
14996 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
14997 // CHECK: ret <4 x half> [[TMP0]]
14998 float16x4_t
test_vreinterpret_f16_p8(poly8x8_t a
) {
14999 return vreinterpret_f16_p8(a
);
15002 // CHECK-LABEL: @test_vreinterpret_f16_p16(
15003 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
15004 // CHECK: ret <4 x half> [[TMP0]]
15005 float16x4_t
test_vreinterpret_f16_p16(poly16x4_t a
) {
15006 return vreinterpret_f16_p16(a
);
15009 // CHECK-LABEL: @test_vreinterpret_f16_p64(
15010 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
15011 // CHECK: ret <4 x half> [[TMP0]]
15012 float16x4_t
test_vreinterpret_f16_p64(poly64x1_t a
) {
15013 return vreinterpret_f16_p64(a
);
15016 // CHECK-LABEL: @test_vreinterpret_f32_s8(
15017 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
15018 // CHECK: ret <2 x float> [[TMP0]]
15019 float32x2_t
test_vreinterpret_f32_s8(int8x8_t a
) {
15020 return vreinterpret_f32_s8(a
);
15023 // CHECK-LABEL: @test_vreinterpret_f32_s16(
15024 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
15025 // CHECK: ret <2 x float> [[TMP0]]
15026 float32x2_t
test_vreinterpret_f32_s16(int16x4_t a
) {
15027 return vreinterpret_f32_s16(a
);
15030 // CHECK-LABEL: @test_vreinterpret_f32_s32(
15031 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
15032 // CHECK: ret <2 x float> [[TMP0]]
15033 float32x2_t
test_vreinterpret_f32_s32(int32x2_t a
) {
15034 return vreinterpret_f32_s32(a
);
15037 // CHECK-LABEL: @test_vreinterpret_f32_s64(
15038 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
15039 // CHECK: ret <2 x float> [[TMP0]]
15040 float32x2_t
test_vreinterpret_f32_s64(int64x1_t a
) {
15041 return vreinterpret_f32_s64(a
);
15044 // CHECK-LABEL: @test_vreinterpret_f32_u8(
15045 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
15046 // CHECK: ret <2 x float> [[TMP0]]
15047 float32x2_t
test_vreinterpret_f32_u8(uint8x8_t a
) {
15048 return vreinterpret_f32_u8(a
);
15051 // CHECK-LABEL: @test_vreinterpret_f32_u16(
15052 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
15053 // CHECK: ret <2 x float> [[TMP0]]
15054 float32x2_t
test_vreinterpret_f32_u16(uint16x4_t a
) {
15055 return vreinterpret_f32_u16(a
);
15058 // CHECK-LABEL: @test_vreinterpret_f32_u32(
15059 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
15060 // CHECK: ret <2 x float> [[TMP0]]
15061 float32x2_t
test_vreinterpret_f32_u32(uint32x2_t a
) {
15062 return vreinterpret_f32_u32(a
);
15065 // CHECK-LABEL: @test_vreinterpret_f32_u64(
15066 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
15067 // CHECK: ret <2 x float> [[TMP0]]
15068 float32x2_t
test_vreinterpret_f32_u64(uint64x1_t a
) {
15069 return vreinterpret_f32_u64(a
);
15072 // CHECK-LABEL: @test_vreinterpret_f32_f16(
15073 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
15074 // CHECK: ret <2 x float> [[TMP0]]
15075 float32x2_t
test_vreinterpret_f32_f16(float16x4_t a
) {
15076 return vreinterpret_f32_f16(a
);
15079 // CHECK-LABEL: @test_vreinterpret_f32_f64(
15080 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
15081 // CHECK: ret <2 x float> [[TMP0]]
15082 float32x2_t
test_vreinterpret_f32_f64(float64x1_t a
) {
15083 return vreinterpret_f32_f64(a
);
15086 // CHECK-LABEL: @test_vreinterpret_f32_p8(
15087 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
15088 // CHECK: ret <2 x float> [[TMP0]]
15089 float32x2_t
test_vreinterpret_f32_p8(poly8x8_t a
) {
15090 return vreinterpret_f32_p8(a
);
15093 // CHECK-LABEL: @test_vreinterpret_f32_p16(
15094 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
15095 // CHECK: ret <2 x float> [[TMP0]]
15096 float32x2_t
test_vreinterpret_f32_p16(poly16x4_t a
) {
15097 return vreinterpret_f32_p16(a
);
15100 // CHECK-LABEL: @test_vreinterpret_f32_p64(
15101 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
15102 // CHECK: ret <2 x float> [[TMP0]]
15103 float32x2_t
test_vreinterpret_f32_p64(poly64x1_t a
) {
15104 return vreinterpret_f32_p64(a
);
15107 // CHECK-LABEL: @test_vreinterpret_f64_s8(
15108 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
15109 // CHECK: ret <1 x double> [[TMP0]]
15110 float64x1_t
test_vreinterpret_f64_s8(int8x8_t a
) {
15111 return vreinterpret_f64_s8(a
);
15114 // CHECK-LABEL: @test_vreinterpret_f64_s16(
15115 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
15116 // CHECK: ret <1 x double> [[TMP0]]
15117 float64x1_t
test_vreinterpret_f64_s16(int16x4_t a
) {
15118 return vreinterpret_f64_s16(a
);
15121 // CHECK-LABEL: @test_vreinterpret_f64_s32(
15122 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
15123 // CHECK: ret <1 x double> [[TMP0]]
15124 float64x1_t
test_vreinterpret_f64_s32(int32x2_t a
) {
15125 return vreinterpret_f64_s32(a
);
15128 // CHECK-LABEL: @test_vreinterpret_f64_s64(
15129 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
15130 // CHECK: ret <1 x double> [[TMP0]]
15131 float64x1_t
test_vreinterpret_f64_s64(int64x1_t a
) {
15132 return vreinterpret_f64_s64(a
);
15135 // CHECK-LABEL: @test_vreinterpret_f64_u8(
15136 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
15137 // CHECK: ret <1 x double> [[TMP0]]
15138 float64x1_t
test_vreinterpret_f64_u8(uint8x8_t a
) {
15139 return vreinterpret_f64_u8(a
);
15142 // CHECK-LABEL: @test_vreinterpret_f64_u16(
15143 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
15144 // CHECK: ret <1 x double> [[TMP0]]
15145 float64x1_t
test_vreinterpret_f64_u16(uint16x4_t a
) {
15146 return vreinterpret_f64_u16(a
);
15149 // CHECK-LABEL: @test_vreinterpret_f64_u32(
15150 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
15151 // CHECK: ret <1 x double> [[TMP0]]
15152 float64x1_t
test_vreinterpret_f64_u32(uint32x2_t a
) {
15153 return vreinterpret_f64_u32(a
);
15156 // CHECK-LABEL: @test_vreinterpret_f64_u64(
15157 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
15158 // CHECK: ret <1 x double> [[TMP0]]
15159 float64x1_t
test_vreinterpret_f64_u64(uint64x1_t a
) {
15160 return vreinterpret_f64_u64(a
);
15163 // CHECK-LABEL: @test_vreinterpret_f64_f16(
15164 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
15165 // CHECK: ret <1 x double> [[TMP0]]
15166 float64x1_t
test_vreinterpret_f64_f16(float16x4_t a
) {
15167 return vreinterpret_f64_f16(a
);
15170 // CHECK-LABEL: @test_vreinterpret_f64_f32(
15171 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
15172 // CHECK: ret <1 x double> [[TMP0]]
15173 float64x1_t
test_vreinterpret_f64_f32(float32x2_t a
) {
15174 return vreinterpret_f64_f32(a
);
15177 // CHECK-LABEL: @test_vreinterpret_f64_p8(
15178 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
15179 // CHECK: ret <1 x double> [[TMP0]]
15180 float64x1_t
test_vreinterpret_f64_p8(poly8x8_t a
) {
15181 return vreinterpret_f64_p8(a
);
15184 // CHECK-LABEL: @test_vreinterpret_f64_p16(
15185 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
15186 // CHECK: ret <1 x double> [[TMP0]]
15187 float64x1_t
test_vreinterpret_f64_p16(poly16x4_t a
) {
15188 return vreinterpret_f64_p16(a
);
15191 // CHECK-LABEL: @test_vreinterpret_f64_p64(
15192 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
15193 // CHECK: ret <1 x double> [[TMP0]]
15194 float64x1_t
test_vreinterpret_f64_p64(poly64x1_t a
) {
15195 return vreinterpret_f64_p64(a
);
15198 // CHECK-LABEL: @test_vreinterpret_p8_s8(
15199 // CHECK: ret <8 x i8> %a
15200 poly8x8_t
test_vreinterpret_p8_s8(int8x8_t a
) {
15201 return vreinterpret_p8_s8(a
);
15204 // CHECK-LABEL: @test_vreinterpret_p8_s16(
15205 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15206 // CHECK: ret <8 x i8> [[TMP0]]
15207 poly8x8_t
test_vreinterpret_p8_s16(int16x4_t a
) {
15208 return vreinterpret_p8_s16(a
);
15211 // CHECK-LABEL: @test_vreinterpret_p8_s32(
15212 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
15213 // CHECK: ret <8 x i8> [[TMP0]]
15214 poly8x8_t
test_vreinterpret_p8_s32(int32x2_t a
) {
15215 return vreinterpret_p8_s32(a
);
15218 // CHECK-LABEL: @test_vreinterpret_p8_s64(
15219 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15220 // CHECK: ret <8 x i8> [[TMP0]]
15221 poly8x8_t
test_vreinterpret_p8_s64(int64x1_t a
) {
15222 return vreinterpret_p8_s64(a
);
15225 // CHECK-LABEL: @test_vreinterpret_p8_u8(
15226 // CHECK: ret <8 x i8> %a
15227 poly8x8_t
test_vreinterpret_p8_u8(uint8x8_t a
) {
15228 return vreinterpret_p8_u8(a
);
15231 // CHECK-LABEL: @test_vreinterpret_p8_u16(
15232 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15233 // CHECK: ret <8 x i8> [[TMP0]]
15234 poly8x8_t
test_vreinterpret_p8_u16(uint16x4_t a
) {
15235 return vreinterpret_p8_u16(a
);
15238 // CHECK-LABEL: @test_vreinterpret_p8_u32(
15239 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
15240 // CHECK: ret <8 x i8> [[TMP0]]
15241 poly8x8_t
test_vreinterpret_p8_u32(uint32x2_t a
) {
15242 return vreinterpret_p8_u32(a
);
15245 // CHECK-LABEL: @test_vreinterpret_p8_u64(
15246 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15247 // CHECK: ret <8 x i8> [[TMP0]]
15248 poly8x8_t
test_vreinterpret_p8_u64(uint64x1_t a
) {
15249 return vreinterpret_p8_u64(a
);
15252 // CHECK-LABEL: @test_vreinterpret_p8_f16(
15253 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
15254 // CHECK: ret <8 x i8> [[TMP0]]
15255 poly8x8_t
test_vreinterpret_p8_f16(float16x4_t a
) {
15256 return vreinterpret_p8_f16(a
);
15259 // CHECK-LABEL: @test_vreinterpret_p8_f32(
15260 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
15261 // CHECK: ret <8 x i8> [[TMP0]]
15262 poly8x8_t
test_vreinterpret_p8_f32(float32x2_t a
) {
15263 return vreinterpret_p8_f32(a
);
15266 // CHECK-LABEL: @test_vreinterpret_p8_f64(
15267 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
15268 // CHECK: ret <8 x i8> [[TMP0]]
15269 poly8x8_t
test_vreinterpret_p8_f64(float64x1_t a
) {
15270 return vreinterpret_p8_f64(a
);
15273 // CHECK-LABEL: @test_vreinterpret_p8_p16(
15274 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
15275 // CHECK: ret <8 x i8> [[TMP0]]
15276 poly8x8_t
test_vreinterpret_p8_p16(poly16x4_t a
) {
15277 return vreinterpret_p8_p16(a
);
15280 // CHECK-LABEL: @test_vreinterpret_p8_p64(
15281 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
15282 // CHECK: ret <8 x i8> [[TMP0]]
15283 poly8x8_t
test_vreinterpret_p8_p64(poly64x1_t a
) {
15284 return vreinterpret_p8_p64(a
);
15287 // CHECK-LABEL: @test_vreinterpret_p16_s8(
15288 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
15289 // CHECK: ret <4 x i16> [[TMP0]]
15290 poly16x4_t
test_vreinterpret_p16_s8(int8x8_t a
) {
15291 return vreinterpret_p16_s8(a
);
15294 // CHECK-LABEL: @test_vreinterpret_p16_s16(
15295 // CHECK: ret <4 x i16> %a
15296 poly16x4_t
test_vreinterpret_p16_s16(int16x4_t a
) {
15297 return vreinterpret_p16_s16(a
);
15300 // CHECK-LABEL: @test_vreinterpret_p16_s32(
15301 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
15302 // CHECK: ret <4 x i16> [[TMP0]]
15303 poly16x4_t
test_vreinterpret_p16_s32(int32x2_t a
) {
15304 return vreinterpret_p16_s32(a
);
15307 // CHECK-LABEL: @test_vreinterpret_p16_s64(
15308 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
15309 // CHECK: ret <4 x i16> [[TMP0]]
15310 poly16x4_t
test_vreinterpret_p16_s64(int64x1_t a
) {
15311 return vreinterpret_p16_s64(a
);
15314 // CHECK-LABEL: @test_vreinterpret_p16_u8(
15315 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
15316 // CHECK: ret <4 x i16> [[TMP0]]
15317 poly16x4_t
test_vreinterpret_p16_u8(uint8x8_t a
) {
15318 return vreinterpret_p16_u8(a
);
15321 // CHECK-LABEL: @test_vreinterpret_p16_u16(
15322 // CHECK: ret <4 x i16> %a
15323 poly16x4_t
test_vreinterpret_p16_u16(uint16x4_t a
) {
15324 return vreinterpret_p16_u16(a
);
15327 // CHECK-LABEL: @test_vreinterpret_p16_u32(
15328 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
15329 // CHECK: ret <4 x i16> [[TMP0]]
15330 poly16x4_t
test_vreinterpret_p16_u32(uint32x2_t a
) {
15331 return vreinterpret_p16_u32(a
);
15334 // CHECK-LABEL: @test_vreinterpret_p16_u64(
15335 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
15336 // CHECK: ret <4 x i16> [[TMP0]]
15337 poly16x4_t
test_vreinterpret_p16_u64(uint64x1_t a
) {
15338 return vreinterpret_p16_u64(a
);
15341 // CHECK-LABEL: @test_vreinterpret_p16_f16(
15342 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
15343 // CHECK: ret <4 x i16> [[TMP0]]
15344 poly16x4_t
test_vreinterpret_p16_f16(float16x4_t a
) {
15345 return vreinterpret_p16_f16(a
);
15348 // CHECK-LABEL: @test_vreinterpret_p16_f32(
15349 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
15350 // CHECK: ret <4 x i16> [[TMP0]]
15351 poly16x4_t
test_vreinterpret_p16_f32(float32x2_t a
) {
15352 return vreinterpret_p16_f32(a
);
15355 // CHECK-LABEL: @test_vreinterpret_p16_f64(
15356 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
15357 // CHECK: ret <4 x i16> [[TMP0]]
15358 poly16x4_t
test_vreinterpret_p16_f64(float64x1_t a
) {
15359 return vreinterpret_p16_f64(a
);
15362 // CHECK-LABEL: @test_vreinterpret_p16_p8(
15363 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
15364 // CHECK: ret <4 x i16> [[TMP0]]
15365 poly16x4_t
test_vreinterpret_p16_p8(poly8x8_t a
) {
15366 return vreinterpret_p16_p8(a
);
15369 // CHECK-LABEL: @test_vreinterpret_p16_p64(
15370 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
15371 // CHECK: ret <4 x i16> [[TMP0]]
15372 poly16x4_t
test_vreinterpret_p16_p64(poly64x1_t a
) {
15373 return vreinterpret_p16_p64(a
);
15376 // CHECK-LABEL: @test_vreinterpret_p64_s8(
15377 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
15378 // CHECK: ret <1 x i64> [[TMP0]]
15379 poly64x1_t
test_vreinterpret_p64_s8(int8x8_t a
) {
15380 return vreinterpret_p64_s8(a
);
15383 // CHECK-LABEL: @test_vreinterpret_p64_s16(
15384 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
15385 // CHECK: ret <1 x i64> [[TMP0]]
15386 poly64x1_t
test_vreinterpret_p64_s16(int16x4_t a
) {
15387 return vreinterpret_p64_s16(a
);
15390 // CHECK-LABEL: @test_vreinterpret_p64_s32(
15391 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
15392 // CHECK: ret <1 x i64> [[TMP0]]
15393 poly64x1_t
test_vreinterpret_p64_s32(int32x2_t a
) {
15394 return vreinterpret_p64_s32(a
);
15397 // CHECK-LABEL: @test_vreinterpret_p64_s64(
15398 // CHECK: ret <1 x i64> %a
15399 poly64x1_t
test_vreinterpret_p64_s64(int64x1_t a
) {
15400 return vreinterpret_p64_s64(a
);
15403 // CHECK-LABEL: @test_vreinterpret_p64_u8(
15404 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
15405 // CHECK: ret <1 x i64> [[TMP0]]
15406 poly64x1_t
test_vreinterpret_p64_u8(uint8x8_t a
) {
15407 return vreinterpret_p64_u8(a
);
15410 // CHECK-LABEL: @test_vreinterpret_p64_u16(
15411 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
15412 // CHECK: ret <1 x i64> [[TMP0]]
15413 poly64x1_t
test_vreinterpret_p64_u16(uint16x4_t a
) {
15414 return vreinterpret_p64_u16(a
);
15417 // CHECK-LABEL: @test_vreinterpret_p64_u32(
15418 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
15419 // CHECK: ret <1 x i64> [[TMP0]]
15420 poly64x1_t
test_vreinterpret_p64_u32(uint32x2_t a
) {
15421 return vreinterpret_p64_u32(a
);
15424 // CHECK-LABEL: @test_vreinterpret_p64_u64(
15425 // CHECK: ret <1 x i64> %a
15426 poly64x1_t
test_vreinterpret_p64_u64(uint64x1_t a
) {
15427 return vreinterpret_p64_u64(a
);
15430 // CHECK-LABEL: @test_vreinterpret_p64_f16(
15431 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
15432 // CHECK: ret <1 x i64> [[TMP0]]
15433 poly64x1_t
test_vreinterpret_p64_f16(float16x4_t a
) {
15434 return vreinterpret_p64_f16(a
);
15437 // CHECK-LABEL: @test_vreinterpret_p64_f32(
15438 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
15439 // CHECK: ret <1 x i64> [[TMP0]]
15440 poly64x1_t
test_vreinterpret_p64_f32(float32x2_t a
) {
15441 return vreinterpret_p64_f32(a
);
15444 // CHECK-LABEL: @test_vreinterpret_p64_f64(
15445 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
15446 // CHECK: ret <1 x i64> [[TMP0]]
15447 poly64x1_t
test_vreinterpret_p64_f64(float64x1_t a
) {
15448 return vreinterpret_p64_f64(a
);
15451 // CHECK-LABEL: @test_vreinterpret_p64_p8(
15452 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
15453 // CHECK: ret <1 x i64> [[TMP0]]
15454 poly64x1_t
test_vreinterpret_p64_p8(poly8x8_t a
) {
15455 return vreinterpret_p64_p8(a
);
15458 // CHECK-LABEL: @test_vreinterpret_p64_p16(
15459 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
15460 // CHECK: ret <1 x i64> [[TMP0]]
15461 poly64x1_t
test_vreinterpret_p64_p16(poly16x4_t a
) {
15462 return vreinterpret_p64_p16(a
);
15465 // CHECK-LABEL: @test_vreinterpretq_s8_s16(
15466 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15467 // CHECK: ret <16 x i8> [[TMP0]]
15468 int8x16_t
test_vreinterpretq_s8_s16(int16x8_t a
) {
15469 return vreinterpretq_s8_s16(a
);
15472 // CHECK-LABEL: @test_vreinterpretq_s8_s32(
15473 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
15474 // CHECK: ret <16 x i8> [[TMP0]]
15475 int8x16_t
test_vreinterpretq_s8_s32(int32x4_t a
) {
15476 return vreinterpretq_s8_s32(a
);
15479 // CHECK-LABEL: @test_vreinterpretq_s8_s64(
15480 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
15481 // CHECK: ret <16 x i8> [[TMP0]]
15482 int8x16_t
test_vreinterpretq_s8_s64(int64x2_t a
) {
15483 return vreinterpretq_s8_s64(a
);
15486 // CHECK-LABEL: @test_vreinterpretq_s8_u8(
15487 // CHECK: ret <16 x i8> %a
15488 int8x16_t
test_vreinterpretq_s8_u8(uint8x16_t a
) {
15489 return vreinterpretq_s8_u8(a
);
15492 // CHECK-LABEL: @test_vreinterpretq_s8_u16(
15493 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15494 // CHECK: ret <16 x i8> [[TMP0]]
15495 int8x16_t
test_vreinterpretq_s8_u16(uint16x8_t a
) {
15496 return vreinterpretq_s8_u16(a
);
15499 // CHECK-LABEL: @test_vreinterpretq_s8_u32(
15500 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
15501 // CHECK: ret <16 x i8> [[TMP0]]
15502 int8x16_t
test_vreinterpretq_s8_u32(uint32x4_t a
) {
15503 return vreinterpretq_s8_u32(a
);
15506 // CHECK-LABEL: @test_vreinterpretq_s8_u64(
15507 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
15508 // CHECK: ret <16 x i8> [[TMP0]]
15509 int8x16_t
test_vreinterpretq_s8_u64(uint64x2_t a
) {
15510 return vreinterpretq_s8_u64(a
);
15513 // CHECK-LABEL: @test_vreinterpretq_s8_f16(
15514 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
15515 // CHECK: ret <16 x i8> [[TMP0]]
15516 int8x16_t
test_vreinterpretq_s8_f16(float16x8_t a
) {
15517 return vreinterpretq_s8_f16(a
);
15520 // CHECK-LABEL: @test_vreinterpretq_s8_f32(
15521 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
15522 // CHECK: ret <16 x i8> [[TMP0]]
15523 int8x16_t
test_vreinterpretq_s8_f32(float32x4_t a
) {
15524 return vreinterpretq_s8_f32(a
);
15527 // CHECK-LABEL: @test_vreinterpretq_s8_f64(
15528 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
15529 // CHECK: ret <16 x i8> [[TMP0]]
15530 int8x16_t
test_vreinterpretq_s8_f64(float64x2_t a
) {
15531 return vreinterpretq_s8_f64(a
);
15534 // CHECK-LABEL: @test_vreinterpretq_s8_p8(
15535 // CHECK: ret <16 x i8> %a
15536 int8x16_t
test_vreinterpretq_s8_p8(poly8x16_t a
) {
15537 return vreinterpretq_s8_p8(a
);
15540 // CHECK-LABEL: @test_vreinterpretq_s8_p16(
15541 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15542 // CHECK: ret <16 x i8> [[TMP0]]
15543 int8x16_t
test_vreinterpretq_s8_p16(poly16x8_t a
) {
15544 return vreinterpretq_s8_p16(a
);
15547 // CHECK-LABEL: @test_vreinterpretq_s8_p64(
15548 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
15549 // CHECK: ret <16 x i8> [[TMP0]]
15550 int8x16_t
test_vreinterpretq_s8_p64(poly64x2_t a
) {
15551 return vreinterpretq_s8_p64(a
);
15554 // CHECK-LABEL: @test_vreinterpretq_s16_s8(
15555 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
15556 // CHECK: ret <8 x i16> [[TMP0]]
15557 int16x8_t
test_vreinterpretq_s16_s8(int8x16_t a
) {
15558 return vreinterpretq_s16_s8(a
);
15561 // CHECK-LABEL: @test_vreinterpretq_s16_s32(
15562 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
15563 // CHECK: ret <8 x i16> [[TMP0]]
15564 int16x8_t
test_vreinterpretq_s16_s32(int32x4_t a
) {
15565 return vreinterpretq_s16_s32(a
);
15568 // CHECK-LABEL: @test_vreinterpretq_s16_s64(
15569 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
15570 // CHECK: ret <8 x i16> [[TMP0]]
15571 int16x8_t
test_vreinterpretq_s16_s64(int64x2_t a
) {
15572 return vreinterpretq_s16_s64(a
);
15575 // CHECK-LABEL: @test_vreinterpretq_s16_u8(
15576 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
15577 // CHECK: ret <8 x i16> [[TMP0]]
15578 int16x8_t
test_vreinterpretq_s16_u8(uint8x16_t a
) {
15579 return vreinterpretq_s16_u8(a
);
15582 // CHECK-LABEL: @test_vreinterpretq_s16_u16(
15583 // CHECK: ret <8 x i16> %a
15584 int16x8_t
test_vreinterpretq_s16_u16(uint16x8_t a
) {
15585 return vreinterpretq_s16_u16(a
);
15588 // CHECK-LABEL: @test_vreinterpretq_s16_u32(
15589 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
15590 // CHECK: ret <8 x i16> [[TMP0]]
15591 int16x8_t
test_vreinterpretq_s16_u32(uint32x4_t a
) {
15592 return vreinterpretq_s16_u32(a
);
15595 // CHECK-LABEL: @test_vreinterpretq_s16_u64(
15596 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
15597 // CHECK: ret <8 x i16> [[TMP0]]
15598 int16x8_t
test_vreinterpretq_s16_u64(uint64x2_t a
) {
15599 return vreinterpretq_s16_u64(a
);
15602 // CHECK-LABEL: @test_vreinterpretq_s16_f16(
15603 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
15604 // CHECK: ret <8 x i16> [[TMP0]]
15605 int16x8_t
test_vreinterpretq_s16_f16(float16x8_t a
) {
15606 return vreinterpretq_s16_f16(a
);
15609 // CHECK-LABEL: @test_vreinterpretq_s16_f32(
15610 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
15611 // CHECK: ret <8 x i16> [[TMP0]]
15612 int16x8_t
test_vreinterpretq_s16_f32(float32x4_t a
) {
15613 return vreinterpretq_s16_f32(a
);
15616 // CHECK-LABEL: @test_vreinterpretq_s16_f64(
15617 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
15618 // CHECK: ret <8 x i16> [[TMP0]]
15619 int16x8_t
test_vreinterpretq_s16_f64(float64x2_t a
) {
15620 return vreinterpretq_s16_f64(a
);
15623 // CHECK-LABEL: @test_vreinterpretq_s16_p8(
15624 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
15625 // CHECK: ret <8 x i16> [[TMP0]]
15626 int16x8_t
test_vreinterpretq_s16_p8(poly8x16_t a
) {
15627 return vreinterpretq_s16_p8(a
);
15630 // CHECK-LABEL: @test_vreinterpretq_s16_p16(
15631 // CHECK: ret <8 x i16> %a
15632 int16x8_t
test_vreinterpretq_s16_p16(poly16x8_t a
) {
15633 return vreinterpretq_s16_p16(a
);
15636 // CHECK-LABEL: @test_vreinterpretq_s16_p64(
15637 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
15638 // CHECK: ret <8 x i16> [[TMP0]]
15639 int16x8_t
test_vreinterpretq_s16_p64(poly64x2_t a
) {
15640 return vreinterpretq_s16_p64(a
);
15643 // CHECK-LABEL: @test_vreinterpretq_s32_s8(
15644 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
15645 // CHECK: ret <4 x i32> [[TMP0]]
15646 int32x4_t
test_vreinterpretq_s32_s8(int8x16_t a
) {
15647 return vreinterpretq_s32_s8(a
);
15650 // CHECK-LABEL: @test_vreinterpretq_s32_s16(
15651 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
15652 // CHECK: ret <4 x i32> [[TMP0]]
15653 int32x4_t
test_vreinterpretq_s32_s16(int16x8_t a
) {
15654 return vreinterpretq_s32_s16(a
);
15657 // CHECK-LABEL: @test_vreinterpretq_s32_s64(
15658 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
15659 // CHECK: ret <4 x i32> [[TMP0]]
15660 int32x4_t
test_vreinterpretq_s32_s64(int64x2_t a
) {
15661 return vreinterpretq_s32_s64(a
);
15664 // CHECK-LABEL: @test_vreinterpretq_s32_u8(
15665 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
15666 // CHECK: ret <4 x i32> [[TMP0]]
15667 int32x4_t
test_vreinterpretq_s32_u8(uint8x16_t a
) {
15668 return vreinterpretq_s32_u8(a
);
15671 // CHECK-LABEL: @test_vreinterpretq_s32_u16(
15672 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
15673 // CHECK: ret <4 x i32> [[TMP0]]
15674 int32x4_t
test_vreinterpretq_s32_u16(uint16x8_t a
) {
15675 return vreinterpretq_s32_u16(a
);
15678 // CHECK-LABEL: @test_vreinterpretq_s32_u32(
15679 // CHECK: ret <4 x i32> %a
15680 int32x4_t
test_vreinterpretq_s32_u32(uint32x4_t a
) {
15681 return vreinterpretq_s32_u32(a
);
15684 // CHECK-LABEL: @test_vreinterpretq_s32_u64(
15685 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
15686 // CHECK: ret <4 x i32> [[TMP0]]
15687 int32x4_t
test_vreinterpretq_s32_u64(uint64x2_t a
) {
15688 return vreinterpretq_s32_u64(a
);
15691 // CHECK-LABEL: @test_vreinterpretq_s32_f16(
15692 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
15693 // CHECK: ret <4 x i32> [[TMP0]]
15694 int32x4_t
test_vreinterpretq_s32_f16(float16x8_t a
) {
15695 return vreinterpretq_s32_f16(a
);
15698 // CHECK-LABEL: @test_vreinterpretq_s32_f32(
15699 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
15700 // CHECK: ret <4 x i32> [[TMP0]]
15701 int32x4_t
test_vreinterpretq_s32_f32(float32x4_t a
) {
15702 return vreinterpretq_s32_f32(a
);
15705 // CHECK-LABEL: @test_vreinterpretq_s32_f64(
15706 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
15707 // CHECK: ret <4 x i32> [[TMP0]]
15708 int32x4_t
test_vreinterpretq_s32_f64(float64x2_t a
) {
15709 return vreinterpretq_s32_f64(a
);
15712 // CHECK-LABEL: @test_vreinterpretq_s32_p8(
15713 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
15714 // CHECK: ret <4 x i32> [[TMP0]]
15715 int32x4_t
test_vreinterpretq_s32_p8(poly8x16_t a
) {
15716 return vreinterpretq_s32_p8(a
);
15719 // CHECK-LABEL: @test_vreinterpretq_s32_p16(
15720 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
15721 // CHECK: ret <4 x i32> [[TMP0]]
15722 int32x4_t
test_vreinterpretq_s32_p16(poly16x8_t a
) {
15723 return vreinterpretq_s32_p16(a
);
15726 // CHECK-LABEL: @test_vreinterpretq_s32_p64(
15727 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
15728 // CHECK: ret <4 x i32> [[TMP0]]
15729 int32x4_t
test_vreinterpretq_s32_p64(poly64x2_t a
) {
15730 return vreinterpretq_s32_p64(a
);
15733 // CHECK-LABEL: @test_vreinterpretq_s64_s8(
15734 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
15735 // CHECK: ret <2 x i64> [[TMP0]]
15736 int64x2_t
test_vreinterpretq_s64_s8(int8x16_t a
) {
15737 return vreinterpretq_s64_s8(a
);
15740 // CHECK-LABEL: @test_vreinterpretq_s64_s16(
15741 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
15742 // CHECK: ret <2 x i64> [[TMP0]]
15743 int64x2_t
test_vreinterpretq_s64_s16(int16x8_t a
) {
15744 return vreinterpretq_s64_s16(a
);
15747 // CHECK-LABEL: @test_vreinterpretq_s64_s32(
15748 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
15749 // CHECK: ret <2 x i64> [[TMP0]]
15750 int64x2_t
test_vreinterpretq_s64_s32(int32x4_t a
) {
15751 return vreinterpretq_s64_s32(a
);
15754 // CHECK-LABEL: @test_vreinterpretq_s64_u8(
15755 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
15756 // CHECK: ret <2 x i64> [[TMP0]]
15757 int64x2_t
test_vreinterpretq_s64_u8(uint8x16_t a
) {
15758 return vreinterpretq_s64_u8(a
);
15761 // CHECK-LABEL: @test_vreinterpretq_s64_u16(
15762 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
15763 // CHECK: ret <2 x i64> [[TMP0]]
15764 int64x2_t
test_vreinterpretq_s64_u16(uint16x8_t a
) {
15765 return vreinterpretq_s64_u16(a
);
15768 // CHECK-LABEL: @test_vreinterpretq_s64_u32(
15769 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
15770 // CHECK: ret <2 x i64> [[TMP0]]
15771 int64x2_t
test_vreinterpretq_s64_u32(uint32x4_t a
) {
15772 return vreinterpretq_s64_u32(a
);
15775 // CHECK-LABEL: @test_vreinterpretq_s64_u64(
15776 // CHECK: ret <2 x i64> %a
15777 int64x2_t
test_vreinterpretq_s64_u64(uint64x2_t a
) {
15778 return vreinterpretq_s64_u64(a
);
15781 // CHECK-LABEL: @test_vreinterpretq_s64_f16(
15782 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
15783 // CHECK: ret <2 x i64> [[TMP0]]
15784 int64x2_t
test_vreinterpretq_s64_f16(float16x8_t a
) {
15785 return vreinterpretq_s64_f16(a
);
15788 // CHECK-LABEL: @test_vreinterpretq_s64_f32(
15789 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
15790 // CHECK: ret <2 x i64> [[TMP0]]
15791 int64x2_t
test_vreinterpretq_s64_f32(float32x4_t a
) {
15792 return vreinterpretq_s64_f32(a
);
15795 // CHECK-LABEL: @test_vreinterpretq_s64_f64(
15796 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
15797 // CHECK: ret <2 x i64> [[TMP0]]
15798 int64x2_t
test_vreinterpretq_s64_f64(float64x2_t a
) {
15799 return vreinterpretq_s64_f64(a
);
15802 // CHECK-LABEL: @test_vreinterpretq_s64_p8(
15803 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
15804 // CHECK: ret <2 x i64> [[TMP0]]
15805 int64x2_t
test_vreinterpretq_s64_p8(poly8x16_t a
) {
15806 return vreinterpretq_s64_p8(a
);
15809 // CHECK-LABEL: @test_vreinterpretq_s64_p16(
15810 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
15811 // CHECK: ret <2 x i64> [[TMP0]]
15812 int64x2_t
test_vreinterpretq_s64_p16(poly16x8_t a
) {
15813 return vreinterpretq_s64_p16(a
);
15816 // CHECK-LABEL: @test_vreinterpretq_s64_p64(
15817 // CHECK: ret <2 x i64> %a
15818 int64x2_t
test_vreinterpretq_s64_p64(poly64x2_t a
) {
15819 return vreinterpretq_s64_p64(a
);
15822 // CHECK-LABEL: @test_vreinterpretq_u8_s8(
15823 // CHECK: ret <16 x i8> %a
15824 uint8x16_t
test_vreinterpretq_u8_s8(int8x16_t a
) {
15825 return vreinterpretq_u8_s8(a
);
15828 // CHECK-LABEL: @test_vreinterpretq_u8_s16(
15829 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15830 // CHECK: ret <16 x i8> [[TMP0]]
15831 uint8x16_t
test_vreinterpretq_u8_s16(int16x8_t a
) {
15832 return vreinterpretq_u8_s16(a
);
15835 // CHECK-LABEL: @test_vreinterpretq_u8_s32(
15836 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
15837 // CHECK: ret <16 x i8> [[TMP0]]
15838 uint8x16_t
test_vreinterpretq_u8_s32(int32x4_t a
) {
15839 return vreinterpretq_u8_s32(a
);
15842 // CHECK-LABEL: @test_vreinterpretq_u8_s64(
15843 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
15844 // CHECK: ret <16 x i8> [[TMP0]]
15845 uint8x16_t
test_vreinterpretq_u8_s64(int64x2_t a
) {
15846 return vreinterpretq_u8_s64(a
);
15849 // CHECK-LABEL: @test_vreinterpretq_u8_u16(
15850 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15851 // CHECK: ret <16 x i8> [[TMP0]]
15852 uint8x16_t
test_vreinterpretq_u8_u16(uint16x8_t a
) {
15853 return vreinterpretq_u8_u16(a
);
15856 // CHECK-LABEL: @test_vreinterpretq_u8_u32(
15857 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
15858 // CHECK: ret <16 x i8> [[TMP0]]
15859 uint8x16_t
test_vreinterpretq_u8_u32(uint32x4_t a
) {
15860 return vreinterpretq_u8_u32(a
);
15863 // CHECK-LABEL: @test_vreinterpretq_u8_u64(
15864 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
15865 // CHECK: ret <16 x i8> [[TMP0]]
15866 uint8x16_t
test_vreinterpretq_u8_u64(uint64x2_t a
) {
15867 return vreinterpretq_u8_u64(a
);
15870 // CHECK-LABEL: @test_vreinterpretq_u8_f16(
15871 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
15872 // CHECK: ret <16 x i8> [[TMP0]]
15873 uint8x16_t
test_vreinterpretq_u8_f16(float16x8_t a
) {
15874 return vreinterpretq_u8_f16(a
);
15877 // CHECK-LABEL: @test_vreinterpretq_u8_f32(
15878 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
15879 // CHECK: ret <16 x i8> [[TMP0]]
15880 uint8x16_t
test_vreinterpretq_u8_f32(float32x4_t a
) {
15881 return vreinterpretq_u8_f32(a
);
15884 // CHECK-LABEL: @test_vreinterpretq_u8_f64(
15885 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
15886 // CHECK: ret <16 x i8> [[TMP0]]
15887 uint8x16_t
test_vreinterpretq_u8_f64(float64x2_t a
) {
15888 return vreinterpretq_u8_f64(a
);
15891 // CHECK-LABEL: @test_vreinterpretq_u8_p8(
15892 // CHECK: ret <16 x i8> %a
15893 uint8x16_t
test_vreinterpretq_u8_p8(poly8x16_t a
) {
15894 return vreinterpretq_u8_p8(a
);
15897 // CHECK-LABEL: @test_vreinterpretq_u8_p16(
15898 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
15899 // CHECK: ret <16 x i8> [[TMP0]]
15900 uint8x16_t
test_vreinterpretq_u8_p16(poly16x8_t a
) {
15901 return vreinterpretq_u8_p16(a
);
15904 // CHECK-LABEL: @test_vreinterpretq_u8_p64(
15905 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
15906 // CHECK: ret <16 x i8> [[TMP0]]
15907 uint8x16_t
test_vreinterpretq_u8_p64(poly64x2_t a
) {
15908 return vreinterpretq_u8_p64(a
);
15911 // CHECK-LABEL: @test_vreinterpretq_u16_s8(
15912 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
15913 // CHECK: ret <8 x i16> [[TMP0]]
15914 uint16x8_t
test_vreinterpretq_u16_s8(int8x16_t a
) {
15915 return vreinterpretq_u16_s8(a
);
15918 // CHECK-LABEL: @test_vreinterpretq_u16_s16(
15919 // CHECK: ret <8 x i16> %a
15920 uint16x8_t
test_vreinterpretq_u16_s16(int16x8_t a
) {
15921 return vreinterpretq_u16_s16(a
);
15924 // CHECK-LABEL: @test_vreinterpretq_u16_s32(
15925 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
15926 // CHECK: ret <8 x i16> [[TMP0]]
15927 uint16x8_t
test_vreinterpretq_u16_s32(int32x4_t a
) {
15928 return vreinterpretq_u16_s32(a
);
15931 // CHECK-LABEL: @test_vreinterpretq_u16_s64(
15932 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
15933 // CHECK: ret <8 x i16> [[TMP0]]
15934 uint16x8_t
test_vreinterpretq_u16_s64(int64x2_t a
) {
15935 return vreinterpretq_u16_s64(a
);
15938 // CHECK-LABEL: @test_vreinterpretq_u16_u8(
15939 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
15940 // CHECK: ret <8 x i16> [[TMP0]]
15941 uint16x8_t
test_vreinterpretq_u16_u8(uint8x16_t a
) {
15942 return vreinterpretq_u16_u8(a
);
15945 // CHECK-LABEL: @test_vreinterpretq_u16_u32(
15946 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
15947 // CHECK: ret <8 x i16> [[TMP0]]
15948 uint16x8_t
test_vreinterpretq_u16_u32(uint32x4_t a
) {
15949 return vreinterpretq_u16_u32(a
);
15952 // CHECK-LABEL: @test_vreinterpretq_u16_u64(
15953 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
15954 // CHECK: ret <8 x i16> [[TMP0]]
15955 uint16x8_t
test_vreinterpretq_u16_u64(uint64x2_t a
) {
15956 return vreinterpretq_u16_u64(a
);
15959 // CHECK-LABEL: @test_vreinterpretq_u16_f16(
15960 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
15961 // CHECK: ret <8 x i16> [[TMP0]]
15962 uint16x8_t
test_vreinterpretq_u16_f16(float16x8_t a
) {
15963 return vreinterpretq_u16_f16(a
);
15966 // CHECK-LABEL: @test_vreinterpretq_u16_f32(
15967 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
15968 // CHECK: ret <8 x i16> [[TMP0]]
15969 uint16x8_t
test_vreinterpretq_u16_f32(float32x4_t a
) {
15970 return vreinterpretq_u16_f32(a
);
15973 // CHECK-LABEL: @test_vreinterpretq_u16_f64(
15974 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
15975 // CHECK: ret <8 x i16> [[TMP0]]
15976 uint16x8_t
test_vreinterpretq_u16_f64(float64x2_t a
) {
15977 return vreinterpretq_u16_f64(a
);
15980 // CHECK-LABEL: @test_vreinterpretq_u16_p8(
15981 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
15982 // CHECK: ret <8 x i16> [[TMP0]]
15983 uint16x8_t
test_vreinterpretq_u16_p8(poly8x16_t a
) {
15984 return vreinterpretq_u16_p8(a
);
15987 // CHECK-LABEL: @test_vreinterpretq_u16_p16(
15988 // CHECK: ret <8 x i16> %a
15989 uint16x8_t
test_vreinterpretq_u16_p16(poly16x8_t a
) {
15990 return vreinterpretq_u16_p16(a
);
15993 // CHECK-LABEL: @test_vreinterpretq_u16_p64(
15994 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
15995 // CHECK: ret <8 x i16> [[TMP0]]
15996 uint16x8_t
test_vreinterpretq_u16_p64(poly64x2_t a
) {
15997 return vreinterpretq_u16_p64(a
);
16000 // CHECK-LABEL: @test_vreinterpretq_u32_s8(
16001 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
16002 // CHECK: ret <4 x i32> [[TMP0]]
16003 uint32x4_t
test_vreinterpretq_u32_s8(int8x16_t a
) {
16004 return vreinterpretq_u32_s8(a
);
16007 // CHECK-LABEL: @test_vreinterpretq_u32_s16(
16008 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
16009 // CHECK: ret <4 x i32> [[TMP0]]
16010 uint32x4_t
test_vreinterpretq_u32_s16(int16x8_t a
) {
16011 return vreinterpretq_u32_s16(a
);
16014 // CHECK-LABEL: @test_vreinterpretq_u32_s32(
16015 // CHECK: ret <4 x i32> %a
16016 uint32x4_t
test_vreinterpretq_u32_s32(int32x4_t a
) {
16017 return vreinterpretq_u32_s32(a
);
16020 // CHECK-LABEL: @test_vreinterpretq_u32_s64(
16021 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
16022 // CHECK: ret <4 x i32> [[TMP0]]
16023 uint32x4_t
test_vreinterpretq_u32_s64(int64x2_t a
) {
16024 return vreinterpretq_u32_s64(a
);
16027 // CHECK-LABEL: @test_vreinterpretq_u32_u8(
16028 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
16029 // CHECK: ret <4 x i32> [[TMP0]]
16030 uint32x4_t
test_vreinterpretq_u32_u8(uint8x16_t a
) {
16031 return vreinterpretq_u32_u8(a
);
16034 // CHECK-LABEL: @test_vreinterpretq_u32_u16(
16035 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
16036 // CHECK: ret <4 x i32> [[TMP0]]
16037 uint32x4_t
test_vreinterpretq_u32_u16(uint16x8_t a
) {
16038 return vreinterpretq_u32_u16(a
);
16041 // CHECK-LABEL: @test_vreinterpretq_u32_u64(
16042 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
16043 // CHECK: ret <4 x i32> [[TMP0]]
16044 uint32x4_t
test_vreinterpretq_u32_u64(uint64x2_t a
) {
16045 return vreinterpretq_u32_u64(a
);
16048 // CHECK-LABEL: @test_vreinterpretq_u32_f16(
16049 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
16050 // CHECK: ret <4 x i32> [[TMP0]]
16051 uint32x4_t
test_vreinterpretq_u32_f16(float16x8_t a
) {
16052 return vreinterpretq_u32_f16(a
);
16055 // CHECK-LABEL: @test_vreinterpretq_u32_f32(
16056 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
16057 // CHECK: ret <4 x i32> [[TMP0]]
16058 uint32x4_t
test_vreinterpretq_u32_f32(float32x4_t a
) {
16059 return vreinterpretq_u32_f32(a
);
16062 // CHECK-LABEL: @test_vreinterpretq_u32_f64(
16063 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
16064 // CHECK: ret <4 x i32> [[TMP0]]
16065 uint32x4_t
test_vreinterpretq_u32_f64(float64x2_t a
) {
16066 return vreinterpretq_u32_f64(a
);
16069 // CHECK-LABEL: @test_vreinterpretq_u32_p8(
16070 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
16071 // CHECK: ret <4 x i32> [[TMP0]]
16072 uint32x4_t
test_vreinterpretq_u32_p8(poly8x16_t a
) {
16073 return vreinterpretq_u32_p8(a
);
16076 // CHECK-LABEL: @test_vreinterpretq_u32_p16(
16077 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
16078 // CHECK: ret <4 x i32> [[TMP0]]
16079 uint32x4_t
test_vreinterpretq_u32_p16(poly16x8_t a
) {
16080 return vreinterpretq_u32_p16(a
);
16083 // CHECK-LABEL: @test_vreinterpretq_u32_p64(
16084 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
16085 // CHECK: ret <4 x i32> [[TMP0]]
16086 uint32x4_t
test_vreinterpretq_u32_p64(poly64x2_t a
) {
16087 return vreinterpretq_u32_p64(a
);
16090 // CHECK-LABEL: @test_vreinterpretq_u64_s8(
16091 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
16092 // CHECK: ret <2 x i64> [[TMP0]]
16093 uint64x2_t
test_vreinterpretq_u64_s8(int8x16_t a
) {
16094 return vreinterpretq_u64_s8(a
);
16097 // CHECK-LABEL: @test_vreinterpretq_u64_s16(
16098 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
16099 // CHECK: ret <2 x i64> [[TMP0]]
16100 uint64x2_t
test_vreinterpretq_u64_s16(int16x8_t a
) {
16101 return vreinterpretq_u64_s16(a
);
16104 // CHECK-LABEL: @test_vreinterpretq_u64_s32(
16105 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
16106 // CHECK: ret <2 x i64> [[TMP0]]
16107 uint64x2_t
test_vreinterpretq_u64_s32(int32x4_t a
) {
16108 return vreinterpretq_u64_s32(a
);
16111 // CHECK-LABEL: @test_vreinterpretq_u64_s64(
16112 // CHECK: ret <2 x i64> %a
16113 uint64x2_t
test_vreinterpretq_u64_s64(int64x2_t a
) {
16114 return vreinterpretq_u64_s64(a
);
16117 // CHECK-LABEL: @test_vreinterpretq_u64_u8(
16118 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
16119 // CHECK: ret <2 x i64> [[TMP0]]
16120 uint64x2_t
test_vreinterpretq_u64_u8(uint8x16_t a
) {
16121 return vreinterpretq_u64_u8(a
);
16124 // CHECK-LABEL: @test_vreinterpretq_u64_u16(
16125 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
16126 // CHECK: ret <2 x i64> [[TMP0]]
16127 uint64x2_t
test_vreinterpretq_u64_u16(uint16x8_t a
) {
16128 return vreinterpretq_u64_u16(a
);
16131 // CHECK-LABEL: @test_vreinterpretq_u64_u32(
16132 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
16133 // CHECK: ret <2 x i64> [[TMP0]]
16134 uint64x2_t
test_vreinterpretq_u64_u32(uint32x4_t a
) {
16135 return vreinterpretq_u64_u32(a
);
16138 // CHECK-LABEL: @test_vreinterpretq_u64_f16(
16139 // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
16140 // CHECK: ret <2 x i64> [[TMP0]]
16141 uint64x2_t
test_vreinterpretq_u64_f16(float16x8_t a
) {
16142 return vreinterpretq_u64_f16(a
);
16145 // CHECK-LABEL: @test_vreinterpretq_u64_f32(
16146 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
16147 // CHECK: ret <2 x i64> [[TMP0]]
16148 uint64x2_t
test_vreinterpretq_u64_f32(float32x4_t a
) {
16149 return vreinterpretq_u64_f32(a
);
16152 // CHECK-LABEL: @test_vreinterpretq_u64_f64(
16153 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
16154 // CHECK: ret <2 x i64> [[TMP0]]
16155 uint64x2_t
test_vreinterpretq_u64_f64(float64x2_t a
) {
16156 return vreinterpretq_u64_f64(a
);
16159 // CHECK-LABEL: @test_vreinterpretq_u64_p8(
16160 // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
16161 // CHECK: ret <2 x i64> [[TMP0]]
16162 uint64x2_t
test_vreinterpretq_u64_p8(poly8x16_t a
) {
16163 return vreinterpretq_u64_p8(a
);
16166 // CHECK-LABEL: @test_vreinterpretq_u64_p16(
16167 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
16168 // CHECK: ret <2 x i64> [[TMP0]]
16169 uint64x2_t
test_vreinterpretq_u64_p16(poly16x8_t a
) {
16170 return vreinterpretq_u64_p16(a
);
16173 // CHECK-LABEL: @test_vreinterpretq_u64_p64(
16174 // CHECK: ret <2 x i64> %a
16175 uint64x2_t
test_vreinterpretq_u64_p64(poly64x2_t a
) {
16176 return vreinterpretq_u64_p64(a
);
// CHECK-LABEL: @test_vreinterpretq_f16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
  return vreinterpretq_f16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
  return vreinterpretq_f16_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
  return vreinterpretq_f32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
  return vreinterpretq_f32_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
  return vreinterpretq_f64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
  return vreinterpretq_f64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
  return vreinterpretq_f64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
  return vreinterpretq_f64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
  return vreinterpretq_f64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
  return vreinterpretq_f64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
  return vreinterpretq_f64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
  return vreinterpretq_f64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
  return vreinterpretq_f64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
  return vreinterpretq_f64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
  return vreinterpretq_f64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
  return vreinterpretq_f64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
  return vreinterpretq_f64_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
  return vreinterpretq_p8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
  return vreinterpretq_p8_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s16(
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u16(
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
  return vreinterpretq_p16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
  return vreinterpretq_p16_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
  return vreinterpretq_p64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
  return vreinterpretq_p64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
  return vreinterpretq_p64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s64(
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
  return vreinterpretq_p64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
  return vreinterpretq_p64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
  return vreinterpretq_p64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
  return vreinterpretq_p64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u64(
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
  return vreinterpretq_p64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
  return vreinterpretq_p64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
  return vreinterpretq_p64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
  return vreinterpretq_p64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
  return vreinterpretq_p64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
  return vreinterpretq_p64_p16(a);
}

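// Scalar floating-point absolute difference: vabds_f32 and vabdd_f64 lower to
// the llvm.aarch64.sisd.fabd intrinsic on float and double respectively.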
// CHECK-LABEL: @test_vabds_f32(
// CHECK: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b)
// CHECK: ret float [[VABDS_F32_I]]
float32_t test_vabds_f32(float32_t a, float32_t b) {
  return vabds_f32(a, b);
}

// CHECK-LABEL: @test_vabdd_f64(
// CHECK: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b)
// CHECK: ret double [[VABDD_F64_I]]
float64_t test_vabdd_f64(float64_t a, float64_t b) {
  return vabdd_f64(a, b);
}

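// vuqadd/vuqaddq (signed saturating accumulate of an unsigned value) lower to
// llvm.aarch64.neon.suqadd at the matching vector width.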
// CHECK-LABEL: @test_vuqaddq_s8(
// CHECK-NEXT: [[V:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK-NEXT: ret <16 x i8> [[V]]
int8x16_t test_vuqaddq_s8(int8x16_t a, uint8x16_t b) {
  return vuqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s32(
// CHECK: [[V:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK-NEXT: ret <4 x i32> [[V]]
int32x4_t test_vuqaddq_s32(int32x4_t a, uint32x4_t b) {
  return vuqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s64(
// CHECK: [[V:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK-NEXT: ret <2 x i64> [[V]]
int64x2_t test_vuqaddq_s64(int64x2_t a, uint64x2_t b) {
  return vuqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s16(
// CHECK: [[V:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK-NEXT: ret <8 x i16> [[V]]
int16x8_t test_vuqaddq_s16(int16x8_t a, uint16x8_t b) {
  return vuqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vuqadd_s8(
// CHECK-NEXT: [[V:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK-NEXT: ret <8 x i8> [[V]]
int8x8_t test_vuqadd_s8(int8x8_t a, uint8x8_t b) {
  return vuqadd_s8(a, b);
}

// CHECK-LABEL: @test_vuqadd_s32(
// CHECK: [[V:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK-NEXT: ret <2 x i32> [[V]]
int32x2_t test_vuqadd_s32(int32x2_t a, uint32x2_t b) {
  return vuqadd_s32(a, b);
}

// CHECK-LABEL: @test_vuqadd_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: ret <1 x i64> [[VUQADD2_I]]
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
  return vuqadd_s64(a, b);
}

// CHECK-LABEL: @test_vuqadd_s16(
// CHECK: [[V:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK-NEXT: ret <4 x i16> [[V]]
int16x4_t test_vuqadd_s16(int16x4_t a, uint16x4_t b) {
  return vuqadd_s16(a, b);
}

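// vsqadd/vsqaddq (unsigned saturating accumulate of a signed value) lower to
// llvm.aarch64.neon.usqadd at the matching vector width.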
// CHECK-LABEL: @test_vsqadd_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: ret <1 x i64> [[VSQADD2_I]]
uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
  return vsqadd_u64(a, b);
}

// CHECK-LABEL: @test_vsqadd_u8(
// CHECK: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSQADD_I]]
uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
  return vsqadd_u8(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u8(
// CHECK: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSQADD_I]]
uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
  return vsqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vsqadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VSQADD2_I]]
uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
  return vsqadd_u16(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VSQADD2_I]]
uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
  return vsqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vsqadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VSQADD2_I]]
uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
  return vsqadd_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VSQADD2_I]]
uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
  return vsqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: ret <2 x i64> [[VSQADD2_I]]
uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
  return vsqaddq_u64(a, b);
}

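// Unary operations on the single-lane 64-bit integer vector: absolute value,
// saturating absolute value, saturating negate and plain negate on int64x1_t.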
// CHECK-LABEL: @test_vabs_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a)
// CHECK: ret <1 x i64> [[VABS1_I]]
int64x1_t test_vabs_s64(int64x1_t a) {
  return vabs_s64(a);
}

// CHECK-LABEL: @test_vqabs_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a)
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQABS_V1_I]]
int64x1_t test_vqabs_s64(int64x1_t a) {
  return vqabs_s64(a);
}

// CHECK-LABEL: @test_vqneg_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a)
// CHECK: [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQNEG_V1_I]]
int64x1_t test_vqneg_s64(int64x1_t a) {
  return vqneg_s64(a);
}

// CHECK-LABEL: @test_vneg_s64(
// CHECK: [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
// CHECK: ret <1 x i64> [[SUB_I]]
int64x1_t test_vneg_s64(int64x1_t a) {
  return vneg_s64(a);
}

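// Across-vector floating-point reductions: vaddv, vmaxv, vminv, vmaxnmv and
// vminnmv lower to the corresponding llvm.aarch64.neon.f*v intrinsics and
// return a scalar float or double.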
// CHECK-LABEL: @test_vaddv_f32(
// CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VADDV_F32_I]]
float32_t test_vaddv_f32(float32x2_t a) {
  return vaddv_f32(a);
}

// CHECK-LABEL: @test_vaddvq_f32(
// CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a)
// CHECK: ret float [[VADDVQ_F32_I]]
float32_t test_vaddvq_f32(float32x4_t a) {
  return vaddvq_f32(a);
}

// CHECK-LABEL: @test_vaddvq_f64(
// CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VADDVQ_F64_I]]
float64_t test_vaddvq_f64(float64x2_t a) {
  return vaddvq_f64(a);
}

// CHECK-LABEL: @test_vmaxv_f32(
// CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VMAXV_F32_I]]
float32_t test_vmaxv_f32(float32x2_t a) {
  return vmaxv_f32(a);
}

// CHECK-LABEL: @test_vmaxvq_f64(
// CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VMAXVQ_F64_I]]
float64_t test_vmaxvq_f64(float64x2_t a) {
  return vmaxvq_f64(a);
}

// CHECK-LABEL: @test_vminv_f32(
// CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VMINV_F32_I]]
float32_t test_vminv_f32(float32x2_t a) {
  return vminv_f32(a);
}

// CHECK-LABEL: @test_vminvq_f64(
// CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VMINVQ_F64_I]]
float64_t test_vminvq_f64(float64x2_t a) {
  return vminvq_f64(a);
}

// CHECK-LABEL: @test_vmaxnmvq_f64(
// CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VMAXNMVQ_F64_I]]
float64_t test_vmaxnmvq_f64(float64x2_t a) {
  return vmaxnmvq_f64(a);
}

// CHECK-LABEL: @test_vmaxnmv_f32(
// CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VMAXNMV_F32_I]]
float32_t test_vmaxnmv_f32(float32x2_t a) {
  return vmaxnmv_f32(a);
}

// CHECK-LABEL: @test_vminnmvq_f64(
// CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VMINNMVQ_F64_I]]
float64_t test_vminnmvq_f64(float64x2_t a) {
  return vminnmvq_f64(a);
}

// CHECK-LABEL: @test_vminnmv_f32(
// CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VMINNMV_F32_I]]
float32_t test_vminnmv_f32(float32x2_t a) {
  return vminnmv_f32(a);
}

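// Pairwise add of 64-bit lanes (vpaddq) uses llvm.aarch64.neon.addp, while the
// scalar-result forms (vpaddd, vaddvq) use the add-across-vector saddv/uaddv
// intrinsics returning i64.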
// CHECK-LABEL: @test_vpaddq_s64(
// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
  return vpaddq_s64(a, b);
}

// CHECK-LABEL: @test_vpaddq_u64(
// CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vpaddq_u64(a, b);
}

// CHECK-LABEL: @test_vpaddd_u64(
// CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK: ret i64 [[VPADDD_U64_I]]
uint64_t test_vpaddd_u64(uint64x2_t a) {
  return vpaddd_u64(a);
}

// CHECK-LABEL: @test_vaddvq_s64(
// CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a)
// CHECK: ret i64 [[VADDVQ_S64_I]]
int64_t test_vaddvq_s64(int64x2_t a) {
  return vaddvq_s64(a);
}

// CHECK-LABEL: @test_vaddvq_u64(
// CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK: ret i64 [[VADDVQ_U64_I]]
uint64_t test_vaddvq_u64(uint64x2_t a) {
  return vaddvq_u64(a);
}

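// Arithmetic on float64x1_t lowers to ordinary IR fadd/fsub/fmul/fdiv on
// <1 x double>; the fused vfma/vfms forms call llvm.fma.v1f64.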
// CHECK-LABEL: @test_vadd_f64(
// CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, %b
// CHECK: ret <1 x double> [[ADD_I]]
float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
  return vadd_f64(a, b);
}

// CHECK-LABEL: @test_vmul_f64(
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %a, %b
// CHECK: ret <1 x double> [[MUL_I]]
float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
  return vmul_f64(a, b);
}

// CHECK-LABEL: @test_vdiv_f64(
// CHECK: [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
// CHECK: ret <1 x double> [[DIV_I]]
float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
  return vdiv_f64(a, b);
}

// CHECK-LABEL: @test_vmla_f64(
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
// CHECK: ret <1 x double> [[ADD_I]]
float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmla_f64(a, b, c);
}

// CHECK-LABEL: @test_vmls_f64(
// CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
// CHECK: ret <1 x double> [[SUB_I]]
float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vmls_f64(a, b, c);
}

// CHECK-LABEL: @test_vfma_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
// CHECK: ret <1 x double> [[TMP3]]
float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfma_f64(a, b, c);
}

// CHECK-LABEL: @test_vfms_f64(
// CHECK: [[SUB_I:%.*]] = fneg <1 x double> %b
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
// CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a)
// CHECK: ret <1 x double> [[TMP3]]
float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
  return vfms_f64(a, b, c);
}

// CHECK-LABEL: @test_vsub_f64(
// CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, %b
// CHECK: ret <1 x double> [[SUB_I]]
float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
  return vsub_f64(a, b);
}

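// Absolute difference, max/min and maxnm/minnm on float64x1_t call the
// matching llvm.aarch64.neon.* intrinsic on <1 x double>.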
// CHECK-LABEL: @test_vabd_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VABD2_I]]
float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
  return vabd_f64(a, b);
}

// CHECK-LABEL: @test_vmax_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VMAX2_I]]
float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
  return vmax_f64(a, b);
}

// CHECK-LABEL: @test_vmin_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VMIN2_I]]
float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
  return vmin_f64(a, b);
}

// CHECK-LABEL: @test_vmaxnm_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VMAXNM2_I]]
float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
  return vmaxnm_f64(a, b);
}

// CHECK-LABEL: @test_vminnm_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VMINNM2_I]]
float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
  return vminnm_f64(a, b);
}

// CHECK-LABEL: @test_vabs_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VABS1_I]]
float64x1_t test_vabs_f64(float64x1_t a) {
  return vabs_f64(a);
}

// CHECK-LABEL: @test_vneg_f64(
// CHECK: [[SUB_I:%.*]] = fneg <1 x double> %a
// CHECK: ret <1 x double> [[SUB_I]]
float64x1_t test_vneg_f64(float64x1_t a) {
  return vneg_f64(a);
}

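// float64x1_t to 64-bit integer conversions: vcvt uses fcvtzs/fcvtzu (round
// toward zero); the vcvtn/vcvtp/vcvtm/vcvta variants use the to-nearest,
// toward +infinity, toward -infinity and ties-away rounding intrinsics.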
// CHECK-LABEL: @test_vcvt_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[TMP1]]
int64x1_t test_vcvt_s64_f64(float64x1_t a) {
  return vcvt_s64_f64(a);
}

// CHECK-LABEL: @test_vcvt_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[TMP1]]
uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
  return vcvt_u64_f64(a);
}

// CHECK-LABEL: @test_vcvtn_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTN1_I]]
int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
  return vcvtn_s64_f64(a);
}

// CHECK-LABEL: @test_vcvtn_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTN1_I]]
uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
  return vcvtn_u64_f64(a);
}

// CHECK-LABEL: @test_vcvtp_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTP1_I]]
int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
  return vcvtp_s64_f64(a);
}

// CHECK-LABEL: @test_vcvtp_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTP1_I]]
uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
  return vcvtp_u64_f64(a);
}

// CHECK-LABEL: @test_vcvtm_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTM1_I]]
int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
  return vcvtm_s64_f64(a);
}

// CHECK-LABEL: @test_vcvtm_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTM1_I]]
uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
  return vcvtm_u64_f64(a);
}

// CHECK-LABEL: @test_vcvta_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTA1_I]]
int64x1_t test_vcvta_s64_f64(float64x1_t a) {
  return vcvta_s64_f64(a);
}

// CHECK-LABEL: @test_vcvta_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
// CHECK: ret <1 x i64> [[VCVTA1_I]]
uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
  return vcvta_u64_f64(a);
}

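// Integer to double conversions lower to sitofp/uitofp; the _n_ fixed-point
// variants use the vcvtfp2fx*/vcvtfx*2fp intrinsics with the fraction-bit
// count (64 here) passed as an i32 immediate.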
// CHECK-LABEL: @test_vcvt_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_s64(int64x1_t a) {
  return vcvt_f64_s64(a);
}

// CHECK-LABEL: @test_vcvt_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double>
// CHECK: ret <1 x double> [[VCVT_I]]
float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
  return vcvt_f64_u64(a);
}

// CHECK-LABEL: @test_vcvt_n_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
// CHECK: ret <1 x i64> [[VCVT_N1]]
int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
  return vcvt_n_s64_f64(a, 64);
}

// CHECK-LABEL: @test_vcvt_n_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
// CHECK: ret <1 x i64> [[VCVT_N1]]
uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
  return vcvt_n_u64_f64(a, 64);
}

// CHECK-LABEL: @test_vcvt_n_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
// CHECK: ret <1 x double> [[VCVT_N1]]
float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
  return vcvt_n_f64_s64(a, 64);
}

// CHECK-LABEL: @test_vcvt_n_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
// CHECK: ret <1 x double> [[VCVT_N1]]
float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
  return vcvt_n_f64_u64(a, 64);
}

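// Rounding on <1 x double>: vrndn, vrnda, vrndp, vrndm, vrndx, vrnd and vrndi
// map to llvm.roundeven, llvm.round, llvm.ceil, llvm.floor, llvm.rint,
// llvm.trunc and llvm.nearbyint respectively.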
// CHECK-LABEL: @test_vrndn_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDN1_I]]
float64x1_t test_vrndn_f64(float64x1_t a) {
  return vrndn_f64(a);
}

// CHECK-LABEL: @test_vrnda_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDA1_I]]
float64x1_t test_vrnda_f64(float64x1_t a) {
  return vrnda_f64(a);
}

// CHECK-LABEL: @test_vrndp_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDP1_I]]
float64x1_t test_vrndp_f64(float64x1_t a) {
  return vrndp_f64(a);
}

// CHECK-LABEL: @test_vrndm_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDM1_I]]
float64x1_t test_vrndm_f64(float64x1_t a) {
  return vrndm_f64(a);
}

// CHECK-LABEL: @test_vrndx_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDX1_I]]
float64x1_t test_vrndx_f64(float64x1_t a) {
  return vrndx_f64(a);
}

// CHECK-LABEL: @test_vrnd_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDZ1_I]]
float64x1_t test_vrnd_f64(float64x1_t a) {
  return vrnd_f64(a);
}

// CHECK-LABEL: @test_vrndi_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRNDI1_I]]
float64x1_t test_vrndi_f64(float64x1_t a) {
  return vrndi_f64(a);
}

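// Reciprocal and reciprocal-square-root estimate/step intrinsics on
// float64x1_t; vsqrt lowers to llvm.sqrt.v1f64, the rest to the aarch64.neon
// frecpe/frecps/frsqrte/frsqrts intrinsics.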
// CHECK-LABEL: @test_vrsqrte_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRSQRTE_V1_I]]
float64x1_t test_vrsqrte_f64(float64x1_t a) {
  return vrsqrte_f64(a);
}

// CHECK-LABEL: @test_vrecpe_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VRECPE_V1_I]]
float64x1_t test_vrecpe_f64(float64x1_t a) {
  return vrecpe_f64(a);
}

// CHECK-LABEL: @test_vsqrt_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
// CHECK: ret <1 x double> [[VSQRT_I]]
float64x1_t test_vsqrt_f64(float64x1_t a) {
  return vsqrt_f64(a);
}

// CHECK-LABEL: @test_vrecps_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: ret <1 x double> [[VRECPS_V2_I]]
float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
  return vrecps_f64(a, b);
}

// CHECK-LABEL: @test_vrsqrts_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b)
// CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK: ret <1 x double> [[VRSQRTS_V2_I]]
float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
  return vrsqrts_f64(a, b);
}

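// Across-vector integer reductions on two-lane i32 vectors: signed/unsigned
// min, max and add return an i32; the widening vaddlv forms return an i64.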
// CHECK-LABEL: @test_vminv_s32(
// CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VMINV_S32_I]]
int32_t test_vminv_s32(int32x2_t a) {
  return vminv_s32(a);
}

// CHECK-LABEL: @test_vminv_u32(
// CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VMINV_U32_I]]
uint32_t test_vminv_u32(uint32x2_t a) {
  return vminv_u32(a);
}

// CHECK-LABEL: @test_vmaxv_s32(
// CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VMAXV_S32_I]]
int32_t test_vmaxv_s32(int32x2_t a) {
  return vmaxv_s32(a);
}

// CHECK-LABEL: @test_vmaxv_u32(
// CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VMAXV_U32_I]]
uint32_t test_vmaxv_u32(uint32x2_t a) {
  return vmaxv_u32(a);
}

// CHECK-LABEL: @test_vaddv_s32(
// CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VADDV_S32_I]]
int32_t test_vaddv_s32(int32x2_t a) {
  return vaddv_s32(a);
}

// CHECK-LABEL: @test_vaddv_u32(
// CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VADDV_U32_I]]
uint32_t test_vaddv_u32(uint32x2_t a) {
  return vaddv_u32(a);
}

// CHECK-LABEL: @test_vaddlv_s32(
// CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a)
// CHECK: ret i64 [[VADDLV_S32_I]]
int64_t test_vaddlv_s32(int32x2_t a) {
  return vaddlv_s32(a);
}

// CHECK-LABEL: @test_vaddlv_u32(
// CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a)
// CHECK: ret i64 [[VADDLV_U32_I]]
uint64_t test_vaddlv_u32(uint32x2_t a) {
  return vaddlv_u32(a);
}