clang/test/CodeGen/AArch64/neon-perm.c

   1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
   2 // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
   3
   4 // REQUIRES: aarch64-registered-target || arm-registered-target
   5
   6 #include <arm_neon.h>
   7
   8 // CHECK-LABEL: @test_vuzp1_s8(
   9 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  10 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
  11 int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) {
       // Forwards a and b to vuzp1_s8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, ..., 14>.
  12   return vuzp1_s8(a, b);
  13 }
  14
  15 // CHECK-LABEL: @test_vuzp1q_s8(
  16 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  17 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
  18 int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) {
       // Forwards a and b to vuzp1q_s8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, ..., 30>.
  19   return vuzp1q_s8(a, b);
  20 }
  21
  22 // CHECK-LABEL: @test_vuzp1_s16(
  23 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  24 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
  25 int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) {
       // Forwards a and b to vuzp1_s16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, 4, 6>.
  26   return vuzp1_s16(a, b);
  27 }
  28
  29 // CHECK-LABEL: @test_vuzp1q_s16(
  30 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  31 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
  32 int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) {
       // Forwards a and b to vuzp1q_s16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, ..., 14>.
  33   return vuzp1q_s16(a, b);
  34 }
  35
  36 // CHECK-LABEL: @test_vuzp1_s32(
  37 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
  38 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
  39 int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) {
       // Forwards a and b to vuzp1_s32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
  40   return vuzp1_s32(a, b);
  41 }
  42
  43 // CHECK-LABEL: @test_vuzp1q_s32(
  44 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  45 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
  46 int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) {
       // Forwards a and b to vuzp1q_s32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, 4, 6>.
  47   return vuzp1q_s32(a, b);
  48 }
  49
  50 // CHECK-LABEL: @test_vuzp1q_s64(
  51 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
  52 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
  53 int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) {
       // Forwards a and b to vuzp1q_s64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
  54   return vuzp1q_s64(a, b);
  55 }
  56
  57 // CHECK-LABEL: @test_vuzp1_u8(
  58 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  59 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
  60 uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) {
       // Forwards a and b to vuzp1_u8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, ..., 14>.
  61   return vuzp1_u8(a, b);
  62 }
  63
  64 // CHECK-LABEL: @test_vuzp1q_u8(
  65 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  66 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
  67 uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) {
       // Forwards a and b to vuzp1q_u8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, ..., 30>.
  68   return vuzp1q_u8(a, b);
  69 }
  70
  71 // CHECK-LABEL: @test_vuzp1_u16(
  72 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  73 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
  74 uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) {
       // Forwards a and b to vuzp1_u16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, 4, 6>.
  75   return vuzp1_u16(a, b);
  76 }
  77
  78 // CHECK-LABEL: @test_vuzp1q_u16(
  79 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  80 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
  81 uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) {
       // Forwards a and b to vuzp1q_u16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, ..., 14>.
  82   return vuzp1q_u16(a, b);
  83 }
  84
  85 // CHECK-LABEL: @test_vuzp1_u32(
  86 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
  87 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
  88 uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) {
       // Forwards a and b to vuzp1_u32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
  89   return vuzp1_u32(a, b);
  90 }
  91
  92 // CHECK-LABEL: @test_vuzp1q_u32(
  93 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  94 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
  95 uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) {
       // Forwards a and b to vuzp1q_u32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, 4, 6>.
  96   return vuzp1q_u32(a, b);
  97 }
  98
  99 // CHECK-LABEL: @test_vuzp1q_u64(
 100 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 101 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 102 uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) {
       // Forwards a and b to vuzp1q_u64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
 103   return vuzp1q_u64(a, b);
 104 }
 105
 106 // CHECK-LABEL: @test_vuzp1_f32(
 107 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
 108 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 109 float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) {
       // Forwards a and b to vuzp1_f32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
 110   return vuzp1_f32(a, b);
 111 }
 112
 113 // CHECK-LABEL: @test_vuzp1q_f32(
 114 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 115 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 116 float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) {
       // Forwards a and b to vuzp1q_f32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, 4, 6>.
 117   return vuzp1q_f32(a, b);
 118 }
 119
 120 // CHECK-LABEL: @test_vuzp1q_f64(
 121 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
 122 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 123 float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) {
       // Forwards a and b to vuzp1q_f64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
 124   return vuzp1q_f64(a, b);
 125 }
 126
 127 // CHECK-LABEL: @test_vuzp1_p8(
 128 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 129 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 130 poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) {
       // Forwards a and b to vuzp1_p8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, ..., 14>.
 131   return vuzp1_p8(a, b);
 132 }
 133
 134 // CHECK-LABEL: @test_vuzp1q_p8(
 135 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
 136 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 137 poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) {
       // Forwards a and b to vuzp1q_p8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, ..., 30>.
 138   return vuzp1q_p8(a, b);
 139 }
 140
 141 // CHECK-LABEL: @test_vuzp1_p16(
 142 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 143 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 144 poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) {
       // Forwards a and b to vuzp1_p16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, 4, 6>.
 145   return vuzp1_p16(a, b);
 146 }
 147
 148 // CHECK-LABEL: @test_vuzp1q_p16(
 149 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 150 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 151 poly16x8_t test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) {
       // Forwards a and b to vuzp1q_p16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2, ..., 14>.
 152   return vuzp1q_p16(a, b);
 153 }
 154
 155 // CHECK-LABEL: @test_vuzp2_s8(
 156 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 157 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 158 int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) {
       // Forwards a and b to vuzp2_s8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, ..., 15>.
 159   return vuzp2_s8(a, b);
 160 }
 161
 162 // CHECK-LABEL: @test_vuzp2q_s8(
 163 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
 164 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 165 int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) {
       // Forwards a and b to vuzp2q_s8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, ..., 31>.
 166   return vuzp2q_s8(a, b);
 167 }
 168
 169 // CHECK-LABEL: @test_vuzp2_s16(
 170 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 171 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 172 int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) {
       // Forwards a and b to vuzp2_s16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, 5, 7>.
 173   return vuzp2_s16(a, b);
 174 }
 175
 176 // CHECK-LABEL: @test_vuzp2q_s16(
 177 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 178 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 179 int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) {
       // Forwards a and b to vuzp2q_s16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, ..., 15>.
 180   return vuzp2q_s16(a, b);
 181 }
 182
 183 // CHECK-LABEL: @test_vuzp2_s32(
 184 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 185 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 186 int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) {
       // Forwards a and b to vuzp2_s32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3>.
 187   return vuzp2_s32(a, b);
 188 }
 189
 190 // CHECK-LABEL: @test_vuzp2q_s32(
 191 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 192 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 193 int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) {
       // Forwards a and b to vuzp2q_s32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, 5, 7>.
 194   return vuzp2q_s32(a, b);
 195 }
 196
 197 // CHECK-LABEL: @test_vuzp2q_s64(
 198 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 199 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 200 int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) {
       // Forwards a and b to vuzp2q_s64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3>.
 201   return vuzp2q_s64(a, b);
 202 }
 203
 204 // CHECK-LABEL: @test_vuzp2_u8(
 205 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 206 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 207 uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) {
       // Forwards a and b to vuzp2_u8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, ..., 15>.
 208   return vuzp2_u8(a, b);
 209 }
 210
 211 // CHECK-LABEL: @test_vuzp2q_u8(
 212 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
 213 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 214 uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) {
       // Forwards a and b to vuzp2q_u8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, ..., 31>.
 215   return vuzp2q_u8(a, b);
 216 }
 217
 218 // CHECK-LABEL: @test_vuzp2_u16(
 219 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 220 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 221 uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) {
       // Forwards a and b to vuzp2_u16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, 5, 7>.
 222   return vuzp2_u16(a, b);
 223 }
 224
 225 // CHECK-LABEL: @test_vuzp2q_u16(
 226 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 227 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 228 uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) {
       // Forwards a and b to vuzp2q_u16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, ..., 15>.
 229   return vuzp2q_u16(a, b);
 230 }
 231
 232 // CHECK-LABEL: @test_vuzp2_u32(
 233 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 234 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 235 uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) {
       // Forwards a and b to vuzp2_u32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3>.
 236   return vuzp2_u32(a, b);
 237 }
 238
 239 // CHECK-LABEL: @test_vuzp2q_u32(
 240 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 241 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 242 uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) {
       // Forwards a and b to vuzp2q_u32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, 5, 7>.
 243   return vuzp2q_u32(a, b);
 244 }
 245
 246 // CHECK-LABEL: @test_vuzp2q_u64(
 247 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 248 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 249 uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) {
       // Forwards a and b to vuzp2q_u64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3>.
 250   return vuzp2q_u64(a, b);
 251 }
 252
 253 // CHECK-LABEL: @test_vuzp2_f32(
 254 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
 255 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 256 float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) {
       // Forwards a and b to vuzp2_f32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3>.
 257   return vuzp2_f32(a, b);
 258 }
 259
 260 // CHECK-LABEL: @test_vuzp2q_f32(
 261 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 262 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 263 float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) {
       // Forwards a and b to vuzp2q_f32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, 5, 7>.
 264   return vuzp2q_f32(a, b);
 265 }
 266
 267 // CHECK-LABEL: @test_vuzp2q_f64(
 268 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
 269 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 270 float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) {
       // Forwards a and b to vuzp2q_f64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3>.
 271   return vuzp2q_f64(a, b);
 272 }
 273
 274 // CHECK-LABEL: @test_vuzp2_p8(
 275 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 276 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 277 poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) {
       // Forwards a and b to vuzp2_p8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, ..., 15>.
 278   return vuzp2_p8(a, b);
 279 }
 280
 281 // CHECK-LABEL: @test_vuzp2q_p8(
 282 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
 283 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 284 poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) {
       // Forwards a and b to vuzp2q_p8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, ..., 31>.
 285   return vuzp2q_p8(a, b);
 286 }
 287
 288 // CHECK-LABEL: @test_vuzp2_p16(
 289 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 290 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 291 poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) {
       // Forwards a and b to vuzp2_p16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, 5, 7>.
 292   return vuzp2_p16(a, b);
 293 }
 294
 295 // CHECK-LABEL: @test_vuzp2q_p16(
 296 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 297 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 298 poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) {
       // Forwards a and b to vuzp2q_p16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3, ..., 15>.
 299   return vuzp2q_p16(a, b);
 300 }
 301
 302 // CHECK-LABEL: @test_vzip1_s8(
 303 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 304 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 305 int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) {
       // Forwards a and b to vzip1_s8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 8, 1, 9, ..., 3, 11>.
 306   return vzip1_s8(a, b);
 307 }
 308
 309 // CHECK-LABEL: @test_vzip1q_s8(
 310 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 311 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 312 int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) {
       // Forwards a and b to vzip1q_s8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 16, 1, 17, ..., 7, 23>.
 313   return vzip1q_s8(a, b);
 314 }
 315
 316 // CHECK-LABEL: @test_vzip1_s16(
 317 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 318 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 319 int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) {
       // Forwards a and b to vzip1_s16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 4, 1, 5>.
 320   return vzip1_s16(a, b);
 321 }
 322
 323 // CHECK-LABEL: @test_vzip1q_s16(
 324 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 325 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 326 int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) {
       // Forwards a and b to vzip1q_s16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 8, 1, 9, ..., 3, 11>.
 327   return vzip1q_s16(a, b);
 328 }
 329
 330 // CHECK-LABEL: @test_vzip1_s32(
 331 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 332 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 333 int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) {
       // Forwards a and b to vzip1_s32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
 334   return vzip1_s32(a, b);
 335 }
 336
 337 // CHECK-LABEL: @test_vzip1q_s32(
 338 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 339 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 340 int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) {
       // Forwards a and b to vzip1q_s32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 4, 1, 5>.
 341   return vzip1q_s32(a, b);
 342 }
 343
 344 // CHECK-LABEL: @test_vzip1q_s64(
 345 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 346 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 347 int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) {
       // Forwards a and b to vzip1q_s64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
 348   return vzip1q_s64(a, b);
 349 }
 350
 351 // CHECK-LABEL: @test_vzip1_u8(
 352 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 353 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 354 uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) {
       // Forwards a and b to vzip1_u8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 8, 1, 9, ..., 3, 11>.
 355   return vzip1_u8(a, b);
 356 }
 357
 358 // CHECK-LABEL: @test_vzip1q_u8(
 359 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 360 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 361 uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) {
       // Forwards a and b to vzip1q_u8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 16, 1, 17, ..., 7, 23>.
 362   return vzip1q_u8(a, b);
 363 }
 364
 365 // CHECK-LABEL: @test_vzip1_u16(
 366 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 367 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 368 uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) {
       // Forwards a and b to vzip1_u16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 4, 1, 5>.
 369   return vzip1_u16(a, b);
 370 }
 371
 372 // CHECK-LABEL: @test_vzip1q_u16(
 373 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 374 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 375 uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) {
       // Forwards a and b to vzip1q_u16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 8, 1, 9, ..., 3, 11>.
 376   return vzip1q_u16(a, b);
 377 }
 378
 379 // CHECK-LABEL: @test_vzip1_u32(
 380 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 381 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 382 uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) {
       // Forwards a and b to vzip1_u32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
 383   return vzip1_u32(a, b);
 384 }
 385
 386 // CHECK-LABEL: @test_vzip1q_u32(
 387 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 388 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 389 uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) {
       // Forwards a and b to vzip1q_u32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 4, 1, 5>.
 390   return vzip1q_u32(a, b);
 391 }
 392
 393 // CHECK-LABEL: @test_vzip1q_u64(
 394 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 395 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 396 uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) {
       // Forwards a and b to vzip1q_u64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
 397   return vzip1q_u64(a, b);
 398 }
 399
 400 // CHECK-LABEL: @test_vzip1_f32(
 401 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
 402 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 403 float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) {
       // Forwards a and b to vzip1_f32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
 404   return vzip1_f32(a, b);
 405 }
 406
 407 // CHECK-LABEL: @test_vzip1q_f32(
 408 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 409 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 410 float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) {
       // Forwards a and b to vzip1q_f32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 4, 1, 5>.
 411   return vzip1q_f32(a, b);
 412 }
 413
 414 // CHECK-LABEL: @test_vzip1q_f64(
 415 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
 416 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 417 float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) {
       // Forwards a and b to vzip1q_f64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
 418   return vzip1q_f64(a, b);
 419 }
 420
 421 // CHECK-LABEL: @test_vzip1_p8(
 422 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 423 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 424 poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) {
       // Forwards a and b to vzip1_p8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 8, 1, 9, ..., 3, 11>.
 425   return vzip1_p8(a, b);
 426 }
 427
 428 // CHECK-LABEL: @test_vzip1q_p8(
 429 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 430 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 431 poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) {
       // Forwards a and b to vzip1q_p8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 16, 1, 17, ..., 7, 23>.
 432   return vzip1q_p8(a, b);
 433 }
 434
 435 // CHECK-LABEL: @test_vzip1_p16(
 436 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 437 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 438 poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) {
       // Forwards a and b to vzip1_p16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 4, 1, 5>.
 439   return vzip1_p16(a, b);
 440 }
 441
 442 // CHECK-LABEL: @test_vzip1q_p16(
 443 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 444 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 445 poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) {
       // Forwards a and b to vzip1q_p16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 8, 1, 9, ..., 3, 11>.
 446   return vzip1q_p16(a, b);
 447 }
 448
 449 // CHECK-LABEL: @test_vzip2_s8(
 450 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 451 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 452 int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) {
       // Forwards a and b to vzip2_s8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <4, 12, 5, 13, ..., 7, 15>.
 453   return vzip2_s8(a, b);
 454 }
 455
 456 // CHECK-LABEL: @test_vzip2q_s8(
 457 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 458 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 459 int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) {
       // Forwards a and b to vzip2q_s8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <8, 24, 9, 25, ..., 15, 31>.
 460   return vzip2q_s8(a, b);
 461 }
 462
 463 // CHECK-LABEL: @test_vzip2_s16(
 464 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 465 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 466 int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) {
       // Forwards a and b to vzip2_s16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <2, 6, 3, 7>.
 467   return vzip2_s16(a, b);
 468 }
 469
 470 // CHECK-LABEL: @test_vzip2q_s16(
 471 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 472 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 473 int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) {
       // Forwards a and b to vzip2q_s16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <4, 12, 5, 13, ..., 7, 15>.
 474   return vzip2q_s16(a, b);
 475 }
 476
 477 // CHECK-LABEL: @test_vzip2_s32(
 478 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 479 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 480 int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) {
       // Forwards a and b to vzip2_s32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3>.
 481   return vzip2_s32(a, b);
 482 }
 483
 484 // CHECK-LABEL: @test_vzip2q_s32(
 485 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 486 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 487 int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) {
       // Forwards a and b to vzip2q_s32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <2, 6, 3, 7>.
 488   return vzip2q_s32(a, b);
 489 }
 490
 491 // CHECK-LABEL: @test_vzip2q_s64(
 492 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 493 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 494 int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) {
       // Forwards a and b to vzip2q_s64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3>.
 495   return vzip2q_s64(a, b);
 496 }
 497
 498 // CHECK-LABEL: @test_vzip2_u8(
 499 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 500 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 501 uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) {
       // Forwards a and b to vzip2_u8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <4, 12, 5, 13, ..., 7, 15>.
 502   return vzip2_u8(a, b);
 503 }
 504
 505 // CHECK-LABEL: @test_vzip2q_u8(
 506 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 507 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 508 uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) {
       // Forwards a and b to vzip2q_u8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <8, 24, 9, 25, ..., 15, 31>.
 509   return vzip2q_u8(a, b);
 510 }
 511
 512 // CHECK-LABEL: @test_vzip2_u16(
 513 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 514 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 515 uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) {
       // Forwards a and b to vzip2_u16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <2, 6, 3, 7>.
 516   return vzip2_u16(a, b);
 517 }
 518
 519 // CHECK-LABEL: @test_vzip2q_u16(
 520 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 521 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 522 uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) {
       // Forwards a and b to vzip2q_u16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <4, 12, 5, 13, ..., 7, 15>.
 523   return vzip2q_u16(a, b);
 524 }
 525
 526 // CHECK-LABEL: @test_vzip2_u32(
 527 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 528 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 529 uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) {
       // Forwards a and b to vzip2_u32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3>.
 530   return vzip2_u32(a, b);
 531 }
 532
 533 // CHECK-LABEL: @test_vzip2q_u32(
 534 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 535 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 536 uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) {
       // Forwards a and b to vzip2q_u32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <2, 6, 3, 7>.
 537   return vzip2q_u32(a, b);
 538 }
 539
 540 // CHECK-LABEL: @test_vzip2q_u64(
 541 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 542 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 543 uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) {
       // Forwards a and b to vzip2q_u64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3>.
 544   return vzip2q_u64(a, b);
 545 }
 546
 547 // CHECK-LABEL: @test_vzip2_f32(
 548 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
 549 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 550 float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) {
       // Forwards a and b to vzip2_f32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3>.
 551   return vzip2_f32(a, b);
 552 }
 553
 554 // CHECK-LABEL: @test_vzip2q_f32(
 555 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 556 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 557 float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) {
       // Forwards a and b to vzip2q_f32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <2, 6, 3, 7>.
 558   return vzip2q_f32(a, b);
 559 }
 560
 561 // CHECK-LABEL: @test_vzip2q_f64(
 562 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
 563 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 564 float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) {
       // Forwards a and b to vzip2q_f64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <1, 3>.
 565   return vzip2q_f64(a, b);
 566 }
 567
 568 // CHECK-LABEL: @test_vzip2_p8(
 569 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 570 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 571 poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) {
       // Forwards a and b to vzip2_p8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <4, 12, 5, 13, ..., 7, 15>.
 572   return vzip2_p8(a, b);
 573 }
 574
 575 // CHECK-LABEL: @test_vzip2q_p8(
 576 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 577 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 578 poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) {
       // Forwards a and b to vzip2q_p8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <8, 24, 9, 25, ..., 15, 31>.
 579   return vzip2q_p8(a, b);
 580 }
 581
 582 // CHECK-LABEL: @test_vzip2_p16(
 583 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
 584 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 585 poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) {
       // Forwards a and b to vzip2_p16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <2, 6, 3, 7>.
 586   return vzip2_p16(a, b);
 587 }
 588
 589 // CHECK-LABEL: @test_vzip2q_p16(
 590 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 591 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 592 poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) {
       // Forwards a and b to vzip2q_p16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <4, 12, 5, 13, ..., 7, 15>.
 593   return vzip2q_p16(a, b);
 594 }
 595
 596 // CHECK-LABEL: @test_vtrn1_s8(
 597 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 598 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 599 int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) {
       // Forwards a and b to vtrn1_s8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 8, 2, 10, ..., 6, 14>.
 600   return vtrn1_s8(a, b);
 601 }
 602
 603 // CHECK-LABEL: @test_vtrn1q_s8(
 604 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
 605 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 606 int8x16_t test_vtrn1q_s8(int8x16_t a, int8x16_t b) {
       // Forwards a and b to vtrn1q_s8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 16, 2, 18, ..., 14, 30>.
 607   return vtrn1q_s8(a, b);
 608 }
 609
 610 // CHECK-LABEL: @test_vtrn1_s16(
 611 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 612 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 613 int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) {
       // Forwards a and b to vtrn1_s16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 4, 2, 6>.
 614   return vtrn1_s16(a, b);
 615 }
 616
 617 // CHECK-LABEL: @test_vtrn1q_s16(
 618 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 619 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 620 int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) {
       // Forwards a and b to vtrn1q_s16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 8, 2, 10, ..., 6, 14>.
 621   return vtrn1q_s16(a, b);
 622 }
 623
 624 // CHECK-LABEL: @test_vtrn1_s32(
 625 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 626 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 627 int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) {
       // Forwards a and b to vtrn1_s32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
 628   return vtrn1_s32(a, b);
 629 }
 630
 631 // CHECK-LABEL: @test_vtrn1q_s32(
 632 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 633 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 634 int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) {
       // Forwards a and b to vtrn1q_s32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 4, 2, 6>.
 635   return vtrn1q_s32(a, b);
 636 }
 637
 638 // CHECK-LABEL: @test_vtrn1q_s64(
 639 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 640 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 641 int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) {
       // Forwards a and b to vtrn1q_s64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
 642   return vtrn1q_s64(a, b);
 643 }
 644
 645 // CHECK-LABEL: @test_vtrn1_u8(
 646 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 647 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 648 uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) {
       // Forwards a and b to vtrn1_u8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 8, 2, 10, ..., 6, 14>.
 649   return vtrn1_u8(a, b);
 650 }
 651
 652 // CHECK-LABEL: @test_vtrn1q_u8(
 653 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
 654 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 655 uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) {
       // Forwards a and b to vtrn1q_u8 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 16, 2, 18, ..., 14, 30>.
 656   return vtrn1q_u8(a, b);
 657 }
 658
 659 // CHECK-LABEL: @test_vtrn1_u16(
 660 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 661 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 662 uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) {
       // Forwards a and b to vtrn1_u16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 4, 2, 6>.
 663   return vtrn1_u16(a, b);
 664 }
 665
 666 // CHECK-LABEL: @test_vtrn1q_u16(
 667 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 668 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 669 uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) {
       // Forwards a and b to vtrn1q_u16 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 8, 2, 10, ..., 6, 14>.
 670   return vtrn1q_u16(a, b);
 671 }
 672
 673 // CHECK-LABEL: @test_vtrn1_u32(
 674 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 675 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 676 uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) {
       // Forwards a and b to vtrn1_u32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
 677   return vtrn1_u32(a, b);
 678 }
 679
 680 // CHECK-LABEL: @test_vtrn1q_u32(
 681 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 682 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 683 uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) {
       // Forwards a and b to vtrn1q_u32 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 4, 2, 6>.
 684   return vtrn1q_u32(a, b);
 685 }
 686
 687 // CHECK-LABEL: @test_vtrn1q_u64(
 688 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
 689 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 690 uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) {
       // Forwards a and b to vtrn1q_u64 and returns the result. The FileCheck
       // directives above expect a shufflevector with mask <0, 2>.
 691   return vtrn1q_u64(a, b);
 692 }
 693
 694 // CHECK-LABEL: @test_vtrn1_f32(
 695 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
 696 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 697 float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) {  // Exercises vtrn1_f32; the directives above expect one <2 x float> shufflevector.
 698   return vtrn1_f32(a, b);  // expected mask <0, 2>: lane 0 of a followed by lane 0 of b
 699 }
 700
 701 // CHECK-LABEL: @test_vtrn1q_f32(
 702 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 703 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 704 float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) {  // Exercises vtrn1q_f32; the directives above expect one <4 x float> shufflevector.
 705   return vtrn1q_f32(a, b);  // expected mask <0, 4, 2, 6>: even-numbered lanes, alternating between a and b
 706 }
 707
 708 // CHECK-LABEL: @test_vtrn1q_f64(
 709 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
 710 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 711 float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) {  // Exercises vtrn1q_f64; the directives above expect one <2 x double> shufflevector.
 712   return vtrn1q_f64(a, b);  // expected mask <0, 2>: lane 0 of a followed by lane 0 of b
 713 }
 714
 715 // CHECK-LABEL: @test_vtrn1_p8(
 716 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 717 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 718 poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) {  // Exercises vtrn1_p8; the directives above expect one <8 x i8> shufflevector.
 719   return vtrn1_p8(a, b);  // expected mask <0, 8, 2, 10, 4, 12, 6, 14>: even-numbered lanes, alternating between a and b
 720 }
 721
 722 // CHECK-LABEL: @test_vtrn1q_p8(
 723 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
 724 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 725 poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) {  // Exercises vtrn1q_p8; the directives above expect one <16 x i8> shufflevector.
 726   return vtrn1q_p8(a, b);  // expected mask <0, 16, 2, 18, ..., 14, 30>: even-numbered lanes, alternating between a and b
 727 }
 728
 729 // CHECK-LABEL: @test_vtrn1_p16(
 730 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 731 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 732 poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) {  // Exercises vtrn1_p16; the directives above expect one <4 x i16> shufflevector.
 733   return vtrn1_p16(a, b);  // expected mask <0, 4, 2, 6>: even-numbered lanes, alternating between a and b
 734 }
 735
 736 // CHECK-LABEL: @test_vtrn1q_p16(
 737 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 738 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 739 poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) {  // Exercises vtrn1q_p16; the directives above expect one <8 x i16> shufflevector.
 740   return vtrn1q_p16(a, b);  // expected mask <0, 8, 2, 10, 4, 12, 6, 14>: even-numbered lanes, alternating between a and b
 741 }
 742
 743 // CHECK-LABEL: @test_vtrn2_s8(
 744 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 745 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 746 int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) {  // Exercises vtrn2_s8; the directives above expect one <8 x i8> shufflevector.
 747   return vtrn2_s8(a, b);  // expected mask <1, 9, 3, 11, 5, 13, 7, 15>: odd-numbered lanes, alternating between a and b
 748 }
 749
 750 // CHECK-LABEL: @test_vtrn2q_s8(
 751 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
 752 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 753 int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) {  // Exercises vtrn2q_s8; the directives above expect one <16 x i8> shufflevector.
 754   return vtrn2q_s8(a, b);  // expected mask <1, 17, 3, 19, ..., 15, 31>: odd-numbered lanes, alternating between a and b
 755 }
 756
 757 // CHECK-LABEL: @test_vtrn2_s16(
 758 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 759 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 760 int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) {  // Exercises vtrn2_s16; the directives above expect one <4 x i16> shufflevector.
 761   return vtrn2_s16(a, b);  // expected mask <1, 5, 3, 7>: odd-numbered lanes, alternating between a and b
 762 }
 763
 764 // CHECK-LABEL: @test_vtrn2q_s16(
 765 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 766 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 767 int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) {  // Exercises vtrn2q_s16; the directives above expect one <8 x i16> shufflevector.
 768   return vtrn2q_s16(a, b);  // expected mask <1, 9, 3, 11, 5, 13, 7, 15>: odd-numbered lanes, alternating between a and b
 769 }
 770
 771 // CHECK-LABEL: @test_vtrn2_s32(
 772 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 773 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 774 int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) {  // Exercises vtrn2_s32; the directives above expect one <2 x i32> shufflevector.
 775   return vtrn2_s32(a, b);  // expected mask <1, 3>: lane 1 of a followed by lane 1 of b
 776 }
 777
 778 // CHECK-LABEL: @test_vtrn2q_s32(
 779 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 780 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 781 int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) {  // Exercises vtrn2q_s32; the directives above expect one <4 x i32> shufflevector.
 782   return vtrn2q_s32(a, b);  // expected mask <1, 5, 3, 7>: odd-numbered lanes, alternating between a and b
 783 }
 784
 785 // CHECK-LABEL: @test_vtrn2q_s64(
 786 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 787 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 788 int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) {  // Exercises vtrn2q_s64; the directives above expect one <2 x i64> shufflevector.
 789   return vtrn2q_s64(a, b);  // expected mask <1, 3>: lane 1 of a followed by lane 1 of b
 790 }
 791
 792 // CHECK-LABEL: @test_vtrn2_u8(
 793 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 794 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 795 uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) {  // Exercises vtrn2_u8; the directives above expect one <8 x i8> shufflevector.
 796   return vtrn2_u8(a, b);  // expected mask <1, 9, 3, 11, 5, 13, 7, 15>: odd-numbered lanes, alternating between a and b
 797 }
 798
 799 // CHECK-LABEL: @test_vtrn2q_u8(
 800 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
 801 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 802 uint8x16_t test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) {  // Exercises vtrn2q_u8; the directives above expect one <16 x i8> shufflevector.
 803   return vtrn2q_u8(a, b);  // expected mask <1, 17, 3, 19, ..., 15, 31>: odd-numbered lanes, alternating between a and b
 804 }
 805
 806 // CHECK-LABEL: @test_vtrn2_u16(
 807 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 808 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 809 uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) {  // Exercises vtrn2_u16; the directives above expect one <4 x i16> shufflevector.
 810   return vtrn2_u16(a, b);  // expected mask <1, 5, 3, 7>: odd-numbered lanes, alternating between a and b
 811 }
 812
 813 // CHECK-LABEL: @test_vtrn2q_u16(
 814 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 815 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 816 uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) {  // Exercises vtrn2q_u16; the directives above expect one <8 x i16> shufflevector.
 817   return vtrn2q_u16(a, b);  // expected mask <1, 9, 3, 11, 5, 13, 7, 15>: odd-numbered lanes, alternating between a and b
 818 }
 819
 820 // CHECK-LABEL: @test_vtrn2_u32(
 821 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 822 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
 823 uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) {  // Exercises vtrn2_u32; the directives above expect one <2 x i32> shufflevector.
 824   return vtrn2_u32(a, b);  // expected mask <1, 3>: lane 1 of a followed by lane 1 of b
 825 }
 826
 827 // CHECK-LABEL: @test_vtrn2q_u32(
 828 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 829 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
 830 uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) {  // Exercises vtrn2q_u32; the directives above expect one <4 x i32> shufflevector.
 831   return vtrn2q_u32(a, b);  // expected mask <1, 5, 3, 7>: odd-numbered lanes, alternating between a and b
 832 }
 833
 834 // CHECK-LABEL: @test_vtrn2q_u64(
 835 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
 836 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
 837 uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) {  // Exercises vtrn2q_u64; the directives above expect one <2 x i64> shufflevector.
 838   return vtrn2q_u64(a, b);  // expected mask <1, 3>: lane 1 of a followed by lane 1 of b
 839 }
 840
 841 // CHECK-LABEL: @test_vtrn2_f32(
 842 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
 843 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
 844 float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) {  // Exercises vtrn2_f32; the directives above expect one <2 x float> shufflevector.
 845   return vtrn2_f32(a, b);  // expected mask <1, 3>: lane 1 of a followed by lane 1 of b
 846 }
 847
 848 // CHECK-LABEL: @test_vtrn2q_f32(
 849 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 850 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
 851 float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) {  // Exercises vtrn2q_f32; the directives above expect one <4 x float> shufflevector.
 852   return vtrn2q_f32(a, b);  // expected mask <1, 5, 3, 7>: odd-numbered lanes, alternating between a and b
 853 }
 854
 855 // CHECK-LABEL: @test_vtrn2q_f64(
 856 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
 857 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
 858 float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) {  // Exercises vtrn2q_f64; the directives above expect one <2 x double> shufflevector.
 859   return vtrn2q_f64(a, b);  // expected mask <1, 3>: lane 1 of a followed by lane 1 of b
 860 }
 861
 862 // CHECK-LABEL: @test_vtrn2_p8(
 863 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 864 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
 865 poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) {  // Exercises vtrn2_p8; the directives above expect one <8 x i8> shufflevector.
 866   return vtrn2_p8(a, b);  // expected mask <1, 9, 3, 11, 5, 13, 7, 15>: odd-numbered lanes, alternating between a and b
 867 }
 868
 869 // CHECK-LABEL: @test_vtrn2q_p8(
 870 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
 871 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
 872 poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) {  // Exercises vtrn2q_p8; the directives above expect one <16 x i8> shufflevector.
 873   return vtrn2q_p8(a, b);  // expected mask <1, 17, 3, 19, ..., 15, 31>: odd-numbered lanes, alternating between a and b
 874 }
 875
 876 // CHECK-LABEL: @test_vtrn2_p16(
 877 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 878 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
 879 poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) {  // Exercises vtrn2_p16; the directives above expect one <4 x i16> shufflevector.
 880   return vtrn2_p16(a, b);  // expected mask <1, 5, 3, 7>: odd-numbered lanes, alternating between a and b
 881 }
 882
 883 // CHECK-LABEL: @test_vtrn2q_p16(
 884 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 885 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
 886 poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) {  // Exercises vtrn2q_p16; the directives above expect one <8 x i16> shufflevector.
 887   return vtrn2q_p16(a, b);  // expected mask <1, 9, 3, 11, 5, 13, 7, 15>: odd-numbered lanes, alternating between a and b
 888 }
 889
 890 // CHECK-LABEL: @test_vuzp_s8(
 891 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
 892 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
 893 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 894 // CHECK:   store <8 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
 895 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
 896 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 897 // CHECK:   store <8 x i8> [[VUZP1_I]], ptr [[TMP2]]
 898 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL_I]], align 8
 899 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
 900 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
 901 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
 902 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
 903 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
 904 int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {  // Exercises vuzp_s8, which returns a two-vector struct; the directives above expect two <8 x i8> shufflevectors stored into the return slot.
 905   return vuzp_s8(a, b);  // expected masks: <0, 2, 4, ..., 14> for the first stored vector, <1, 3, 5, ..., 15> for the second
 906 }
 907
 908 // CHECK-LABEL: @test_vuzp_s16(
 909 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
 910 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
 911 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 912 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 913 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 914 // CHECK:   store <4 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
 915 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
 916 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 917 // CHECK:   store <4 x i16> [[VUZP1_I]], ptr [[TMP4]]
 918 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL_I]], align 8
 919 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
 920 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
 921 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
 922 // CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
 923 // CHECK:   ret %struct.int16x4x2_t [[TMP10]]
 924 int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {  // Exercises vuzp_s16, which returns a two-vector struct; the directives above expect bitcasts of a and b plus two <4 x i16> shufflevectors.
 925   return vuzp_s16(a, b);  // expected masks: <0, 2, 4, 6> for the first stored vector, <1, 3, 5, 7> for the second
 926 }
 927
 928 // CHECK-LABEL: @test_vuzp_s32(
 929 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
 930 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
 931 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 932 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 933 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 934 // CHECK:   store <2 x i32> [[VUZP_I]], ptr [[RETVAL_I]]
 935 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
 936 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 937 // CHECK:   store <2 x i32> [[VUZP1_I]], ptr [[TMP4]]
 938 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL_I]], align 8
 939 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
 940 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
 941 // CHECK:   store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
 942 // CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
 943 // CHECK:   ret %struct.int32x2x2_t [[TMP10]]
 944 int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {  // Exercises vuzp_s32, which returns a two-vector struct; the directives above expect bitcasts of a and b plus two <2 x i32> shufflevectors.
 945   return vuzp_s32(a, b);  // expected masks: <0, 2> for the first stored vector, <1, 3> for the second
 946 }
 947
 948 // CHECK-LABEL: @test_vuzp_u8(
 949 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
 950 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
 951 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 952 // CHECK:   store <8 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
 953 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
 954 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
 955 // CHECK:   store <8 x i8> [[VUZP1_I]], ptr [[TMP2]]
 956 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL_I]], align 8
 957 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
 958 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
 959 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
 960 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
 961 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
 962 uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {  // Exercises vuzp_u8, which returns a two-vector struct; the directives above expect two <8 x i8> shufflevectors stored into the return slot.
 963   return vuzp_u8(a, b);  // expected masks: <0, 2, 4, ..., 14> for the first stored vector, <1, 3, 5, ..., 15> for the second
 964 }
 965
 966 // CHECK-LABEL: @test_vuzp_u16(
 967 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
 968 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
 969 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 970 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 971 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 972 // CHECK:   store <4 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
 973 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
 974 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 975 // CHECK:   store <4 x i16> [[VUZP1_I]], ptr [[TMP4]]
 976 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL_I]], align 8
 977 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
 978 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
 979 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
 980 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
 981 // CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
 982 uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {  // Exercises vuzp_u16, which returns a two-vector struct; the directives above expect bitcasts of a and b plus two <4 x i16> shufflevectors.
 983   return vuzp_u16(a, b);  // expected masks: <0, 2, 4, 6> for the first stored vector, <1, 3, 5, 7> for the second
 984 }
 985
 986 // CHECK-LABEL: @test_vuzp_u32(
 987 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
 988 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
 989 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 990 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 991 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
 992 // CHECK:   store <2 x i32> [[VUZP_I]], ptr [[RETVAL_I]]
 993 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
 994 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
 995 // CHECK:   store <2 x i32> [[VUZP1_I]], ptr [[TMP4]]
 996 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL_I]], align 8
 997 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
 998 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
 999 // CHECK:   store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
1000 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
1001 // CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
1002 uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {  // Exercises vuzp_u32, which returns a two-vector struct; the directives above expect bitcasts of a and b plus two <2 x i32> shufflevectors.
1003   return vuzp_u32(a, b);  // expected masks: <0, 2> for the first stored vector, <1, 3> for the second
1004 }
1005
1006 // CHECK-LABEL: @test_vuzp_f32(
1007 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1008 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1009 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1010 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1011 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
1012 // CHECK:   store <2 x float> [[VUZP_I]], ptr [[RETVAL_I]]
1013 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[RETVAL_I]], i32 1
1014 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
1015 // CHECK:   store <2 x float> [[VUZP1_I]], ptr [[TMP4]]
1016 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL_I]], align 8
1017 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1018 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
1019 // CHECK:   store [2 x <2 x float>] [[TMP9]], ptr [[TMP8]], align 8
1020 // CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
1021 // CHECK:   ret %struct.float32x2x2_t [[TMP10]]
1022 float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {  // Exercises vuzp_f32, which returns a two-vector struct; the directives above expect bitcasts of a and b plus two <2 x float> shufflevectors.
1023   return vuzp_f32(a, b);  // expected masks: <0, 2> for the first stored vector, <1, 3> for the second
1024 }
1025
1026 // CHECK-LABEL: @test_vuzp_p8(
1027 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1028 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1029 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1030 // CHECK:   store <8 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
1031 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1032 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1033 // CHECK:   store <8 x i8> [[VUZP1_I]], ptr [[TMP2]]
1034 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL_I]], align 8
1035 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1036 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1037 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1038 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
1039 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
1040 poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {  // Exercises vuzp_p8, which returns a two-vector struct; the directives above expect two <8 x i8> shufflevectors stored into the return slot.
1041   return vuzp_p8(a, b);  // expected masks: <0, 2, 4, ..., 14> for the first stored vector, <1, 3, 5, ..., 15> for the second
1042 }
1043
1044 // CHECK-LABEL: @test_vuzp_p16(
1045 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1046 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1047 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1048 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1049 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1050 // CHECK:   store <4 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
1051 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1052 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1053 // CHECK:   store <4 x i16> [[VUZP1_I]], ptr [[TMP4]]
1054 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL_I]], align 8
1055 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1056 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
1057 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1058 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
1059 // CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
1060 poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {  // Exercises vuzp_p16, which returns a two-vector struct; the directives above expect bitcasts of a and b plus two <4 x i16> shufflevectors.
1061   return vuzp_p16(a, b);  // expected masks: <0, 2, 4, 6> for the first stored vector, <1, 3, 5, 7> for the second
1062 }
1063
1064 // CHECK-LABEL: @test_vuzpq_s8(
1065 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1066 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1067 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1068 // CHECK:   store <16 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
1069 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1070 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1071 // CHECK:   store <16 x i8> [[VUZP1_I]], ptr [[TMP2]]
1072 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL_I]], align 16
1073 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1074 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1075 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1076 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
1077 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
1078 int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {  // Exercises vuzpq_s8, which returns a two-vector struct; the directives above expect two <16 x i8> shufflevectors stored into the return slot.
1079   return vuzpq_s8(a, b);  // expected masks: <0, 2, 4, ..., 30> for the first stored vector, <1, 3, 5, ..., 31> for the second
1080 }
1081
1082 // CHECK-LABEL: @test_vuzpq_s16(
1083 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1084 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1085 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1086 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1087 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1088 // CHECK:   store <8 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
1089 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1090 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1091 // CHECK:   store <8 x i16> [[VUZP1_I]], ptr [[TMP4]]
1092 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL_I]], align 16
1093 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1094 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
1095 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1096 // CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
1097 // CHECK:   ret %struct.int16x8x2_t [[TMP10]]
1098 int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {  // Exercises vuzpq_s16, which returns a two-vector struct; the directives above expect bitcasts of a and b plus two <8 x i16> shufflevectors.
1099   return vuzpq_s16(a, b);  // expected masks: <0, 2, 4, ..., 14> for the first stored vector, <1, 3, 5, ..., 15> for the second
1100 }
1101
1102 // CHECK-LABEL: @test_vuzpq_s32(
1103 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1104 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1105 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1106 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1107 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1108 // CHECK:   store <4 x i32> [[VUZP_I]], ptr [[RETVAL_I]]
1109 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
1110 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1111 // CHECK:   store <4 x i32> [[VUZP1_I]], ptr [[TMP4]]
1112 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL_I]], align 16
1113 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1114 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
1115 // CHECK:   store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
1116 // CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
1117 // CHECK:   ret %struct.int32x4x2_t [[TMP10]]
1118 int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {  // Exercises vuzpq_s32, which returns a two-vector struct; the directives above expect bitcasts of a and b plus two <4 x i32> shufflevectors.
1119   return vuzpq_s32(a, b);  // expected masks: <0, 2, 4, 6> for the first stored vector, <1, 3, 5, 7> for the second
1120 }
1121
1122 // CHECK-LABEL: @test_vuzpq_u8(
1123 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1124 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1125 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1126 // CHECK:   store <16 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
1127 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1128 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1129 // CHECK:   store <16 x i8> [[VUZP1_I]], ptr [[TMP2]]
1130 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL_I]], align 16
1131 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1132 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1133 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1134 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
1135 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
1136 uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {  // Exercises vuzpq_u8, which returns a two-vector struct; the directives above expect two <16 x i8> shufflevectors stored into the return slot.
1137   return vuzpq_u8(a, b);  // expected masks: <0, 2, 4, ..., 30> for the first stored vector, <1, 3, 5, ..., 31> for the second
1138 }
1139
1140 // CHECK-LABEL: @test_vuzpq_u16(
1141 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1142 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1143 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1144 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1145 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1146 // CHECK:   store <8 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
1147 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1148 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1149 // CHECK:   store <8 x i16> [[VUZP1_I]], ptr [[TMP4]]
1150 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL_I]], align 16
1151 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1152 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
1153 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1154 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
1155 // CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
1156 uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {  // Exercises vuzpq_u16, which returns a two-vector struct; the directives above expect bitcasts of a and b plus two <8 x i16> shufflevectors.
1157   return vuzpq_u16(a, b);  // expected masks: <0, 2, 4, ..., 14> for the first stored vector, <1, 3, 5, ..., 15> for the second
1158 }
1159
1160 // CHECK-LABEL: @test_vuzpq_u32(
1161 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1162 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1163 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1164 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1165 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1166 // CHECK:   store <4 x i32> [[VUZP_I]], ptr [[RETVAL_I]]
1167 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
1168 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1169 // CHECK:   store <4 x i32> [[VUZP1_I]], ptr [[TMP4]]
1170 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL_I]], align 16
1171 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1172 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
1173 // CHECK:   store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
1174 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
1175 // CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
1176 uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {  // Exercises vuzpq_u32, which returns a two-vector struct; the directives above expect bitcasts of a and b plus two <4 x i32> shufflevectors.
1177   return vuzpq_u32(a, b);  // expected masks: <0, 2, 4, 6> for the first stored vector, <1, 3, 5, 7> for the second
1178 }
1179
1180 // CHECK-LABEL: @test_vuzpq_f32(
1181 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1182 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1183 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1184 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1185 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1186 // CHECK:   store <4 x float> [[VUZP_I]], ptr [[RETVAL_I]]
1187 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[RETVAL_I]], i32 1
1188 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1189 // CHECK:   store <4 x float> [[VUZP1_I]], ptr [[TMP4]]
1190 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL_I]], align 16
1191 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1192 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
1193 // CHECK:   store [2 x <4 x float>] [[TMP9]], ptr [[TMP8]], align 16
1194 // CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
1195 // CHECK:   ret %struct.float32x4x2_t [[TMP10]]
1196 float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {  // Exercises vuzpq_f32, which returns a two-vector struct; the directives above expect bitcasts of a and b plus two <4 x float> shufflevectors.
1197   return vuzpq_f32(a, b);  // expected masks: <0, 2, 4, 6> for the first stored vector, <1, 3, 5, 7> for the second
1198 }
1199
1200 // CHECK-LABEL: @test_vuzpq_p8(
1201 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1202 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1203 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1204 // CHECK:   store <16 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
1205 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1206 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1207 // CHECK:   store <16 x i8> [[VUZP1_I]], ptr [[TMP2]]
1208 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL_I]], align 16
1209 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1210 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1211 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1212 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
1213 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
1214 poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {  // Exercises vuzpq_p8, which returns a two-vector struct; the directives above expect two <16 x i8> shufflevectors stored into the return slot.
1215   return vuzpq_p8(a, b);  // expected masks: <0, 2, 4, ..., 30> for the first stored vector, <1, 3, 5, ..., 31> for the second
1216 }
1217
1218 // CHECK-LABEL: @test_vuzpq_p16(
1219 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1220 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1221 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1222 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1223 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1224 // CHECK:   store <8 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
1225 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1226 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1227 // CHECK:   store <8 x i16> [[VUZP1_I]], ptr [[TMP4]]
1228 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL_I]], align 16
1229 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1230 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
1231 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1232 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
1233 // CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
// Returns vuzpq_p16(a, b) unchanged. The FileCheck patterns above expect two
// <8 x i16> shufflevectors of %a and %b: the first selects the even indices
// 0,2,...,14 and the second the odd indices 1,3,...,15; both are stored into
// the returned %struct.poly16x8x2_t.
1234 poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
1235   return vuzpq_p16(a, b);
1236 }
1237
1238 // CHECK-LABEL: @test_vzip_s8(
1239 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1240 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
1241 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1242 // CHECK:   store <8 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
1243 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1244 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1245 // CHECK:   store <8 x i8> [[VZIP1_I]], ptr [[TMP2]]
1246 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL_I]], align 8
1247 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1248 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
1249 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1250 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
1251 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
// Returns vzip_s8(a, b) unchanged. The FileCheck patterns above expect two
// <8 x i8> shufflevectors of %a and %b with masks 0,8,1,9,2,10,3,11 and
// 4,12,5,13,6,14,7,15 (lanes of a and b interleaved, low half then high
// half), both stored into the returned %struct.int8x8x2_t.
1252 int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
1253   return vzip_s8(a, b);
1254 }
1255
1256 // CHECK-LABEL: @test_vzip_s16(
1257 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1258 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
1259 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1260 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1261 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1262 // CHECK:   store <4 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
1263 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1264 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1265 // CHECK:   store <4 x i16> [[VZIP1_I]], ptr [[TMP4]]
1266 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL_I]], align 8
1267 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1268 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
1269 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1270 // CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
1271 // CHECK:   ret %struct.int16x4x2_t [[TMP10]]
// Returns vzip_s16(a, b) unchanged. The FileCheck patterns above expect two
// <4 x i16> shufflevectors of %a and %b with masks 0,4,1,5 and 2,6,3,7,
// both stored into the returned %struct.int16x4x2_t.
1272 int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
1273   return vzip_s16(a, b);
1274 }
1275
1276 // CHECK-LABEL: @test_vzip_s32(
1277 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1278 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
1279 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1280 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1281 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1282 // CHECK:   store <2 x i32> [[VZIP_I]], ptr [[RETVAL_I]]
1283 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
1284 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1285 // CHECK:   store <2 x i32> [[VZIP1_I]], ptr [[TMP4]]
1286 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL_I]], align 8
1287 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1288 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
1289 // CHECK:   store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
1290 // CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
1291 // CHECK:   ret %struct.int32x2x2_t [[TMP10]]
// Returns vzip_s32(a, b) unchanged. The FileCheck patterns above expect two
// <2 x i32> shufflevectors of %a and %b with masks 0,2 and 1,3, both stored
// into the returned %struct.int32x2x2_t.
1292 int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
1293   return vzip_s32(a, b);
1294 }
1295
1296 // CHECK-LABEL: @test_vzip_u8(
1297 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1298 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
1299 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1300 // CHECK:   store <8 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
1301 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1302 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1303 // CHECK:   store <8 x i8> [[VZIP1_I]], ptr [[TMP2]]
1304 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL_I]], align 8
1305 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1306 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
1307 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1308 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
1309 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
// Returns vzip_u8(a, b) unchanged. The FileCheck patterns above expect two
// <8 x i8> shufflevectors of %a and %b with masks 0,8,1,9,2,10,3,11 and
// 4,12,5,13,6,14,7,15, both stored into the returned %struct.uint8x8x2_t.
1310 uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
1311   return vzip_u8(a, b);
1312 }
1313
1314 // CHECK-LABEL: @test_vzip_u16(
1315 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1316 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
1317 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1318 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1319 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1320 // CHECK:   store <4 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
1321 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1322 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1323 // CHECK:   store <4 x i16> [[VZIP1_I]], ptr [[TMP4]]
1324 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL_I]], align 8
1325 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1326 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
1327 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1328 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
1329 // CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
// Returns vzip_u16(a, b) unchanged. The FileCheck patterns above expect two
// <4 x i16> shufflevectors of %a and %b with masks 0,4,1,5 and 2,6,3,7,
// both stored into the returned %struct.uint16x4x2_t.
1330 uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
1331   return vzip_u16(a, b);
1332 }
1333
1334 // CHECK-LABEL: @test_vzip_u32(
1335 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1336 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1337 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1338 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1339 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1340 // CHECK:   store <2 x i32> [[VZIP_I]], ptr [[RETVAL_I]]
1341 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
1342 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1343 // CHECK:   store <2 x i32> [[VZIP1_I]], ptr [[TMP4]]
1344 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL_I]], align 8
1345 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1346 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
1347 // CHECK:   store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
1348 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
1349 // CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
// Returns vzip_u32(a, b) unchanged. The FileCheck patterns above expect two
// <2 x i32> shufflevectors of %a and %b with masks 0,2 and 1,3, both stored
// into the returned %struct.uint32x2x2_t.
1350 uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
1351   return vzip_u32(a, b);
1352 }
1353
1354 // CHECK-LABEL: @test_vzip_f32(
1355 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1356 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1357 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1358 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1359 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
1360 // CHECK:   store <2 x float> [[VZIP_I]], ptr [[RETVAL_I]]
1361 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[RETVAL_I]], i32 1
1362 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
1363 // CHECK:   store <2 x float> [[VZIP1_I]], ptr [[TMP4]]
1364 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL_I]], align 8
1365 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1366 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
1367 // CHECK:   store [2 x <2 x float>] [[TMP9]], ptr [[TMP8]], align 8
1368 // CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
1369 // CHECK:   ret %struct.float32x2x2_t [[TMP10]]
// Returns vzip_f32(a, b) unchanged. The FileCheck patterns above expect two
// <2 x float> shufflevectors of %a and %b with masks 0,2 and 1,3, both
// stored into the returned %struct.float32x2x2_t.
1370 float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
1371   return vzip_f32(a, b);
1372 }
1373
1374 // CHECK-LABEL: @test_vzip_p8(
1375 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1376 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1377 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1378 // CHECK:   store <8 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
1379 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1380 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1381 // CHECK:   store <8 x i8> [[VZIP1_I]], ptr [[TMP2]]
1382 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL_I]], align 8
1383 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1384 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1385 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1386 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
1387 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
// Returns vzip_p8(a, b) unchanged. The FileCheck patterns above expect two
// <8 x i8> shufflevectors of %a and %b with masks 0,8,1,9,2,10,3,11 and
// 4,12,5,13,6,14,7,15, both stored into the returned %struct.poly8x8x2_t.
1388 poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
1389   return vzip_p8(a, b);
1390 }
1391
1392 // CHECK-LABEL: @test_vzip_p16(
1393 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1394 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1395 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1396 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1397 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1398 // CHECK:   store <4 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
1399 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1400 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1401 // CHECK:   store <4 x i16> [[VZIP1_I]], ptr [[TMP4]]
1402 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL_I]], align 8
1403 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1404 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
1405 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1406 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
1407 // CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
// Returns vzip_p16(a, b) unchanged. The FileCheck patterns above expect two
// <4 x i16> shufflevectors of %a and %b with masks 0,4,1,5 and 2,6,3,7,
// both stored into the returned %struct.poly16x4x2_t.
1408 poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
1409   return vzip_p16(a, b);
1410 }
1411
1412 // CHECK-LABEL: @test_vzipq_s8(
1413 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1414 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1415 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1416 // CHECK:   store <16 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
1417 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1418 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1419 // CHECK:   store <16 x i8> [[VZIP1_I]], ptr [[TMP2]]
1420 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL_I]], align 16
1421 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1422 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1423 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1424 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
1425 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
// Returns vzipq_s8(a, b) unchanged. The FileCheck patterns above expect two
// <16 x i8> shufflevectors of %a and %b with masks 0,16,1,17,...,7,23 and
// 8,24,9,25,...,15,31, both stored into the returned %struct.int8x16x2_t.
1426 int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
1427   return vzipq_s8(a, b);
1428 }
1429
1430 // CHECK-LABEL: @test_vzipq_s16(
1431 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1432 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1433 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1434 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1435 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1436 // CHECK:   store <8 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
1437 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1438 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1439 // CHECK:   store <8 x i16> [[VZIP1_I]], ptr [[TMP4]]
1440 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL_I]], align 16
1441 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1442 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
1443 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1444 // CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
1445 // CHECK:   ret %struct.int16x8x2_t [[TMP10]]
// Returns vzipq_s16(a, b) unchanged. The FileCheck patterns above expect two
// <8 x i16> shufflevectors of %a and %b with masks 0,8,1,9,2,10,3,11 and
// 4,12,5,13,6,14,7,15, both stored into the returned %struct.int16x8x2_t.
1446 int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
1447   return vzipq_s16(a, b);
1448 }
1449
1450 // CHECK-LABEL: @test_vzipq_s32(
1451 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1452 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1453 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1454 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1455 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1456 // CHECK:   store <4 x i32> [[VZIP_I]], ptr [[RETVAL_I]]
1457 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
1458 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1459 // CHECK:   store <4 x i32> [[VZIP1_I]], ptr [[TMP4]]
1460 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL_I]], align 16
1461 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1462 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
1463 // CHECK:   store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
1464 // CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
1465 // CHECK:   ret %struct.int32x4x2_t [[TMP10]]
// Returns vzipq_s32(a, b) unchanged. The FileCheck patterns above expect two
// <4 x i32> shufflevectors of %a and %b with masks 0,4,1,5 and 2,6,3,7,
// both stored into the returned %struct.int32x4x2_t.
1466 int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
1467   return vzipq_s32(a, b);
1468 }
1469
1470 // CHECK-LABEL: @test_vzipq_u8(
1471 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1472 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1473 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1474 // CHECK:   store <16 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
1475 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1476 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1477 // CHECK:   store <16 x i8> [[VZIP1_I]], ptr [[TMP2]]
1478 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL_I]], align 16
1479 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1480 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1481 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1482 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
1483 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
// Returns vzipq_u8(a, b) unchanged. The FileCheck patterns above expect two
// <16 x i8> shufflevectors of %a and %b with masks 0,16,1,17,...,7,23 and
// 8,24,9,25,...,15,31, both stored into the returned %struct.uint8x16x2_t.
1484 uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
1485   return vzipq_u8(a, b);
1486 }
1487
1488 // CHECK-LABEL: @test_vzipq_u16(
1489 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1490 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1491 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1492 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1493 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1494 // CHECK:   store <8 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
1495 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1496 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1497 // CHECK:   store <8 x i16> [[VZIP1_I]], ptr [[TMP4]]
1498 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL_I]], align 16
1499 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1500 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
1501 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1502 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
1503 // CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
// Returns vzipq_u16(a, b) unchanged. The FileCheck patterns above expect two
// <8 x i16> shufflevectors of %a and %b with masks 0,8,1,9,2,10,3,11 and
// 4,12,5,13,6,14,7,15, both stored into the returned %struct.uint16x8x2_t.
1504 uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
1505   return vzipq_u16(a, b);
1506 }
1507
1508 // CHECK-LABEL: @test_vzipq_u32(
1509 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1510 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1511 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1512 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1513 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1514 // CHECK:   store <4 x i32> [[VZIP_I]], ptr [[RETVAL_I]]
1515 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
1516 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1517 // CHECK:   store <4 x i32> [[VZIP1_I]], ptr [[TMP4]]
1518 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL_I]], align 16
1519 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1520 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
1521 // CHECK:   store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
1522 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
1523 // CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
// Returns vzipq_u32(a, b) unchanged. The FileCheck patterns above expect two
// <4 x i32> shufflevectors of %a and %b with masks 0,4,1,5 and 2,6,3,7,
// both stored into the returned %struct.uint32x4x2_t.
1524 uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
1525   return vzipq_u32(a, b);
1526 }
1527
1528 // CHECK-LABEL: @test_vzipq_f32(
1529 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1530 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1531 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1532 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1533 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1534 // CHECK:   store <4 x float> [[VZIP_I]], ptr [[RETVAL_I]]
1535 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[RETVAL_I]], i32 1
1536 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1537 // CHECK:   store <4 x float> [[VZIP1_I]], ptr [[TMP4]]
1538 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL_I]], align 16
1539 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1540 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
1541 // CHECK:   store [2 x <4 x float>] [[TMP9]], ptr [[TMP8]], align 16
1542 // CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
1543 // CHECK:   ret %struct.float32x4x2_t [[TMP10]]
// Returns vzipq_f32(a, b) unchanged. The FileCheck patterns above expect two
// <4 x float> shufflevectors of %a and %b with masks 0,4,1,5 and 2,6,3,7,
// both stored into the returned %struct.float32x4x2_t.
1544 float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
1545   return vzipq_f32(a, b);
1546 }
1547
1548 // CHECK-LABEL: @test_vzipq_p8(
1549 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1550 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1551 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1552 // CHECK:   store <16 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
1553 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1554 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1555 // CHECK:   store <16 x i8> [[VZIP1_I]], ptr [[TMP2]]
1556 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL_I]], align 16
1557 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1558 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1559 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1560 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
1561 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
// Returns vzipq_p8(a, b) unchanged. The FileCheck patterns above expect two
// <16 x i8> shufflevectors of %a and %b with masks 0,16,1,17,...,7,23 and
// 8,24,9,25,...,15,31, both stored into the returned %struct.poly8x16x2_t.
1562 poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
1563   return vzipq_p8(a, b);
1564 }
1565
1566 // CHECK-LABEL: @test_vzipq_p16(
1567 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1568 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1569 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1570 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1571 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1572 // CHECK:   store <8 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
1573 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1574 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1575 // CHECK:   store <8 x i16> [[VZIP1_I]], ptr [[TMP4]]
1576 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL_I]], align 16
1577 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1578 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
1579 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1580 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
1581 // CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
// Returns vzipq_p16(a, b) unchanged. The FileCheck patterns above expect two
// <8 x i16> shufflevectors of %a and %b with masks 0,8,1,9,2,10,3,11 and
// 4,12,5,13,6,14,7,15, both stored into the returned %struct.poly16x8x2_t.
1582 poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
1583   return vzipq_p16(a, b);
1584 }
1585
1586 // CHECK-LABEL: @test_vtrn_s8(
1587 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1588 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
1589 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1590 // CHECK:   store <8 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
1591 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1592 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1593 // CHECK:   store <8 x i8> [[VTRN1_I]], ptr [[TMP2]]
1594 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL_I]], align 8
1595 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1596 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
1597 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1598 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
1599 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
// Returns vtrn_s8(a, b) unchanged. The FileCheck patterns above expect two
// <8 x i8> shufflevectors of %a and %b with masks 0,8,2,10,4,12,6,14 and
// 1,9,3,11,5,13,7,15 (even lanes of both inputs paired, then odd lanes),
// both stored into the returned %struct.int8x8x2_t.
1600 int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
1601   return vtrn_s8(a, b);
1602 }
1603
1604 // CHECK-LABEL: @test_vtrn_s16(
1605 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1606 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
1607 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1608 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1609 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1610 // CHECK:   store <4 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
1611 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1612 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1613 // CHECK:   store <4 x i16> [[VTRN1_I]], ptr [[TMP4]]
1614 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL_I]], align 8
1615 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1616 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
1617 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1618 // CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
1619 // CHECK:   ret %struct.int16x4x2_t [[TMP10]]
// Returns vtrn_s16(a, b) unchanged. The FileCheck patterns above expect two
// <4 x i16> shufflevectors of %a and %b with masks 0,4,2,6 and 1,5,3,7,
// both stored into the returned %struct.int16x4x2_t.
1620 int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
1621   return vtrn_s16(a, b);
1622 }
1623
1624 // CHECK-LABEL: @test_vtrn_s32(
1625 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1626 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
1627 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1628 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1629 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1630 // CHECK:   store <2 x i32> [[VTRN_I]], ptr [[RETVAL_I]]
1631 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
1632 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1633 // CHECK:   store <2 x i32> [[VTRN1_I]], ptr [[TMP4]]
1634 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL_I]], align 8
1635 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1636 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
1637 // CHECK:   store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
1638 // CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
1639 // CHECK:   ret %struct.int32x2x2_t [[TMP10]]
// Returns vtrn_s32(a, b) unchanged. The FileCheck patterns above expect two
// <2 x i32> shufflevectors of %a and %b with masks 0,2 and 1,3, both stored
// into the returned %struct.int32x2x2_t.
1640 int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
1641   return vtrn_s32(a, b);
1642 }
1643
1644 // CHECK-LABEL: @test_vtrn_u8(
1645 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1646 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
1647 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1648 // CHECK:   store <8 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
1649 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1650 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1651 // CHECK:   store <8 x i8> [[VTRN1_I]], ptr [[TMP2]]
1652 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL_I]], align 8
1653 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1654 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
1655 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1656 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
1657 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
// Returns vtrn_u8(a, b) unchanged. The FileCheck patterns above expect two
// <8 x i8> shufflevectors of %a and %b with masks 0,8,2,10,4,12,6,14 and
// 1,9,3,11,5,13,7,15, both stored into the returned %struct.uint8x8x2_t.
1658 uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
1659   return vtrn_u8(a, b);
1660 }
1661
1662 // CHECK-LABEL: @test_vtrn_u16(
1663 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1664 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
1665 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1666 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1667 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1668 // CHECK:   store <4 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
1669 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1670 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1671 // CHECK:   store <4 x i16> [[VTRN1_I]], ptr [[TMP4]]
1672 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL_I]], align 8
1673 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1674 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
1675 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1676 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
1677 // CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
// Returns vtrn_u16(a, b) unchanged. The FileCheck patterns above expect two
// <4 x i16> shufflevectors of %a and %b with masks 0,4,2,6 and 1,5,3,7,
// both stored into the returned %struct.uint16x4x2_t.
1678 uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
1679   return vtrn_u16(a, b);
1680 }
1681
1682 // CHECK-LABEL: @test_vtrn_u32(
1683 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1684 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1685 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1686 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1687 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1688 // CHECK:   store <2 x i32> [[VTRN_I]], ptr [[RETVAL_I]]
1689 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
1690 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1691 // CHECK:   store <2 x i32> [[VTRN1_I]], ptr [[TMP4]]
1692 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL_I]], align 8
1693 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1694 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
1695 // CHECK:   store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
1696 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
1697 // CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
// Wraps vtrn_u32(a, b) and returns its uint32x2x2_t result unchanged.
// The directives above expect two <2 x i32> shufflevectors of %a and %b,
// with masks <0, 2> (lane 0 of each input) and <1, 3> (lane 1 of each
// input), stored to consecutive vector slots of the inner return temporary.
1698 uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
1699   return vtrn_u32(a, b);
1700 }
1701
1702 // CHECK-LABEL: @test_vtrn_f32(
1703 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1704 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1705 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1706 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1707 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
1708 // CHECK:   store <2 x float> [[VTRN_I]], ptr [[RETVAL_I]]
1709 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[RETVAL_I]], i32 1
1710 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
1711 // CHECK:   store <2 x float> [[VTRN1_I]], ptr [[TMP4]]
1712 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL_I]], align 8
1713 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1714 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
1715 // CHECK:   store [2 x <2 x float>] [[TMP9]], ptr [[TMP8]], align 8
1716 // CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
1717 // CHECK:   ret %struct.float32x2x2_t [[TMP10]]
// Wraps vtrn_f32(a, b) and returns its float32x2x2_t result unchanged.
// The directives above expect two <2 x float> shufflevectors of %a and %b,
// with masks <0, 2> (lane 0 of each input) and <1, 3> (lane 1 of each
// input), stored to consecutive vector slots of the inner return temporary.
1718 float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
1719   return vtrn_f32(a, b);
1720 }
1721
1722 // CHECK-LABEL: @test_vtrn_p8(
1723 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1724 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1725 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1726 // CHECK:   store <8 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
1727 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1728 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1729 // CHECK:   store <8 x i8> [[VTRN1_I]], ptr [[TMP2]]
1730 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL_I]], align 8
1731 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1732 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1733 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1734 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
1735 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
// Wraps vtrn_p8(a, b) and returns its poly8x8x2_t result unchanged.
// The directives above expect two <8 x i8> shufflevectors of %a and %b:
// mask <0, 8, 2, 10, 4, 12, 6, 14> (even lanes of a and b interleaved) and
// mask <1, 9, 3, 11, 5, 13, 7, 15> (odd lanes interleaved). Unlike the
// 16- and 32-bit element tests, no bitcast of the inputs is expected here.
1736 poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
1737   return vtrn_p8(a, b);
1738 }
1739
1740 // CHECK-LABEL: @test_vtrn_p16(
1741 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1742 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1743 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1744 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1745 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1746 // CHECK:   store <4 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
1747 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1748 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1749 // CHECK:   store <4 x i16> [[VTRN1_I]], ptr [[TMP4]]
1750 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL_I]], align 8
1751 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1752 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
1753 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1754 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
1755 // CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
// Wraps vtrn_p16(a, b) and returns its poly16x4x2_t result unchanged.
// The directives above expect two <4 x i16> shufflevectors of %a and %b,
// with masks <0, 4, 2, 6> and <1, 5, 3, 7>, stored to consecutive vector
// slots of the inner return temporary.
1756 poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
1757   return vtrn_p16(a, b);
1758 }
1759
1760 // CHECK-LABEL: @test_vtrnq_s8(
1761 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1762 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1763 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
1764 // CHECK:   store <16 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
1765 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1766 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
1767 // CHECK:   store <16 x i8> [[VTRN1_I]], ptr [[TMP2]]
1768 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL_I]], align 16
1769 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1770 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1771 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1772 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
1773 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
// Wraps vtrnq_s8(a, b) and returns its int8x16x2_t result unchanged.
// The directives above expect two <16 x i8> shufflevectors of %a and %b:
// one interleaving the even lanes of a and b (mask 0, 16, 2, 18, ...) and
// one interleaving the odd lanes (mask 1, 17, 3, 19, ...), with the return
// slots allocated and accessed at 16-byte alignment.
1774 int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
1775   return vtrnq_s8(a, b);
1776 }
1777
1778 // CHECK-LABEL: @test_vtrnq_s16(
1779 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1780 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1781 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1782 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1783 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1784 // CHECK:   store <8 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
1785 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1786 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1787 // CHECK:   store <8 x i16> [[VTRN1_I]], ptr [[TMP4]]
1788 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL_I]], align 16
1789 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1790 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
1791 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1792 // CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
1793 // CHECK:   ret %struct.int16x8x2_t [[TMP10]]
// Wraps vtrnq_s16(a, b) and returns its int16x8x2_t result unchanged.
// The directives above expect two <8 x i16> shufflevectors of %a and %b:
// mask <0, 8, 2, 10, 4, 12, 6, 14> (even lanes of a and b interleaved) and
// mask <1, 9, 3, 11, 5, 13, 7, 15> (odd lanes interleaved), with the return
// slots allocated and accessed at 16-byte alignment.
1794 int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
1795   return vtrnq_s16(a, b);
1796 }
1797
1798 // CHECK-LABEL: @test_vtrnq_s32(
1799 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1800 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1801 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1802 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1803 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1804 // CHECK:   store <4 x i32> [[VTRN_I]], ptr [[RETVAL_I]]
1805 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
1806 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1807 // CHECK:   store <4 x i32> [[VTRN1_I]], ptr [[TMP4]]
1808 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL_I]], align 16
1809 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1810 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
1811 // CHECK:   store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
1812 // CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
1813 // CHECK:   ret %struct.int32x4x2_t [[TMP10]]
// Wraps vtrnq_s32(a, b) and returns its int32x4x2_t result unchanged.
// The directives above expect two <4 x i32> shufflevectors of %a and %b,
// with masks <0, 4, 2, 6> and <1, 5, 3, 7>, with the return slots allocated
// and accessed at 16-byte alignment.
1814 int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
1815   return vtrnq_s32(a, b);
1816 }
1817
1818 // CHECK-LABEL: @test_vtrnq_u8(
1819 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1820 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1821 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
1822 // CHECK:   store <16 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
1823 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1824 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
1825 // CHECK:   store <16 x i8> [[VTRN1_I]], ptr [[TMP2]]
1826 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL_I]], align 16
1827 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1828 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1829 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1830 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
1831 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
// Wraps vtrnq_u8(a, b) and returns its uint8x16x2_t result unchanged.
// The directives above expect two <16 x i8> shufflevectors of %a and %b:
// one interleaving the even lanes of a and b (mask 0, 16, 2, 18, ...) and
// one interleaving the odd lanes (mask 1, 17, 3, 19, ...), with the return
// slots allocated and accessed at 16-byte alignment.
1832 uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
1833   return vtrnq_u8(a, b);
1834 }
1835
1836 // CHECK-LABEL: @test_vtrnq_u16(
1837 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1838 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1839 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1840 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1841 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1842 // CHECK:   store <8 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
1843 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1844 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1845 // CHECK:   store <8 x i16> [[VTRN1_I]], ptr [[TMP4]]
1846 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL_I]], align 16
1847 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1848 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
1849 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1850 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
1851 // CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
// Wraps vtrnq_u16(a, b) and returns its uint16x8x2_t result unchanged.
// The directives above expect two <8 x i16> shufflevectors of %a and %b:
// mask <0, 8, 2, 10, 4, 12, 6, 14> (even lanes of a and b interleaved) and
// mask <1, 9, 3, 11, 5, 13, 7, 15> (odd lanes interleaved), with the return
// slots allocated and accessed at 16-byte alignment.
1852 uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
1853   return vtrnq_u16(a, b);
1854 }
1855
1856 // CHECK-LABEL: @test_vtrnq_u32(
1857 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1858 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1859 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1860 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1861 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1862 // CHECK:   store <4 x i32> [[VTRN_I]], ptr [[RETVAL_I]]
1863 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
1864 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1865 // CHECK:   store <4 x i32> [[VTRN1_I]], ptr [[TMP4]]
1866 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL_I]], align 16
1867 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1868 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
1869 // CHECK:   store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
1870 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
1871 // CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
// Wraps vtrnq_u32(a, b) and returns its uint32x4x2_t result unchanged.
// The directives above expect two <4 x i32> shufflevectors of %a and %b,
// with masks <0, 4, 2, 6> and <1, 5, 3, 7>, with the return slots allocated
// and accessed at 16-byte alignment.
1872 uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
1873   return vtrnq_u32(a, b);
1874 }
1875
1876 // CHECK-LABEL: @test_vtrnq_f32(
1877 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1878 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1879 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1880 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1881 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1882 // CHECK:   store <4 x float> [[VTRN_I]], ptr [[RETVAL_I]]
1883 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[RETVAL_I]], i32 1
1884 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1885 // CHECK:   store <4 x float> [[VTRN1_I]], ptr [[TMP4]]
1886 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL_I]], align 16
1887 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1888 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
1889 // CHECK:   store [2 x <4 x float>] [[TMP9]], ptr [[TMP8]], align 16
1890 // CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
1891 // CHECK:   ret %struct.float32x4x2_t [[TMP10]]
// Wraps vtrnq_f32(a, b) and returns its float32x4x2_t result unchanged.
// The directives above expect two <4 x float> shufflevectors of %a and %b,
// with masks <0, 4, 2, 6> and <1, 5, 3, 7>, with the return slots allocated
// and accessed at 16-byte alignment.
1892 float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
1893   return vtrnq_f32(a, b);
1894 }
1895
1896 // CHECK-LABEL: @test_vtrnq_p8(
1897 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1898 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1899 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
1900 // CHECK:   store <16 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
1901 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1902 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
1903 // CHECK:   store <16 x i8> [[VTRN1_I]], ptr [[TMP2]]
1904 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL_I]], align 16
1905 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1906 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1907 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1908 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
1909 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
// Wraps vtrnq_p8(a, b) and returns its poly8x16x2_t result unchanged.
// The directives above expect two <16 x i8> shufflevectors of %a and %b:
// one interleaving the even lanes of a and b (mask 0, 16, 2, 18, ...) and
// one interleaving the odd lanes (mask 1, 17, 3, 19, ...), with the return
// slots allocated and accessed at 16-byte alignment.
1910 poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
1911   return vtrnq_p8(a, b);
1912 }
1913
1914 // CHECK-LABEL: @test_vtrnq_p16(
1915 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1916 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1917 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1918 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1919 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1920 // CHECK:   store <8 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
1921 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1922 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1923 // CHECK:   store <8 x i16> [[VTRN1_I]], ptr [[TMP4]]
1924 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL_I]], align 16
1925 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1926 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
1927 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1928 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
1929 // CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
// Wraps vtrnq_p16(a, b) and returns its poly16x8x2_t result unchanged.
// The directives above expect two <8 x i16> shufflevectors of %a and %b:
// mask <0, 8, 2, 10, 4, 12, 6, 14> (even lanes of a and b interleaved) and
// mask <1, 9, 3, 11, 5, 13, 7, 15> (odd lanes interleaved), with the return
// slots allocated and accessed at 16-byte alignment.
1930 poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
1931   return vtrnq_p16(a, b);
1932 }