llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin | FileCheck %s
   3 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin | FileCheck %s
   4 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s
   5 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin | FileCheck %s
   6
   7 ; Integers
   8
   ; Mask case: loads <32 x i1> from %p and splits it with
   ; llvm.vector.deinterleave2. The expected code loads the mask with vlm.v and
   ; deinterleaves in registers (vnsrl.wi / vrgather.vv / vmsne.vi); no vlseg
   ; instruction is expected here.
   ; The result struct is rebuilt on a poison base (undef is deprecated); both
   ; fields are overwritten, so the base never reaches the returned value.
   9 define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
  10 ; CHECK-LABEL: vector_deinterleave_load_v16i1_v32i1:
  11 ; CHECK:       # %bb.0:
  12 ; CHECK-NEXT:    li a1, 32
  13 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
  14 ; CHECK-NEXT:    vmv.v.i v10, 0
  15 ; CHECK-NEXT:    vid.v v9
  16 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
  17 ; CHECK-NEXT:    vlm.v v8, (a0)
  18 ; CHECK-NEXT:    li a0, -256
  19 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
  20 ; CHECK-NEXT:    vadd.vv v11, v9, v9
  21 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
  22 ; CHECK-NEXT:    vmv.s.x v9, a0
  23 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
  24 ; CHECK-NEXT:    vadd.vi v12, v11, -16
  25 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
  26 ; CHECK-NEXT:    vslidedown.vi v0, v8, 2
  27 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
  28 ; CHECK-NEXT:    vadd.vi v11, v11, -15
  29 ; CHECK-NEXT:    vmerge.vim v13, v10, 1, v0
  30 ; CHECK-NEXT:    vmv1r.v v0, v8
  31 ; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
  32 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
  33 ; CHECK-NEXT:    vnsrl.wi v10, v8, 0
  34 ; CHECK-NEXT:    vnsrl.wi v8, v8, 8
  35 ; CHECK-NEXT:    vmv1r.v v0, v9
  36 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
  37 ; CHECK-NEXT:    vrgather.vv v10, v13, v12, v0.t
  38 ; CHECK-NEXT:    vrgather.vv v8, v13, v11, v0.t
  39 ; CHECK-NEXT:    vmsne.vi v0, v10, 0
  40 ; CHECK-NEXT:    vmsne.vi v8, v8, 0
  41 ; CHECK-NEXT:    ret
  42   %vec = load <32 x i1>, ptr %p
  43   %deinterleaved.results = call {<16 x i1>, <16 x i1>} @llvm.vector.deinterleave2.v32i1(<32 x i1> %vec)
  44   %t0 = extractvalue { <16 x i1>, <16 x i1> } %deinterleaved.results, 0
  45   %t1 = extractvalue { <16 x i1>, <16 x i1> } %deinterleaved.results, 1
  46   %res0 = insertvalue { <16 x i1>, <16 x i1> } poison, <16 x i1> %t0, 0
  47   %res1 = insertvalue { <16 x i1>, <16 x i1> } %res0, <16 x i1> %t1, 1
  48   ret {<16 x i1>, <16 x i1>} %res1
  49 }
  50
  ; Loads <32 x i8> from %p and splits it with llvm.vector.deinterleave2; the
  ; load + deinterleave pair is expected to become a single vlseg2e8.v.
  ; The result struct is rebuilt on a poison base (undef is deprecated); both
  ; fields are overwritten, so the base never reaches the returned value.
  51 define {<16 x i8>, <16 x i8>} @vector_deinterleave_load_v16i8_v32i8(ptr %p) {
  52 ; CHECK-LABEL: vector_deinterleave_load_v16i8_v32i8:
  53 ; CHECK:       # %bb.0:
  54 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
  55 ; CHECK-NEXT:    vlseg2e8.v v8, (a0)
  56 ; CHECK-NEXT:    ret
  57   %vec = load <32 x i8>, ptr %p
  58   %deinterleaved.results = call {<16 x i8>, <16 x i8>} @llvm.vector.deinterleave2.v32i8(<32 x i8> %vec)
  59   %t0 = extractvalue { <16 x i8>, <16 x i8> } %deinterleaved.results, 0
  60   %t1 = extractvalue { <16 x i8>, <16 x i8> } %deinterleaved.results, 1
  61   %res0 = insertvalue { <16 x i8>, <16 x i8> } poison, <16 x i8> %t0, 0
  62   %res1 = insertvalue { <16 x i8>, <16 x i8> } %res0, <16 x i8> %t1, 1
  63   ret {<16 x i8>, <16 x i8>} %res1
  64 }
  65
  66 ; Shouldn't be lowered to vlseg: the <16 x i16> load below is only byte-aligned (align 1).
  ; The expected code instead loads the data with vle8.v and extracts the two
  ; results with vnsrl.wi (shift amounts 0 and 16).
  ; The result struct is rebuilt on a poison base (undef is deprecated); both
  ; fields are overwritten, so the base never reaches the returned value.
  67 define {<8 x i16>, <8 x i16>} @vector_deinterleave_load_v8i16_v16i16_align1(ptr %p) {
  68 ; CHECK-LABEL: vector_deinterleave_load_v8i16_v16i16_align1:
  69 ; CHECK:       # %bb.0:
  70 ; CHECK-NEXT:    li a1, 32
  71 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
  72 ; CHECK-NEXT:    vle8.v v10, (a0)
  73 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
  74 ; CHECK-NEXT:    vnsrl.wi v8, v10, 0
  75 ; CHECK-NEXT:    vnsrl.wi v9, v10, 16
  76 ; CHECK-NEXT:    ret
  77   %vec = load <16 x i16>, ptr %p, align 1
  78   %deinterleaved.results = call {<8 x i16>, <8 x i16>} @llvm.vector.deinterleave2.v16i16(<16 x i16> %vec)
  79   %t0 = extractvalue { <8 x i16>, <8 x i16> } %deinterleaved.results, 0
  80   %t1 = extractvalue { <8 x i16>, <8 x i16> } %deinterleaved.results, 1
  81   %res0 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %t0, 0
  82   %res1 = insertvalue { <8 x i16>, <8 x i16> } %res0, <8 x i16> %t1, 1
  83   ret {<8 x i16>, <8 x i16>} %res1
  84 }
  85
  ; Same shape as the align1 variant but with the default alignment on the
  ; <16 x i16> load; here a single vlseg2e16.v is expected.
  ; The result struct is rebuilt on a poison base (undef is deprecated); both
  ; fields are overwritten, so the base never reaches the returned value.
  86 define {<8 x i16>, <8 x i16>} @vector_deinterleave_load_v8i16_v16i16(ptr %p) {
  87 ; CHECK-LABEL: vector_deinterleave_load_v8i16_v16i16:
  88 ; CHECK:       # %bb.0:
  89 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
  90 ; CHECK-NEXT:    vlseg2e16.v v8, (a0)
  91 ; CHECK-NEXT:    ret
  92   %vec = load <16 x i16>, ptr %p
  93   %deinterleaved.results = call {<8 x i16>, <8 x i16>} @llvm.vector.deinterleave2.v16i16(<16 x i16> %vec)
  94   %t0 = extractvalue { <8 x i16>, <8 x i16> } %deinterleaved.results, 0
  95   %t1 = extractvalue { <8 x i16>, <8 x i16> } %deinterleaved.results, 1
  96   %res0 = insertvalue { <8 x i16>, <8 x i16> } poison, <8 x i16> %t0, 0
  97   %res1 = insertvalue { <8 x i16>, <8 x i16> } %res0, <8 x i16> %t1, 1
  98   ret {<8 x i16>, <8 x i16>} %res1
  99 }
 100
 ; Loads <8 x i32> from %p and splits it with llvm.vector.deinterleave2; a
 ; single vlseg2e32.v is expected.
 ; NOTE(review): "vv8i32" in the function name looks like a typo for "v8i32";
 ; it is kept as-is because the label assertion below matches on the name.
 ; The result struct is rebuilt on a poison base (undef is deprecated); both
 ; fields are overwritten, so the base never reaches the returned value.
 101 define {<4 x i32>, <4 x i32>} @vector_deinterleave_load_v4i32_vv8i32(ptr %p) {
 102 ; CHECK-LABEL: vector_deinterleave_load_v4i32_vv8i32:
 103 ; CHECK:       # %bb.0:
 104 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 105 ; CHECK-NEXT:    vlseg2e32.v v8, (a0)
 106 ; CHECK-NEXT:    ret
 107   %vec = load <8 x i32>, ptr %p
 108   %deinterleaved.results = call {<4 x i32>, <4 x i32>} @llvm.vector.deinterleave2.v8i32(<8 x i32> %vec)
 109   %t0 = extractvalue { <4 x i32>, <4 x i32> } %deinterleaved.results, 0
 110   %t1 = extractvalue { <4 x i32>, <4 x i32> } %deinterleaved.results, 1
 111   %res0 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %t0, 0
 112   %res1 = insertvalue { <4 x i32>, <4 x i32> } %res0, <4 x i32> %t1, 1
 113   ret {<4 x i32>, <4 x i32>} %res1
 114 }
 115
 ; Loads <4 x i64> from %p and splits it with llvm.vector.deinterleave2; a
 ; single vlseg2e64.v is expected.
 ; The result struct is rebuilt on a poison base (undef is deprecated); both
 ; fields are overwritten, so the base never reaches the returned value.
 116 define {<2 x i64>, <2 x i64>} @vector_deinterleave_load_v2i64_v4i64(ptr %p) {
 117 ; CHECK-LABEL: vector_deinterleave_load_v2i64_v4i64:
 118 ; CHECK:       # %bb.0:
 119 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 120 ; CHECK-NEXT:    vlseg2e64.v v8, (a0)
 121 ; CHECK-NEXT:    ret
 122   %vec = load <4 x i64>, ptr %p
 123   %deinterleaved.results = call {<2 x i64>, <2 x i64>} @llvm.vector.deinterleave2.v4i64(<4 x i64> %vec)
 124   %t0 = extractvalue { <2 x i64>, <2 x i64> } %deinterleaved.results, 0
 125   %t1 = extractvalue { <2 x i64>, <2 x i64> } %deinterleaved.results, 1
 126   %res0 = insertvalue { <2 x i64>, <2 x i64> } poison, <2 x i64> %t0, 0
 127   %res1 = insertvalue { <2 x i64>, <2 x i64> } %res0, <2 x i64> %t1, 1
 128   ret {<2 x i64>, <2 x i64>} %res1
 129 }
 130
 131 ; Floats
 132
 ; bfloat case: loads <4 x bfloat> from %p and splits it with
 ; llvm.vector.deinterleave2; a single vlseg2e16.v at mf4 is expected.
 ; The result struct is rebuilt on a poison base (undef is deprecated); both
 ; fields are overwritten, so the base never reaches the returned value.
 133 define {<2 x bfloat>, <2 x bfloat>} @vector_deinterleave_load_v2bf16_v4bf16(ptr %p) {
 134 ; CHECK-LABEL: vector_deinterleave_load_v2bf16_v4bf16:
 135 ; CHECK:       # %bb.0:
 136 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 137 ; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 138 ; CHECK-NEXT:    ret
 139   %vec = load <4 x bfloat>, ptr %p
 140   %deinterleaved.results = call {<2 x bfloat>, <2 x bfloat>} @llvm.vector.deinterleave2.v4bf16(<4 x bfloat> %vec)
 141   %t0 = extractvalue { <2 x bfloat>, <2 x bfloat> } %deinterleaved.results, 0
 142   %t1 = extractvalue { <2 x bfloat>, <2 x bfloat> } %deinterleaved.results, 1
 143   %res0 = insertvalue { <2 x bfloat>, <2 x bfloat> } poison, <2 x bfloat> %t0, 0
 144   %res1 = insertvalue { <2 x bfloat>, <2 x bfloat> } %res0, <2 x bfloat> %t1, 1
 145   ret {<2 x bfloat>, <2 x bfloat>} %res1
 146 }
 147
 ; bfloat case: loads <8 x bfloat> from %p and splits it with
 ; llvm.vector.deinterleave2; a single vlseg2e16.v at mf2 is expected.
 ; The result struct is rebuilt on a poison base (undef is deprecated); both
 ; fields are overwritten, so the base never reaches the returned value.
 148 define {<4 x bfloat>, <4 x bfloat>} @vector_deinterleave_load_v4bf16_v8bf16(ptr %p) {
 149 ; CHECK-LABEL: vector_deinterleave_load_v4bf16_v8bf16:
 150 ; CHECK:       # %bb.0:
 151 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 152 ; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 153 ; CHECK-NEXT:    ret
 154   %vec = load <8 x bfloat>, ptr %p
 155   %deinterleaved.results = call {<4 x bfloat>, <4 x bfloat>} @llvm.vector.deinterleave2.v8bf16(<8 x bfloat> %vec)
 156   %t0 = extractvalue { <4 x bfloat>, <4 x bfloat> } %deinterleaved.results, 0
 157   %t1 = extractvalue { <4 x bfloat>, <4 x bfloat> } %deinterleaved.results, 1
 158   %res0 = insertvalue { <4 x bfloat>, <4 x bfloat> } poison, <4 x bfloat> %t0, 0
 159   %res1 = insertvalue { <4 x bfloat>, <4 x bfloat> } %res0, <4 x bfloat> %t1, 1
 160   ret {<4 x bfloat>, <4 x bfloat>} %res1
 161 }
 162
 ; half case: loads <4 x half> from %p and splits it with
 ; llvm.vector.deinterleave2; a single vlseg2e16.v at mf4 is expected.
 ; The result struct is rebuilt on a poison base (undef is deprecated); both
 ; fields are overwritten, so the base never reaches the returned value.
 163 define {<2 x half>, <2 x half>} @vector_deinterleave_load_v2f16_v4f16(ptr %p) {
 164 ; CHECK-LABEL: vector_deinterleave_load_v2f16_v4f16:
 165 ; CHECK:       # %bb.0:
 166 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
 167 ; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 168 ; CHECK-NEXT:    ret
 169   %vec = load <4 x half>, ptr %p
 170   %deinterleaved.results = call {<2 x half>, <2 x half>} @llvm.vector.deinterleave2.v4f16(<4 x half> %vec)
 171   %t0 = extractvalue { <2 x half>, <2 x half> } %deinterleaved.results, 0
 172   %t1 = extractvalue { <2 x half>, <2 x half> } %deinterleaved.results, 1
 173   %res0 = insertvalue { <2 x half>, <2 x half> } poison, <2 x half> %t0, 0
 174   %res1 = insertvalue { <2 x half>, <2 x half> } %res0, <2 x half> %t1, 1
 175   ret {<2 x half>, <2 x half>} %res1
 176 }
 177
 ; half case: loads <8 x half> from %p and splits it with
 ; llvm.vector.deinterleave2; a single vlseg2e16.v at mf2 is expected.
 ; The result struct is rebuilt on a poison base (undef is deprecated); both
 ; fields are overwritten, so the base never reaches the returned value.
 178 define {<4 x half>, <4 x half>} @vector_deinterleave_load_v4f16_v8f16(ptr %p) {
 179 ; CHECK-LABEL: vector_deinterleave_load_v4f16_v8f16:
 180 ; CHECK:       # %bb.0:
 181 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 182 ; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 183 ; CHECK-NEXT:    ret
 184   %vec = load <8 x half>, ptr %p
 185   %deinterleaved.results = call {<4 x half>, <4 x half>} @llvm.vector.deinterleave2.v8f16(<8 x half> %vec)
 186   %t0 = extractvalue { <4 x half>, <4 x half> } %deinterleaved.results, 0
 187   %t1 = extractvalue { <4 x half>, <4 x half> } %deinterleaved.results, 1
 188   %res0 = insertvalue { <4 x half>, <4 x half> } poison, <4 x half> %t0, 0
 189   %res1 = insertvalue { <4 x half>, <4 x half> } %res0, <4 x half> %t1, 1
 190   ret {<4 x half>, <4 x half>} %res1
 191 }
 192
 ; float case: loads <4 x float> from %p and splits it with
 ; llvm.vector.deinterleave2; a single vlseg2e32.v at mf2 is expected.
 ; The result struct is rebuilt on a poison base (undef is deprecated); both
 ; fields are overwritten, so the base never reaches the returned value.
 193 define {<2 x float>, <2 x float>} @vector_deinterleave_load_v2f32_v4f32(ptr %p) {
 194 ; CHECK-LABEL: vector_deinterleave_load_v2f32_v4f32:
 195 ; CHECK:       # %bb.0:
 196 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 197 ; CHECK-NEXT:    vlseg2e32.v v8, (a0)
 198 ; CHECK-NEXT:    ret
 199   %vec = load <4 x float>, ptr %p
 200   %deinterleaved.results = call {<2 x float>, <2 x float>} @llvm.vector.deinterleave2.v4f32(<4 x float> %vec)
 201   %t0 = extractvalue { <2 x float>, <2 x float> } %deinterleaved.results, 0
 202   %t1 = extractvalue { <2 x float>, <2 x float> } %deinterleaved.results, 1
 203   %res0 = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> %t0, 0
 204   %res1 = insertvalue { <2 x float>, <2 x float> } %res0, <2 x float> %t1, 1
 205   ret {<2 x float>, <2 x float>} %res1
 206 }
 207
 ; bfloat case: loads <16 x bfloat> from %p and splits it with
 ; llvm.vector.deinterleave2; a single vlseg2e16.v at m1 is expected.
 ; The result struct is rebuilt on a poison base (undef is deprecated); both
 ; fields are overwritten, so the base never reaches the returned value.
 208 define {<8 x bfloat>, <8 x bfloat>} @vector_deinterleave_load_v8bf16_v16bf16(ptr %p) {
 209 ; CHECK-LABEL: vector_deinterleave_load_v8bf16_v16bf16:
 210 ; CHECK:       # %bb.0:
 211 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 212 ; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 213 ; CHECK-NEXT:    ret
 214   %vec = load <16 x bfloat>, ptr %p
 215   %deinterleaved.results = call {<8 x bfloat>, <8 x bfloat>} @llvm.vector.deinterleave2.v16bf16(<16 x bfloat> %vec)
 216   %t0 = extractvalue { <8 x bfloat>, <8 x bfloat> } %deinterleaved.results, 0
 217   %t1 = extractvalue { <8 x bfloat>, <8 x bfloat> } %deinterleaved.results, 1
 218   %res0 = insertvalue { <8 x bfloat>, <8 x bfloat> } poison, <8 x bfloat> %t0, 0
 219   %res1 = insertvalue { <8 x bfloat>, <8 x bfloat> } %res0, <8 x bfloat> %t1, 1
 220   ret {<8 x bfloat>, <8 x bfloat>} %res1
 221 }
 222
 ; half case: loads <16 x half> from %p and splits it with
 ; llvm.vector.deinterleave2; a single vlseg2e16.v at m1 is expected.
 ; The result struct is rebuilt on a poison base (undef is deprecated); both
 ; fields are overwritten, so the base never reaches the returned value.
 223 define {<8 x half>, <8 x half>} @vector_deinterleave_load_v8f16_v16f16(ptr %p) {
 224 ; CHECK-LABEL: vector_deinterleave_load_v8f16_v16f16:
 225 ; CHECK:       # %bb.0:
 226 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 227 ; CHECK-NEXT:    vlseg2e16.v v8, (a0)
 228 ; CHECK-NEXT:    ret
 229   %vec = load <16 x half>, ptr %p
 230   %deinterleaved.results = call {<8 x half>, <8 x half>} @llvm.vector.deinterleave2.v16f16(<16 x half> %vec)
 231   %t0 = extractvalue { <8 x half>, <8 x half> } %deinterleaved.results, 0
 232   %t1 = extractvalue { <8 x half>, <8 x half> } %deinterleaved.results, 1
 233   %res0 = insertvalue { <8 x half>, <8 x half> } poison, <8 x half> %t0, 0
 234   %res1 = insertvalue { <8 x half>, <8 x half> } %res0, <8 x half> %t1, 1
 235   ret {<8 x half>, <8 x half>} %res1
 236 }
 237
 ; float case: loads <8 x float> from %p and splits it with
 ; llvm.vector.deinterleave2; a single vlseg2e32.v at m1 is expected.
 ; The result struct is rebuilt on a poison base (undef is deprecated); both
 ; fields are overwritten, so the base never reaches the returned value.
 238 define {<4 x float>, <4 x float>} @vector_deinterleave_load_v4f32_v8f32(ptr %p) {
 239 ; CHECK-LABEL: vector_deinterleave_load_v4f32_v8f32:
 240 ; CHECK:       # %bb.0:
 241 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 242 ; CHECK-NEXT:    vlseg2e32.v v8, (a0)
 243 ; CHECK-NEXT:    ret
 244   %vec = load <8 x float>, ptr %p
 245   %deinterleaved.results = call {<4 x float>, <4 x float>} @llvm.vector.deinterleave2.v8f32(<8 x float> %vec)
 246   %t0 = extractvalue { <4 x float>, <4 x float> } %deinterleaved.results, 0
 247   %t1 = extractvalue { <4 x float>, <4 x float> } %deinterleaved.results, 1
 248   %res0 = insertvalue { <4 x float>, <4 x float> } poison, <4 x float> %t0, 0
 249   %res1 = insertvalue { <4 x float>, <4 x float> } %res0, <4 x float> %t1, 1
 250   ret {<4 x float>, <4 x float>} %res1
 251 }
 252
 ; double case: loads <4 x double> from %p and splits it with
 ; llvm.vector.deinterleave2; a single vlseg2e64.v at m1 is expected.
 ; The result struct is rebuilt on a poison base (undef is deprecated); both
 ; fields are overwritten, so the base never reaches the returned value.
 253 define {<2 x double>, <2 x double>} @vector_deinterleave_load_v2f64_v4f64(ptr %p) {
 254 ; CHECK-LABEL: vector_deinterleave_load_v2f64_v4f64:
 255 ; CHECK:       # %bb.0:
 256 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 257 ; CHECK-NEXT:    vlseg2e64.v v8, (a0)
 258 ; CHECK-NEXT:    ret
 259   %vec = load <4 x double>, ptr %p
 260   %deinterleaved.results = call {<2 x double>, <2 x double>} @llvm.vector.deinterleave2.v4f64(<4 x double> %vec)
 261   %t0 = extractvalue { <2 x double>, <2 x double> } %deinterleaved.results, 0
 262   %t1 = extractvalue { <2 x double>, <2 x double> } %deinterleaved.results, 1
 263   %res0 = insertvalue { <2 x double>, <2 x double> } poison, <2 x double> %t0, 0
 264   %res1 = insertvalue { <2 x double>, <2 x double> } %res0, <2 x double> %t1, 1
 265   ret {<2 x double>, <2 x double>} %res1
 266 }
 267
 ; Factor-4 case (integer elements): a <32 x i8> load feeds a two-level tree of
 ; llvm.vector.deinterleave2 calls, and a single vlseg4e8.v is expected.
 ; Leaf-to-field mapping: the first result of the top-level split (%d0.0)
 ; provides fields 0 and 2 (%t0, %t2); the second result (%d0.1) provides
 ; fields 1 and 3 (%t1, %t3).
 ; The result struct is rebuilt on a poison base (undef is deprecated); all
 ; four fields are overwritten, so the base never reaches the returned value.
 268 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor4(ptr %p) {
 269 ; CHECK-LABEL: vector_deinterleave_load_factor4:
 270 ; CHECK:       # %bb.0:
 271 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 272 ; CHECK-NEXT:    vlseg4e8.v v8, (a0)
 273 ; CHECK-NEXT:    ret
 274   %vec = load <32 x i8>, ptr %p
 275   %d0 = call {<16 x i8>, <16 x i8>} @llvm.vector.deinterleave2.v32i8(<32 x i8> %vec)
 276   %d0.0 = extractvalue { <16 x i8>, <16 x i8> } %d0, 0
 277   %d0.1 = extractvalue { <16 x i8>, <16 x i8> } %d0, 1
 278   %d1 = call {<8 x i8>, <8 x i8>} @llvm.vector.deinterleave2.v16i8(<16 x i8> %d0.0)
 279   %t0 = extractvalue { <8 x i8>, <8 x i8> } %d1, 0
 280   %t2 = extractvalue { <8 x i8>, <8 x i8> } %d1, 1
 281   %d2 = call {<8 x i8>, <8 x i8>} @llvm.vector.deinterleave2.v16i8(<16 x i8> %d0.1)
 282   %t1 = extractvalue { <8 x i8>, <8 x i8> } %d2, 0
 283   %t3 = extractvalue { <8 x i8>, <8 x i8> } %d2, 1
 284
 285   %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0
 286   %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 1
 287   %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 2
 288   %res3 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res2, <8 x i8> %t3, 3
 289   ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %res3
 290 }
 291
 ; Factor-8 case (integer elements): a <16 x i32> load feeds a three-level tree
 ; of llvm.vector.deinterleave2 calls, and a single vlseg8e32.v is expected.
 ; Leaf-to-field mapping: %d1.0 -> fields 0 and 4, %d1.1 -> fields 2 and 6,
 ; %d2.0 -> fields 1 and 5, %d2.1 -> fields 3 and 7.
 ; The result struct is rebuilt on a poison base (undef is deprecated); all
 ; eight fields are overwritten, so the base never reaches the returned value.
 292 define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave_load_factor8(ptr %ptr) {
 293 ; CHECK-LABEL: vector_deinterleave_load_factor8:
 294 ; CHECK:       # %bb.0:
 295 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
 296 ; CHECK-NEXT:    vlseg8e32.v v8, (a0)
 297 ; CHECK-NEXT:    ret
 298   %vec = load <16 x i32>, ptr %ptr
 299   %d0 = call { <8 x i32>, <8 x i32> } @llvm.vector.deinterleave2.v16i32(<16 x i32> %vec)
 300   %d0.0 = extractvalue { <8 x i32>, <8 x i32> } %d0, 0
 301   %d0.1 = extractvalue { <8 x i32>, <8 x i32> } %d0, 1
 302   %d1 = call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> %d0.0)
 303   %d1.0 = extractvalue { <4 x i32>, <4 x i32> } %d1, 0
 304   %d1.1 = extractvalue { <4 x i32>, <4 x i32> } %d1, 1
 305   %d2 = call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> %d0.1)
 306   %d2.0 = extractvalue { <4 x i32>, <4 x i32> } %d2, 0
 307   %d2.1 = extractvalue { <4 x i32>, <4 x i32> } %d2, 1
 308
 309   %d3 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d1.0)
 310   %t0 = extractvalue { <2 x i32>, <2 x i32> } %d3, 0
 311   %t4 = extractvalue { <2 x i32>, <2 x i32> } %d3, 1
 312   %d4 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d1.1)
 313   %t2 = extractvalue { <2 x i32>, <2 x i32> } %d4, 0
 314   %t6 = extractvalue { <2 x i32>, <2 x i32> } %d4, 1
 315   %d5 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d2.0)
 316   %t1 = extractvalue { <2 x i32>, <2 x i32> } %d5, 0
 317   %t5 = extractvalue { <2 x i32>, <2 x i32> } %d5, 1
 318   %d6 = call { <2 x i32>, <2 x i32> } @llvm.vector.deinterleave2.v4i32(<4 x i32> %d2.1)
 319   %t3 = extractvalue { <2 x i32>, <2 x i32> } %d6, 0
 320   %t7 = extractvalue { <2 x i32>, <2 x i32> } %d6, 1
 321
 322   %res0 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } poison, <2 x i32> %t0, 0
 323   %res1 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res0, <2 x i32> %t1, 1
 324   %res2 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res1, <2 x i32> %t2, 2
 325   %res3 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res2, <2 x i32> %t3, 3
 326   %res4 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res3, <2 x i32> %t4, 4
 327   %res5 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res4, <2 x i32> %t5, 5
 328   %res6 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res5, <2 x i32> %t6, 6
 329   %res7 = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res6, <2 x i32> %t7, 7
 330   ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %res7
 331 }