llvm/test/CodeGen/AMDGPU/concat_vectors.ll

   1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GCN %s
   2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
   3
   4 ; GCN-LABEL: {{^}}test_concat_v1i32:
   5 ; 0x80f000 is the high 32 bits of the resource descriptor used by MUBUF
   6 ; instructions that access scratch memory.  Bit 23, which is the add_tid_enable
   7 ; bit, is only set for scratch access, so we can check for the absence of this
   8 ; value if we want to ensure scratch memory is not being used.  The SGPR index is matched with [0-9]+ so multi-digit registers are covered too.
   9 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
  10 ; GCN-NOT: movrel
  11 define amdgpu_kernel void @test_concat_v1i32(ptr addrspace(1) %out, <1 x i32> %a, <1 x i32> %b) nounwind { ; Joins <1 x i32> %a and %b into one <2 x i32> and stores it to %out.
  12   %concat = shufflevector <1 x i32> %a, <1 x i32> %b, <2 x i32> <i32 0, i32 1> ; lanes of %a first, then lanes of %b
  13   store <2 x i32> %concat, ptr addrspace(1) %out, align 8
  14   ret void
  15 }
  16
  17 ; GCN-LABEL: {{^}}test_concat_v2i32:
  18 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
  19 ; GCN-NOT: movrel
  20 define amdgpu_kernel void @test_concat_v2i32(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) nounwind { ; Joins <2 x i32> %a and %b into one <4 x i32> and stores it to %out.
  21   %concat = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; lanes of %a first, then lanes of %b
  22   store <4 x i32> %concat, ptr addrspace(1) %out, align 16
  23   ret void
  24 }
  25
  26 ; GCN-LABEL: {{^}}test_concat_v4i32:
  27 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
  28 ; GCN-NOT: movrel
  29 define amdgpu_kernel void @test_concat_v4i32(ptr addrspace(1) %out, <4 x i32> %a, <4 x i32> %b) nounwind { ; Joins <4 x i32> %a and %b into one <8 x i32> and stores it to %out.
  30   %concat = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; lanes of %a first, then lanes of %b
  31   store <8 x i32> %concat, ptr addrspace(1) %out, align 32
  32   ret void
  33 }
  34
  35 ; GCN-LABEL: {{^}}test_concat_v8i32:
  36 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
  37 ; GCN-NOT: movrel
  38 define amdgpu_kernel void @test_concat_v8i32(ptr addrspace(1) %out, <8 x i32> %a, <8 x i32> %b) nounwind { ; Joins <8 x i32> %a and %b into one <16 x i32> and stores it to %out.
  39   %concat = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; lanes of %a first, then lanes of %b
  40   store <16 x i32> %concat, ptr addrspace(1) %out, align 64
  41   ret void
  42 }
  43
  44 ; GCN-LABEL: {{^}}test_concat_v16i32:
  45 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
  46 ; GCN-NOT: movrel
  47 define amdgpu_kernel void @test_concat_v16i32(ptr addrspace(1) %out, <16 x i32> %a, <16 x i32> %b) nounwind { ; Joins <16 x i32> %a and %b into one <32 x i32> and stores it to %out.
  48   %concat = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; lanes of %a first, then lanes of %b
  49   store <32 x i32> %concat, ptr addrspace(1) %out, align 128
  50   ret void
  51 }
  52
  53 ; GCN-LABEL: {{^}}test_concat_v1f32:
  54 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
  55 ; GCN-NOT: movrel
  56 define amdgpu_kernel void @test_concat_v1f32(ptr addrspace(1) %out, <1 x float> %a, <1 x float> %b) nounwind { ; Joins <1 x float> %a and %b into one <2 x float> and stores it to %out.
  57   %concat = shufflevector <1 x float> %a, <1 x float> %b, <2 x i32> <i32 0, i32 1> ; lanes of %a first, then lanes of %b
  58   store <2 x float> %concat, ptr addrspace(1) %out, align 8
  59   ret void
  60 }
  61
  62 ; GCN-LABEL: {{^}}test_concat_v2f32:
  63 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
  64 ; GCN-NOT: movrel
  65 define amdgpu_kernel void @test_concat_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) nounwind { ; Joins <2 x float> %a and %b into one <4 x float> and stores it to %out.
  66   %concat = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; lanes of %a first, then lanes of %b
  67   store <4 x float> %concat, ptr addrspace(1) %out, align 16
  68   ret void
  69 }
  70
  71 ; GCN-LABEL: {{^}}test_concat_v4f32:
  72 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
  73 ; GCN-NOT: movrel
  74 define amdgpu_kernel void @test_concat_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) nounwind { ; Joins <4 x float> %a and %b into one <8 x float> and stores it to %out.
  75   %concat = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; lanes of %a first, then lanes of %b
  76   store <8 x float> %concat, ptr addrspace(1) %out, align 32
  77   ret void
  78 }
  79
  80 ; GCN-LABEL: {{^}}test_concat_v8f32:
  81 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
  82 ; GCN-NOT: movrel
  83 define amdgpu_kernel void @test_concat_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) nounwind { ; Joins <8 x float> %a and %b into one <16 x float> and stores it to %out.
  84   %concat = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; lanes of %a first, then lanes of %b
  85   store <16 x float> %concat, ptr addrspace(1) %out, align 64
  86   ret void
  87 }
  88
  89 ; GCN-LABEL: {{^}}test_concat_v16f32:
  90 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
  91 ; GCN-NOT: movrel
  92 define amdgpu_kernel void @test_concat_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) nounwind { ; Joins <16 x float> %a and %b into one <32 x float> and stores it to %out.
  93   %concat = shufflevector <16 x float> %a, <16 x float> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; lanes of %a first, then lanes of %b
  94   store <32 x float> %concat, ptr addrspace(1) %out, align 128
  95   ret void
  96 }
  97
  98 ; GCN-LABEL: {{^}}test_concat_v1i64:
  99 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 100 ; GCN-NOT: movrel
 101 define amdgpu_kernel void @test_concat_v1i64(ptr addrspace(1) %out, <1 x i64> %a, <1 x i64> %b) nounwind { ; Joins <1 x i64> %a and %b into one <2 x i64> and stores it to %out (integer elements, as the kernel name says).
 102   %concat = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1> ; lanes of %a first, then lanes of %b
 103   store <2 x i64> %concat, ptr addrspace(1) %out, align 16
 104   ret void
 105 }
 106
 107 ; GCN-LABEL: {{^}}test_concat_v2i64:
 108 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 109 ; GCN-NOT: movrel
 110 define amdgpu_kernel void @test_concat_v2i64(ptr addrspace(1) %out, <2 x i64> %a, <2 x i64> %b) nounwind { ; Joins <2 x i64> %a and %b into one <4 x i64> and stores it to %out (integer elements, as the kernel name says).
 111   %concat = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; lanes of %a first, then lanes of %b
 112   store <4 x i64> %concat, ptr addrspace(1) %out, align 32
 113   ret void
 114 }
 115
 116 ; GCN-LABEL: {{^}}test_concat_v4i64:
 117 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 118 ; GCN-NOT: movrel
 119 define amdgpu_kernel void @test_concat_v4i64(ptr addrspace(1) %out, <4 x i64> %a, <4 x i64> %b) nounwind { ; Joins <4 x i64> %a and %b into one <8 x i64> and stores it to %out (integer elements, as the kernel name says).
 120   %concat = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; lanes of %a first, then lanes of %b
 121   store <8 x i64> %concat, ptr addrspace(1) %out, align 64
 122   ret void
 123 }
 124
 125 ; GCN-LABEL: {{^}}test_concat_v8i64:
 126 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 127 ; GCN-NOT: movrel
 128 define amdgpu_kernel void @test_concat_v8i64(ptr addrspace(1) %out, <8 x i64> %a, <8 x i64> %b) nounwind { ; Joins <8 x i64> %a and %b into one <16 x i64> and stores it to %out (integer elements, as the kernel name says).
 129   %concat = shufflevector <8 x i64> %a, <8 x i64> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; lanes of %a first, then lanes of %b
 130   store <16 x i64> %concat, ptr addrspace(1) %out, align 128
 131   ret void
 132 }
 133
 134 ; GCN-LABEL: {{^}}test_concat_v16i64:
 135 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 136 ; GCN-NOT: movrel
 137 define amdgpu_kernel void @test_concat_v16i64(ptr addrspace(1) %out, <16 x i64> %a, <16 x i64> %b) nounwind { ; Joins <16 x i64> %a and %b into one <32 x i64> and stores it to %out (integer elements, as the kernel name says).
 138   %concat = shufflevector <16 x i64> %a, <16 x i64> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; lanes of %a first, then lanes of %b
 139   store <32 x i64> %concat, ptr addrspace(1) %out, align 256
 140   ret void
 141 }
 142
 143 ; GCN-LABEL: {{^}}test_concat_v1f64:
 144 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 145 ; GCN-NOT: movrel
 146 define amdgpu_kernel void @test_concat_v1f64(ptr addrspace(1) %out, <1 x double> %a, <1 x double> %b) nounwind { ; Joins <1 x double> %a and %b into one <2 x double> and stores it to %out.
 147   %concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> <i32 0, i32 1> ; lanes of %a first, then lanes of %b
 148   store <2 x double> %concat, ptr addrspace(1) %out, align 16
 149   ret void
 150 }
 151
 152 ; GCN-LABEL: {{^}}test_concat_v2f64:
 153 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 154 ; GCN-NOT: movrel
 155 define amdgpu_kernel void @test_concat_v2f64(ptr addrspace(1) %out, <2 x double> %a, <2 x double> %b) nounwind { ; Joins <2 x double> %a and %b into one <4 x double> and stores it to %out.
 156   %concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; lanes of %a first, then lanes of %b
 157   store <4 x double> %concat, ptr addrspace(1) %out, align 32
 158   ret void
 159 }
 160
 161 ; GCN-LABEL: {{^}}test_concat_v4f64:
 162 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 163 ; GCN-NOT: movrel
 164 define amdgpu_kernel void @test_concat_v4f64(ptr addrspace(1) %out, <4 x double> %a, <4 x double> %b) nounwind { ; Joins <4 x double> %a and %b into one <8 x double> and stores it to %out.
 165   %concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; lanes of %a first, then lanes of %b
 166   store <8 x double> %concat, ptr addrspace(1) %out, align 64
 167   ret void
 168 }
 169
 170 ; GCN-LABEL: {{^}}test_concat_v8f64:
 171 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 172 ; GCN-NOT: movrel
 173 define amdgpu_kernel void @test_concat_v8f64(ptr addrspace(1) %out, <8 x double> %a, <8 x double> %b) nounwind { ; Joins <8 x double> %a and %b into one <16 x double> and stores it to %out.
 174   %concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; lanes of %a first, then lanes of %b
 175   store <16 x double> %concat, ptr addrspace(1) %out, align 128
 176   ret void
 177 }
 178
 179 ; GCN-LABEL: {{^}}test_concat_v16f64:
 180 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 181 ; GCN-NOT: movrel
 182 define amdgpu_kernel void @test_concat_v16f64(ptr addrspace(1) %out, <16 x double> %a, <16 x double> %b) nounwind { ; Joins <16 x double> %a and %b into one <32 x double> and stores it to %out.
 183   %concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; lanes of %a first, then lanes of %b
 184   store <32 x double> %concat, ptr addrspace(1) %out, align 256
 185   ret void
 186 }
 187
 188 ; GCN-LABEL: {{^}}test_concat_v1i1:
 189 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 190 ; GCN-NOT: movrel
 191 define amdgpu_kernel void @test_concat_v1i1(ptr addrspace(1) %out, <1 x i1> %a, <1 x i1> %b) nounwind { ; Joins <1 x i1> %a and %b into one <2 x i1> and stores it to %out.
 192   %concat = shufflevector <1 x i1> %a, <1 x i1> %b, <2 x i32> <i32 0, i32 1> ; lanes of %a first, then lanes of %b
 193   store <2 x i1> %concat, ptr addrspace(1) %out
 194   ret void
 195 }
 196
 197 ; GCN-LABEL: {{^}}test_concat_v2i1:
 198 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 199 ; GCN-NOT: movrel
 200 define amdgpu_kernel void @test_concat_v2i1(ptr addrspace(1) %out, <2 x i1> %a, <2 x i1> %b) nounwind { ; Joins <2 x i1> %a and %b into one <4 x i1> and stores it to %out.
 201   %concat = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; lanes of %a first, then lanes of %b
 202   store <4 x i1> %concat, ptr addrspace(1) %out
 203   ret void
 204 }
 205
 206 ; GCN-LABEL: {{^}}test_concat_v4i1:
 207 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 208 ; GCN-NOT: movrel
 209 define amdgpu_kernel void @test_concat_v4i1(ptr addrspace(1) %out, <4 x i1> %a, <4 x i1> %b) nounwind { ; Joins <4 x i1> %a and %b into one <8 x i1> and stores it to %out.
 210   %concat = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; lanes of %a first, then lanes of %b
 211   store <8 x i1> %concat, ptr addrspace(1) %out
 212   ret void
 213 }
 214
 215 ; GCN-LABEL: {{^}}test_concat_v8i1:
 216 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 217 ; GCN-NOT: movrel
 218 define amdgpu_kernel void @test_concat_v8i1(ptr addrspace(1) %out, <8 x i1> %a, <8 x i1> %b) nounwind { ; Joins <8 x i1> %a and %b into one <16 x i1> and stores it to %out.
 219   %concat = shufflevector <8 x i1> %a, <8 x i1> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; lanes of %a first, then lanes of %b
 220   store <16 x i1> %concat, ptr addrspace(1) %out
 221   ret void
 222 }
 223
 224 ; GCN-LABEL: {{^}}test_concat_v16i1:
 225 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 226 ; GCN-NOT: movrel
 227 define amdgpu_kernel void @test_concat_v16i1(ptr addrspace(1) %out, <16 x i1> %a, <16 x i1> %b) nounwind { ; Joins <16 x i1> %a and %b into one <32 x i1> and stores it to %out.
 228   %concat = shufflevector <16 x i1> %a, <16 x i1> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; lanes of %a first, then lanes of %b
 229   store <32 x i1> %concat, ptr addrspace(1) %out
 230   ret void
 231 }
 232
 233 ; GCN-LABEL: {{^}}test_concat_v32i1:
 234 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 235 ; GCN-NOT: movrel
 236 define amdgpu_kernel void @test_concat_v32i1(ptr addrspace(1) %out, <32 x i1> %a, <32 x i1> %b) nounwind { ; Joins <32 x i1> %a and %b into one <64 x i1> and stores it to %out.
 237   %concat = shufflevector <32 x i1> %a, <32 x i1> %b, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> ; lanes of %a first, then lanes of %b
 238   store <64 x i1> %concat, ptr addrspace(1) %out
 239   ret void
 240 }
 241
 242 ; GCN-LABEL: {{^}}test_concat_v1i16:
 243 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 244 ; GCN-NOT: movrel
 245 define amdgpu_kernel void @test_concat_v1i16(ptr addrspace(1) %out, <1 x i16> %a, <1 x i16> %b) nounwind { ; Joins <1 x i16> %a and %b into one <2 x i16> and stores it to %out.
 246   %concat = shufflevector <1 x i16> %a, <1 x i16> %b, <2 x i32> <i32 0, i32 1> ; lanes of %a first, then lanes of %b
 247   store <2 x i16> %concat, ptr addrspace(1) %out, align 4
 248   ret void
 249 }
 250
 251 ; GCN-LABEL: {{^}}test_concat_v2i16:
 252 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 253 ; GCN-NOT: movrel
 254 define amdgpu_kernel void @test_concat_v2i16(ptr addrspace(1) %out, <2 x i16> %a, <2 x i16> %b) nounwind { ; Joins <2 x i16> %a and %b into one <4 x i16> and stores it to %out.
 255   %concat = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; lanes of %a first, then lanes of %b
 256   store <4 x i16> %concat, ptr addrspace(1) %out, align 8
 257   ret void
 258 }
 259
 260 ; GCN-LABEL: {{^}}test_concat_v4i16:
 261 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 262 ; GCN-NOT: movrel
 263 define amdgpu_kernel void @test_concat_v4i16(ptr addrspace(1) %out, <4 x i16> %a, <4 x i16> %b) nounwind { ; Joins <4 x i16> %a and %b into one <8 x i16> and stores it to %out.
 264   %concat = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; lanes of %a first, then lanes of %b
 265   store <8 x i16> %concat, ptr addrspace(1) %out, align 16
 266   ret void
 267 }
 268
 269 ; GCN-LABEL: {{^}}test_concat_v8i16:
 270 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 271 ; GCN-NOT: movrel
 272 define amdgpu_kernel void @test_concat_v8i16(ptr addrspace(1) %out, <8 x i16> %a, <8 x i16> %b) nounwind { ; Joins <8 x i16> %a and %b into one <16 x i16> and stores it to %out.
 273   %concat = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; lanes of %a first, then lanes of %b
 274   store <16 x i16> %concat, ptr addrspace(1) %out, align 32
 275   ret void
 276 }
 277
 278 ; GCN-LABEL: {{^}}test_concat_v16i16:
 279 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0x80f000
 280 ; GCN-NOT: movrel
 281 define amdgpu_kernel void @test_concat_v16i16(ptr addrspace(1) %out, <16 x i16> %a, <16 x i16> %b) nounwind { ; Joins <16 x i16> %a and %b into one <32 x i16> and stores it to %out.
 282   %concat = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; lanes of %a first, then lanes of %b
 283   store <32 x i16> %concat, ptr addrspace(1) %out, align 64
 284   ret void
 285 }
 286
 287 ; GCN-LABEL: {{^}}concat_vector_crash:
 288 ; GCN: s_endpgm
 289 define amdgpu_kernel void @concat_vector_crash(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; Only requires that compilation reaches s_endpgm; presumably a former-crash reproducer, going by the name.
 290 entry:
 291   %pair = load <2 x float>, ptr addrspace(1) %in, align 4 ; two floats, under-aligned load
 292   %widened = shufflevector <2 x float> %pair, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; pad the pair out to 8 lanes with undef
 293   %merged = shufflevector <8 x float> undef, <8 x float> %widened, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9> ; lanes 0-5 undef, lanes 6-7 take the loaded pair
 294   store <8 x float> %merged, ptr addrspace(1) %out, align 32
 295   ret void
 296 }
 297
 298 ; GCN-LABEL: {{^}}concat_vector_crash2:
 299 ; GCN: s_endpgm
 300 define amdgpu_kernel void @concat_vector_crash2(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; Only requires that compilation reaches s_endpgm; presumably a former-crash reproducer, going by the name.
 301   %word = load i32, ptr addrspace(1) %in, align 1 ; byte-aligned 32-bit load
 302   %low24 = trunc i32 %word to i24
 303   %bytes = bitcast i24 %low24 to <3 x i8> ; reinterpret the 24 bits as three i8 lanes
 304   %spread = shufflevector <3 x i8> %bytes, <3 x i8> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef> ; only lane 5 is defined (element 1 of %bytes)
 305   %merged = shufflevector <8 x i8> %spread, <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 7, i8 8>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 15> ; lanes 0-5 from %spread, lanes 6-7 are the constants 7 and 8
 306   store <8 x i8> %merged, ptr addrspace(1) %out, align 8
 307   ret void
 308 }
 309
 310 ; GCN-LABEL: {{^}}build_vector_splat_concat_v8i16:
 311 ; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
 312 ; VI: ds_write_b128
 313 ; VI: ds_write_b128
 314 define amdgpu_kernel void @build_vector_splat_concat_v8i16() { ; Stores an all-zero <8 x i16> to two addrspace(3) pointers (undef, then null).
 315 bb:
 316   store <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, ptr addrspace(3) undef, align 16 ; explicit all-zero constant, same value as zeroinitializer
 317   store <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, ptr addrspace(3) null, align 16
 318   ret void
 319 }