1 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
4 ; The bitcasts should be pushed through to the constant vector operands so the
5 ; vectors can be broken down and the shared components can be CSE'd.
; Stores two constant <8 x i32> vectors, each bitcast to <8 x float>, to %out.
; The two constants differ only in their last element (8 vs. 9).
; The checks below expect four buffer_store_dwordx4 instructions in total
; (presumably two 4-dword stores per 8-dword vector).
; The %vec kernel argument is not referenced by the instructions shown here.
7 ; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v8f32:
8 ; GCN: buffer_store_dwordx4
9 ; GCN: buffer_store_dwordx4
11 ; GCN: buffer_store_dwordx4
13 ; GCN: buffer_store_dwordx4
14 define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v8f32(ptr addrspace(1) %out, <8 x i32> %vec) {
; First constant: seven 7s followed by an 8.
15 %vec0.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8> to <8 x float>
; NOTE(review): volatile presumably keeps both stores to the same address alive -- confirm.
16 store volatile <8 x float> %vec0.bc, ptr addrspace(1) %out
; Second constant: identical except the last element is 9.
18 %vec1.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 9> to <8 x float>
19 store volatile <8 x float> %vec1.bc, ptr addrspace(1) %out
; Stores two constant <4 x i64> vectors, each bitcast to <8 x float>, to %out.
; Here the bitcast changes both the element width (64 -> 32 bits) and the
; element count (4 -> 8). The constants differ only in their last element
; (8 vs. 9). The checks below expect four buffer_store_dwordx4 instructions.
; The %vec kernel argument is not referenced by the instructions shown here.
23 ; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v8f32:
24 ; GCN: buffer_store_dwordx4
25 ; GCN: buffer_store_dwordx4
27 ; GCN: buffer_store_dwordx4
29 ; GCN: buffer_store_dwordx4
30 define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v8f32(ptr addrspace(1) %out, <4 x i64> %vec) {
; First constant: three 7s followed by an 8.
31 %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <8 x float>
32 store volatile <8 x float> %vec0.bc, ptr addrspace(1) %out
; Second constant: identical except the last element is 9.
34 %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <8 x float>
35 store volatile <8 x float> %vec1.bc, ptr addrspace(1) %out
; Stores two constant <4 x i64> vectors, each bitcast to <4 x double>, to %out.
; The element width and count are unchanged by the bitcast; only the element
; type goes from integer to floating point. The constants differ only in their
; last element (8 vs. 9). The checks below expect four buffer_store_dwordx4
; instructions.
; The %vec kernel argument is not referenced by the instructions shown here.
39 ; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v4f64:
40 ; GCN: buffer_store_dwordx4
41 ; GCN: buffer_store_dwordx4
43 ; GCN: buffer_store_dwordx4
45 ; GCN: buffer_store_dwordx4
46 define amdgpu_kernel void @store_bitcast_constant_v4i64_to_v4f64(ptr addrspace(1) %out, <4 x i64> %vec) {
; First constant: three 7s followed by an 8.
47 %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <4 x double>
48 store volatile <4 x double> %vec0.bc, ptr addrspace(1) %out
; Second constant: identical except the last element is 9.
50 %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <4 x double>
51 store volatile <4 x double> %vec1.bc, ptr addrspace(1) %out
; Stores two constant <16 x i16> vectors, each bitcast to <8 x float>, to %out.
; The constants differ only in their last element (8 vs. 9). The checks below
; expect four buffer_store_dwordx4 instructions.
; NOTE(review): the function name says "v8i32_to_v16i16", but the IR below
; casts <16 x i16> to <8 x float>; the name looks stale. Renaming would also
; require updating the label check, so it is only flagged here.
; The %vec kernel argument is not referenced by the instructions shown here.
55 ; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v16i16:
56 ; GCN: buffer_store_dwordx4
57 ; GCN: buffer_store_dwordx4
59 ; GCN: buffer_store_dwordx4
61 ; GCN: buffer_store_dwordx4
62 define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(ptr addrspace(1) %out, <16 x i16> %vec) {
; First constant: fifteen 7s followed by an 8.
63 %vec0.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 8> to <8 x float>
64 store volatile <8 x float> %vec0.bc, ptr addrspace(1) %out
; Second constant: identical except the last element is 9.
66 %vec1.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 9> to <8 x float>
67 store volatile <8 x float> %vec1.bc, ptr addrspace(1) %out
; Bitcasts the i64 result of llvm.amdgcn.icmp.i64 to <2 x i32> and stores it
; to %out with a plain (non-volatile) store. The negative check below requires
; that no "store_dword" text appears in the output for this function.
; NOTE(review): going by the test name and the %undef value name, the
; condition code 999 is presumably invalid so the call result is lowered to
; undef and the store is dropped -- confirm against the intrinsic's lowering.
71 ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source:
72 ; GCN-NOT: store_dword
73 define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
; Third operand (999) is the comparison code passed to the intrinsic.
74 %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1
75 %bc = bitcast i64 %undef to <2 x i32>
76 store <2 x i32> %bc, ptr addrspace(1) %out
; Same shape as the test above, but only element 1 of the <2 x i32> bitcast
; is extracted and stored as a single i32. The negative check below requires
; that no "store_dword" text appears in the output for this function.
; NOTE(review): going by the test name and the %undef value name, the
; condition code 9999 is presumably invalid so the call result is lowered to
; undef -- confirm against the intrinsic's lowering.
80 ; GCN-LABEL: {{^}}store_value_lowered_to_undef_bitcast_source_extractelt:
81 ; GCN-NOT: store_dword
82 define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractelt(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
; Third operand (9999) is the comparison code passed to the intrinsic.
83 %undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1
84 %bc = bitcast i64 %undef to <2 x i32>
; Take the second 32-bit element of the bitcast value.
85 %elt1 = extractelement <2 x i32> %bc, i32 1
86 store i32 %elt1, ptr addrspace(1) %out
; Declaration of the compare intrinsic called by the two
; store_value_lowered_to_undef_* kernels: two i64 operands plus an i32
; comparison code, returning i64.
90 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1
; Attribute group #0 is attached to the two store_value_lowered_to_undef_* kernels.
92 attributes #0 = { nounwind }
; Attribute group #1 is attached to the intrinsic declaration and its call sites.
93 attributes #1 = { nounwind readnone convergent }