; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW16 %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST16 %s
; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW16 %s
; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST16 %s
; Scalar i32 or: single VALU op on all subtargets.
; ALL: estimated cost of 1 for {{.*}} or i32
define amdgpu_kernel void @or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
  %vec = load i32, i32 addrspace(1)* %vaddr
  %or = or i32 %vec, %b
  store i32 %or, i32 addrspace(1)* %out
  ret void
}
; Scalar i64 or: split into two 32-bit ops, hence cost 2.
; ALL: estimated cost of 2 for {{.*}} or i64
define amdgpu_kernel void @or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
  %vec = load i64, i64 addrspace(1)* %vaddr
  %or = or i64 %vec, %b
  store i64 %or, i64 addrspace(1)* %out
  ret void
}
; <2 x i16> or: one packed op with 16-bit support (gfx900+), two ops otherwise.
; SLOW16: estimated cost of 2 for {{.*}} or <2 x i16>
; FAST16: estimated cost of 1 for {{.*}} or <2 x i16>
define amdgpu_kernel void @or_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %or = or <2 x i16> %vec, %b
  store <2 x i16> %or, <2 x i16> addrspace(1)* %out
  ret void
}
; Scalar i32 xor: single VALU op on all subtargets.
; ALL: estimated cost of 1 for {{.*}} xor i32
define amdgpu_kernel void @xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
  %vec = load i32, i32 addrspace(1)* %vaddr
  %or = xor i32 %vec, %b
  store i32 %or, i32 addrspace(1)* %out
  ret void
}
; Scalar i64 xor: split into two 32-bit ops, hence cost 2.
; ALL: estimated cost of 2 for {{.*}} xor i64
define amdgpu_kernel void @xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
  %vec = load i64, i64 addrspace(1)* %vaddr
  %or = xor i64 %vec, %b
  store i64 %or, i64 addrspace(1)* %out
  ret void
}
; <2 x i16> xor: one packed op with 16-bit support (gfx900+), two ops otherwise.
; SLOW16: estimated cost of 2 for {{.*}} xor <2 x i16>
; FAST16: estimated cost of 1 for {{.*}} xor <2 x i16>
define amdgpu_kernel void @xor_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %xor = xor <2 x i16> %vec, %b
  store <2 x i16> %xor, <2 x i16> addrspace(1)* %out
  ret void
}
; Scalar i32 and: single VALU op on all subtargets.
; ALL: estimated cost of 1 for {{.*}} and i32
define amdgpu_kernel void @and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
  %vec = load i32, i32 addrspace(1)* %vaddr
  %or = and i32 %vec, %b
  store i32 %or, i32 addrspace(1)* %out
  ret void
}
; Scalar i64 and: split into two 32-bit ops, hence cost 2.
; ALL: estimated cost of 2 for {{.*}} and i64
define amdgpu_kernel void @and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
  %vec = load i64, i64 addrspace(1)* %vaddr
  %or = and i64 %vec, %b
  store i64 %or, i64 addrspace(1)* %out
  ret void
}
; <2 x i16> and: one packed op with 16-bit support (gfx900+), two ops otherwise.
; SLOW16: estimated cost of 2 for {{.*}} and <2 x i16>
; FAST16: estimated cost of 1 for {{.*}} and <2 x i16>
define amdgpu_kernel void @and_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %and = and <2 x i16> %vec, %b
  store <2 x i16> %and, <2 x i16> addrspace(1)* %out
  ret void
}
attributes #0 = { nounwind }