; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW16 %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST16 %s
; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW16 %s
; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST16 %s
; Scalar i32 or: single VALU op on all subtargets.
; ALL: estimated cost of 1 for {{.*}} or i32
define amdgpu_kernel void @or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
  %vec = load i32, i32 addrspace(1)* %vaddr
  %or = or i32 %vec, %b
  store i32 %or, i32 addrspace(1)* %out
  ret void
}
; Scalar i64 or: split into two 32-bit ops, hence cost 2.
; ALL: estimated cost of 2 for {{.*}} or i64
define amdgpu_kernel void @or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
  %vec = load i64, i64 addrspace(1)* %vaddr
  %or = or i64 %vec, %b
  store i64 %or, i64 addrspace(1)* %out
  ret void
}
; <2 x i16> or: one packed op with 16-bit support (gfx900+), two ops otherwise.
; SLOW16: estimated cost of 2 for {{.*}} or <2 x i16>
; FAST16: estimated cost of 1 for {{.*}} or <2 x i16>
define amdgpu_kernel void @or_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %or = or <2 x i16> %vec, %b
  store <2 x i16> %or, <2 x i16> addrspace(1)* %out
  ret void
}
; Scalar i32 xor: single VALU op on all subtargets.
; ALL: estimated cost of 1 for {{.*}} xor i32
define amdgpu_kernel void @xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
  %vec = load i32, i32 addrspace(1)* %vaddr
  %or = xor i32 %vec, %b
  store i32 %or, i32 addrspace(1)* %out
  ret void
}
; Scalar i64 xor: split into two 32-bit ops, hence cost 2.
; ALL: estimated cost of 2 for {{.*}} xor i64
define amdgpu_kernel void @xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
  %vec = load i64, i64 addrspace(1)* %vaddr
  %or = xor i64 %vec, %b
  store i64 %or, i64 addrspace(1)* %out
  ret void
}
; <2 x i16> xor: one packed op with 16-bit support (gfx900+), two ops otherwise.
; SLOW16: estimated cost of 2 for {{.*}} xor <2 x i16>
; FAST16: estimated cost of 1 for {{.*}} xor <2 x i16>
define amdgpu_kernel void @xor_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %xor = xor <2 x i16> %vec, %b
  store <2 x i16> %xor, <2 x i16> addrspace(1)* %out
  ret void
}
; Scalar i32 and: single VALU op on all subtargets.
; ALL: estimated cost of 1 for {{.*}} and i32
define amdgpu_kernel void @and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
  %vec = load i32, i32 addrspace(1)* %vaddr
  %or = and i32 %vec, %b
  store i32 %or, i32 addrspace(1)* %out
  ret void
}
; Scalar i64 and: split into two 32-bit ops, hence cost 2.
; ALL: estimated cost of 2 for {{.*}} and i64
define amdgpu_kernel void @and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
  %vec = load i64, i64 addrspace(1)* %vaddr
  %or = and i64 %vec, %b
  store i64 %or, i64 addrspace(1)* %out
  ret void
}
; <2 x i16> and: one packed op with 16-bit support (gfx900+), two ops otherwise.
; SLOW16: estimated cost of 2 for {{.*}} and <2 x i16>
; FAST16: estimated cost of 1 for {{.*}} and <2 x i16>
define amdgpu_kernel void @and_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %and = and <2 x i16> %vec, %b
  store <2 x i16> %and, <2 x i16> addrspace(1)* %out
  ret void
}
attributes #0 = { nounwind }