test/CodeGen/AMDGPU/zext-lid.ll

   1 ; RUN: llc -march=amdgcn < %s | FileCheck %s
   2 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-intrinsics < %s | FileCheck -check-prefix=OPT %s
   3
   4 ; CHECK-NOT: and_b32
   5
   6 ; OPT-LABEL: @zext_grp_size_128
   7 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
   8 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !0
   9 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !0
  10 define amdgpu_kernel void @zext_grp_size_128(i32 addrspace(1)* nocapture %arg) #0 {
  11 entry: ; stores the x, y and z workitem ids, each masked with 127, to %arg[0], %arg[1], %arg[2]
  12   %lid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  13   %lid.x.lo = and i32 %lid.x, 127 ; attribute group #0 sets "amdgpu-flat-work-group-size"="64,128"; the llc run expects no and_b32
  14   store i32 %lid.x.lo, i32 addrspace(1)* %arg, align 4
  15   %lid.y = tail call i32 @llvm.amdgcn.workitem.id.y()
  16   %lid.y.lo = and i32 %lid.y, 127
  17   %dst.y = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  18   store i32 %lid.y.lo, i32 addrspace(1)* %dst.y, align 4
  19   %lid.z = tail call i32 @llvm.amdgcn.workitem.id.z()
  20   %lid.z.lo = and i32 %lid.z, 127
  21   %dst.z = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  22   store i32 %lid.z.lo, i32 addrspace(1)* %dst.z, align 4
  23   ret void
  24 }
  25
  26 ; OPT-LABEL: @zext_grp_size_32x4x1
  27 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !2
  28 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !3
  29 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !4
  30 define amdgpu_kernel void @zext_grp_size_32x4x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !0 {
  31 body: ; stores the x, y and z workitem ids, masked with 31, 3 and 1, to %arg[0], %arg[1], %arg[2]
  32   %wi.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  33   %x.bits = and i32 %wi.x, 31 ; !reqd_work_group_size !0 is {32, 4, 1}; each mask pairs with one dimension
  34   store i32 %x.bits, i32 addrspace(1)* %arg, align 4
  35   %wi.y = tail call i32 @llvm.amdgcn.workitem.id.y()
  36   %y.bits = and i32 %wi.y, 3
  37   %out.1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  38   store i32 %y.bits, i32 addrspace(1)* %out.1, align 4
  39   %wi.z = tail call i32 @llvm.amdgcn.workitem.id.z()
  40   %z.bits = and i32 %wi.z, 1
  41   %out.2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  42   store i32 %z.bits, i32 addrspace(1)* %out.2, align 4
  43   ret void
  44 }
  45
  46 ; OPT-LABEL: @zext_grp_size_512
  47 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !5
  48 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !5
  49 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !5
  50 define amdgpu_kernel void @zext_grp_size_512(i32 addrspace(1)* nocapture %arg) #1 {
  51 start: ; stores the x, y and z workitem ids, each masked with 65535, to %arg[0], %arg[1], %arg[2]
  52   %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  53   %id.x.trunc = and i32 %id.x, 65535 ; attribute group #1 sets "amdgpu-flat-work-group-size"="512,512"
  54   store i32 %id.x.trunc, i32 addrspace(1)* %arg, align 4
  55   %id.y = tail call i32 @llvm.amdgcn.workitem.id.y()
  56   %id.y.trunc = and i32 %id.y, 65535
  57   %elt1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  58   store i32 %id.y.trunc, i32 addrspace(1)* %elt1, align 4
  59   %id.z = tail call i32 @llvm.amdgcn.workitem.id.z()
  60   %id.z.trunc = and i32 %id.z, 65535
  61   %elt2 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  62   store i32 %id.z.trunc, i32 addrspace(1)* %elt2, align 4
  63   ret void
  64 }
  65
  66 declare i32 @llvm.amdgcn.workitem.id.x() #2 ; x workitem id intrinsic, called by all three kernels above
  67
  68 declare i32 @llvm.amdgcn.workitem.id.y() #2 ; y workitem id intrinsic, called by all three kernels above
  69
  70 declare i32 @llvm.amdgcn.workitem.id.z() #2 ; z workitem id intrinsic, called by all three kernels above
  71
  72 attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" } ; used by @zext_grp_size_128 and @zext_grp_size_32x4x1
  73 attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" } ; used by @zext_grp_size_512
  74 attributes #2 = { nounwind readnone speculatable } ; attached to the three intrinsic declarations
  75 attributes #3 = { nounwind readnone } ; NOTE(review): not referenced anywhere in the visible file - confirm it is still needed
  76
  77 !0 = !{i32 32, i32 4, i32 1} ; !reqd_work_group_size operand of @zext_grp_size_32x4x1
  78
  79 ; OPT: !0 = !{i32 0, i32 128}
  80 ; OPT: !1 = !{i32 32, i32 4, i32 1}
  81 ; OPT: !2 = !{i32 0, i32 32}
  82 ; OPT: !3 = !{i32 0, i32 4}
  83 ; OPT: !4 = !{i32 0, i32 1}
  84 ; OPT: !5 = !{i32 0, i32 512}