test/CodeGen/AMDGPU/zext-lid.ll

   1 ; RUN: llc -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=GCN,O2 %s
   2 ; RUN: llc -O0 -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
   3 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-intrinsics < %s | FileCheck -check-prefix=OPT %s
   4
   5 ; GCN-LABEL: {{^}}zext_grp_size_128:
   6 ; GCN-NOT: and_b32
   7
   8 ; OPT-LABEL: @zext_grp_size_128
   9 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
  10 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !0
  11 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !0
  12 define amdgpu_kernel void @zext_grp_size_128(i32 addrspace(1)* nocapture %arg) #0 { ; kernel under attribute group #0: stores the masked x/y/z workitem ids to %arg[0], %arg[1], %arg[2]
  13 bb:
  14   %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  15   %id.x.masked = and i32 %id.x, 127 ; 127 = 0x7f, keeps the low 7 bits
  16   store i32 %id.x.masked, i32 addrspace(1)* %arg, align 4
  17   %id.y = tail call i32 @llvm.amdgcn.workitem.id.y()
  18   %id.y.masked = and i32 %id.y, 127 ; same 7-bit mask for y
  19   %slot.y = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  20   store i32 %id.y.masked, i32 addrspace(1)* %slot.y, align 4
  21   %id.z = tail call i32 @llvm.amdgcn.workitem.id.z()
  22   %id.z.masked = and i32 %id.z, 127 ; same 7-bit mask for z
  23   %slot.z = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  24   store i32 %id.z.masked, i32 addrspace(1)* %slot.z, align 4
  25   ret void
  26 }
  27
  28 ; GCN-LABEL: {{^}}zext_grp_size_32x4x1:
  29 ; GCN-NOT: and_b32
  30
  31 ; OPT-LABEL: @zext_grp_size_32x4x1
  32 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !2
  33 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !3
  34 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !4
  35 define amdgpu_kernel void @zext_grp_size_32x4x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !0 { ; kernel whose !reqd_work_group_size node is (32, 4, 1); stores the masked x/y/z workitem ids to %arg[0], %arg[1], %arg[2]
  36 bb:
  37   %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  38   %id.x.masked = and i32 %id.x, 31 ; keeps the low 5 bits
  39   store i32 %id.x.masked, i32 addrspace(1)* %arg, align 4
  40   %id.y = tail call i32 @llvm.amdgcn.workitem.id.y()
  41   %id.y.masked = and i32 %id.y, 3 ; keeps the low 2 bits
  42   %slot.y = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  43   store i32 %id.y.masked, i32 addrspace(1)* %slot.y, align 4
  44   %id.z = tail call i32 @llvm.amdgcn.workitem.id.z()
  45   %id.z.masked = and i32 %id.z, 1 ; keeps bit 0 only
  46   %slot.z = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  47   store i32 %id.z.masked, i32 addrspace(1)* %slot.z, align 4
  48   ret void
  49 }
  50
  51 ; GCN-LABEL: {{^}}zext_grp_size_1x1x1:
  52 ; GCN-NOT: and_b32
  53
  54 ; When EarlyCSE is not run this call produces a range max with 0 active bits,
  55 ; which is a special case as an AssertZext from width 0 is invalid.
  56 ; OPT-LABEL: @zext_grp_size_1x1x1
  57 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !4
  58 define amdgpu_kernel void @zext_grp_size_1x1x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !1 { ; kernel whose !reqd_work_group_size node is (1, 1, 1); only the x workitem id is read
  59   %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  60   %id.x.bit0 = and i32 %id.x, 1 ; keeps bit 0 only
  61   store i32 %id.x.bit0, i32 addrspace(1)* %arg, align 4
  62   ret void
  63 }
  64
  65 ; GCN-LABEL: {{^}}zext_grp_size_512:
  66 ; GCN-NOT: and_b32
  67
  68 ; OPT-LABEL: @zext_grp_size_512
  69 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !6
  70 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !6
  71 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !6
  72 define amdgpu_kernel void @zext_grp_size_512(i32 addrspace(1)* nocapture %arg) #1 { ; kernel under attribute group #1: stores the masked x/y/z workitem ids to %arg[0], %arg[1], %arg[2]
  73 bb:
  74   %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  75   %id.x.lo16 = and i32 %id.x, 65535 ; 65535 = 0xffff, keeps the low 16 bits
  76   store i32 %id.x.lo16, i32 addrspace(1)* %arg, align 4
  77   %id.y = tail call i32 @llvm.amdgcn.workitem.id.y()
  78   %id.y.lo16 = and i32 %id.y, 65535 ; same 16-bit mask for y
  79   %slot.y = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
  80   store i32 %id.y.lo16, i32 addrspace(1)* %slot.y, align 4
  81   %id.z = tail call i32 @llvm.amdgcn.workitem.id.z()
  82   %id.z.lo16 = and i32 %id.z, 65535 ; same 16-bit mask for z
  83   %slot.z = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
  84   store i32 %id.z.lo16, i32 addrspace(1)* %slot.z, align 4
  85   ret void
  86 }
  87
  88 ; GCN-LABEL: {{^}}func_test_workitem_id_x_known_max_range:
  89 ; O2-NOT: and_b32
  90 ; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
  91 ; O2-NOT: and_b32
  92
  93 ; OPT-LABEL: @func_test_workitem_id_x_known_max_range(
  94 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
  95 define void @func_test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 { ; non-kernel function under attribute group #0: stores the masked x workitem id to %out
  96 entry:
  97   %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  98   %lid.lo10 = and i32 %lid, 1023 ; 1023 = 0x3ff, keeps the low 10 bits
  99   store i32 %lid.lo10, i32 addrspace(1)* %out, align 4
 100   ret void
 101 }
 102
 103 ; GCN-LABEL: {{^}}func_test_workitem_id_x_default_range:
 104 ; O2-NOT: and_b32
 105 ; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
 106 ; O2-NOT: and_b32
 107
 108 ; OPT-LABEL: @func_test_workitem_id_x_default_range(
 109 ; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !7
 110 define void @func_test_workitem_id_x_default_range(i32 addrspace(1)* nocapture %out) #4 { ; non-kernel function under attribute group #4 (nounwind only): stores the masked x workitem id to %out
 111 entry:
 112   %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
 113   %lid.lo10 = and i32 %lid, 1023 ; 1023 = 0x3ff, keeps the low 10 bits
 114   store i32 %lid.lo10, i32 addrspace(1)* %out, align 4
 115   ret void
 116 }
 117
 118 declare i32 @llvm.amdgcn.workitem.id.x() #2 ; intrinsic called by every test function in this file
 119
 120 declare i32 @llvm.amdgcn.workitem.id.y() #2 ; intrinsic called only by the three-dimensional kernels
 121
 122 declare i32 @llvm.amdgcn.workitem.id.z() #2 ; intrinsic called only by the three-dimensional kernels
 123
 124 attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" } ; used by @zext_grp_size_128, @zext_grp_size_32x4x1, @zext_grp_size_1x1x1 and @func_test_workitem_id_x_known_max_range
 125 attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" } ; used by @zext_grp_size_512
 126 attributes #2 = { nounwind readnone speculatable } ; used by the three intrinsic declarations
 127 attributes #3 = { nounwind readnone } ; NOTE(review): not referenced by anything visible in this file
 128 attributes #4 = { nounwind } ; used by @func_test_workitem_id_x_default_range; sets no work-group-size attribute
 129
 130 !0 = !{i32 32, i32 4, i32 1} ; !reqd_work_group_size of @zext_grp_size_32x4x1; the opt-output checks at the end of the file expect it renumbered to !1
 131 !1 = !{i32 1, i32 1, i32 1} ; !reqd_work_group_size of @zext_grp_size_1x1x1; the opt-output checks at the end of the file expect it renumbered to !5
 132
 133 ; OPT: !0 = !{i32 0, i32 128}
 134 ; OPT: !1 = !{i32 32, i32 4, i32 1}
 135 ; OPT: !2 = !{i32 0, i32 32}
 136 ; OPT: !3 = !{i32 0, i32 4}
 137 ; OPT: !4 = !{i32 0, i32 1}
 138 ; OPT: !5 = !{i32 1, i32 1, i32 1}
 139 ; OPT: !6 = !{i32 0, i32 512}
 140 ; OPT: !7 = !{i32 0, i32 1024}