llvm/test/CodeGen/AMDGPU/lower-module-lds-single-var-ambiguous.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=module | FileCheck -check-prefixes=CHECK,M_OR_HY %s
   3 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=table | FileCheck -check-prefixes=CHECK,TABLE %s
   4 ; RUN: not --crash opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=kernel 2>&1 | FileCheck -check-prefixes=KERNEL %s
   5 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=hybrid | FileCheck -check-prefixes=CHECK,M_OR_HY %s
   6
   7 ;; Two kernels access the same variable, specialisation gives them each their own copy of it
   8
   9 @kernel.lds = addrspace(3) global i8 undef
  10 define amdgpu_kernel void @k0() {
     ;; Kernel that reads the i8 at @kernel.lds, multiplies it by 2 and stores the
     ;; result back to the same variable. The expectations below use the prefix
     ;; shared by the module, table and hybrid runs, and require both the load and
     ;; the store to address @llvm.amdgcn.kernel.k0.lds with align 1.
  11 ; CHECK-LABEL: @k0(
  12 ; CHECK-NEXT:    [[LD:%.*]] = load i8, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 1
  13 ; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[LD]], 2
  14 ; CHECK-NEXT:    store i8 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 1
  15 ; CHECK-NEXT:    ret void
  16 ;
  17   %ld = load i8, ptr addrspace(3) @kernel.lds
  18   %mul = mul i8 %ld, 2
  19   store i8 %mul, ptr  addrspace(3) @kernel.lds
  20   ret void
  21 }
  22
  23 define amdgpu_kernel void @k1() {
     ;; Second kernel touching @kernel.lds. Same load, multiply, store shape as @k0
     ;; but with a multiplier of 3, which keeps the two kernel bodies distinguishable.
     ;; The expectations below require its accesses to address
     ;; @llvm.amdgcn.kernel.k1.lds with align 1, a different global from the one
     ;; expected in @k0.
  24 ; CHECK-LABEL: @k1(
  25 ; CHECK-NEXT:    [[LD:%.*]] = load i8, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 1
  26 ; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[LD]], 3
  27 ; CHECK-NEXT:    store i8 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 1
  28 ; CHECK-NEXT:    ret void
  29 ;
  30   %ld = load i8, ptr addrspace(3) @kernel.lds
  31   %mul = mul i8 %ld, 3
  32   store i8 %mul, ptr  addrspace(3) @kernel.lds
  33   ret void
  34 }
  35
  36 ;; Function accesses variable, reachable from two kernels, can't use kernel lowering for either
  37 ;; Hybrid can put it in module lds without cost as the first variable is free
  38
  39 ; KERNEL: LLVM ERROR: cannot lower LDS 'function.lds' to kernel access as it is reachable from multiple kernels
  40
  41 @function.lds = addrspace(3) global i16 undef
  42 define void @f0() {
     ;; Non-kernel function that reads the i16 at @function.lds, multiplies it by 4
     ;; and stores the result back. It is called from both @k0_f0 and @k1_f0.
     ;;
     ;; Two sets of expectations follow.
     ;;  - Module and hybrid runs share one prefix and expect the load and store to
     ;;    address @llvm.amdgcn.module.lds directly with align 2.
     ;;  - The table run expects a single call to @llvm.amdgcn.lds.kernel.id whose
     ;;    result indexes @llvm.amdgcn.lds.offset.table, a [2 x [1 x i32]] array in
     ;;    addrspace(4). Each access gets its own lookup sequence of getelementptr,
     ;;    i32 load and inttoptr to addrspace(3), so the sequence appears once
     ;;    before the load and again before the store, both reusing the same id.
  43 ; M_OR_HY-LABEL: @f0(
  44 ; M_OR_HY-NEXT:    [[LD:%.*]] = load i16, ptr addrspace(3) @llvm.amdgcn.module.lds, align 2
  45 ; M_OR_HY-NEXT:    [[MUL:%.*]] = mul i16 [[LD]], 4
  46 ; M_OR_HY-NEXT:    store i16 [[MUL]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 2
  47 ; M_OR_HY-NEXT:    ret void
  48 ;
  49 ; TABLE-LABEL: @f0(
  50 ; TABLE-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
  51 ; TABLE-NEXT:    [[FUNCTION_LDS2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
  52 ; TABLE-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS2]], align 4
  53 ; TABLE-NEXT:    [[FUNCTION_LDS3:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
  54 ; TABLE-NEXT:    [[LD:%.*]] = load i16, ptr addrspace(3) [[FUNCTION_LDS3]], align 2
  55 ; TABLE-NEXT:    [[MUL:%.*]] = mul i16 [[LD]], 4
  56 ; TABLE-NEXT:    [[FUNCTION_LDS:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
  57 ; TABLE-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[FUNCTION_LDS]], align 4
  58 ; TABLE-NEXT:    [[FUNCTION_LDS1:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
  59 ; TABLE-NEXT:    store i16 [[MUL]], ptr addrspace(3) [[FUNCTION_LDS1]], align 2
  60 ; TABLE-NEXT:    ret void
  61 ;
  62   %ld = load i16, ptr addrspace(3) @function.lds
  63   %mul = mul i16 %ld, 4
  64   store i16 %mul, ptr  addrspace(3) @function.lds
  65   ret void
  66 }
  67
  68
  69 define amdgpu_kernel void @k0_f0() {
     ;; Kernel whose body is just a call to @f0, so it touches @function.lds only
     ;; indirectly. In every checked configuration the expected output gains a call
     ;; to @llvm.donothing carrying an "ExplicitUse" operand bundle ahead of the
     ;; call to @f0. The bundle names @llvm.amdgcn.module.lds for the module and
     ;; hybrid runs and @llvm.amdgcn.kernel.k0_f0.lds for the table run.
  70 ; M_OR_HY-LABEL: @k0_f0(
  71 ; M_OR_HY-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
  72 ; M_OR_HY-NEXT:    call void @f0()
  73 ; M_OR_HY-NEXT:    ret void
  74 ;
  75 ; TABLE-LABEL: @k0_f0(
  76 ; TABLE-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k0_f0.lds) ]
  77 ; TABLE-NEXT:    call void @f0()
  78 ; TABLE-NEXT:    ret void
  79 ;
  80   call void @f0()
  81   ret void
  82 }
  83
  84 define amdgpu_kernel void @k1_f0() {
     ;; Second kernel that only calls @f0, making @function.lds reachable from more
     ;; than one kernel. The expected output has the same shape as for @k0_f0, with
     ;; a call to @llvm.donothing carrying an "ExplicitUse" operand bundle placed
     ;; before the call to @f0. The bundle names @llvm.amdgcn.module.lds for the
     ;; module and hybrid runs and @llvm.amdgcn.kernel.k1_f0.lds for the table run.
  85 ; M_OR_HY-LABEL: @k1_f0(
  86 ; M_OR_HY-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
  87 ; M_OR_HY-NEXT:    call void @f0()
  88 ; M_OR_HY-NEXT:    ret void
  89 ;
  90 ; TABLE-LABEL: @k1_f0(
  91 ; TABLE-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k1_f0.lds) ]
  92 ; TABLE-NEXT:    call void @f0()
  93 ; TABLE-NEXT:    ret void
  94 ;
  95   call void @f0()
  96   ret void
  97 }