llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll

   1 ; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s
   2
   3 @lds0 = addrspace(3) global [512 x float] undef    ; 512 * 4 = 2048 bytes (0x800)
   4 @lds1 = addrspace(3) global [256 x float] undef    ; 256 * 4 = 1024 bytes (0x400)
   5 @lds2 = addrspace(3) global [4096 x float] undef   ; 4096 * 4 = 16384 bytes (0x4000)
   6 @lds3 = addrspace(3) global [67 x i8] undef        ; 67 bytes (0x43), not a multiple of 4
   7
   8 @dynamic_shared0 = external addrspace(3) global [0 x float]             ; zero-length float array, no explicit alignment
   9 @dynamic_shared1 = external addrspace(3) global [0 x double]            ; zero-length double array, no explicit alignment
   10 @dynamic_shared2 = external addrspace(3) global [0 x double], align 4   ; zero-length double array, explicit align 4
   11 @dynamic_shared3 = external addrspace(3) global [0 x double], align 16  ; zero-length double array, explicit align 16
  12
  13 ; CHECK-LABEL: {{^}}dynamic_shared_array_0:
  14 ; CHECK: v_add_u32_e32 v{{[0-9]+}}, 0x800, v{{[0-9]+}}
  15 define amdgpu_kernel void @dynamic_shared_array_0(float addrspace(1)* %out) {
  16   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  17   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %tid.x
  18   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  19   %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  20   store float %val0, float addrspace(3)* %arrayidx1, align 4
  21   ret void
  22 }
  23
  24 ; CHECK-LABEL: {{^}}dynamic_shared_array_1:
  25 ; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}}
  26 ; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}}
  27 ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
  28 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0xc00, [[IDX]]
  29 define amdgpu_kernel void @dynamic_shared_array_1(float addrspace(1)* %out, i32 %cond) {
  30 entry:
  31   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  32   %idx.0 = add nsw i32 %tid.x, 64
  33   %tmp = icmp eq i32 %cond, 0
  34   br i1 %tmp, label %if, label %else
  35
  36 if:                                               ; preds = %entry
  37   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
  38   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  39   br label %endif
  40
  41 else:                                             ; preds = %entry
  42   %arrayidx1 = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
  43   %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  44   br label %endif
  45
  46 endif:                                            ; preds = %else, %if
  47   %val = phi float [ %val0, %if ], [ %val1, %else ]
  48   %arrayidx = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  49   store float %val, float addrspace(3)* %arrayidx, align 4
  50   ret void
  51 }
  52
  53 ; CHECK-LABEL: {{^}}dynamic_shared_array_2:
  54 ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
  55 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x4000, [[IDX]]
  56 define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) {
  57   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  58   %vidx = add i32 %tid.x, %idx
  59   %arrayidx0 = getelementptr inbounds [4096 x float], [4096 x float] addrspace(3)* @lds2, i32 0, i32 %vidx
  60   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  61   %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  62   store float %val0, float addrspace(3)* %arrayidx1, align 4
  63   ret void
  64 }
  65
   66 ; The offset of the dynamic shared memory array should be aligned to the
   67 ; alignment of its element type.
  68 ; CHECK-LABEL: {{^}}dynamic_shared_array_3:
  69 ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
  70 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0x44, [[IDX]]
  71 define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) {
  72   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  73   %vidx = add i32 %tid.x, %idx
  74   %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  75   %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  76   %val1 = uitofp i8 %val0 to float
  77   %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  78   store float %val1, float addrspace(3)* %arrayidx1, align 4
  79   ret void
  80 }
  81
   82 ; When several dynamic shared memory arrays are used, their common offset
   83 ; should be aligned to the largest alignment among them.
  84 ; CHECK-LABEL: {{^}}dynamic_shared_array_4:
  85 ; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x48
  86 ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
  87 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, [[DYNLDS]], [[IDX]]
  88 define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) {
  89   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  90   %vidx = add i32 %tid.x, %idx
  91   %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  92   %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  93   %val1 = uitofp i8 %val0 to float
  94   %val2 = uitofp i8 %val0 to double
  95   %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  96   store float %val1, float addrspace(3)* %arrayidx1, align 4
  97   %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared1, i32 0, i32 %tid.x
  98   store double %val2, double addrspace(3)* %arrayidx2, align 4
  99   ret void
 100 }
 101
  102 ; Honor an explicit alignment lower than the element type's (double array, align 4).
 103 ; CHECK-LABEL: {{^}}dynamic_shared_array_5:
 104 ; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x44
 105 ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
 106 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, [[DYNLDS]], [[IDX]]
 107 define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) {
 108   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
 109   %vidx = add i32 %tid.x, %idx
 110   %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
 111   %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
 112   %val1 = uitofp i8 %val0 to float
 113   %val2 = uitofp i8 %val0 to double
 114   %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
 115   store float %val1, float addrspace(3)* %arrayidx1, align 4
 116   %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared2, i32 0, i32 %tid.x
 117   store double %val2, double addrspace(3)* %arrayidx2, align 4
 118   ret void
 119 }
 120
  121 ; Honor an explicit alignment higher than the element type's (double array, align 16).
 122 ; CHECK-LABEL: {{^}}dynamic_shared_array_6:
 123 ; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x50
 124 ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}}
 125 ; CHECK: v_add_u32_e32 {{v[0-9]+}}, [[DYNLDS]], [[IDX]]
 126 define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) {
 127   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
 128   %vidx = add i32 %tid.x, %idx
 129   %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
 130   %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
 131   %val1 = uitofp i8 %val0 to float
 132   %val2 = uitofp i8 %val0 to double
 133   %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
 134   store float %val1, float addrspace(3)* %arrayidx1, align 4
 135   %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared3, i32 0, i32 %tid.x
 136   store double %val2, double addrspace(3)* %arrayidx2, align 4
 137   ret void
 138 }
 139
  140 declare i32 @llvm.amdgcn.workitem.id.x()  ; called by every kernel in this file to obtain its per-workitem index