llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics-gfx8.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -instcombine -S < %s | FileCheck %s
   3
   4 ; --------------------------------------------------------------------
   5 ; llvm.amdgcn.image.sample a16 is disabled on pre-gfx9
   6 ; --------------------------------------------------------------------
   7
   ; Declarations of the float-coordinate (.f32) image.sample intrinsics
   ; called by the tests below, one per dimension: 1d, 2d, 3d, cube,
   ; 1darray and 2darray. The signatures differ only in how many float
   ; operands follow the leading i32: one for 1d, two for 2d and 1darray,
   ; three for 3d, cube and 2darray.
   ; NOTE(review): attribute group #1 is not defined in the visible part of
   ; this file -- confirm it is defined elsewhere or drop the reference.
   8 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
   9 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  10 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  11 declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  12 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  13 declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
  14
   ; 1D sample whose single coordinate is a half (%s) widened to float by
   ; fpext. The CHECK lines expect instcombine to keep both the fpext and the
   ; float-coordinate (.f32) call; the only differences from the input are
   ; `i1 0` printed as `i1 false` and an explicit `align 16` on the store.
   ; See the section header: a16 is disabled on pre-gfx9 (RUN uses -mcpu=fiji).
   15 define amdgpu_kernel void @image_sample_a16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
   16 ; CHECK-LABEL: @image_sample_a16_1d(
   17 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
   18 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
   19 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
   20 ; CHECK-NEXT:    ret void
   21 ;
   22   %s32 = fpext half %s to float
   23   %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   24   store <4 x float> %res, <4 x float> addrspace(1)* %out
   25   ret void
   26 }
  27
   ; 2D sample whose two coordinates are halves (%s, %t), each widened to
   ; float by its own fpext. The CHECK lines expect both fpext instructions
   ; and the float-coordinate (.f32) call to remain after instcombine, in the
   ; same order as the input.
   28 define amdgpu_kernel void @image_sample_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
   29 ; CHECK-LABEL: @image_sample_a16_2d(
   30 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
   31 ; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
   32 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
   33 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
   34 ; CHECK-NEXT:    ret void
   35 ;
   36   %s32 = fpext half %s to float
   37   %t32 = fpext half %t to float
   38   %res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   39   store <4 x float> %res, <4 x float> addrspace(1)* %out
   40   ret void
   41 }
  42
   ; 3D sample whose three coordinates are halves (%s, %t, %r), each widened
   ; to float by its own fpext. The CHECK lines expect all three fpext
   ; instructions and the float-coordinate (.f32) call to remain after
   ; instcombine, in the same order as the input.
   43 define amdgpu_kernel void @image_sample_a16_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
   44 ; CHECK-LABEL: @image_sample_a16_3d(
   45 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
   46 ; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
   47 ; CHECK-NEXT:    [[R32:%.*]] = fpext half [[R:%.*]] to float
   48 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[R32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
   49 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
   50 ; CHECK-NEXT:    ret void
   51 ;
   52   %s32 = fpext half %s to float
   53   %t32 = fpext half %t to float
   54   %r32 = fpext half %r to float
   55   %res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   56   store <4 x float> %res, <4 x float> addrspace(1)* %out
   57   ret void
   58 }
  59
   ; Cube sample whose three float operands come from halves (%s, %t, %face),
   ; each widened by its own fpext. The CHECK lines expect all three fpext
   ; instructions and the float-coordinate (.f32) call to remain after
   ; instcombine, in the same order as the input.
   60 define amdgpu_kernel void @image_sample_a16_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
   61 ;
   62 ; CHECK-LABEL: @image_sample_a16_cube(
   63 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
   64 ; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
   65 ; CHECK-NEXT:    [[FACE32:%.*]] = fpext half [[FACE:%.*]] to float
   66 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[FACE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
   67 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
   68 ; CHECK-NEXT:    ret void
   69 ;
   70   %s32 = fpext half %s to float
   71   %t32 = fpext half %t to float
   72   %face32 = fpext half %face to float
   73   %res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   74   store <4 x float> %res, <4 x float> addrspace(1)* %out
   75   ret void
   76 }
  77
   ; 1D-array sample whose two float operands come from halves (%s, %slice),
   ; each widened by its own fpext. The CHECK lines expect both fpext
   ; instructions and the float-coordinate (.f32) call to remain after
   ; instcombine, in the same order as the input.
   78 define amdgpu_kernel void @image_sample_a16_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
   79 ; CHECK-LABEL: @image_sample_a16_1darray(
   80 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
   81 ; CHECK-NEXT:    [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float
   82 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float [[S32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
   83 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
   84 ; CHECK-NEXT:    ret void
   85 ;
   86   %s32 = fpext half %s to float
   87   %slice32 = fpext half %slice to float
   88   %res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   89   store <4 x float> %res, <4 x float> addrspace(1)* %out
   90   ret void
   91 }
  92
   ; 2D-array sample whose three float operands come from halves
   ; (%s, %t, %slice), each widened by its own fpext. The CHECK lines expect
   ; all three fpext instructions and the float-coordinate (.f32) call to
   ; remain after instcombine, in the same order as the input.
   93 define amdgpu_kernel void @image_sample_a16_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
   94 ; CHECK-LABEL: @image_sample_a16_2darray(
   95 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
   96 ; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
   97 ; CHECK-NEXT:    [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float
   98 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
   99 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
  100 ; CHECK-NEXT:    ret void
  101 ;
  102   %s32 = fpext half %s to float
  103   %t32 = fpext half %t to float
  104   %slice32 = fpext half %slice to float
  105   %res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  106   store <4 x float> %res, <4 x float> addrspace(1)* %out
  107   ret void
  108 }