llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll

   1 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,SDAG %s
   2 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,GISEL %s
   3
   4 ; GFX90A-LABEL: {{^}}sample_1d:
   5 ; GFX90A-NOT: s_wqm_b64
   6 ; GFX90A:     image_sample v[{{[0-9:]+}}], v{{[0-9]+}}, s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf
   7 define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
   8 main_body:
   9   %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  10   ret <4 x float> %v
  11 }
  12
  13 ; GFX90A-LABEL: {{^}}sample_1d_lwe:
  14 ; GFX90A-NOT: s_wqm_b64
  15 ; GFX90A:     image_sample v[{{[0-9:]+}}], v{{[0-9]+}}, s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf lwe
  16 define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
  17 main_body:
  18   %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
  19   %v.vec = extractvalue {<4 x float>, i32} %v, 0
  20   %v.err = extractvalue {<4 x float>, i32} %v, 1
  21   store i32 %v.err, ptr addrspace(1) %out, align 4
  22   ret <4 x float> %v.vec
  23 }
  24
  25 ; GFX90A-LABEL: {{^}}sample_2d:
  26 ; GFX90A-NOT: s_wqm_b64
  27 ; GFX90A:     image_sample v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf
  28 define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
  29 main_body:
  30   %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  31   ret <4 x float> %v
  32 }
  33
  34 ; GFX90A-LABEL: {{^}}sample_3d:
  35 ; GFX90A-NOT: s_wqm_b64
  36 ; GFX90A:     image_sample v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf
  37 define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
  38 main_body:
  39   %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  40   ret <4 x float> %v
  41 }
  42
  43 ; GFX90A-LABEL: {{^}}sample_cube:
  44 ; GFX90A-NOT: s_wqm_b64
  45 ; GFX90A:     image_sample v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf da
  46 define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
  47 main_body:
  48   %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  49   ret <4 x float> %v
  50 }
  51
  52 ; GFX90A-LABEL: {{^}}sample_1darray:
  53 ; GFX90A-NOT: s_wqm_b64
  54 ; GFX90A:     image_sample v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf da
  55 define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
  56 main_body:
  57   %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  58   ret <4 x float> %v
  59 }
  60
  61 ; GFX90A-LABEL: {{^}}sample_1d_unorm:
  62 ; GFX90A-NOT: s_wqm_b64
  63 ; GFX90A:     image_sample v[{{[0-9:]+}}], v{{[0-9]+}}, s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf unorm
  64 define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
  65 main_body:
  66   %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0)
  67   ret <4 x float> %v
  68 }
  69
  70 ; Address register must be even aligned.
  71
  72 ; GFX90A-LABEL: {{^}}sample_1d_addr_align:
  73 ; GFX90A: v_mov_b32_e32 [[VADDR:v[0-9]?[02468]]], v1
  74 ; SDAG:   image_sample v{{[0-9]+}}, [[VADDR]], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0x1
  75 ; GISEL:  image_sample v[{{[0-9:]+}}], [[VADDR]], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf
  76 define amdgpu_ps float @sample_1d_addr_align(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <2 x float> %s) {
  77 main_body:
  78   %s1 = extractelement <2 x float> %s, i32 1
  79   %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  80   %v1 = extractelement <4 x float> %v, i32 0
  81   ret float %v1
  82 }
  83
  84 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
  85 declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
  86 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
  87 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
  88 declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
  89 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)