test/CodeGen/AMDGPU/nsa-reassign.ll

   1 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
   2
   ; sample_contig_nsa: adds 1.0 to each of the eight float arguments, passes
   ; five of the sums to image.sample.c.l.3d and the remaining three to
   ; image.sample.3d, and returns the two results packed in a <2 x float>.
   ; The checks below require both instructions to print their address operand
   ; as one register range of the form v[lo:hi].
   ; NOTE(review): presumably this is the case where the address registers of
   ; both instructions can be made consecutive - confirm against the pass under
   ; test.
   3 ; GCN-LABEL: {{^}}sample_contig_nsa:
   4 ; GCN-DAG: image_sample_c_l v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}],
   5 ; GCN-DAG: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}],
   6 define amdgpu_ps <2 x float> @sample_contig_nsa(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) {
   7 main_body:
       ; Every float input gets +1.0, so the sample operands below are computed
       ; values rather than the incoming arguments themselves.
   8   %zcompare.1 = fadd float %zcompare, 1.0
   9   %s1.1 = fadd float %s1, 1.0
  10   %t1.1 = fadd float %t1, 1.0
  11   %r1.1 = fadd float %r1, 1.0
  12   %s2.1 = fadd float %s2, 1.0
  13   %t2.1 = fadd float %t2, 1.0
  14   %r2.1 = fadd float %r2, 1.0
  15   %lod.1 = fadd float %lod, 1.0
       ; Five float operands: zcompare, s1, t1, r1, lod.
  16   %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare.1, float %s1.1, float %t1.1, float %r1.1, float %lod.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
       ; Three float operands: s2, t2, r2.
  17   %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2.1, float %t2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
       ; Element 0 holds the c.l result, element 1 the plain sample result.
  18   %r.0 = insertelement <2 x float> undef, float %v1, i32 0
  19   %r = insertelement <2 x float> %r.0, float %v2, i32 1
  20   ret <2 x float> %r
  21 }
  22
  ; sample_contig_nsa_10vgprs: same instruction sequence as @sample_contig_nsa,
  ; but the function carries attribute group #0, which this file defines as
  ; "amdgpu-num-vgpr"="10".
  ; The checks below require both instructions to print their address operand
  ; as a bracketed list of individual registers: five for image_sample_c_l and
  ; three for image_sample.
  ; NOTE(review): presumably the VGPR limit leaves no room to place the
  ; addresses in consecutive registers - confirm against the pass under test.
  23 ; GCN-LABEL: {{^}}sample_contig_nsa_10vgprs:
  24 ; GCN-DAG: image_sample_c_l v{{[0-9]+}}, [{{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}],
  25 ; GCN-DAG: image_sample v{{[0-9]+}}, [{{v[0-9]+, v[0-9]+, v[0-9]+}}],
  26 define amdgpu_ps <2 x float> @sample_contig_nsa_10vgprs(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) #0 {
  27 main_body:
       ; Every float input gets +1.0, so the sample operands below are computed
       ; values rather than the incoming arguments themselves.
  28   %zcompare.1 = fadd float %zcompare, 1.0
  29   %s1.1 = fadd float %s1, 1.0
  30   %t1.1 = fadd float %t1, 1.0
  31   %r1.1 = fadd float %r1, 1.0
  32   %s2.1 = fadd float %s2, 1.0
  33   %t2.1 = fadd float %t2, 1.0
  34   %r2.1 = fadd float %r2, 1.0
  35   %lod.1 = fadd float %lod, 1.0
       ; Five float operands: zcompare, s1, t1, r1, lod.
  36   %v1 = call float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32 1, float %zcompare.1, float %s1.1, float %t1.1, float %r1.1, float %lod.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
       ; Three float operands: s2, t2, r2.
  37   %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2.1, float %t2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
       ; Element 0 holds the c.l result, element 1 the plain sample result.
  38   %r.0 = insertelement <2 x float> undef, float %v1, i32 0
  39   %r = insertelement <2 x float> %r.0, float %v2, i32 1
  40   ret <2 x float> %r
  41 }
  42
  ; sample_contig_nsa_conflict: issues two image.sample.3d calls over the same
  ; three coordinate values, with the first two coordinates swapped in the
  ; second call (s2, t2, r2 versus t2, s2, r2).
  ; The checks below require one image_sample printed with a register range
  ; v[lo:hi] and one printed with a bracketed list of three registers.
  ; NOTE(review): presumably the two orderings cannot both be laid out in
  ; consecutive registers at once - confirm against the pass under test.
  43 ; GCN-LABEL: {{^}}sample_contig_nsa_conflict:
  44 ; GCN-DAG: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}],
  45 ; GCN-DAG: image_sample v{{[0-9]+}}, [{{v[0-9]+, v[0-9]+, v[0-9]+}}],
  46 define amdgpu_ps <2 x float> @sample_contig_nsa_conflict(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) {
  47 main_body:
       ; Only %s2.1, %t2.1 and %r2.1 are consumed below; %zcompare.1, %s1.1,
       ; %t1.1, %r1.1 and %lod.1 have no users in this function.
  48   %zcompare.1 = fadd float %zcompare, 1.0
  49   %s1.1 = fadd float %s1, 1.0
  50   %t1.1 = fadd float %t1, 1.0
  51   %r1.1 = fadd float %r1, 1.0
  52   %s2.1 = fadd float %s2, 1.0
  53   %t2.1 = fadd float %t2, 1.0
  54   %r2.1 = fadd float %r2, 1.0
  55   %lod.1 = fadd float %lod, 1.0
       ; Coordinate order: s2, t2, r2.
  56   %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2.1, float %t2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
       ; Coordinate order: t2, s2, r2 (first two swapped).
  57   %v1 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %t2.1, float %s2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
       ; Element 0 holds %v1 (swapped order), element 1 holds %v2.
  58   %r.0 = insertelement <2 x float> undef, float %v1, i32 0
  59   %r = insertelement <2 x float> %r.0, float %v2, i32 1
  60   ret <2 x float> %r
  61 }
  62
  ; sample_contig_nsa_same_addr: issues two image.sample.3d calls that pass the
  ; same three coordinates in the same order (s2, t2, r2). The calls differ
  ; only in their last i32 operand (1 for %v2, 0 for %v1).
  ; The checks below require both image_sample instructions to print their
  ; address operand as a register range v[lo:hi].
  ; NOTE(review): the differing last operand presumably keeps the two calls
  ; from being folded into one - confirm.
  63 ; GCN-LABEL: {{^}}sample_contig_nsa_same_addr:
  64 ; GCN-DAG: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}],
  65 ; GCN-DAG: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}],
  66 define amdgpu_ps <2 x float> @sample_contig_nsa_same_addr(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) {
  67 main_body:
       ; Only %s2.1, %t2.1 and %r2.1 are consumed below; %zcompare.1, %s1.1,
       ; %t1.1, %r1.1 and %lod.1 have no users in this function.
  68   %zcompare.1 = fadd float %zcompare, 1.0
  69   %s1.1 = fadd float %s1, 1.0
  70   %t1.1 = fadd float %t1, 1.0
  71   %r1.1 = fadd float %r1, 1.0
  72   %s2.1 = fadd float %s2, 1.0
  73   %t2.1 = fadd float %t2, 1.0
  74   %r2.1 = fadd float %r2, 1.0
  75   %lod.1 = fadd float %lod, 1.0
       ; Same coordinates as the next call; last i32 operand is 1 here.
  76   %v2 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2.1, float %t2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1)
       ; Same coordinates as the previous call; last i32 operand is 0 here.
  77   %v1 = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %s2.1, float %t2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
       ; Element 0 holds %v1, element 1 holds %v2.
  78   %r.0 = insertelement <2 x float> undef, float %v1, i32 0
  79   %r = insertelement <2 x float> %r.0, float %v2, i32 1
  80   ret <2 x float> %r
  81 }
  82
  ; sample_contig_nsa_same_reg: issues a single image.sample.3d call in which
  ; the same value (%t2.1) is passed as both the first and the second
  ; coordinate, and returns the scalar result.
  ; The check below requires the image_sample address operand to be printed as
  ; a bracketed list of three registers.
  ; NOTE(review): presumably a value used twice cannot occupy two consecutive
  ; registers, so the list form has to stay - confirm against the pass under
  ; test.
  83 ; GCN-LABEL: {{^}}sample_contig_nsa_same_reg:
  84 ; GCN-DAG: image_sample v{{[0-9]+}}, [{{v[0-9]+, v[0-9]+, v[0-9]+}}],
  85 define amdgpu_ps float @sample_contig_nsa_same_reg(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) {
  86 main_body:
       ; Only %t2.1 and %r2.1 are consumed below; %zcompare.1, %s1.1, %t1.1,
       ; %r1.1, %s2.1 and %lod.1 have no users in this function.
  87   %zcompare.1 = fadd float %zcompare, 1.0
  88   %s1.1 = fadd float %s1, 1.0
  89   %t1.1 = fadd float %t1, 1.0
  90   %r1.1 = fadd float %r1, 1.0
  91   %s2.1 = fadd float %s2, 1.0
  92   %t2.1 = fadd float %t2, 1.0
  93   %r2.1 = fadd float %r2, 1.0
  94   %lod.1 = fadd float %lod, 1.0
       ; %t2.1 appears twice in the coordinate list.
  95   %v = call float @llvm.amdgcn.image.sample.3d.f32.f32(i32 1, float %t2.1, float %t2.1, float %r2.1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  96   ret float %v
  97 }
  98
  ; Declarations of the two image-sample intrinsics called by the functions in
  ; this file. The plain 3d sample takes three float operands; the c.l variant
  ; takes five.
  99 declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 100 declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 101
  ; Attribute group #0 is referenced only by @sample_contig_nsa_10vgprs.
 102 attributes #0 = {"amdgpu-num-vgpr"="10"}