llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll

   1 ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX908 %s
   2 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s
   3 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s
   4 ; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s
   5 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s
   6
     ; Check-prefix layout of the run lines above: every run checks the common GCN
     ; prefix; only the gfx908 run adds GFX908, while the gfx90a and gfx940 runs
     ; (both with and without -global-isel) share the GFX90A prefix.
     ;
     ; MFMA intrinsic called by every test function in this file.
   7 declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
   8
   9 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_vgpr:
  10 ; GFX908: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
  11 ; GFX90A: v_mfma_f32_32x32x1{{.*}} v[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9:]+}}]
     ; Baseline case: an amdgpu_kernel with attribute group #0 whose body contains
     ; only the load, the MFMA and the store (no call, no inline asm).
     ; The gfx908 run expects the MFMA destination and its last source operand in
     ; a[...] registers; the gfx90a/gfx940 runs expect v[...] registers instead.
     ; NOTE(review): presumably the waves-per-eu setting in #0 is what allows the
     ; v[...] form on those targets - confirm against the backend selection logic.
  12 define amdgpu_kernel void @test_mfma_f32_32x32x1f32_vgpr(ptr addrspace(1) %arg) #0 {
  13 bb:
       ; Accumulator input is loaded from the kernel argument pointer.
  14   %in.1 = load <32 x float>, ptr addrspace(1) %arg
       ; First two operands are the constants 1.0 and 2.0; the three trailing i32 operands are all 0.
  15   %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
       ; Result is written back through the same pointer.
  16   store <32 x float> %mai.1, ptr addrspace(1) %arg
  17   ret void
  18 }
  19
  20 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_agpr:
  21 ; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
     ; Same body as the _vgpr test, but the kernel uses attribute group #1, which
     ; (unlike #0) does not set "amdgpu-waves-per-eu".
     ; All runs expect the MFMA destination and its last source operand in a[...] registers.
  22 define amdgpu_kernel void @test_mfma_f32_32x32x1f32_agpr(ptr addrspace(1) %arg) #1 {
  23 bb:
       ; Accumulator input is loaded from the kernel argument pointer.
  24   %in.1 = load <32 x float>, ptr addrspace(1) %arg
  25   %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
       ; Result is written back through the same pointer.
  26   store <32 x float> %mai.1, ptr addrspace(1) %arg
  27   ret void
  28 }
  29
  30 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_inline_asm_virtual_agpr:
  31 ; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
     ; Kernel with attribute group #0 that additionally contains an inline asm
     ; statement whose output constraint is an "a" register.
     ; All runs expect the a[...] form of the MFMA here, unlike the _vgpr test
     ; which uses the same attributes without inline asm.
  32 define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_virtual_agpr(ptr addrspace(1) %arg) #0 {
  33 bb:
       ; The asm result %acc is not used by any later instruction in this function.
       ; NOTE(review): the test name says "virtual", but the constraint names the
       ; specific register a0 in braces - confirm this is the intended constraint.
  34   %acc = call i32 asm sideeffect "; def $0", "={a0}"()
  35   %in.1 = load <32 x float>, ptr addrspace(1) %arg
  36   %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
  37   store <32 x float> %mai.1, ptr addrspace(1) %arg
  38   ret void
  39 }
  40
  41 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_inline_asm_phys_agpr:
  42 ; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
     ; Kernel with attribute group #0 that contains an inline asm statement taking
     ; its input in the explicitly named register range a[100:131].
     ; All runs expect the a[...] form of the MFMA.
  43 define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_phys_agpr(ptr addrspace(1) %arg) #0 {
  44 bb:
       ; The asm input value is undef; only the register constraint matters to this test's checks.
  45   call void asm sideeffect "; use $0", "{a[100:131]}"(<32 x float> undef)
  46   %in.1 = load <32 x float>, ptr addrspace(1) %arg
  47   %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
  48   store <32 x float> %mai.1, ptr addrspace(1) %arg
  49   ret void
  50 }
  51
  52 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_inline_asm_no_agprs:
  53 ; GFX908: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
  54 ; GFX90A: v_mfma_f32_32x32x1{{.*}} v[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9:]+}}]
     ; Kernel with attribute group #0 that contains inline asm, but the asm only
     ; names a "v" register (v0), not an "a" register.
     ; Expectations match the _vgpr test: a[...] on the gfx908 run, v[...] on the
     ; gfx90a/gfx940 runs.
  55 define amdgpu_kernel void @test_mfma_f32_32x32x1f32_inline_asm_no_agprs(ptr addrspace(1) %arg) #0 {
  56 bb:
       ; The asm result %acc is not used by any later instruction in this function.
  57   %acc = call i32 asm sideeffect "; def $0", "={v0}"()
  58   %in.1 = load <32 x float>, ptr addrspace(1) %arg
  59   %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
  60   store <32 x float> %mai.1, ptr addrspace(1) %arg
  61   ret void
  62 }
  63
  63
  64 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_call:
  65 ; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
     ; Kernel with attribute group #0 that calls the externally declared @foo in
     ; the same basic block as the MFMA.
     ; All runs expect the a[...] form of the MFMA.
  66 define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call(ptr addrspace(1) %arg) #0 {
  67 bb:
       ; @foo is only declared in this file, so its body is not visible here.
  68   call void @foo()
  69   %in.1 = load <32 x float>, ptr addrspace(1) %arg
  70   %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
  71   store <32 x float> %mai.1, ptr addrspace(1) %arg
  72   ret void
  73 }
  74
  75 ; We could avoid the scan to find calls, since we see them during lowering before selection.
  76 ; However, in SelectionDAG, lowering and selection are done block by block, so that would
  77 ; only work in GlobalISel.
  78
  79 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_call_multi_bb:
  80 ; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
     ; Kernel with attribute group #0 where the MFMA (in bb1) and the call to @foo
     ; (in bb2) live in different basic blocks; bb2 is entered only when %c0 is true.
     ; All runs still expect the a[...] form of the MFMA, i.e. the same result as
     ; the single-block _call test.
  81 define amdgpu_kernel void @test_mfma_f32_32x32x1f32_call_multi_bb(ptr addrspace(1) %arg, i1 %c0) #0 {
  82 bb1:
  83   %in.1 = load <32 x float>, ptr addrspace(1) %arg
       ; Unlike the other tests, the three trailing i32 operands here are 1, 2 and 3.
  84   %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3)
  85   store <32 x float> %mai.1, ptr addrspace(1) %arg
       ; Sole terminator of bb1: go to the block with the call or skip straight to the return.
  86   br i1 %c0, label %bb2, label %bb3
  88
  89 bb2:
  90   call void @foo()
  91   br label %bb3
  92
  93 bb3:
  94   ret void
  95 }
  96
  97 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry:
  98 ; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
     ; Same body and attribute group (#0) as the _vgpr test, but defined as a plain
     ; function rather than an amdgpu_kernel.
     ; All runs expect the a[...] form of the MFMA.
  99 define void @test_mfma_f32_32x32x1f32_nonentry(ptr addrspace(1) %arg) #0 {
 100 bb:
       ; Accumulator input is loaded from the pointer argument.
 101   %in.1 = load <32 x float>, ptr addrspace(1) %arg
 102   %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
       ; Result is written back through the same pointer.
 103   store <32 x float> %mai.1, ptr addrspace(1) %arg
 104   ret void
 105 }
 106
 107 declare void @foo()
     ; @foo is the external callee used by the _call and _call_multi_bb tests.
 108
     ; #0 is used by every test function except the _agpr test, which uses #1.
     ; The two groups differ only in that #0 also sets "amdgpu-waves-per-eu"="2".
 109 attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }
 110 attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }