llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX906 %s
   3 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
   4 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
   5
   6 define i32 @v_sdot8(i32 %a, i32 %b, i32 %c) { ; Plain case: the three i32 arguments feed the intrinsic directly and the trailing i1 is false.
   7 ; GFX906-LABEL: v_sdot8:
   8 ; GFX906:       ; %bb.0:
   9 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  10 ; GFX906-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2
  11 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
  12 ;
  13 ; GFX10-LABEL: v_sdot8:
  14 ; GFX10:       ; %bb.0:
  15 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  16 ; GFX10-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2
  17 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  18   %r = call i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 false) ; i1 false here; the expected v_dot8_i32_i4 lines carry no clamp modifier
  19   ret i32 %r
  20 }
  21
  22 define i32 @v_sdot8_clamp(i32 %a, i32 %b, i32 %c) { ; Same operands as @v_sdot8, but the trailing i1 is true.
  23 ; GFX906-LABEL: v_sdot8_clamp:
  24 ; GFX906:       ; %bb.0:
  25 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  26 ; GFX906-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2 clamp
  27 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
  28 ;
  29 ; GFX10-LABEL: v_sdot8_clamp:
  30 ; GFX10:       ; %bb.0:
  31 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  32 ; GFX10-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2 clamp
  33 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  34   %r = call i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 true) ; i1 true here; the expected v_dot8_i32_i4 lines end in the clamp modifier
  35   ret i32 %r
  36 }
  37
  38 ; FIXME: Fix argument handling; do not let these casts expand.
  39 ; define i32 @v_sdot8_cast_v8i4(<8 x i4> %a, <8 x i4> %b, i32 %c) {
  40 ;   %a.cast = bitcast <8 x i4> %a to i32
  41 ;   %b.cast = bitcast <8 x i4> %b to i32
  42 ;   %r = call i32 @llvm.amdgcn.sdot8(i32 %a.cast, i32 %b.cast, i32 %c, i1 false)
  43 ;   ret i32 %r
  44 ; }
  45
  46 define i32 @v_sdot8_fnegf32_a(float %a, i32 %b, i32 %c) { ; First intrinsic operand is a negated float reinterpreted as i32.
  47 ; GFX906-LABEL: v_sdot8_fnegf32_a:
  48 ; GFX906:       ; %bb.0:
  49 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  50 ; GFX906-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
  51 ; GFX906-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2
  52 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
  53 ;
  54 ; GFX10-LABEL: v_sdot8_fnegf32_a:
  55 ; GFX10:       ; %bb.0:
  56 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  57 ; GFX10-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
  58 ; GFX10-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2
  59 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  60   %neg.a = fneg float %a ; the checks expect this as a separate v_xor_b32 with 0x80000000 ahead of the dot instruction
  61   %cast.neg.a = bitcast float %neg.a to i32 ; float -> i32 bitcast; no instruction for it appears in the expected output
  62   %r = call i32 @llvm.amdgcn.sdot8(i32 %cast.neg.a, i32 %b, i32 %c, i1 false) ; expected v_dot8_i32_i4 reads the xor result from v0 with plain register operands; i1 is false, no clamp modifier. NOTE(review): presumably guards against folding the fneg into a source modifier - confirm
  63   ret i32 %r
  64 }
  65
  66 define i32 @v_sdot8_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) { ; First intrinsic operand is a negated <2 x half> reinterpreted as i32.
  67 ; GFX906-LABEL: v_sdot8_fnegv2f16_a:
  68 ; GFX906:       ; %bb.0:
  69 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  70 ; GFX906-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
  71 ; GFX906-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2
  72 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
  73 ;
  74 ; GFX10-LABEL: v_sdot8_fnegv2f16_a:
  75 ; GFX10:       ; %bb.0:
  76 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  77 ; GFX10-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
  78 ; GFX10-NEXT:    v_dot8_i32_i4 v0, v0, v1, v2
  79 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  80   %neg.a = fneg <2 x half> %a ; the checks expect this as a single v_xor_b32 with 0x80008000 ahead of the dot instruction
  81   %cast.neg.a = bitcast <2 x half> %neg.a to i32 ; <2 x half> -> i32 bitcast; no instruction for it appears in the expected output
  82   %r = call i32 @llvm.amdgcn.sdot8(i32 %cast.neg.a, i32 %b, i32 %c, i1 false) ; expected v_dot8_i32_i4 reads the xor result from v0 with plain register operands; i1 is false, no clamp modifier. NOTE(review): presumably guards against folding the fneg into a source modifier - confirm
  83   ret i32 %r
  84 }
  85
  86 declare i32 @llvm.amdgcn.sdot8(i32, i32, i32, i1 immarg) #0 ; intrinsic under test: three i32 operands plus an i1 marked immarg; the tests in this file pass that i1 as a literal false or true
  87
  88 attributes #0 = { nounwind readnone speculatable } ; attribute group #0, attached to the sdot8 declaration