llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX906 %s
   3 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10PLUS %s
   4 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10PLUS %s
   5 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10PLUS %s
   6
   7 define float @v_fdot2(<2 x half> %a, <2 x half> %b, float %c) {
     ; Baseline case: %a, %b and %c reach the intrinsic unmodified and the
     ; trailing i1 immarg is false. The expected output is a single
     ; v_dot2_f32_f16 with no modifiers.
     ; NOTE(review): only the GFX906 prefix has assertions here, although the
     ; RUN lines also use the GFX10PLUS prefix -- consider regenerating the
     ; checks so those runs are covered for this function.
   8 ; GFX906-LABEL: v_fdot2:
   9 ; GFX906:       ; %bb.0:
  10 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  11 ; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
  12 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
  13   %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 false)
  14   ret float %r
  15 }
  16
  17 define float @v_fdot2_clamp(<2 x half> %a, <2 x half> %b, float %c) {
     ; Same operands as a plain call, but the trailing i1 immarg is true. The
     ; expected v_dot2_f32_f16 carries the "clamp" modifier. Assertions are
     ; present for both the GFX906 and the GFX10PLUS prefixes.
  18 ; GFX906-LABEL: v_fdot2_clamp:
  19 ; GFX906:       ; %bb.0:
  20 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  21 ; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 clamp
  22 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
  23 ;
  24 ; GFX10PLUS-LABEL: v_fdot2_clamp:
  25 ; GFX10PLUS:       ; %bb.0:
  26 ; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  27 ; GFX10PLUS-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 clamp
  28 ; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  29   %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 true)
  30   ret float %r
  31 }
  32
  33 define float @v_fdot2_neg_a(<2 x half> %a, <2 x half> %b, float %c) {
     ; The first vector operand is negated with fneg before the call. The
     ; expected output has no separate negate instruction: the negation shows
     ; up as neg_lo/neg_hi set for source 0 only ([1,0,0]).
     ; NOTE(review): only the GFX906 prefix has assertions here, although the
     ; RUN lines also use the GFX10PLUS prefix -- consider regenerating.
  34 ; GFX906-LABEL: v_fdot2_neg_a:
  35 ; GFX906:       ; %bb.0:
  36 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  37 ; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
  38 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
  39   %neg.a = fneg <2 x half> %a
  40   %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %b, float %c, i1 false)
  41   ret float %r
  42 }
  43
  44 define float @v_fdot2_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
     ; The second vector operand is negated with fneg before the call. The
     ; expected output has no separate negate instruction: the negation shows
     ; up as neg_lo/neg_hi set for source 1 only ([0,1,0]).
     ; NOTE(review): only the GFX906 prefix has assertions here, although the
     ; RUN lines also use the GFX10PLUS prefix -- consider regenerating.
  45 ; GFX906-LABEL: v_fdot2_neg_b:
  46 ; GFX906:       ; %bb.0:
  47 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  48 ; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
  49 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
  50   %neg.b = fneg <2 x half> %b
  51   %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %neg.b, float %c, i1 false)
  52   ret float %r
  53 }
  54
  55 define float @v_fdot2_neg_a_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
     ; Both vector operands are negated with fneg before the call. Each
     ; negation is expected to fold into the instruction as neg_lo/neg_hi on
     ; sources 0 and 1 ([1,1,0]) instead of a separate negate instruction.
     ; %neg.a is derived from %a and %neg.b from %b, so the two sources stay
     ; distinct registers (v0, v1) and both arguments are actually exercised.
     ; NOTE(review): the v_dot2_f32_f16 assertion was written by hand to match
     ; the corrected operands; rerun utils/update_llc_test_checks.py to confirm
     ; the exact output (and to add the missing GFX10PLUS assertions).
  56 ; GFX906-LABEL: v_fdot2_neg_a_neg_b:
  57 ; GFX906:       ; %bb.0:
  58 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  59 ; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
  60 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
  61   %neg.a = fneg <2 x half> %a
  62   %neg.b = fneg <2 x half> %b
  63   %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %neg.b, float %c, i1 false)
  64   ret float %r
  65 }
  66
  67 define float @v_fdot2_neg_c(<2 x half> %a, <2 x half> %b, float %c) {
     ; The float accumulator operand is negated with fneg before the call.
     ; Unlike the vector-operand cases, the expected output does not fold this
     ; into a modifier: the sign bit of v2 is flipped with a separate
     ; v_xor_b32 against 0x80000000, followed by an unmodified v_dot2_f32_f16.
     ; NOTE(review): only the GFX906 prefix has assertions here, although the
     ; RUN lines also use the GFX10PLUS prefix -- consider regenerating.
  68 ; GFX906-LABEL: v_fdot2_neg_c:
  69 ; GFX906:       ; %bb.0:
  70 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  71 ; GFX906-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
  72 ; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, v2
  73 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
  74   %neg.c = fneg float %c
  75   %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %neg.c, i1 false)
  76   ret float %r
  77 }
  78
  79 define float @v_fdot2_inline_literal_a(<2 x half> %b, float %c) {
     ; The first vector operand is the constant splat <half 2.0, half 2.0>.
     ; The expected output encodes it directly as the operand 2.0 in source 0,
     ; with op_sel_hi:[0,1,1]; %b and %c arrive in v0 and v1.
     ; NOTE(review): only the GFX906 prefix has assertions here, although the
     ; RUN lines also use the GFX10PLUS prefix -- consider regenerating.
  80 ; GFX906-LABEL: v_fdot2_inline_literal_a:
  81 ; GFX906:       ; %bb.0:
  82 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  83 ; GFX906-NEXT:    v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
  84 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
  85   %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
  86   ret float %ret
  87 }
  88
  89 define float @v_fdot2_inline_literal_b(<2 x half> %a, float %c) {
     ; The second vector operand is the constant splat <half 2.0, half 2.0>.
     ; The expected output encodes it directly as the operand 2.0 in source 1,
     ; with op_sel_hi:[1,0,1]; %a and %c arrive in v0 and v1.
     ; NOTE(review): only the GFX906 prefix has assertions here, although the
     ; RUN lines also use the GFX10PLUS prefix -- consider regenerating.
  90 ; GFX906-LABEL: v_fdot2_inline_literal_b:
  91 ; GFX906:       ; %bb.0:
  92 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  93 ; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
  94 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
  95   %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
  96   ret float %ret
  97 }
  98
  99 define float @v_fdot2_inline_literal_c(<2 x half> %a, <2 x half> %b) {
     ; The float accumulator operand is the constant 1.0. The expected output
     ; encodes it directly as the operand 1.0 in source 2, with no modifiers.
     ; NOTE(review): only the GFX906 prefix has assertions here, although the
     ; RUN lines also use the GFX10PLUS prefix -- consider regenerating.
 100 ; GFX906-LABEL: v_fdot2_inline_literal_c:
 101 ; GFX906:       ; %bb.0:
 102 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 103 ; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, v1, 1.0
 104 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
 105   %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 1.0, i1 false)
 106   ret float %ret
 107 }
 108
 109 declare float @llvm.amdgcn.fdot2(<2 x half>, <2 x half>, float, i1 immarg) #0
     ; ^ Intrinsic under test, called by every function in this file: two
     ;   <2 x half> operands, a float operand and an i1 that must be an
     ;   immediate (immarg); returns float. Passing true for the i1 is what
     ;   yields the "clamp" modifier in the v_fdot2_clamp assertions.
 110
     ; Attribute group applied to the intrinsic declaration via "#0".
 111 attributes #0 = { nounwind readnone speculatable }