1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX906 %s
3 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10PLUS %s
4 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10PLUS %s
5 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10PLUS %s
; Baseline case: calls llvm.amdgcn.fdot2 on the unmodified arguments with the
; trailing i1 operand set to false. The GFX906 checks expect a single
; v_dot2_f32_f16 with no modifiers, reading v0, v1, v2 and writing v0.
; NOTE(review): only GFX906 checks are visible for this function even though
; the GFX10PLUS prefix is used by the RUN lines -- confirm and regenerate.
7 define float @v_fdot2(<2 x half> %a, <2 x half> %b, float %c) {
8 ; GFX906-LABEL: v_fdot2:
10 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2
12 ; GFX906-NEXT: s_setpc_b64 s[30:31]
13 %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 false)
; Clamp case: identical to the baseline call except the trailing i1 operand is
; true. Both the GFX906 and GFX10PLUS checks expect the same v_dot2_f32_f16
; instruction with the "clamp" modifier appended.
17 define float @v_fdot2_clamp(<2 x half> %a, <2 x half> %b, float %c) {
18 ; GFX906-LABEL: v_fdot2_clamp:
20 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21 ; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 clamp
22 ; GFX906-NEXT: s_setpc_b64 s[30:31]
24 ; GFX10PLUS-LABEL: v_fdot2_clamp:
26 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27 ; GFX10PLUS-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 clamp
28 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
29 %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 true)
; Negated first operand: %a is passed through a vector fneg before the call.
; The GFX906 checks expect no separate negate instruction; instead the single
; v_dot2_f32_f16 carries neg_lo:[1,0,0] neg_hi:[1,0,0] (first source only).
; NOTE(review): only GFX906 checks are visible for this function even though
; the GFX10PLUS prefix is used by the RUN lines -- confirm and regenerate.
33 define float @v_fdot2_neg_a(<2 x half> %a, <2 x half> %b, float %c) {
34 ; GFX906-LABEL: v_fdot2_neg_a:
36 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
38 ; GFX906-NEXT: s_setpc_b64 s[30:31]
39 %neg.a = fneg <2 x half> %a
40 %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %b, float %c, i1 false)
; Negated second operand: %b is passed through a vector fneg before the call.
; The GFX906 checks expect the negation to appear as neg_lo:[0,1,0]
; neg_hi:[0,1,0] on the single v_dot2_f32_f16 (second source only).
; NOTE(review): only GFX906 checks are visible for this function even though
; the GFX10PLUS prefix is used by the RUN lines -- confirm and regenerate.
44 define float @v_fdot2_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
45 ; GFX906-LABEL: v_fdot2_neg_b:
47 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
49 ; GFX906-NEXT: s_setpc_b64 s[30:31]
50 %neg.b = fneg <2 x half> %b
51 %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %neg.b, float %c, i1 false)
; Both vector operands negated: %a and %b each go through a vector fneg before
; the call. The GFX906 checks expect both negations on the single
; v_dot2_f32_f16 as neg_lo:[1,1,0] neg_hi:[1,1,0], with the first source in v0
; and the second in v1 (same operand mapping as the single-negation tests).
; Previously %neg.a was computed from %b, so %a was never used and the expected
; instruction read "v1, v1"; that did not test what the function name says.
; NOTE(review): the check line below was adjusted by hand to match the corrected
; IR -- rerun utils/update_llc_test_checks.py to confirm, and to add the
; GFX10PLUS checks that are not visible for this function.
55 define float @v_fdot2_neg_a_neg_b(<2 x half> %a, <2 x half> %b, float %c) {
56 ; GFX906-LABEL: v_fdot2_neg_a_neg_b:
58 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59 ; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
60 ; GFX906-NEXT: s_setpc_b64 s[30:31]
61 %neg.a = fneg <2 x half> %a
62 %neg.b = fneg <2 x half> %b
63 %r = call float @llvm.amdgcn.fdot2(<2 x half> %neg.a, <2 x half> %neg.b, float %c, i1 false)
; Negated accumulator: the scalar float %c goes through fneg before the call.
; Unlike the vector-operand cases, the GFX906 checks expect a separate
; v_xor_b32 with 0x80000000 (flipping the f32 sign bit of v2) ahead of an
; unmodified v_dot2_f32_f16, i.e. no neg modifier on the third source.
; NOTE(review): only GFX906 checks are visible for this function even though
; the GFX10PLUS prefix is used by the RUN lines -- confirm and regenerate.
67 define float @v_fdot2_neg_c(<2 x half> %a, <2 x half> %b, float %c) {
68 ; GFX906-LABEL: v_fdot2_neg_c:
70 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71 ; GFX906-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
72 ; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2
73 ; GFX906-NEXT: s_setpc_b64 s[30:31]
74 %neg.c = fneg float %c
75 %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %neg.c, i1 false)
; Constant first operand: the first vector argument is the splat
; <half 2.0, half 2.0>; the function itself only takes %b and %c, which the
; checks show arriving in v0 and v1. The GFX906 checks expect the constant to
; be encoded directly as 2.0 in the first source slot, with
; op_sel_hi:[0,1,1] (the bit for the constant operand cleared).
; NOTE(review): only GFX906 checks are visible for this function even though
; the GFX10PLUS prefix is used by the RUN lines -- confirm and regenerate.
79 define float @v_fdot2_inline_literal_a(<2 x half> %b, float %c) {
80 ; GFX906-LABEL: v_fdot2_inline_literal_a:
82 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83 ; GFX906-NEXT: v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
84 ; GFX906-NEXT: s_setpc_b64 s[30:31]
85 %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
; Constant second operand: the second vector argument is the splat
; <half 2.0, half 2.0>; the function only takes %a and %c, which the checks
; show arriving in v0 and v1. The GFX906 checks expect the constant to be
; encoded directly as 2.0 in the second source slot, with
; op_sel_hi:[1,0,1] (the bit for the constant operand cleared).
; NOTE(review): only GFX906 checks are visible for this function even though
; the GFX10PLUS prefix is used by the RUN lines -- confirm and regenerate.
89 define float @v_fdot2_inline_literal_b(<2 x half> %a, float %c) {
90 ; GFX906-LABEL: v_fdot2_inline_literal_b:
92 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
93 ; GFX906-NEXT: v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
94 ; GFX906-NEXT: s_setpc_b64 s[30:31]
95 %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
; Constant accumulator: the float argument is the literal 1.0 and the function
; only takes the two vector operands. The GFX906 checks expect 1.0 to be
; encoded directly as the third source of v_dot2_f32_f16, with no op_sel_hi
; modifier printed.
; NOTE(review): only GFX906 checks are visible for this function even though
; the GFX10PLUS prefix is used by the RUN lines -- confirm and regenerate.
99 define float @v_fdot2_inline_literal_c(<2 x half> %a, <2 x half> %b) {
100 ; GFX906-LABEL: v_fdot2_inline_literal_c:
102 ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103 ; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, 1.0
104 ; GFX906-NEXT: s_setpc_b64 s[30:31]
105 %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 1.0, i1 false)
; Declaration of the intrinsic under test: two <2 x half> operands, a float
; operand, and a trailing i1 that must be an immediate (immarg). In the tests
; above, passing true for that i1 is what yields the "clamp" modifier on the
; expected instruction. Attribute group #0 marks the intrinsic as
; nounwind readnone speculatable.
109 declare float @llvm.amdgcn.fdot2(<2 x half>, <2 x half>, float, i1 immarg) #0
111 attributes #0 = { nounwind readnone speculatable }