; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; Intrinsic under test; every kernel below calls it with three half operands.
declare half @llvm.amdgcn.fmad.ftz.f16(half %a, half %b, half %c)
; Baseline case. All three intrinsic operands are loaded from global memory
; and the result is stored to %r. The GFX8 pattern accepts either the mad or
; the mac opcode; the GFX9 pattern expects the legacy mad opcode.
; GCN-LABEL: {{^}}mad_f16:
; GFX8: v_ma{{[dc]}}_f16
; GFX9: v_mad_legacy_f16
define amdgpu_kernel void @mad_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ; Every basic block needs a terminator and the body must be closed.
  ret void
}
; Constant in the first operand. The intrinsic is called with the literal 8.0
; (0x4800 as an IEEE half) as %a; the pattern expects v_madmk_f16 carrying
; that literal as its middle source operand.
; GCN-LABEL: {{^}}mad_f16_imm_a:
; GCN: v_madmk_f16 {{v[0-9]+}}, {{v[0-9]+}}, 0x4800, {{v[0-9]+}}
define amdgpu_kernel void @mad_f16_imm_a(
    half addrspace(1)* %r,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half 8.0, half %b.val, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ; Every basic block needs a terminator and the body must be closed.
  ret void
}
; Constant in the second operand. The pattern expects 8.0 (0x4800) to be
; moved into a scalar register with s_movk_i32 and that same register to be
; used as the second source of the mad instruction on both subtargets.
; GCN-LABEL: {{^}}mad_f16_imm_b:
; GCN: s_movk_i32 [[KB:s[0-9]+]], 0x4800
; GFX8: v_mad_f16 {{v[0-9]+}}, {{v[0-9]+}}, [[KB]],
; GFX9: v_mad_legacy_f16 {{v[0-9]+}}, {{v[0-9]+}}, [[KB]],
define amdgpu_kernel void @mad_f16_imm_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %c) {
  %a.val = load half, half addrspace(1)* %a
  %c.val = load half, half addrspace(1)* %c
  %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half 8.0, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ; Every basic block needs a terminator and the body must be closed.
  ret void
}
; Constant in the third operand. The intrinsic is called with the literal 8.0
; (0x4800) as %c; the pattern expects v_madak_f16 with that literal as the
; final operand on the line.
; GCN-LABEL: {{^}}mad_f16_imm_c:
; GCN: v_madak_f16 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x4800{{$}}
define amdgpu_kernel void @mad_f16_imm_c(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) {
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half 8.0)
  store half %r.val, half addrspace(1)* %r
  ; Every basic block needs a terminator and the body must be closed.
  ret void
}
; Negated second operand. %b is negated with "fsub -0.0, x" before the call;
; the patterns expect the negation to appear as a "-" source modifier on the
; second source of the mad instruction.
; GCN-LABEL: {{^}}mad_f16_neg_b:
; GFX8: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_mad_legacy_f16 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @mad_f16_neg_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %neg.b = fsub half -0.0, %b.val
  %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %neg.b, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ; Every basic block needs a terminator and the body must be closed.
  ret void
}
; Absolute value of the second operand. %b goes through llvm.fabs.f16 before
; the call; the patterns expect it to appear as a |...| source modifier on
; the second source of the mad instruction.
; GCN-LABEL: {{^}}mad_f16_abs_b:
; GFX8: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
; GFX9: v_mad_legacy_f16 v{{[0-9]+}}, v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
define amdgpu_kernel void @mad_f16_abs_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %abs.b = call half @llvm.fabs.f16(half %b.val)
  %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %abs.b, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ; Every basic block needs a terminator and the body must be closed.
  ret void
}
; Negated absolute value of the second operand. %b goes through
; llvm.fabs.f16 and is then negated with "fsub -0.0, x"; the patterns expect
; both to appear as a combined -|...| source modifier on the second source.
; GCN-LABEL: {{^}}mad_f16_neg_abs_b:
; GFX8: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}}
; GFX9: v_mad_legacy_f16 v{{[0-9]+}}, v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}}
define amdgpu_kernel void @mad_f16_neg_abs_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) {
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %abs.b = call half @llvm.fabs.f16(half %b.val)
  %neg.abs.b = fsub half -0.0, %abs.b
  %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %neg.abs.b, half %c.val)
  store half %r.val, half addrspace(1)* %r
  ; Every basic block needs a terminator and the body must be closed.
  ret void
}
; Half-precision fabs intrinsic used by the abs_b and neg_abs_b kernels.
declare half @llvm.fabs.f16(half)