; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
declare double @llvm.fabs.f64(double) nounwind readnone

; FUNC-LABEL: {{^}}fma_f64:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_v2f64:
; SI: v_fma_f64
; SI: v_fma_f64
define amdgpu_kernel void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1, <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
  %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
  %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
  %r2 = load <2 x double>, <2 x double> addrspace(1)* %in3
  %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
  store <2 x double> %r3, <2 x double> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_v4f64:
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
define amdgpu_kernel void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1, <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
  %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
  %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
  %r2 = load <4 x double>, <4 x double> addrspace(1)* %in3
  %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
  store <4 x double> %r3, <4 x double> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_src0:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_src0(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fabs = call double @llvm.fabs.f64(double %r0)
  %r3 = tail call double @llvm.fma.f64(double %fabs, double %r1, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_src1:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_src1(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fabs = call double @llvm.fabs.f64(double %r1)
  %r3 = tail call double @llvm.fma.f64(double %r0, double %fabs, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_src2:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|}}
define amdgpu_kernel void @fma_f64_abs_src2(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fabs = call double @llvm.fabs.f64(double %r2)
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fabs)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_neg_src0:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src0(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fsub = fsub double -0.000000e+00, %r0
  %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_neg_src1:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src1(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fsub = fsub double -0.000000e+00, %r1
  %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_neg_src2:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src2(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fsub = fsub double -0.000000e+00, %r2
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_neg_src0:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_neg_src0(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fabs = call double @llvm.fabs.f64(double %r0)
  %fsub = fsub double -0.000000e+00, %fabs
  %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_neg_src1:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_neg_src1(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fabs = call double @llvm.fabs.f64(double %r1)
  %fsub = fsub double -0.000000e+00, %fabs
  %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_abs_neg_src2:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
define amdgpu_kernel void @fma_f64_abs_neg_src2(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %fabs = call double @llvm.fabs.f64(double %r2)
  %fsub = fsub double -0.000000e+00, %fabs
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_lit_src0:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_lit_src0(double addrspace(1)* %out, double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %r3 = tail call double @llvm.fma.f64(double +2.0, double %r1, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_lit_src1:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_lit_src1(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r2 = load double, double addrspace(1)* %in3
  %r3 = tail call double @llvm.fma.f64(double %r0, double +2.0, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}fma_f64_lit_src2:
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0}}
define amdgpu_kernel void @fma_f64_lit_src2(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double +2.0)
  store double %r3, double addrspace(1)* %out
  ret void
}