; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx90a -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX90A %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
; Intrinsic declarations used by the tests below.
declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
declare double @llvm.fabs.f64(double) nounwind readnone
; Scalar f64 fma selects v_fma_f64 (v_fmac_f64 on gfx90a).
; FUNC-LABEL: {{^}}fma_f64:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                   ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}
; <2 x double> fma is scalarized to two f64 fma instructions.
; FUNC-LABEL: {{^}}fma_v2f64:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load <2 x double>, ptr addrspace(1) %in1
  %r1 = load <2 x double>, ptr addrspace(1) %in2
  %r2 = load <2 x double>, ptr addrspace(1) %in3
  %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
  store <2 x double> %r3, ptr addrspace(1) %out
  ret void
}
; <4 x double> fma is scalarized to four f64 fma instructions.
; FUNC-LABEL: {{^}}fma_v4f64:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_v4f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                     ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load <4 x double>, ptr addrspace(1) %in1
  %r1 = load <4 x double>, ptr addrspace(1) %in2
  %r2 = load <4 x double>, ptr addrspace(1) %in3
  %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
  store <4 x double> %r3, ptr addrspace(1) %out
  ret void
}
; fabs on src0 folds into the |...| source modifier of v_fma_f64.
; NOTE: the abs bars must be escaped (\|) inside the {{...}} FileCheck regex,
; as in the abs_src1/abs_src2 tests; unescaped | is regex alternation.
; FUNC-LABEL: {{^}}fma_f64_abs_src0:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_src0(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fabs = call double @llvm.fabs.f64(double %r0)
  %r3 = tail call double @llvm.fma.f64(double %fabs, double %r1, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}
; fabs on src1 folds into the |...| source modifier of v_fma_f64.
; FUNC-LABEL: {{^}}fma_f64_abs_src1:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fabs = call double @llvm.fabs.f64(double %r1)
  %r3 = tail call double @llvm.fma.f64(double %r0, double %fabs, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}
; fabs on src2 folds into the |...| source modifier of v_fma_f64.
; FUNC-LABEL: {{^}}fma_f64_abs_src2:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], \|v\[[0-9]+:[0-9]+\]\|}}
define amdgpu_kernel void @fma_f64_abs_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fabs = call double @llvm.fabs.f64(double %r2)
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fabs)
  store double %r3, ptr addrspace(1) %out
  ret void
}
; fneg (fsub -0.0, x) on src0 folds into the neg source modifier.
; FUNC-LABEL: {{^}}fma_f64_neg_src0:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src0(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fsub = fsub double -0.000000e+00, %r0
  %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}
; fneg (fsub -0.0, x) on src1 folds into the neg source modifier.
; FUNC-LABEL: {{^}}fma_f64_neg_src1:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fsub = fsub double -0.000000e+00, %r1
  %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}
; fneg (fsub -0.0, x) on src2 folds into the neg source modifier.
; FUNC-LABEL: {{^}}fma_f64_neg_src2:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_neg_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fsub = fsub double -0.000000e+00, %r2
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
  store double %r3, ptr addrspace(1) %out
  ret void
}
; Combined fneg(fabs(x)) on src0 folds into the -|...| source modifiers.
; FUNC-LABEL: {{^}}fma_f64_abs_neg_src0:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_neg_src0(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                                ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fabs = call double @llvm.fabs.f64(double %r0)
  %fsub = fsub double -0.000000e+00, %fabs
  %r3 = tail call double @llvm.fma.f64(double %fsub, double %r1, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}
; Combined fneg(fabs(x)) on src1 folds into the -|...| source modifiers.
; FUNC-LABEL: {{^}}fma_f64_abs_neg_src1:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_abs_neg_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                                ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fabs = call double @llvm.fabs.f64(double %r1)
  %fsub = fsub double -0.000000e+00, %fabs
  %r3 = tail call double @llvm.fma.f64(double %r0, double %fsub, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}
; Combined fneg(fabs(x)) on src2 folds into the -|...| source modifiers.
; FUNC-LABEL: {{^}}fma_f64_abs_neg_src2:
; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
define amdgpu_kernel void @fma_f64_abs_neg_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                                ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %fabs = call double @llvm.fabs.f64(double %r2)
  %fsub = fsub double -0.000000e+00, %fabs
  %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %fsub)
  store double %r3, ptr addrspace(1) %out
  ret void
}
; An inline-immediate constant (2.0) as src0 is folded into the instruction
; (fma is commutative in its first two operands, so it may appear as src1).
; FUNC-LABEL: {{^}}fma_f64_lit_src0:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_lit_src0(ptr addrspace(1) %out,
                                            ptr addrspace(1) %in2, ptr addrspace(1) %in3) {
  %r1 = load double, ptr addrspace(1) %in2
  %r2 = load double, ptr addrspace(1) %in3
  %r3 = tail call double @llvm.fma.f64(double +2.0, double %r1, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}
; An inline-immediate constant (2.0) as src1 is folded into the instruction.
; FUNC-LABEL: {{^}}fma_f64_lit_src1:
; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], 2.0, v\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @fma_f64_lit_src1(ptr addrspace(1) %out, ptr addrspace(1) %in1,
                                            ptr addrspace(1) %in3) {
  %r0 = load double, ptr addrspace(1) %in1
  %r2 = load double, ptr addrspace(1) %in3
  %r3 = tail call double @llvm.fma.f64(double %r0, double +2.0, double %r2)
  store double %r3, ptr addrspace(1) %out
  ret void
}
202 ; FUNC-LABEL: {{^}}fma_f64_lit_src2:
203 ; GCN: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], 2.0}}
204 define amdgpu_kernel void @fma_f64_lit_src2(ptr addrspace(1) %out, ptr addrspace(1) %in1,
205 ptr addrspace(1) %in2) {
206 %r0 = load double, ptr addrspace(1) %in1
207 %r1 = load double, ptr addrspace(1) %in2
208 %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double +2.0)
209 store double %r3, ptr addrspace(1) %out