1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3 ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
5 ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6 ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
8 ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
9 ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
11 define float @v_constained_fsub_f32_fpexcept_strict(float %x, float %y) #0 {
12 ; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict:
14 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15 ; GCN-NEXT: v_sub_f32_e32 v0, v0, v1
16 ; GCN-NEXT: s_setpc_b64 s[30:31]
18 ; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict:
20 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21 ; GFX10PLUS-NEXT: v_sub_f32_e32 v0, v0, v1
22 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
23 %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
27 define float @v_constained_fsub_f32_fpexcept_ignore(float %x, float %y) #0 {
28 ; GCN-LABEL: v_constained_fsub_f32_fpexcept_ignore:
30 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31 ; GCN-NEXT: v_sub_f32_e32 v0, v0, v1
32 ; GCN-NEXT: s_setpc_b64 s[30:31]
34 ; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_ignore:
36 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37 ; GFX10PLUS-NEXT: v_sub_f32_e32 v0, v0, v1
38 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
39 %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
43 define float @v_constained_fsub_f32_fpexcept_maytrap(float %x, float %y) #0 {
44 ; GCN-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
46 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47 ; GCN-NEXT: v_sub_f32_e32 v0, v0, v1
48 ; GCN-NEXT: s_setpc_b64 s[30:31]
50 ; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
52 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53 ; GFX10PLUS-NEXT: v_sub_f32_e32 v0, v0, v1
54 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
55 %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
59 define <2 x float> @v_constained_fsub_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
60 ; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
62 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63 ; GCN-NEXT: v_sub_f32_e32 v0, v0, v2
64 ; GCN-NEXT: v_sub_f32_e32 v1, v1, v3
65 ; GCN-NEXT: s_setpc_b64 s[30:31]
67 ; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
69 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
71 ; GFX10-NEXT: v_sub_f32_e32 v1, v1, v3
72 ; GFX10-NEXT: s_setpc_b64 s[30:31]
74 ; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
76 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77 ; GFX11-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
78 ; GFX11-NEXT: s_setpc_b64 s[30:31]
79 %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
83 define <2 x float> @v_constained_fsub_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
84 ; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
86 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87 ; GCN-NEXT: v_sub_f32_e32 v0, v0, v2
88 ; GCN-NEXT: v_sub_f32_e32 v1, v1, v3
89 ; GCN-NEXT: s_setpc_b64 s[30:31]
91 ; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
93 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
95 ; GFX10-NEXT: v_sub_f32_e32 v1, v1, v3
96 ; GFX10-NEXT: s_setpc_b64 s[30:31]
98 ; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
100 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101 ; GFX11-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
102 ; GFX11-NEXT: s_setpc_b64 s[30:31]
103 %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
107 define <2 x float> @v_constained_fsub_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
108 ; GCN-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
110 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111 ; GCN-NEXT: v_sub_f32_e32 v0, v0, v2
112 ; GCN-NEXT: v_sub_f32_e32 v1, v1, v3
113 ; GCN-NEXT: s_setpc_b64 s[30:31]
115 ; GFX10-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
117 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v2
119 ; GFX10-NEXT: v_sub_f32_e32 v1, v1, v3
120 ; GFX10-NEXT: s_setpc_b64 s[30:31]
122 ; GFX11-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
124 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125 ; GFX11-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
126 ; GFX11-NEXT: s_setpc_b64 s[30:31]
127 %val = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
131 define <3 x float> @v_constained_fsub_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
132 ; GCN-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
134 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
135 ; GCN-NEXT: v_sub_f32_e32 v0, v0, v3
136 ; GCN-NEXT: v_sub_f32_e32 v1, v1, v4
137 ; GCN-NEXT: v_sub_f32_e32 v2, v2, v5
138 ; GCN-NEXT: s_setpc_b64 s[30:31]
140 ; GFX10-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
142 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v3
144 ; GFX10-NEXT: v_sub_f32_e32 v1, v1, v4
145 ; GFX10-NEXT: v_sub_f32_e32 v2, v2, v5
146 ; GFX10-NEXT: s_setpc_b64 s[30:31]
148 ; GFX11-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
150 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151 ; GFX11-NEXT: v_dual_sub_f32 v0, v0, v3 :: v_dual_sub_f32 v1, v1, v4
152 ; GFX11-NEXT: v_sub_f32_e32 v2, v2, v5
153 ; GFX11-NEXT: s_setpc_b64 s[30:31]
154 %val = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float> %x, <3 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
158 define amdgpu_ps float @s_constained_fsub_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
159 ; GCN-LABEL: s_constained_fsub_f32_fpexcept_strict:
161 ; GCN-NEXT: v_mov_b32_e32 v0, s3
162 ; GCN-NEXT: v_sub_f32_e32 v0, s2, v0
163 ; GCN-NEXT: ; return to shader part epilog
165 ; GFX10PLUS-LABEL: s_constained_fsub_f32_fpexcept_strict:
166 ; GFX10PLUS: ; %bb.0:
167 ; GFX10PLUS-NEXT: v_sub_f32_e64 v0, s2, s3
168 ; GFX10PLUS-NEXT: ; return to shader part epilog
169 %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
173 define float @v_constained_fsub_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
174 ; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
176 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177 ; GCN-NEXT: v_sub_f32_e64 v0, |v0|, v1
178 ; GCN-NEXT: s_setpc_b64 s[30:31]
180 ; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
181 ; GFX10PLUS: ; %bb.0:
182 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183 ; GFX10PLUS-NEXT: v_sub_f32_e64 v0, |v0|, v1
184 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
185 %fabs.x = call float @llvm.fabs.f32(float %x) #0
186 %val = call float @llvm.experimental.constrained.fsub.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
190 define float @v_constained_fsub_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
191 ; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
193 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194 ; GCN-NEXT: v_sub_f32_e64 v0, v0, |v1|
195 ; GCN-NEXT: s_setpc_b64 s[30:31]
197 ; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
198 ; GFX10PLUS: ; %bb.0:
199 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200 ; GFX10PLUS-NEXT: v_sub_f32_e64 v0, v0, |v1|
201 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
202 %fabs.y = call float @llvm.fabs.f32(float %y) #0
203 %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
207 define float @v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
208 ; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
210 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211 ; GCN-NEXT: v_sub_f32_e64 v0, -|v0|, v1
212 ; GCN-NEXT: s_setpc_b64 s[30:31]
214 ; GFX10PLUS-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
215 ; GFX10PLUS: ; %bb.0:
216 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217 ; GFX10PLUS-NEXT: v_sub_f32_e64 v0, -|v0|, v1
218 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
219 %fabs.x = call float @llvm.fabs.f32(float %x) #0
220 %neg.fabs.x = fneg float %fabs.x
221 %val = call float @llvm.experimental.constrained.fsub.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
; Intrinsics called by the test functions in this file: llvm.fabs.f32 feeds the
; source-modifier (|x|, -|x|) tests, and the constrained fsub intrinsics are
; exercised for the scalar f32, <2 x float> and <3 x float> cases.
225 declare float @llvm.fabs.f32(float)
226 declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
227 declare <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float>, <2 x float>, metadata, metadata)
228 declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
; Attribute group #0 is attached to every test function and to the fabs call
; sites in this file; it marks them strictfp.
230 attributes #0 = { strictfp }