1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefix=GFX78 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX78 %s
; Floor of a plain double argument, with no attribute group on the function
; and no fast-math flags on the call.
; The GFX6 checks expect an expansion: v_fract_f64 of the input, v_min_f64
; against the constant built in s[4:5] (s4 = -1, s5 = 0x3fefffff), a
; v_cmp_o_f64 / v_cndmask_b32 pair on the input, then v_add_f64 of the input
; and the negated value. The GFX78 checks expect a single v_floor_f64_e32.
; NOTE(review): v_cmp_o_f64 sets vcc for ordered (non-NaN) inputs, and the
; v_cndmask_b32 pair then appears to pick the original input in that case,
; which would make the final v_add_f64 compute x - x. These assertions only
; record llc output; confirm the compare predicate in the legalizer and
; regenerate the checks if it changes.
6 define double @v_floor_f64_ieee(double %x) {
7 ; GFX6-LABEL: v_floor_f64_ieee:
9 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10 ; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
11 ; GFX6-NEXT: s_mov_b32 s4, -1
12 ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
13 ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
14 ; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
15 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
16 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
17 ; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
18 ; GFX6-NEXT: s_setpc_b64 s[30:31]
20 ; GFX78-LABEL: v_floor_f64_ieee:
22 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
24 ; GFX78-NEXT: s_setpc_b64 s[30:31]
25 %result = call double @llvm.floor.f64(double %x)
; Floor of a double argument where the intrinsic call carries the nnan flag.
; The GFX6 checks go straight from v_min_f64 to v_add_f64: no v_cmp_o_f64 or
; v_cndmask_b32 instructions are expected in this variant.
; The GFX78 checks expect a single v_floor_f64_e32.
29 define double @v_floor_f64_ieee_nnan(double %x) {
30 ; GFX6-LABEL: v_floor_f64_ieee_nnan:
32 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
34 ; GFX6-NEXT: s_mov_b32 s4, -1
35 ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
36 ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
37 ; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
38 ; GFX6-NEXT: s_setpc_b64 s[30:31]
40 ; GFX78-LABEL: v_floor_f64_ieee_nnan:
42 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43 ; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
44 ; GFX78-NEXT: s_setpc_b64 s[30:31]
45 %result = call nnan double @llvm.floor.f64(double %x)
; Floor of the negated argument: %x goes through fneg before the intrinsic.
; No separate negate instruction is expected. In the GFX6 checks the negation
; shows up as -v[0:1] on the v_fract_f64_e64 and v_add_f64 operands, while
; v_cmp_o_f64 still compares the unmodified v[0:1].
; The GFX78 checks expect v_floor_f64_e64 with a -v[0:1] operand.
49 define double @v_floor_f64_ieee_fneg(double %x) {
50 ; GFX6-LABEL: v_floor_f64_ieee_fneg:
52 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53 ; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
54 ; GFX6-NEXT: s_mov_b32 s4, -1
55 ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
56 ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
57 ; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
58 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
59 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
60 ; GFX6-NEXT: v_add_f64 v[0:1], -v[0:1], -v[2:3]
61 ; GFX6-NEXT: s_setpc_b64 s[30:31]
63 ; GFX78-LABEL: v_floor_f64_ieee_fneg:
65 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66 ; GFX78-NEXT: v_floor_f64_e64 v[0:1], -v[0:1]
67 ; GFX78-NEXT: s_setpc_b64 s[30:31]
68 %neg.x = fneg double %x
69 %result = call double @llvm.floor.f64(double %neg.x)
; Floor of a plain double argument in a function tagged with attribute
; group #1, which this file defines as "amdgpu-ieee"="false".
; The GFX6 checks expect the v_fract_f64 / v_min_f64 / v_cmp_o_f64 /
; v_cndmask_b32 / v_add_f64 expansion; the GFX78 checks expect a single
; v_floor_f64_e32.
73 define double @v_floor_f64_nonieee(double %x) #1 {
74 ; GFX6-LABEL: v_floor_f64_nonieee:
76 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77 ; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
78 ; GFX6-NEXT: s_mov_b32 s4, -1
79 ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
80 ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
81 ; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
82 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
83 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
84 ; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
85 ; GFX6-NEXT: s_setpc_b64 s[30:31]
87 ; GFX78-LABEL: v_floor_f64_nonieee:
89 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90 ; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
91 ; GFX78-NEXT: s_setpc_b64 s[30:31]
92 %result = call double @llvm.floor.f64(double %x)
; Floor with the nnan flag on the call, in a function tagged with attribute
; group #1 ("amdgpu-ieee"="false" in this file).
; The GFX6 checks go straight from v_min_f64 to v_add_f64, with no
; v_cmp_o_f64 or v_cndmask_b32 instructions expected.
; The GFX78 checks expect a single v_floor_f64_e32.
96 define double @v_floor_f64_nonieee_nnan(double %x) #1 {
97 ; GFX6-LABEL: v_floor_f64_nonieee_nnan:
99 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100 ; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
101 ; GFX6-NEXT: s_mov_b32 s4, -1
102 ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
103 ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
104 ; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
105 ; GFX6-NEXT: s_setpc_b64 s[30:31]
107 ; GFX78-LABEL: v_floor_f64_nonieee_nnan:
109 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110 ; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
111 ; GFX78-NEXT: s_setpc_b64 s[30:31]
112 %result = call nnan double @llvm.floor.f64(double %x)
; Floor of the negated argument in a function tagged with attribute group #1
; ("amdgpu-ieee"="false" in this file).
; In the GFX6 checks the negation appears as -v[0:1] on the v_fract_f64_e64
; and v_add_f64 operands, while v_cmp_o_f64 compares the unmodified v[0:1].
; The GFX78 checks expect v_floor_f64_e64 with a -v[0:1] operand.
116 define double @v_floor_f64_non_ieee_fneg(double %x) #1 {
117 ; GFX6-LABEL: v_floor_f64_non_ieee_fneg:
119 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120 ; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
121 ; GFX6-NEXT: s_mov_b32 s4, -1
122 ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
123 ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
124 ; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
125 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
126 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
127 ; GFX6-NEXT: v_add_f64 v[0:1], -v[0:1], -v[2:3]
128 ; GFX6-NEXT: s_setpc_b64 s[30:31]
130 ; GFX78-LABEL: v_floor_f64_non_ieee_fneg:
132 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133 ; GFX78-NEXT: v_floor_f64_e64 v[0:1], -v[0:1]
134 ; GFX78-NEXT: s_setpc_b64 s[30:31]
135 %neg.x = fneg double %x
136 %result = call double @llvm.floor.f64(double %neg.x)
; Floor of the absolute value: %x goes through llvm.fabs.f64 before the floor
; intrinsic.
; No separate abs instruction is expected. In the GFX6 checks the absolute
; value appears as |v[0:1]| on the v_fract_f64_e64 and v_add_f64 operands,
; while v_cmp_o_f64 compares the unmodified v[0:1].
; The GFX78 checks expect v_floor_f64_e64 with a |v[0:1]| operand.
140 define double @v_floor_f64_fabs(double %x) {
141 ; GFX6-LABEL: v_floor_f64_fabs:
143 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
144 ; GFX6-NEXT: v_fract_f64_e64 v[2:3], |v[0:1]|
145 ; GFX6-NEXT: s_mov_b32 s4, -1
146 ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
147 ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
148 ; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
149 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
150 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
151 ; GFX6-NEXT: v_add_f64 v[0:1], |v[0:1]|, -v[2:3]
152 ; GFX6-NEXT: s_setpc_b64 s[30:31]
154 ; GFX78-LABEL: v_floor_f64_fabs:
156 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157 ; GFX78-NEXT: v_floor_f64_e64 v[0:1], |v[0:1]|
158 ; GFX78-NEXT: s_setpc_b64 s[30:31]
159 %abs.x = call double @llvm.fabs.f64(double %x)
160 %result = call double @llvm.floor.f64(double %abs.x)
; Floor of the negated absolute value: %x goes through llvm.fabs.f64, then
; fneg, then the floor intrinsic.
; In the GFX6 checks both operations appear combined as -|v[0:1]| on the
; v_fract_f64_e64 and v_add_f64 operands, while v_cmp_o_f64 compares the
; unmodified v[0:1].
; The GFX78 checks expect v_floor_f64_e64 with a -|v[0:1]| operand.
164 define double @v_floor_f64_fneg_fabs(double %x) {
165 ; GFX6-LABEL: v_floor_f64_fneg_fabs:
167 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168 ; GFX6-NEXT: v_fract_f64_e64 v[2:3], -|v[0:1]|
169 ; GFX6-NEXT: s_mov_b32 s4, -1
170 ; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff
171 ; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], s[4:5]
172 ; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
173 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
174 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
175 ; GFX6-NEXT: v_add_f64 v[0:1], -|v[0:1]|, -v[2:3]
176 ; GFX6-NEXT: s_setpc_b64 s[30:31]
178 ; GFX78-LABEL: v_floor_f64_fneg_fabs:
180 ; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181 ; GFX78-NEXT: v_floor_f64_e64 v[0:1], -|v[0:1]|
182 ; GFX78-NEXT: s_setpc_b64 s[30:31]
183 %abs.x = call double @llvm.fabs.f64(double %x)
184 %neg.abs.x = fneg double %abs.x
185 %result = call double @llvm.floor.f64(double %neg.abs.x)
; Floor of a double passed as an inreg argument to an amdgpu_ps function; the
; result is bitcast to <2 x float> for the return.
; The checks show the input read from s[2:3] and the result produced in
; v[0:1]. In the GFX6 checks s2/s3 are first copied into v2/v3 with
; v_mov_b32 before the v_cndmask_b32 pair.
; The GFX78 checks expect a single v_floor_f64_e32 reading s[2:3].
189 define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) {
190 ; GFX6-LABEL: s_floor_f64:
192 ; GFX6-NEXT: v_fract_f64_e32 v[0:1], s[2:3]
193 ; GFX6-NEXT: s_mov_b32 s0, -1
194 ; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
195 ; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
196 ; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
197 ; GFX6-NEXT: v_mov_b32_e32 v2, s2
198 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
199 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
200 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
201 ; GFX6-NEXT: v_add_f64 v[0:1], s[2:3], -v[0:1]
202 ; GFX6-NEXT: ; return to shader part epilog
204 ; GFX78-LABEL: s_floor_f64:
206 ; GFX78-NEXT: v_floor_f64_e32 v[0:1], s[2:3]
207 ; GFX78-NEXT: ; return to shader part epilog
208 %result = call double @llvm.floor.f64(double %x)
209 %cast = bitcast double %result to <2 x float>
210 ret <2 x float> %cast
; Floor of the negated inreg double argument of an amdgpu_ps function; the
; result is bitcast to <2 x float> for the return.
; In the GFX6 checks the negation appears as -s[2:3] on the v_fract_f64_e64
; and v_add_f64 operands, while v_cmp_o_f64 and the v_mov_b32 copies use the
; unmodified s[2:3] / s2 / s3.
; The GFX78 checks expect v_floor_f64_e64 with a -s[2:3] operand.
213 define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) {
214 ; GFX6-LABEL: s_floor_f64_fneg:
216 ; GFX6-NEXT: v_fract_f64_e64 v[0:1], -s[2:3]
217 ; GFX6-NEXT: s_mov_b32 s0, -1
218 ; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
219 ; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
220 ; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
221 ; GFX6-NEXT: v_mov_b32_e32 v2, s2
222 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
223 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
224 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
225 ; GFX6-NEXT: v_add_f64 v[0:1], -s[2:3], -v[0:1]
226 ; GFX6-NEXT: ; return to shader part epilog
228 ; GFX78-LABEL: s_floor_f64_fneg:
230 ; GFX78-NEXT: v_floor_f64_e64 v[0:1], -s[2:3]
231 ; GFX78-NEXT: ; return to shader part epilog
232 %neg.x = fneg double %x
233 %result = call double @llvm.floor.f64(double %neg.x)
234 %cast = bitcast double %result to <2 x float>
235 ret <2 x float> %cast
; Floor of the absolute value of the inreg double argument of an amdgpu_ps
; function; the result is bitcast to <2 x float> for the return.
; In the GFX6 checks the absolute value appears as |s[2:3]| on the
; v_fract_f64_e64 and v_add_f64 operands, while v_cmp_o_f64 and the v_mov_b32
; copies use the unmodified s[2:3] / s2 / s3.
; The GFX78 checks expect v_floor_f64_e64 with a |s[2:3]| operand.
238 define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) {
239 ; GFX6-LABEL: s_floor_f64_fabs:
241 ; GFX6-NEXT: v_fract_f64_e64 v[0:1], |s[2:3]|
242 ; GFX6-NEXT: s_mov_b32 s0, -1
243 ; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
244 ; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
245 ; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
246 ; GFX6-NEXT: v_mov_b32_e32 v2, s2
247 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
248 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
249 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
250 ; GFX6-NEXT: v_add_f64 v[0:1], |s[2:3]|, -v[0:1]
251 ; GFX6-NEXT: ; return to shader part epilog
253 ; GFX78-LABEL: s_floor_f64_fabs:
255 ; GFX78-NEXT: v_floor_f64_e64 v[0:1], |s[2:3]|
256 ; GFX78-NEXT: ; return to shader part epilog
257 %abs.x = call double @llvm.fabs.f64(double %x)
258 %result = call double @llvm.floor.f64(double %abs.x)
259 %cast = bitcast double %result to <2 x float>
260 ret <2 x float> %cast
; Floor of the negated absolute value of the inreg double argument of an
; amdgpu_ps function (llvm.fabs.f64, then fneg, then floor); the result is
; bitcast to <2 x float> for the return.
; In the GFX6 checks both operations appear combined as -|s[2:3]| on the
; v_fract_f64_e64 and v_add_f64 operands, while v_cmp_o_f64 and the v_mov_b32
; copies use the unmodified s[2:3] / s2 / s3.
; The GFX78 checks expect v_floor_f64_e64 with a -|s[2:3]| operand.
263 define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) {
264 ; GFX6-LABEL: s_floor_f64_fneg_fabs:
266 ; GFX6-NEXT: v_fract_f64_e64 v[0:1], -|s[2:3]|
267 ; GFX6-NEXT: s_mov_b32 s0, -1
268 ; GFX6-NEXT: s_mov_b32 s1, 0x3fefffff
269 ; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], s[0:1]
270 ; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
271 ; GFX6-NEXT: v_mov_b32_e32 v2, s2
272 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
273 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
274 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
275 ; GFX6-NEXT: v_add_f64 v[0:1], -|s[2:3]|, -v[0:1]
276 ; GFX6-NEXT: ; return to shader part epilog
278 ; GFX78-LABEL: s_floor_f64_fneg_fabs:
280 ; GFX78-NEXT: v_floor_f64_e64 v[0:1], -|s[2:3]|
281 ; GFX78-NEXT: ; return to shader part epilog
282 %abs.x = call double @llvm.fabs.f64(double %x)
283 %neg.abs.x = fneg double %abs.x
284 %result = call double @llvm.floor.f64(double %neg.abs.x)
285 %cast = bitcast double %result to <2 x float>
286 ret <2 x float> %cast
; Declarations of the two intrinsics called by the test functions in this
; file; both carry attribute group #0.
289 declare double @llvm.floor.f64(double) #0
290 declare double @llvm.fabs.f64(double) #0
; Group #0 is used on the intrinsic declarations above. Group #1 sets
; "amdgpu-ieee"="false" and is attached to the test functions in this file
; that are defined with a trailing #1.
292 attributes #0 = { nounwind readnone speculatable willreturn }
293 attributes #1 = { "amdgpu-ieee"="false" }