1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
3 ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s
; Baseline f32 case: %src is passed directly to llvm.amdgcn.rsq.clamp.f32.
; Expected output for the tahiti run (SI prefix): one v_rsq_clamp_f32_e32.
; Expected output for the tonga run (VI prefix): v_rsq_f32_e32, then
; v_min_f32 against 0x7f7fffff and v_max_f32 against 0xff7fffff, which are
; the bit patterns of the largest-magnitude finite positive and negative
; single-precision values.
5 define float @v_rsq_clamp_f32(float %src) #0 {
6 ; SI-LABEL: v_rsq_clamp_f32:
8 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9 ; SI-NEXT: v_rsq_clamp_f32_e32 v0, v0
10 ; SI-NEXT: s_setpc_b64 s[30:31]
12 ; VI-LABEL: v_rsq_clamp_f32:
14 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15 ; VI-NEXT: v_rsq_f32_e32 v0, v0
16 ; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0
17 ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
18 ; VI-NEXT: s_setpc_b64 s[30:31]
19 %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
; f32 case with an fabs on the input: llvm.fabs.f32(%src) feeds
; llvm.amdgcn.rsq.clamp.f32. The expected output has no separate fabs
; instruction; the absolute value appears as the |v0| source modifier on the
; e64 form of the rsq instruction (v_rsq_clamp_f32_e64 for the tahiti run,
; v_rsq_f32_e64 for the tonga run). The tonga run additionally expects the
; same v_min_f32 / v_max_f32 pair as the plain f32 test.
23 define float @v_rsq_clamp_fabs_f32(float %src) #0 {
24 ; SI-LABEL: v_rsq_clamp_fabs_f32:
26 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27 ; SI-NEXT: v_rsq_clamp_f32_e64 v0, |v0|
28 ; SI-NEXT: s_setpc_b64 s[30:31]
30 ; VI-LABEL: v_rsq_clamp_fabs_f32:
32 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; VI-NEXT: v_rsq_f32_e64 v0, |v0|
34 ; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0
35 ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
36 ; VI-NEXT: s_setpc_b64 s[30:31]
37 %fabs.src = call float @llvm.fabs.f32(float %src)
38 %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %fabs.src)
; Baseline f64 case: %src is passed directly to llvm.amdgcn.rsq.clamp.f64.
; Expected output for the tahiti run: one v_rsq_clamp_f64_e32 on v[0:1].
; Expected output for the tonga run: v_rsq_f64_e32, then v_min_f64 and
; v_max_f64 against a 64-bit bound held in s[4:5]. s4 is set to -1 once;
; only s5 is rewritten between the min (0x7fefffff) and the max (0xffefffff).
; NOTE(review): with s5 as the high dword these are the bit patterns of
; +/- the largest finite double - confirm the register-pair layout.
42 define double @v_rsq_clamp_f64(double %src) #0 {
43 ; SI-LABEL: v_rsq_clamp_f64:
45 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46 ; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], v[0:1]
47 ; SI-NEXT: s_setpc_b64 s[30:31]
49 ; VI-LABEL: v_rsq_clamp_f64:
51 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52 ; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
53 ; VI-NEXT: s_mov_b32 s4, -1
54 ; VI-NEXT: s_mov_b32 s5, 0x7fefffff
55 ; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
56 ; VI-NEXT: s_mov_b32 s5, 0xffefffff
57 ; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
58 ; VI-NEXT: s_setpc_b64 s[30:31]
59 %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
; f64 case with an fabs on the input: llvm.fabs.f64(%src) feeds
; llvm.amdgcn.rsq.clamp.f64. The expected output carries the absolute value
; as the |v[0:1]| source modifier on the e64 form of the rsq instruction
; rather than as a separate instruction. The tonga run expects the same
; s[4:5]-based v_min_f64 / v_max_f64 sequence as the plain f64 test.
63 define double @v_rsq_clamp_fabs_f64(double %src) #0 {
64 ; SI-LABEL: v_rsq_clamp_fabs_f64:
66 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67 ; SI-NEXT: v_rsq_clamp_f64_e64 v[0:1], |v[0:1]|
68 ; SI-NEXT: s_setpc_b64 s[30:31]
70 ; VI-LABEL: v_rsq_clamp_fabs_f64:
72 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73 ; VI-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]|
74 ; VI-NEXT: s_mov_b32 s4, -1
75 ; VI-NEXT: s_mov_b32 s5, 0x7fefffff
76 ; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
77 ; VI-NEXT: s_mov_b32 s5, 0xffefffff
78 ; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
79 ; VI-NEXT: s_setpc_b64 s[30:31]
80 %fabs.src = call double @llvm.fabs.f64(double %src)
81 %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src)
; f32 case with an undef operand; the function takes no arguments.
; The expected output still contains the rsq instruction (it is not folded
; away), reading s4 as its source without any preceding write to s4.
; The tonga run keeps the v_min_f32 / v_max_f32 pair after it.
85 define float @v_rsq_clamp_undef_f32() #0 {
86 ; SI-LABEL: v_rsq_clamp_undef_f32:
88 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89 ; SI-NEXT: v_rsq_clamp_f32_e32 v0, s4
90 ; SI-NEXT: s_setpc_b64 s[30:31]
92 ; VI-LABEL: v_rsq_clamp_undef_f32:
94 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95 ; VI-NEXT: v_rsq_f32_e32 v0, s4
96 ; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0
97 ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
98 ; VI-NEXT: s_setpc_b64 s[30:31]
99 %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
; f64 case with an undef operand; the function takes no arguments.
; The expected output still contains the rsq instruction, reading s[4:5] as
; its source. In the tonga run that read happens before s4 / s5 are loaded
; with the clamp bound, so the same register pair is first used as the
; (undefined) input and then reused for the v_min_f64 / v_max_f64 bounds.
103 define double @v_rsq_clamp_undef_f64() #0 {
104 ; SI-LABEL: v_rsq_clamp_undef_f64:
106 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107 ; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], s[4:5]
108 ; SI-NEXT: s_setpc_b64 s[30:31]
110 ; VI-LABEL: v_rsq_clamp_undef_f64:
112 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113 ; VI-NEXT: v_rsq_f64_e32 v[0:1], s[4:5]
114 ; VI-NEXT: s_mov_b32 s4, -1
115 ; VI-NEXT: s_mov_b32 s5, 0x7fefffff
116 ; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
117 ; VI-NEXT: s_mov_b32 s5, 0xffefffff
118 ; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
119 ; VI-NEXT: s_setpc_b64 s[30:31]
120 %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef)
121 ret double %rsq_clamp
; Same IR as v_rsq_clamp_f32 but with attribute group #2, which sets
; "amdgpu-ieee"="false". The expected instruction sequence for both runs is
; identical to the one checked for v_rsq_clamp_f32, so this test pins that
; the attribute does not change the selected code for the f32 intrinsic.
124 define float @v_rsq_clamp_f32_non_ieee(float %src) #2 {
125 ; SI-LABEL: v_rsq_clamp_f32_non_ieee:
127 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128 ; SI-NEXT: v_rsq_clamp_f32_e32 v0, v0
129 ; SI-NEXT: s_setpc_b64 s[30:31]
131 ; VI-LABEL: v_rsq_clamp_f32_non_ieee:
133 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134 ; VI-NEXT: v_rsq_f32_e32 v0, v0
135 ; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0
136 ; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
137 ; VI-NEXT: s_setpc_b64 s[30:31]
138 %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
; Same IR as v_rsq_clamp_f64 but with attribute group #2, which sets
; "amdgpu-ieee"="false". The expected instruction sequence for both runs is
; identical to the one checked for v_rsq_clamp_f64, including the s[4:5]
; bound setup around v_min_f64 / v_max_f64 in the tonga run.
142 define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
143 ; SI-LABEL: v_rsq_clamp_f64_non_ieee:
145 ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146 ; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], v[0:1]
147 ; SI-NEXT: s_setpc_b64 s[30:31]
149 ; VI-LABEL: v_rsq_clamp_f64_non_ieee:
151 ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152 ; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
153 ; VI-NEXT: s_mov_b32 s4, -1
154 ; VI-NEXT: s_mov_b32 s5, 0x7fefffff
155 ; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
156 ; VI-NEXT: s_mov_b32 s5, 0xffefffff
157 ; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
158 ; VI-NEXT: s_setpc_b64 s[30:31]
159 %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
160 ret double %rsq_clamp
; Intrinsics called by the test functions in this file: fabs and
; amdgcn.rsq.clamp, each in an f32 and an f64 overload. All four carry
; attribute group #1.
163 declare float @llvm.fabs.f32(float) #1
164 declare float @llvm.amdgcn.rsq.clamp.f32(float) #1
165 declare double @llvm.fabs.f64(double) #1
166 declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
; Attribute groups:
;   #0 - attached to the default test functions.
;   #1 - attached to the intrinsic declarations.
;   #2 - attached to the *_non_ieee test functions; it is #0 plus the string
;        attribute "amdgpu-ieee"="false".
168 attributes #0 = { nounwind }
169 attributes #1 = { nounwind readnone }
170 attributes #2 = { nounwind "amdgpu-ieee"="false" }