; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s
; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s
; The second invocation omits -mtriple: make sure the pass doesn't crash with no triple.
; No-triple case: the NOOP-prefixed checks expect the 2.5 ulp fdiv to be left
; exactly as written (still a plain fdiv carrying !fpmath !0).
; NOTE(review): attribute group #3 has no visible definition in this file
; (only #0, #1 and #2 are defined) -- confirm whether #1 was intended.
; NOOP-LABEL: @noop_fdiv_fpmath(
; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0
define amdgpu_kernel void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 {
  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out
  ret void
}
; Scalar f32 fdiv under attribute group #1 (nounwind only).
; Expected by the checks below:
;   - no metadata, 0.5 ulp and 1 ulp divisions stay as fdiv;
;   - unflagged 2.5 ulp and 3 ulp divisions become calls to
;     @llvm.amdgcn.fdiv.fast, keeping their !fpmath metadata;
;   - divisions carrying the fast or arcp flag stay as fdiv.
; CHECK-LABEL: @fdiv_fpmath(
; CHECK: %no.md = fdiv float %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3
; CHECK: %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
; CHECK: %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
define amdgpu_kernel void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
  ; Volatile stores keep every quotient live so each division appears in the output.
  %no.md = fdiv float %a, %b
  store volatile float %no.md, float addrspace(1)* %out

  %md.half.ulp = fdiv float %a, %b, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  %md.1ulp = fdiv float %a, %b, !fpmath !2
  store volatile float %md.1ulp, float addrspace(1)* %out

  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  %md.3ulp = fdiv float %a, %b, !fpmath !3
  store volatile float %md.3ulp, float addrspace(1)* %out

  %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
  store volatile float %fast.md.25ulp, float addrspace(1)* %out

  %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
  store volatile float %arcp.md.25ulp, float addrspace(1)* %out
  ret void
}
; Scalar reciprocal pattern (constant 1.0 numerator) under attribute group #1.
; The checks below expect every variant -- with or without !fpmath, with or
; without the arcp/fast flags -- to remain an fdiv instruction.
; CHECK-LABEL: @rcp_fdiv_fpmath(
; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}}
; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0
; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1
; CHECK: %arcp.no.md = fdiv arcp float 1.000000e+00, %x{{$}}
; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0
; CHECK: %fast.no.md = fdiv fast float 1.000000e+00, %x{{$}}
; CHECK: %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0
define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
  %no.md = fdiv float 1.0, %x
  store volatile float %no.md, float addrspace(1)* %out

  %md.25ulp = fdiv float 1.0, %x, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  %md.half.ulp = fdiv float 1.0, %x, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  %arcp.no.md = fdiv arcp float 1.0, %x
  store volatile float %arcp.no.md, float addrspace(1)* %out

  %arcp.25ulp = fdiv arcp float 1.0, %x, !fpmath !0
  store volatile float %arcp.25ulp, float addrspace(1)* %out

  %fast.no.md = fdiv fast float 1.0, %x
  store volatile float %fast.no.md, float addrspace(1)* %out

  %fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0
  store volatile float %fast.25ulp, float addrspace(1)* %out
  ret void
}
; <2 x float> fdiv under attribute group #1.
; The checks below expect the no-metadata, 0.5 ulp and 1 ulp vector divisions
; to stay as fdiv, and the 2.5 ulp division to be split per element:
; extractelement of both operands, a call to @llvm.amdgcn.fdiv.fast, and an
; insertelement that rebuilds the result (the last one keeps the name %md.25ulp).
; CHECK-LABEL: @fdiv_fpmath_vector(
; CHECK: %no.md = fdiv <2 x float> %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
; CHECK: %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2

; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]]), !fpmath !0
; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]]), !fpmath !0
; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1
define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 {
  %no.md = fdiv <2 x float> %a, %b
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
  store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

  %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
  store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out

  %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0
  store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out
  ret void
}
; <2 x float> reciprocal pattern with a splat 1.0 numerator, attribute group #1.
; The checks below expect every variant to remain a vector fdiv; the final
; check pins the last volatile store.
; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
; CHECK: %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1
; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
; CHECK: %arcp.25ulp = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
; CHECK: %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
  %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  %md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1
  store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out

  %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out

  %fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
  store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

  %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
  store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

  %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
  ret void
}
; <2 x float> division whose constant numerator is <1.0, 2.0> (not a splat),
; attribute group #1. The checks below expect every variant to remain a
; vector fdiv. The no-metadata checks are anchored with {{$}} so they cannot
; match a line that has trailing metadata.
; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
; CHECK: %arcp.25ulp = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
; CHECK: %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
  %no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out

  %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out

  %fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x
  store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out

  %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
  store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

  %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
  ret void
}
; Numerator is a non-constant vector whose element 0 has been overwritten
; with 1.0 via insertelement. The checks below expect both the arcp and the
; fast 2.5 ulp divisions to remain whole-vector fdiv instructions.
; FIXME: Should be able to get fdiv for 1.0 component
; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant(
; CHECK: %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
; CHECK: store volatile <2 x float> %arcp.25ulp

; CHECK: %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
; CHECK: store volatile <2 x float> %fast.25ulp
define amdgpu_kernel void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 {
  %x.insert = insertelement <2 x float> %x, float 1.0, i32 0

  %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
  store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out

  %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
  store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
  ret void
}
; Same seven scalar divisions as @fdiv_fpmath, but under attribute group #2,
; which adds "target-features"="+fp32-denormals". The checks below expect
; every division -- including the 2.5 ulp and 3 ulp ones -- to stay as fdiv.
; CHECK-LABEL: @fdiv_fpmath_f32_denormals(
; CHECK: %no.md = fdiv float %a, %b{{$}}
; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
; CHECK: %md.25ulp = fdiv float %a, %b, !fpmath !0
; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3
; CHECK: %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
; CHECK: %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
  %no.md = fdiv float %a, %b
  store volatile float %no.md, float addrspace(1)* %out

  %md.half.ulp = fdiv float %a, %b, !fpmath !1
  store volatile float %md.half.ulp, float addrspace(1)* %out

  %md.1ulp = fdiv float %a, %b, !fpmath !2
  store volatile float %md.1ulp, float addrspace(1)* %out

  %md.25ulp = fdiv float %a, %b, !fpmath !0
  store volatile float %md.25ulp, float addrspace(1)* %out

  %md.3ulp = fdiv float %a, %b, !fpmath !3
  store volatile float %md.3ulp, float addrspace(1)* %out

  %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
  store volatile float %fast.md.25ulp, float addrspace(1)* %out

  %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
  store volatile float %arcp.md.25ulp, float addrspace(1)* %out
  ret void
}
; Attribute groups referenced by the kernels in this file.
;   #1: nounwind only.
;   #2: nounwind plus the +fp32-denormals target feature.
; NOTE(review): #0 is not referenced by any function visible in this file.
attributes #0 = { nounwind optnone noinline }
attributes #1 = { nounwind }
attributes #2 = { nounwind "target-features"="+fp32-denormals" }

; The !fpmath operands must be printed unchanged and keep their numbering,
; since the per-function checks refer to them by index.
; CHECK: !0 = !{float 2.500000e+00}
; CHECK: !1 = !{float 5.000000e-01}
; CHECK: !2 = !{float 1.000000e+00}
; CHECK: !3 = !{float 3.000000e+00}

; Accuracy bounds in ULPs: !0 = 2.5, !1 = 0.5, !2 = 1.0, !3 = 3.0.
!0 = !{float 2.500000e+00}
!1 = !{float 5.000000e-01}
!2 = !{float 1.000000e+00}
!3 = !{float 3.000000e+00}