1 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s
2 ; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s
3 ; Make sure this doesn't crash with no triple
; Covers the second RUN line (NOOP prefix), which invokes opt without -mtriple.
; The expectation is that the fdiv and its !fpmath !0 metadata are left as-is.
; NOTE(review): attribute group #3 is referenced below but is not defined in the
; visible part of this file (only #0, #1 and #2 are) -- confirm this is intended.
5 ; NOOP-LABEL: @noop_fdiv_fpmath(
6 ; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0
7 define amdgpu_kernel void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 {
8 %md.25ulp = fdiv float %a, %b, !fpmath !0
9 store volatile float %md.25ulp, float addrspace(1)* %out
; Scalar f32 fdiv with attribute group #1 (f32 denormals "preserve-sign").
; Expected results, in the same order as the fdivs in the body:
;   no metadata, !1 (0.5) and !2 (1.0)  -> fdiv is kept
;   !0 (2.5) and !3 (3.0)               -> call to @llvm.amdgcn.fdiv.fast
;   fast / afn flags with !0            -> @llvm.amdgcn.rcp.f32 of %b, then an
;                                          fmul by %a carrying the same flags
; Every result is stored with a volatile store so that each fdiv stays live.
13 ; CHECK-LABEL: @fdiv_fpmath(
14 ; CHECK: %no.md = fdiv float %a, %b{{$}}
15 ; CHECK: %md.half.ulp = fdiv float %a, %b
16 ; CHECK: %md.1ulp = fdiv float %a, %b
17 ; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b)
18 ; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b)
19 ; CHECK: %[[FAST_RCP:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %b)
20 ; CHECK: %fast.md.25ulp = fmul fast float %a, %[[FAST_RCP]]
21 ; CHECK: %[[AFN_RCP:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %b)
22 ; CHECK: %afn.md.25ulp = fmul afn float %a, %[[AFN_RCP]]
23 define amdgpu_kernel void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
24 %no.md = fdiv float %a, %b
25 store volatile float %no.md, float addrspace(1)* %out
27 %md.half.ulp = fdiv float %a, %b, !fpmath !1
28 store volatile float %md.half.ulp, float addrspace(1)* %out
30 %md.1ulp = fdiv float %a, %b, !fpmath !2
31 store volatile float %md.1ulp, float addrspace(1)* %out
33 %md.25ulp = fdiv float %a, %b, !fpmath !0
34 store volatile float %md.25ulp, float addrspace(1)* %out
36 %md.3ulp = fdiv float %a, %b, !fpmath !3
37 store volatile float %md.3ulp, float addrspace(1)* %out
39 %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
40 store volatile float %fast.md.25ulp, float addrspace(1)* %out
42 %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0
43 store volatile float %afn.md.25ulp, float addrspace(1)* %out
; Scalar f32 reciprocal form (numerator is the constant 1.0), attribute group #1.
; Expected results, in the same order as the fdivs in the body:
;   no metadata and !1 (0.5)            -> fdiv is kept
;   !0 (2.5) without fast-math flags    -> plain call to @llvm.amdgcn.rcp.f32
;   afn / fast, with or without !0      -> @llvm.amdgcn.rcp.f32 call carrying
;                                          the same fast-math flag
48 ; CHECK-LABEL: @rcp_fdiv_fpmath(
49 ; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}}
50 ; CHECK: %md.25ulp = call float @llvm.amdgcn.rcp.f32(float %x)
51 ; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x
52 ; CHECK: %afn.no.md = call afn float @llvm.amdgcn.rcp.f32(float %x)
53 ; CHECK: %afn.25ulp = call afn float @llvm.amdgcn.rcp.f32(float %x)
54 ; CHECK: %fast.no.md = call fast float @llvm.amdgcn.rcp.f32(float %x)
55 ; CHECK: %fast.25ulp = call fast float @llvm.amdgcn.rcp.f32(float %x)
56 define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
57 %no.md = fdiv float 1.0, %x
58 store volatile float %no.md, float addrspace(1)* %out
60 %md.25ulp = fdiv float 1.0, %x, !fpmath !0
61 store volatile float %md.25ulp, float addrspace(1)* %out
63 %md.half.ulp = fdiv float 1.0, %x, !fpmath !1
64 store volatile float %md.half.ulp, float addrspace(1)* %out
66 %afn.no.md = fdiv afn float 1.0, %x
67 store volatile float %afn.no.md, float addrspace(1)* %out
69 %afn.25ulp = fdiv afn float 1.0, %x, !fpmath !0
70 store volatile float %afn.25ulp, float addrspace(1)* %out
72 %fast.no.md = fdiv fast float 1.0, %x
73 store volatile float %fast.no.md, float addrspace(1)* %out
75 %fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0
76 store volatile float %fast.25ulp, float addrspace(1)* %out
; <2 x float> fdiv, attribute group #1. The expected output is scalarized: each
; lane is extracted from %a and %b, divided, and re-inserted, with the final
; insertelement taking the name of the original vector fdiv.
;   no metadata, !1 (0.5) and !2 (1.0)  -> per-lane fdiv is kept
;   !0 (2.5)                            -> per-lane @llvm.amdgcn.fdiv.fast call
; The volatile stores between groups are matched as well (except after the last
; group), which keeps the four expansions from being confused with each other.
81 ; CHECK-LABEL: @fdiv_fpmath_vector(
82 ; CHECK: %[[NO_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
83 ; CHECK: %[[NO_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
84 ; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float %[[NO_A0]], %[[NO_B0]]
85 ; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
86 ; CHECK: %[[NO_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
87 ; CHECK: %[[NO_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
88 ; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float %[[NO_A1]], %[[NO_B1]]
89 ; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
90 ; CHECK: store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
92 ; CHECK: %[[HALF_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
93 ; CHECK: %[[HALF_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
94 ; CHECK: %[[HALF_FDIV0:[0-9]+]] = fdiv float %[[HALF_A0]], %[[HALF_B0]]
95 ; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[HALF_FDIV0]], i64 0
96 ; CHECK: %[[HALF_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
97 ; CHECK: %[[HALF_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
98 ; CHECK: %[[HALF_FDIV1:[0-9]+]] = fdiv float %[[HALF_A1]], %[[HALF_B1]]
99 ; CHECK: %md.half.ulp = insertelement <2 x float> %[[HALF_INS0]], float %[[HALF_FDIV1]], i64 1
100 ; CHECK: store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
102 ; CHECK: %[[ONE_A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
103 ; CHECK: %[[ONE_B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
104 ; CHECK: %[[ONE_FDIV0:[0-9]+]] = fdiv float %[[ONE_A0]], %[[ONE_B0]]
105 ; CHECK: %[[ONE_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[ONE_FDIV0]], i64 0
106 ; CHECK: %[[ONE_A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
107 ; CHECK: %[[ONE_B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
108 ; CHECK: %[[ONE_FDIV1:[0-9]+]] = fdiv float %[[ONE_A1]], %[[ONE_B1]]
109 ; CHECK: %md.1ulp = insertelement <2 x float> %[[ONE_INS0]], float %[[ONE_FDIV1]], i64 1
110 ; CHECK: store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out
112 ; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
113 ; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
114 ; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]])
115 ; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
116 ; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
117 ; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
118 ; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]])
119 ; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1
120 define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 {
121 %no.md = fdiv <2 x float> %a, %b
122 store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
124 %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
125 store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
127 %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
128 store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out
130 %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0
131 store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out
; <2 x float> reciprocal form with a splat numerator <1.0, 1.0>, attribute
; group #1. The expected output is scalarized per lane of %x.
;   no metadata and !1 (0.5)            -> per-lane fdiv 1.0, x is kept
;   afn / fast, with or without !0      -> per-lane @llvm.amdgcn.rcp.f32 call
;                                          carrying the same fast-math flag
; The expectation groups follow the order of the fdivs in the body:
; no.md, md.half.ulp, afn.no.md, fast.no.md, afn.25ulp, fast.25ulp.
136 ; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
137 ; CHECK: %[[NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
138 ; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[NO0]]
139 ; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
140 ; CHECK: %[[NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
141 ; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float 1.000000e+00, %[[NO1]]
142 ; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
143 ; CHECK: store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
145 ; CHECK: %[[HALF0:[0-9]+]] = extractelement <2 x float> %x, i64 0
146 ; CHECK: %[[HALF_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[HALF0]]
147 ; CHECK: %[[HALF_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[HALF_FDIV0]], i64 0
148 ; CHECK: %[[HALF1:[0-9]+]] = extractelement <2 x float> %x, i64 1
149 ; CHECK: %[[HALF_FDIV1:[0-9]+]] = fdiv float 1.000000e+00, %[[HALF1]]
150 ; CHECK: %md.half.ulp = insertelement <2 x float> %[[HALF_INS0]], float %[[HALF_FDIV1]], i64 1
151 ; CHECK: store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
153 ; CHECK: %[[AFN_NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
154 ; CHECK: %[[AFN_NO_FDIV0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO0]])
155 ; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_NO_FDIV0]], i64 0
156 ; CHECK: %[[AFN_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
157 ; CHECK: %[[AFN_NO_FDIV1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO1]])
158 ; CHECK: %afn.no.md = insertelement <2 x float> %[[AFN_NO_INS0]], float %[[AFN_NO_FDIV1]], i64 1
159 ; CHECK: store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out
161 ; CHECK: %[[FAST_NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
162 ; CHECK: %[[FAST_NO_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO0]])
163 ; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_NO_RCP0]], i64 0
164 ; CHECK: %[[FAST_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
165 ; CHECK: %[[FAST_NO_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO1]])
166 ; CHECK: %fast.no.md = insertelement <2 x float> %[[FAST_NO_INS0]], float %[[FAST_NO_RCP1]], i64 1
167 ; CHECK: store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
169 ; CHECK: %[[AFN_250:[0-9]+]] = extractelement <2 x float> %x, i64 0
170 ; CHECK: %[[AFN_25_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_250]])
171 ; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_25_RCP0]], i64 0
172 ; CHECK: %[[AFN_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
173 ; CHECK: %[[AFN_25_RCP1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_251]])
174 ; CHECK: %afn.25ulp = insertelement <2 x float> %[[AFN_25_INS0]], float %[[AFN_25_RCP1]], i64 1
175 ; CHECK: store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out
177 ; CHECK: %[[FAST_250:[0-9]+]] = extractelement <2 x float> %x, i64 0
178 ; CHECK: %[[FAST_25_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_250]])
179 ; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_25_RCP0]], i64 0
180 ; CHECK: %[[FAST_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
181 ; CHECK: %[[FAST_25_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_251]])
182 ; CHECK: %fast.25ulp = insertelement <2 x float> %[[FAST_25_INS0]], float %[[FAST_25_RCP1]], i64 1
183 ; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
184 define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
185 %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x
186 store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
188 %md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1
189 store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
191 %afn.no.md = fdiv afn <2 x float> <float 1.0, float 1.0>, %x
192 store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out
194 %fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
195 store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
197 %afn.25ulp = fdiv afn <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
198 store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out
200 %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
201 store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
; <2 x float> fdiv with a non-splat constant numerator <1.0, 2.0>, attribute
; group #1. The expected output is scalarized per lane of %x.
;   no metadata     -> per-lane fdiv is kept (1.0 / x0 and 2.0 / x1)
;   afn / fast      -> lane 0 (numerator 1.0) is a bare @llvm.amdgcn.rcp.f32
;                      call; lane 1 (numerator 2.0) is the rcp call followed by
;                      an fmul by 2.0, both carrying the same fast-math flag
; The same afn / fast shape is expected with and without !fpmath !0.
206 ; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
207 ; CHECK: %[[NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
208 ; CHECK: %[[NO_FDIV0:[0-9]+]] = fdiv float 1.000000e+00, %[[NO0]]
209 ; CHECK: %[[NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[NO_FDIV0]], i64 0
210 ; CHECK: %[[NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
211 ; CHECK: %[[NO_FDIV1:[0-9]+]] = fdiv float 2.000000e+00, %[[NO1]]
212 ; CHECK: %no.md = insertelement <2 x float> %[[NO_INS0]], float %[[NO_FDIV1]], i64 1
213 ; CHECK: store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
215 ; CHECK: %[[AFN_NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
216 ; CHECK: %[[AFN_NO_FDIV0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO0]])
217 ; CHECK: %[[AFN_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_NO_FDIV0]], i64 0
218 ; CHECK: %[[AFN_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
219 ; CHECK: %[[AFN_NO_FDIV1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_NO1]])
220 ; CHECK: %[[AFN_NO_MUL1:[0-9]+]] = fmul afn float 2.000000e+00, %[[AFN_NO_FDIV1]]
221 ; CHECK: %afn.no.md = insertelement <2 x float> %[[AFN_NO_INS0]], float %[[AFN_NO_MUL1]], i64 1
222 ; CHECK: store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out
224 ; CHECK: %[[FAST_NO0:[0-9]+]] = extractelement <2 x float> %x, i64 0
225 ; CHECK: %[[FAST_NO_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO0]])
226 ; CHECK: %[[FAST_NO_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_NO_RCP0]], i64 0
227 ; CHECK: %[[FAST_NO1:[0-9]+]] = extractelement <2 x float> %x, i64 1
228 ; CHECK: %[[FAST_NO_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_NO1]])
229 ; CHECK: %[[FAST_NO_MUL1:[0-9]+]] = fmul fast float 2.000000e+00, %[[FAST_NO_RCP1]]
230 ; CHECK: %fast.no.md = insertelement <2 x float> %[[FAST_NO_INS0]], float %[[FAST_NO_MUL1]], i64 1
231 ; CHECK: store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
233 ; CHECK: %[[AFN_250:[0-9]+]] = extractelement <2 x float> %x, i64 0
234 ; CHECK: %[[AFN_25_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_250]])
235 ; CHECK: %[[AFN_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_25_RCP0]], i64 0
236 ; CHECK: %[[AFN_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
237 ; CHECK: %[[AFN_25_RCP1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_251]])
238 ; CHECK: %[[AFN_25_MUL1:[0-9]+]] = fmul afn float 2.000000e+00, %[[AFN_25_RCP1]]
239 ; CHECK: %afn.25ulp = insertelement <2 x float> %[[AFN_25_INS0]], float %[[AFN_25_MUL1]], i64 1
240 ; CHECK: store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out
242 ; CHECK: %[[FAST_250:[0-9]+]] = extractelement <2 x float> %x, i64 0
243 ; CHECK: %[[FAST_25_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_250]])
244 ; CHECK: %[[FAST_25_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_25_RCP0]], i64 0
245 ; CHECK: %[[FAST_251:[0-9]+]] = extractelement <2 x float> %x, i64 1
246 ; CHECK: %[[FAST_25_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_251]])
247 ; CHECK: %[[FAST_25_MUL1:[0-9]+]] = fmul fast float 2.000000e+00, %[[FAST_25_RCP1]]
248 ; CHECK: %fast.25ulp = insertelement <2 x float> %[[FAST_25_INS0]], float %[[FAST_25_MUL1]], i64 1
249 ; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
250 define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
251 %no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x
252 store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
254 %afn.no.md = fdiv afn <2 x float> <float 1.0, float 2.0>, %x
255 store volatile <2 x float> %afn.no.md, <2 x float> addrspace(1)* %out
257 %fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x
258 store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
260 %afn.25ulp = fdiv afn <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
261 store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out
263 %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
264 store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
; <2 x float> fdiv whose numerator is not a constant vector: %x.insert is %x
; with lane 0 overwritten by 1.0 through an insertelement. Attribute group #1.
; For both the afn and the fast fdiv (each with !fpmath !0) the expected output
; handles both lanes the same way: extract the numerator and denominator lanes,
; call @llvm.amdgcn.rcp.f32 on the denominator, and fmul by the numerator lane.
; Lane 0 is therefore not expected to collapse to a bare rcp here.
269 ; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant(
270 ; CHECK: %[[AFN_A0:[0-9]+]] = extractelement <2 x float> %x.insert, i64 0
271 ; CHECK: %[[AFN_B0:[0-9]+]] = extractelement <2 x float> %y, i64 0
272 ; CHECK: %[[AFN_RCP0:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_B0]])
273 ; CHECK: %[[AFN_MUL0:[0-9]+]] = fmul afn float %[[AFN_A0]], %[[AFN_RCP0]]
274 ; CHECK: %[[AFN_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[AFN_MUL0]], i64 0
275 ; CHECK: %[[AFN_A1:[0-9]+]] = extractelement <2 x float> %x.insert, i64 1
276 ; CHECK: %[[AFN_B1:[0-9]+]] = extractelement <2 x float> %y, i64 1
277 ; CHECK: %[[AFN_RCP1:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %[[AFN_B1]])
278 ; CHECK: %[[AFN_MUL1:[0-9]+]] = fmul afn float %[[AFN_A1]], %[[AFN_RCP1]]
279 ; CHECK: %afn.25ulp = insertelement <2 x float> %[[AFN_INS0]], float %[[AFN_MUL1]], i64 1
280 ; CHECK: store volatile <2 x float> %afn.25ulp
282 ; CHECK: %[[FAST_A0:[0-9]+]] = extractelement <2 x float> %x.insert, i64 0
283 ; CHECK: %[[FAST_B0:[0-9]+]] = extractelement <2 x float> %y, i64 0
284 ; CHECK: %[[FAST_RCP0:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_B0]])
285 ; CHECK: %[[FAST_MUL0:[0-9]+]] = fmul fast float %[[FAST_A0]], %[[FAST_RCP0]]
286 ; CHECK: %[[FAST_INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FAST_MUL0]], i64 0
287 ; CHECK: %[[FAST_A1:[0-9]+]] = extractelement <2 x float> %x.insert, i64 1
288 ; CHECK: %[[FAST_B1:[0-9]+]] = extractelement <2 x float> %y, i64 1
289 ; CHECK: %[[FAST_RCP1:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %[[FAST_B1]])
290 ; CHECK: %[[FAST_MUL1:[0-9]+]] = fmul fast float %[[FAST_A1]], %[[FAST_RCP1]]
291 ; CHECK: %fast.25ulp = insertelement <2 x float> %[[FAST_INS0]], float %[[FAST_MUL1]], i64 1
292 ; CHECK: store volatile <2 x float> %fast.25ulp
293 define amdgpu_kernel void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 {
294 %x.insert = insertelement <2 x float> %x, float 1.0, i32 0
296 %afn.25ulp = fdiv afn <2 x float> %x.insert, %y, !fpmath !0
297 store volatile <2 x float> %afn.25ulp, <2 x float> addrspace(1)* %out
299 %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
300 store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
; Same fdiv sequence as @fdiv_fpmath, but with attribute group #2 (f32 denormal
; mode "ieee,ieee"). Expected results, in the same order as the body:
;   no metadata, !1 (0.5), !2 (1.0), !0 (2.5), !3 (3.0) -> fdiv is kept; in
;       particular the 2.5 and 3.0 cases are not turned into
;       @llvm.amdgcn.fdiv.fast calls here
;   fast / afn flags with !0 -> @llvm.amdgcn.rcp.f32 of %b, then an fmul by %a
;       carrying the same flags
305 ; CHECK-LABEL: @fdiv_fpmath_f32_denormals(
306 ; CHECK: %no.md = fdiv float %a, %b{{$}}
307 ; CHECK: %md.half.ulp = fdiv float %a, %b
308 ; CHECK: %md.1ulp = fdiv float %a, %b
309 ; CHECK: %md.25ulp = fdiv float %a, %b
310 ; CHECK: %md.3ulp = fdiv float %a, %b
311 ; CHECK: %[[RCP_FAST:[0-9]+]] = call fast float @llvm.amdgcn.rcp.f32(float %b)
312 ; CHECK: %fast.md.25ulp = fmul fast float %a, %[[RCP_FAST]]
313 ; CHECK: %[[RCP_AFN:[0-9]+]] = call afn float @llvm.amdgcn.rcp.f32(float %b)
314 ; CHECK: %afn.md.25ulp = fmul afn float %a, %[[RCP_AFN]]
315 define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
316 %no.md = fdiv float %a, %b
317 store volatile float %no.md, float addrspace(1)* %out
319 %md.half.ulp = fdiv float %a, %b, !fpmath !1
320 store volatile float %md.half.ulp, float addrspace(1)* %out
322 %md.1ulp = fdiv float %a, %b, !fpmath !2
323 store volatile float %md.1ulp, float addrspace(1)* %out
325 %md.25ulp = fdiv float %a, %b, !fpmath !0
326 store volatile float %md.25ulp, float addrspace(1)* %out
328 %md.3ulp = fdiv float %a, %b, !fpmath !3
329 store volatile float %md.3ulp, float addrspace(1)* %out
331 %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
332 store volatile float %fast.md.25ulp, float addrspace(1)* %out
334 %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0
335 store volatile float %afn.md.25ulp, float addrspace(1)* %out
; Attribute groups.
;   #0 is not referenced by any function in the visible part of this file.
;   #1 (f32 denormals "preserve-sign") is used by every kernel above except
;      @noop_fdiv_fpmath and @fdiv_fpmath_f32_denormals.
;   #2 (f32 denormals "ieee") is used only by @fdiv_fpmath_f32_denormals.
340 attributes #0 = { nounwind optnone noinline }
341 attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
342 attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
; Operands for the !fpmath attachments on the fdivs above (required accuracy,
; which the LLVM language reference defines in ULPs):
;   !0 = 2.5, !1 = 0.5, !2 = 1.0, !3 = 3.0
344 !0 = !{float 2.500000e+00}
345 !1 = !{float 5.000000e-01}
346 !2 = !{float 1.000000e+00}
347 !3 = !{float 3.000000e+00}