1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-GOODFREXP %s
3 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -passes=amdgpu-codegenprepare -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-BADFREXP %s
4 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=dynamic %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-GOODFREXP %s
5 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=preserve-sign %s | FileCheck -check-prefixes=CHECK,DAZ %s
7 ; Make sure this doesn't crash with no triple
8 ; TODO: Delete when old PM deleted
9 ; RUN: opt -amdgpu-codegenprepare -disable-output %s
; Kernel containing a single f32 fdiv tagged with !fpmath !0. The assertions
; below use the check prefix that is passed to every FileCheck invocation in
; this file, and they expect the fdiv (metadata included) to come out of
; amdgpu-codegenprepare unchanged.
; NOTE(review): attribute group #0 and !0 are defined outside this excerpt;
; presumably #0 is what makes the pass leave this function alone - confirm.
12 define amdgpu_kernel void @noop_fdiv_fpmath(ptr addrspace(1) %out, float %a, float %b) #0 {
13 ; CHECK-LABEL: define amdgpu_kernel void @noop_fdiv_fpmath(
14 ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR0:[0-9]+]] {
15 ; CHECK-NEXT: [[MD_25ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META0:![0-9]+]]
16 ; CHECK-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
17 ; CHECK-NEXT: ret void
; Input IR: the result is written with a volatile store to %out.
19 %md.25ulp = fdiv float %a, %b, !fpmath !0
20 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
; Exercises f32 fdiv with and without !fpmath metadata, plus fast/afn/arcp
; variants. Each result is written to %out with a volatile store.
; Expected output, as encoded in the assertions below:
;  * No metadata, !fpmath !1, and the fast / afn divisions are left as plain
;    fdiv under every prefix.
;  * !fpmath !2, !0 and !3 under the two IEEE prefixes are expanded into frexp
;    of both operands, llvm.amdgcn.rcp of the denominator mantissa, an fmul,
;    and llvm.ldexp by the exponent difference. The BADFREXP variant obtains
;    the exponents through llvm.amdgcn.frexp.exp instead of extracting
;    element 1 of the frexp result.
;  * Under the DAZ prefix, !fpmath !2 still gets the frexp/rcp/ldexp expansion,
;    while !0 and !3 become calls to llvm.amdgcn.fdiv.fast.
;  * arcp without metadata is left unchanged. arcp with !fpmath becomes
;    rcp + ldexp + fmul by %a under the IEEE prefixes, and a direct
;    llvm.amdgcn.rcp of %b followed by fmul under the DAZ prefix.
; NOTE(review): the !fpmath operands (!0..!3) are defined outside this
; excerpt; the value names suggest 2.5, 0.5, 1.0 and 3.0 ulp - confirm.
24 define amdgpu_kernel void @fdiv_fpmath_f32(ptr addrspace(1) %out, float %a, float %b) {
25 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32(
26 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] {
27 ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
28 ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
29 ; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1:![0-9]+]]
30 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
31 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
32 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
33 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
34 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
35 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
36 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
37 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
38 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
39 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
40 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
41 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
42 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
43 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
44 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1
45 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]])
46 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
47 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
48 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1
49 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]]
50 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
51 ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
52 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
53 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
54 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
55 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
56 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]])
57 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
58 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
59 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
60 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]]
61 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
62 ; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
63 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
64 ; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]]
65 ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
66 ; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]]
67 ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
68 ; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
69 ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
70 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
71 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
72 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
73 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
74 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]])
75 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
76 ; IEEE-GOODFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]]
77 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
78 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
79 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
80 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1
81 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]]
82 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]])
83 ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]])
84 ; IEEE-GOODFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]]
85 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
86 ; IEEE-GOODFREXP-NEXT: ret void
88 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32(
89 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] {
90 ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
91 ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
92 ; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1:![0-9]+]]
93 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
94 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
95 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
96 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
97 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
98 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
99 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
100 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
101 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
102 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
103 ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
104 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
105 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
106 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
107 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
108 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]])
109 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
110 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
111 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
112 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]]
113 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
114 ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
115 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
116 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
117 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
118 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
119 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]])
120 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
121 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
122 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
123 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]]
124 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
125 ; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
126 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
127 ; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]]
128 ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
129 ; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]]
130 ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
131 ; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
132 ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
133 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
134 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
135 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
136 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
137 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]])
138 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
139 ; IEEE-BADFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]]
140 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
141 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
142 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
143 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
144 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]]
145 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]])
146 ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]])
147 ; IEEE-BADFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]]
148 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
149 ; IEEE-BADFREXP-NEXT: ret void
151 ; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32(
152 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] {
153 ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
154 ; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
155 ; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1:![0-9]+]]
156 ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
157 ; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
158 ; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
159 ; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
160 ; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
161 ; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
162 ; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
163 ; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
164 ; DAZ-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
165 ; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
166 ; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
167 ; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
168 ; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
169 ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
170 ; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
171 ; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
172 ; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]]
173 ; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
174 ; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]]
175 ; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
176 ; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
177 ; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
178 ; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]])
179 ; DAZ-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP10]]
180 ; DAZ-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
181 ; DAZ-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]])
182 ; DAZ-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP11]]
183 ; DAZ-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; Input IR. Value names are significant: the autogenerated assertions above
; capture each result under the upper-cased form of its name.
186 %no.md = fdiv float %a, %b
187 store volatile float %no.md, ptr addrspace(1) %out, align 4
188 %md.half.ulp = fdiv float %a, %b, !fpmath !1
189 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4
190 %md.1ulp = fdiv float %a, %b, !fpmath !2
191 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4
192 %md.25ulp = fdiv float %a, %b, !fpmath !0
193 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
194 %md.3ulp = fdiv float %a, %b, !fpmath !3
195 store volatile float %md.3ulp, ptr addrspace(1) %out, align 4
196 %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
197 store volatile float %fast.md.25ulp, ptr addrspace(1) %out, align 4
198 %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0
199 store volatile float %afn.md.25ulp, ptr addrspace(1) %out, align 4
200 %no.md.arcp = fdiv arcp float %a, %b
201 store volatile float %no.md.arcp, ptr addrspace(1) %out, align 4
202 %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
203 store volatile float %arcp.md.25ulp, ptr addrspace(1) %out, align 4
204 %arcp.md.1ulp = fdiv arcp float %a, %b, !fpmath !2
205 store volatile float %arcp.md.1ulp, ptr addrspace(1) %out, align 4
; Exercises f32 fdiv carrying !fpmath !2 or !0 together with the nnan and/or
; ninf fast-math flags. Each result is written to %out with a volatile store.
; Expected output, as encoded in the assertions below:
;  * Under the two IEEE prefixes every division is expanded into frexp of both
;    operands, llvm.amdgcn.rcp, fmul and llvm.ldexp. The BADFREXP variant
;    obtains the exponents through llvm.amdgcn.frexp.exp instead of extracting
;    element 1 of the frexp result.
;  * Under the DAZ prefix the !fpmath !2 divisions get the same frexp-based
;    expansion, while the !fpmath !0 divisions become llvm.amdgcn.fdiv.fast.
;  * The flags of the source fdiv are expected on the generated rcp, fmul,
;    ldexp and fdiv.fast instructions; the frexp calls are expected without
;    flags.
; NOTE(review): the !fpmath operands (!0, !2) are defined outside this
; excerpt; the value names suggest 2.5 and 1.0 ulp - confirm.
209 define amdgpu_kernel void @fdiv_fpmath_f32_flags(ptr addrspace(1) %out, float %a, float %b) {
210 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_flags(
211 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
212 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
213 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
214 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
215 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]])
216 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
217 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
218 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
219 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul nnan ninf float [[TMP6]], [[TMP4]]
220 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
221 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
222 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
223 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
224 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
225 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1
226 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP11]])
227 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
228 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
229 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1
230 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul nnan ninf float [[TMP15]], [[TMP13]]
231 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
232 ; IEEE-GOODFREXP-NEXT: [[MD_25ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
233 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
234 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
235 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
236 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
237 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP20]])
238 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
239 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
240 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
241 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul ninf float [[TMP24]], [[TMP22]]
242 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
243 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
244 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
245 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
246 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
247 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
248 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP29]])
249 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
250 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0
251 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP32]], 1
252 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = fmul ninf float [[TMP33]], [[TMP31]]
253 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]]
254 ; IEEE-GOODFREXP-NEXT: [[MD_25ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]])
255 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
256 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
257 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = extractvalue { float, i32 } [[TMP37]], 0
258 ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = extractvalue { float, i32 } [[TMP37]], 1
259 ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP38]])
260 ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
261 ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP41]], 0
262 ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = extractvalue { float, i32 } [[TMP41]], 1
263 ; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = fmul nnan float [[TMP42]], [[TMP40]]
264 ; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = sub i32 [[TMP43]], [[TMP39]]
265 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP44]], i32 [[TMP45]])
266 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
267 ; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
268 ; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0
269 ; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = extractvalue { float, i32 } [[TMP46]], 1
270 ; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP47]])
271 ; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
272 ; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = extractvalue { float, i32 } [[TMP50]], 0
273 ; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = extractvalue { float, i32 } [[TMP50]], 1
274 ; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = fmul nnan float [[TMP51]], [[TMP49]]
275 ; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = sub i32 [[TMP52]], [[TMP48]]
276 ; IEEE-GOODFREXP-NEXT: [[MD_25ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP53]], i32 [[TMP54]])
277 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
278 ; IEEE-GOODFREXP-NEXT: ret void
280 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_flags(
281 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
282 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
283 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
284 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
285 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]])
286 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
287 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
288 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
289 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul nnan ninf float [[TMP6]], [[TMP4]]
290 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
291 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
292 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
293 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
294 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
295 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
296 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP11]])
297 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
298 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
299 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
300 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul nnan ninf float [[TMP15]], [[TMP13]]
301 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
302 ; IEEE-BADFREXP-NEXT: [[MD_25ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
303 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
304 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
305 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
306 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
307 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP20]])
308 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
309 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
310 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
311 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul ninf float [[TMP24]], [[TMP22]]
312 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
313 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
314 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
315 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
316 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
317 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
318 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP29]])
319 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
320 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0
321 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
322 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = fmul ninf float [[TMP33]], [[TMP31]]
323 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]]
324 ; IEEE-BADFREXP-NEXT: [[MD_25ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]])
325 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
326 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
327 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = extractvalue { float, i32 } [[TMP37]], 0
328 ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
329 ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP38]])
330 ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
331 ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP41]], 0
332 ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
333 ; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = fmul nnan float [[TMP42]], [[TMP40]]
334 ; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = sub i32 [[TMP43]], [[TMP39]]
335 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP44]], i32 [[TMP45]])
336 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
337 ; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
338 ; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0
339 ; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
340 ; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP47]])
341 ; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
342 ; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = extractvalue { float, i32 } [[TMP50]], 0
343 ; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
344 ; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = fmul nnan float [[TMP51]], [[TMP49]]
345 ; IEEE-BADFREXP-NEXT: [[TMP54:%.*]] = sub i32 [[TMP52]], [[TMP48]]
346 ; IEEE-BADFREXP-NEXT: [[MD_25ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP53]], i32 [[TMP54]])
347 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
348 ; IEEE-BADFREXP-NEXT: ret void
350 ; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_flags(
351 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
352 ; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
353 ; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
354 ; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
355 ; DAZ-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]])
356 ; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
357 ; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
358 ; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
359 ; DAZ-NEXT: [[TMP8:%.*]] = fmul nnan ninf float [[TMP6]], [[TMP4]]
360 ; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
361 ; DAZ-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
362 ; DAZ-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
363 ; DAZ-NEXT: [[MD_25ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
364 ; DAZ-NEXT: store volatile float [[MD_25ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
365 ; DAZ-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
366 ; DAZ-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
367 ; DAZ-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1
368 ; DAZ-NEXT: [[TMP13:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP11]])
369 ; DAZ-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
370 ; DAZ-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
371 ; DAZ-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1
372 ; DAZ-NEXT: [[TMP17:%.*]] = fmul ninf float [[TMP15]], [[TMP13]]
373 ; DAZ-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
374 ; DAZ-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
375 ; DAZ-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
376 ; DAZ-NEXT: [[MD_25ULP_NINF:%.*]] = call ninf float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
377 ; DAZ-NEXT: store volatile float [[MD_25ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
378 ; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
379 ; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
380 ; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
381 ; DAZ-NEXT: [[TMP22:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP20]])
382 ; DAZ-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
383 ; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
384 ; DAZ-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
385 ; DAZ-NEXT: [[TMP26:%.*]] = fmul nnan float [[TMP24]], [[TMP22]]
386 ; DAZ-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
387 ; DAZ-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
388 ; DAZ-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
389 ; DAZ-NEXT: [[MD_25ULP_NNAN:%.*]] = call nnan float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
390 ; DAZ-NEXT: store volatile float [[MD_25ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
; Input IR: three flag combinations (ninf+nnan, ninf only, nnan only), each
; tested once with !fpmath !2 and once with !fpmath !0.
393 %md.1ulp.ninf.nnan = fdiv ninf nnan float %a, %b, !fpmath !2
394 store volatile float %md.1ulp.ninf.nnan, ptr addrspace(1) %out, align 4
396 %md.25ulp.ninf.nnan = fdiv ninf nnan float %a, %b, !fpmath !0
397 store volatile float %md.25ulp.ninf.nnan, ptr addrspace(1) %out, align 4
399 %md.1ulp.ninf = fdiv ninf float %a, %b, !fpmath !2
400 store volatile float %md.1ulp.ninf, ptr addrspace(1) %out, align 4
402 %md.25ulp.ninf = fdiv ninf float %a, %b, !fpmath !0
403 store volatile float %md.25ulp.ninf, ptr addrspace(1) %out, align 4
405 %md.1ulp.nnan = fdiv nnan float %a, %b, !fpmath !2
406 store volatile float %md.1ulp.nnan, ptr addrspace(1) %out, align 4
408 %md.25ulp.nnan = fdiv nnan float %a, %b, !fpmath !0
409 store volatile float %md.25ulp.nnan, ptr addrspace(1) %out, align 4
; Reciprocal-shaped f32 divides (numerator 1.0 or -1.0, divisor %x) with
; different combinations of !fpmath metadata and the afn/fast flags. Each result
; is stored with a volatile store so every divide shows up in the output.
;
; What the autogenerated checks below expect:
;  * With !fpmath !2 or !0 and no fast-math flags, the IEEE-GOODFREXP and
;    IEEE-BADFREXP runs get a frexp / llvm.amdgcn.rcp.f32 / ldexp sequence,
;    while the DAZ run gets a bare llvm.amdgcn.rcp.f32 call.
;  * IEEE-BADFREXP (the tahiti RUN line) takes the exponent from
;    llvm.amdgcn.frexp.exp.i32.f32 instead of extractvalue index 1.
;  * For the -1.0 numerator the same sequence is fed by an fneg of %x.
;  * Divides with no metadata, with !fpmath !1, or with afn/fast flags are
;    expected to be left as plain fdiv instructions by this pass.
;
; NOTE(review): !0, !1 and !2 are defined outside this chunk; the value names
; suggest 2.5, 0.5 and 1.0 ULP respectively -- confirm against the metadata.
414 define amdgpu_kernel void @rcp_fdiv_f32_fpmath(ptr addrspace(1) %out, float %x) {
415 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath(
416 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
417 ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[X]]
418 ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
419 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
420 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
421 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
422 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
423 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
424 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
425 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
426 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
427 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
428 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
429 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
430 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
431 ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
432 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
433 ; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath [[META1]]
434 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
435 ; IEEE-GOODFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]]
436 ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
437 ; IEEE-GOODFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath [[META0]]
438 ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
439 ; IEEE-GOODFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]]
440 ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
441 ; IEEE-GOODFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath [[META0]]
442 ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
443 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg float [[X]]
444 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
445 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
446 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
447 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
448 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP13]])
449 ; IEEE-GOODFREXP-NEXT: [[NEG_MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
450 ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
451 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fneg float [[X]]
452 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]])
453 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
454 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1
455 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
456 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP19]])
457 ; IEEE-GOODFREXP-NEXT: [[NEG_MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
458 ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
459 ; IEEE-GOODFREXP-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]]
460 ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
461 ; IEEE-GOODFREXP-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath [[META0]]
462 ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
463 ; IEEE-GOODFREXP-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]]
464 ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
465 ; IEEE-GOODFREXP-NEXT: ret void
467 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath(
468 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
469 ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[X]]
470 ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
471 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
472 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
473 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
474 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
475 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
476 ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
477 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
478 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
479 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
480 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
481 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
482 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
483 ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
484 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
485 ; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath [[META1]]
486 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
487 ; IEEE-BADFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]]
488 ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
489 ; IEEE-BADFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath [[META0]]
490 ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
491 ; IEEE-BADFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]]
492 ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
493 ; IEEE-BADFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath [[META0]]
494 ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
495 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg float [[X]]
496 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
497 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
498 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]])
499 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
500 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP13]])
501 ; IEEE-BADFREXP-NEXT: [[NEG_MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
502 ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
503 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fneg float [[X]]
504 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]])
505 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
506 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP17]])
507 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
508 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP19]])
509 ; IEEE-BADFREXP-NEXT: [[NEG_MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
510 ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
511 ; IEEE-BADFREXP-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]]
512 ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
513 ; IEEE-BADFREXP-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath [[META0]]
514 ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
515 ; IEEE-BADFREXP-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]]
516 ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
517 ; IEEE-BADFREXP-NEXT: ret void
519 ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath(
520 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
521 ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[X]]
522 ; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
523 ; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]])
524 ; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
525 ; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]])
526 ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
527 ; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath [[META1]]
528 ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
529 ; DAZ-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]]
530 ; DAZ-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
531 ; DAZ-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath [[META0]]
532 ; DAZ-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
533 ; DAZ-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]]
534 ; DAZ-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
535 ; DAZ-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath [[META0]]
536 ; DAZ-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
537 ; DAZ-NEXT: [[TMP1:%.*]] = fneg float [[X]]
538 ; DAZ-NEXT: [[NEG_MD_1ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]])
539 ; DAZ-NEXT: store volatile float [[NEG_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
540 ; DAZ-NEXT: [[TMP2:%.*]] = fneg float [[X]]
541 ; DAZ-NEXT: [[NEG_MD_25ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
542 ; DAZ-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
543 ; DAZ-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]]
544 ; DAZ-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
545 ; DAZ-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath [[META0]]
546 ; DAZ-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
547 ; DAZ-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]]
548 ; DAZ-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
551 %no.md = fdiv float 1.000000e+00, %x
552 store volatile float %no.md, ptr addrspace(1) %out, align 4
553 %md.1ulp = fdiv float 1.000000e+00, %x, !fpmath !2
554 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4
555 %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0
556 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
557 %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1
558 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4
559 %afn.no.md = fdiv afn float 1.000000e+00, %x
560 store volatile float %afn.no.md, ptr addrspace(1) %out, align 4
561 %afn.25ulp = fdiv afn float 1.000000e+00, %x, !fpmath !0
562 store volatile float %afn.25ulp, ptr addrspace(1) %out, align 4
563 %fast.no.md = fdiv fast float 1.000000e+00, %x
564 store volatile float %fast.no.md, ptr addrspace(1) %out, align 4
565 %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0
566 store volatile float %fast.25ulp, ptr addrspace(1) %out, align 4
567 %neg.md.1ulp = fdiv float -1.000000e+00, %x, !fpmath !2
568 store volatile float %neg.md.1ulp, ptr addrspace(1) %out, align 4
569 %neg.md.25ulp = fdiv float -1.000000e+00, %x, !fpmath !0
570 store volatile float %neg.md.25ulp, ptr addrspace(1) %out, align 4
571 %neg.afn.no.md = fdiv afn float -1.000000e+00, %x
572 store volatile float %neg.afn.no.md, ptr addrspace(1) %out, align 4
573 %neg.afn.25ulp = fdiv afn float -1.000000e+00, %x, !fpmath !0
574 store volatile float %neg.afn.25ulp, ptr addrspace(1) %out, align 4
575 %neg.fast.no.md = fdiv fast float -1.000000e+00, %x
576 store volatile float %neg.fast.no.md, ptr addrspace(1) %out, align 4
; 1.0 / %x with !fpmath !2 combined with individual fast-math flags
; (ninf+nnan, ninf, nnan, nsz). Each result is stored with a volatile store.
;
; What the autogenerated checks below expect:
;  * The IEEE-GOODFREXP and IEEE-BADFREXP runs get the frexp /
;    llvm.amdgcn.rcp.f32 / ldexp sequence, with the fdiv's flags carried on the
;    rcp and ldexp calls; the frexp call itself has no flags.
;  * IEEE-BADFREXP takes the exponent from llvm.amdgcn.frexp.exp.i32.f32
;    instead of extractvalue index 1.
;  * The DAZ run gets a bare llvm.amdgcn.rcp.f32 call carrying the same flags.
;
; NOTE(review): !2 is defined outside this chunk; the value names suggest it is
; the 1.0 ULP accuracy node -- confirm against the metadata.
580 define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags(ptr addrspace(1) %out, float %x) {
581 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags(
582 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
583 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
584 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
585 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
586 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
587 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]])
588 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
589 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
590 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
591 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
592 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
593 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
594 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP7]])
595 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
596 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
597 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
598 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0
599 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP11]], 1
600 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]]
601 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP12]])
602 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]])
603 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
604 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
605 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0
606 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP16]], 1
607 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = sub i32 0, [[TMP18]]
608 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP17]])
609 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP19]])
610 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4
611 ; IEEE-GOODFREXP-NEXT: ret void
613 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags(
614 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
615 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
616 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
617 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
618 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
619 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]])
620 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
621 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
622 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
623 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
624 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
625 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
626 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP7]])
627 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
628 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
629 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
630 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0
631 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
632 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]]
633 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP12]])
634 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]])
635 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
636 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
637 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0
638 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
639 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = sub i32 0, [[TMP18]]
640 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP17]])
641 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP19]])
642 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4
643 ; IEEE-BADFREXP-NEXT: ret void
645 ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags(
646 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
647 ; DAZ-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[X]])
648 ; DAZ-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
649 ; DAZ-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[X]])
650 ; DAZ-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
651 ; DAZ-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[X]])
652 ; DAZ-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
653 ; DAZ-NEXT: [[MD_1ULP_NSZ:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[X]])
654 ; DAZ-NEXT: store volatile float [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4
657 %md.1ulp.ninf.nnan = fdiv ninf nnan float 1.000000e+00, %x, !fpmath !2
658 store volatile float %md.1ulp.ninf.nnan, ptr addrspace(1) %out, align 4
660 %md.1ulp.ninf = fdiv ninf float 1.000000e+00, %x, !fpmath !2
661 store volatile float %md.1ulp.ninf, ptr addrspace(1) %out, align 4
663 %md.1ulp.nnan = fdiv nnan float 1.000000e+00, %x, !fpmath !2
664 store volatile float %md.1ulp.nnan, ptr addrspace(1) %out, align 4
666 %md.1ulp.nsz = fdiv nsz float 1.000000e+00, %x, !fpmath !2
667 store volatile float %md.1ulp.nsz, ptr addrspace(1) %out, align 4
; 1.0 / x with !fpmath !2 where the divisor argument carries a nofpclass
; attribute ruling out NaN (%no.nan), infinity (%no.inf), or both
; (%no.inf.nan). Each result is stored with a volatile store.
;
; What the checks below expect:
;  * The IEEE-GOODFREXP and IEEE-BADFREXP runs get the frexp /
;    llvm.amdgcn.rcp.f32 / ldexp sequence with no fast-math flags on the calls.
;  * IEEE-BADFREXP takes the exponent from llvm.amdgcn.frexp.exp.i32.f32
;    instead of extractvalue index 1.
;  * The DAZ run gets a bare llvm.amdgcn.rcp.f32 call.
;
; %no.inf is declared nofpclass(inf) so that each argument excludes exactly the
; class its name states; the -SAME lines were adjusted by hand to match.
; NOTE(review): regenerate with utils/update_test_checks.py to confirm the
; expected bodies are unchanged for the nofpclass(inf) argument.
672 define amdgpu_kernel void @rcp_fdiv_f32_knownfinite(ptr addrspace(1) %out,
673 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_knownfinite(
674 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
675 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NAN]])
676 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
677 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
678 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
679 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
680 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
681 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
682 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF]])
683 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
684 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
685 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
686 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
687 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_INF:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
688 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF]], ptr addrspace(1) [[OUT]], align 4
689 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF_NAN]])
690 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0
691 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP11]], 1
692 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]]
693 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]])
694 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_INF_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]])
695 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
696 ; IEEE-GOODFREXP-NEXT: ret void
698 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_knownfinite(
699 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
700 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NAN]])
701 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
702 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_NAN]])
703 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
704 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
705 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
706 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
707 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF]])
708 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
709 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_INF]])
710 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
711 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
712 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_INF:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
713 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF]], ptr addrspace(1) [[OUT]], align 4
714 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF_NAN]])
715 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0
716 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_INF_NAN]])
717 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]]
718 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]])
719 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_INF_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]])
720 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
721 ; IEEE-BADFREXP-NEXT: ret void
723 ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_knownfinite(
724 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
725 ; DAZ-NEXT: [[MD_1ULP_NO_NAN:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_NAN]])
726 ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
727 ; DAZ-NEXT: [[MD_1ULP_NO_INF:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_INF]])
728 ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_INF]], ptr addrspace(1) [[OUT]], align 4
729 ; DAZ-NEXT: [[MD_1ULP_NO_INF_NAN:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_INF_NAN]])
730 ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
733 float nofpclass(nan) %no.nan,
734 float nofpclass(inf) %no.inf,
735 float nofpclass(inf nan) %no.inf.nan) {
736 %md.1ulp.no.nan = fdiv float 1.000000e+00, %no.nan, !fpmath !2
737 store volatile float %md.1ulp.no.nan, ptr addrspace(1) %out, align 4
739 %md.1ulp.no.inf = fdiv float 1.000000e+00, %no.inf, !fpmath !2
740 store volatile float %md.1ulp.no.inf, ptr addrspace(1) %out, align 4
742 %md.1ulp.no.inf.nan = fdiv float 1.000000e+00, %no.inf.nan, !fpmath !2
743 store volatile float %md.1ulp.no.inf.nan, ptr addrspace(1) %out, align 4
; 1.0 / x with !fpmath !2 where the divisor argument is declared
; nofpclass(zero) (%no.zero) or nofpclass(zero sub) (%no.zero.sub). Each result
; is stored with a volatile store.
;
; What the autogenerated checks below expect:
;  * The IEEE-GOODFREXP and IEEE-BADFREXP runs get the frexp /
;    llvm.amdgcn.rcp.f32 / ldexp sequence for both arguments, including the
;    one that also excludes subnormals.
;  * IEEE-BADFREXP takes the exponent from llvm.amdgcn.frexp.exp.i32.f32
;    instead of extractvalue index 1.
;  * The DAZ run gets a bare llvm.amdgcn.rcp.f32 call for both arguments.
748 define amdgpu_kernel void @rcp_fdiv_f32_nozero(ptr addrspace(1) %out,
749 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nozero(
750 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
751 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO]])
752 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
753 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
754 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
755 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
756 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_ZERO:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
757 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
758 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO_SUB]])
759 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
760 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
761 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
762 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
763 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_ZERO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
764 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
765 ; IEEE-GOODFREXP-NEXT: ret void
767 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nozero(
768 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
769 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO]])
770 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
771 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_ZERO]])
772 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
773 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
774 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_ZERO:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
775 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
776 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO_SUB]])
777 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
778 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_ZERO_SUB]])
779 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
780 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
781 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_ZERO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
782 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
783 ; IEEE-BADFREXP-NEXT: ret void
785 ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nozero(
786 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
787 ; DAZ-NEXT: [[MD_1ULP_NO_ZERO:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_ZERO]])
788 ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
789 ; DAZ-NEXT: [[MD_1ULP_NO_ZERO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_ZERO_SUB]])
790 ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
793 float nofpclass(zero) %no.zero,
794 float nofpclass(zero sub) %no.zero.sub) {
795 %md.1ulp.no.zero = fdiv float 1.000000e+00, %no.zero, !fpmath !2
796 store volatile float %md.1ulp.no.zero, ptr addrspace(1) %out, align 4
798 %md.1ulp.no.zero.sub = fdiv float 1.000000e+00, %no.zero.sub, !fpmath !2
799 store volatile float %md.1ulp.no.zero.sub, ptr addrspace(1) %out, align 4
; Reciprocal case (fdiv 1.0, x with !fpmath !2, presumably the 1 ulp node
; going by the md.1ulp value names) for three arguments marked
; nofpclass(sub), nofpclass(nsub) and nofpclass(psub) respectively.
; Expected output, by FileCheck prefix (see the RUN lines at the top of the file)
;  - IEEE-GOODFREXP checks expect llvm.frexp, llvm.amdgcn.rcp on element 0 of
;    the frexp result, then llvm.ldexp with the negated element 1.
;  - IEEE-BADFREXP checks expect the same sequence, except the exponent is read
;    with llvm.amdgcn.frexp.exp.i32.f32 on the original operand.
;  - DAZ checks expect a bare llvm.amdgcn.rcp.f32 call.
; The IEEE expectations are identical for all three arguments, i.e. the
; nofpclass attributes do not change the expansion recorded here.
803 define amdgpu_kernel void @rcp_fdiv_f32_nosub(ptr addrspace(1) %out,
804 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nosub(
805 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]]) #[[ATTR1]] {
806 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_SUB]])
807 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
808 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
809 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
810 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
811 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
812 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
813 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NSUB]])
814 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
815 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
816 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
817 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
818 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_NSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
819 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
820 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_PSUB]])
821 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0
822 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP11]], 1
823 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]]
824 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]])
825 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_PSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]])
826 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
827 ; IEEE-GOODFREXP-NEXT: ret void
829 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nosub(
830 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]]) #[[ATTR1]] {
831 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_SUB]])
832 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
833 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_SUB]])
834 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
835 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
836 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
837 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
838 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NSUB]])
839 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
840 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_NSUB]])
841 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
842 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]])
843 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_NSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
844 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
845 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_PSUB]])
846 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0
847 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_PSUB]])
848 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]]
849 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]])
850 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_PSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]])
851 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
852 ; IEEE-BADFREXP-NEXT: ret void
854 ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nosub(
855 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]]) #[[ATTR1]] {
856 ; DAZ-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_SUB]])
857 ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
858 ; DAZ-NEXT: [[MD_1ULP_NO_NSUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_NSUB]])
859 ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
860 ; DAZ-NEXT: [[MD_1ULP_NO_PSUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_PSUB]])
861 ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
864 float nofpclass(sub) %no.sub,
865 float nofpclass(nsub) %no.nsub,
866 float nofpclass(psub) %no.psub) {
867 %md.1ulp.no.sub = fdiv float 1.000000e+00, %no.sub, !fpmath !2
868 store volatile float %md.1ulp.no.sub, ptr addrspace(1) %out, align 4
870 %md.1ulp.no.nsub = fdiv float 1.000000e+00, %no.nsub, !fpmath !2
871 store volatile float %md.1ulp.no.nsub, ptr addrspace(1) %out, align 4
873 %md.1ulp.no.psub = fdiv float 1.000000e+00, %no.psub, !fpmath !2
874 store volatile float %md.1ulp.no.psub, ptr addrspace(1) %out, align 4
; Reciprocal case (fdiv 1.0, x with !fpmath !2) where the operand is
; constrained with llvm.assume instead of a nofpclass attribute. The assumed
; condition is fabs(x) oge 0x3810000000000000 (2^-126 written as a double hex
; literal), named is.not.subnormal in the IR.
; Every prefix expects the fabs / fcmp / assume sequence to be kept as is.
; IEEE-GOODFREXP and IEEE-BADFREXP checks still expect the frexp / rcp / ldexp
; expansion, and DAZ checks expect a bare llvm.amdgcn.rcp.f32 call.
879 define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub(ptr addrspace(1) %out, float %x) {
880 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub(
881 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
882 ; IEEE-GOODFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
883 ; IEEE-GOODFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
884 ; IEEE-GOODFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
885 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
886 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
887 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
888 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
889 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
890 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
891 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
892 ; IEEE-GOODFREXP-NEXT: ret void
894 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub(
895 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
896 ; IEEE-BADFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
897 ; IEEE-BADFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
898 ; IEEE-BADFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
899 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
900 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
901 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
902 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
903 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
904 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
905 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
906 ; IEEE-BADFREXP-NEXT: ret void
908 ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub(
909 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
910 ; DAZ-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
911 ; DAZ-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
912 ; DAZ-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
913 ; DAZ-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]])
914 ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
917 %fabs.x = call float @llvm.fabs.f32(float %x)
918 %is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000
919 call void @llvm.assume(i1 %is.not.subnormal)
920 %md.1ulp.no.sub = fdiv float 1.000000e+00, %x, !fpmath !2
921 store volatile float %md.1ulp.no.sub, ptr addrspace(1) %out, align 4
925 ; Test if we have an assumption on the output that it's not denormal.
; The input is constrained by llvm.assume(fabs(x) oge 0x3810000000000000), and
; a second llvm.assume after the fdiv states the same bound for fabs of the
; result. The checks expect the fdiv to be expanded as frexp / rcp / ldexp
; under the IEEE-GOODFREXP and IEEE-BADFREXP prefixes and as a bare
; llvm.amdgcn.rcp.f32 call under the DAZ prefix, with the result-side
; fabs / fcmp / assume retained and reading the rewritten value.
926 define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub(ptr addrspace(1) %out, float %x) {
927 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub(
928 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
929 ; IEEE-GOODFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
930 ; IEEE-GOODFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
931 ; IEEE-GOODFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
932 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
933 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
934 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
935 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
936 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
937 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
938 ; IEEE-GOODFREXP-NEXT: [[FABS_RESULT:%.*]] = call float @llvm.fabs.f32(float [[MD_1ULP_NO_SUB]])
939 ; IEEE-GOODFREXP-NEXT: [[RESULT_IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_RESULT]], 0x3810000000000000
940 ; IEEE-GOODFREXP-NEXT: call void @llvm.assume(i1 [[RESULT_IS_NOT_SUBNORMAL]])
941 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
942 ; IEEE-GOODFREXP-NEXT: ret void
944 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub(
945 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
946 ; IEEE-BADFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
947 ; IEEE-BADFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
948 ; IEEE-BADFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
949 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
950 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
951 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]])
952 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
953 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
954 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
955 ; IEEE-BADFREXP-NEXT: [[FABS_RESULT:%.*]] = call float @llvm.fabs.f32(float [[MD_1ULP_NO_SUB]])
956 ; IEEE-BADFREXP-NEXT: [[RESULT_IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_RESULT]], 0x3810000000000000
957 ; IEEE-BADFREXP-NEXT: call void @llvm.assume(i1 [[RESULT_IS_NOT_SUBNORMAL]])
958 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
959 ; IEEE-BADFREXP-NEXT: ret void
961 ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub(
962 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
963 ; DAZ-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
964 ; DAZ-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
965 ; DAZ-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
966 ; DAZ-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]])
967 ; DAZ-NEXT: [[FABS_RESULT:%.*]] = call float @llvm.fabs.f32(float [[MD_1ULP_NO_SUB]])
968 ; DAZ-NEXT: [[RESULT_IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_RESULT]], 0x3810000000000000
969 ; DAZ-NEXT: call void @llvm.assume(i1 [[RESULT_IS_NOT_SUBNORMAL]])
970 ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
973 %fabs.x = call float @llvm.fabs.f32(float %x)
974 %is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000
975 call void @llvm.assume(i1 %is.not.subnormal)
976 %md.1ulp.no.sub = fdiv float 1.000000e+00, %x, !fpmath !2
978 %fabs.result = call float @llvm.fabs.f32(float %md.1ulp.no.sub)
979 %result.is.not.subnormal = fcmp oge float %fabs.result, 0x3810000000000000
980 call void @llvm.assume(i1 %result.is.not.subnormal)
981 store volatile float %md.1ulp.no.sub, ptr addrspace(1) %out, align 4
; Vector reciprocal case. Four <2 x float> fdivs of <1.0, 1.0> by %x with
; !fpmath !2, carrying the fast-math flags "ninf nnan", "ninf", "nnan" and
; "nsz" in that order.
; Every prefix expects each fdiv to be scalarized (extractelement of lanes 0
; and 1, per-lane expansion, insertelement into poison) and the flags to be
; copied onto the generated llvm.amdgcn.rcp and llvm.ldexp calls. The frexp
; calls carry no flags. "ninf nnan" is matched as "nnan ninf" in the checks.
; IEEE-GOODFREXP and IEEE-BADFREXP checks expect frexp / rcp / ldexp per lane,
; DAZ checks expect one llvm.amdgcn.rcp.f32 call per lane.
985 define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags(ptr addrspace(1) %out, <2 x float> %x) {
986 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags(
987 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
988 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
989 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
990 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
991 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0
992 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractvalue { float, i32 } [[TMP3]], 1
993 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = sub i32 0, [[TMP5]]
994 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP4]])
995 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP7]], i32 [[TMP6]])
996 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]])
997 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0
998 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP9]], 1
999 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP11]]
1000 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP10]])
1001 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP13]], i32 [[TMP12]])
1002 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0
1003 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1
1004 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
1005 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0
1006 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1
1007 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP16]])
1008 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
1009 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1
1010 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
1011 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP19]])
1012 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
1013 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]])
1014 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0
1015 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP24]], 1
1016 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]]
1017 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP25]])
1018 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]])
1019 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i64 0
1020 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = insertelement <2 x float> [[TMP30]], float [[TMP29]], i64 1
1021 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
1022 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[X]], i64 0
1023 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractelement <2 x float> [[X]], i64 1
1024 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP31]])
1025 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP33]], 0
1026 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP33]], 1
1027 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 0, [[TMP35]]
1028 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP34]])
1029 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP36]])
1030 ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP32]])
1031 ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0
1032 ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP39]], 1
1033 ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]]
1034 ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP40]])
1035 ; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]])
1036 ; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = insertelement <2 x float> poison, float [[TMP38]], i64 0
1037 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = insertelement <2 x float> [[TMP45]], float [[TMP44]], i64 1
1038 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
1039 ; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = extractelement <2 x float> [[X]], i64 0
1040 ; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = extractelement <2 x float> [[X]], i64 1
1041 ; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP46]])
1042 ; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0
1043 ; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = extractvalue { float, i32 } [[TMP48]], 1
1044 ; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = sub i32 0, [[TMP50]]
1045 ; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP49]])
1046 ; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP52]], i32 [[TMP51]])
1047 ; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP47]])
1048 ; IEEE-GOODFREXP-NEXT: [[TMP55:%.*]] = extractvalue { float, i32 } [[TMP54]], 0
1049 ; IEEE-GOODFREXP-NEXT: [[TMP56:%.*]] = extractvalue { float, i32 } [[TMP54]], 1
1050 ; IEEE-GOODFREXP-NEXT: [[TMP57:%.*]] = sub i32 0, [[TMP56]]
1051 ; IEEE-GOODFREXP-NEXT: [[TMP58:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP55]])
1052 ; IEEE-GOODFREXP-NEXT: [[TMP59:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP58]], i32 [[TMP57]])
1053 ; IEEE-GOODFREXP-NEXT: [[TMP60:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0
1054 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = insertelement <2 x float> [[TMP60]], float [[TMP59]], i64 1
1055 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4
1056 ; IEEE-GOODFREXP-NEXT: ret void
1058 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags(
1059 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
1060 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
1061 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
1062 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
1063 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0
1064 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]])
1065 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = sub i32 0, [[TMP5]]
1066 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP4]])
1067 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP7]], i32 [[TMP6]])
1068 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]])
1069 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0
1070 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP2]])
1071 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP11]]
1072 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP10]])
1073 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP13]], i32 [[TMP12]])
1074 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0
1075 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1
1076 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
1077 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0
1078 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1
1079 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP16]])
1080 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
1081 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP16]])
1082 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
1083 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP19]])
1084 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
1085 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]])
1086 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0
1087 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP17]])
1088 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]]
1089 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP25]])
1090 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]])
1091 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i64 0
1092 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = insertelement <2 x float> [[TMP30]], float [[TMP29]], i64 1
1093 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
1094 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[X]], i64 0
1095 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractelement <2 x float> [[X]], i64 1
1096 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP31]])
1097 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP33]], 0
1098 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP31]])
1099 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 0, [[TMP35]]
1100 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP34]])
1101 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP36]])
1102 ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP32]])
1103 ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0
1104 ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP32]])
1105 ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]]
1106 ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP40]])
1107 ; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]])
1108 ; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = insertelement <2 x float> poison, float [[TMP38]], i64 0
1109 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = insertelement <2 x float> [[TMP45]], float [[TMP44]], i64 1
1110 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
1111 ; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = extractelement <2 x float> [[X]], i64 0
1112 ; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = extractelement <2 x float> [[X]], i64 1
1113 ; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP46]])
1114 ; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0
1115 ; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP46]])
1116 ; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = sub i32 0, [[TMP50]]
1117 ; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP49]])
1118 ; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP52]], i32 [[TMP51]])
1119 ; IEEE-BADFREXP-NEXT: [[TMP54:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP47]])
1120 ; IEEE-BADFREXP-NEXT: [[TMP55:%.*]] = extractvalue { float, i32 } [[TMP54]], 0
1121 ; IEEE-BADFREXP-NEXT: [[TMP56:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP47]])
1122 ; IEEE-BADFREXP-NEXT: [[TMP57:%.*]] = sub i32 0, [[TMP56]]
1123 ; IEEE-BADFREXP-NEXT: [[TMP58:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP55]])
1124 ; IEEE-BADFREXP-NEXT: [[TMP59:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP58]], i32 [[TMP57]])
1125 ; IEEE-BADFREXP-NEXT: [[TMP60:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0
1126 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = insertelement <2 x float> [[TMP60]], float [[TMP59]], i64 1
1127 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4
1128 ; IEEE-BADFREXP-NEXT: ret void
1130 ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags(
1131 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
1132 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
1133 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
1134 ; DAZ-NEXT: [[TMP3:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP1]])
1135 ; DAZ-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]])
1136 ; DAZ-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
1137 ; DAZ-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1
1138 ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
1139 ; DAZ-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[X]], i64 0
1140 ; DAZ-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[X]], i64 1
1141 ; DAZ-NEXT: [[TMP8:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP6]])
1142 ; DAZ-NEXT: [[TMP9:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP7]])
1143 ; DAZ-NEXT: [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0
1144 ; DAZ-NEXT: [[MD_1ULP_NINF:%.*]] = insertelement <2 x float> [[TMP10]], float [[TMP9]], i64 1
1145 ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4
1146 ; DAZ-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[X]], i64 0
1147 ; DAZ-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[X]], i64 1
1148 ; DAZ-NEXT: [[TMP13:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP11]])
1149 ; DAZ-NEXT: [[TMP14:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP12]])
1150 ; DAZ-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i64 0
1151 ; DAZ-NEXT: [[MD_1ULP_NNAN:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1
1152 ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4
1153 ; DAZ-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0
1154 ; DAZ-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1
1155 ; DAZ-NEXT: [[TMP18:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP16]])
1156 ; DAZ-NEXT: [[TMP19:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP17]])
1157 ; DAZ-NEXT: [[TMP20:%.*]] = insertelement <2 x float> poison, float [[TMP18]], i64 0
1158 ; DAZ-NEXT: [[MD_1ULP_NSZ:%.*]] = insertelement <2 x float> [[TMP20]], float [[TMP19]], i64 1
1159 ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4
1160 ; DAZ-NEXT: ret void
1162 %md.1ulp.ninf.nnan = fdiv ninf nnan <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2
1163 store volatile <2 x float> %md.1ulp.ninf.nnan, ptr addrspace(1) %out, align 4
1165 %md.1ulp.ninf = fdiv ninf <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2
1166 store volatile <2 x float> %md.1ulp.ninf, ptr addrspace(1) %out, align 4
1168 %md.1ulp.nnan = fdiv nnan <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2
1169 store volatile <2 x float> %md.1ulp.nnan, ptr addrspace(1) %out, align 4
1171 %md.1ulp.nsz = fdiv nsz <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2
1172 store volatile <2 x float> %md.1ulp.nsz, ptr addrspace(1) %out, align 4
1177 define amdgpu_kernel void @fdiv_fpmath_f32_vector(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) {
1178 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_vector(
1179 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] {
1180 ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]]
1181 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8
1182 ; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath [[META1]]
1183 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8
1184 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0
1185 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1
1186 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[B]], i64 0
1187 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[B]], i64 1
1188 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
1189 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
1190 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
1191 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP6]])
1192 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
1193 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0
1194 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP9]], 1
1195 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = fmul float [[TMP10]], [[TMP8]]
1196 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP7]]
1197 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]])
1198 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
1199 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0
1200 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP15]], 1
1201 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP16]])
1202 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]])
1203 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
1204 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
1205 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = fmul float [[TMP20]], [[TMP18]]
1206 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]]
1207 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]])
1208 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0
1209 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i64 1
1210 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 8
1211 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[A]], i64 0
1212 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[A]], i64 1
1213 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[B]], i64 0
1214 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[B]], i64 1
1215 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP28]])
1216 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP30]], 0
1217 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP30]], 1
1218 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP31]])
1219 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP26]])
1220 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
1221 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1
1222 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = fmul float [[TMP35]], [[TMP33]]
1223 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = sub i32 [[TMP36]], [[TMP32]]
1224 ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP38]])
1225 ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP29]])
1226 ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP40]], 0
1227 ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP40]], 1
1228 ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP41]])
1229 ; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP27]])
1230 ; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = extractvalue { float, i32 } [[TMP44]], 0
1231 ; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = extractvalue { float, i32 } [[TMP44]], 1
1232 ; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = fmul float [[TMP45]], [[TMP43]]
1233 ; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = sub i32 [[TMP46]], [[TMP42]]
1234 ; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP47]], i32 [[TMP48]])
1235 ; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = insertelement <2 x float> poison, float [[TMP39]], i64 0
1236 ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP50]], float [[TMP49]], i64 1
1237 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 8
1238 ; IEEE-GOODFREXP-NEXT: ret void
1240 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_vector(
1241 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] {
1242 ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]]
1243 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8
1244 ; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath [[META1]]
1245 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8
1246 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0
1247 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1
1248 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[B]], i64 0
1249 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[B]], i64 1
1250 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
1251 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
1252 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
1253 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP6]])
1254 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
1255 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0
1256 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]])
1257 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = fmul float [[TMP10]], [[TMP8]]
1258 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP7]]
1259 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]])
1260 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
1261 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0
1262 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
1263 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP16]])
1264 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]])
1265 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
1266 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP2]])
1267 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = fmul float [[TMP20]], [[TMP18]]
1268 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]]
1269 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]])
1270 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0
1271 ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i64 1
1272 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 8
1273 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[A]], i64 0
1274 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[A]], i64 1
1275 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[B]], i64 0
1276 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[B]], i64 1
1277 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP28]])
1278 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP30]], 0
1279 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP28]])
1280 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP31]])
1281 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP26]])
1282 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
1283 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP26]])
1284 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = fmul float [[TMP35]], [[TMP33]]
1285 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = sub i32 [[TMP36]], [[TMP32]]
1286 ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP38]])
1287 ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP29]])
1288 ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP40]], 0
1289 ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP29]])
1290 ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP41]])
1291 ; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP27]])
1292 ; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = extractvalue { float, i32 } [[TMP44]], 0
1293 ; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP27]])
1294 ; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = fmul float [[TMP45]], [[TMP43]]
1295 ; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = sub i32 [[TMP46]], [[TMP42]]
1296 ; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP47]], i32 [[TMP48]])
1297 ; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = insertelement <2 x float> poison, float [[TMP39]], i64 0
1298 ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP50]], float [[TMP49]], i64 1
1299 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 8
1300 ; IEEE-BADFREXP-NEXT: ret void
1302 ; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_vector(
1303 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] {
1304 ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]]
1305 ; DAZ-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8
1306 ; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath [[META1]]
1307 ; DAZ-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8
1308 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0
1309 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1
1310 ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[B]], i64 0
1311 ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[B]], i64 1
1312 ; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
1313 ; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
1314 ; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
1315 ; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP6]])
1316 ; DAZ-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
1317 ; DAZ-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0
1318 ; DAZ-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP9]], 1
1319 ; DAZ-NEXT: [[TMP12:%.*]] = fmul float [[TMP10]], [[TMP8]]
1320 ; DAZ-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP7]]
1321 ; DAZ-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]])
1322 ; DAZ-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
1323 ; DAZ-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0
1324 ; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP15]], 1
1325 ; DAZ-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP16]])
1326 ; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]])
1327 ; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
1328 ; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
1329 ; DAZ-NEXT: [[TMP22:%.*]] = fmul float [[TMP20]], [[TMP18]]
1330 ; DAZ-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]]
1331 ; DAZ-NEXT: [[TMP24:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]])
1332 ; DAZ-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0
1333 ; DAZ-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i64 1
1334 ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 8
1335 ; DAZ-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[A]], i64 0
1336 ; DAZ-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[A]], i64 1
1337 ; DAZ-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[B]], i64 0
1338 ; DAZ-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[B]], i64 1
1339 ; DAZ-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[TMP26]], float [[TMP28]])
1340 ; DAZ-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[TMP27]], float [[TMP29]])
1341 ; DAZ-NEXT: [[TMP32:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0
1342 ; DAZ-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP32]], float [[TMP31]], i64 1
1343 ; DAZ-NEXT: store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 8
1344 ; DAZ-NEXT: ret void
1346 %no.md = fdiv <2 x float> %a, %b
1347 store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 8
1348 %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
1349 store volatile <2 x float> %md.half.ulp, ptr addrspace(1) %out, align 8
1350 %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
1351 store volatile <2 x float> %md.1ulp, ptr addrspace(1) %out, align 8
1352 %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0
1353 store volatile <2 x float> %md.25ulp, ptr addrspace(1) %out, align 8
; Vector reciprocal pattern: a <2 x float> numerator with 1.0 in both lanes is
; divided by %x several times, varying the fast-math flags (none, afn, fast)
; and the !fpmath metadata (none, !1, !0). Every result is stored volatilely
; to %out so that each division stays live.
; The assertions below use the prefix shared by all FileCheck RUN lines and
; expect every vector fdiv to leave the pass with its flags and metadata
; intact; only the numerator is printed in its splat form.
; NOTE(review): !0 and !1 are defined outside this excerpt; going by the value
; names they presumably request 2.5 ulp and 0.5 ulp accuracy -- confirm.
1357 define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath(ptr addrspace(1) %out, <2 x float> %x) {
1358 ; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath(
1359 ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1:[0-9]+]] {
1360 ; CHECK-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]]
1361 ; CHECK-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8
1362 ; CHECK-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META1:![0-9]+]]
1363 ; CHECK-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8
1364 ; CHECK-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]]
1365 ; CHECK-NEXT: store volatile <2 x float> [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 8
1366 ; CHECK-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[X]]
1367 ; CHECK-NEXT: store volatile <2 x float> [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 8
1368 ; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META0]]
1369 ; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8
1370 ; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META0]]
1371 ; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8
1372 ; CHECK-NEXT: ret void
1374 %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x
1375 store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 8
1376 %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1
1377 store volatile <2 x float> %md.half.ulp, ptr addrspace(1) %out, align 8
1378 %afn.no.md = fdiv afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x
1379 store volatile <2 x float> %afn.no.md, ptr addrspace(1) %out, align 8
1380 %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x
1381 store volatile <2 x float> %fast.no.md, ptr addrspace(1) %out, align 8
1382 %afn.25ulp = fdiv afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
1383 store volatile <2 x float> %afn.25ulp, ptr addrspace(1) %out, align 8
1384 %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
1385 store volatile <2 x float> %fast.25ulp, ptr addrspace(1) %out, align 8
; Same shape as a vector reciprocal, but the constant numerator is the
; non-splat vector <1.0, 2.0>, so only lane 0 is a true reciprocal of %x.
; The division is repeated with no flags, afn, and fast, and then with afn and
; fast combined with !fpmath !0; each result is stored volatilely to %out.
; The assertions below use the prefix shared by all FileCheck RUN lines and
; expect every vector fdiv to leave the pass unchanged.
; NOTE(review): !0 is defined outside this excerpt; the "25ulp" value names
; suggest a 2.5 ulp accuracy request -- confirm.
1389 define amdgpu_kernel void @rcp_fdiv_f32_fpmath_vector_nonsplat(ptr addrspace(1) %out, <2 x float> %x) {
1390 ; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_vector_nonsplat(
1391 ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
1392 ; CHECK-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]]
1393 ; CHECK-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8
1394 ; CHECK-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]]
1395 ; CHECK-NEXT: store volatile <2 x float> [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 8
1396 ; CHECK-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]]
1397 ; CHECK-NEXT: store volatile <2 x float> [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 8
1398 ; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]], !fpmath [[META0]]
1399 ; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8
1400 ; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]], !fpmath [[META0]]
1401 ; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8
1402 ; CHECK-NEXT: ret void
1404 %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
1405 store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 8
1406 %afn.no.md = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
1407 store volatile <2 x float> %afn.no.md, ptr addrspace(1) %out, align 8
1408 %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x
1409 store volatile <2 x float> %fast.no.md, ptr addrspace(1) %out, align 8
1410 %afn.25ulp = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
1411 store volatile <2 x float> %afn.25ulp, ptr addrspace(1) %out, align 8
1412 %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
1413 store volatile <2 x float> %fast.25ulp, ptr addrspace(1) %out, align 8
; Partially constant numerator: %x.insert overwrites lane 0 of %x with 1.0 and
; keeps lane 1 variable, so only one lane of the division by %y has a constant
; 1.0 numerator. The division is done with afn and with fast, both carrying
; !fpmath !0, and each result is stored volatilely to %out.
; The assertions below use the prefix shared by all FileCheck RUN lines and
; expect the insertelement and both vector fdivs to leave the pass unchanged.
; NOTE(review): !0 is defined outside this excerpt; the "25ulp" value names
; suggest a 2.5 ulp accuracy request -- confirm.
1417 define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y) {
1418 ; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant(
1419 ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] {
1420 ; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0
1421 ; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> [[X_INSERT]], [[Y]], !fpmath [[META0]]
1422 ; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8
1423 ; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> [[X_INSERT]], [[Y]], !fpmath [[META0]]
1424 ; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8
1425 ; CHECK-NEXT: ret void
1427 %x.insert = insertelement <2 x float> %x, float 1.000000e+00, i32 0
1428 %afn.25ulp = fdiv afn <2 x float> %x.insert, %y, !fpmath !0
1429 store volatile <2 x float> %afn.25ulp, ptr addrspace(1) %out, align 8
1430 %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
1431 store volatile <2 x float> %fast.25ulp, ptr addrspace(1) %out, align 8
; Partially constant numerator with the arcp flag: %x.insert overwrites lane 0
; of %x with 1.0, then a single "fdiv arcp" by %y carrying !fpmath !0 is
; stored volatilely to %out.
; All three expectation sets scalarize the division: both lanes of the
; numerator and of %y are extracted, each lane is computed as numerator times
; a reciprocal of the denominator (every rcp, ldexp and fmul keeps arcp), and
; the vector is rebuilt with two insertelements before the store.
; They differ in how the per-lane reciprocal is formed:
;  - IEEE-GOODFREXP lines (hawaii RUN lines with ieee or dynamic f32
;    denormals): llvm.frexp splits the denominator into mantissa and exponent,
;    llvm.amdgcn.rcp is applied to the mantissa, and llvm.ldexp rescales the
;    result by the negated exponent.
;  - IEEE-BADFREXP lines (tahiti RUN line): same sequence, except the exponent
;    is read with llvm.amdgcn.frexp.exp.i32.f32 on the denominator lane
;    instead of from field 1 of the llvm.frexp result.
;  - DAZ lines (preserve-sign RUN line): llvm.amdgcn.rcp is applied directly
;    to the denominator lane, with no frexp/ldexp scaling.
; NOTE(review): !0 is defined outside this excerpt; the "25ulp" value name
; suggests a 2.5 ulp accuracy request -- confirm.
1435 define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y) {
1436 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp(
1437 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] {
1438 ; IEEE-GOODFREXP-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0
1439 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0
1440 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1
1441 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[Y]], i64 0
1442 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[Y]], i64 1
1443 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
1444 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
1445 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
1446 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
1447 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
1448 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
1449 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]]
1450 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
1451 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
1452 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
1453 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
1454 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
1455 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
1456 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]]
1457 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0
1458 ; IEEE-GOODFREXP-NEXT: [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1
1459 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8
1460 ; IEEE-GOODFREXP-NEXT: ret void
1462 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp(
1463 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] {
1464 ; IEEE-BADFREXP-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0
1465 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0
1466 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1
1467 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[Y]], i64 0
1468 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[Y]], i64 1
1469 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
1470 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
1471 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
1472 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
1473 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
1474 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
1475 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]]
1476 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
1477 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
1478 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
1479 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
1480 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
1481 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
1482 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]]
1483 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0
1484 ; IEEE-BADFREXP-NEXT: [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1
1485 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8
1486 ; IEEE-BADFREXP-NEXT: ret void
1488 ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp(
1489 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] {
1490 ; DAZ-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0
1491 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0
1492 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1
1493 ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[Y]], i64 0
1494 ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[Y]], i64 1
1495 ; DAZ-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP3]])
1496 ; DAZ-NEXT: [[TMP6:%.*]] = fmul arcp float [[TMP1]], [[TMP5]]
1497 ; DAZ-NEXT: [[TMP7:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP4]])
1498 ; DAZ-NEXT: [[TMP8:%.*]] = fmul arcp float [[TMP2]], [[TMP7]]
1499 ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
1500 ; DAZ-NEXT: [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i64 1
1501 ; DAZ-NEXT: store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8
1502 ; DAZ-NEXT: ret void
1504 %x.insert = insertelement <2 x float> %x, float 1.000000e+00, i32 0
1505 %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
1506 store volatile <2 x float> %arcp.25ulp, ptr addrspace(1) %out, align 8
1510 define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) {
1511 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rsq_f32_fpmath(
1512 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
1513 ; IEEE-GOODFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]])
1514 ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]]
1515 ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
1516 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
1517 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
1518 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = fmul contract float [[X]], [[TMP2]]
1519 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
1520 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
1521 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = fmul contract float [[TMP4]], [[TMP5]]
1522 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
1523 ; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2:![0-9]+]]
1524 ; IEEE-GOODFREXP-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
1525 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MD_1ULP_MULTI_USE]])
1526 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
1527 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
1528 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
1529 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP7]])
1530 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
1531 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
1532 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
1533 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
1534 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[X]], [[TMP12]]
1535 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
1536 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
1537 ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = fmul contract float [[TMP14]], [[TMP15]]
1538 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
1539 ; IEEE-GOODFREXP-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]]
1540 ; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath [[META1]]
1541 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
1542 ; IEEE-GOODFREXP-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]])
1543 ; IEEE-GOODFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]]
1544 ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
1545 ; IEEE-GOODFREXP-NEXT: [[AFN_25ULP:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[X]])
1546 ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
1547 ; IEEE-GOODFREXP-NEXT: [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]])
1548 ; IEEE-GOODFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_NO_MD]]
1549 ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
1550 ; IEEE-GOODFREXP-NEXT: [[FAST_25ULP:%.*]] = call fast float @llvm.amdgcn.rsq.f32(float [[X]])
1551 ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
1552 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
1553 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = select contract i1 [[TMP16]], float 0x4170000000000000, float 1.000000e+00
1554 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fmul contract float [[X]], [[TMP17]]
1555 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP18]])
1556 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = select contract i1 [[TMP16]], float 4.096000e+03, float 1.000000e+00
1557 ; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = fmul contract float [[TMP19]], [[TMP20]]
1558 ; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
1559 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
1560 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = select contract i1 [[TMP21]], float 0x4170000000000000, float 1.000000e+00
1561 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = fmul contract float [[X]], [[TMP22]]
1562 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP23]])
1563 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = select contract i1 [[TMP21]], float -4.096000e+03, float -1.000000e+00
1564 ; IEEE-GOODFREXP-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fmul contract float [[TMP24]], [[TMP25]]
1565 ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
1566 ; IEEE-GOODFREXP-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]]
1567 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_HALF_ULP]])
1568 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0
1569 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP26]], 1
1570 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = sub i32 0, [[TMP28]]
1571 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP27]])
1572 ; IEEE-GOODFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP29]])
1573 ; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4
1574 ; IEEE-GOODFREXP-NEXT: [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
1575 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MISMATCH_MD1]])
1576 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
1577 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1
1578 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]]
1579 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]])
1580 ; IEEE-GOODFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]])
1581 ; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4
1582 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = fcmp olt float [[X]], 0x3810000000000000
1583 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 32, i32 0
1584 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP37]])
1585 ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP38]])
1586 ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = select i1 [[TMP36]], i32 -16, i32 0
1587 ; IEEE-GOODFREXP-NEXT: [[SQRT_MISMATCH_MD2:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP39]], i32 [[TMP40]])
1588 ; IEEE-GOODFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD2:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_MISMATCH_MD2]]
1589 ; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4
1590 ; IEEE-GOODFREXP-NEXT: ret void
1592 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rsq_f32_fpmath(
1593 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
1594 ; IEEE-BADFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]])
1595 ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]]
1596 ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
1597 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
1598 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
1599 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = fmul contract float [[X]], [[TMP2]]
1600 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
1601 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
1602 ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = fmul contract float [[TMP4]], [[TMP5]]
1603 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
1604 ; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2:![0-9]+]]
1605 ; IEEE-BADFREXP-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
1606 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MD_1ULP_MULTI_USE]])
1607 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
1608 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_MD_1ULP_MULTI_USE]])
1609 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
1610 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP7]])
1611 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
1612 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
1613 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
1614 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
1615 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[X]], [[TMP12]]
1616 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
1617 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
1618 ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = fmul contract float [[TMP14]], [[TMP15]]
1619 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
1620 ; IEEE-BADFREXP-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]]
1621 ; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath [[META1]]
1622 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
1623 ; IEEE-BADFREXP-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]])
1624 ; IEEE-BADFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]]
1625 ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
1626 ; IEEE-BADFREXP-NEXT: [[AFN_25ULP:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[X]])
1627 ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
1628 ; IEEE-BADFREXP-NEXT: [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]])
1629 ; IEEE-BADFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_NO_MD]]
1630 ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
1631 ; IEEE-BADFREXP-NEXT: [[FAST_25ULP:%.*]] = call fast float @llvm.amdgcn.rsq.f32(float [[X]])
1632 ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
1633 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
1634 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = select contract i1 [[TMP16]], float 0x4170000000000000, float 1.000000e+00
1635 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fmul contract float [[X]], [[TMP17]]
1636 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP18]])
1637 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = select contract i1 [[TMP16]], float 4.096000e+03, float 1.000000e+00
1638 ; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = fmul contract float [[TMP19]], [[TMP20]]
1639 ; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
1640 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000
1641 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = select contract i1 [[TMP21]], float 0x4170000000000000, float 1.000000e+00
1642 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = fmul contract float [[X]], [[TMP22]]
1643 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP23]])
1644 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = select contract i1 [[TMP21]], float -4.096000e+03, float -1.000000e+00
1645 ; IEEE-BADFREXP-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fmul contract float [[TMP24]], [[TMP25]]
1646 ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
1647 ; IEEE-BADFREXP-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]]
1648 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_HALF_ULP]])
1649 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0
1650 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_HALF_ULP]])
1651 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = sub i32 0, [[TMP28]]
1652 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP27]])
1653 ; IEEE-BADFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP29]])
1654 ; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4
1655 ; IEEE-BADFREXP-NEXT: [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
1656 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MISMATCH_MD1]])
1657 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
1658 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_MISMATCH_MD1]])
1659 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]]
1660 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]])
1661 ; IEEE-BADFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]])
1662 ; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4
1663 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = fcmp olt float [[X]], 0x3810000000000000
1664 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 32, i32 0
1665 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP37]])
1666 ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP38]])
1667 ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = select i1 [[TMP36]], i32 -16, i32 0
1668 ; IEEE-BADFREXP-NEXT: [[SQRT_MISMATCH_MD2:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP39]], i32 [[TMP40]])
1669 ; IEEE-BADFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD2:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_MISMATCH_MD2]]
1670 ; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4
1671 ; IEEE-BADFREXP-NEXT: ret void
1673 ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_fpmath(
1674 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
1675 ; DAZ-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]])
1676 ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]]
1677 ; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
1678 ; DAZ-NEXT: [[MD_1ULP:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]])
1679 ; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
1680 ; DAZ-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
1681 ; DAZ-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
1682 ; DAZ-NEXT: [[MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_MD_1ULP_MULTI_USE]])
1683 ; DAZ-NEXT: store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4
1684 ; DAZ-NEXT: [[MD_25ULP:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]])
1685 ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
1686 ; DAZ-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]]
1687 ; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath [[META1]]
1688 ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
1689 ; DAZ-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]])
1690 ; DAZ-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]]
1691 ; DAZ-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4
1692 ; DAZ-NEXT: [[AFN_25ULP:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[X]])
1693 ; DAZ-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4
1694 ; DAZ-NEXT: [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]])
1695 ; DAZ-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_NO_MD]]
1696 ; DAZ-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4
1697 ; DAZ-NEXT: [[FAST_25ULP:%.*]] = call fast float @llvm.amdgcn.rsq.f32(float [[X]])
1698 ; DAZ-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4
1699 ; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]])
1700 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
1701 ; DAZ-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]])
1702 ; DAZ-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fneg contract float [[TMP1]]
1703 ; DAZ-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
1704 ; DAZ-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]]
1705 ; DAZ-NEXT: [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_HALF_ULP]])
1706 ; DAZ-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4
1707 ; DAZ-NEXT: [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
1708 ; DAZ-NEXT: [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_MISMATCH_MD1]])
1709 ; DAZ-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4
1710 ; DAZ-NEXT: [[SQRT_MISMATCH_MD2:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
1711 ; DAZ-NEXT: [[FDIV_SQRT_MISMATCH_MD2:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_MISMATCH_MD2]]
1712 ; DAZ-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4
1713 ; DAZ-NEXT: ret void
1715 %sqrt.x.no.md = call contract float @llvm.sqrt.f32(float %x)
1716 %no.md = fdiv contract float 1.000000e+00, %sqrt.x.no.md
1717 store volatile float %no.md, ptr addrspace(1) %out, align 4
1719 ; Matches the rsq instruction accuracy
1720 %sqrt.md.1ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !2
1721 %md.1ulp = fdiv contract float 1.000000e+00, %sqrt.md.1ulp, !fpmath !2
1722 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4
1724 %sqrt.md.1ulp.multi.use = call contract float @llvm.sqrt.f32(float %x), !fpmath !2
1725 store volatile float %sqrt.md.1ulp.multi.use, ptr addrspace(1) %out, align 4
1726 %md.1ulp.multi.use = fdiv contract float 1.000000e+00, %sqrt.md.1ulp.multi.use, !fpmath !2
1727 store volatile float %md.1ulp.multi.use, ptr addrspace(1) %out, align 4
1729 %sqrt.md.25ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !0
1730 %md.25ulp = fdiv contract float 1.000000e+00, %sqrt.md.25ulp, !fpmath !0
1731 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
1733 %sqrt.md.half.ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !1
1734 %md.half.ulp = fdiv contract float 1.000000e+00, %sqrt.md.half.ulp, !fpmath !1
1735 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4
1737 %sqrt.x.afn.no.md = call contract afn float @llvm.sqrt.f32(float %x)
1738 %afn.no.md = fdiv contract afn float 1.000000e+00, %sqrt.x.afn.no.md
1739 store volatile float %afn.no.md, ptr addrspace(1) %out, align 4
1741 %sqrt.x.afn.25ulp = call contract afn float @llvm.sqrt.f32(float %x), !fpmath !0
1742 %afn.25ulp = fdiv contract afn float 1.000000e+00, %sqrt.x.afn.25ulp, !fpmath !0
1743 store volatile float %afn.25ulp, ptr addrspace(1) %out, align 4
1745 %sqrt.x.fast.no.md = call fast float @llvm.sqrt.f32(float %x)
1746 %fast.no.md = fdiv fast float 1.000000e+00, %sqrt.x.fast.no.md
1747 store volatile float %fast.no.md, ptr addrspace(1) %out, align 4
1749 %sqrt.x.fast.25ulp = call fast float @llvm.sqrt.f32(float %x), !fpmath !0
1750 %fast.25ulp = fdiv fast float 1.000000e+00, %sqrt.x.fast.25ulp, !fpmath !0
1751 store volatile float %fast.25ulp, ptr addrspace(1) %out, align 4
1754 ; Test mismatched metadata/flags between the sqrt and fdiv
1756 ; Test the expected opencl default pattern
1757 %sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
1758 %fdiv.opencl = fdiv contract float 1.0, %sqrt.x.3ulp, !fpmath !0
1759 store volatile float %fdiv.opencl, ptr addrspace(1) %out, align 4
1761 %neg.sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
1762 %neg.fdiv.opencl = fdiv contract float -1.0, %neg.sqrt.x.3ulp, !fpmath !0
1763 store volatile float %neg.fdiv.opencl, ptr addrspace(1) %out, align 4
1765 ; sqrt demands higher precision than fdiv
1766 %sqrt.x.half.ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !1
1767 %fdiv.sqrt.mismatch.md0 = fdiv contract float 1.0, %sqrt.x.half.ulp, !fpmath !0
1768 store volatile float %fdiv.sqrt.mismatch.md0, ptr addrspace(1) %out, align 4
1770 ; sqrt demands full precision but has afn
1771 %sqrt.mismatch.md1 = call afn float @llvm.sqrt.f32(float %x)
1772 %fdiv.sqrt.mismatch.md1 = fdiv contract float 1.0, %sqrt.mismatch.md1, !fpmath !0
1773 store volatile float %fdiv.sqrt.mismatch.md1, ptr addrspace(1) %out, align 4
1775 ; sqrt has relaxed precision, fdiv has afn only
1776 %sqrt.mismatch.md2 = call contract float @llvm.sqrt.f32(float %x), !fpmath !3
1777 %fdiv.sqrt.mismatch.md2 = fdiv contract afn float 1.0, %sqrt.mismatch.md2
1778 store volatile float %fdiv.sqrt.mismatch.md2, ptr addrspace(1) %out, align 4
; Checks how nnan/ninf/nsz fast-math flags on a 1.0 / sqrt(x) pair are carried
; onto the generated code. Every case uses a contract sqrt with !fpmath !3 and a
; contract fdiv with !fpmath !0, and stores its result volatilely to %out.
; Per the assertions below, the IEEE runs expect a scaled sequence around
; llvm.amdgcn.rsq.f32 (compare, select, multiply, rsq, select, multiply) and the
; DAZ run expects a single llvm.amdgcn.rsq.f32 call; in both, the flags of the
; sqrt and the fdiv appear on the emitted instructions.
1783 define amdgpu_kernel void @rsq_f32_fpmath_flags(ptr addrspace(1) %out, float %x) {
1784 ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_fpmath_flags(
1785 ; IEEE-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
1786 ; IEEE-NEXT: [[TMP1:%.*]] = fcmp nnan ninf contract olt float [[X]], 0x3810000000000000
1787 ; IEEE-NEXT: [[TMP2:%.*]] = select nnan ninf contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
1788 ; IEEE-NEXT: [[TMP3:%.*]] = fmul nnan ninf contract float [[X]], [[TMP2]]
1789 ; IEEE-NEXT: [[TMP4:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
1790 ; IEEE-NEXT: [[TMP5:%.*]] = select nnan ninf contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
1791 ; IEEE-NEXT: [[FDIV_OPENCL_NINF_NNAN:%.*]] = fmul nnan ninf contract float [[TMP4]], [[TMP5]]
1792 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
1793 ; IEEE-NEXT: [[TMP6:%.*]] = fcmp ninf contract olt float [[X]], 0x3810000000000000
1794 ; IEEE-NEXT: [[TMP7:%.*]] = select ninf contract i1 [[TMP6]], float 0x4170000000000000, float 1.000000e+00
1795 ; IEEE-NEXT: [[TMP8:%.*]] = fmul ninf contract float [[X]], [[TMP7]]
1796 ; IEEE-NEXT: [[TMP9:%.*]] = call ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP8]])
1797 ; IEEE-NEXT: [[TMP10:%.*]] = select ninf contract i1 [[TMP6]], float 4.096000e+03, float 1.000000e+00
1798 ; IEEE-NEXT: [[FDIV_OPENCL_NINF:%.*]] = fmul ninf contract float [[TMP9]], [[TMP10]]
1799 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NINF]], ptr addrspace(1) [[OUT]], align 4
1800 ; IEEE-NEXT: [[TMP11:%.*]] = fcmp nnan contract olt float [[X]], 0x3810000000000000
1801 ; IEEE-NEXT: [[TMP12:%.*]] = select nnan contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
1802 ; IEEE-NEXT: [[TMP13:%.*]] = fmul nnan contract float [[X]], [[TMP12]]
1803 ; IEEE-NEXT: [[TMP14:%.*]] = call nnan contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
1804 ; IEEE-NEXT: [[TMP15:%.*]] = select nnan contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
1805 ; IEEE-NEXT: [[FDIV_OPENCL_NNAN:%.*]] = fmul nnan contract float [[TMP14]], [[TMP15]]
1806 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NNAN]], ptr addrspace(1) [[OUT]], align 4
1807 ; IEEE-NEXT: [[TMP16:%.*]] = fcmp nsz contract olt float [[X]], 0x3810000000000000
1808 ; IEEE-NEXT: [[TMP17:%.*]] = select nsz contract i1 [[TMP16]], float 0x4170000000000000, float 1.000000e+00
1809 ; IEEE-NEXT: [[TMP18:%.*]] = fmul nsz contract float [[X]], [[TMP17]]
1810 ; IEEE-NEXT: [[TMP19:%.*]] = call nsz contract float @llvm.amdgcn.rsq.f32(float [[TMP18]])
1811 ; IEEE-NEXT: [[TMP20:%.*]] = select nsz contract i1 [[TMP16]], float 4.096000e+03, float 1.000000e+00
1812 ; IEEE-NEXT: [[FDIV_OPENCL_NSZ:%.*]] = fmul nsz contract float [[TMP19]], [[TMP20]]
1813 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NSZ]], ptr addrspace(1) [[OUT]], align 4
1814 ; IEEE-NEXT: [[TMP21:%.*]] = fcmp nnan ninf contract olt float [[X]], 0x3810000000000000
1815 ; IEEE-NEXT: [[TMP22:%.*]] = select nnan ninf contract i1 [[TMP21]], float 0x4170000000000000, float 1.000000e+00
1816 ; IEEE-NEXT: [[TMP23:%.*]] = fmul nnan ninf contract float [[X]], [[TMP22]]
1817 ; IEEE-NEXT: [[TMP24:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP23]])
1818 ; IEEE-NEXT: [[TMP25:%.*]] = select nnan ninf contract i1 [[TMP21]], float 4.096000e+03, float 1.000000e+00
1819 ; IEEE-NEXT: [[FDIV_OPENCL_NNAN_MIX0:%.*]] = fmul nnan ninf contract float [[TMP24]], [[TMP25]]
1820 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NNAN_MIX0]], ptr addrspace(1) [[OUT]], align 4
1821 ; IEEE-NEXT: [[TMP26:%.*]] = fcmp nnan ninf contract olt float [[X]], 0x3810000000000000
1822 ; IEEE-NEXT: [[TMP27:%.*]] = select nnan ninf contract i1 [[TMP26]], float 0x4170000000000000, float 1.000000e+00
1823 ; IEEE-NEXT: [[TMP28:%.*]] = fmul nnan ninf contract float [[X]], [[TMP27]]
1824 ; IEEE-NEXT: [[TMP29:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP28]])
1825 ; IEEE-NEXT: [[TMP30:%.*]] = select nnan ninf contract i1 [[TMP26]], float 4.096000e+03, float 1.000000e+00
1826 ; IEEE-NEXT: [[FDIV_OPENCL_NINF_MIX1:%.*]] = fmul nnan ninf contract float [[TMP29]], [[TMP30]]
1827 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NINF_MIX1]], ptr addrspace(1) [[OUT]], align 4
1828 ; IEEE-NEXT: ret void
1830 ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_fpmath_flags(
1831 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
1832 ; DAZ-NEXT: [[FDIV_OPENCL_NINF_NNAN:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[X]])
1833 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
1834 ; DAZ-NEXT: [[FDIV_OPENCL_NINF:%.*]] = call ninf contract float @llvm.amdgcn.rsq.f32(float [[X]])
1835 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NINF]], ptr addrspace(1) [[OUT]], align 4
1836 ; DAZ-NEXT: [[FDIV_OPENCL_NNAN:%.*]] = call nnan contract float @llvm.amdgcn.rsq.f32(float [[X]])
1837 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NNAN]], ptr addrspace(1) [[OUT]], align 4
1838 ; DAZ-NEXT: [[FDIV_OPENCL_NSZ:%.*]] = call nsz contract float @llvm.amdgcn.rsq.f32(float [[X]])
1839 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NSZ]], ptr addrspace(1) [[OUT]], align 4
1840 ; DAZ-NEXT: [[FDIV_OPENCL_NNAN_MIX0:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[X]])
1841 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NNAN_MIX0]], ptr addrspace(1) [[OUT]], align 4
1842 ; DAZ-NEXT: [[FDIV_OPENCL_NINF_MIX1:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[X]])
1843 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NINF_MIX1]], ptr addrspace(1) [[OUT]], align 4
1844 ; DAZ-NEXT: ret void
; Both ninf and nnan on the sqrt and on the fdiv.
1846 %sqrt.x.3ulp.ninf.nnan = call contract ninf nnan float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
1847 %fdiv.opencl.ninf.nnan = fdiv contract ninf nnan float 1.0, %sqrt.x.3ulp.ninf.nnan, !fpmath !0
1848 store volatile float %fdiv.opencl.ninf.nnan, ptr addrspace(1) %out, align 4
; ninf on both instructions.
1850 %sqrt.x.3ulp.ninf = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
1851 %fdiv.opencl.ninf = fdiv contract ninf float 1.0, %sqrt.x.3ulp.ninf, !fpmath !0
1852 store volatile float %fdiv.opencl.ninf, ptr addrspace(1) %out, align 4
; nnan on both instructions.
1854 %sqrt.x.3ulp.nnan = call contract nnan float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
1855 %fdiv.opencl.nnan = fdiv contract nnan float 1.0, %sqrt.x.3ulp.nnan, !fpmath !0
1856 store volatile float %fdiv.opencl.nnan, ptr addrspace(1) %out, align 4
; nsz on both instructions.
1858 %sqrt.x.3ulp.nsz = call contract nsz float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
1859 %fdiv.opencl.nsz = fdiv contract nsz float 1.0, %sqrt.x.3ulp.nsz, !fpmath !0
1860 store volatile float %fdiv.opencl.nsz, ptr addrspace(1) %out, align 4
; Mixed flags: ninf only on the sqrt, nnan only on the fdiv. The expected
; output carries both flags.
1862 %sqrt.x.3ulp.ninf.mix0 = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !3
1863 %fdiv.opencl.nnan.mix0 = fdiv contract nnan float 1.0, %sqrt.x.3ulp.ninf.mix0, !fpmath !0
1864 store volatile float %fdiv.opencl.nnan.mix0, ptr addrspace(1) %out, align 4
; Reverse of mix0: nnan only on the sqrt, ninf only on the fdiv, so that both
; distributions of the two flags are covered.
1866 %sqrt.x.3ulp.nnan.mix1 = call contract nnan float @llvm.sqrt.f32(float %x), !fpmath !3
1867 %fdiv.opencl.ninf.mix1 = fdiv contract ninf float 1.0, %sqrt.x.3ulp.nnan.mix1, !fpmath !0
1868 store volatile float %fdiv.opencl.ninf.mix1, ptr addrspace(1) %out, align 4
; 1.0 / sqrt(x) where the sqrt call has no contract flag and only the fdiv is
; marked contract; both carry !fpmath !2.
; Per the assertions below, no llvm.amdgcn.rsq.f32 is expected here:
;  - IEEE runs keep the llvm.sqrt.f32 call and expand the fdiv into
;    frexp / llvm.amdgcn.rcp.f32 / ldexp. The GOODFREXP variant takes the
;    exponent from field 1 of the llvm.frexp result, the BADFREXP variant from
;    a separate llvm.amdgcn.frexp.exp.i32.f32 call.
;  - The DAZ run expects llvm.amdgcn.sqrt.f32 followed by llvm.amdgcn.rcp.f32.
; NOTE(review): the value is named "3ulp" but is tagged !fpmath !2, which
; rsq_f32_fpmath attaches to values named "1ulp" - confirm the intended bound.
1873 define float @rsq_f32_missing_contract0(float %x) {
1874 ; IEEE-GOODFREXP-LABEL: define float @rsq_f32_missing_contract0(
1875 ; IEEE-GOODFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] {
1876 ; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]]
1877 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]])
1878 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
1879 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
1880 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
1881 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP2]])
1882 ; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
1883 ; IEEE-GOODFREXP-NEXT: ret float [[FDIV_OPENCL]]
1885 ; IEEE-BADFREXP-LABEL: define float @rsq_f32_missing_contract0(
1886 ; IEEE-BADFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] {
1887 ; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]]
1888 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]])
1889 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
1890 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_3ULP]])
1891 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
1892 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP2]])
1893 ; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
1894 ; IEEE-BADFREXP-NEXT: ret float [[FDIV_OPENCL]]
1896 ; DAZ-LABEL: define float @rsq_f32_missing_contract0(
1897 ; DAZ-SAME: float [[X:%.*]]) #[[ATTR1]] {
1898 ; DAZ-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
1899 ; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]])
1900 ; DAZ-NEXT: ret float [[FDIV_OPENCL]]
; The sqrt deliberately lacks the contract flag that the fdiv has.
1902 %sqrt.x.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
1903 %fdiv.opencl = fdiv contract float 1.0, %sqrt.x.3ulp, !fpmath !2
1904 ret float %fdiv.opencl
; 1.0 / sqrt(x) where the sqrt call is marked contract but the fdiv is not;
; both carry !fpmath !2.
; Per the assertions below, no llvm.amdgcn.rsq.f32 is expected here:
;  - IEEE runs keep the contract llvm.sqrt.f32 call and expand the fdiv into
;    frexp / llvm.amdgcn.rcp.f32 / ldexp, with no fast-math flags on the rcp
;    and ldexp calls. The GOODFREXP variant takes the exponent from field 1 of
;    the llvm.frexp result, the BADFREXP variant from a separate
;    llvm.amdgcn.frexp.exp.i32.f32 call.
;  - The DAZ run expects llvm.amdgcn.sqrt.f32 followed by an unflagged
;    llvm.amdgcn.rcp.f32.
; NOTE(review): the value is named "3ulp" but is tagged !fpmath !2, which
; rsq_f32_fpmath attaches to values named "1ulp" - confirm the intended bound.
1907 define float @rsq_f32_missing_contract1(float %x) {
1908 ; IEEE-GOODFREXP-LABEL: define float @rsq_f32_missing_contract1(
1909 ; IEEE-GOODFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] {
1910 ; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]]
1911 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]])
1912 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
1913 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
1914 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
1915 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
1916 ; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
1917 ; IEEE-GOODFREXP-NEXT: ret float [[FDIV_OPENCL]]
1919 ; IEEE-BADFREXP-LABEL: define float @rsq_f32_missing_contract1(
1920 ; IEEE-BADFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] {
1921 ; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]]
1922 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]])
1923 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
1924 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_3ULP]])
1925 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
1926 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
1927 ; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
1928 ; IEEE-BADFREXP-NEXT: ret float [[FDIV_OPENCL]]
1930 ; DAZ-LABEL: define float @rsq_f32_missing_contract1(
1931 ; DAZ-SAME: float [[X:%.*]]) #[[ATTR1]] {
1932 ; DAZ-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
1933 ; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]])
1934 ; DAZ-NEXT: ret float [[FDIV_OPENCL]]
; The fdiv deliberately lacks the contract flag that the sqrt has.
1936 %sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !2
1937 %fdiv.opencl = fdiv float 1.0, %sqrt.x.3ulp, !fpmath !2
1938 ret float %fdiv.opencl
; 1.0 / sqrt(x) where both instructions are contract and tagged !fpmath !2, but
; each carries a different extra fast-math flag: ninf on the sqrt, nsz on the
; fdiv.
; Per the assertions below, the emitted instructions carry the flags of both
; inputs ("ninf nsz contract"):
;  - IEEE runs expect the scaled sequence around llvm.amdgcn.rsq.f32 (compare
;    against a threshold, select a scale factor, multiply, rsq, select a second
;    factor, multiply).
;  - The DAZ run expects a single llvm.amdgcn.rsq.f32 call.
1941 define float @rsq_f32_flag_merge(float %x) {
1942 ; IEEE-LABEL: define float @rsq_f32_flag_merge(
1943 ; IEEE-SAME: float [[X:%.*]]) #[[ATTR1]] {
1944 ; IEEE-NEXT: [[TMP1:%.*]] = fcmp ninf nsz contract olt float [[X]], 0x3810000000000000
1945 ; IEEE-NEXT: [[TMP2:%.*]] = select ninf nsz contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
1946 ; IEEE-NEXT: [[TMP3:%.*]] = fmul ninf nsz contract float [[X]], [[TMP2]]
1947 ; IEEE-NEXT: [[TMP4:%.*]] = call ninf nsz contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
1948 ; IEEE-NEXT: [[TMP5:%.*]] = select ninf nsz contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
1949 ; IEEE-NEXT: [[FDIV_OPENCL:%.*]] = fmul ninf nsz contract float [[TMP4]], [[TMP5]]
1950 ; IEEE-NEXT: ret float [[FDIV_OPENCL]]
1952 ; DAZ-LABEL: define float @rsq_f32_flag_merge(
1953 ; DAZ-SAME: float [[X:%.*]]) #[[ATTR1]] {
1954 ; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = call ninf nsz contract float @llvm.amdgcn.rsq.f32(float [[X]])
1955 ; DAZ-NEXT: ret float [[FDIV_OPENCL]]
; ninf appears only on the sqrt, nsz only on the fdiv.
1957 %sqrt.x.3ulp = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !2
1958 %fdiv.opencl = fdiv contract nsz float 1.0, %sqrt.x.3ulp, !fpmath !2
1959 ret float %fdiv.opencl
; 1.0 / sqrt(x) for inputs whose floating-point class is restricted by a
; nofpclass parameter attribute: no NaN (%no.nan), no infinity (%no.inf), and
; neither (%no.inf.nan). Each case uses a contract sqrt with !fpmath !3 and a
; contract fdiv with !fpmath !0, and stores its result volatilely to %out.
; Per the assertions below, all three cases expect the same lowering: the IEEE
; runs expect the scaled sequence around llvm.amdgcn.rsq.f32, and the DAZ run
; expects a single llvm.amdgcn.rsq.f32 call.
; %no.inf is declared nofpclass(inf) so that it matches its name and is
; distinct from the %no.nan case.
1962 define amdgpu_kernel void @rsq_f32_knownfinite(ptr addrspace(1) %out, float nofpclass(nan) %no.nan,
1963 ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_knownfinite(
1964 ; IEEE-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
1965 ; IEEE-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[NO_NAN]], 0x3810000000000000
1966 ; IEEE-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
1967 ; IEEE-NEXT: [[TMP3:%.*]] = fmul contract float [[NO_NAN]], [[TMP2]]
1968 ; IEEE-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
1969 ; IEEE-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
1970 ; IEEE-NEXT: [[FDIV_OPENCL_NO_NAN:%.*]] = fmul contract float [[TMP4]], [[TMP5]]
1971 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
1972 ; IEEE-NEXT: [[TMP6:%.*]] = fcmp contract olt float [[NO_INF]], 0x3810000000000000
1973 ; IEEE-NEXT: [[TMP7:%.*]] = select contract i1 [[TMP6]], float 0x4170000000000000, float 1.000000e+00
1974 ; IEEE-NEXT: [[TMP8:%.*]] = fmul contract float [[NO_INF]], [[TMP7]]
1975 ; IEEE-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP8]])
1976 ; IEEE-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP6]], float 4.096000e+03, float 1.000000e+00
1977 ; IEEE-NEXT: [[FDIV_OPENCL_NO_INF:%.*]] = fmul contract float [[TMP9]], [[TMP10]]
1978 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_INF]], ptr addrspace(1) [[OUT]], align 4
1979 ; IEEE-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[NO_INF_NAN]], 0x3810000000000000
1980 ; IEEE-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
1981 ; IEEE-NEXT: [[TMP13:%.*]] = fmul contract float [[NO_INF_NAN]], [[TMP12]]
1982 ; IEEE-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
1983 ; IEEE-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
1984 ; IEEE-NEXT: [[FDIV_OPENCL_NO_INF_NAN:%.*]] = fmul contract float [[TMP14]], [[TMP15]]
1985 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
1986 ; IEEE-NEXT: ret void
1988 ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_knownfinite(
1989 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
1990 ; DAZ-NEXT: [[FDIV_OPENCL_NO_NAN:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_NAN]])
1991 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
1992 ; DAZ-NEXT: [[FDIV_OPENCL_NO_INF:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_INF]])
1993 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_INF]], ptr addrspace(1) [[OUT]], align 4
1994 ; DAZ-NEXT: [[FDIV_OPENCL_NO_INF_NAN:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_INF_NAN]])
1995 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
1996 ; DAZ-NEXT: ret void
1998 float nofpclass(inf) %no.inf,
1999 float nofpclass(inf nan) %no.inf.nan) {
; Input known not to be NaN.
2000 %sqrt.x.3ulp.no.nan = call contract float @llvm.sqrt.f32(float %no.nan), !fpmath !3
2001 %fdiv.opencl.no.nan = fdiv contract float 1.0, %sqrt.x.3ulp.no.nan, !fpmath !0
2002 store volatile float %fdiv.opencl.no.nan, ptr addrspace(1) %out, align 4
; Input known not to be an infinity.
2004 %sqrt.x.3ulp.no.inf = call contract float @llvm.sqrt.f32(float %no.inf), !fpmath !3
2005 %fdiv.opencl.no.inf = fdiv contract float 1.0, %sqrt.x.3ulp.no.inf, !fpmath !0
2006 store volatile float %fdiv.opencl.no.inf, ptr addrspace(1) %out, align 4
; Input known to be neither NaN nor an infinity.
2008 %sqrt.x.3ulp.no.inf.nan = call contract float @llvm.sqrt.f32(float %no.inf.nan), !fpmath !3
2009 %fdiv.opencl.no.inf.nan = fdiv contract float 1.0, %sqrt.x.3ulp.no.inf.nan, !fpmath !0
2010 store volatile float %fdiv.opencl.no.inf.nan, ptr addrspace(1) %out, align 4
; rsq_f32_known_nozero
; Input: 1.0 / sqrt(x) with the contract flag (sqrt carries !fpmath !3, fdiv carries
; !fpmath !0) for one argument marked nofpclass(zero) and one marked nofpclass(zero sub).
; NOTE(review): !0 and !3 are defined outside this view; presumably the 2.5 ulp and 3 ulp
; nodes mentioned in the OpenCL comment in rsq_f32_vector_fpmath - confirm.
; Expected output, as recorded by the assertions below:
;  - IEEE runs: %no.zero keeps the scaling sequence around llvm.amdgcn.rsq.f32: an input
;    below 0x3810000000000000 (2^-126) is multiplied by 0x4170000000000000 (2^24) before
;    the rsq and the result by 4096 (2^12) afterwards. %no.zero.sub is a bare rsq call.
;  - DAZ runs: both arguments become a bare rsq call.
2015 define amdgpu_kernel void @rsq_f32_known_nozero(ptr addrspace(1) %out, float nofpclass(zero) %no.zero, float nofpclass(zero sub) %no.zero.sub) {
2016 ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_known_nozero(
2017 ; IEEE-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
2018 ; IEEE-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[NO_ZERO]], 0x3810000000000000
2019 ; IEEE-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
2020 ; IEEE-NEXT: [[TMP3:%.*]] = fmul contract float [[NO_ZERO]], [[TMP2]]
2021 ; IEEE-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
2022 ; IEEE-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
2023 ; IEEE-NEXT: [[FDIV_OPENCL_NO_ZERO:%.*]] = fmul contract float [[TMP4]], [[TMP5]]
2024 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
2025 ; IEEE-NEXT: [[FDIV_OPENCL_NO_ZERO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_ZERO_SUB]])
2026 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
2027 ; IEEE-NEXT: ret void
2029 ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_known_nozero(
2030 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
2031 ; DAZ-NEXT: [[FDIV_OPENCL_NO_ZERO:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_ZERO]])
2032 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
2033 ; DAZ-NEXT: [[FDIV_OPENCL_NO_ZERO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_ZERO_SUB]])
2034 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
2035 ; DAZ-NEXT: ret void
2037 %sqrt.x.3ulp.no.zero = call contract float @llvm.sqrt.f32(float %no.zero), !fpmath !3
2038 %fdiv.opencl.no.zero = fdiv contract float 1.0, %sqrt.x.3ulp.no.zero, !fpmath !0
2039 store volatile float %fdiv.opencl.no.zero, ptr addrspace(1) %out, align 4
2041 %sqrt.x.3ulp.no.zero.sub = call contract float @llvm.sqrt.f32(float %no.zero.sub), !fpmath !3
2042 %fdiv.opencl.no.zero.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.zero.sub, !fpmath !0
2043 store volatile float %fdiv.opencl.no.zero.sub, ptr addrspace(1) %out, align 4
; rsq_f32_known_nosub
; Input: 1.0 / sqrt(x) with the contract flag (sqrt !fpmath !3, fdiv !fpmath !0) for three
; arguments marked nofpclass(sub), nofpclass(psub) and nofpclass(nsub) respectively.
; Expected output, as recorded by the assertions below:
;  - IEEE runs: only the nofpclass(sub) argument becomes a bare llvm.amdgcn.rsq.f32 call.
;    The psub-only and nsub-only arguments still get the scaling sequence (compare against
;    0x3810000000000000 = 2^-126, pre-multiply by 0x4170000000000000 = 2^24, post-multiply
;    by 4096 = 2^12).
;  - DAZ runs: all three become a bare rsq call.
2048 define amdgpu_kernel void @rsq_f32_known_nosub(ptr addrspace(1) %out, float nofpclass(sub) %no.sub, float nofpclass(psub) %no.psub, float nofpclass(nsub) %no.nsub) {
2049 ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_known_nosub(
2050 ; IEEE-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]]) #[[ATTR1]] {
2051 ; IEEE-NEXT: [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_SUB]])
2052 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
2053 ; IEEE-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[NO_PSUB]], 0x3810000000000000
2054 ; IEEE-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
2055 ; IEEE-NEXT: [[TMP3:%.*]] = fmul contract float [[NO_PSUB]], [[TMP2]]
2056 ; IEEE-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
2057 ; IEEE-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
2058 ; IEEE-NEXT: [[FDIV_OPENCL_NO_PSUB:%.*]] = fmul contract float [[TMP4]], [[TMP5]]
2059 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
2060 ; IEEE-NEXT: [[TMP6:%.*]] = fcmp contract olt float [[NO_NSUB]], 0x3810000000000000
2061 ; IEEE-NEXT: [[TMP7:%.*]] = select contract i1 [[TMP6]], float 0x4170000000000000, float 1.000000e+00
2062 ; IEEE-NEXT: [[TMP8:%.*]] = fmul contract float [[NO_NSUB]], [[TMP7]]
2063 ; IEEE-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP8]])
2064 ; IEEE-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP6]], float 4.096000e+03, float 1.000000e+00
2065 ; IEEE-NEXT: [[FDIV_OPENCL_NO_NSUB:%.*]] = fmul contract float [[TMP9]], [[TMP10]]
2066 ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
2067 ; IEEE-NEXT: ret void
2069 ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_known_nosub(
2070 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]]) #[[ATTR1]] {
2071 ; DAZ-NEXT: [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_SUB]])
2072 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
2073 ; DAZ-NEXT: [[FDIV_OPENCL_NO_PSUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_PSUB]])
2074 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
2075 ; DAZ-NEXT: [[FDIV_OPENCL_NO_NSUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_NSUB]])
2076 ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
2077 ; DAZ-NEXT: ret void
2079 %sqrt.x.3ulp.no.sub = call contract float @llvm.sqrt.f32(float %no.sub), !fpmath !3
2080 %fdiv.opencl.no.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.sub, !fpmath !0
2081 store volatile float %fdiv.opencl.no.sub, ptr addrspace(1) %out, align 4
2083 %sqrt.x.3ulp.no.psub = call contract float @llvm.sqrt.f32(float %no.psub), !fpmath !3
2084 %fdiv.opencl.no.psub = fdiv contract float 1.0, %sqrt.x.3ulp.no.psub, !fpmath !0
2085 store volatile float %fdiv.opencl.no.psub, ptr addrspace(1) %out, align 4
2087 %sqrt.x.3ulp.no.nsub = call contract float @llvm.sqrt.f32(float %no.nsub), !fpmath !3
2088 %fdiv.opencl.no.nsub = fdiv contract float 1.0, %sqrt.x.3ulp.no.nsub, !fpmath !0
2089 store volatile float %fdiv.opencl.no.nsub, ptr addrspace(1) %out, align 4
; rsq_f32_assume_nosub
; Input: an llvm.assume that fabs(%x) is ordered and >= 0x3810000000000000 (2^-126),
; followed by 1.0 / sqrt(%x) with the contract flag (sqrt !fpmath !3, fdiv !fpmath !0).
; Expected output, as recorded by the assertions below (common prefix, so every RUN line):
; the fabs/fcmp/assume are left in place and the sqrt + fdiv pair becomes a single bare
; llvm.amdgcn.rsq.f32 call on %x, with no input/output scaling.
2094 define amdgpu_kernel void @rsq_f32_assume_nosub(ptr addrspace(1) %out, float %x) {
2095 ; CHECK-LABEL: define amdgpu_kernel void @rsq_f32_assume_nosub(
2096 ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
2097 ; CHECK-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
2098 ; CHECK-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
2099 ; CHECK-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
2100 ; CHECK-NEXT: [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]])
2101 ; CHECK-NEXT: store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
2102 ; CHECK-NEXT: ret void
2104 %fabs.x = call float @llvm.fabs.f32(float %x)
2105 %is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000
2106 call void @llvm.assume(i1 %is.not.subnormal)
2107 %sqrt.x.3ulp.no.sub = call contract float @llvm.sqrt.f32(float %x), !fpmath !3
2108 %fdiv.opencl.no.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.sub, !fpmath !0
2109 store volatile float %fdiv.opencl.no.sub, ptr addrspace(1) %out, align 4
; rsq_f32_vector_fpmath
; <2 x float> variant of the 1.0 / sqrt(x) tests. Four cases are stored in order:
;  1. no !fpmath on either instruction;
;  2. !fpmath !2 on both sqrt and fdiv;
;  3. !fpmath !2 on both, numerator <1.0, poison>;
;  4. sqrt !fpmath !3 with fdiv !fpmath !0 (the "opencl default pattern" below).
; Expected output, as recorded by the assertions below:
;  - Case 1 is left as a vector sqrt + vector fdiv under every prefix.
;  - Cases 2 and 4 are scalarized: each element of %x goes through llvm.amdgcn.rsq.f32
;    (wrapped in the 2^-126 compare / 2^24 pre-scale / 2^12 post-scale sequence for IEEE
;    runs, bare for DAZ runs) and the results are rebuilt with insertelement. The original
;    vector sqrt call and its extractelements remain in the expected output.
;  - Case 3: element 0 (numerator 1.0) becomes rsq for IEEE runs and rcp of the sqrt
;    element for DAZ runs; element 1 (numerator poison) is expanded as a frexp / rcp /
;    ldexp division whose numerator operand is poison. For DAZ runs the vector sqrt itself
;    is additionally split into per-element llvm.amdgcn.sqrt.f32 calls.
;  - The BADFREXP variant differs from GOODFREXP only in reading the exponent through
;    llvm.amdgcn.frexp.exp.i32.f32 instead of extractvalue index 1 of llvm.frexp.
2113 define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x float> %x) {
2114 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath(
2115 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
2116 ; IEEE-GOODFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
2117 ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]]
2118 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
2119 ; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]]
2120 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0
2121 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1
2122 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0
2123 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[X]], i64 1
2124 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = fcmp contract olt float [[TMP3]], 0x3810000000000000
2125 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = select contract i1 [[TMP5]], float 0x4170000000000000, float 1.000000e+00
2126 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = fmul contract float [[TMP3]], [[TMP6]]
2127 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP7]])
2128 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = select contract i1 [[TMP5]], float 4.096000e+03, float 1.000000e+00
2129 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = fmul contract float [[TMP8]], [[TMP9]]
2130 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[TMP4]], 0x3810000000000000
2131 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
2132 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[TMP4]], [[TMP12]]
2133 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
2134 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
2135 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = fmul contract float [[TMP14]], [[TMP15]]
2136 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0
2137 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i64 1
2138 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
2139 ; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]]
2140 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0
2141 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1
2142 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[X]], i64 0
2143 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[X]], i64 1
2144 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = fcmp contract olt float [[TMP20]], 0x3810000000000000
2145 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = select contract i1 [[TMP22]], float 0x4170000000000000, float 1.000000e+00
2146 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = fmul contract float [[TMP20]], [[TMP23]]
2147 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP24]])
2148 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = select contract i1 [[TMP22]], float 4.096000e+03, float 1.000000e+00
2149 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = fmul contract float [[TMP25]], [[TMP26]]
2150 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]])
2151 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
2152 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
2153 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]])
2154 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
2155 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0
2156 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP32]], 1
2157 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]]
2158 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]]
2159 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]])
2160 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i64 0
2161 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1
2162 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4
2163 ; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META3:![0-9]+]]
2164 ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0
2165 ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1
2166 ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractelement <2 x float> [[X]], i64 0
2167 ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = extractelement <2 x float> [[X]], i64 1
2168 ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = fcmp contract olt float [[TMP41]], 0x3810000000000000
2169 ; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = select contract i1 [[TMP43]], float 0x4170000000000000, float 1.000000e+00
2170 ; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = fmul contract float [[TMP41]], [[TMP44]]
2171 ; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP45]])
2172 ; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = select contract i1 [[TMP43]], float 4.096000e+03, float 1.000000e+00
2173 ; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = fmul contract float [[TMP46]], [[TMP47]]
2174 ; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = fcmp contract olt float [[TMP42]], 0x3810000000000000
2175 ; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = select contract i1 [[TMP49]], float 0x4170000000000000, float 1.000000e+00
2176 ; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = fmul contract float [[TMP42]], [[TMP50]]
2177 ; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP51]])
2178 ; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = select contract i1 [[TMP49]], float 4.096000e+03, float 1.000000e+00
2179 ; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = fmul contract float [[TMP52]], [[TMP53]]
2180 ; IEEE-GOODFREXP-NEXT: [[TMP55:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i64 0
2181 ; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = insertelement <2 x float> [[TMP55]], float [[TMP54]], i64 1
2182 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
2183 ; IEEE-GOODFREXP-NEXT: ret void
2185 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath(
2186 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
2187 ; IEEE-BADFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
2188 ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]]
2189 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
2190 ; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]]
2191 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0
2192 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1
2193 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0
2194 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[X]], i64 1
2195 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = fcmp contract olt float [[TMP3]], 0x3810000000000000
2196 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = select contract i1 [[TMP5]], float 0x4170000000000000, float 1.000000e+00
2197 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = fmul contract float [[TMP3]], [[TMP6]]
2198 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP7]])
2199 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = select contract i1 [[TMP5]], float 4.096000e+03, float 1.000000e+00
2200 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = fmul contract float [[TMP8]], [[TMP9]]
2201 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[TMP4]], 0x3810000000000000
2202 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
2203 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[TMP4]], [[TMP12]]
2204 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
2205 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
2206 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = fmul contract float [[TMP14]], [[TMP15]]
2207 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0
2208 ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i64 1
2209 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
2210 ; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]]
2211 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0
2212 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1
2213 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[X]], i64 0
2214 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[X]], i64 1
2215 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = fcmp contract olt float [[TMP20]], 0x3810000000000000
2216 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = select contract i1 [[TMP22]], float 0x4170000000000000, float 1.000000e+00
2217 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = fmul contract float [[TMP20]], [[TMP23]]
2218 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP24]])
2219 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = select contract i1 [[TMP22]], float 4.096000e+03, float 1.000000e+00
2220 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = fmul contract float [[TMP25]], [[TMP26]]
2221 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]])
2222 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
2223 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP19]])
2224 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]])
2225 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
2226 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0
2227 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison)
2228 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]]
2229 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]]
2230 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]])
2231 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i64 0
2232 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1
2233 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4
2234 ; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META3:![0-9]+]]
2235 ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0
2236 ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1
2237 ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = extractelement <2 x float> [[X]], i64 0
2238 ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = extractelement <2 x float> [[X]], i64 1
2239 ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = fcmp contract olt float [[TMP41]], 0x3810000000000000
2240 ; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = select contract i1 [[TMP43]], float 0x4170000000000000, float 1.000000e+00
2241 ; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = fmul contract float [[TMP41]], [[TMP44]]
2242 ; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP45]])
2243 ; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = select contract i1 [[TMP43]], float 4.096000e+03, float 1.000000e+00
2244 ; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = fmul contract float [[TMP46]], [[TMP47]]
2245 ; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = fcmp contract olt float [[TMP42]], 0x3810000000000000
2246 ; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = select contract i1 [[TMP49]], float 0x4170000000000000, float 1.000000e+00
2247 ; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = fmul contract float [[TMP42]], [[TMP50]]
2248 ; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP51]])
2249 ; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = select contract i1 [[TMP49]], float 4.096000e+03, float 1.000000e+00
2250 ; IEEE-BADFREXP-NEXT: [[TMP54:%.*]] = fmul contract float [[TMP52]], [[TMP53]]
2251 ; IEEE-BADFREXP-NEXT: [[TMP55:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i64 0
2252 ; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = insertelement <2 x float> [[TMP55]], float [[TMP54]], i64 1
2253 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
2254 ; IEEE-BADFREXP-NEXT: ret void
2256 ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath(
2257 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
2258 ; DAZ-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
2259 ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]]
2260 ; DAZ-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
2261 ; DAZ-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2:![0-9]+]]
2262 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0
2263 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1
2264 ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0
2265 ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[X]], i64 1
2266 ; DAZ-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
2267 ; DAZ-NEXT: [[TMP6:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP4]])
2268 ; DAZ-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0
2269 ; DAZ-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP7]], float [[TMP6]], i64 1
2270 ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
2271 ; DAZ-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[X]], i64 0
2272 ; DAZ-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[X]], i64 1
2273 ; DAZ-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP8]])
2274 ; DAZ-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP9]])
2275 ; DAZ-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0
2276 ; DAZ-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP11]], i64 1
2277 ; DAZ-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0
2278 ; DAZ-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1
2279 ; DAZ-NEXT: [[TMP15:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
2280 ; DAZ-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP14]])
2281 ; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0
2282 ; DAZ-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP16]], 1
2283 ; DAZ-NEXT: [[TMP19:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP17]])
2284 ; DAZ-NEXT: [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
2285 ; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP20]], 0
2286 ; DAZ-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP20]], 1
2287 ; DAZ-NEXT: [[TMP23:%.*]] = fmul contract float [[TMP21]], [[TMP19]]
2288 ; DAZ-NEXT: [[TMP24:%.*]] = sub i32 [[TMP22]], [[TMP18]]
2289 ; DAZ-NEXT: [[TMP25:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP23]], i32 [[TMP24]])
2290 ; DAZ-NEXT: [[TMP26:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i64 0
2291 ; DAZ-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP26]], float [[TMP25]], i64 1
2292 ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4
2293 ; DAZ-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META3:![0-9]+]]
2294 ; DAZ-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0
2295 ; DAZ-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1
2296 ; DAZ-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[X]], i64 0
2297 ; DAZ-NEXT: [[TMP30:%.*]] = extractelement <2 x float> [[X]], i64 1
2298 ; DAZ-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP29]])
2299 ; DAZ-NEXT: [[TMP32:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP30]])
2300 ; DAZ-NEXT: [[TMP33:%.*]] = insertelement <2 x float> poison, float [[TMP31]], i64 0
2301 ; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = insertelement <2 x float> [[TMP33]], float [[TMP32]], i64 1
2302 ; DAZ-NEXT: store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4
2303 ; DAZ-NEXT: ret void
2305 %sqrt.x.no.md = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
2306 %no.md = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.x.no.md
2307 store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 4
2309 ; Matches the rsq instruction accuracy
2310 %sqrt.md.1ulp = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
2311 %md.1ulp = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.md.1ulp, !fpmath !2
2312 store volatile <2 x float> %md.1ulp, ptr addrspace(1) %out, align 4
2314 ; Matches the rsq instruction accuracy
; NOTE: despite the ".undef" suffix in the value names, the second numerator element
; below is poison, so only element 0 has the 1.0 / sqrt(x) shape.
2315 %sqrt.md.1ulp.undef = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2
2316 %md.1ulp.undef = fdiv contract <2 x float> <float 1.0, float poison>, %sqrt.md.1ulp.undef, !fpmath !2
2317 store volatile <2 x float> %md.1ulp.undef, ptr addrspace(1) %out, align 4
2319 ; Test mismatched metadata/flags between the sqrt and fdiv
2321 ; Test the expected opencl default pattern
2322 %sqrt.x.3ulp = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
2323 %fdiv.opencl = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.x.3ulp, !fpmath !0
2324 store volatile <2 x float> %fdiv.opencl, ptr addrspace(1) %out, align 4
; multiple_arcp_fdiv_denom_nomd
; Input: two `fdiv arcp` instructions that share the denominator %denom and carry no
; !fpmath metadata; both quotients are stored volatile to %out.
; Expected output, as recorded by the assertions below (common prefix, so every RUN line):
; both fdivs are left untouched. The only textual difference from the input is the
; explicit `align 4` printed on the stores.
2329 define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd(ptr addrspace(1) %out, float %x, float %y, float %denom) {
2330 ; CHECK-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd(
2331 ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2332 ; CHECK-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]]
2333 ; CHECK-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]]
2334 ; CHECK-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2335 ; CHECK-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2336 ; CHECK-NEXT: ret void
2338 %arcp0 = fdiv arcp float %x, %denom
2339 %arcp1 = fdiv arcp float %y, %denom
2340 store volatile float %arcp0, ptr addrspace(1) %out
2341 store volatile float %arcp1, ptr addrspace(1) %out
; multiple_arcp_fdiv_denom_25ulp
; Input: two `fdiv arcp` instructions that share the denominator %denom, each tagged
; !fpmath !0; both quotients are stored volatile to %out.
; NOTE(review): !0 is defined outside this view; presumably the 2.5 ulp node implied by
; the function name - confirm.
; Expected output, as recorded by the assertions below:
;  - IEEE runs: each fdiv is rewritten independently as numerator * reciprocal, where the
;    reciprocal is built from frexp(%denom), llvm.amdgcn.rcp.f32 on the mantissa and
;    ldexp with the negated exponent. The reciprocal sequence is emitted twice, once per
;    fdiv, rather than shared. The BADFREXP variant reads the exponent through
;    llvm.amdgcn.frexp.exp.i32.f32 instead of extractvalue index 1 of llvm.frexp.
;  - DAZ runs: each fdiv becomes numerator * llvm.amdgcn.rcp.f32(%denom), again with one
;    rcp call per fdiv.
2345 define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp(ptr addrspace(1) %out, float %x, float %y, float %denom) {
2346 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp(
2347 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2348 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2349 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
2350 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
2351 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
2352 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
2353 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
2354 ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
2355 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2356 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
2357 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
2358 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
2359 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]])
2360 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
2361 ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]]
2362 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2363 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2364 ; IEEE-GOODFREXP-NEXT: ret void
2366 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp(
2367 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2368 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2369 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
2370 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
2371 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
2372 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
2373 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
2374 ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
2375 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2376 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
2377 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
2378 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
2379 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]])
2380 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
2381 ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]]
2382 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2383 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2384 ; IEEE-BADFREXP-NEXT: ret void
2386 ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp(
2387 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2388 ; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
2389 ; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]]
2390 ; DAZ-NEXT: [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
2391 ; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP2]]
2392 ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2393 ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2394 ; DAZ-NEXT: ret void
2396 %arcp0 = fdiv arcp float %x, %denom, !fpmath !0
2397 %arcp1 = fdiv arcp float %y, %denom, !fpmath !0
2398 store volatile float %arcp0, ptr addrspace(1) %out
2399 store volatile float %arcp1, ptr addrspace(1) %out
; Three arcp fdivs that share one denominator, each tagged with !fpmath !0
; (presumably the 2.5 ulp bound the function name refers to; the metadata
; node is defined outside this section -- confirm there).
;
; Per the assertions below, every division is expanded on its own: the
; reciprocal of %denom is recomputed for each fdiv rather than shared.
;   - ieee/dynamic denormal runs expect frexp of the denominator, rcp of the
;     mantissa, ldexp by the negated exponent, then fmul with the numerator.
;     The GOODFREXP assertions take the exponent from result 1 of llvm.frexp;
;     the BADFREXP assertions expect a separate llvm.amdgcn.frexp.exp call.
;   - the preserve-sign run expects a bare llvm.amdgcn.rcp followed by fmul.
2403 define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3(ptr addrspace(1) %out, float %x, float %y, float %z, float %denom) {
2404 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3(
2405 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2406 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2407 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
2408 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
2409 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
2410 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
2411 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
2412 ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
2413 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2414 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
2415 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
2416 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
2417 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]])
2418 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
2419 ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]]
2420 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2421 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
2422 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1
2423 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = sub i32 0, [[TMP15]]
2424 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP14]])
2425 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP16]])
2426 ; IEEE-GOODFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp float [[Z]], [[TMP18]]
2427 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2428 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2429 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
2430 ; IEEE-GOODFREXP-NEXT: ret void
2432 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3(
2433 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2434 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2435 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
2436 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
2437 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
2438 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
2439 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
2440 ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
2441 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2442 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
2443 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
2444 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
2445 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]])
2446 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
2447 ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]]
2448 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2449 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
2450 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
2451 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = sub i32 0, [[TMP15]]
2452 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP14]])
2453 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP16]])
2454 ; IEEE-BADFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp float [[Z]], [[TMP18]]
2455 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2456 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2457 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
2458 ; IEEE-BADFREXP-NEXT: ret void
2460 ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3(
2461 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2462 ; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
2463 ; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]]
2464 ; DAZ-NEXT: [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
2465 ; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP2]]
2466 ; DAZ-NEXT: [[TMP3:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
2467 ; DAZ-NEXT: [[ARCP2:%.*]] = fmul arcp float [[Z]], [[TMP3]]
2468 ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2469 ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2470 ; DAZ-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
2471 ; DAZ-NEXT: ret void
; Input IR: the volatile stores keep all three quotients live so each
; expansion stays visible in the output.
2473 %arcp0 = fdiv arcp float %x, %denom, !fpmath !0
2474 %arcp1 = fdiv arcp float %y, %denom, !fpmath !0
2475 %arcp2 = fdiv arcp float %z, %denom, !fpmath !0
2476 store volatile float %arcp0, ptr addrspace(1) %out
2477 store volatile float %arcp1, ptr addrspace(1) %out
2478 store volatile float %arcp2, ptr addrspace(1) %out
; Two arcp fdivs with a common denominator where only the FIRST one carries
; !fpmath !0 (presumably 2.5 ulp per the function name; the metadata node is
; defined outside this section -- confirm there).
;
; Per the assertions below, the annotated division is expanded (frexp/rcp/
; ldexp/fmul in the ieee/dynamic runs, rcp/fmul in the preserve-sign run),
; while the division without metadata is left untouched as
; `fdiv arcp float`. The expansion of one fdiv does not affect its sibling.
2482 define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd(ptr addrspace(1) %out, float %x, float %y, float %denom) {
2483 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd(
2484 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2485 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2486 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
2487 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
2488 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
2489 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
2490 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
2491 ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
2492 ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]]
2493 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2494 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2495 ; IEEE-GOODFREXP-NEXT: ret void
2497 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd(
2498 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2499 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2500 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
2501 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
2502 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
2503 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
2504 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
2505 ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
2506 ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]]
2507 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2508 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2509 ; IEEE-BADFREXP-NEXT: ret void
2511 ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd(
2512 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2513 ; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
2514 ; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]]
2515 ; DAZ-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]]
2516 ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2517 ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2518 ; DAZ-NEXT: ret void
; Input IR: %arcp0 has accuracy metadata, %arcp1 deliberately has none.
2520 %arcp0 = fdiv arcp float %x, %denom, !fpmath !0
2521 %arcp1 = fdiv arcp float %y, %denom
2522 store volatile float %arcp0, ptr addrspace(1) %out
2523 store volatile float %arcp1, ptr addrspace(1) %out
; Mirror image of the "25ulp_nomd" case: two arcp fdivs with a common
; denominator where only the SECOND one carries !fpmath !0 (presumably
; 2.5 ulp per the function name; the metadata node is defined outside this
; section -- confirm there).
;
; Per the assertions below, the first division (no metadata) stays a plain
; `fdiv arcp float` and is emitted first, and the annotated second division
; is expanded after it (frexp/rcp/ldexp/fmul in the ieee/dynamic runs,
; rcp/fmul in the preserve-sign run).
2527 define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp(ptr addrspace(1) %out, float %x, float %y, float %denom) {
2528 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp(
2529 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2530 ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]]
2531 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2532 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
2533 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
2534 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
2535 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
2536 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
2537 ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP6]]
2538 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2539 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2540 ; IEEE-GOODFREXP-NEXT: ret void
2542 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp(
2543 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2544 ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]]
2545 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2546 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
2547 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
2548 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
2549 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
2550 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
2551 ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP6]]
2552 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2553 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2554 ; IEEE-BADFREXP-NEXT: ret void
2556 ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp(
2557 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2558 ; DAZ-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]]
2559 ; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
2560 ; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP1]]
2561 ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2562 ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2563 ; DAZ-NEXT: ret void
; Input IR: %arcp0 deliberately has no accuracy metadata, %arcp1 has it.
2565 %arcp0 = fdiv arcp float %x, %denom
2566 %arcp1 = fdiv arcp float %y, %denom, !fpmath !0
2567 store volatile float %arcp0, ptr addrspace(1) %out
2568 store volatile float %arcp1, ptr addrspace(1) %out
; Two arcp fdivs with a common denominator, both tagged !fpmath !2
; (presumably the 1 ulp bound the function name refers to; the metadata node
; is defined outside this section -- confirm there).
;
; Per the assertions below, both divisions are still expanded, and each gets
; its own reciprocal sequence:
;   - ieee/dynamic denormal runs: frexp, rcp of the mantissa, ldexp by the
;     negated exponent, fmul with the numerator. The BADFREXP assertions use
;     llvm.amdgcn.frexp.exp for the exponent instead of llvm.frexp result 1.
;   - preserve-sign run: llvm.amdgcn.rcp followed by fmul.
2572 define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp(ptr addrspace(1) %out, float %x, float %y, float %denom) {
2573 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp(
2574 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2575 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2576 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
2577 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
2578 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
2579 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
2580 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
2581 ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
2582 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2583 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
2584 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
2585 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
2586 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]])
2587 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
2588 ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]]
2589 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2590 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2591 ; IEEE-GOODFREXP-NEXT: ret void
2593 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp(
2594 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2595 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2596 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
2597 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
2598 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
2599 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]])
2600 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
2601 ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]]
2602 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2603 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
2604 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
2605 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
2606 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]])
2607 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
2608 ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]]
2609 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2610 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2611 ; IEEE-BADFREXP-NEXT: ret void
2613 ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp(
2614 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] {
2615 ; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
2616 ; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]]
2617 ; DAZ-NEXT: [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]])
2618 ; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP2]]
2619 ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2620 ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2621 ; DAZ-NEXT: ret void
; Input IR: note the metadata node here is !2, not the !0 used by the
; "25ulp" variants of this test.
2623 %arcp0 = fdiv arcp float %x, %denom, !fpmath !2
2624 %arcp1 = fdiv arcp float %y, %denom, !fpmath !2
2625 store volatile float %arcp0, ptr addrspace(1) %out
2626 store volatile float %arcp1, ptr addrspace(1) %out
; <2 x float> version of the "1ulp" case: two arcp vector fdivs that share a
; denominator, both tagged !fpmath !2 (presumably 1 ulp per the function
; name; the metadata node is defined outside this section -- confirm there).
;
; Per the assertions below, each vector fdiv is scalarized independently:
; both lanes of the numerator and of the denominator are pulled out with
; extractelement, every lane gets its own reciprocal expansion, and the
; result vector is rebuilt with insertelement starting from poison. The
; denominator lanes are re-extracted for the second fdiv rather than reused.
;   - ieee/dynamic denormal runs expand each lane via frexp, rcp, ldexp and
;     fmul (BADFREXP uses llvm.amdgcn.frexp.exp for the exponent).
;   - preserve-sign run expands each lane to llvm.amdgcn.rcp plus fmul.
; The expected stores carry `align 8`, whereas the input stores specify no
; alignment.
2630 define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y, <2 x float> %denom) {
2631 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector(
2632 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[DENOM:%.*]]) #[[ATTR1]] {
2633 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
2634 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
2635 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
2636 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
2637 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
2638 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
2639 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
2640 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
2641 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
2642 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
2643 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]]
2644 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
2645 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
2646 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
2647 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
2648 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
2649 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
2650 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]]
2651 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0
2652 ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1
2653 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[Y]], i64 0
2654 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[Y]], i64 1
2655 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
2656 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
2657 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP22]])
2658 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0
2659 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP24]], 1
2660 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]]
2661 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP25]])
2662 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]])
2663 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = fmul arcp float [[TMP20]], [[TMP29]]
2664 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP23]])
2665 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
2666 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1
2667 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]]
2668 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP32]])
2669 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]])
2670 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = fmul arcp float [[TMP21]], [[TMP36]]
2671 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0
2672 ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1
2673 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
2674 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
2675 ; IEEE-GOODFREXP-NEXT: ret void
2677 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector(
2678 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[DENOM:%.*]]) #[[ATTR1]] {
2679 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
2680 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
2681 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
2682 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
2683 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
2684 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
2685 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
2686 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
2687 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
2688 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
2689 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]]
2690 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
2691 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
2692 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
2693 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
2694 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
2695 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
2696 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]]
2697 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0
2698 ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1
2699 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[Y]], i64 0
2700 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[Y]], i64 1
2701 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
2702 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
2703 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP22]])
2704 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0
2705 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP22]])
2706 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]]
2707 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP25]])
2708 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]])
2709 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = fmul arcp float [[TMP20]], [[TMP29]]
2710 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP23]])
2711 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
2712 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP23]])
2713 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]]
2714 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP32]])
2715 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]])
2716 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = fmul arcp float [[TMP21]], [[TMP36]]
2717 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0
2718 ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1
2719 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
2720 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
2721 ; IEEE-BADFREXP-NEXT: ret void
2723 ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector(
2724 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[DENOM:%.*]]) #[[ATTR1]] {
2725 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0
2726 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1
2727 ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
2728 ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
2729 ; DAZ-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP3]])
2730 ; DAZ-NEXT: [[TMP6:%.*]] = fmul arcp float [[TMP1]], [[TMP5]]
2731 ; DAZ-NEXT: [[TMP7:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP4]])
2732 ; DAZ-NEXT: [[TMP8:%.*]] = fmul arcp float [[TMP2]], [[TMP7]]
2733 ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
2734 ; DAZ-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i64 1
2735 ; DAZ-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[Y]], i64 0
2736 ; DAZ-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[Y]], i64 1
2737 ; DAZ-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
2738 ; DAZ-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
2739 ; DAZ-NEXT: [[TMP14:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP12]])
2740 ; DAZ-NEXT: [[TMP15:%.*]] = fmul arcp float [[TMP10]], [[TMP14]]
2741 ; DAZ-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
2742 ; DAZ-NEXT: [[TMP17:%.*]] = fmul arcp float [[TMP11]], [[TMP16]]
2743 ; DAZ-NEXT: [[TMP18:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i64 0
2744 ; DAZ-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP18]], float [[TMP17]], i64 1
2745 ; DAZ-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
2746 ; DAZ-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
2747 ; DAZ-NEXT: ret void
; Input IR: two whole-vector divisions by the same %denom.
2749 %arcp0 = fdiv arcp <2 x float> %x, %denom, !fpmath !2
2750 %arcp1 = fdiv arcp <2 x float> %y, %denom, !fpmath !2
2751 store volatile <2 x float> %arcp0, ptr addrspace(1) %out
2752 store volatile <2 x float> %arcp1, ptr addrspace(1) %out
; Two `contract arcp` fdivs whose shared denominator is itself the result of
; llvm.sqrt.f32 (tagged !fpmath !3). Both fdivs carry !fpmath !0 (presumably
; 2.5 ulp per the function name). Neither metadata node is defined in this
; section -- confirm their values there.
;
; Per the assertions below:
;   - ieee/dynamic denormal runs rewrite the sqrt into a scaled sequence:
;     compare the input against 0x3810000000000000 (2^-126 written as a
;     double hex literal), ldexp the input by 32 when it is below that,
;     call llvm.amdgcn.sqrt, then ldexp the result by -16. Each fdiv is then
;     expanded separately via frexp, rcp, ldexp and fmul (BADFREXP takes the
;     exponent from llvm.amdgcn.frexp.exp).
;   - preserve-sign run rewrites the sqrt to a direct llvm.amdgcn.sqrt call
;     and each fdiv to llvm.amdgcn.rcp plus fmul.
; In every run the sqrt and the reciprocal stay separate calls, and the
; `arcp contract` flags are carried onto the generated rcp/ldexp/fmul.
2756 define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp(ptr addrspace(1) %out, float %x, float %y, float %sqr.denom) {
2757 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp(
2758 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
2759 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = fcmp olt float [[SQR_DENOM]], 0x3810000000000000
2760 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0
2761 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[SQR_DENOM]], i32 [[TMP2]])
2762 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
2763 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0
2764 ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]])
2765 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2766 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
2767 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
2768 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
2769 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP7]])
2770 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
2771 ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP11]]
2772 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2773 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
2774 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
2775 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
2776 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
2777 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
2778 ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP17]]
2779 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2780 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2781 ; IEEE-GOODFREXP-NEXT: ret void
2783 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp(
2784 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
2785 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = fcmp olt float [[SQR_DENOM]], 0x3810000000000000
2786 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0
2787 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[SQR_DENOM]], i32 [[TMP2]])
2788 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
2789 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0
2790 ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]])
2791 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2792 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
2793 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
2794 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
2795 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP7]])
2796 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
2797 ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP11]]
2798 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
2799 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
2800 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
2801 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
2802 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
2803 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
2804 ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP17]]
2805 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2806 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2807 ; IEEE-BADFREXP-NEXT: ret void
2809 ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp(
2810 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
2811 ; DAZ-NEXT: [[DENOM:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[SQR_DENOM]])
2812 ; DAZ-NEXT: [[TMP1:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]])
2813 ; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP1]]
2814 ; DAZ-NEXT: [[TMP2:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]])
2815 ; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP2]]
2816 ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
2817 ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
2818 ; DAZ-NEXT: ret void
; Input IR: the sqrt result has two users, one per fdiv.
2820 %denom = call contract float @llvm.sqrt.f32(float %sqr.denom), !fpmath !3
2821 %arcp0 = fdiv contract arcp float %x, %denom, !fpmath !0
2822 %arcp1 = fdiv contract arcp float %y, %denom, !fpmath !0
2823 store volatile float %arcp0, ptr addrspace(1) %out
2824 store volatile float %arcp1, ptr addrspace(1) %out
; Two <2 x float> `arcp contract` fdivs (numerators %x and %y) divide by the
; same llvm.sqrt.v2f32 result. The sqrt carries !fpmath !3 and the fdivs carry
; !fpmath !0; the metadata definitions are outside this excerpt -- presumably
; relaxed-ULP bounds, going by the "25ulp" in the name (confirm at end of file).
;
; Expected output per check prefix (prefixes come from the RUN lines at the top
; of the file):
;   IEEE-GOODFREXP / IEEE-BADFREXP: everything is scalarized per element. Each
;     sqrt element becomes llvm.amdgcn.sqrt.f32 wrapped in ldexp scaling (+32 on
;     the input, -16 on the result) that is selected when the input compares
;     olt 0x3810000000000000. Each fdiv element becomes frexp +
;     llvm.amdgcn.rcp.f32 + ldexp + fmul, and that reciprocal sequence is
;     emitted again for the second fdiv rather than shared with the first.
;     The two prefixes differ only in where the exponent comes from:
;     extractvalue 1 of llvm.frexp vs. llvm.amdgcn.frexp.exp.i32.f32.
;   DAZ: a bare llvm.amdgcn.sqrt.f32 per element, and llvm.amdgcn.rcp.f32 +
;     fmul per fdiv element, with no scaling.
;
; The CHECK lines are autogenerated by utils/update_test_checks.py; regenerate
; them instead of editing by hand.
2828 define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y, <2 x float> %sqr.denom) {
2829 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp(
2830 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] {
2831 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 0
2832 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 1
2833 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = fcmp olt float [[TMP1]], 0x3810000000000000
2834 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 32, i32 0
2835 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP1]], i32 [[TMP4]])
2836 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP5]])
2837 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], i32 -16, i32 0
2838 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP6]], i32 [[TMP7]])
2839 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp olt float [[TMP2]], 0x3810000000000000
2840 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 32, i32 0
2841 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP2]], i32 [[TMP10]])
2842 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP11]])
2843 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select i1 [[TMP9]], i32 -16, i32 0
2844 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]])
2845 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0
2846 ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1
2847 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0
2848 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1
2849 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
2850 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
2851 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]])
2852 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP20]], 0
2853 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP20]], 1
2854 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = sub i32 0, [[TMP22]]
2855 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP21]])
2856 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP24]], i32 [[TMP23]])
2857 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul arcp contract float [[TMP16]], [[TMP25]]
2858 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]])
2859 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0
2860 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP27]], 1
2861 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = sub i32 0, [[TMP29]]
2862 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP28]])
2863 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP31]], i32 [[TMP30]])
2864 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = fmul arcp contract float [[TMP17]], [[TMP32]]
2865 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i64 0
2866 ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP34]], float [[TMP33]], i64 1
2867 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[Y]], i64 0
2868 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractelement <2 x float> [[Y]], i64 1
2869 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
2870 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
2871 ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP37]])
2872 ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0
2873 ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP39]], 1
2874 ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]]
2875 ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP40]])
2876 ; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]])
2877 ; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = fmul arcp contract float [[TMP35]], [[TMP44]]
2878 ; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP38]])
2879 ; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0
2880 ; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = extractvalue { float, i32 } [[TMP46]], 1
2881 ; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = sub i32 0, [[TMP48]]
2882 ; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP47]])
2883 ; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP50]], i32 [[TMP49]])
2884 ; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = fmul arcp contract float [[TMP36]], [[TMP51]]
2885 ; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = insertelement <2 x float> poison, float [[TMP45]], i64 0
2886 ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP53]], float [[TMP52]], i64 1
2887 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
2888 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
2889 ; IEEE-GOODFREXP-NEXT: ret void
2891 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp(
2892 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] {
2893 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 0
2894 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 1
2895 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = fcmp olt float [[TMP1]], 0x3810000000000000
2896 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 32, i32 0
2897 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP1]], i32 [[TMP4]])
2898 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP5]])
2899 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], i32 -16, i32 0
2900 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP6]], i32 [[TMP7]])
2901 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp olt float [[TMP2]], 0x3810000000000000
2902 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 32, i32 0
2903 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP2]], i32 [[TMP10]])
2904 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP11]])
2905 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select i1 [[TMP9]], i32 -16, i32 0
2906 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]])
2907 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0
2908 ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1
2909 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0
2910 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1
2911 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
2912 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
2913 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]])
2914 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP20]], 0
2915 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP18]])
2916 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = sub i32 0, [[TMP22]]
2917 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP21]])
2918 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP24]], i32 [[TMP23]])
2919 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul arcp contract float [[TMP16]], [[TMP25]]
2920 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]])
2921 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0
2922 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP19]])
2923 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = sub i32 0, [[TMP29]]
2924 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP28]])
2925 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP31]], i32 [[TMP30]])
2926 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = fmul arcp contract float [[TMP17]], [[TMP32]]
2927 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i64 0
2928 ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP34]], float [[TMP33]], i64 1
2929 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[Y]], i64 0
2930 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = extractelement <2 x float> [[Y]], i64 1
2931 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
2932 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
2933 ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP37]])
2934 ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0
2935 ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP37]])
2936 ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]]
2937 ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP40]])
2938 ; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]])
2939 ; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = fmul arcp contract float [[TMP35]], [[TMP44]]
2940 ; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP38]])
2941 ; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0
2942 ; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP38]])
2943 ; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = sub i32 0, [[TMP48]]
2944 ; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP47]])
2945 ; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP50]], i32 [[TMP49]])
2946 ; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = fmul arcp contract float [[TMP36]], [[TMP51]]
2947 ; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = insertelement <2 x float> poison, float [[TMP45]], i64 0
2948 ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP53]], float [[TMP52]], i64 1
2949 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
2950 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
2951 ; IEEE-BADFREXP-NEXT: ret void
2953 ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp(
2954 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] {
2955 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 0
2956 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 1
2957 ; DAZ-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]])
2958 ; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]])
2959 ; DAZ-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
2960 ; DAZ-NEXT: [[DENOM:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1
2961 ; DAZ-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[X]], i64 0
2962 ; DAZ-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[X]], i64 1
2963 ; DAZ-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
2964 ; DAZ-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
2965 ; DAZ-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP8]])
2966 ; DAZ-NEXT: [[TMP11:%.*]] = fmul arcp contract float [[TMP6]], [[TMP10]]
2967 ; DAZ-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP9]])
2968 ; DAZ-NEXT: [[TMP13:%.*]] = fmul arcp contract float [[TMP7]], [[TMP12]]
2969 ; DAZ-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0
2970 ; DAZ-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP13]], i64 1
2971 ; DAZ-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[Y]], i64 0
2972 ; DAZ-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[Y]], i64 1
2973 ; DAZ-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[DENOM]], i64 0
2974 ; DAZ-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[DENOM]], i64 1
2975 ; DAZ-NEXT: [[TMP19:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP17]])
2976 ; DAZ-NEXT: [[TMP20:%.*]] = fmul arcp contract float [[TMP15]], [[TMP19]]
2977 ; DAZ-NEXT: [[TMP21:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP18]])
2978 ; DAZ-NEXT: [[TMP22:%.*]] = fmul arcp contract float [[TMP16]], [[TMP21]]
2979 ; DAZ-NEXT: [[TMP23:%.*]] = insertelement <2 x float> poison, float [[TMP20]], i64 0
2980 ; DAZ-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP23]], float [[TMP22]], i64 1
2981 ; DAZ-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8
2982 ; DAZ-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8
2983 ; DAZ-NEXT: ret void
; Input IR: one vector sqrt feeding two vector fdivs; both quotients are
; stored with volatile stores to the same %out pointer.
2985 %denom = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %sqr.denom), !fpmath !3
2986 %arcp0 = fdiv contract arcp <2 x float> %x, %denom, !fpmath !0
2987 %arcp1 = fdiv contract arcp <2 x float> %y, %denom, !fpmath !0
2988 store volatile <2 x float> %arcp0, ptr addrspace(1) %out
2989 store volatile <2 x float> %arcp1, ptr addrspace(1) %out
; Three scalar `arcp contract` fdivs (numerators %x, %y and %z) divide by the
; same llvm.sqrt.f32 result. The sqrt carries !fpmath !3 and the fdivs carry
; !fpmath !0; the metadata definitions are outside this excerpt -- presumably
; relaxed-ULP bounds, going by the "25ulp" in the name (confirm at end of file).
;
; Expected output per check prefix (prefixes come from the RUN lines at the top
; of the file):
;   IEEE-GOODFREXP / IEEE-BADFREXP: the sqrt becomes llvm.amdgcn.sqrt.f32
;     wrapped in ldexp scaling (+32 on the input, -16 on the result) that is
;     selected when the input compares olt 0x3810000000000000. Each of the
;     three fdivs becomes its own frexp + llvm.amdgcn.rcp.f32 + ldexp + fmul
;     sequence on [[DENOM]]; the reciprocal is recomputed three times rather
;     than shared. The two prefixes differ only in where the exponent comes
;     from: extractvalue 1 of llvm.frexp vs. llvm.amdgcn.frexp.exp.i32.f32.
;   DAZ: a bare llvm.amdgcn.sqrt.f32, then llvm.amdgcn.rcp.f32 + fmul for each
;     fdiv, with no scaling.
;
; The CHECK lines are autogenerated by utils/update_test_checks.py; regenerate
; them instead of editing by hand.
2993 define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3(ptr addrspace(1) %out, float %x, float %y, float %z, float %sqr.denom) {
2994 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3(
2995 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
2996 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = fcmp olt float [[SQR_DENOM]], 0x3810000000000000
2997 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0
2998 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[SQR_DENOM]], i32 [[TMP2]])
2999 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
3000 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0
3001 ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]])
3002 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
3003 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
3004 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1
3005 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
3006 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP7]])
3007 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
3008 ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP11]]
3009 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
3010 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
3011 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
3012 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
3013 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
3014 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
3015 ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP17]]
3016 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
3017 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
3018 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1
3019 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
3020 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP19]])
3021 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
3022 ; IEEE-GOODFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp contract float [[Z]], [[TMP23]]
3023 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
3024 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
3025 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
3026 ; IEEE-GOODFREXP-NEXT: ret void
3028 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3(
3029 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
3030 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = fcmp olt float [[SQR_DENOM]], 0x3810000000000000
3031 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0
3032 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[SQR_DENOM]], i32 [[TMP2]])
3033 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
3034 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0
3035 ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]])
3036 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
3037 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0
3038 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
3039 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]]
3040 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP7]])
3041 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]])
3042 ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP11]]
3043 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
3044 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
3045 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
3046 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
3047 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
3048 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
3049 ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP17]]
3050 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]])
3051 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
3052 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]])
3053 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
3054 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP19]])
3055 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
3056 ; IEEE-BADFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp contract float [[Z]], [[TMP23]]
3057 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
3058 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
3059 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
3060 ; IEEE-BADFREXP-NEXT: ret void
3062 ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3(
3063 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] {
3064 ; DAZ-NEXT: [[DENOM:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[SQR_DENOM]])
3065 ; DAZ-NEXT: [[TMP1:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]])
3066 ; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP1]]
3067 ; DAZ-NEXT: [[TMP2:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]])
3068 ; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP2]]
3069 ; DAZ-NEXT: [[TMP3:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]])
3070 ; DAZ-NEXT: [[ARCP2:%.*]] = fmul arcp contract float [[Z]], [[TMP3]]
3071 ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4
3072 ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4
3073 ; DAZ-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4
3074 ; DAZ-NEXT: ret void
; Input IR: one scalar sqrt feeding three scalar fdivs; all three quotients
; are stored with volatile stores to the same %out pointer.
3076 %denom = call contract float @llvm.sqrt.f32(float %sqr.denom), !fpmath !3
3077 %arcp0 = fdiv contract arcp float %x, %denom, !fpmath !0
3078 %arcp1 = fdiv contract arcp float %y, %denom, !fpmath !0
3079 %arcp2 = fdiv contract arcp float %z, %denom, !fpmath !0
3080 store volatile float %arcp0, ptr addrspace(1) %out
3081 store volatile float %arcp1, ptr addrspace(1) %out
3082 store volatile float %arcp2, ptr addrspace(1) %out
3086 define <4 x float> @rsq_f32_vector_mixed_constant_numerator(<4 x float> %arg) {
3087 ; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator(
3088 ; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3089 ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]]
3090 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
3091 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
3092 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
3093 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
3094 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3095 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3096 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3097 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3098 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp contract olt float [[TMP5]], 0x3810000000000000
3099 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00
3100 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul contract float [[TMP5]], [[TMP10]]
3101 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP11]])
3102 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00
3103 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = fmul contract float [[TMP12]], [[TMP13]]
3104 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = fcmp contract olt float [[TMP6]], 0x3810000000000000
3105 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = select contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00
3106 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul contract float [[TMP6]], [[TMP16]]
3107 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP17]])
3108 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = select contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00
3109 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul contract float [[TMP18]], [[TMP19]]
3110 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
3111 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0
3112 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP21]], 1
3113 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]])
3114 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
3115 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0
3116 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1
3117 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]]
3118 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]]
3119 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]])
3120 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
3121 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
3122 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1
3123 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]])
3124 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
3125 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0
3126 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = extractvalue { float, i32 } [[TMP35]], 1
3127 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]]
3128 ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]]
3129 ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]])
3130 ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0
3131 ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP20]], i64 1
3132 ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2
3133 ; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3
3134 ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3136 ; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator(
3137 ; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3138 ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]]
3139 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
3140 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
3141 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
3142 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
3143 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3144 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3145 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3146 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3147 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp contract olt float [[TMP5]], 0x3810000000000000
3148 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00
3149 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul contract float [[TMP5]], [[TMP10]]
3150 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP11]])
3151 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00
3152 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = fmul contract float [[TMP12]], [[TMP13]]
3153 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = fcmp contract olt float [[TMP6]], 0x3810000000000000
3154 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = select contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00
3155 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul contract float [[TMP6]], [[TMP16]]
3156 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP17]])
3157 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = select contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00
3158 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul contract float [[TMP18]], [[TMP19]]
3159 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
3160 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0
3161 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
3162 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]])
3163 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
3164 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0
3165 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 4.000000e+00)
3166 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]]
3167 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]]
3168 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]])
3169 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
3170 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
3171 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
3172 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]])
3173 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
3174 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0
3175 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison)
3176 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]]
3177 ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]]
3178 ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]])
3179 ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0
3180 ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP20]], i64 1
3181 ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2
3182 ; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3
3183 ; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3185 ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator(
3186 ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3187 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3188 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3189 ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3190 ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3191 ; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]])
3192 ; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]])
3193 ; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
3194 ; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]])
3195 ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
3196 ; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1
3197 ; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2
3198 ; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3
3199 ; DAZ-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
3200 ; DAZ-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
3201 ; DAZ-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
3202 ; DAZ-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
3203 ; DAZ-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP12]])
3204 ; DAZ-NEXT: [[TMP17:%.*]] = fneg contract float [[TMP13]]
3205 ; DAZ-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP17]])
3206 ; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP14]])
3207 ; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
3208 ; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
3209 ; DAZ-NEXT: [[TMP22:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP20]])
3210 ; DAZ-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
3211 ; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
3212 ; DAZ-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
3213 ; DAZ-NEXT: [[TMP26:%.*]] = fmul contract float [[TMP24]], [[TMP22]]
3214 ; DAZ-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
3215 ; DAZ-NEXT: [[TMP28:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
3216 ; DAZ-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP15]])
3217 ; DAZ-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0
3218 ; DAZ-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP29]], 1
3219 ; DAZ-NEXT: [[TMP32:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP30]])
3220 ; DAZ-NEXT: [[TMP33:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
3221 ; DAZ-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP33]], 0
3222 ; DAZ-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP33]], 1
3223 ; DAZ-NEXT: [[TMP36:%.*]] = fmul contract float [[TMP34]], [[TMP32]]
3224 ; DAZ-NEXT: [[TMP37:%.*]] = sub i32 [[TMP35]], [[TMP31]]
3225 ; DAZ-NEXT: [[TMP38:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP36]], i32 [[TMP37]])
3226 ; DAZ-NEXT: [[TMP39:%.*]] = insertelement <4 x float> poison, float [[TMP16]], i64 0
3227 ; DAZ-NEXT: [[TMP40:%.*]] = insertelement <4 x float> [[TMP39]], float [[TMP18]], i64 1
3228 ; DAZ-NEXT: [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP28]], i64 2
3229 ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP38]], i64 3
3230 ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3232 %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
3233 %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom, !fpmath !2
3234 ret <4 x float> %partial.rsq
; Divides the constant vector <1.0, -1.0, 4.0, poison> by a vector sqrt. In this
; variant the sqrt call carries `contract afn` and no !fpmath, while the fdiv
; carries `contract` plus !fpmath !2 (metadata node defined outside this excerpt).
; The assertions below expect, per lane:
;   - lanes 0 and 1 (numerators 1.0 and -1.0) become llvm.amdgcn.rsq.f32 on the
;     matching element of %arg. With the IEEE prefixes the rsq input is first
;     multiplied by a compare/select scale factor and the rsq result by a second
;     one (the sign for lane 1 is folded into that second constant); with the
;     DAZ prefix rsq is applied directly and lane 1 is negated with fneg.
;   - lanes 2 and 3 (numerators 4.0 and poison) are expanded on the elements of
;     the sqrt result using llvm.frexp, llvm.amdgcn.rcp.f32 and llvm.ldexp. The
;     IEEE-BADFREXP lines read the exponent through
;     llvm.amdgcn.frexp.exp.i32.f32 instead of extractvalue index 1.
; The vector sqrt call itself remains in the expected output for every prefix.
3237 define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt(<4 x float> %arg) {
3238 ; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt(
3239 ; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3240 ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
3241 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
3242 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
3243 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
3244 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
3245 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3246 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3247 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3248 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3249 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp contract afn olt float [[TMP5]], 0x3810000000000000
3250 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select contract afn i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00
3251 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul contract afn float [[TMP5]], [[TMP10]]
3252 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP11]])
3253 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select contract afn i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00
3254 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = fmul contract afn float [[TMP12]], [[TMP13]]
3255 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = fcmp contract afn olt float [[TMP6]], 0x3810000000000000
3256 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = select contract afn i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00
3257 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul contract afn float [[TMP6]], [[TMP16]]
3258 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP17]])
3259 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = select contract afn i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00
3260 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul contract afn float [[TMP18]], [[TMP19]]
3261 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
3262 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0
3263 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP21]], 1
3264 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]])
3265 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
3266 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0
3267 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1
3268 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]]
3269 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]]
3270 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]])
3271 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
3272 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
3273 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1
3274 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]])
3275 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
3276 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0
3277 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = extractvalue { float, i32 } [[TMP35]], 1
3278 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]]
3279 ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]]
3280 ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]])
3281 ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0
3282 ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP20]], i64 1
3283 ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2
3284 ; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3
3285 ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3287 ; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt(
3288 ; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3289 ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
3290 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
3291 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
3292 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
3293 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
3294 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3295 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3296 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3297 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3298 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp contract afn olt float [[TMP5]], 0x3810000000000000
3299 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select contract afn i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00
3300 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul contract afn float [[TMP5]], [[TMP10]]
3301 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP11]])
3302 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select contract afn i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00
3303 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = fmul contract afn float [[TMP12]], [[TMP13]]
3304 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = fcmp contract afn olt float [[TMP6]], 0x3810000000000000
3305 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = select contract afn i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00
3306 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul contract afn float [[TMP6]], [[TMP16]]
3307 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP17]])
3308 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = select contract afn i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00
3309 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul contract afn float [[TMP18]], [[TMP19]]
3310 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
3311 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0
3312 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
3313 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]])
3314 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
3315 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0
3316 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 4.000000e+00)
3317 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]]
3318 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]]
3319 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]])
3320 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
3321 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0
3322 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
3323 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]])
3324 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
3325 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0
3326 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison)
3327 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]]
3328 ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]]
3329 ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]])
3330 ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0
3331 ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP20]], i64 1
3332 ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2
3333 ; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3
3334 ; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3336 ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt(
3337 ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3338 ; DAZ-NEXT: [[DENOM:%.*]] = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
3339 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
3340 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
3341 ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
3342 ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
3343 ; DAZ-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3344 ; DAZ-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3345 ; DAZ-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3346 ; DAZ-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3347 ; DAZ-NEXT: [[TMP9:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP5]])
3348 ; DAZ-NEXT: [[TMP10:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP6]])
3349 ; DAZ-NEXT: [[TMP11:%.*]] = fneg contract afn float [[TMP10]]
3350 ; DAZ-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
3351 ; DAZ-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
3352 ; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
3353 ; DAZ-NEXT: [[TMP15:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
3354 ; DAZ-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
3355 ; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0
3356 ; DAZ-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP16]], 1
3357 ; DAZ-NEXT: [[TMP19:%.*]] = fmul contract float [[TMP17]], [[TMP15]]
3358 ; DAZ-NEXT: [[TMP20:%.*]] = sub i32 [[TMP18]], [[TMP14]]
3359 ; DAZ-NEXT: [[TMP21:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP19]], i32 [[TMP20]])
3360 ; DAZ-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
3361 ; DAZ-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
3362 ; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1
3363 ; DAZ-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP23]])
3364 ; DAZ-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
3365 ; DAZ-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0
3366 ; DAZ-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP26]], 1
3367 ; DAZ-NEXT: [[TMP29:%.*]] = fmul contract float [[TMP27]], [[TMP25]]
3368 ; DAZ-NEXT: [[TMP30:%.*]] = sub i32 [[TMP28]], [[TMP24]]
3369 ; DAZ-NEXT: [[TMP31:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP30]])
3370 ; DAZ-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP9]], i64 0
3371 ; DAZ-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP11]], i64 1
3372 ; DAZ-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP21]], i64 2
3373 ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3
3374 ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3376 %denom = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg)
3377 %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom, !fpmath !2
3378 ret <4 x float> %partial.rsq
; Divides the constant vector <1.0, -1.0, 4.0, poison> by a vector sqrt. In this
; variant the fdiv carries `contract afn` and no !fpmath, while the sqrt call
; carries `contract` plus !fpmath !2 (metadata node defined outside this excerpt).
; The assertions below expect the fdiv to be left as a single vector fdiv for
; every prefix. With the IEEE prefix the sqrt call is also unchanged; with the
; DAZ prefix the sqrt is split into one llvm.amdgcn.sqrt.f32 call per element
; and the vector is rebuilt with insertelement before feeding the fdiv.
3381 define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div(<4 x float> %arg) {
3382 ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div(
3383 ; IEEE-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3384 ; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2:![0-9]+]]
3385 ; IEEE-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract afn <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]]
3386 ; IEEE-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3388 ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div(
3389 ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3390 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3391 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3392 ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3393 ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3394 ; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]])
3395 ; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]])
3396 ; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
3397 ; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]])
3398 ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
3399 ; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1
3400 ; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2
3401 ; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3
3402 ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract afn <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]]
3403 ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3405 %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
3406 %partial.rsq = fdiv contract afn <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom
3407 ret <4 x float> %partial.rsq
; Divides the constant vector <1.0, -1.0, 4.0, poison> by a vector sqrt. In this
; variant the fdiv carries only `contract` (no afn, no !fpmath), while the sqrt
; call carries `contract` plus !fpmath !2 (metadata node defined outside this
; excerpt).
; The assertions below expect the fdiv to be left as a single vector fdiv for
; every prefix. With the IEEE prefix the sqrt call is also unchanged; with the
; DAZ prefix the sqrt is split into one llvm.amdgcn.sqrt.f32 call per element
; and the vector is rebuilt with insertelement before feeding the fdiv.
3410 define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv(<4 x float> %arg) {
3411 ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv(
3412 ; IEEE-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3413 ; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]]
3414 ; IEEE-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]]
3415 ; IEEE-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3417 ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv(
3418 ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3419 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3420 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3421 ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3422 ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3423 ; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]])
3424 ; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]])
3425 ; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
3426 ; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]])
3427 ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
3428 ; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1
3429 ; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2
3430 ; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3
3431 ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]]
3432 ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3434 %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
3435 %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom
3436 ret <4 x float> %partial.rsq
; Divides the constant vector <1.0, -1.0, 4.0, poison> by a vector sqrt. In this
; variant the sqrt call carries only `contract` (no afn, no !fpmath), while the
; fdiv carries `contract` plus !fpmath !2 (metadata node defined outside this
; excerpt).
; The assertions below expect the vector sqrt call to stay untouched and no
; llvm.amdgcn.rsq.f32 to appear for any prefix; only the fdiv is expanded, lane
; by lane, on the elements of the sqrt result:
;   - lanes 0 and 1 (numerators 1.0 and -1.0): with the IEEE prefixes, frexp of
;     the denominator, llvm.amdgcn.rcp.f32 of the mantissa, then llvm.ldexp by
;     the negated exponent (lane 1 applies fneg to the denominator first); with
;     the DAZ prefix, a plain llvm.amdgcn.rcp.f32 (of the fneg'd value for
;     lane 1).
;   - lanes 2 and 3 (numerators 4.0 and poison): frexp of both operands, rcp of
;     the denominator mantissa, fmul, exponent subtraction, then llvm.ldexp.
; The IEEE-BADFREXP lines read exponents through llvm.amdgcn.frexp.exp.i32.f32
; instead of extractvalue index 1.
3439 define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt(<4 x float> %arg) {
3440 ; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt(
3441 ; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3442 ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
3443 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
3444 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
3445 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
3446 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
3447 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
3448 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
3449 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
3450 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
3451 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP6]])
3452 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
3453 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg contract float [[TMP2]]
3454 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
3455 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
3456 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
3457 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
3458 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
3459 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
3460 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
3461 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
3462 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1
3463 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]])
3464 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
3465 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
3466 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1
3467 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]]
3468 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]]
3469 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]])
3470 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
3471 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
3472 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
3473 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]])
3474 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
3475 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0
3476 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP32]], 1
3477 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]]
3478 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]]
3479 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]])
3480 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0
3481 ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP17]], i64 1
3482 ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = insertelement <4 x float> [[TMP39]], float [[TMP27]], i64 2
3483 ; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP37]], i64 3
3484 ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3486 ; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt(
3487 ; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3488 ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
3489 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
3490 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
3491 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
3492 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
3493 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
3494 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
3495 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]])
3496 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
3497 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP6]])
3498 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
3499 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg contract float [[TMP2]]
3500 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
3501 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
3502 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]])
3503 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
3504 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
3505 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
3506 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
3507 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
3508 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
3509 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]])
3510 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
3511 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
3512 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 4.000000e+00)
3513 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]]
3514 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]]
3515 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]])
3516 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
3517 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
3518 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
3519 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]])
3520 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
3521 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0
3522 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison)
3523 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]]
3524 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]]
3525 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]])
3526 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0
3527 ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP17]], i64 1
3528 ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = insertelement <4 x float> [[TMP39]], float [[TMP27]], i64 2
3529 ; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP37]], i64 3
3530 ; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3532 ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt(
3533 ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3534 ; DAZ-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
3535 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
3536 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
3537 ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
3538 ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
3539 ; DAZ-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP1]])
3540 ; DAZ-NEXT: [[TMP6:%.*]] = fneg contract float [[TMP2]]
3541 ; DAZ-NEXT: [[TMP7:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP6]])
3542 ; DAZ-NEXT: [[TMP8:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
3543 ; DAZ-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP8]], 0
3544 ; DAZ-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP8]], 1
3545 ; DAZ-NEXT: [[TMP11:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP9]])
3546 ; DAZ-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00)
3547 ; DAZ-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
3548 ; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
3549 ; DAZ-NEXT: [[TMP15:%.*]] = fmul contract float [[TMP13]], [[TMP11]]
3550 ; DAZ-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP10]]
3551 ; DAZ-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP16]])
3552 ; DAZ-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
3553 ; DAZ-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
3554 ; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1
3555 ; DAZ-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]])
3556 ; DAZ-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
3557 ; DAZ-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0
3558 ; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1
3559 ; DAZ-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]]
3560 ; DAZ-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]]
3561 ; DAZ-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]])
3562 ; DAZ-NEXT: [[TMP28:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
3563 ; DAZ-NEXT: [[TMP29:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP7]], i64 1
3564 ; DAZ-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP17]], i64 2
3565 ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP30]], float [[TMP27]], i64 3
3566 ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3568 %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg)
3569 %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom, !fpmath !2
3570 ret <4 x float> %partial.rsq
; Divides the mixed constant numerator <1.0, -1.0, 4.0, poison> by a vector
; sqrt. Both the sqrt and the fdiv carry !fpmath !2 (metadata node defined
; elsewhere in this file) and the fdiv additionally has the arcp flag.
; Expected expansion, as pinned by the assertions below:
;  * Both IEEE prefixes: lanes 0 and 1 (numerators 1.0 and -1.0) turn into
;    llvm.amdgcn.rsq.f32 applied to the conditionally scaled sqrt argument,
;    with the numerator sign folded into the result scale factor. Lanes 2 and 3
;    take frexp of the sqrt result, llvm.amdgcn.rcp.f32 of the fraction, ldexp
;    by the negated exponent, and finally multiply by the numerator.
;  * The DAZ prefix (preserve-sign run): the sqrt is split into per-lane
;    llvm.amdgcn.sqrt.f32 calls and every lane uses a plain
;    llvm.amdgcn.rcp.f32; lane 1 negates the denominator first, lanes 2 and 3
;    multiply the reciprocal by the numerator.
; The GOODFREXP and BADFREXP variants differ only in how the exponent is read:
; element 1 of the llvm.frexp result versus a separate
; llvm.amdgcn.frexp.exp.i32.f32 call.
3573 define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp(<4 x float> %arg) {
3574 ; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp(
3575 ; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3576 ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]]
3577 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
3578 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
3579 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
3580 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
3581 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3582 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3583 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3584 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3585 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp arcp contract olt float [[TMP5]], 0x3810000000000000
3586 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select arcp contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00
3587 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul arcp contract float [[TMP5]], [[TMP10]]
3588 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP11]])
3589 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select arcp contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00
3590 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = fmul arcp contract float [[TMP12]], [[TMP13]]
3591 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = fcmp arcp contract olt float [[TMP6]], 0x3810000000000000
3592 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = select arcp contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00
3593 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul arcp contract float [[TMP6]], [[TMP16]]
3594 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP17]])
3595 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = select arcp contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00
3596 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul arcp contract float [[TMP18]], [[TMP19]]
3597 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
3598 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0
3599 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP21]], 1
3600 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = sub i32 0, [[TMP23]]
3601 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP22]])
3602 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP24]])
3603 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = fmul arcp contract float 4.000000e+00, [[TMP26]]
3604 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
3605 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
3606 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
3607 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
3608 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP29]])
3609 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
3610 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = fmul arcp contract float poison, [[TMP33]]
3611 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0
3612 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP20]], i64 1
3613 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP27]], i64 2
3614 ; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP37]], float [[TMP34]], i64 3
3615 ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3617 ; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp(
3618 ; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3619 ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]]
3620 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
3621 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
3622 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
3623 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
3624 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3625 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3626 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3627 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3628 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp arcp contract olt float [[TMP5]], 0x3810000000000000
3629 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select arcp contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00
3630 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul arcp contract float [[TMP5]], [[TMP10]]
3631 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP11]])
3632 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select arcp contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00
3633 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = fmul arcp contract float [[TMP12]], [[TMP13]]
3634 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = fcmp arcp contract olt float [[TMP6]], 0x3810000000000000
3635 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = select arcp contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00
3636 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul arcp contract float [[TMP6]], [[TMP16]]
3637 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP17]])
3638 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = select arcp contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00
3639 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul arcp contract float [[TMP18]], [[TMP19]]
3640 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
3641 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0
3642 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
3643 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = sub i32 0, [[TMP23]]
3644 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP22]])
3645 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP24]])
3646 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = fmul arcp contract float 4.000000e+00, [[TMP26]]
3647 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
3648 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
3649 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
3650 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
3651 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP29]])
3652 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
3653 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = fmul arcp contract float poison, [[TMP33]]
3654 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0
3655 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP20]], i64 1
3656 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP27]], i64 2
3657 ; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP37]], float [[TMP34]], i64 3
3658 ; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3660 ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp(
3661 ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3662 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3663 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3664 ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3665 ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3666 ; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]])
3667 ; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]])
3668 ; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
3669 ; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]])
3670 ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
3671 ; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1
3672 ; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2
3673 ; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3
3674 ; DAZ-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[DENOM]], i64 0
3675 ; DAZ-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[DENOM]], i64 1
3676 ; DAZ-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[DENOM]], i64 2
3677 ; DAZ-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[DENOM]], i64 3
3678 ; DAZ-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP12]])
3679 ; DAZ-NEXT: [[TMP17:%.*]] = fneg arcp contract float [[TMP13]]
3680 ; DAZ-NEXT: [[TMP18:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP17]])
3681 ; DAZ-NEXT: [[TMP19:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP14]])
3682 ; DAZ-NEXT: [[TMP20:%.*]] = fmul arcp contract float 4.000000e+00, [[TMP19]]
3683 ; DAZ-NEXT: [[TMP21:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP15]])
3684 ; DAZ-NEXT: [[TMP22:%.*]] = fmul arcp contract float poison, [[TMP21]]
3685 ; DAZ-NEXT: [[TMP23:%.*]] = insertelement <4 x float> poison, float [[TMP16]], i64 0
3686 ; DAZ-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP18]], i64 1
3687 ; DAZ-NEXT: [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[TMP20]], i64 2
3688 ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP22]], i64 3
3689 ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3691 %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
3692 %partial.rsq = fdiv contract arcp <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom, !fpmath !2
3693 ret <4 x float> %partial.rsq
; The fdiv here has the arcp and contract flags but no !fpmath metadata; only
; the sqrt carries !fpmath !2 (metadata node defined elsewhere in this file).
; The assertions below expect the fdiv to remain a single vector fdiv in every
; configuration. Under the DAZ prefix (preserve-sign run) the sqrt is still
; split into per-lane llvm.amdgcn.sqrt.f32 calls, while under the IEEE prefix
; it stays a llvm.sqrt.v4f32 call.
3696 define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct(<4 x float> %arg) {
3697 ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct(
3698 ; IEEE-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3699 ; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]]
3700 ; IEEE-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv arcp contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]]
3701 ; IEEE-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3703 ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct(
3704 ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3705 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3706 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3707 ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3708 ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3709 ; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]])
3710 ; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]])
3711 ; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]])
3712 ; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]])
3713 ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
3714 ; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1
3715 ; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2
3716 ; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3
3717 ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv arcp contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]]
3718 ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3720 %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2
3721 %partial.rsq = fdiv contract arcp <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom
3722 ret <4 x float> %partial.rsq
; Divides the mixed constant numerator <1.0, -1.0, 4.0, poison> directly by
; %arg with the arcp flag and !fpmath !2 (metadata node defined elsewhere in
; this file).
; Expected expansion, as pinned by the assertions below:
;  * Both IEEE prefixes: each lane takes frexp of the denominator, applies
;    llvm.amdgcn.rcp.f32 to the fraction and ldexp by the negated exponent.
;    Lane 1 negates the denominator first; lanes 2 and 3 multiply the
;    reciprocal by the numerator. The GOODFREXP variant reads the exponent from
;    element 1 of the llvm.frexp result, the BADFREXP variant from a separate
;    llvm.amdgcn.frexp.exp.i32.f32 call.
;  * The DAZ prefix (preserve-sign run): one plain llvm.amdgcn.rcp.f32 per
;    lane, with the same fneg on lane 1 and fmul on lanes 2 and 3.
3725 define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp(<4 x float> %arg) {
3726 ; IEEE-GOODFREXP-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp(
3727 ; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3728 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3729 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3730 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3731 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3732 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
3733 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
3734 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
3735 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
3736 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
3737 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
3738 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg arcp float [[TMP2]]
3739 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
3740 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
3741 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
3742 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
3743 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
3744 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
3745 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
3746 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
3747 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1
3748 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
3749 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP19]])
3750 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
3751 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = fmul arcp float 4.000000e+00, [[TMP23]]
3752 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
3753 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0
3754 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1
3755 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = sub i32 0, [[TMP27]]
3756 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP26]])
3757 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP28]])
3758 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = fmul arcp float poison, [[TMP30]]
3759 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0
3760 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP17]], i64 1
3761 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP24]], i64 2
3762 ; IEEE-GOODFREXP-NEXT: [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3
3763 ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RCP]]
3765 ; IEEE-BADFREXP-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp(
3766 ; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3767 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3768 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3769 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3770 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3771 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]])
3772 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
3773 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]])
3774 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]]
3775 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
3776 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]])
3777 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg arcp float [[TMP2]]
3778 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
3779 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
3780 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]])
3781 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
3782 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]])
3783 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
3784 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]])
3785 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0
3786 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]])
3787 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]]
3788 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP19]])
3789 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]])
3790 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = fmul arcp float 4.000000e+00, [[TMP23]]
3791 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]])
3792 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0
3793 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]])
3794 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = sub i32 0, [[TMP27]]
3795 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP26]])
3796 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP28]])
3797 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = fmul arcp float poison, [[TMP30]]
3798 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0
3799 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP17]], i64 1
3800 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP24]], i64 2
3801 ; IEEE-BADFREXP-NEXT: [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3
3802 ; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RCP]]
3804 ; DAZ-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp(
3805 ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3806 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0
3807 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1
3808 ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2
3809 ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3
3810 ; DAZ-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP1]])
3811 ; DAZ-NEXT: [[TMP6:%.*]] = fneg arcp float [[TMP2]]
3812 ; DAZ-NEXT: [[TMP7:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]])
3813 ; DAZ-NEXT: [[TMP8:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP3]])
3814 ; DAZ-NEXT: [[TMP9:%.*]] = fmul arcp float 4.000000e+00, [[TMP8]]
3815 ; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP4]])
3816 ; DAZ-NEXT: [[TMP11:%.*]] = fmul arcp float poison, [[TMP10]]
3817 ; DAZ-NEXT: [[TMP12:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
3818 ; DAZ-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP7]], i64 1
3819 ; DAZ-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i64 2
3820 ; DAZ-NEXT: [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 3
3821 ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RCP]]
3823 %partial.rcp = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %arg, !fpmath !2
3824 ret <4 x float> %partial.rcp
; An arcp fdiv of the mixed constant numerator <1.0, -1.0, 4.0, poison> by
; %arg with no !fpmath metadata attached. The assertions below use the prefix
; shared by every run configuration and expect the fdiv to be left unchanged.
3827 define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp_correct(<4 x float> %arg) {
3828 ; CHECK-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp_correct(
3829 ; CHECK-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] {
3830 ; CHECK-NEXT: [[PARTIAL_RCP:%.*]] = fdiv arcp <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[ARG]]
3831 ; CHECK-NEXT: ret <4 x float> [[PARTIAL_RCP]]
3833 %partial.rcp = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %arg
3834 ret <4 x float> %partial.rcp
3837 ; Make sure we don't crash if a vector square root has a constant vector input
3838 define <4 x float> @rsq_f32_vector_const_denom(ptr addrspace(1) %out, <2 x float> %x) {
3839 ; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_const_denom(
3840 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
3841 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 4.000000e+00)
3842 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 2.000000e+00)
3843 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 8.000000e+00)
3844 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float poison)
3845 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
3846 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP2]], i64 1
3847 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i64 2
3848 ; IEEE-GOODFREXP-NEXT: [[SQRT:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP4]], i64 3
3849 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[SQRT]], i64 0
3850 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[SQRT]], i64 1
3851 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[SQRT]], i64 2
3852 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[SQRT]], i64 3
3853 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP8]])
3854 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
3855 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1
3856 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
3857 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
3858 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
3859 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fneg contract float [[TMP9]]
3860 ; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]])
3861 ; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0
3862 ; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = extractvalue { float, i32 } [[TMP48]], 1
3863 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = sub i32 0, [[TMP50]]
3864 ; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP49]])
3865 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP51]], i32 [[TMP22]])
3866 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP10]])
3867 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0
3868 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP29]], 1
3869 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP30]])
3870 ; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
3871 ; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = extractvalue { float, i32 } [[TMP52]], 0
3872 ; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = extractvalue { float, i32 } [[TMP52]], 1
3873 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = fmul contract float [[TMP53]], [[TMP28]]
3874 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = sub i32 [[TMP54]], [[TMP31]]
3875 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP33]])
3876 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
3877 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0
3878 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = extractvalue { float, i32 } [[TMP35]], 1
3879 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP36]])
3880 ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00)
3881 ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0
3882 ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP39]], 1
3883 ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = fmul contract float [[TMP40]], [[TMP38]]
3884 ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = sub i32 [[TMP41]], [[TMP37]]
3885 ; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP42]], i32 [[TMP43]])
3886 ; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = insertelement <4 x float> poison, float [[TMP17]], i64 0
3887 ; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP24]], i64 1
3888 ; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP34]], i64 2
3889 ; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP47]], float [[TMP44]], i64 3
3890 ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3892 ; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_const_denom(
3893 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
3894 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 4.000000e+00)
3895 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 2.000000e+00)
3896 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 8.000000e+00)
3897 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float poison)
3898 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
3899 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP2]], i64 1
3900 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i64 2
3901 ; IEEE-BADFREXP-NEXT: [[SQRT:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP4]], i64 3
3902 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[SQRT]], i64 0
3903 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[SQRT]], i64 1
3904 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[SQRT]], i64 2
3905 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[SQRT]], i64 3
3906 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP8]])
3907 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0
3908 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP8]])
3909 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]]
3910 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
3911 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]])
3912 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fneg contract float [[TMP9]]
3913 ; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]])
3914 ; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0
3915 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP18]])
3916 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = sub i32 0, [[TMP21]]
3917 ; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP49]])
3918 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP50]], i32 [[TMP22]])
3919 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP10]])
3920 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0
3921 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP10]])
3922 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP30]])
3923 ; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
3924 ; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = extractvalue { float, i32 } [[TMP51]], 0
3925 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison)
3926 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = fmul contract float [[TMP52]], [[TMP28]]
3927 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP27]]
3928 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP33]])
3929 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
3930 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0
3931 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]])
3932 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP36]])
3933 ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00)
3934 ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0
3935 ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 2.000000e+00)
3936 ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = fmul contract float [[TMP40]], [[TMP38]]
3937 ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = sub i32 [[TMP41]], [[TMP37]]
3938 ; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP42]], i32 [[TMP43]])
3939 ; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = insertelement <4 x float> poison, float [[TMP17]], i64 0
3940 ; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP24]], i64 1
3941 ; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP34]], i64 2
3942 ; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP47]], float [[TMP44]], i64 3
3943 ; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3945 ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_const_denom(
3946 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
3947 ; DAZ-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 4.000000e+00)
3948 ; DAZ-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 2.000000e+00)
3949 ; DAZ-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 8.000000e+00)
3950 ; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float poison)
3951 ; DAZ-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
3952 ; DAZ-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP2]], i64 1
3953 ; DAZ-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i64 2
3954 ; DAZ-NEXT: [[SQRT:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP4]], i64 3
3955 ; DAZ-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[SQRT]], i64 0
3956 ; DAZ-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[SQRT]], i64 1
3957 ; DAZ-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[SQRT]], i64 2
3958 ; DAZ-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[SQRT]], i64 3
3959 ; DAZ-NEXT: [[TMP12:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP8]])
3960 ; DAZ-NEXT: [[TMP13:%.*]] = fneg contract float [[TMP9]]
3961 ; DAZ-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]])
3962 ; DAZ-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP10]])
3963 ; DAZ-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0
3964 ; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP15]], 1
3965 ; DAZ-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP16]])
3966 ; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
3967 ; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
3968 ; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
3969 ; DAZ-NEXT: [[TMP22:%.*]] = fmul contract float [[TMP20]], [[TMP18]]
3970 ; DAZ-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]]
3971 ; DAZ-NEXT: [[TMP24:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]])
3972 ; DAZ-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]])
3973 ; DAZ-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0
3974 ; DAZ-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1
3975 ; DAZ-NEXT: [[TMP28:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP26]])
3976 ; DAZ-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00)
3977 ; DAZ-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0
3978 ; DAZ-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP29]], 1
3979 ; DAZ-NEXT: [[TMP32:%.*]] = fmul contract float [[TMP30]], [[TMP28]]
3980 ; DAZ-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP27]]
3981 ; DAZ-NEXT: [[TMP34:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP33]])
3982 ; DAZ-NEXT: [[TMP35:%.*]] = insertelement <4 x float> poison, float [[TMP12]], i64 0
3983 ; DAZ-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP14]], i64 1
3984 ; DAZ-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP24]], i64 2
3985 ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP37]], float [[TMP34]], i64 3
3986 ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]]
3988 %sqrt = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.0, float 2.0, float 8.0, float poison>), !fpmath !2
3989 %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float poison, float 2.0>, %sqrt, !fpmath !2
3990 ret <4 x float> %partial.rsq
; fdiv of a constant <4 x float> numerator by a constant <4 x float>
; denominator carrying !fpmath !2. The expected output below is fully
; scalarized, one lane at a time:
;   lane 0 ( 1.0 / 0.5 ) and lane 1 ( -1.0 / 2.0 ) take the reciprocal form;
;     the sign of lane 1 is folded into the denominator constant (-2.0). The
;     IEEE runs scale through frexp/ldexp, the preserve-sign run calls rcp
;     directly.
;   lane 2 ( poison / 32.0 ) and lane 3 ( 2.0 / 10.0 ) take the general
;     frexp(num) * rcp(frexp(den)) + ldexp form in every run.
; The tahiti run reads the exponent with llvm.amdgcn.frexp.exp instead of
; extracting field 1 of the llvm.frexp result.
; The indeterminate numerator lane is spelled poison rather than the
; deprecated undef, matching @rsq_f32_vector_const_denom.
; NOTE(review): check lines were updated by hand for the undef -> poison
; switch; regenerate with utils/update_test_checks.py to confirm.
; %out and %x are not referenced by the body.
3993 define <4 x float> @fdiv_constant_f32_vector(ptr addrspace(1) %out, <2 x float> %x) {
3994 ; IEEE-GOODFREXP-LABEL: define <4 x float> @fdiv_constant_f32_vector(
3995 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
3996 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 5.000000e-01)
3997 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
3998 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
3999 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
4000 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
4001 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
4002 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float -2.000000e+00)
4003 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
4004 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
4005 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
4006 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP8]])
4007 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
4008 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 3.200000e+01)
4009 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
4010 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1
4011 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]])
4012 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
4013 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0
4014 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP17]], 1
4015 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]]
4016 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]]
4017 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]])
4018 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 1.000000e+01)
4019 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
4020 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
4021 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP24]])
4022 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00)
4023 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0
4024 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP27]], 1
4025 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = fmul float [[TMP28]], [[TMP26]]
4026 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 [[TMP29]], [[TMP25]]
4027 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP31]])
4028 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
4029 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP12]], i64 1
4030 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP22]], i64 2
4031 ; IEEE-GOODFREXP-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP32]], i64 3
4032 ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]]
4034 ; IEEE-BADFREXP-LABEL: define <4 x float> @fdiv_constant_f32_vector(
4035 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
4036 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 5.000000e-01)
4037 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
4038 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 5.000000e-01)
4039 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]]
4040 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
4041 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
4042 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float -2.000000e+00)
4043 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
4044 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float -2.000000e+00)
4045 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]]
4046 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP8]])
4047 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]])
4048 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 3.200000e+01)
4049 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
4050 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 3.200000e+01)
4051 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]])
4052 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
4053 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0
4054 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison)
4055 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]]
4056 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]]
4057 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]])
4058 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 1.000000e+01)
4059 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
4060 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 1.000000e+01)
4061 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP24]])
4062 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00)
4063 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0
4064 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 2.000000e+00)
4065 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = fmul float [[TMP28]], [[TMP26]]
4066 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 [[TMP29]], [[TMP25]]
4067 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP31]])
4068 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
4069 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP12]], i64 1
4070 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP22]], i64 2
4071 ; IEEE-BADFREXP-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP32]], i64 3
4072 ; IEEE-BADFREXP-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]]
4074 ; DAZ-LABEL: define <4 x float> @fdiv_constant_f32_vector(
4075 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
4076 ; DAZ-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.rcp.f32(float 5.000000e-01)
4077 ; DAZ-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.rcp.f32(float -2.000000e+00)
4078 ; DAZ-NEXT: [[TMP3:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 3.200000e+01)
4079 ; DAZ-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0
4080 ; DAZ-NEXT: [[TMP5:%.*]] = extractvalue { float, i32 } [[TMP3]], 1
4081 ; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP4]])
4082 ; DAZ-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
4083 ; DAZ-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
4084 ; DAZ-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
4085 ; DAZ-NEXT: [[TMP10:%.*]] = fmul float [[TMP8]], [[TMP6]]
4086 ; DAZ-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP5]]
4087 ; DAZ-NEXT: [[TMP12:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP11]])
4088 ; DAZ-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 1.000000e+01)
4089 ; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
4090 ; DAZ-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1
4091 ; DAZ-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]])
4092 ; DAZ-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00)
4093 ; DAZ-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0
4094 ; DAZ-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP17]], 1
4095 ; DAZ-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]]
4096 ; DAZ-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]]
4097 ; DAZ-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]])
4098 ; DAZ-NEXT: [[TMP23:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
4099 ; DAZ-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP2]], i64 1
4100 ; DAZ-NEXT: [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[TMP12]], i64 2
4101 ; DAZ-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP22]], i64 3
4102 ; DAZ-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]]
4104 %const.partial.rcp = fdiv <4 x float> <float 1.0, float -1.0, float poison, float 2.0>, <float 0.5, float 2.0, float 32.0, float 10.0>, !fpmath !2
4105 ret <4 x float> %const.partial.rcp
; Scalar f32 fdiv where only the numerator %a is annotated nofpclass(sub).
; The body issues the same division ten times with different !fpmath
; metadata and fast-math flags, storing each result to %out with a volatile
; store so every variant stays visible in the output.
; What the checks below expect:
;   - no metadata, !fpmath !1, fast, afn and metadata-free arcp divisions are
;     left as fdiv in every run.
;   - !fpmath !2 expands to frexp(num) * rcp(frexp(den)) followed by ldexp in
;     every run; frexp is still applied to %a despite the nofpclass(sub).
;   - !fpmath !0 and !fpmath !3 expand the same way in the IEEE runs, but
;     become llvm.amdgcn.fdiv.fast in the preserve-sign run.
;   - arcp combined with !fpmath becomes a reciprocal of %b multiplied by %a;
;     the IEEE runs scale the reciprocal via frexp/ldexp, the preserve-sign
;     run calls rcp on %b directly.
;   - the tahiti run reads exponents with llvm.amdgcn.frexp.exp instead of
;     extracting field 1 of the llvm.frexp result.
; NOTE(review): the !0..!3 definitions are outside this chunk; judging by the
; value names they are presumably 2.5, 0.5, 1.0 and 3.0 ulp - confirm against
; the metadata at the end of the file.
4108 define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs(ptr addrspace(1) %out, float nofpclass(sub) %a, float %b) {
4109 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs(
4110 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
4111 ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
4112 ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
4113 ; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]]
4114 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
4115 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4116 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
4117 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
4118 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
4119 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4120 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
4121 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
4122 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
4123 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
4124 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
4125 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
4126 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4127 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
4128 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1
4129 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]])
4130 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4131 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
4132 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1
4133 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]]
4134 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
4135 ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
4136 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4137 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4138 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
4139 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
4140 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]])
4141 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4142 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
4143 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
4144 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]]
4145 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
4146 ; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
4147 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
4148 ; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]]
4149 ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4150 ; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]]
4151 ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4152 ; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
4153 ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
4154 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4155 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
4156 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
4157 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
4158 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]])
4159 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
4160 ; IEEE-GOODFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]]
4161 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4162 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4163 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
4164 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1
4165 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]]
4166 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]])
4167 ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]])
4168 ; IEEE-GOODFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]]
4169 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
4170 ; IEEE-GOODFREXP-NEXT: ret void
4172 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs(
4173 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
4174 ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
4175 ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
4176 ; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]]
4177 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
4178 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4179 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
4180 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
4181 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
4182 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4183 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
4184 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
4185 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
4186 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
4187 ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
4188 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
4189 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4190 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
4191 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
4192 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]])
4193 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4194 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
4195 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
4196 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]]
4197 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
4198 ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
4199 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4200 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4201 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
4202 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
4203 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]])
4204 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4205 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
4206 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
4207 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]]
4208 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
4209 ; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
4210 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
4211 ; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]]
4212 ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4213 ; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]]
4214 ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4215 ; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
4216 ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
4217 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4218 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
4219 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
4220 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
4221 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]])
4222 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
4223 ; IEEE-BADFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]]
4224 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4225 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4226 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
4227 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
4228 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]]
4229 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]])
4230 ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]])
4231 ; IEEE-BADFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]]
4232 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
4233 ; IEEE-BADFREXP-NEXT: ret void
4235 ; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs(
4236 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
4237 ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
4238 ; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
4239 ; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]]
4240 ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
4241 ; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4242 ; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
4243 ; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
4244 ; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
4245 ; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4246 ; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
4247 ; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
4248 ; DAZ-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
4249 ; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
4250 ; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
4251 ; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
4252 ; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
4253 ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4254 ; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
4255 ; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
4256 ; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]]
4257 ; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4258 ; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]]
4259 ; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4260 ; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
4261 ; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
4262 ; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]])
4263 ; DAZ-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP10]]
4264 ; DAZ-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4265 ; DAZ-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]])
4266 ; DAZ-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP11]]
4267 ; DAZ-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
4268 ; DAZ-NEXT: ret void
; Input IR: one fdiv per variant, each followed by a volatile store of its
; result to %out.
4270 %no.md = fdiv float %a, %b
4271 store volatile float %no.md, ptr addrspace(1) %out, align 4
4272 %md.half.ulp = fdiv float %a, %b, !fpmath !1
4273 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4
4274 %md.1ulp = fdiv float %a, %b, !fpmath !2
4275 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4
4276 %md.25ulp = fdiv float %a, %b, !fpmath !0
4277 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
4278 %md.3ulp = fdiv float %a, %b, !fpmath !3
4279 store volatile float %md.3ulp, ptr addrspace(1) %out, align 4
4280 %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
4281 store volatile float %fast.md.25ulp, ptr addrspace(1) %out, align 4
4282 %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0
4283 store volatile float %afn.md.25ulp, ptr addrspace(1) %out, align 4
4284 %no.md.arcp = fdiv arcp float %a, %b
4285 store volatile float %no.md.arcp, ptr addrspace(1) %out, align 4
4286 %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
4287 store volatile float %arcp.md.25ulp, ptr addrspace(1) %out, align 4
4288 %arcp.md.1ulp = fdiv arcp float %a, %b, !fpmath !2
4289 store volatile float %arcp.md.1ulp, ptr addrspace(1) %out, align 4
4293 define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(ptr addrspace(1) %out, float %a, float nofpclass(sub) %b) {
4294 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(
4295 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] {
4296 ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
4297 ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
4298 ; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]]
4299 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
4300 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4301 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
4302 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
4303 ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
4304 ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4305 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
4306 ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
4307 ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
4308 ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
4309 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
4310 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
4311 ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4312 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
4313 ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1
4314 ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]])
4315 ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4316 ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
4317 ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1
4318 ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]]
4319 ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
4320 ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
4321 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4322 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4323 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
4324 ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1
4325 ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]])
4326 ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4327 ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
4328 ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
4329 ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]]
4330 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
4331 ; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
4332 ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
4333 ; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]]
4334 ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4335 ; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]]
4336 ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4337 ; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
4338 ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
4339 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4340 ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
4341 ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1
4342 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
4343 ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]])
4344 ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
4345 ; IEEE-GOODFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]]
4346 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4347 ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4348 ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
4349 ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1
4350 ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]]
4351 ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]])
4352 ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]])
4353 ; IEEE-GOODFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]]
4354 ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
4355 ; IEEE-GOODFREXP-NEXT: ret void
4357 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(
4358 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] {
4359 ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
4360 ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
4361 ; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]]
4362 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
4363 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4364 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
4365 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
4366 ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
4367 ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4368 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
4369 ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
4370 ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
4371 ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
4372 ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
4373 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
4374 ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4375 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
4376 ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
4377 ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]])
4378 ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4379 ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0
4380 ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
4381 ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]]
4382 ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]]
4383 ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]])
4384 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4385 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4386 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0
4387 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
4388 ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]])
4389 ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4390 ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
4391 ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]])
4392 ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]]
4393 ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]]
4394 ; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]])
4395 ; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
4396 ; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]]
4397 ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4398 ; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]]
4399 ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4400 ; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
4401 ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
4402 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4403 ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0
4404 ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
4405 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]]
4406 ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]])
4407 ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]])
4408 ; IEEE-BADFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]]
4409 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4410 ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4411 ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0
4412 ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]])
4413 ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]]
4414 ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]])
4415 ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]])
4416 ; IEEE-BADFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]]
4417 ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
4418 ; IEEE-BADFREXP-NEXT: ret void
4420 ; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(
4421 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] {
4422 ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]]
4423 ; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
4424 ; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]]
4425 ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
4426 ; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]])
4427 ; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
4428 ; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
4429 ; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
4430 ; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]])
4431 ; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0
4432 ; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1
4433 ; DAZ-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]]
4434 ; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]]
4435 ; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]])
4436 ; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
4437 ; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
4438 ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4439 ; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]])
4440 ; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
4441 ; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]]
4442 ; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4443 ; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]]
4444 ; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4445 ; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]]
4446 ; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4
4447 ; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]])
4448 ; DAZ-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP10]]
4449 ; DAZ-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
4450 ; DAZ-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]])
4451 ; DAZ-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP11]]
4452 ; DAZ-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
4453 ; DAZ-NEXT: ret void
4455 %no.md = fdiv float %a, %b
4456 store volatile float %no.md, ptr addrspace(1) %out, align 4
4457 %md.half.ulp = fdiv float %a, %b, !fpmath !1
4458 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4
4459 %md.1ulp = fdiv float %a, %b, !fpmath !2
4460 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4
4461 %md.25ulp = fdiv float %a, %b, !fpmath !0
4462 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4
4463 %md.3ulp = fdiv float %a, %b, !fpmath !3
4464 store volatile float %md.3ulp, ptr addrspace(1) %out, align 4
4465 %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
4466 store volatile float %fast.md.25ulp, ptr addrspace(1) %out, align 4
4467 %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0
4468 store volatile float %afn.md.25ulp, ptr addrspace(1) %out, align 4
4469 %no.md.arcp = fdiv arcp float %a, %b
4470 store volatile float %no.md.arcp, ptr addrspace(1) %out, align 4
4471 %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
4472 store volatile float %arcp.md.25ulp, ptr addrspace(1) %out, align 4
4473 %arcp.md.1ulp = fdiv arcp float %a, %b, !fpmath !2
4474 store volatile float %arcp.md.1ulp, ptr addrspace(1) %out, align 4
; Intrinsic declarations shared by the test functions in this file.
; NOTE(review): the last test (@fdiv_fpmath_f32_nosub_rhs) only uses plain
; fdiv/store in its input IR, so the users of these declarations are
; presumably earlier tests -- confirm before removing any as unused.
4478 declare float @llvm.sqrt.f32(float)
4479 declare float @llvm.fabs.f32(float)
4480 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
4481 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
4482 declare void @llvm.assume(i1 noundef)
; Attribute group #0 is attached to @noop_fdiv_fpmath at the top of the file;
; the expected output for that function keeps its fdiv (and !fpmath) unchanged.
4484 attributes #0 = { optnone noinline }
; Operand nodes for the !fpmath attachments on the fdiv instructions under
; test. Each holds a single float; the value names of the instructions using
; them (md.half.ulp, md.1ulp, md.25ulp, md.3ulp) match the constants below.
4486 !0 = !{float 2.500000e+00} ; used by the *.md.25ulp fdivs
4487 !1 = !{float 5.000000e-01} ; used by %md.half.ulp
4488 !2 = !{float 1.000000e+00} ; used by %md.1ulp and %arcp.md.1ulp
4489 !3 = !{float 3.000000e+00} ; used by %md.3ulp