; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
; Cost-model expectations for fadd on scalar and vector half, float and double.
; The expected cost is 2 for every type of at most 128 bits and 4 for the
; 256-bit vectors (<16 x half>, <8 x float>, <4 x double>).
define i32 @fadd(i32 %arg) {
; CHECK-LABEL: 'fadd'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fadd half undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fadd <4 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fadd <8 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fadd <16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fadd float undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fadd <2 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fadd <4 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fadd <8 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fadd double undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fadd <2 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fadd <4 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  ; half
  %F16 = fadd half undef, undef
  %V4F16 = fadd <4 x half> undef, undef
  %V8F16 = fadd <8 x half> undef, undef
  %V16F16 = fadd <16 x half> undef, undef

  ; float
  %F32 = fadd float undef, undef
  %V2F32 = fadd <2 x float> undef, undef
  %V4F32 = fadd <4 x float> undef, undef
  %V8F32 = fadd <8 x float> undef, undef

  ; double
  %F64 = fadd double undef, undef
  %V2F64 = fadd <2 x double> undef, undef
  %V4F64 = fadd <4 x double> undef, undef

  ; The last expectation above matches this terminator.
  ret i32 undef
}
; Cost-model expectations for fsub on scalar and vector half, float and double.
; The expected cost is 2 for every type of at most 128 bits and 4 for the
; 256-bit vectors.
define i32 @fsub(i32 %arg) {
; CHECK-LABEL: 'fsub'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fsub <16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  ; half
  %F16 = fsub half undef, undef
  %V4F16 = fsub <4 x half> undef, undef
  %V8F16 = fsub <8 x half> undef, undef
  %V16F16 = fsub <16 x half> undef, undef

  ; float
  %F32 = fsub float undef, undef
  %V2F32 = fsub <2 x float> undef, undef
  %V4F32 = fsub <4 x float> undef, undef
  %V8F32 = fsub <8 x float> undef, undef

  ; double
  %F64 = fsub double undef, undef
  %V2F64 = fsub <2 x double> undef, undef
  %V4F64 = fsub <4 x double> undef, undef

  ; The last expectation above matches this terminator.
  ret i32 undef
}
; Cost-model expectations for the "fsub -0.0, x" negation idiom. The first
; operand is negative zero (printed as 0xH8000 for half in the expectations).
; This function has no <16 x half> case, unlike the plain fsub test.
define i32 @fneg_idiom(i32 %arg) {
; CHECK-LABEL: 'fneg_idiom'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fsub half 0xH8000, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float -0.000000e+00, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  ; half
  %F16 = fsub half -0.0, undef
  %V4F16 = fsub <4 x half> <half -0.0, half -0.0, half -0.0, half -0.0>, undef
  %V8F16 = fsub <8 x half> <half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0, half -0.0>, undef

  ; float
  %F32 = fsub float -0.0, undef
  %V2F32 = fsub <2 x float> <float -0.0, float -0.0>, undef
  %V4F32 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, undef
  %V8F32 = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, undef

  ; double
  %F64 = fsub double -0.0, undef
  %V2F64 = fsub <2 x double> <double -0.0, double -0.0>, undef
  %V4F64 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, undef

  ; The last expectation above matches this terminator.
  ret i32 undef
}
; Cost-model expectations for the unary fneg instruction on scalar and vector
; half, float and double (including a <2 x half> case). The expected cost is 2
; for every type of at most 128 bits and 4 for the 256-bit vectors.
define i32 @fneg(i32 %arg) {
; CHECK-LABEL: 'fneg'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fneg half undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16 = fneg <2 x half> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fneg <4 x half> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fneg <8 x half> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fneg <16 x half> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fneg float undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fneg <2 x float> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fneg <4 x float> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fneg <8 x float> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fneg double undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fneg <2 x double> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fneg <4 x double> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  ; half
  %F16 = fneg half undef
  %V2F16 = fneg <2 x half> undef
  %V4F16 = fneg <4 x half> undef
  %V8F16 = fneg <8 x half> undef
  %V16F16 = fneg <16 x half> undef

  ; float
  %F32 = fneg float undef
  %V2F32 = fneg <2 x float> undef
  %V4F32 = fneg <4 x float> undef
  %V8F32 = fneg <8 x float> undef

  ; double
  %F64 = fneg double undef
  %V2F64 = fneg <2 x double> undef
  %V4F64 = fneg <4 x double> undef

  ; The last expectation above matches this terminator.
  ret i32 undef
}
; Cost-model expectations for fmul on scalar and vector half, float and double.
; The expected cost is 2 for every type of at most 128 bits and 4 for the
; 256-bit vectors.
define i32 @fmul(i32 %arg) {
; CHECK-LABEL: 'fmul'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fmul half undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fmul <4 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fmul <8 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fmul <16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fmul float undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fmul <2 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fmul <4 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fmul <8 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fmul double undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul <2 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  ; half
  %F16 = fmul half undef, undef
  %V4F16 = fmul <4 x half> undef, undef
  %V8F16 = fmul <8 x half> undef, undef
  %V16F16 = fmul <16 x half> undef, undef

  ; float
  %F32 = fmul float undef, undef
  %V2F32 = fmul <2 x float> undef, undef
  %V4F32 = fmul <4 x float> undef, undef
  %V8F32 = fmul <8 x float> undef, undef

  ; double
  %F64 = fmul double undef, undef
  %V2F64 = fmul <2 x double> undef, undef
  %V4F64 = fmul <4 x double> undef, undef

  ; The last expectation above matches this terminator.
  ret i32 undef
}
; Cost-model expectations for fdiv on scalar and vector half, float and double.
; The expected cost is 2 for every type of at most 128 bits and 4 for the
; 256-bit vectors.
define i32 @fdiv(i32 %arg) {
; CHECK-LABEL: 'fdiv'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = fdiv half undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = fdiv <4 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = fdiv <8 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = fdiv <16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = fdiv float undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = fdiv <2 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fdiv <4 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fdiv <8 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fdiv double undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fdiv <2 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fdiv <4 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  ; half
  %F16 = fdiv half undef, undef
  %V4F16 = fdiv <4 x half> undef, undef
  %V8F16 = fdiv <8 x half> undef, undef
  %V16F16 = fdiv <16 x half> undef, undef

  ; float
  %F32 = fdiv float undef, undef
  %V2F32 = fdiv <2 x float> undef, undef
  %V4F32 = fdiv <4 x float> undef, undef
  %V8F32 = fdiv <8 x float> undef, undef

  ; double
  %F64 = fdiv double undef, undef
  %V2F64 = fdiv <2 x double> undef, undef
  %V4F64 = fdiv <4 x double> undef, undef

  ; The last expectation above matches this terminator.
  ret i32 undef
}
; Cost-model expectations for frem on scalar and vector half, float and double.
; Scalars are expected to cost 2, while every vector form is expected to cost
; considerably more (10 to 116).
; NOTE(review): the vector figures look like per-lane scalarization costs --
; confirm against the AArch64 cost model before relying on that reading.
define i32 @frem(i32 %arg) {
; CHECK-LABEL: 'frem'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4F16 = frem <4 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8F16 = frem <8 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16F16 = frem <16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = frem <2 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4F32 = frem <4 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8F32 = frem <8 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = frem <2 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F64 = frem <4 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  ; half
  %F16 = frem half undef, undef
  %V4F16 = frem <4 x half> undef, undef
  %V8F16 = frem <8 x half> undef, undef
  %V16F16 = frem <16 x half> undef, undef

  ; float
  %F32 = frem float undef, undef
  %V2F32 = frem <2 x float> undef, undef
  %V4F32 = frem <4 x float> undef, undef
  %V8F32 = frem <8 x float> undef, undef

  ; double
  %F64 = frem double undef, undef
  %V2F64 = frem <2 x double> undef, undef
  %V4F64 = frem <4 x double> undef, undef

  ; The last expectation above matches this terminator.
  ret i32 undef
}
; Cost-model expectations for the llvm.sqrt intrinsic on scalar and vector
; half, float and double. The expected cost is 1 for every type of at most
; 128 bits and 4 for the 256-bit vectors.
define i32 @fsqrt(i32 %arg) {
; CHECK-LABEL: 'fsqrt'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.sqrt.f16(half undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  ; half
  %F16 = call half @llvm.sqrt.f16(half undef)
  %V4F16 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
  %V8F16 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
  %V16F16 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)

  ; float
  %F32 = call float @llvm.sqrt.f32(float undef)
  %V2F32 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> undef)
  %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
  %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)

  ; double
  %F64 = call double @llvm.sqrt.f64(double undef)
  %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
  %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)

  ; The last expectation above matches this terminator.
  ret i32 undef
}
; Cost-model expectations for the llvm.fabs intrinsic on scalar and vector
; half, float and double. The expected cost is 1 for every type of at most
; 128 bits and 4 for the 256-bit vectors.
; The <16 x half> result is named %V16F16 (it was previously mislabelled
; %V2F16) so that value names match their vector types as in the other tests.
define i32 @fabs(i32 %arg) {
; CHECK-LABEL: 'fabs'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fabs.f16(half undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fabs.f32(float undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fabs.f64(double undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  ; half
  %F16 = call half @llvm.fabs.f16(half undef)
  %V4F16 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
  %V8F16 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
  %V16F16 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)

  ; float
  %F32 = call float @llvm.fabs.f32(float undef)
  %V2F32 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
  %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
  %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)

  ; double
  %F64 = call double @llvm.fabs.f64(double undef)
  %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
  %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)

  ; The last expectation above matches this terminator.
  ret i32 undef
}
; Cost-model expectations for the llvm.copysign intrinsic on scalar and vector
; half, float and double. The expected cost is 2 for every type of at most
; 128 bits and 4 for the 256-bit vectors.
define i32 @fcopysign(i32 %arg) {
; CHECK-LABEL: 'fcopysign'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = call half @llvm.copysign.f16(half undef, half undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> undef, <2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  ; half
  %F16 = call half @llvm.copysign.f16(half undef, half undef)
  %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
  %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
  %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)

  ; float
  %F32 = call float @llvm.copysign.f32(float undef, float undef)
  %V2F32 = call <2 x float> @llvm.copysign.v2f32(<2 x float> undef, <2 x float> undef)
  %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
  %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)

  ; double
  %F64 = call double @llvm.copysign.f64(double undef, double undef)
  %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
  %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)

  ; The last expectation above matches this terminator.
  ret i32 undef
}
; Cost-model expectations for the llvm.fma intrinsic on scalar and vector
; half, float and double. The expected cost is 1 for every type of at most
; 128 bits and 4 for the 256-bit vectors.
; %F16 now calls the half overload (llvm.fma.f16); it previously repeated the
; float call, so the scalar half case was never exercised.
; NOTE(review): the expected cost of 1 for the half call is carried over from
; the line it replaces -- regenerate with utils/update_analyze_test_checks.py
; to confirm.
define i32 @fma(i32 %arg) {
; CHECK-LABEL: 'fma'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  ; half
  %F16 = call half @llvm.fma.f16(half undef, half undef, half undef)
  %V4F16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef)
  %V8F16 = call <8 x half> @llvm.fma.v8f16(<8 x half> undef, <8 x half> undef, <8 x half> undef)
  %V16F16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef)

  ; float
  %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
  %V2F32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef)
  %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
  %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)

  ; double
  %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
  %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
  %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)

  ; The last expectation above matches this terminator.
  ret i32 undef
}

; Declarations of the intrinsics called by the tests above, grouped by
; intrinsic and, within each group, by element type (half, float, double).

; llvm.sqrt
declare half @llvm.sqrt.f16(half)
declare <4 x half> @llvm.sqrt.v4f16(<4 x half>)
declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
declare <16 x half> @llvm.sqrt.v16f16(<16 x half>)

declare float @llvm.sqrt.f32(float)
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)

declare double @llvm.sqrt.f64(double)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)

; llvm.fabs
declare half @llvm.fabs.f16(half)
declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
declare <16 x half> @llvm.fabs.v16f16(<16 x half>)

declare float @llvm.fabs.f32(float)
declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare <8 x float> @llvm.fabs.v8f32(<8 x float>)

declare double @llvm.fabs.f64(double)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
declare <4 x double> @llvm.fabs.v4f64(<4 x double>)

; llvm.copysign
declare half @llvm.copysign.f16(half, half)
declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
declare <16 x half> @llvm.copysign.v16f16(<16 x half>, <16 x half>)

declare float @llvm.copysign.f32(float, float)
declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>)

declare double @llvm.copysign.f64(double, double)
declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>)

; llvm.fma
declare half @llvm.fma.f16(half, half, half)
declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)

declare float @llvm.fma.f32(float, float, float)
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)

declare double @llvm.fma.f64(double, double, double)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)