1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // MSVC++ requires this to be set before any other includes to get M_PI.
6 #define _USE_MATH_DEFINES
9 #include "base/command_line.h"
11 #include "base/memory/aligned_memory.h"
12 #include "base/memory/scoped_ptr.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/stringize_macros.h"
15 #include "base/time/time.h"
16 #include "media/base/vector_math.h"
17 #include "media/base/vector_math_testing.h"
18 #include "testing/gtest/include/gtest/gtest.h"
20 using base::TimeTicks
;
// Name of the command line switch that overrides, at run time, how many
// iterations the benchmark tests execute.
static const char kBenchmarkIterations[] = "vector-math-iterations";
// Iteration count used by the benchmarks when the switch is not supplied.
static const int kDefaultIterations = 10;

// Default test values: the scale factor handed to the vector_math routines and
// the values the input and output vectors are filled with before each run.
// Written as float literals so no double-to-float conversion is involved.
static const float kScale = 0.5f;
static const float kInputFillValue = 1.0f;
static const float kOutputFillValue = 3.0f;
34 class VectorMathTest
: public testing::Test
{
36 static const int kVectorSize
= 8192;
39 // Initialize input and output vectors.
40 input_vector
.reset(static_cast<float*>(base::AlignedAlloc(
41 sizeof(float) * kVectorSize
, vector_math::kRequiredAlignment
)));
42 output_vector
.reset(static_cast<float*>(base::AlignedAlloc(
43 sizeof(float) * kVectorSize
, vector_math::kRequiredAlignment
)));
46 void FillTestVectors(float input
, float output
) {
47 // Setup input and output vectors.
48 fill(input_vector
.get(), input_vector
.get() + kVectorSize
, input
);
49 fill(output_vector
.get(), output_vector
.get() + kVectorSize
, output
);
52 void VerifyOutput(float value
) {
53 for (int i
= 0; i
< kVectorSize
; ++i
)
54 ASSERT_FLOAT_EQ(output_vector
.get()[i
], value
);
57 int BenchmarkIterations() {
58 int vector_math_iterations
= kDefaultIterations
;
59 std::string
iterations(
60 CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
61 kBenchmarkIterations
));
62 if (!iterations
.empty())
63 base::StringToInt(iterations
, &vector_math_iterations
);
64 return vector_math_iterations
;
68 int benchmark_iterations
;
69 scoped_ptr_malloc
<float, base::ScopedPtrAlignedFree
> input_vector
;
70 scoped_ptr_malloc
<float, base::ScopedPtrAlignedFree
> output_vector
;
72 DISALLOW_COPY_AND_ASSIGN(VectorMathTest
);
75 // Ensure each optimized vector_math::FMAC() method returns the same value.
76 TEST_F(VectorMathTest
, FMAC
) {
77 static const float kResult
= kInputFillValue
* kScale
+ kOutputFillValue
;
81 FillTestVectors(kInputFillValue
, kOutputFillValue
);
83 input_vector
.get(), kScale
, kVectorSize
, output_vector
.get());
84 VerifyOutput(kResult
);
88 SCOPED_TRACE("FMAC_C");
89 FillTestVectors(kInputFillValue
, kOutputFillValue
);
91 input_vector
.get(), kScale
, kVectorSize
, output_vector
.get());
92 VerifyOutput(kResult
);
95 #if defined(ARCH_CPU_X86_FAMILY)
97 ASSERT_TRUE(base::CPU().has_sse());
98 SCOPED_TRACE("FMAC_SSE");
99 FillTestVectors(kInputFillValue
, kOutputFillValue
);
100 vector_math::FMAC_SSE(
101 input_vector
.get(), kScale
, kVectorSize
, output_vector
.get());
102 VerifyOutput(kResult
);
106 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
108 SCOPED_TRACE("FMAC_NEON");
109 FillTestVectors(kInputFillValue
, kOutputFillValue
);
110 vector_math::FMAC_NEON(
111 input_vector
.get(), kScale
, kVectorSize
, output_vector
.get());
112 VerifyOutput(kResult
);
117 // Ensure each optimized vector_math::FMUL() method returns the same value.
118 TEST_F(VectorMathTest
, FMUL
) {
119 static const float kResult
= kInputFillValue
* kScale
;
122 SCOPED_TRACE("FMUL");
123 FillTestVectors(kInputFillValue
, kOutputFillValue
);
125 input_vector
.get(), kScale
, kVectorSize
, output_vector
.get());
126 VerifyOutput(kResult
);
130 SCOPED_TRACE("FMUL_C");
131 FillTestVectors(kInputFillValue
, kOutputFillValue
);
133 input_vector
.get(), kScale
, kVectorSize
, output_vector
.get());
134 VerifyOutput(kResult
);
137 #if defined(ARCH_CPU_X86_FAMILY)
139 ASSERT_TRUE(base::CPU().has_sse());
140 SCOPED_TRACE("FMUL_SSE");
141 FillTestVectors(kInputFillValue
, kOutputFillValue
);
142 vector_math::FMUL_SSE(
143 input_vector
.get(), kScale
, kVectorSize
, output_vector
.get());
144 VerifyOutput(kResult
);
148 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
150 SCOPED_TRACE("FMUL_NEON");
151 FillTestVectors(kInputFillValue
, kOutputFillValue
);
152 vector_math::FMUL_NEON(
153 input_vector
.get(), kScale
, kVectorSize
, output_vector
.get());
154 VerifyOutput(kResult
);
// Define platform independent function name for FMACBenchmark* tests.
// FMAC_FUNC stays undefined when neither optimized variant is selected, which
// the benchmark checks with #if defined(FMAC_FUNC).
#if defined(ARCH_CPU_X86_FAMILY)
#define FMAC_FUNC FMAC_SSE
#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
#define FMAC_FUNC FMAC_NEON
#endif
166 // Benchmark for each optimized vector_math::FMAC() method. Original benchmarks
167 // were run with --vector-fmac-iterations=200000.
168 TEST_F(VectorMathTest
, FMACBenchmark
) {
169 static const int kBenchmarkIterations
= BenchmarkIterations();
171 printf("Benchmarking %d iterations:\n", kBenchmarkIterations
);
173 // Benchmark FMAC_C().
174 FillTestVectors(kInputFillValue
, kOutputFillValue
);
175 TimeTicks start
= TimeTicks::HighResNow();
176 for (int i
= 0; i
< kBenchmarkIterations
; ++i
) {
178 input_vector
.get(), kScale
, kVectorSize
, output_vector
.get());
180 double total_time_c_ms
= (TimeTicks::HighResNow() - start
).InMillisecondsF();
181 printf("FMAC_C took %.2fms.\n", total_time_c_ms
);
183 #if defined(FMAC_FUNC)
184 #if defined(ARCH_CPU_X86_FAMILY)
185 ASSERT_TRUE(base::CPU().has_sse());
188 // Benchmark FMAC_FUNC() with unaligned size.
189 ASSERT_NE((kVectorSize
- 1) % (vector_math::kRequiredAlignment
/
191 FillTestVectors(kInputFillValue
, kOutputFillValue
);
192 start
= TimeTicks::HighResNow();
193 for (int j
= 0; j
< kBenchmarkIterations
; ++j
) {
194 vector_math::FMAC_FUNC(
195 input_vector
.get(), kScale
, kVectorSize
- 1, output_vector
.get());
197 double total_time_optimized_unaligned_ms
=
198 (TimeTicks::HighResNow() - start
).InMillisecondsF();
199 printf(STRINGIZE(FMAC_FUNC
) " (unaligned size) took %.2fms; which is %.2fx "
200 "faster than FMAC_C.\n", total_time_optimized_unaligned_ms
,
201 total_time_c_ms
/ total_time_optimized_unaligned_ms
);
203 // Benchmark FMAC_FUNC() with aligned size.
204 ASSERT_EQ(kVectorSize
% (vector_math::kRequiredAlignment
/ sizeof(float)),
206 FillTestVectors(kInputFillValue
, kOutputFillValue
);
207 start
= TimeTicks::HighResNow();
208 for (int j
= 0; j
< kBenchmarkIterations
; ++j
) {
209 vector_math::FMAC_FUNC(
210 input_vector
.get(), kScale
, kVectorSize
, output_vector
.get());
212 double total_time_optimized_aligned_ms
=
213 (TimeTicks::HighResNow() - start
).InMillisecondsF();
214 printf(STRINGIZE(FMAC_FUNC
) " (aligned) took %.2fms; which is %.2fx "
215 "faster than FMAC_C and %.2fx faster than "
216 STRINGIZE(FMAC_FUNC
) " (unaligned).\n",
217 total_time_optimized_aligned_ms
,
218 total_time_c_ms
/ total_time_optimized_aligned_ms
,
219 total_time_optimized_unaligned_ms
/ total_time_optimized_aligned_ms
);
// Define platform independent function name for FMULBenchmark* tests.
// FMUL_FUNC stays undefined when neither optimized variant is selected, which
// the benchmark checks with #if defined(FMUL_FUNC).
#if defined(ARCH_CPU_X86_FAMILY)
#define FMUL_FUNC FMUL_SSE
#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
#define FMUL_FUNC FMUL_NEON
#endif
232 // Benchmark for each optimized vector_math::FMUL() method. Original benchmarks
233 // were run with --vector-math-iterations=200000.
234 TEST_F(VectorMathTest
, FMULBenchmark
) {
235 static const int kBenchmarkIterations
= BenchmarkIterations();
237 printf("Benchmarking %d iterations:\n", kBenchmarkIterations
);
239 // Benchmark FMUL_C().
240 FillTestVectors(kInputFillValue
, kOutputFillValue
);
241 TimeTicks start
= TimeTicks::HighResNow();
242 for (int i
= 0; i
< kBenchmarkIterations
; ++i
) {
244 input_vector
.get(), kScale
, kVectorSize
, output_vector
.get());
246 double total_time_c_ms
= (TimeTicks::HighResNow() - start
).InMillisecondsF();
247 printf("FMUL_C took %.2fms.\n", total_time_c_ms
);
249 #if defined(FMUL_FUNC)
250 #if defined(ARCH_CPU_X86_FAMILY)
251 ASSERT_TRUE(base::CPU().has_sse());
254 // Benchmark FMUL_SSE() with unaligned size.
255 ASSERT_NE((kVectorSize
- 1) % (vector_math::kRequiredAlignment
/
257 FillTestVectors(kInputFillValue
, kOutputFillValue
);
258 start
= TimeTicks::HighResNow();
259 for (int j
= 0; j
< kBenchmarkIterations
; ++j
) {
260 vector_math::FMUL_FUNC(
261 input_vector
.get(), kScale
, kVectorSize
- 1, output_vector
.get());
263 double total_time_optimized_unaligned_ms
=
264 (TimeTicks::HighResNow() - start
).InMillisecondsF();
265 printf(STRINGIZE(FMUL_FUNC
) " (unaligned size) took %.2fms; which is %.2fx "
266 "faster than FMUL_C.\n", total_time_optimized_unaligned_ms
,
267 total_time_c_ms
/ total_time_optimized_unaligned_ms
);
269 // Benchmark FMUL_SSE() with aligned size.
270 ASSERT_EQ(kVectorSize
% (vector_math::kRequiredAlignment
/ sizeof(float)),
272 FillTestVectors(kInputFillValue
, kOutputFillValue
);
273 start
= TimeTicks::HighResNow();
274 for (int j
= 0; j
< kBenchmarkIterations
; ++j
) {
275 vector_math::FMUL_FUNC(
276 input_vector
.get(), kScale
, kVectorSize
, output_vector
.get());
278 double total_time_optimized_aligned_ms
=
279 (TimeTicks::HighResNow() - start
).InMillisecondsF();
280 printf(STRINGIZE(FMUL_FUNC
) " (aligned) took %.2fms; which is %.2fx "
281 "faster than FMUL_C and %.2fx faster than "
282 STRINGIZE(FMUL_FUNC
) " (unaligned).\n",
283 total_time_optimized_aligned_ms
,
284 total_time_c_ms
/ total_time_optimized_aligned_ms
,
285 total_time_optimized_unaligned_ms
/ total_time_optimized_aligned_ms
);