1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
#include "media/base/vector_math.h"
#include "media/base/vector_math_testing.h"

#include "base/cpu.h"
#include "base/logging.h"
#include "build/build_config.h"

#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
#include <arm_neon.h>
#endif

17 namespace vector_math
{
19 // If we know the minimum architecture at compile time, avoid CPU detection.
20 // Force NaCl code to use C routines since (at present) nothing there uses these
21 // methods and plumbing the -msse built library is non-trivial.
#if defined(ARCH_CPU_X86_FAMILY) && !defined(OS_NACL)
// NOTE(review): the inner __SSE__ guard is reconstructed; the visible source
// only shows that two mutually exclusive sets of x86 definitions exist
// (compile-time SSE vs. runtime detection). Confirm the exact condition.
#if defined(__SSE__)
// SSE is part of the compile-time baseline: bind directly to the SSE routines.
#define FMAC_FUNC FMAC_SSE
#define FMUL_FUNC FMUL_SSE
// Nothing to detect.  NOTE(review): no-op definition reconstructed so that
// Initialize() exists on every build path -- confirm against the header.
void Initialize() {}
#else
// X86 CPU detection required.  Functions will be set by Initialize().
// TODO(dalecurtis): Once Chrome moves to an SSE baseline this can be removed.
#define FMAC_FUNC g_fmac_proc_
#define FMUL_FUNC g_fmul_proc_

// Common signature of the FMAC/FMUL implementations, used for the runtime
// selected function pointers below.
typedef void (*MathProc)(const float src[], float scale, int len, float dest[]);
static MathProc g_fmac_proc_ = NULL;
static MathProc g_fmul_proc_ = NULL;

// Selects the SSE or plain C routines based on what base::CPU reports.
// FMAC()/FMUL() call through these pointers, which start out NULL, so this
// has to run before either of them is used.
// NOTE(review): the function header is not present in the visible source;
// "void Initialize()" is inferred from the comment above and from the body,
// which takes no input and produces no value -- confirm against the header.
void Initialize() {
  const bool kUseSSE = base::CPU().has_sse();
  g_fmac_proc_ = kUseSSE ? FMAC_SSE : FMAC_C;
  g_fmul_proc_ = kUseSSE ? FMUL_SSE : FMUL_C;
}
#endif
#elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
#define FMAC_FUNC FMAC_NEON
#define FMUL_FUNC FMUL_NEON
// Nothing to detect.  NOTE(review): reconstructed no-op, see above.
void Initialize() {}
#else
// Unknown architecture.
#define FMAC_FUNC FMAC_C
#define FMUL_FUNC FMUL_C
// Nothing to detect.  NOTE(review): reconstructed no-op, see above.
void Initialize() {}
#endif

56 void FMAC(const float src
[], float scale
, int len
, float dest
[]) {
57 // Ensure |src| and |dest| are 16-byte aligned.
58 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src
) & (kRequiredAlignment
- 1));
59 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest
) & (kRequiredAlignment
- 1));
60 return FMAC_FUNC(src
, scale
, len
, dest
);
// Portable multiply-accumulate: for every i in [0, len),
// dest[i] += src[i] * scale.  |src| is only read; |dest| is updated in place.
// A |len| of zero or less leaves |dest| untouched.
void FMAC_C(const float src[], float scale, int len, float dest[]) {
  for (int i = 0; i < len; ++i)
    dest[i] += src[i] * scale;
}

68 void FMUL(const float src
[], float scale
, int len
, float dest
[]) {
69 // Ensure |src| and |dest| are 16-byte aligned.
70 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src
) & (kRequiredAlignment
- 1));
71 DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest
) & (kRequiredAlignment
- 1));
72 return FMUL_FUNC(src
, scale
, len
, dest
);
// Portable multiply: for every i in [0, len), dest[i] = src[i] * scale.
// |src| is only read; the previous contents of |dest| are overwritten.
// A |len| of zero or less leaves |dest| untouched.
void FMUL_C(const float src[], float scale, int len, float dest[]) {
  for (int i = 0; i < len; ++i)
    dest[i] = src[i] * scale;
}

#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
// NEON multiply-accumulate: dest[i] += src[i] * scale for i in [0, len).
// Elements are processed four at a time with vector loads/stores; whatever is
// left over when |len| is not a multiple of four is handled by a scalar loop.
void FMAC_NEON(const float src[], float scale, int len, float dest[]) {
  const int rem = len % 4;
  const int last_index = len - rem;
  // Broadcast |scale| into all four lanes once, outside the loop.
  float32x4_t m_scale = vmovq_n_f32(scale);
  for (int i = 0; i < last_index; i += 4) {
    // dest[i..i+3] = dest[i..i+3] + src[i..i+3] * scale
    vst1q_f32(dest + i, vmlaq_f32(
        vld1q_f32(dest + i), vld1q_f32(src + i), m_scale));
  }

  // Handle any remaining values that wouldn't fit in a NEON pass.
  for (int i = last_index; i < len; ++i)
    dest[i] += src[i] * scale;
}

// NEON multiply: dest[i] = src[i] * scale for i in [0, len).  Same
// four-at-a-time structure and scalar tail as FMAC_NEON.
void FMUL_NEON(const float src[], float scale, int len, float dest[]) {
  const int rem = len % 4;
  const int last_index = len - rem;
  // Broadcast |scale| into all four lanes once, outside the loop.
  float32x4_t m_scale = vmovq_n_f32(scale);
  for (int i = 0; i < last_index; i += 4)
    vst1q_f32(dest + i, vmulq_f32(vld1q_f32(src + i), m_scale));

  // Handle any remaining values that wouldn't fit in a NEON pass.
  for (int i = last_index; i < len; ++i)
    dest[i] = src[i] * scale;
}
#endif

108 } // namespace vector_math