1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/browser/speech/audio_buffer.h"
6 #include "content/browser/speech/endpointer/endpointer.h"
7 #include "testing/gtest/include/gtest/gtest.h"
// Audio framing constants shared by the endpointer tests in this file.
10 const int kFrameRate
= 50; // 20 ms long frames for AMR encoding.
11 const int kSampleRate
= 8000; // 8 k samples per second for AMR encoding.
13 // At 8 k samples per second a 20 ms frame is 160 samples, which corresponds
// to kSampleRate / kFrameRate as computed below.
15 const int kFrameSize
= kSampleRate
/ kFrameRate
; // 160 samples.
// Compile-time check that the derived frame size really is 160 samples.
16 COMPILE_ASSERT(kFrameSize
== 160, invalid_frame_size
);
// Abstract interface used by RunEndpointerEventsTest to hand one frame of
// audio at a time to an endpointer implementation and read back its status.
21 class FrameProcessor
{
23 // Process a single frame of test audio samples.
// RunEndpointerEventsTest calls this with a |time| that it advances by one
// frame duration (in microseconds) per call, a buffer of kFrameSize int16
// samples, and kFrameSize as |frame_size|. The returned EpStatus is what the
// test compares against its expectations.
24 virtual EpStatus
ProcessFrame(int64 time
, int16
* samples
, int frame_size
) = 0;
// Drives |processor| with kNumFrames frames of random samples and checks the
// EpStatus it reports at frames 20, 70 and 120.
// NOTE(review): |gain| and |time| are used below but their declarations (and
// the code that assigns |gain| per frame) are not visible in this view.
27 void RunEndpointerEventsTest(FrameProcessor
* processor
) {
// Scratch buffer holding one frame; it is refilled on every iteration.
28 int16 samples
[kFrameSize
];
30 // We will create a white noise signal of 150 frames. The frames from 50 to
31 // 100 will have more power, and the endpointer should fire on those frames.
32 const int kNumFrames
= 150;
34 // Create a random sequence of samples.
38 for (int frame_count
= 0; frame_count
< kNumFrames
; ++frame_count
) {
39 // Frames 50 through 99 (the range tested below) will have more power, and
40 // the endpointer should detect those frames as speech.
41 if ((frame_count
>= 50) && (frame_count
< 100)) {
46 // Create random samples.
47 for (int i
= 0; i
< kFrameSize
; ++i
) {
// Map rand() to roughly [-0.5, 0.5] before scaling it by |gain|.
48 float randNum
= static_cast<float>(rand() - (RAND_MAX
/ 2)) /
49 static_cast<float>(RAND_MAX
);
50 samples
[i
] = static_cast<int16
>(gain
* randNum
);
// Hand the frame to the implementation under test and capture its status.
53 EpStatus ep_status
= processor
->ProcessFrame(time
, samples
, kFrameSize
);
// Advance the timestamp by one frame: kFrameSize samples at 1e6 / kSampleRate
// microseconds per sample.
54 time
+= static_cast<int64
>(kFrameSize
* (1e6
/ kSampleRate
));
// Spot-check the status before (frame 20), during (frame 70) and after
// (frame 120) the louder frames.
57 if (20 == frame_count
)
58 EXPECT_EQ(EP_PRE_SPEECH
, ep_status
);
59 if (70 == frame_count
)
60 EXPECT_EQ(EP_SPEECH_PRESENT
, ep_status
);
61 if (120 == frame_count
)
62 EXPECT_EQ(EP_PRE_SPEECH
, ep_status
);
66 // This test instantiates and initializes a stand alone endpointer module.
67 // The test creates FrameData objects with random noise and sends them
68 // to the endpointer module. The energy of the first 50 frames is low,
69 // followed by 50 high energy frames, and another 50 low energy frames.
70 // We test that the correct start and end frames were detected.
// FrameProcessor implementation that forwards each frame to an
// EnergyEndpointer and reports that endpointer's status.
71 class EnergyEndpointerFrameProcessor
: public FrameProcessor
{
// Stores the raw |endpointer| pointer; the test below passes the address of a
// stack-allocated EnergyEndpointer.
73 explicit EnergyEndpointerFrameProcessor(EnergyEndpointer
* endpointer
)
74 : endpointer_(endpointer
) {}
// Feeds one frame to the endpointer, then returns the status it reports.
// NOTE(review): |frame_size| is accepted but kFrameSize is what is forwarded
// to ProcessAudioFrame; the |samples| parameter and the |ep_time| local are
// referenced here but their declarations are not visible in this view.
76 virtual EpStatus
ProcessFrame(int64 time
,
78 int frame_size
) OVERRIDE
{
79 endpointer_
->ProcessAudioFrame(time
, samples
, kFrameSize
, NULL
);
81 return endpointer_
->Status(&ep_time
);
// Endpointer that receives the frames, as supplied to the constructor.
85 EnergyEndpointer
* endpointer_
;
// Configures a stand-alone EnergyEndpointer and runs the shared frame
// sequence from RunEndpointerEventsTest through it.
88 TEST(EndpointerTest
, TestEnergyEndpointerEvents
) {
89 // Initialize endpointer and configure it. We specify the parameters
90 // here for a 20ms window, and a 20ms step size, which corresponds to
91 // the narrow band AMR codec.
92 EnergyEndpointerParams ep_config
;
// Frame period and frame duration are both 1 / kFrameRate seconds (20 ms).
93 ep_config
.set_frame_period(1.0f
/ static_cast<float>(kFrameRate
));
94 ep_config
.set_frame_duration(1.0f
/ static_cast<float>(kFrameRate
));
95 ep_config
.set_endpoint_margin(0.2f
);
96 ep_config
.set_onset_window(0.15f
);
97 ep_config
.set_speech_on_window(0.4f
);
98 ep_config
.set_offset_window(0.15f
);
99 ep_config
.set_onset_detect_dur(0.09f
);
100 ep_config
.set_onset_confirm_dur(0.075f
);
101 ep_config
.set_on_maintain_dur(0.10f
);
102 ep_config
.set_offset_confirm_dur(0.12f
);
103 ep_config
.set_decision_threshold(100.0f
);
104 EnergyEndpointer endpointer
;
105 endpointer
.Init(ep_config
);
// The frame sequence is run between StartSession() and EndSession().
107 endpointer
.StartSession();
109 EnergyEndpointerFrameProcessor
frame_processor(&endpointer
);
110 RunEndpointerEventsTest(&frame_processor
);
112 endpointer
.EndSession();
115 // Test endpointer wrapper class: a FrameProcessor that forwards each frame
// to an Endpointer and reports that endpointer's status.
116 class EndpointerFrameProcessor
: public FrameProcessor
{
// Stores the raw |endpointer| pointer; the test below passes the address of a
// stack-allocated Endpointer.
118 explicit EndpointerFrameProcessor(Endpointer
* endpointer
)
119 : endpointer_(endpointer
) {}
// Wraps the frame in an AudioChunk, feeds it to the endpointer, then returns
// the status the endpointer reports.
// NOTE(review): |frame_size| is accepted but kFrameSize is what sizes the
// chunk; the |samples| parameter and the |ep_time| local are referenced here
// but their declarations are not visible in this view.
121 virtual EpStatus
ProcessFrame(int64 time
,
123 int frame_size
) OVERRIDE
{
// The samples are passed as raw bytes with a length of kFrameSize * 2; the
// trailing 2 is presumably the bytes-per-sample of int16 audio -- confirm
// against the AudioChunk constructor.
124 scoped_refptr
<AudioChunk
> frame(
125 new AudioChunk(reinterpret_cast<uint8
*>(samples
), kFrameSize
* 2, 2));
126 endpointer_
->ProcessAudio(*frame
.get(), NULL
);
128 return endpointer_
->Status(&ep_time
);
// Endpointer that receives the frames, as supplied to the constructor.
132 Endpointer
* endpointer_
;
// Runs the shared frame sequence from RunEndpointerEventsTest through the
// Endpointer wrapper class, with explicit silence-length timeouts.
135 TEST(EndpointerTest
, TestEmbeddedEndpointerEvents
) {
// NOTE(review): this local repeats the name and value of the file-level
// kSampleRate declared near the top of the file.
136 const int kSampleRate
= 8000; // 8 k samples per second for AMR encoding.
138 Endpointer
endpointer(kSampleRate
);
// NOTE(review): the factor 1000 presumably converts milliseconds to
// microseconds, so the name kMillisecondsPerMicrosecond looks inverted
// (microseconds per millisecond) -- confirm the unit the setters expect.
139 const int64 kMillisecondsPerMicrosecond
= 1000;
140 const int64 short_timeout
= 300 * kMillisecondsPerMicrosecond
;
141 endpointer
.set_speech_input_possibly_complete_silence_length(short_timeout
);
142 const int64 long_timeout
= 500 * kMillisecondsPerMicrosecond
;
143 endpointer
.set_speech_input_complete_silence_length(long_timeout
);
// The frame sequence is run between StartSession() and EndSession().
144 endpointer
.StartSession();
146 EndpointerFrameProcessor
frame_processor(&endpointer
);
147 RunEndpointerEventsTest(&frame_processor
);
149 endpointer
.EndSession();
152 } // namespace content