1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/browser/speech/audio_buffer.h"
6 #include "content/browser/speech/endpointer/endpointer.h"
7 #include "testing/gtest/include/gtest/gtest.h"
// Audio format shared by every test in this file. The frame size is derived
// from the two rates and verified at compile time by the static_assert.
10 const int kFrameRate
= 50; // 20 ms long frames for AMR encoding.
11 const int kSampleRate
= 8000; // 8 k samples per second for AMR encoding.
13 // At 8 k samples per second a 20 ms frame is 160 samples, which corresponds
// to the AMR frame length noted above.
15 const int kFrameSize
= kSampleRate
/ kFrameRate
; // 160 samples.
16 static_assert(kFrameSize
== 160, "invalid frame size");
// Abstract interface through which RunEndpointerEventsTest hands audio frames
// to the endpointer implementation under test. Each test supplies its own
// subclass.
21 class FrameProcessor
{
23 // Process a single frame of test audio samples.
// |time| is the timestamp supplied by the caller for this frame, |samples|
// is the frame's 16-bit sample buffer and |frame_size| the number of samples
// the caller says it holds. The returned EpStatus is what the caller compares
// against its expectations.
24 virtual EpStatus
ProcessFrame(int64 time
, int16
* samples
, int frame_size
) = 0;
// Drives |processor| with kNumFrames frames of pseudo-random noise, one
// kFrameSize-sample frame at a time, and checks the status it reports at
// three fixed frames: one before, one inside and one after the frame range
// 50..99.
// NOTE(review): the declarations of |gain| and |time|, and the body of the
// frame-range branch below, are not visible in this excerpt.
27 void RunEndpointerEventsTest(FrameProcessor
* processor
) {
// Scratch buffer reused for every frame.
28 int16 samples
[kFrameSize
];
30 // We will create a white noise signal of 150 frames. The frames from 50 to
31 // 100 will have more power, and the endpointer should fire on those frames.
32 const int kNumFrames
= 150;
34 // Create a random sequence of samples.
38 for (int frame_count
= 0; frame_count
< kNumFrames
; ++frame_count
) {
39 // The frames from 50 to 100 will have more power, and the endpointer
40 // should detect those frames as speech.
// (Upper bound is exclusive: frames 50 through 99.)
41 if ((frame_count
>= 50) && (frame_count
< 100)) {
46 // Create random samples.
47 for (int i
= 0; i
< kFrameSize
; ++i
) {
// Centre rand() around zero and normalise by RAND_MAX, giving a value in
// roughly [-0.5, 0.5] that is then scaled by |gain|.
48 float randNum
= static_cast<float>(rand() - (RAND_MAX
/ 2)) /
49 static_cast<float>(RAND_MAX
);
50 samples
[i
] = static_cast<int16
>(gain
* randNum
);
// Hand the finished frame to the implementation under test.
53 EpStatus ep_status
= processor
->ProcessFrame(time
, samples
, kFrameSize
);
// Advance the timestamp by one frame: 160 * (1e6 / 8000) = 20000.
// Presumably microseconds, given the 1e6 factor -- TODO confirm.
54 time
+= static_cast<int64
>(kFrameSize
* (1e6
/ kSampleRate
));
// Frame 20 lies before the loud range: no speech expected yet.
57 if (20 == frame_count
)
58 EXPECT_EQ(EP_PRE_SPEECH
, ep_status
);
// Frame 70 lies inside the loud range: speech should be reported.
59 if (70 == frame_count
)
60 EXPECT_EQ(EP_SPEECH_PRESENT
, ep_status
);
// Frame 120 lies after the loud range: status is expected to be
// EP_PRE_SPEECH again.
61 if (120 == frame_count
)
62 EXPECT_EQ(EP_PRE_SPEECH
, ep_status
);
66 // This test instantiates and initializes a stand-alone endpointer module.
67 // The test creates FrameData objects with random noise and sends them
68 // to the endpointer module. The energy of the first 50 frames is low,
69 // followed by 50 high energy frames, and another 50 low energy frames.
70 // We test that the correct start and end frames were detected.
// FrameProcessor implementation that forwards each frame straight to an
// EnergyEndpointer. Only the pointer passed to the constructor is stored.
71 class EnergyEndpointerFrameProcessor
: public FrameProcessor
{
// |endpointer| is the instance that will receive every frame.
73 explicit EnergyEndpointerFrameProcessor(EnergyEndpointer
* endpointer
)
74 : endpointer_(endpointer
) {}
// Feeds one frame to the endpointer, then returns the status it reports.
// NOTE(review): |frame_size| is ignored; the file-level kFrameSize is passed
// to ProcessAudioFrame instead. The two are equal for the caller in this
// file.
76 EpStatus
ProcessFrame(int64 time
, int16
* samples
, int frame_size
) override
{
// The final argument is passed as NULL; its meaning is defined by
// EnergyEndpointer and is not visible here.
77 endpointer_
->ProcessAudioFrame(time
, samples
, kFrameSize
, NULL
);
// NOTE(review): |ep_time| is not declared in the visible lines. It is
// passed to Status() by address and not read afterwards here.
79 return endpointer_
->Status(&ep_time
);
// Endpointer that receives the frames; set once in the constructor.
83 EnergyEndpointer
* endpointer_
;
// Configures a stand-alone EnergyEndpointer, runs the shared noise-signal
// scenario against it between StartSession() and EndSession(), and relies on
// the expectations inside RunEndpointerEventsTest.
86 TEST(EndpointerTest
, TestEnergyEndpointerEvents
) {
87 // Initialize endpointer and configure it. We specify the parameters
88 // here for a 20ms window, and a 20ms step size, which corresponds to
89 // the narrow band AMR codec.
90 EnergyEndpointerParams ep_config
;
// Period and duration are both 1 / kFrameRate = 1/50 = 0.02, i.e. the 20ms
// step and window mentioned above.
91 ep_config
.set_frame_period(1.0f
/ static_cast<float>(kFrameRate
));
92 ep_config
.set_frame_duration(1.0f
/ static_cast<float>(kFrameRate
));
// Remaining tuning values; their units and meaning are defined by
// EnergyEndpointerParams (presumably seconds for the window and duration
// settings -- TODO confirm).
93 ep_config
.set_endpoint_margin(0.2f
);
94 ep_config
.set_onset_window(0.15f
);
95 ep_config
.set_speech_on_window(0.4f
);
96 ep_config
.set_offset_window(0.15f
);
97 ep_config
.set_onset_detect_dur(0.09f
);
98 ep_config
.set_onset_confirm_dur(0.075f
);
99 ep_config
.set_on_maintain_dur(0.10f
);
100 ep_config
.set_offset_confirm_dur(0.12f
);
101 ep_config
.set_decision_threshold(100.0f
);
102 EnergyEndpointer endpointer
;
103 endpointer
.Init(ep_config
);
105 endpointer
.StartSession();
// Wrap the endpointer so the shared scenario can drive it.
107 EnergyEndpointerFrameProcessor
frame_processor(&endpointer
);
108 RunEndpointerEventsTest(&frame_processor
);
110 endpointer
.EndSession();
113 // Test endpointer wrapper class.
// FrameProcessor implementation that packages each frame as an AudioChunk
// and passes it to an Endpointer. Only the pointer passed to the constructor
// is stored.
114 class EndpointerFrameProcessor
: public FrameProcessor
{
// |endpointer| is the instance that will receive every frame.
116 explicit EndpointerFrameProcessor(Endpointer
* endpointer
)
117 : endpointer_(endpointer
) {}
// Wraps |samples| in an AudioChunk, feeds it to the endpointer and returns
// the status it reports.
// NOTE(review): |time| and |frame_size| are not used in the visible code;
// the chunk length is computed from the file-level kFrameSize.
119 EpStatus
ProcessFrame(int64 time
, int16
* samples
, int frame_size
) override
{
// kFrameSize * 2 is the byte length of kFrameSize int16 samples. The
// trailing 2 is presumably the bytes-per-sample argument -- TODO confirm
// against AudioChunk's constructor.
120 scoped_refptr
<AudioChunk
> frame(
121 new AudioChunk(reinterpret_cast<uint8
*>(samples
), kFrameSize
* 2, 2));
// The second argument is passed as NULL; its meaning is defined by
// Endpointer and is not visible here.
122 endpointer_
->ProcessAudio(*frame
.get(), NULL
);
// NOTE(review): |ep_time| is not declared in the visible lines. It is
// passed to Status() by address and not read afterwards here.
124 return endpointer_
->Status(&ep_time
);
// Endpointer that receives the chunks; set once in the constructor.
128 Endpointer
* endpointer_
;
// Runs the shared noise-signal scenario against the Endpointer wrapper class,
// after setting its two silence-length timeouts, and relies on the
// expectations inside RunEndpointerEventsTest.
131 TEST(EndpointerTest
, TestEmbeddedEndpointerEvents
) {
// NOTE(review): this local shadows the file-level kSampleRate, which has the
// same value.
132 const int kSampleRate
= 8000; // 8 k samples per second for AMR encoding.
134 Endpointer
endpointer(kSampleRate
);
// NOTE(review): the constant's name reads inverted -- 1000 is the number of
// microseconds per millisecond. The timeouts below are therefore presumably
// 300 ms and 500 ms expressed in microseconds; confirm against the
// Endpointer setters.
135 const int64 kMillisecondsPerMicrosecond
= 1000;
136 const int64 short_timeout
= 300 * kMillisecondsPerMicrosecond
;
137 endpointer
.set_speech_input_possibly_complete_silence_length(short_timeout
);
138 const int64 long_timeout
= 500 * kMillisecondsPerMicrosecond
;
139 endpointer
.set_speech_input_complete_silence_length(long_timeout
);
140 endpointer
.StartSession();
// Wrap the endpointer so the shared scenario can drive it.
142 EndpointerFrameProcessor
frame_processor(&endpointer
);
143 RunEndpointerEventsTest(&frame_processor
);
145 endpointer
.EndSession();
148 } // namespace content