Upstreaming browser/ui/uikit_ui_util from iOS.
[chromium-blink-merge.git] / content / renderer / media / speech_recognition_audio_sink_unittest.cc
blob12e8cc1825cc8d32dc3f0b58fa3d20b7e6459b40
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/renderer/media/speech_recognition_audio_sink.h"
7 #include "base/bind.h"
8 #include "base/strings/utf_string_conversions.h"
9 #include "content/renderer/media/media_stream_audio_source.h"
10 #include "content/renderer/media/mock_media_constraint_factory.h"
11 #include "content/renderer/media/webrtc/webrtc_local_audio_track_adapter.h"
12 #include "content/renderer/media/webrtc_local_audio_track.h"
13 #include "media/audio/audio_parameters.h"
14 #include "media/base/audio_bus.h"
15 #include "testing/gmock/include/gmock/gmock.h"
16 #include "testing/gtest/include/gtest/gtest.h"
17 #include "third_party/WebKit/public/platform/WebMediaStreamTrack.h"
19 namespace {
21 // Supported speech recognition audio parameters.
22 const int kSpeechRecognitionSampleRate = 16000;
23 const int kSpeechRecognitionFramesPerBuffer = 1600;
25 // Input audio format.
26 const media::AudioParameters::Format kInputFormat =
27 media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
28 const media::ChannelLayout kInputChannelLayout = media::CHANNEL_LAYOUT_MONO;
29 const int kInputChannels = 1;
30 const int kInputBitsPerSample = 16;
32 // Output audio format.
33 const media::AudioParameters::Format kOutputFormat =
34 media::AudioParameters::AUDIO_PCM_LOW_LATENCY;
35 const media::ChannelLayout kOutputChannelLayout = media::CHANNEL_LAYOUT_STEREO;
36 const int kOutputChannels = 2;
37 const int kOutputBitsPerSample = 16;
39 // Mocked out sockets used for Send/Receive.
40 // Data is written and read from a shared buffer used as a FIFO and there is
41 // no blocking. |OnSendCB| is used to trigger a |Receive| on the other socket.
42 class MockSyncSocket : public base::SyncSocket {
43 public:
44 // This allows for 2 requests in queue between the |MockSyncSocket|s.
45 static const int kSharedBufferSize = 8;
47 // Buffer to be shared between two |MockSyncSocket|s. Allocated on heap.
48 struct SharedBuffer {
49 SharedBuffer() : data(), start(0), length(0) {}
51 uint8 data[kSharedBufferSize];
52 size_t start;
53 size_t length;
56 // Callback used for pairing an A.Send() with B.Receieve() without blocking.
57 typedef base::Callback<void()> OnSendCB;
59 explicit MockSyncSocket(SharedBuffer* shared_buffer)
60 : buffer_(shared_buffer),
61 in_failure_mode_(false) {}
63 MockSyncSocket(SharedBuffer* shared_buffer, const OnSendCB& on_send_cb)
64 : buffer_(shared_buffer),
65 on_send_cb_(on_send_cb),
66 in_failure_mode_(false) {}
68 size_t Send(const void* buffer, size_t length) override;
69 size_t Receive(void* buffer, size_t length) override;
71 // When |in_failure_mode_| == true, the socket fails to send.
72 void SetFailureMode(bool in_failure_mode) {
73 in_failure_mode_ = in_failure_mode;
76 private:
77 SharedBuffer* buffer_;
78 const OnSendCB on_send_cb_;
79 bool in_failure_mode_;
81 DISALLOW_COPY_AND_ASSIGN(MockSyncSocket);
84 // base::SyncSocket implementation
85 size_t MockSyncSocket::Send(const void* buffer, size_t length) {
86 if (in_failure_mode_)
87 return 0;
89 const uint8* b = static_cast<const uint8*>(buffer);
90 for (size_t i = 0; i < length; ++i, ++buffer_->length)
91 buffer_->data[buffer_->start + buffer_->length] = b[i];
93 on_send_cb_.Run();
94 return length;
97 size_t MockSyncSocket::Receive(void* buffer, size_t length) {
98 uint8* b = static_cast<uint8*>(buffer);
99 for (size_t i = buffer_->start; i < buffer_->length; ++i, ++buffer_->start)
100 b[i] = buffer_->data[buffer_->start];
102 // Since buffer is used sequentially, we can reset the buffer indices here.
103 buffer_->start = buffer_->length = 0;
104 return length;
107 // This fake class is the consumer used to verify behaviour of the producer.
108 // The |Initialize()| method shows what the consumer should be responsible for
109 // in the production code (minus the mocks).
110 class FakeSpeechRecognizer {
111 public:
112 FakeSpeechRecognizer() : is_responsive_(true) {}
114 void Initialize(
115 const blink::WebMediaStreamTrack& track,
116 const media::AudioParameters& sink_params,
117 base::SharedMemoryHandle* foreign_memory_handle) {
118 // Shared memory is allocated, mapped and shared.
119 const uint32 kSharedMemorySize =
120 sizeof(media::AudioInputBufferParameters) +
121 media::AudioBus::CalculateMemorySize(sink_params);
122 shared_memory_.reset(new base::SharedMemory());
123 ASSERT_TRUE(shared_memory_->CreateAndMapAnonymous(kSharedMemorySize));
124 memset(shared_memory_->memory(), 0, kSharedMemorySize);
125 ASSERT_TRUE(shared_memory_->ShareToProcess(base::GetCurrentProcessHandle(),
126 foreign_memory_handle));
128 // Wrap the shared memory for the audio bus.
129 media::AudioInputBuffer* buffer =
130 static_cast<media::AudioInputBuffer*>(shared_memory_->memory());
132 audio_track_bus_ = media::AudioBus::WrapMemory(sink_params, buffer->audio);
133 audio_track_bus_->Zero();
135 // Reference to the counter used to synchronize.
136 buffer->params.size = 0U;
138 // Create a shared buffer for the |MockSyncSocket|s.
139 shared_buffer_.reset(new MockSyncSocket::SharedBuffer());
141 // Local socket will receive signals from the producer.
142 receiving_socket_.reset(new MockSyncSocket(shared_buffer_.get()));
144 // We automatically trigger a Receive when data is sent over the socket.
145 sending_socket_ = new MockSyncSocket(
146 shared_buffer_.get(),
147 base::Bind(&FakeSpeechRecognizer::EmulateReceiveThreadLoopIteration,
148 base::Unretained(this)));
150 // This is usually done to pair the sockets. Here it's not effective.
151 base::SyncSocket::CreatePair(receiving_socket_.get(), sending_socket_);
154 // Emulates a single iteraton of a thread receiving on the socket.
155 // This would normally be done on a receiving thread's task on the browser.
156 void EmulateReceiveThreadLoopIteration() {
157 if (!is_responsive_)
158 return;
160 const int kSize = sizeof(media::AudioInputBufferParameters().size);
161 receiving_socket_->Receive(&(GetAudioInputBuffer()->params.size), kSize);
163 // Notify the producer that the audio buffer has been consumed.
164 GetAudioInputBuffer()->params.size++;
167 // Used to simulate an unresponsive behaviour of the consumer.
168 void SimulateResponsiveness(bool is_responsive) {
169 is_responsive_ = is_responsive;
172 media::AudioInputBuffer * GetAudioInputBuffer() const {
173 return static_cast<media::AudioInputBuffer*>(shared_memory_->memory());
176 MockSyncSocket* sending_socket() { return sending_socket_; }
177 media::AudioBus* audio_bus() const { return audio_track_bus_.get(); }
180 private:
181 bool is_responsive_;
183 // Shared memory for the audio and synchronization.
184 scoped_ptr<base::SharedMemory> shared_memory_;
186 // Fake sockets and their shared buffer.
187 scoped_ptr<MockSyncSocket::SharedBuffer> shared_buffer_;
188 scoped_ptr<MockSyncSocket> receiving_socket_;
189 MockSyncSocket* sending_socket_;
191 // Audio bus wrapping the shared memory from the renderer.
192 scoped_ptr<media::AudioBus> audio_track_bus_;
194 DISALLOW_COPY_AND_ASSIGN(FakeSpeechRecognizer);
197 } // namespace
199 namespace content {
201 class SpeechRecognitionAudioSinkTest : public testing::Test {
202 public:
203 SpeechRecognitionAudioSinkTest() {}
205 ~SpeechRecognitionAudioSinkTest() {}
207 // Initializes the producer and consumer with specified audio parameters.
208 // Returns the minimal number of input audio buffers which need to be captured
209 // before they get sent to the consumer.
210 uint32 Initialize(int input_sample_rate,
211 int input_frames_per_buffer,
212 int output_sample_rate,
213 int output_frames_per_buffer) {
214 // Audio Environment setup.
215 source_params_.Reset(kInputFormat,
216 kInputChannelLayout,
217 kInputChannels,
218 input_sample_rate,
219 kInputBitsPerSample,
220 input_frames_per_buffer);
221 sink_params_.Reset(kOutputFormat,
222 kOutputChannelLayout,
223 kOutputChannels,
224 output_sample_rate,
225 kOutputBitsPerSample,
226 output_frames_per_buffer);
227 source_bus_ =
228 media::AudioBus::Create(kInputChannels, input_frames_per_buffer);
229 source_bus_->Zero();
230 first_frame_capture_time_ = base::TimeTicks::Now();
231 sample_frames_captured_ = 0;
233 // Prepare the track and audio source.
234 blink::WebMediaStreamTrack blink_track;
235 PrepareBlinkTrackOfType(MEDIA_DEVICE_AUDIO_CAPTURE, &blink_track);
237 // Get the native track from the blink track and initialize.
238 native_track_ =
239 static_cast<WebRtcLocalAudioTrack*>(blink_track.extraData());
240 native_track_->OnSetFormat(source_params_);
242 // Create and initialize the consumer.
243 recognizer_.reset(new FakeSpeechRecognizer());
244 base::SharedMemoryHandle foreign_memory_handle;
245 recognizer_->Initialize(blink_track, sink_params_, &foreign_memory_handle);
247 // Create the producer.
248 scoped_ptr<base::SyncSocket> sending_socket(recognizer_->sending_socket());
249 speech_audio_sink_.reset(new SpeechRecognitionAudioSink(
250 blink_track, sink_params_, foreign_memory_handle,
251 sending_socket.Pass(),
252 base::Bind(&SpeechRecognitionAudioSinkTest::StoppedCallback,
253 base::Unretained(this))));
255 // Return number of buffers needed to trigger resampling and consumption.
256 return static_cast<uint32>(std::ceil(
257 static_cast<double>(output_frames_per_buffer * input_sample_rate) /
258 (input_frames_per_buffer * output_sample_rate)));
261 // Mock callback expected to be called when the track is stopped.
262 MOCK_METHOD0(StoppedCallback, void());
264 protected:
265 // Prepares a blink track of a given MediaStreamType and attaches the native
266 // track which can be used to capture audio data and pass it to the producer.
267 static void PrepareBlinkTrackOfType(
268 const MediaStreamType device_type,
269 blink::WebMediaStreamTrack* blink_track) {
270 StreamDeviceInfo device_info(device_type, "Mock device",
271 "mock_device_id");
272 MockMediaConstraintFactory constraint_factory;
273 const blink::WebMediaConstraints constraints =
274 constraint_factory.CreateWebMediaConstraints();
275 scoped_refptr<WebRtcAudioCapturer> capturer(
276 WebRtcAudioCapturer::CreateCapturer(-1, device_info, constraints, NULL,
277 NULL));
278 scoped_refptr<WebRtcLocalAudioTrackAdapter> adapter(
279 WebRtcLocalAudioTrackAdapter::Create(std::string(), NULL));
280 scoped_ptr<WebRtcLocalAudioTrack> native_track(
281 new WebRtcLocalAudioTrack(adapter.get(), capturer, NULL));
282 blink::WebMediaStreamSource blink_audio_source;
283 blink_audio_source.initialize(base::UTF8ToUTF16("dummy_source_id"),
284 blink::WebMediaStreamSource::TypeAudio,
285 base::UTF8ToUTF16("dummy_source_name"),
286 false /* remote */, true /* readonly */);
287 MediaStreamSource::SourceStoppedCallback cb;
288 blink_audio_source.setExtraData(
289 new MediaStreamAudioSource(-1, device_info, cb, NULL));
290 blink_track->initialize(blink::WebString::fromUTF8("dummy_track"),
291 blink_audio_source);
292 blink_track->setExtraData(native_track.release());
295 // Emulates an audio capture device capturing data from the source.
296 inline void CaptureAudio(const uint32 buffers) {
297 for (uint32 i = 0; i < buffers; ++i) {
298 const base::TimeTicks estimated_capture_time = first_frame_capture_time_ +
299 (sample_frames_captured_ * base::TimeDelta::FromSeconds(1) /
300 source_params_.sample_rate());
301 native_track()->Capture(*source_bus_, estimated_capture_time, false);
302 sample_frames_captured_ += source_bus_->frames();
306 // Used to simulate a problem with sockets.
307 void SetFailureModeOnForeignSocket(bool in_failure_mode) {
308 recognizer()->sending_socket()->SetFailureMode(in_failure_mode);
311 // Helper method for verifying captured audio data has been consumed.
312 inline void AssertConsumedBuffers(const uint32 buffer_index) {
313 ASSERT_EQ(buffer_index, recognizer()->GetAudioInputBuffer()->params.size);
316 // Helper method for providing audio data to producer and verifying it was
317 // consumed on the recognizer.
318 inline void CaptureAudioAndAssertConsumedBuffers(const uint32 buffers,
319 const uint32 buffer_index) {
320 CaptureAudio(buffers);
321 AssertConsumedBuffers(buffer_index);
324 // Helper method to capture and assert consumption at different sample rates
325 // and audio buffer sizes.
326 inline void AssertConsumptionForAudioParameters(
327 const int input_sample_rate,
328 const int input_frames_per_buffer,
329 const int output_sample_rate,
330 const int output_frames_per_buffer,
331 const uint32 consumptions) {
332 const uint32 buffers_per_notification =
333 Initialize(input_sample_rate,
334 input_frames_per_buffer,
335 output_sample_rate,
336 output_frames_per_buffer);
337 AssertConsumedBuffers(0U);
339 for (uint32 i = 1U; i <= consumptions; ++i) {
340 CaptureAudio(buffers_per_notification);
341 ASSERT_EQ(i, recognizer()->GetAudioInputBuffer()->params.size)
342 << "Tested at rates: "
343 << "In(" << input_sample_rate << ", " << input_frames_per_buffer
344 << ") "
345 << "Out(" << output_sample_rate << ", " << output_frames_per_buffer
346 << ")";
350 media::AudioBus* source_bus() const { return source_bus_.get(); }
352 FakeSpeechRecognizer* recognizer() const { return recognizer_.get(); }
354 const media::AudioParameters& sink_params() const { return sink_params_; }
356 WebRtcLocalAudioTrack* native_track() const { return native_track_; }
358 private:
359 // Producer.
360 scoped_ptr<SpeechRecognitionAudioSink> speech_audio_sink_;
362 // Consumer.
363 scoped_ptr<FakeSpeechRecognizer> recognizer_;
365 // Audio related members.
366 scoped_ptr<media::AudioBus> source_bus_;
367 media::AudioParameters source_params_;
368 media::AudioParameters sink_params_;
369 WebRtcLocalAudioTrack* native_track_;
371 base::TimeTicks first_frame_capture_time_;
372 int64 sample_frames_captured_;
374 DISALLOW_COPY_AND_ASSIGN(SpeechRecognitionAudioSinkTest);
377 // Not all types of tracks are supported. This test checks if that policy is
378 // implemented correctly.
379 TEST_F(SpeechRecognitionAudioSinkTest, CheckIsSupportedAudioTrack) {
380 typedef std::map<MediaStreamType, bool> SupportedTrackPolicy;
382 // This test must be aligned with the policy of supported tracks.
383 SupportedTrackPolicy p;
384 p[MEDIA_NO_SERVICE] = false;
385 p[MEDIA_DEVICE_AUDIO_CAPTURE] = true; // The only one supported for now.
386 p[MEDIA_DEVICE_VIDEO_CAPTURE] = false;
387 p[MEDIA_TAB_AUDIO_CAPTURE] = false;
388 p[MEDIA_TAB_VIDEO_CAPTURE] = false;
389 p[MEDIA_DESKTOP_VIDEO_CAPTURE] = false;
390 p[MEDIA_DESKTOP_AUDIO_CAPTURE] = false;
391 p[MEDIA_DEVICE_AUDIO_OUTPUT] = false;
393 // Ensure this test gets updated along with |content::MediaStreamType| enum.
394 EXPECT_EQ(NUM_MEDIA_TYPES, p.size());
396 // Check the the entire policy.
397 for (SupportedTrackPolicy::iterator it = p.begin(); it != p.end(); ++it) {
398 blink::WebMediaStreamTrack blink_track;
399 PrepareBlinkTrackOfType(it->first, &blink_track);
400 ASSERT_EQ(
401 it->second,
402 SpeechRecognitionAudioSink::IsSupportedTrack(blink_track));
406 // Checks if the producer can support the listed range of input sample rates
407 // and associated buffer sizes.
408 TEST_F(SpeechRecognitionAudioSinkTest, RecognizerNotifiedOnSocket) {
409 const size_t kNumAudioParamTuples = 24;
410 const int kAudioParams[kNumAudioParamTuples][2] = {
411 {8000, 80}, {8000, 800}, {16000, 160}, {16000, 1600},
412 {24000, 240}, {24000, 2400}, {32000, 320}, {32000, 3200},
413 {44100, 441}, {44100, 4410}, {48000, 480}, {48000, 4800},
414 {96000, 960}, {96000, 9600}, {11025, 111}, {11025, 1103},
415 {22050, 221}, {22050, 2205}, {88200, 882}, {88200, 8820},
416 {176400, 1764}, {176400, 17640}, {192000, 1920}, {192000, 19200}};
418 // Check all listed tuples of input sample rates and buffers sizes.
419 for (size_t i = 0; i < kNumAudioParamTuples; ++i) {
420 AssertConsumptionForAudioParameters(
421 kAudioParams[i][0], kAudioParams[i][1],
422 kSpeechRecognitionSampleRate, kSpeechRecognitionFramesPerBuffer, 3U);
426 // Checks that the input data is getting resampled to the target sample rate.
427 TEST_F(SpeechRecognitionAudioSinkTest, AudioDataIsResampledOnSink) {
428 EXPECT_GE(kInputChannels, 1);
429 EXPECT_GE(kOutputChannels, 1);
431 // Input audio is sampled at 44.1 KHz with data chunks of 10ms. Desired output
432 // is corresponding to the speech recognition engine requirements: 16 KHz with
433 // 100 ms chunks (1600 frames per buffer).
434 const uint32 kSourceFrames = 441;
435 const uint32 buffers_per_notification =
436 Initialize(44100, kSourceFrames, 16000, 1600);
437 // Fill audio input frames with 0, 1, 2, 3, ..., 440.
438 int16 source_data[kSourceFrames * kInputChannels];
439 for (uint32 i = 0; i < kSourceFrames; ++i) {
440 for (int c = 0; c < kInputChannels; ++c)
441 source_data[i * kInputChannels + c] = i;
443 source_bus()->FromInterleaved(
444 source_data, kSourceFrames, sizeof(source_data[0]));
446 // Prepare sink audio bus and data for rendering.
447 media::AudioBus* sink_bus = recognizer()->audio_bus();
448 const uint32 kSinkDataLength = 1600 * kOutputChannels;
449 int16 sink_data[kSinkDataLength] = {0};
451 // Render the audio data from the recognizer.
452 sink_bus->ToInterleaved(sink_bus->frames(),
453 sink_params().bits_per_sample() / 8, sink_data);
455 // Checking only a fraction of the sink frames.
456 const uint32 kNumFramesToTest = 12;
458 // Check all channels are zeroed out before we trigger resampling.
459 for (uint32 i = 0; i < kNumFramesToTest; ++i) {
460 for (int c = 0; c < kOutputChannels; ++c)
461 EXPECT_EQ(0, sink_data[i * kOutputChannels + c]);
464 // Trigger the speech sink to resample the input data.
465 AssertConsumedBuffers(0U);
466 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 1U);
468 // Render the audio data from the recognizer.
469 sink_bus->ToInterleaved(sink_bus->frames(),
470 sink_params().bits_per_sample() / 8, sink_data);
472 // Resampled data expected frames. Extracted based on |source_data|.
473 const int16 kExpectedData[kNumFramesToTest] = {0, 2, 5, 8, 11, 13,
474 16, 19, 22, 24, 27, 30};
476 // Check all channels have the same resampled data.
477 for (uint32 i = 0; i < kNumFramesToTest; ++i) {
478 for (int c = 0; c < kOutputChannels; ++c)
479 EXPECT_EQ(kExpectedData[i], sink_data[i * kOutputChannels + c]);
483 // Checks that the producer does not misbehave when a socket failure occurs.
484 TEST_F(SpeechRecognitionAudioSinkTest, SyncSocketFailsSendingData) {
485 const uint32 buffers_per_notification = Initialize(44100, 441, 16000, 1600);
486 // Start with no problems on the socket.
487 AssertConsumedBuffers(0U);
488 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 1U);
490 // A failure occurs (socket cannot send).
491 SetFailureModeOnForeignSocket(true);
492 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 1U);
495 // A very unlikely scenario in which the peer is not synchronizing for a long
496 // time (e.g. 300 ms) which results in dropping cached buffers and restarting.
497 // We check that the FIFO overflow does not occur and that the producer is able
498 // to resume.
499 TEST_F(SpeechRecognitionAudioSinkTest, RepeatedSycnhronizationLag) {
500 const uint32 buffers_per_notification = Initialize(44100, 441, 16000, 1600);
502 // Start with no synchronization problems.
503 AssertConsumedBuffers(0U);
504 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 1U);
506 // Consumer gets out of sync.
507 recognizer()->SimulateResponsiveness(false);
508 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 1U);
509 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 1U);
510 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 1U);
512 // Consumer recovers.
513 recognizer()->SimulateResponsiveness(true);
514 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 2U);
515 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 3U);
516 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 4U);
519 // Checks that an OnStoppedCallback is issued when the track is stopped.
520 TEST_F(SpeechRecognitionAudioSinkTest, OnReadyStateChangedOccured) {
521 const uint32 buffers_per_notification = Initialize(44100, 441, 16000, 1600);
522 AssertConsumedBuffers(0U);
523 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 1U);
524 EXPECT_CALL(*this, StoppedCallback()).Times(1);
526 native_track()->Stop();
527 CaptureAudioAndAssertConsumedBuffers(buffers_per_notification, 1U);
530 } // namespace content