chrome/browser/media/chrome_webrtc_audio_quality_browsertest.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
#include <algorithm>
#include <cstdlib>
#include <ctime>

#include "base/command_line.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_util.h"
#include "base/files/scoped_temp_dir.h"
#include "base/process/launch.h"
#include "base/process/process.h"
#include "base/scoped_native_library.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "chrome/browser/media/webrtc_browsertest_audio.h"
#include "chrome/browser/media/webrtc_browsertest_base.h"
#include "chrome/browser/media/webrtc_browsertest_common.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/ui/browser.h"
#include "chrome/browser/ui/browser_tabstrip.h"
#include "chrome/browser/ui/tabs/tab_strip_model.h"
#include "chrome/common/chrome_paths.h"
#include "chrome/common/chrome_switches.h"
#include "chrome/test/base/ui_test_utils.h"
#include "content/public/test/browser_test_utils.h"
#include "media/audio/audio_parameters.h"
#include "media/base/media_switches.h"
#include "net/test/embedded_test_server/embedded_test_server.h"
#include "testing/perf/perf_test.h"
  31
  32 namespace {
  33
  34 static const base::FilePath::CharType kReferenceFile[] =
  35     FILE_PATH_LITERAL("speech_44kHz_16bit_stereo.wav");
  36
  37 // The javascript will load the reference file relative to its location,
  38 // which is in /webrtc on the web server. The files we are looking for are in
  39 // webrtc/resources in the chrome/test/data folder.
  40 static const char kReferenceFileRelativeUrl[] =
  41     "resources/speech_44kHz_16bit_stereo.wav";
  42
  43 static const char kWebRtcAudioTestHtmlPage[] =
  44     "/webrtc/webrtc_audio_quality_test.html";
  45
  46 #if defined(OS_LINUX) || defined(OS_WIN) || defined(OS_MACOSX)
  47 #define MAYBE_WebRtcAudioQualityBrowserTest WebRtcAudioQualityBrowserTest
  48 #else
  49 // Not implemented on Android, ChromeOS etc.
  50 #define MAYBE_WebRtcAudioQualityBrowserTest DISABLED_WebRtcAudioQualityBrowserTest
  51 #endif
  52
  53 }  // namespace
  54
  55 // Test we can set up a WebRTC call and play audio through it.
  56 //
  57 // If you're not a googler and want to run this test, you need to provide a
  58 // pesq binary for your platform (and sox.exe on windows). Read more on how
  59 // resources are managed in chrome/test/data/webrtc/resources/README.
  60 //
  61 // This test will only work on machines that have been configured to record
  62 // their own input.
  63 //
  64 // On Linux:
  65 // 1. # sudo apt-get install pavucontrol sox
  66 // 2. For the user who will run the test: # pavucontrol
  67 // 3. In a separate terminal, # arecord dummy
  68 // 4. In pavucontrol, go to the recording tab.
  69 // 5. For the ALSA plugin [aplay]: ALSA Capture from, change from <x> to
  70 //    <Monitor of x>, where x is whatever your primary sound device is called.
  71 // 6. Try launching chrome as the target user on the target machine, try
  72 //    playing, say, a YouTube video, and record with # arecord -f dat tmp.dat.
  73 //    Verify the recording with aplay (should have recorded what you played
  74 //    from chrome).
  75 //
  76 // Note: the volume for ALL your input devices will be forced to 100% by
  77 //       running this test on Linux.
  78 //
  79 // On Mac:
  80 // TODO(phoglund): download sox from gs instead.
  81 // 1. Get SoundFlower: http://rogueamoeba.com/freebies/soundflower/download.php
  82 // 2. Install it + reboot.
  83 // 3. Install MacPorts (http://www.macports.org/).
  84 // 4. Install sox: sudo port install sox.
  85 // 5. (For Chrome bots) Ensure sox and rec are reachable from the env the test
//    executes in (sox and rec tend to install in /opt/, which generally isn't
  87 //    in the Chrome bots' env). For instance, run
  88 //    sudo ln -s /opt/local/bin/rec /usr/local/bin/rec
  89 //    sudo ln -s /opt/local/bin/sox /usr/local/bin/sox
  90 // 6. In Sound Preferences, set both input and output to Soundflower (2ch).
  91 //    Note: You will no longer hear audio on this machine, and it will no
  92 //    longer use any built-in mics.
  93 // 7. Try launching chrome as the target user on the target machine, try
  94 //    playing, say, a YouTube video, and record with 'rec test.wav trim 0 5'.
  95 //    Stop the video in chrome and try playing back the file; you should hear
//    a recording of the video (note: if you play back on the target machine
//    you must revert the changes in step 6 first).
  98 //
  99 // On Windows 7:
 100 // 1. Control panel > Sound > Manage audio devices.
 101 // 2. In the recording tab, right-click in an empty space in the pane with the
 102 //    devices. Tick 'show disabled devices'.
// 3. You should see a 'stereo mix' device - this is what your speakers output.
 104 //    Right click > Properties.
 105 // 4. In the Listen tab for the mix device, check the 'listen to this device'
 106 //    checkbox. Ensure the mix device is the default recording device.
// 5. Launch chrome and try playing a video with sound. You should see
//    activity in the volume meter for the mix device. Configure the mix
//    device to have 50 / 100 in level. Also go into the playback tab,
//    right-click Speakers, and set that level to 50 / 100. Otherwise you
//    will get distortion in the recording.
 112 class MAYBE_WebRtcAudioQualityBrowserTest : public WebRtcTestBase {
 113  public:
 114   MAYBE_WebRtcAudioQualityBrowserTest() {}
 115   void SetUpInProcessBrowserTestFixture() override {
 116     DetectErrorsInJavaScript();  // Look for errors in our rather complex js.
 117   }
 118
 119   void SetUpCommandLine(base::CommandLine* command_line) override {
 120     EXPECT_FALSE(command_line->HasSwitch(
 121         switches::kUseFakeUIForMediaStream));
 122
 123     // The WebAudio-based tests don't care what devices are available to
 124     // getUserMedia, and the getUserMedia-based tests will play back a file
 125     // through the fake device using using --use-file-for-fake-audio-capture.
 126     command_line->AppendSwitch(switches::kUseFakeDeviceForMediaStream);
 127   }
 128
 129   void ConfigureFakeDeviceToPlayFile(const base::FilePath& wav_file_path) {
 130     base::CommandLine::ForCurrentProcess()->AppendSwitchPath(
 131         switches::kUseFileForFakeAudioCapture, wav_file_path);
 132   }
 133
 134   void AddAudioFileToWebAudio(const std::string& input_file_relative_url,
 135                               content::WebContents* tab_contents) {
 136     // This calls into webaudio.js.
 137     EXPECT_EQ("ok-added", ExecuteJavascript(
 138         "addAudioFile('" + input_file_relative_url + "')", tab_contents));
 139   }
 140
 141   void PlayAudioFileThroughWebAudio(content::WebContents* tab_contents) {
 142     EXPECT_EQ("ok-playing", ExecuteJavascript("playAudioFile()", tab_contents));
 143   }
 144
 145   content::WebContents* OpenPageWithoutGetUserMedia(const char* url) {
 146     chrome::AddTabAt(browser(), GURL(), -1, true);
 147     ui_test_utils::NavigateToURL(
 148         browser(), embedded_test_server()->GetURL(url));
 149     content::WebContents* tab =
 150         browser()->tab_strip_model()->GetActiveWebContents();
 151
 152     // Prepare the peer connections manually in this test since we don't add
 153     // getUserMedia-derived media streams in this test like the other tests.
 154     EXPECT_EQ("ok-peerconnection-created",
 155               ExecuteJavascript("preparePeerConnection()", tab));
 156     return tab;
 157   }
 158
 159   void MuteMediaElement(const std::string& element_id,
 160                         content::WebContents* tab_contents) {
 161     EXPECT_EQ("ok-muted", ExecuteJavascript(
 162         "setMediaElementMuted('" + element_id + "', true)", tab_contents));
 163   }
 164
 165  protected:
 166   void TestAutoGainControl(const base::FilePath::StringType& reference_filename,
 167                            const std::string& constraints,
 168                            const std::string& perf_modifier);
 169   void SetupAndRecordAudioCall(const base::FilePath& reference_file,
 170                                const base::FilePath& recording,
 171                                const std::string& constraints,
 172                                const base::TimeDelta recording_time);
 173 };
 174
 175 namespace {
 176
 177 class AudioRecorder {
 178  public:
 179   AudioRecorder() {}
 180   ~AudioRecorder() {}
 181
 182   // Starts the recording program for the specified duration. Returns true
 183   // on success. We record in 16-bit 44.1 kHz Stereo (mostly because that's
 184   // what SoundRecorder.exe will give us and we can't change that).
 185   bool StartRecording(base::TimeDelta recording_time,
 186                       const base::FilePath& output_file) {
 187     EXPECT_FALSE(recording_application_.IsValid())
 188         << "Tried to record, but is already recording.";
 189
 190     int duration_sec = static_cast<int>(recording_time.InSeconds());
 191     base::CommandLine command_line(base::CommandLine::NO_PROGRAM);
 192
 193 #if defined(OS_WIN)
 194     // This disable is required to run SoundRecorder.exe on 64-bit Windows
 195     // from a 32-bit binary. We need to load the wow64 disable function from
 196     // the DLL since it doesn't exist on Windows XP.
 197     base::ScopedNativeLibrary kernel32_lib(base::FilePath(L"kernel32"));
 198     if (kernel32_lib.is_valid()) {
 199       typedef BOOL (WINAPI* Wow64DisableWow64FSRedirection)(PVOID*);
 200       Wow64DisableWow64FSRedirection wow_64_disable_wow_64_fs_redirection;
 201       wow_64_disable_wow_64_fs_redirection =
 202           reinterpret_cast<Wow64DisableWow64FSRedirection>(
 203               kernel32_lib.GetFunctionPointer(
 204                   "Wow64DisableWow64FsRedirection"));
 205       if (wow_64_disable_wow_64_fs_redirection != NULL) {
 206         PVOID* ignored = NULL;
 207         wow_64_disable_wow_64_fs_redirection(ignored);
 208       }
 209     }
 210
 211     char duration_in_hms[128] = {0};
 212     struct tm duration_tm = {0};
 213     duration_tm.tm_sec = duration_sec;
 214     EXPECT_NE(0u, strftime(duration_in_hms, arraysize(duration_in_hms),
 215                            "%H:%M:%S", &duration_tm));
 216
 217     command_line.SetProgram(
 218         base::FilePath(FILE_PATH_LITERAL("SoundRecorder.exe")));
 219     command_line.AppendArg("/FILE");
 220     command_line.AppendArgPath(output_file);
 221     command_line.AppendArg("/DURATION");
 222     command_line.AppendArg(duration_in_hms);
 223 #elif defined(OS_MACOSX)
 224     command_line.SetProgram(base::FilePath("rec"));
 225     command_line.AppendArg("-b");
 226     command_line.AppendArg("16");
 227     command_line.AppendArg("-q");
 228     command_line.AppendArgPath(output_file);
 229     command_line.AppendArg("trim");
 230     command_line.AppendArg("0");
 231     command_line.AppendArg(base::StringPrintf("%d", duration_sec));
 232 #else
 233     command_line.SetProgram(base::FilePath("arecord"));
 234     command_line.AppendArg("-d");
 235     command_line.AppendArg(base::StringPrintf("%d", duration_sec));
 236     command_line.AppendArg("-f");
 237     command_line.AppendArg("cd");
 238     command_line.AppendArg("-c");
 239     command_line.AppendArg("2");
 240     command_line.AppendArgPath(output_file);
 241 #endif
 242
 243     DVLOG(0) << "Running " << command_line.GetCommandLineString();
 244     recording_application_ =
 245         base::LaunchProcess(command_line, base::LaunchOptions());
 246     return recording_application_.IsValid();
 247   }
 248
 249   // Joins the recording program. Returns true on success.
 250   bool WaitForRecordingToEnd() {
 251     int exit_code = -1;
 252     recording_application_.WaitForExit(&exit_code);
 253     return exit_code == 0;
 254   }
 255  private:
 256   base::Process recording_application_;
 257 };
 258
 259 bool ForceMicrophoneVolumeTo100Percent() {
 260 #if defined(OS_WIN)
 261   // Note: the force binary isn't in tools since it's one of our own.
 262   base::CommandLine command_line(test::GetReferenceFilesDir().Append(
 263       FILE_PATH_LITERAL("force_mic_volume_max.exe")));
 264   DVLOG(0) << "Running " << command_line.GetCommandLineString();
 265   std::string result;
 266   if (!base::GetAppOutput(command_line, &result)) {
 267     LOG(ERROR) << "Failed to set source volume: output was " << result;
 268     return false;
 269   }
 270 #elif defined(OS_MACOSX)
 271   base::CommandLine command_line(
 272       base::FilePath(FILE_PATH_LITERAL("osascript")));
 273   command_line.AppendArg("-e");
 274   command_line.AppendArg("set volume input volume 100");
 275   command_line.AppendArg("-e");
 276   command_line.AppendArg("set volume output volume 85");
 277
 278   std::string result;
 279   if (!base::GetAppOutput(command_line, &result)) {
 280     LOG(ERROR) << "Failed to set source volume: output was " << result;
 281     return false;
 282   }
 283 #else
 284   // Just force the volume of, say the first 5 devices. A machine will rarely
 285   // have more input sources than that. This is way easier than finding the
 286   // input device we happen to be using.
 287   for (int device_index = 0; device_index < 5; ++device_index) {
 288     std::string result;
 289     const std::string kHundredPercentVolume = "65536";
 290     base::CommandLine command_line(base::FilePath(FILE_PATH_LITERAL("pacmd")));
 291     command_line.AppendArg("set-source-volume");
 292     command_line.AppendArg(base::StringPrintf("%d", device_index));
 293     command_line.AppendArg(kHundredPercentVolume);
 294     DVLOG(0) << "Running " << command_line.GetCommandLineString();
 295     if (!base::GetAppOutput(command_line, &result)) {
 296       LOG(ERROR) << "Failed to set source volume: output was " << result;
 297       return false;
 298     }
 299   }
 300 #endif
 301   return true;
 302 }
 303
 304 // Sox is the "Swiss army knife" of audio processing. We mainly use it for
 305 // silence trimming. See http://sox.sourceforge.net.
 306 base::CommandLine MakeSoxCommandLine() {
 307 #if defined(OS_WIN)
 308   base::FilePath sox_path = test::GetToolForPlatform("sox");
 309   if (!base::PathExists(sox_path)) {
 310     LOG(ERROR) << "Missing sox.exe binary in " << sox_path.value()
 311                << "; you may have to provide this binary yourself.";
 312     return base::CommandLine(base::CommandLine::NO_PROGRAM);
 313   }
 314   base::CommandLine command_line(sox_path);
 315 #else
 316   // TODO(phoglund): call checked-in sox rather than system sox on mac/linux.
 317   // Same for rec invocations on Mac, above.
 318   base::CommandLine command_line(base::FilePath(FILE_PATH_LITERAL("sox")));
 319 #endif
 320   return command_line;
 321 }
 322
 323 // Removes silence from beginning and end of the |input_audio_file| and writes
 324 // the result to the |output_audio_file|. Returns true on success.
 325 bool RemoveSilence(const base::FilePath& input_file,
 326                    const base::FilePath& output_file) {
 327   // SOX documentation for silence command: http://sox.sourceforge.net/sox.html
 328   // To remove the silence from both beginning and end of the audio file, we
 329   // call sox silence command twice: once on normal file and again on its
 330   // reverse, then we reverse the final output.
 331   // Silence parameters are (in sequence):
 332   // ABOVE_PERIODS: The period for which silence occurs. Value 1 is used for
 333   //                 silence at beginning of audio.
 334   // DURATION: the amount of time in seconds that non-silence must be detected
 335   //           before sox stops trimming audio.
 336   // THRESHOLD: value used to indicate what sample value is treats as silence.
 337   const char* kAbovePeriods = "1";
 338   const char* kDuration = "2";
 339   const char* kTreshold = "3%";
 340
 341   base::CommandLine command_line = MakeSoxCommandLine();
 342   if (command_line.GetProgram().empty())
 343     return false;
 344   command_line.AppendArgPath(input_file);
 345   command_line.AppendArgPath(output_file);
 346   command_line.AppendArg("silence");
 347   command_line.AppendArg(kAbovePeriods);
 348   command_line.AppendArg(kDuration);
 349   command_line.AppendArg(kTreshold);
 350   command_line.AppendArg("reverse");
 351   command_line.AppendArg("silence");
 352   command_line.AppendArg(kAbovePeriods);
 353   command_line.AppendArg(kDuration);
 354   command_line.AppendArg(kTreshold);
 355   command_line.AppendArg("reverse");
 356
 357   DVLOG(0) << "Running " << command_line.GetCommandLineString();
 358   std::string result;
 359   bool ok = base::GetAppOutput(command_line, &result);
 360   DVLOG(0) << "Output was:\n\n" << result;
 361   return ok;
 362 }
 363
 364 // Looks for 0.3-second silences (under 1% audio power) and splits the input
 365 // file on those silences. Output files are written according to the output file
 366 // template (e.g. /tmp/out.wav writes /tmp/out001.wav, /tmp/out002.wav, etc if
 367 // there are two silence-padded regions in the file). The silences between
 368 // speech segments must be at least 500 ms for this to be reliable.
 369 bool SplitFileOnSilence(const base::FilePath& input_file,
 370                         const base::FilePath& output_file_template) {
 371   base::CommandLine command_line = MakeSoxCommandLine();
 372   if (command_line.GetProgram().empty())
 373     return false;
 374
 375   // These are experimentally determined and work on the files we use.
 376   const char* kAbovePeriods = "1";
 377   const char* kUnderPeriods = "1";
 378   const char* kDuration = "0.3";
 379   const char* kTreshold = "1%";
 380   command_line.AppendArgPath(input_file);
 381   command_line.AppendArgPath(output_file_template);
 382   command_line.AppendArg("silence");
 383   command_line.AppendArg(kAbovePeriods);
 384   command_line.AppendArg(kDuration);
 385   command_line.AppendArg(kTreshold);
 386   command_line.AppendArg(kUnderPeriods);
 387   command_line.AppendArg(kDuration);
 388   command_line.AppendArg(kTreshold);
 389   command_line.AppendArg(":");
 390   command_line.AppendArg("newfile");
 391   command_line.AppendArg(":");
 392   command_line.AppendArg("restart");
 393
 394   DVLOG(0) << "Running " << command_line.GetCommandLineString();
 395   std::string result;
 396   bool ok = base::GetAppOutput(command_line, &result);
 397   DVLOG(0) << "Output was:\n\n" << result;
 398   return ok;
 399 }
 400
 401 bool CanParseAsFloat(const std::string& value) {
 402   return atof(value.c_str()) != 0 || value == "0";
 403 }
 404
 405 // Runs PESQ to compare |reference_file| to a |actual_file|. The |sample_rate|
 406 // can be either 16000 or 8000.
 407 //
 408 // PESQ is only mono-aware, so the files should preferably be recorded in mono.
 409 // Furthermore it expects the file to be 16 rather than 32 bits, even though
 410 // 32 bits might work. The audio bandwidth of the two files should be the same
 411 // e.g. don't compare a 32 kHz file to a 8 kHz file.
 412 //
 413 // The raw score in MOS is written to |raw_mos|, whereas the MOS-LQO score is
 414 // written to mos_lqo. The scores are returned as floats in string form (e.g.
 415 // "3.145", etc). Returns true on success.
 416 bool RunPesq(const base::FilePath& reference_file,
 417              const base::FilePath& actual_file,
 418              int sample_rate, std::string* raw_mos, std::string* mos_lqo) {
 419   // PESQ will break if the paths are too long (!).
 420   EXPECT_LT(reference_file.value().length(), 128u);
 421   EXPECT_LT(actual_file.value().length(), 128u);
 422
 423   base::FilePath pesq_path = test::GetToolForPlatform("pesq");
 424   if (!base::PathExists(pesq_path)) {
 425     LOG(ERROR) << "Missing PESQ binary in " << pesq_path.value()
 426                << "; you may have to provide this binary yourself.";
 427     return false;
 428   }
 429
 430   base::CommandLine command_line(pesq_path);
 431   command_line.AppendArg(base::StringPrintf("+%d", sample_rate));
 432   command_line.AppendArgPath(reference_file);
 433   command_line.AppendArgPath(actual_file);
 434
 435   DVLOG(0) << "Running " << command_line.GetCommandLineString();
 436   std::string result;
 437   if (!base::GetAppOutput(command_line, &result)) {
 438     LOG(ERROR) << "Failed to run PESQ.";
 439     return false;
 440   }
 441   DVLOG(0) << "Output was:\n\n" << result;
 442
 443   const std::string result_anchor = "Prediction (Raw MOS, MOS-LQO):  = ";
 444   std::size_t anchor_pos = result.find(result_anchor);
 445   if (anchor_pos == std::string::npos) {
 446     LOG(ERROR) << "PESQ was not able to compute a score; we probably recorded "
 447         << "only silence. Please check the output/input volume levels.";
 448     return false;
 449   }
 450
 451   // There are two tab-separated numbers on the format x.xxx, e.g. 5 chars each.
 452   std::size_t first_number_pos = anchor_pos + result_anchor.length();
 453   *raw_mos = result.substr(first_number_pos, 5);
 454   EXPECT_TRUE(CanParseAsFloat(*raw_mos)) << "Failed to parse raw MOS number.";
 455   *mos_lqo = result.substr(first_number_pos + 5 + 1, 5);
 456   EXPECT_TRUE(CanParseAsFloat(*mos_lqo)) << "Failed to parse MOS LQO number.";
 457
 458   return true;
 459 }
 460
 461 base::FilePath CreateTemporaryWaveFile() {
 462   base::FilePath filename;
 463   EXPECT_TRUE(base::CreateTemporaryFile(&filename));
 464   base::FilePath wav_filename =
 465       filename.AddExtension(FILE_PATH_LITERAL(".wav"));
 466   EXPECT_TRUE(base::Move(filename, wav_filename));
 467   return wav_filename;
 468 }
 469
 470 std::vector<base::FilePath> ListWavFilesInDir(const base::FilePath& dir) {
 471   base::FileEnumerator files(dir, false, base::FileEnumerator::FILES,
 472                              FILE_PATH_LITERAL("*.wav"));
 473
 474   std::vector<base::FilePath> result;
 475   for (base::FilePath name = files.Next(); !name.empty(); name = files.Next())
 476     result.push_back(name);
 477   return result;
 478 }
 479
 480 // Splits |to_split| into sub-files based on silence. The file you use must have
 481 // at least 500 ms periods of silence between speech segments for this to be
 482 // reliable.
 483 void SplitFileOnSilenceIntoDir(const base::FilePath& to_split,
 484                                const base::FilePath& workdir) {
 485   // First trim beginning and end since they are tricky for the splitter.
 486   base::FilePath trimmed_audio = CreateTemporaryWaveFile();
 487
 488   ASSERT_TRUE(RemoveSilence(to_split, trimmed_audio));
 489   DVLOG(0) << "Trimmed silence: " << trimmed_audio.value() << std::endl;
 490
 491   ASSERT_TRUE(SplitFileOnSilence(
 492       trimmed_audio, workdir.Append(FILE_PATH_LITERAL("output.wav"))));
 493   ASSERT_TRUE(base::DeleteFile(trimmed_audio, false));
 494 }
 495
 496 // Computes the difference between the actual and reference segment. A positive
 497 // number x means the actual file is x dB stronger than the reference.
 498 float AnalyzeOneSegment(const base::FilePath& ref_segment,
 499                         const base::FilePath& actual_segment,
 500                         int segment_number) {
 501   media::AudioParameters ref_parameters;
 502   media::AudioParameters actual_parameters;
 503   float ref_energy =
 504       test::ComputeAudioEnergyForWavFile(ref_segment, &ref_parameters);
 505   float actual_energy =
 506       test::ComputeAudioEnergyForWavFile(actual_segment, &actual_parameters);
 507
 508   base::TimeDelta difference_in_length = ref_parameters.GetBufferDuration() -
 509                                          actual_parameters.GetBufferDuration();
 510   EXPECT_LE(difference_in_length, base::TimeDelta::FromMilliseconds(200))
 511       << "Segments differ " << difference_in_length.InMilliseconds() << " ms "
 512       << "in length for segment " << segment_number << "; we're likely "
 513       << "comparing unrelated segments or silence splitting is busted.";
 514
 515   return actual_energy - ref_energy;
 516 }
 517
 518 void AnalyzeSegmentsAndPrintResult(
 519     const std::vector<base::FilePath>& ref_segments,
 520     const std::vector<base::FilePath>& actual_segments,
 521     const base::FilePath& reference_file,
 522     const std::string& perf_modifier) {
 523   ASSERT_GT(ref_segments.size(), 0u)
 524       << "Failed to split reference file on silence; sox is likely broken.";
 525   ASSERT_EQ(ref_segments.size(), actual_segments.size())
 526       << "The recording did not result in the same number of audio segments "
 527       << "after on splitting on silence; WebRTC must have deformed the audio "
 528       << "too much.";
 529
 530   for (size_t i = 0; i < ref_segments.size(); i++) {
 531     float difference_in_decibel = AnalyzeOneSegment(ref_segments[i],
 532                                                     actual_segments[i],
 533                                                     i);
 534     std::string trace_name = base::StringPrintf(
 535         "%s_segment_%zu", reference_file.BaseName().value().c_str(), i);
 536     perf_test::PrintResult("agc_energy_diff", perf_modifier, trace_name,
 537                            difference_in_decibel, "dB", false);
 538   }
 539 }
 540
 541 void ComputeAndPrintPesqResults(const base::FilePath& reference_file,
 542                                 const base::FilePath& recording,
 543                                 const std::string& perf_modifier) {
 544   base::FilePath trimmed_reference = CreateTemporaryWaveFile();
 545   base::FilePath trimmed_recording = CreateTemporaryWaveFile();
 546
 547   ASSERT_TRUE(RemoveSilence(reference_file, trimmed_reference));
 548   ASSERT_TRUE(RemoveSilence(recording, trimmed_recording));
 549
 550   std::string raw_mos;
 551   std::string mos_lqo;
 552   ASSERT_TRUE(RunPesq(trimmed_reference, trimmed_recording, 16000,
 553                       &raw_mos, &mos_lqo));
 554
 555   perf_test::PrintResult(
 556       "audio_pesq", perf_modifier, "raw_mos", raw_mos, "score", true);
 557   perf_test::PrintResult(
 558       "audio_pesq", perf_modifier, "mos_lqo", mos_lqo, "score", true);
 559
 560   EXPECT_TRUE(base::DeleteFile(trimmed_reference, false));
 561   EXPECT_TRUE(base::DeleteFile(trimmed_recording, false));
 562 }
 563
 564 }  // namespace
 565
 566 // Sets up a two-way WebRTC call and records its output to |recording|, using
 567 // getUserMedia.
 568 //
 569 // |reference_file| should have at least two seconds of silence in the
 570 // beginning: otherwise all the reference audio will not be picked up by the
 571 // recording. Note that the reference file will start playing as soon as the
 572 // audio device is up following the getUserMedia call in the left tab. The time
 573 // it takes to negotiate a call isn't deterministic, but two seconds should be
 574 // plenty of time. Similarly, the recording time should be enough to catch the
 575 // whole reference file. If you then silence-trim the reference file and actual
 576 // file, you should end up with two time-synchronized files.
 577 void MAYBE_WebRtcAudioQualityBrowserTest::SetupAndRecordAudioCall(
 578     const base::FilePath& reference_file,
 579     const base::FilePath& recording,
 580     const std::string& constraints,
 581     const base::TimeDelta recording_time) {
 582   ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
 583   ASSERT_TRUE(test::HasReferenceFilesInCheckout());
 584   ASSERT_TRUE(ForceMicrophoneVolumeTo100Percent());
 585
 586   ConfigureFakeDeviceToPlayFile(reference_file);
 587
 588   // Create a two-way call. Mute one of the receivers though; that way it will
 589   // be receiving audio bytes, but we will not be playing out of both elements.
 590   GURL test_page = embedded_test_server()->GetURL(kWebRtcAudioTestHtmlPage);
 591   content::WebContents* left_tab =
 592       OpenPageAndGetUserMediaInNewTabWithConstraints(test_page, constraints);
 593   SetupPeerconnectionWithLocalStream(left_tab);
 594   MuteMediaElement("remote-view", left_tab);
 595
 596   content::WebContents* right_tab =
 597       OpenPageAndGetUserMediaInNewTabWithConstraints(test_page, constraints);
 598   SetupPeerconnectionWithLocalStream(right_tab);
 599
 600   AudioRecorder recorder;
 601   ASSERT_TRUE(recorder.StartRecording(recording_time, recording));
 602
 603   NegotiateCall(left_tab, right_tab);
 604
 605   ASSERT_TRUE(recorder.WaitForRecordingToEnd());
 606   DVLOG(0) << "Done recording to " << recording.value() << std::endl;
 607
 608   HangUp(left_tab);
 609 }
 610
 611 IN_PROC_BROWSER_TEST_F(MAYBE_WebRtcAudioQualityBrowserTest,
 612                        MANUAL_TestCallQualityWithAudioFromFakeDevice) {
 613   if (OnWinXp() || OnWin8()) {
 614     // http://crbug.com/379798.
 615     LOG(ERROR) << "This test is not implemented for Windows XP/Win8.";
 616     return;
 617   }
 618
 619   base::FilePath reference_file =
 620       test::GetReferenceFilesDir().Append(kReferenceFile);
 621   base::FilePath recording = CreateTemporaryWaveFile();
 622
 623   ASSERT_NO_FATAL_FAILURE(SetupAndRecordAudioCall(
 624       reference_file, recording, kAudioOnlyCallConstraints,
 625       base::TimeDelta::FromSeconds(25)));
 626   ComputeAndPrintPesqResults(reference_file, recording, "_getusermedia");
 627
 628   EXPECT_TRUE(base::DeleteFile(recording, false));
 629 }
 630
 631 IN_PROC_BROWSER_TEST_F(MAYBE_WebRtcAudioQualityBrowserTest,
 632                        MANUAL_TestCallQualityWithAudioFromWebAudio) {
 633   if (OnWinXp() || OnWin8()) {
 634     // http://crbug.com/379798.
 635     LOG(ERROR) << "This test is not implemented for Windows XP/Win8.";
 636     return;
 637   }
 638   ASSERT_TRUE(test::HasReferenceFilesInCheckout());
 639   ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
 640
 641   ASSERT_TRUE(ForceMicrophoneVolumeTo100Percent());
 642
 643   content::WebContents* left_tab =
 644       OpenPageWithoutGetUserMedia(kWebRtcAudioTestHtmlPage);
 645   content::WebContents* right_tab =
 646       OpenPageWithoutGetUserMedia(kWebRtcAudioTestHtmlPage);
 647
 648   AddAudioFileToWebAudio(kReferenceFileRelativeUrl, left_tab);
 649
 650   NegotiateCall(left_tab, right_tab);
 651
 652   base::FilePath recording = CreateTemporaryWaveFile();
 653
 654   // Note: the sound clip is about 13 seconds: record for 20 seconds to get some
 655   // safety margins on each side.
 656   AudioRecorder recorder;
 657   ASSERT_TRUE(recorder.StartRecording(base::TimeDelta::FromSeconds(20),
 658                                       recording));
 659
 660   PlayAudioFileThroughWebAudio(left_tab);
 661
 662   ASSERT_TRUE(recorder.WaitForRecordingToEnd());
 663   DVLOG(0) << "Done recording to " << recording.value() << std::endl;
 664
 665   HangUp(left_tab);
 666
 667   // Compare with the reference file on disk (this is the same file we played
 668   // through WebAudio earlier).
 669   base::FilePath reference_file =
 670       test::GetReferenceFilesDir().Append(kReferenceFile);
 671   ComputeAndPrintPesqResults(reference_file, recording, "_webaudio");
 672 }
 673
 674 /**
 675  * The auto gain control test plays a file into the fake microphone. Then it
 676  * sets up a one-way WebRTC call with audio only and records Chrome's output on
 677  * the receiving side using the audio loopback provided by the quality test
 678  * (see the class comments for more details).
 679  *
 680  * Then both the recording and reference file are split on silence. This creates
 681  * a number of segments with speech in them. The reason for this is to provide
 682  * a kind of synchronization mechanism so the start of each speech segment is
 683  * compared to the start of the corresponding speech segment. This is because we
 684  * will experience inevitable clock drift between the system clock (which runs
 685  * the fake microphone) and the sound card (which runs play-out). Effectively
 686  * re-synchronizing on each segment mitigates this.
 687  *
 688  * The silence splitting is inherently sensitive to the sound file we run on.
 689  * Therefore the reference file must have at least 500 ms of pure silence
 690  * between speech segments; the test will fail if the output produces more
 691  * segments than the reference.
 692  *
 693  * The test reports the difference in decibel between the reference and output
 694  * file per 10 ms interval in each speech segment. A value of 6 means the
 695  * output was 6 dB louder than the reference, presumably because the AGC applied
 696  * gain to the signal.
 697  *
 698  * The test only exercises digital AGC for now.
 699  *
 700  * We record in CD format here (44.1 kHz) because that's what the fake input
 701  * device currently supports, and we want to be able to compare directly. See
 702  * http://crbug.com/421054.
 703  */
 704 void MAYBE_WebRtcAudioQualityBrowserTest::TestAutoGainControl(
 705     const base::FilePath::StringType& reference_filename,
 706     const std::string& constraints,
 707     const std::string& perf_modifier) {
 708   if (OnWinXp() || OnWin8()) {
 709     // http://crbug.com/379798.
 710     LOG(ERROR) << "This test is not implemented for Windows XP/Win8.";
 711     return;
 712   }
 713   base::FilePath reference_file =
 714       test::GetReferenceFilesDir().Append(reference_filename);
 715   base::FilePath recording = CreateTemporaryWaveFile();
 716
 717   ASSERT_NO_FATAL_FAILURE(SetupAndRecordAudioCall(
 718       reference_file, recording, constraints,
 719       base::TimeDelta::FromSeconds(25)));
 720
 721   // Call Take() on the scoped temp dirs if you want to look at the files after
 722   // the test exits (the default is to delete the files).
 723   base::ScopedTempDir split_ref_files;
 724   ASSERT_TRUE(split_ref_files.CreateUniqueTempDir());
 725   ASSERT_NO_FATAL_FAILURE(
 726       SplitFileOnSilenceIntoDir(reference_file, split_ref_files.path()));
 727   std::vector<base::FilePath> ref_segments =
 728       ListWavFilesInDir(split_ref_files.path());
 729
 730   base::ScopedTempDir split_actual_files;
 731   ASSERT_TRUE(split_actual_files.CreateUniqueTempDir());
 732   ASSERT_NO_FATAL_FAILURE(
 733       SplitFileOnSilenceIntoDir(recording, split_actual_files.path()));
 734   std::vector<base::FilePath> actual_segments =
 735       ListWavFilesInDir(split_actual_files.path());
 736
 737   AnalyzeSegmentsAndPrintResult(ref_segments, actual_segments, reference_file,
 738                                 perf_modifier);
 739
 740   EXPECT_TRUE(base::DeleteFile(recording, false));
 741 }
 742
 743 // Only implemented for Linux for now.
 744 #if defined(OS_LINUX)
 745 #define MAYBE_MANUAL_TestAutoGainControlOnLowAudio \
 746         MANUAL_TestAutoGainControlOnLowAudio
 747 #else
 748 #define MAYBE_MANUAL_TestAutoGainControlOnLowAudio \
 749         DISABLED_MANUAL_TestAutoGainControlOnLowAudio
 750 #endif
 751
 752 // The AGC should apply non-zero gain here.
 753 IN_PROC_BROWSER_TEST_F(MAYBE_WebRtcAudioQualityBrowserTest,
 754                        MAYBE_MANUAL_TestAutoGainControlOnLowAudio) {
 755   ASSERT_NO_FATAL_FAILURE(TestAutoGainControl(
 756       kReferenceFile, kAudioOnlyCallConstraints, "_with_agc"));
 757 }
 758
 759 // Only implemented for Linux for now.
 760 #if defined(OS_LINUX)
 761 #define MAYBE_MANUAL_TestAutoGainIsOffWithAudioProcessingOff \
 762         MANUAL_TestAutoGainIsOffWithAudioProcessingOff
 763 #else
 764 #define MAYBE_MANUAL_TestAutoGainIsOffWithAudioProcessingOff \
 765         DISABLED_MANUAL_TestAutoGainIsOffWithAudioProcessingOff
 766 #endif
 767
 768 // Since the AGC is off here there should be no gain at all.
 769 IN_PROC_BROWSER_TEST_F(MAYBE_WebRtcAudioQualityBrowserTest,
 770                        MAYBE_MANUAL_TestAutoGainIsOffWithAudioProcessingOff) {
 771   const char* kAudioCallWithoutAudioProcessing =
 772       "{audio: { mandatory: { echoCancellation: false } } }";
 773   ASSERT_NO_FATAL_FAILURE(TestAutoGainControl(
 774       kReferenceFile, kAudioCallWithoutAudioProcessing, "_no_agc"));
 775 }