chrome/browser/media/chrome_webrtc_audio_quality_browsertest.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
#include <algorithm>
#include <ctime>

#include "base/command_line.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_util.h"
#include "base/files/scoped_temp_dir.h"
#include "base/process/launch.h"
#include "base/process/process.h"
#include "base/scoped_native_library.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "chrome/browser/media/webrtc_browsertest_audio.h"
#include "chrome/browser/media/webrtc_browsertest_base.h"
#include "chrome/browser/media/webrtc_browsertest_common.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/ui/browser.h"
#include "chrome/browser/ui/browser_tabstrip.h"
#include "chrome/browser/ui/tabs/tab_strip_model.h"
#include "chrome/common/chrome_paths.h"
#include "chrome/common/chrome_switches.h"
#include "chrome/test/base/ui_test_utils.h"
#include "content/public/test/browser_test_utils.h"
#include "media/audio/audio_parameters.h"
#include "media/base/media_switches.h"
#include "net/test/embedded_test_server/embedded_test_server.h"
#include "testing/perf/perf_test.h"
  32
  33 namespace {
  34
  35 static const base::FilePath::CharType kReferenceFile[] =
  36     FILE_PATH_LITERAL("speech_44kHz_16bit_stereo.wav");
  37
  38 // The javascript will load the reference file relative to its location,
  39 // which is in /webrtc on the web server. The files we are looking for are in
  40 // webrtc/resources in the chrome/test/data folder.
  41 static const char kReferenceFileRelativeUrl[] =
  42     "resources/speech_44kHz_16bit_stereo.wav";
  43
  44 static const char kWebRtcAudioTestHtmlPage[] =
  45     "/webrtc/webrtc_audio_quality_test.html";
  46
  47 // For the AGC test, there are 6 speech segments split on silence. If one
  48 // segment is significantly different in length compared to the same segment in
  49 // the reference file, there's something fishy going on.
  50 const int kMaxAgcSegmentDiffMs =
  51 #if defined(OS_MACOSX)
  52   // Something is different on Mac; http://crbug.com/477653.
  53   600;
  54 #else
  55   200;
  56 #endif
  57
  58 #if defined(OS_LINUX) || defined(OS_WIN) || defined(OS_MACOSX)
  59 #define MAYBE_WebRtcAudioQualityBrowserTest WebRtcAudioQualityBrowserTest
  60 #else
  61 // Not implemented on Android, ChromeOS etc.
  62 #define MAYBE_WebRtcAudioQualityBrowserTest DISABLED_WebRtcAudioQualityBrowserTest
  63 #endif
  64
  65 }  // namespace
  66
  67 // Test we can set up a WebRTC call and play audio through it.
  68 //
  69 // If you're not a googler and want to run this test, you need to provide a
  70 // pesq binary for your platform (and sox.exe on windows). Read more on how
  71 // resources are managed in chrome/test/data/webrtc/resources/README.
  72 //
  73 // This test will only work on machines that have been configured to record
  74 // their own input.
  75 //
  76 // On Linux:
  77 // 1. # sudo apt-get install pavucontrol sox
  78 // 2. For the user who will run the test: # pavucontrol
  79 // 3. In a separate terminal, # arecord dummy
  80 // 4. In pavucontrol, go to the recording tab.
  81 // 5. For the ALSA plugin [aplay]: ALSA Capture from, change from <x> to
  82 //    <Monitor of x>, where x is whatever your primary sound device is called.
  83 // 6. Try launching chrome as the target user on the target machine, try
  84 //    playing, say, a YouTube video, and record with # arecord -f dat tmp.dat.
  85 //    Verify the recording with aplay (should have recorded what you played
  86 //    from chrome).
  87 //
  88 // Note: the volume for ALL your input devices will be forced to 100% by
  89 //       running this test on Linux.
  90 //
  91 // On Mac:
  92 // TODO(phoglund): download sox from gs instead.
  93 // 1. Get SoundFlower: http://rogueamoeba.com/freebies/soundflower/download.php
  94 // 2. Install it + reboot.
  95 // 3. Install MacPorts (http://www.macports.org/).
  96 // 4. Install sox: sudo port install sox.
// 5. (For Chrome bots) Ensure sox and rec are reachable from the env the test
//    executes in (sox and rec tend to install in /opt/, which generally isn't
//    in the Chrome bots' env). For instance, run
 100 //    sudo ln -s /opt/local/bin/rec /usr/local/bin/rec
 101 //    sudo ln -s /opt/local/bin/sox /usr/local/bin/sox
 102 // 6. In Sound Preferences, set both input and output to Soundflower (2ch).
 103 //    Note: You will no longer hear audio on this machine, and it will no
 104 //    longer use any built-in mics.
 105 // 7. Try launching chrome as the target user on the target machine, try
 106 //    playing, say, a YouTube video, and record with 'rec test.wav trim 0 5'.
 107 //    Stop the video in chrome and try playing back the file; you should hear
//    a recording of the video (note; if you play back on the target machine
//    you must revert the changes in step 6 first).
 110 //
 111 // On Windows 7:
 112 // 1. Control panel > Sound > Manage audio devices.
 113 // 2. In the recording tab, right-click in an empty space in the pane with the
 114 //    devices. Tick 'show disabled devices'.
// 3. You should see a 'stereo mix' device - this is what your speakers output.
 116 //    Right click > Properties.
 117 // 4. In the Listen tab for the mix device, check the 'listen to this device'
 118 //    checkbox. Ensure the mix device is the default recording device.
// 5. Launch chrome and try playing a video with sound. You should see
//    activity in the volume meter for the mix device. Configure the mix
//    device to have 50 / 100 in level. Also go into the playback tab,
//    right-click Speakers, and set that level to 50 / 100. Otherwise you
//    will get distortion in the recording.
class MAYBE_WebRtcAudioQualityBrowserTest : public WebRtcTestBase {
 public:
  MAYBE_WebRtcAudioQualityBrowserTest() {}
  void SetUpInProcessBrowserTestFixture() override {
    DetectErrorsInJavaScript();  // Look for errors in our rather complex js.
  }

  // Verifies the fake media-stream UI switch is not set and turns on the fake
  // media-stream device switch.
  void SetUpCommandLine(base::CommandLine* command_line) override {
    EXPECT_FALSE(command_line->HasSwitch(
        switches::kUseFakeUIForMediaStream));

    // The WebAudio-based tests don't care what devices are available to
    // getUserMedia, and the getUserMedia-based tests will play back a file
    // through the fake device using --use-file-for-fake-audio-capture.
    command_line->AppendSwitch(switches::kUseFakeDeviceForMediaStream);
  }

  // Appends the fake-audio-capture file switch, pointing at |wav_file_path|,
  // to the current process's command line.
  void ConfigureFakeDeviceToPlayFile(const base::FilePath& wav_file_path) {
    base::CommandLine::ForCurrentProcess()->AppendSwitchPath(
        switches::kUseFileForFakeAudioCapture, wav_file_path);
  }

  // Runs addAudioFile('<input_file_relative_url>') in |tab_contents| and
  // expects the page to answer "ok-added".
  void AddAudioFileToWebAudio(const std::string& input_file_relative_url,
                              content::WebContents* tab_contents) {
    // This calls into webaudio.js.
    EXPECT_EQ("ok-added", ExecuteJavascript(
        "addAudioFile('" + input_file_relative_url + "')", tab_contents));
  }

  // Runs playAudioFile() in |tab_contents| and expects "ok-playing".
  void PlayAudioFileThroughWebAudio(content::WebContents* tab_contents) {
    EXPECT_EQ("ok-playing", ExecuteJavascript("playAudioFile()", tab_contents));
  }

  // Opens a new tab, navigates it to |url| on the embedded test server and
  // prepares a peer connection in it. Returns the active tab's WebContents.
  content::WebContents* OpenPageWithoutGetUserMedia(const char* url) {
    chrome::AddTabAt(browser(), GURL(), -1, true);
    ui_test_utils::NavigateToURL(
        browser(), embedded_test_server()->GetURL(url));
    content::WebContents* tab =
        browser()->tab_strip_model()->GetActiveWebContents();

    // Prepare the peer connections manually in this test since we don't add
    // getUserMedia-derived media streams in this test like the other tests.
    EXPECT_EQ("ok-peerconnection-created",
              ExecuteJavascript("preparePeerConnection()", tab));
    return tab;
  }

  // Runs setMediaElementMuted('<element_id>', true) in |tab_contents| and
  // expects "ok-muted".
  void MuteMediaElement(const std::string& element_id,
                        content::WebContents* tab_contents) {
    EXPECT_EQ("ok-muted", ExecuteJavascript(
        "setMediaElementMuted('" + element_id + "', true)", tab_contents));
  }

 protected:
  // Declared here; the definition is outside this class body.
  void TestAutoGainControl(const base::FilePath::StringType& reference_filename,
                           const std::string& constraints,
                           const std::string& perf_modifier);
  // Sets up a two-way call fed by |reference_file| and records the output to
  // |recording| for |recording_time|.
  void SetupAndRecordAudioCall(const base::FilePath& reference_file,
                               const base::FilePath& recording,
                               const std::string& constraints,
                               const base::TimeDelta recording_time);
  // Records a getUserMedia call through the fake device and prints PESQ
  // results tagged with |perf_modifier|.
  void TestWithFakeDeviceGetUserMedia(const std::string& constraints,
                                      const std::string& perf_modifier);
};
 188
 189 namespace {
 190
 191 class AudioRecorder {
 192  public:
 193   AudioRecorder() {}
 194   ~AudioRecorder() {}
 195
 196   // Starts the recording program for the specified duration. Returns true
 197   // on success. We record in 16-bit 44.1 kHz Stereo (mostly because that's
 198   // what SoundRecorder.exe will give us and we can't change that).
 199   bool StartRecording(base::TimeDelta recording_time,
 200                       const base::FilePath& output_file) {
 201     EXPECT_FALSE(recording_application_.IsValid())
 202         << "Tried to record, but is already recording.";
 203
 204     int duration_sec = static_cast<int>(recording_time.InSeconds());
 205     base::CommandLine command_line(base::CommandLine::NO_PROGRAM);
 206
 207 #if defined(OS_WIN)
 208     // This disable is required to run SoundRecorder.exe on 64-bit Windows
 209     // from a 32-bit binary. We need to load the wow64 disable function from
 210     // the DLL since it doesn't exist on Windows XP.
 211     base::ScopedNativeLibrary kernel32_lib(base::FilePath(L"kernel32"));
 212     if (kernel32_lib.is_valid()) {
 213       typedef BOOL (WINAPI* Wow64DisableWow64FSRedirection)(PVOID*);
 214       Wow64DisableWow64FSRedirection wow_64_disable_wow_64_fs_redirection;
 215       wow_64_disable_wow_64_fs_redirection =
 216           reinterpret_cast<Wow64DisableWow64FSRedirection>(
 217               kernel32_lib.GetFunctionPointer(
 218                   "Wow64DisableWow64FsRedirection"));
 219       if (wow_64_disable_wow_64_fs_redirection != NULL) {
 220         PVOID* ignored = NULL;
 221         wow_64_disable_wow_64_fs_redirection(ignored);
 222       }
 223     }
 224
 225     char duration_in_hms[128] = {0};
 226     struct tm duration_tm = {0};
 227     duration_tm.tm_sec = duration_sec;
 228     EXPECT_NE(0u, strftime(duration_in_hms, arraysize(duration_in_hms),
 229                            "%H:%M:%S", &duration_tm));
 230
 231     command_line.SetProgram(
 232         base::FilePath(FILE_PATH_LITERAL("SoundRecorder.exe")));
 233     command_line.AppendArg("/FILE");
 234     command_line.AppendArgPath(output_file);
 235     command_line.AppendArg("/DURATION");
 236     command_line.AppendArg(duration_in_hms);
 237 #elif defined(OS_MACOSX)
 238     command_line.SetProgram(base::FilePath("rec"));
 239     command_line.AppendArg("-b");
 240     command_line.AppendArg("16");
 241     command_line.AppendArg("-q");
 242     command_line.AppendArgPath(output_file);
 243     command_line.AppendArg("trim");
 244     command_line.AppendArg("0");
 245     command_line.AppendArg(base::StringPrintf("%d", duration_sec));
 246 #else
 247     command_line.SetProgram(base::FilePath("arecord"));
 248     command_line.AppendArg("-d");
 249     command_line.AppendArg(base::StringPrintf("%d", duration_sec));
 250     command_line.AppendArg("-f");
 251     command_line.AppendArg("cd");
 252     command_line.AppendArg("-c");
 253     command_line.AppendArg("2");
 254     command_line.AppendArgPath(output_file);
 255 #endif
 256
 257     DVLOG(0) << "Running " << command_line.GetCommandLineString();
 258     recording_application_ =
 259         base::LaunchProcess(command_line, base::LaunchOptions());
 260     return recording_application_.IsValid();
 261   }
 262
 263   // Joins the recording program. Returns true on success.
 264   bool WaitForRecordingToEnd() {
 265     int exit_code = -1;
 266     recording_application_.WaitForExit(&exit_code);
 267     return exit_code == 0;
 268   }
 269  private:
 270   base::Process recording_application_;
 271 };
 272
 273 bool ForceMicrophoneVolumeTo100Percent() {
 274 #if defined(OS_WIN)
 275   // Note: the force binary isn't in tools since it's one of our own.
 276   base::CommandLine command_line(test::GetReferenceFilesDir().Append(
 277       FILE_PATH_LITERAL("force_mic_volume_max.exe")));
 278   DVLOG(0) << "Running " << command_line.GetCommandLineString();
 279   std::string result;
 280   if (!base::GetAppOutput(command_line, &result)) {
 281     LOG(ERROR) << "Failed to set source volume: output was " << result;
 282     return false;
 283   }
 284 #elif defined(OS_MACOSX)
 285   base::CommandLine command_line(
 286       base::FilePath(FILE_PATH_LITERAL("osascript")));
 287   command_line.AppendArg("-e");
 288   command_line.AppendArg("set volume input volume 100");
 289   command_line.AppendArg("-e");
 290   command_line.AppendArg("set volume output volume 85");
 291
 292   std::string result;
 293   if (!base::GetAppOutput(command_line, &result)) {
 294     LOG(ERROR) << "Failed to set source volume: output was " << result;
 295     return false;
 296   }
 297 #else
 298   // Just force the volume of, say the first 5 devices. A machine will rarely
 299   // have more input sources than that. This is way easier than finding the
 300   // input device we happen to be using.
 301   for (int device_index = 0; device_index < 5; ++device_index) {
 302     std::string result;
 303     const std::string kHundredPercentVolume = "65536";
 304     base::CommandLine command_line(base::FilePath(FILE_PATH_LITERAL("pacmd")));
 305     command_line.AppendArg("set-source-volume");
 306     command_line.AppendArg(base::StringPrintf("%d", device_index));
 307     command_line.AppendArg(kHundredPercentVolume);
 308     DVLOG(0) << "Running " << command_line.GetCommandLineString();
 309     if (!base::GetAppOutput(command_line, &result)) {
 310       LOG(ERROR) << "Failed to set source volume: output was " << result;
 311       return false;
 312     }
 313   }
 314 #endif
 315   return true;
 316 }
 317
 318 // Sox is the "Swiss army knife" of audio processing. We mainly use it for
 319 // silence trimming. See http://sox.sourceforge.net.
 320 base::CommandLine MakeSoxCommandLine() {
 321 #if defined(OS_WIN)
 322   base::FilePath sox_path = test::GetToolForPlatform("sox");
 323   if (!base::PathExists(sox_path)) {
 324     LOG(ERROR) << "Missing sox.exe binary in " << sox_path.value()
 325                << "; you may have to provide this binary yourself.";
 326     return base::CommandLine(base::CommandLine::NO_PROGRAM);
 327   }
 328   base::CommandLine command_line(sox_path);
 329 #else
 330   // TODO(phoglund): call checked-in sox rather than system sox on mac/linux.
 331   // Same for rec invocations on Mac, above.
 332   base::CommandLine command_line(base::FilePath(FILE_PATH_LITERAL("sox")));
 333 #endif
 334   return command_line;
 335 }
 336
 337 // Removes silence from beginning and end of the |input_audio_file| and writes
 338 // the result to the |output_audio_file|. Returns true on success.
 339 bool RemoveSilence(const base::FilePath& input_file,
 340                    const base::FilePath& output_file) {
 341   // SOX documentation for silence command: http://sox.sourceforge.net/sox.html
 342   // To remove the silence from both beginning and end of the audio file, we
 343   // call sox silence command twice: once on normal file and again on its
 344   // reverse, then we reverse the final output.
 345   // Silence parameters are (in sequence):
 346   // ABOVE_PERIODS: The period for which silence occurs. Value 1 is used for
 347   //                 silence at beginning of audio.
 348   // DURATION: the amount of time in seconds that non-silence must be detected
 349   //           before sox stops trimming audio.
 350   // THRESHOLD: value used to indicate what sample value is treats as silence.
 351   const char* kAbovePeriods = "1";
 352   const char* kDuration = "2";
 353   const char* kTreshold = "1.5%";
 354
 355   base::CommandLine command_line = MakeSoxCommandLine();
 356   if (command_line.GetProgram().empty())
 357     return false;
 358   command_line.AppendArgPath(input_file);
 359   command_line.AppendArgPath(output_file);
 360   command_line.AppendArg("silence");
 361   command_line.AppendArg(kAbovePeriods);
 362   command_line.AppendArg(kDuration);
 363   command_line.AppendArg(kTreshold);
 364   command_line.AppendArg("reverse");
 365   command_line.AppendArg("silence");
 366   command_line.AppendArg(kAbovePeriods);
 367   command_line.AppendArg(kDuration);
 368   command_line.AppendArg(kTreshold);
 369   command_line.AppendArg("reverse");
 370
 371   DVLOG(0) << "Running " << command_line.GetCommandLineString();
 372   std::string result;
 373   bool ok = base::GetAppOutput(command_line, &result);
 374   DVLOG(0) << "Output was:\n\n" << result;
 375   return ok;
 376 }
 377
 378 // Looks for 0.2 second audio segments surrounded by silences under 0.3% audio
 379 // power and splits the input file on those silences. Output files are written
 380 // according to the output file template (e.g. /tmp/out.wav writes
 381 // /tmp/out001.wav, /tmp/out002.wav, etc if there are two silence-padded
 382 // regions in the file). The silences between speech segments must be at
 383 // least 500 ms for this to be reliable.
 384 bool SplitFileOnSilence(const base::FilePath& input_file,
 385                         const base::FilePath& output_file_template) {
 386   base::CommandLine command_line = MakeSoxCommandLine();
 387   if (command_line.GetProgram().empty())
 388     return false;
 389
 390   // These are experimentally determined and work on the files we use.
 391   const char* kAbovePeriods = "1";
 392   const char* kUnderPeriods = "1";
 393   const char* kDuration = "0.2";
 394   const char* kTreshold = "0.5%";
 395   command_line.AppendArgPath(input_file);
 396   command_line.AppendArgPath(output_file_template);
 397   command_line.AppendArg("silence");
 398   command_line.AppendArg(kAbovePeriods);
 399   command_line.AppendArg(kDuration);
 400   command_line.AppendArg(kTreshold);
 401   command_line.AppendArg(kUnderPeriods);
 402   command_line.AppendArg(kDuration);
 403   command_line.AppendArg(kTreshold);
 404   command_line.AppendArg(":");
 405   command_line.AppendArg("newfile");
 406   command_line.AppendArg(":");
 407   command_line.AppendArg("restart");
 408
 409   DVLOG(0) << "Running " << command_line.GetCommandLineString();
 410   std::string result;
 411   bool ok = base::GetAppOutput(command_line, &result);
 412   DVLOG(0) << "Output was:\n\n" << result;
 413   return ok;
 414 }
 415
 416 bool CanParseAsFloat(const std::string& value) {
 417   return atof(value.c_str()) != 0 || value == "0";
 418 }
 419
 420 // Runs PESQ to compare |reference_file| to a |actual_file|. The |sample_rate|
 421 // can be either 16000 or 8000.
 422 //
 423 // PESQ is only mono-aware, so the files should preferably be recorded in mono.
 424 // Furthermore it expects the file to be 16 rather than 32 bits, even though
 425 // 32 bits might work. The audio bandwidth of the two files should be the same
 426 // e.g. don't compare a 32 kHz file to a 8 kHz file.
 427 //
 428 // The raw score in MOS is written to |raw_mos|, whereas the MOS-LQO score is
 429 // written to mos_lqo. The scores are returned as floats in string form (e.g.
 430 // "3.145", etc). Returns true on success.
 431 bool RunPesq(const base::FilePath& reference_file,
 432              const base::FilePath& actual_file,
 433              int sample_rate, std::string* raw_mos, std::string* mos_lqo) {
 434   // PESQ will break if the paths are too long (!).
 435   EXPECT_LT(reference_file.value().length(), 128u);
 436   EXPECT_LT(actual_file.value().length(), 128u);
 437
 438   base::FilePath pesq_path = test::GetToolForPlatform("pesq");
 439   if (!base::PathExists(pesq_path)) {
 440     LOG(ERROR) << "Missing PESQ binary in " << pesq_path.value()
 441                << "; you may have to provide this binary yourself.";
 442     return false;
 443   }
 444
 445   base::CommandLine command_line(pesq_path);
 446   command_line.AppendArg(base::StringPrintf("+%d", sample_rate));
 447   command_line.AppendArgPath(reference_file);
 448   command_line.AppendArgPath(actual_file);
 449
 450   DVLOG(0) << "Running " << command_line.GetCommandLineString();
 451   std::string result;
 452   if (!base::GetAppOutput(command_line, &result)) {
 453     LOG(ERROR) << "Failed to run PESQ.";
 454     return false;
 455   }
 456   DVLOG(0) << "Output was:\n\n" << result;
 457
 458   const std::string result_anchor = "Prediction (Raw MOS, MOS-LQO):  = ";
 459   std::size_t anchor_pos = result.find(result_anchor);
 460   if (anchor_pos == std::string::npos) {
 461     LOG(ERROR) << "PESQ was not able to compute a score; we probably recorded "
 462         << "only silence. Please check the output/input volume levels.";
 463     return false;
 464   }
 465
 466   // There are two tab-separated numbers on the format x.xxx, e.g. 5 chars each.
 467   std::size_t first_number_pos = anchor_pos + result_anchor.length();
 468   *raw_mos = result.substr(first_number_pos, 5);
 469   EXPECT_TRUE(CanParseAsFloat(*raw_mos)) << "Failed to parse raw MOS number.";
 470   *mos_lqo = result.substr(first_number_pos + 5 + 1, 5);
 471   EXPECT_TRUE(CanParseAsFloat(*mos_lqo)) << "Failed to parse MOS LQO number.";
 472
 473   return true;
 474 }
 475
// Creates a temporary file, renames it so that it carries a .wav extension and
// returns the renamed path. Failures to create or move the file are reported
// as non-fatal test failures; the .wav path is returned either way.
base::FilePath CreateTemporaryWaveFile() {
  base::FilePath filename;
  EXPECT_TRUE(base::CreateTemporaryFile(&filename));
  // Derive the .wav name from the temporary name, then move the file there.
  base::FilePath wav_filename =
      filename.AddExtension(FILE_PATH_LITERAL(".wav"));
  EXPECT_TRUE(base::Move(filename, wav_filename));
  return wav_filename;
}
 484
 485 void DeleteFileUnlessTestFailed(const base::FilePath& path, bool recursive) {
 486   if (::testing::Test::HasFailure())
 487     printf("Test failed; keeping recording(s) at\n\t%" PRFilePath ".\n",
 488            path.value().c_str());
 489   else
 490     EXPECT_TRUE(base::DeleteFile(path, recursive));
 491 }
 492
 493 std::vector<base::FilePath> ListWavFilesInDir(const base::FilePath& dir) {
 494   base::FileEnumerator files(dir, false, base::FileEnumerator::FILES,
 495                              FILE_PATH_LITERAL("*.wav"));
 496
 497   std::vector<base::FilePath> result;
 498   for (base::FilePath name = files.Next(); !name.empty(); name = files.Next())
 499     result.push_back(name);
 500   return result;
 501 }
 502
// Splits |to_split| into sub-files based on silence. The file you use must have
// at least 500 ms periods of silence between speech segments for this to be
// reliable.
//
// The pieces are written into |workdir| using "output.wav" as the file name
// template. A failure in either sox step raises a fatal test failure and
// returns early, leaving the intermediate trimmed file on disk.
void SplitFileOnSilenceIntoDir(const base::FilePath& to_split,
                               const base::FilePath& workdir) {
  // First trim beginning and end since they are tricky for the splitter.
  base::FilePath trimmed_audio = CreateTemporaryWaveFile();

  ASSERT_TRUE(RemoveSilence(to_split, trimmed_audio));
  DVLOG(0) << "Trimmed silence: " << trimmed_audio.value() << std::endl;

  ASSERT_TRUE(SplitFileOnSilence(
      trimmed_audio, workdir.Append(FILE_PATH_LITERAL("output.wav"))));
  // The trimmed intermediate is only kept around if the test has failed.
  DeleteFileUnlessTestFailed(trimmed_audio, false);
}
 518
 519 // Computes the difference between the actual and reference segment. A positive
 520 // number x means the actual file is x dB stronger than the reference.
 521 float AnalyzeOneSegment(const base::FilePath& ref_segment,
 522                         const base::FilePath& actual_segment,
 523                         int segment_number) {
 524   media::AudioParameters ref_parameters;
 525   media::AudioParameters actual_parameters;
 526   float ref_energy =
 527       test::ComputeAudioEnergyForWavFile(ref_segment, &ref_parameters);
 528   float actual_energy =
 529       test::ComputeAudioEnergyForWavFile(actual_segment, &actual_parameters);
 530
 531   base::TimeDelta difference_in_length = ref_parameters.GetBufferDuration() -
 532                                          actual_parameters.GetBufferDuration();
 533
 534   EXPECT_LE(difference_in_length,
 535             base::TimeDelta::FromMilliseconds(kMaxAgcSegmentDiffMs))
 536       << "Segments differ " << difference_in_length.InMilliseconds() << " ms "
 537       << "in length for segment " << segment_number << "; we're likely "
 538       << "comparing unrelated segments or silence splitting is busted.";
 539
 540   return actual_energy - ref_energy;
 541 }
 542
 543 std::string MakeTraceName(const base::FilePath& ref_filename,
 544                           size_t segment_number) {
 545   std::string ascii_filename;
 546 #if defined(OS_WIN)
 547   ascii_filename = base::WideToUTF8(ref_filename.BaseName().value());
 548 #else
 549   ascii_filename = ref_filename.BaseName().value();
 550 #endif
 551   return base::StringPrintf(
 552       "%s_segment_%d", ascii_filename.c_str(), (int)segment_number);
 553 }
 554
 555 void AnalyzeSegmentsAndPrintResult(
 556     const std::vector<base::FilePath>& ref_segments,
 557     const std::vector<base::FilePath>& actual_segments,
 558     const base::FilePath& reference_file,
 559     const std::string& perf_modifier) {
 560   ASSERT_GT(ref_segments.size(), 0u)
 561       << "Failed to split reference file on silence; sox is likely broken.";
 562   ASSERT_EQ(ref_segments.size(), actual_segments.size())
 563       << "The recording did not result in the same number of audio segments "
 564       << "after on splitting on silence; WebRTC must have deformed the audio "
 565       << "too much.";
 566
 567   for (size_t i = 0; i < ref_segments.size(); i++) {
 568     float difference_in_decibel = AnalyzeOneSegment(ref_segments[i],
 569                                                     actual_segments[i],
 570                                                     i);
 571     std::string trace_name = MakeTraceName(reference_file, i);
 572     perf_test::PrintResult("agc_energy_diff", perf_modifier, trace_name,
 573                            difference_in_decibel, "dB", false);
 574   }
 575 }
 576
// Silence-trims |reference_file| and |recording| into temporary files, runs
// PESQ on the trimmed pair at 16000 Hz and prints the raw MOS and MOS-LQO
// scores as "audio_pesq" perf results tagged with |perf_modifier|.
//
// A failed trim raises a fatal test failure and returns early. A failed PESQ
// run raises a non-fatal failure and prints nothing.
void ComputeAndPrintPesqResults(const base::FilePath& reference_file,
                                const base::FilePath& recording,
                                const std::string& perf_modifier) {
  base::FilePath trimmed_reference = CreateTemporaryWaveFile();
  base::FilePath trimmed_recording = CreateTemporaryWaveFile();

  ASSERT_TRUE(RemoveSilence(reference_file, trimmed_reference));
  ASSERT_TRUE(RemoveSilence(recording, trimmed_recording));

  std::string raw_mos;
  std::string mos_lqo;
  bool succeeded = RunPesq(trimmed_reference, trimmed_recording, 16000,
                           &raw_mos, &mos_lqo);
  EXPECT_TRUE(succeeded) << "Failed to run PESQ.";
  if (succeeded) {
    perf_test::PrintResult(
        "audio_pesq", perf_modifier, "raw_mos", raw_mos, "score", true);
    perf_test::PrintResult(
        "audio_pesq", perf_modifier, "mos_lqo", mos_lqo, "score", true);
  }

  // The trimmed files are only kept around if the test has failed.
  DeleteFileUnlessTestFailed(trimmed_reference, false);
  DeleteFileUnlessTestFailed(trimmed_recording, false);
}
 601
 602 }  // namespace
 603
// Sets up a two-way WebRTC call and records its output to |recording|, using
// getUserMedia.
//
// |reference_file| should have at least five seconds of silence in the
// beginning: otherwise all the reference audio will not be picked up by the
// recording. Note that the reference file will start playing as soon as the
// audio device is up following the getUserMedia call in the left tab. The time
// it takes to negotiate a call isn't deterministic, but five seconds should be
// plenty of time. Similarly, the recording time should be enough to catch the
// whole reference file. If you then silence-trim the reference file and actual
// file, you should end up with two time-synchronized files.
//
// |constraints| is passed to getUserMedia in both tabs. Any failed setup step
// raises a fatal test failure and returns early.
void MAYBE_WebRtcAudioQualityBrowserTest::SetupAndRecordAudioCall(
    const base::FilePath& reference_file,
    const base::FilePath& recording,
    const std::string& constraints,
    const base::TimeDelta recording_time) {
  ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
  ASSERT_TRUE(test::HasReferenceFilesInCheckout());
  ASSERT_TRUE(ForceMicrophoneVolumeTo100Percent());

  ConfigureFakeDeviceToPlayFile(reference_file);

  // Create a two-way call. Mute one of the receivers though; that way it will
  // be receiving audio bytes, but we will not be playing out of both elements.
  GURL test_page = embedded_test_server()->GetURL(kWebRtcAudioTestHtmlPage);
  content::WebContents* left_tab =
      OpenPageAndGetUserMediaInNewTabWithConstraints(test_page, constraints);
  SetupPeerconnectionWithLocalStream(left_tab);
  MuteMediaElement("remote-view", left_tab);

  content::WebContents* right_tab =
      OpenPageAndGetUserMediaInNewTabWithConstraints(test_page, constraints);
  SetupPeerconnectionWithLocalStream(right_tab);

  // Start recording before the call is negotiated.
  AudioRecorder recorder;
  ASSERT_TRUE(recorder.StartRecording(recording_time, recording));

  NegotiateCall(left_tab, right_tab);

  // Wait until the recording program exits.
  ASSERT_TRUE(recorder.WaitForRecordingToEnd());
  DVLOG(0) << "Done recording to " << recording.value() << std::endl;

  HangUp(left_tab);
}
 648
// Records a 30 second getUserMedia call that plays the reference file through
// the fake device, then prints PESQ results tagged with |perf_modifier|.
// |constraints| is forwarded to the call setup. Logs an error and does nothing
// on Windows XP and Windows 8.
void MAYBE_WebRtcAudioQualityBrowserTest::TestWithFakeDeviceGetUserMedia(
    const std::string& constraints,
    const std::string& perf_modifier) {
  if (OnWinXp() || OnWin8()) {
    // http://crbug.com/379798.
    LOG(ERROR) << "This test is not implemented for Windows XP/Win8.";
    return;
  }

  base::FilePath reference_file =
      test::GetReferenceFilesDir().Append(kReferenceFile);
  base::FilePath recording = CreateTemporaryWaveFile();

  // Stop here if the call setup or the recording hit a fatal failure.
  ASSERT_NO_FATAL_FAILURE(SetupAndRecordAudioCall(
      reference_file, recording, constraints,
      base::TimeDelta::FromSeconds(30)));

  ComputeAndPrintPesqResults(reference_file, recording, perf_modifier);
  // The recording is only kept around if the test has failed.
  DeleteFileUnlessTestFailed(recording, false);
}
 669
// Measures call quality with the reference file played through the fake
// device, using the audio-only call constraints.
IN_PROC_BROWSER_TEST_F(MAYBE_WebRtcAudioQualityBrowserTest,
                       MANUAL_TestCallQualityWithAudioFromFakeDevice) {
  TestWithFakeDeviceGetUserMedia(kAudioOnlyCallConstraints, "_getusermedia");
}
 674
 675 // Test the new 48KHz audio processing path.
 676 IN_PROC_BROWSER_TEST_F(MAYBE_WebRtcAudioQualityBrowserTest,
 677                        MANUAL_TestCallQualityWithAudioFromFakeDevice48Khz) {
 678   const char* kAudio48KhzAudioProcessingConstraints =
 679       "{audio: { optional: [{ googAudioProcessing48kHzSupport: true }] } }";
 680   TestWithFakeDeviceGetUserMedia(kAudio48KhzAudioProcessingConstraints,
 681                                  "_getusermedia_48khz");
 682 }
 683
 684 IN_PROC_BROWSER_TEST_F(MAYBE_WebRtcAudioQualityBrowserTest,
 685                        MANUAL_TestCallQualityWithAudioFromWebAudio) {
 686   if (OnWinXp() || OnWin8()) {
 687     // http://crbug.com/379798.
 688     LOG(ERROR) << "This test is not implemented for Windows XP/Win8.";
 689     return;
 690   }
 691   ASSERT_TRUE(test::HasReferenceFilesInCheckout());
 692   ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
 693
 694   ASSERT_TRUE(ForceMicrophoneVolumeTo100Percent());
 695
 696   content::WebContents* left_tab =
 697       OpenPageWithoutGetUserMedia(kWebRtcAudioTestHtmlPage);
 698   content::WebContents* right_tab =
 699       OpenPageWithoutGetUserMedia(kWebRtcAudioTestHtmlPage);
 700
 701   AddAudioFileToWebAudio(kReferenceFileRelativeUrl, left_tab);
 702
 703   NegotiateCall(left_tab, right_tab);
 704
 705   base::FilePath recording = CreateTemporaryWaveFile();
 706
 707   // Note: the sound clip is 21.6 seconds: record for 25 seconds to get some
 708   // safety margins on each side.
 709   AudioRecorder recorder;
 710   ASSERT_TRUE(recorder.StartRecording(base::TimeDelta::FromSeconds(25),
 711                                       recording));
 712
 713   PlayAudioFileThroughWebAudio(left_tab);
 714
 715   ASSERT_TRUE(recorder.WaitForRecordingToEnd());
 716   DVLOG(0) << "Done recording to " << recording.value() << std::endl;
 717
 718   HangUp(left_tab);
 719
 720   // Compare with the reference file on disk (this is the same file we played
 721   // through WebAudio earlier).
 722   base::FilePath reference_file =
 723       test::GetReferenceFilesDir().Append(kReferenceFile);
 724   ComputeAndPrintPesqResults(reference_file, recording, "_webaudio");
 725 }
 726
 727 /**
 728  * The auto gain control test plays a file into the fake microphone. Then it
 729  * sets up a one-way WebRTC call with audio only and records Chrome's output on
 730  * the receiving side using the audio loopback provided by the quality test
 731  * (see the class comments for more details).
 732  *
 733  * Then both the recording and reference file are split on silence. This creates
 734  * a number of segments with speech in them. The reason for this is to provide
 735  * a kind of synchronization mechanism so the start of each speech segment is
 736  * compared to the start of the corresponding speech segment. This is because we
 737  * will experience inevitable clock drift between the system clock (which runs
 738  * the fake microphone) and the sound card (which runs play-out). Effectively
 739  * re-synchronizing on each segment mitigates this.
 740  *
 741  * The silence splitting is inherently sensitive to the sound file we run on.
 742  * Therefore the reference file must have at least 500 ms of pure silence
 743  * between speech segments; the test will fail if the output produces more
 744  * segments than the reference.
 745  *
 746  * The test reports the difference in decibel between the reference and output
 747  * file per 10 ms interval in each speech segment. A value of 6 means the
 748  * output was 6 dB louder than the reference, presumably because the AGC applied
 749  * gain to the signal.
 750  *
 751  * The test only exercises digital AGC for now.
 752  *
 753  * We record in CD format here (44.1 kHz) because that's what the fake input
 754  * device currently supports, and we want to be able to compare directly. See
 755  * http://crbug.com/421054.
 756  */
 757 void MAYBE_WebRtcAudioQualityBrowserTest::TestAutoGainControl(
 758     const base::FilePath::StringType& reference_filename,
 759     const std::string& constraints,
 760     const std::string& perf_modifier) {
 761   if (OnWinXp() || OnWin8()) {
 762     // http://crbug.com/379798.
 763     LOG(ERROR) << "This test is not implemented for Windows XP/Win8.";
 764     return;
 765   }
 766   base::FilePath reference_file =
 767       test::GetReferenceFilesDir().Append(reference_filename);
 768   base::FilePath recording = CreateTemporaryWaveFile();
 769
 770   ASSERT_NO_FATAL_FAILURE(SetupAndRecordAudioCall(
 771       reference_file, recording, constraints,
 772       base::TimeDelta::FromSeconds(30)));
 773
 774   base::ScopedTempDir split_ref_files;
 775   ASSERT_TRUE(split_ref_files.CreateUniqueTempDir());
 776   ASSERT_NO_FATAL_FAILURE(
 777       SplitFileOnSilenceIntoDir(reference_file, split_ref_files.path()));
 778   std::vector<base::FilePath> ref_segments =
 779       ListWavFilesInDir(split_ref_files.path());
 780
 781   base::ScopedTempDir split_actual_files;
 782   ASSERT_TRUE(split_actual_files.CreateUniqueTempDir());
 783   ASSERT_NO_FATAL_FAILURE(
 784       SplitFileOnSilenceIntoDir(recording, split_actual_files.path()));
 785
 786   // Keep the recording and split files if the analysis fails.
 787   base::FilePath actual_files_dir = split_actual_files.Take();
 788   std::vector<base::FilePath> actual_segments =
 789       ListWavFilesInDir(actual_files_dir);
 790
 791   AnalyzeSegmentsAndPrintResult(
 792       ref_segments, actual_segments, reference_file, perf_modifier);
 793
 794   DeleteFileUnlessTestFailed(recording, false);
 795   DeleteFileUnlessTestFailed(actual_files_dir, true);
 796 }
 797
 798 // The AGC should apply non-zero gain here.
 799 IN_PROC_BROWSER_TEST_F(MAYBE_WebRtcAudioQualityBrowserTest,
 800                        MANUAL_TestAutoGainControlOnLowAudio) {
 801   ASSERT_NO_FATAL_FAILURE(TestAutoGainControl(
 802       kReferenceFile, kAudioOnlyCallConstraints, "_with_agc"));
 803 }
 804
 805 // Since the AGC is off here there should be no gain at all.
 806 IN_PROC_BROWSER_TEST_F(MAYBE_WebRtcAudioQualityBrowserTest,
 807                        MANUAL_TestAutoGainIsOffWithAudioProcessingOff) {
 808   const char* kAudioCallWithoutAudioProcessing =
 809       "{audio: { mandatory: { echoCancellation: false } } }";
 810   ASSERT_NO_FATAL_FAILURE(TestAutoGainControl(
 811       kReferenceFile, kAudioCallWithoutAudioProcessing, "_no_agc"));
 812 }