chrome/browser/media/chrome_webrtc_audio_quality_browsertest.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
#include <algorithm>
#include <cstdlib>
#include <ctime>

#include "base/command_line.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_util.h"
#include "base/files/scoped_temp_dir.h"
#include "base/process/launch.h"
#include "base/process/process.h"
#include "base/scoped_native_library.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "chrome/browser/media/webrtc_browsertest_audio.h"
#include "chrome/browser/media/webrtc_browsertest_base.h"
#include "chrome/browser/media/webrtc_browsertest_common.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/ui/browser.h"
#include "chrome/browser/ui/browser_tabstrip.h"
#include "chrome/browser/ui/tabs/tab_strip_model.h"
#include "chrome/common/chrome_paths.h"
#include "chrome/common/chrome_switches.h"
#include "chrome/test/base/ui_test_utils.h"
#include "content/public/test/browser_test_utils.h"
#include "media/audio/audio_parameters.h"
#include "media/base/media_switches.h"
#include "net/test/embedded_test_server/embedded_test_server.h"
#include "testing/perf/perf_test.h"
  33
  34 namespace {
  35
  36 static const base::FilePath::CharType kReferenceFile[] =
  37     FILE_PATH_LITERAL("speech_44kHz_16bit_stereo.wav");
  38
  39 // The javascript will load the reference file relative to its location,
  40 // which is in /webrtc on the web server. The files we are looking for are in
  41 // webrtc/resources in the chrome/test/data folder.
  42 static const char kReferenceFileRelativeUrl[] =
  43     "resources/speech_44kHz_16bit_stereo.wav";
  44
  45 static const char kWebRtcAudioTestHtmlPage[] =
  46     "/webrtc/webrtc_audio_quality_test.html";
  47
  48 // For the AGC test, there are 6 speech segments split on silence. If one
  49 // segment is significantly different in length compared to the same segment in
  50 // the reference file, there's something fishy going on.
  51 const int kMaxAgcSegmentDiffMs =
  52 #if defined(OS_MACOSX)
  53   // Something is different on Mac; http://crbug.com/477653.
  54   600;
  55 #else
  56   200;
  57 #endif
  58
  59 #if defined(OS_LINUX) || defined(OS_WIN) || defined(OS_MACOSX)
  60 #define MAYBE_WebRtcAudioQualityBrowserTest WebRtcAudioQualityBrowserTest
  61 #else
  62 // Not implemented on Android, ChromeOS etc.
  63 #define MAYBE_WebRtcAudioQualityBrowserTest DISABLED_WebRtcAudioQualityBrowserTest
  64 #endif
  65
  66 }  // namespace
  67
  68 // Test we can set up a WebRTC call and play audio through it.
  69 //
  70 // If you're not a googler and want to run this test, you need to provide a
  71 // pesq binary for your platform (and sox.exe on windows). Read more on how
  72 // resources are managed in chrome/test/data/webrtc/resources/README.
  73 //
  74 // This test will only work on machines that have been configured to record
  75 // their own input.
  76 //
  77 // On Linux:
  78 // 1. # sudo apt-get install pavucontrol sox
  79 // 2. For the user who will run the test: # pavucontrol
  80 // 3. In a separate terminal, # arecord dummy
  81 // 4. In pavucontrol, go to the recording tab.
  82 // 5. For the ALSA plugin [aplay]: ALSA Capture from, change from <x> to
  83 //    <Monitor of x>, where x is whatever your primary sound device is called.
  84 // 6. Try launching chrome as the target user on the target machine, try
  85 //    playing, say, a YouTube video, and record with # arecord -f dat tmp.dat.
  86 //    Verify the recording with aplay (should have recorded what you played
  87 //    from chrome).
  88 //
  89 // Note: the volume for ALL your input devices will be forced to 100% by
  90 //       running this test on Linux.
  91 //
  92 // On Mac:
  93 // TODO(phoglund): download sox from gs instead.
  94 // 1. Get SoundFlower: http://rogueamoeba.com/freebies/soundflower/download.php
  95 // 2. Install it + reboot.
  96 // 3. Install MacPorts (http://www.macports.org/).
  97 // 4. Install sox: sudo port install sox.
  98 // 5. (For Chrome bots) Ensure sox and rec are reachable from the env the test
//    executes in (sox and rec tend to install in /opt/, which generally isn't
 100 //    in the Chrome bots' env). For instance, run
 101 //    sudo ln -s /opt/local/bin/rec /usr/local/bin/rec
 102 //    sudo ln -s /opt/local/bin/sox /usr/local/bin/sox
 103 // 6. In Sound Preferences, set both input and output to Soundflower (2ch).
 104 //    Note: You will no longer hear audio on this machine, and it will no
 105 //    longer use any built-in mics.
 106 // 7. Try launching chrome as the target user on the target machine, try
 107 //    playing, say, a YouTube video, and record with 'rec test.wav trim 0 5'.
 108 //    Stop the video in chrome and try playing back the file; you should hear
//    a recording of the video (note; if you play back on the target machine
//    you must revert the changes in step 6 first).
 111 //
 112 // On Windows 7:
 113 // 1. Control panel > Sound > Manage audio devices.
 114 // 2. In the recording tab, right-click in an empty space in the pane with the
 115 //    devices. Tick 'show disabled devices'.
// 3. You should see a 'stereo mix' device - this is what your speakers output.
 117 //    Right click > Properties.
 118 // 4. In the Listen tab for the mix device, check the 'listen to this device'
 119 //    checkbox. Ensure the mix device is the default recording device.
// 5. Launch chrome and try playing a video with sound. You should see
//    activity in the volume meter for the mix device. Configure the mix
//    device to have 50 / 100 in level. Also go into the playback tab,
//    right-click Speakers, and set that level to 50 / 100. Otherwise you
//    will get distortion in the recording.
 125 class MAYBE_WebRtcAudioQualityBrowserTest : public WebRtcTestBase {
 126  public:
 127   MAYBE_WebRtcAudioQualityBrowserTest() {}
 128   void SetUpInProcessBrowserTestFixture() override {
 129     DetectErrorsInJavaScript();  // Look for errors in our rather complex js.
 130   }
 131
 132   void SetUpCommandLine(base::CommandLine* command_line) override {
 133     EXPECT_FALSE(command_line->HasSwitch(
 134         switches::kUseFakeUIForMediaStream));
 135
 136     // The WebAudio-based tests don't care what devices are available to
 137     // getUserMedia, and the getUserMedia-based tests will play back a file
 138     // through the fake device using using --use-file-for-fake-audio-capture.
 139     command_line->AppendSwitch(switches::kUseFakeDeviceForMediaStream);
 140   }
 141
 142   void ConfigureFakeDeviceToPlayFile(const base::FilePath& wav_file_path) {
 143     base::CommandLine::ForCurrentProcess()->AppendSwitchPath(
 144         switches::kUseFileForFakeAudioCapture, wav_file_path);
 145   }
 146
 147   void AddAudioFileToWebAudio(const std::string& input_file_relative_url,
 148                               content::WebContents* tab_contents) {
 149     // This calls into webaudio.js.
 150     EXPECT_EQ("ok-added", ExecuteJavascript(
 151         "addAudioFile('" + input_file_relative_url + "')", tab_contents));
 152   }
 153
 154   void PlayAudioFileThroughWebAudio(content::WebContents* tab_contents) {
 155     EXPECT_EQ("ok-playing", ExecuteJavascript("playAudioFile()", tab_contents));
 156   }
 157
 158   content::WebContents* OpenPageWithoutGetUserMedia(const char* url) {
 159     chrome::AddTabAt(browser(), GURL(), -1, true);
 160     ui_test_utils::NavigateToURL(
 161         browser(), embedded_test_server()->GetURL(url));
 162     content::WebContents* tab =
 163         browser()->tab_strip_model()->GetActiveWebContents();
 164
 165     // Prepare the peer connections manually in this test since we don't add
 166     // getUserMedia-derived media streams in this test like the other tests.
 167     EXPECT_EQ("ok-peerconnection-created",
 168               ExecuteJavascript("preparePeerConnection()", tab));
 169     return tab;
 170   }
 171
 172   void MuteMediaElement(const std::string& element_id,
 173                         content::WebContents* tab_contents) {
 174     EXPECT_EQ("ok-muted", ExecuteJavascript(
 175         "setMediaElementMuted('" + element_id + "', true)", tab_contents));
 176   }
 177
 178  protected:
 179   void TestAutoGainControl(const base::FilePath::StringType& reference_filename,
 180                            const std::string& constraints,
 181                            const std::string& perf_modifier);
 182   void SetupAndRecordAudioCall(const base::FilePath& reference_file,
 183                                const base::FilePath& recording,
 184                                const std::string& constraints,
 185                                const base::TimeDelta recording_time);
 186   void TestWithFakeDeviceGetUserMedia(const std::string& constraints,
 187                                       const std::string& perf_modifier);
 188 };
 189
 190 namespace {
 191
 192 class AudioRecorder {
 193  public:
 194   AudioRecorder() {}
 195   ~AudioRecorder() {}
 196
 197   // Starts the recording program for the specified duration. Returns true
 198   // on success. We record in 16-bit 44.1 kHz Stereo (mostly because that's
 199   // what SoundRecorder.exe will give us and we can't change that).
 200   bool StartRecording(base::TimeDelta recording_time,
 201                       const base::FilePath& output_file) {
 202     EXPECT_FALSE(recording_application_.IsValid())
 203         << "Tried to record, but is already recording.";
 204
 205     int duration_sec = static_cast<int>(recording_time.InSeconds());
 206     base::CommandLine command_line(base::CommandLine::NO_PROGRAM);
 207
 208 #if defined(OS_WIN)
 209     // This disable is required to run SoundRecorder.exe on 64-bit Windows
 210     // from a 32-bit binary. We need to load the wow64 disable function from
 211     // the DLL since it doesn't exist on Windows XP.
 212     base::ScopedNativeLibrary kernel32_lib(base::FilePath(L"kernel32"));
 213     if (kernel32_lib.is_valid()) {
 214       typedef BOOL (WINAPI* Wow64DisableWow64FSRedirection)(PVOID*);
 215       Wow64DisableWow64FSRedirection wow_64_disable_wow_64_fs_redirection;
 216       wow_64_disable_wow_64_fs_redirection =
 217           reinterpret_cast<Wow64DisableWow64FSRedirection>(
 218               kernel32_lib.GetFunctionPointer(
 219                   "Wow64DisableWow64FsRedirection"));
 220       if (wow_64_disable_wow_64_fs_redirection != NULL) {
 221         PVOID* ignored = NULL;
 222         wow_64_disable_wow_64_fs_redirection(ignored);
 223       }
 224     }
 225
 226     char duration_in_hms[128] = {0};
 227     struct tm duration_tm = {0};
 228     duration_tm.tm_sec = duration_sec;
 229     EXPECT_NE(0u, strftime(duration_in_hms, arraysize(duration_in_hms),
 230                            "%H:%M:%S", &duration_tm));
 231
 232     command_line.SetProgram(
 233         base::FilePath(FILE_PATH_LITERAL("SoundRecorder.exe")));
 234     command_line.AppendArg("/FILE");
 235     command_line.AppendArgPath(output_file);
 236     command_line.AppendArg("/DURATION");
 237     command_line.AppendArg(duration_in_hms);
 238 #elif defined(OS_MACOSX)
 239     command_line.SetProgram(base::FilePath("rec"));
 240     command_line.AppendArg("-b");
 241     command_line.AppendArg("16");
 242     command_line.AppendArg("-q");
 243     command_line.AppendArgPath(output_file);
 244     command_line.AppendArg("trim");
 245     command_line.AppendArg("0");
 246     command_line.AppendArg(base::IntToString(duration_sec));
 247 #else
 248     command_line.SetProgram(base::FilePath("arecord"));
 249     command_line.AppendArg("-d");
 250     command_line.AppendArg(base::IntToString(duration_sec));
 251     command_line.AppendArg("-f");
 252     command_line.AppendArg("cd");
 253     command_line.AppendArg("-c");
 254     command_line.AppendArg("2");
 255     command_line.AppendArgPath(output_file);
 256 #endif
 257
 258     DVLOG(0) << "Running " << command_line.GetCommandLineString();
 259     recording_application_ =
 260         base::LaunchProcess(command_line, base::LaunchOptions());
 261     return recording_application_.IsValid();
 262   }
 263
 264   // Joins the recording program. Returns true on success.
 265   bool WaitForRecordingToEnd() {
 266     int exit_code = -1;
 267     recording_application_.WaitForExit(&exit_code);
 268     return exit_code == 0;
 269   }
 270  private:
 271   base::Process recording_application_;
 272 };
 273
 274 bool ForceMicrophoneVolumeTo100Percent() {
 275 #if defined(OS_WIN)
 276   // Note: the force binary isn't in tools since it's one of our own.
 277   base::CommandLine command_line(test::GetReferenceFilesDir().Append(
 278       FILE_PATH_LITERAL("force_mic_volume_max.exe")));
 279   DVLOG(0) << "Running " << command_line.GetCommandLineString();
 280   std::string result;
 281   if (!base::GetAppOutput(command_line, &result)) {
 282     LOG(ERROR) << "Failed to set source volume: output was " << result;
 283     return false;
 284   }
 285 #elif defined(OS_MACOSX)
 286   base::CommandLine command_line(
 287       base::FilePath(FILE_PATH_LITERAL("osascript")));
 288   command_line.AppendArg("-e");
 289   command_line.AppendArg("set volume input volume 100");
 290   command_line.AppendArg("-e");
 291   command_line.AppendArg("set volume output volume 85");
 292
 293   std::string result;
 294   if (!base::GetAppOutput(command_line, &result)) {
 295     LOG(ERROR) << "Failed to set source volume: output was " << result;
 296     return false;
 297   }
 298 #else
 299   // Just force the volume of, say the first 5 devices. A machine will rarely
 300   // have more input sources than that. This is way easier than finding the
 301   // input device we happen to be using.
 302   for (int device_index = 0; device_index < 5; ++device_index) {
 303     std::string result;
 304     const std::string kHundredPercentVolume = "65536";
 305     base::CommandLine command_line(base::FilePath(FILE_PATH_LITERAL("pacmd")));
 306     command_line.AppendArg("set-source-volume");
 307     command_line.AppendArg(base::IntToString(device_index));
 308     command_line.AppendArg(kHundredPercentVolume);
 309     DVLOG(0) << "Running " << command_line.GetCommandLineString();
 310     if (!base::GetAppOutput(command_line, &result)) {
 311       LOG(ERROR) << "Failed to set source volume: output was " << result;
 312       return false;
 313     }
 314   }
 315 #endif
 316   return true;
 317 }
 318
 319 // Sox is the "Swiss army knife" of audio processing. We mainly use it for
 320 // silence trimming. See http://sox.sourceforge.net.
 321 base::CommandLine MakeSoxCommandLine() {
 322 #if defined(OS_WIN)
 323   base::FilePath sox_path = test::GetToolForPlatform("sox");
 324   if (!base::PathExists(sox_path)) {
 325     LOG(ERROR) << "Missing sox.exe binary in " << sox_path.value()
 326                << "; you may have to provide this binary yourself.";
 327     return base::CommandLine(base::CommandLine::NO_PROGRAM);
 328   }
 329   base::CommandLine command_line(sox_path);
 330 #else
 331   // TODO(phoglund): call checked-in sox rather than system sox on mac/linux.
 332   // Same for rec invocations on Mac, above.
 333   base::CommandLine command_line(base::FilePath(FILE_PATH_LITERAL("sox")));
 334 #endif
 335   return command_line;
 336 }
 337
 338 // Removes silence from beginning and end of the |input_audio_file| and writes
 339 // the result to the |output_audio_file|. Returns true on success.
 340 bool RemoveSilence(const base::FilePath& input_file,
 341                    const base::FilePath& output_file) {
 342   // SOX documentation for silence command: http://sox.sourceforge.net/sox.html
 343   // To remove the silence from both beginning and end of the audio file, we
 344   // call sox silence command twice: once on normal file and again on its
 345   // reverse, then we reverse the final output.
 346   // Silence parameters are (in sequence):
 347   // ABOVE_PERIODS: The period for which silence occurs. Value 1 is used for
 348   //                 silence at beginning of audio.
 349   // DURATION: the amount of time in seconds that non-silence must be detected
 350   //           before sox stops trimming audio.
 351   // THRESHOLD: value used to indicate what sample value is treats as silence.
 352   const char* kAbovePeriods = "1";
 353   const char* kDuration = "2";
 354   const char* kTreshold = "1.5%";
 355
 356   base::CommandLine command_line = MakeSoxCommandLine();
 357   if (command_line.GetProgram().empty())
 358     return false;
 359   command_line.AppendArgPath(input_file);
 360   command_line.AppendArgPath(output_file);
 361   command_line.AppendArg("silence");
 362   command_line.AppendArg(kAbovePeriods);
 363   command_line.AppendArg(kDuration);
 364   command_line.AppendArg(kTreshold);
 365   command_line.AppendArg("reverse");
 366   command_line.AppendArg("silence");
 367   command_line.AppendArg(kAbovePeriods);
 368   command_line.AppendArg(kDuration);
 369   command_line.AppendArg(kTreshold);
 370   command_line.AppendArg("reverse");
 371
 372   DVLOG(0) << "Running " << command_line.GetCommandLineString();
 373   std::string result;
 374   bool ok = base::GetAppOutput(command_line, &result);
 375   DVLOG(0) << "Output was:\n\n" << result;
 376   return ok;
 377 }
 378
 379 // Looks for 0.2 second audio segments surrounded by silences under 0.3% audio
 380 // power and splits the input file on those silences. Output files are written
 381 // according to the output file template (e.g. /tmp/out.wav writes
 382 // /tmp/out001.wav, /tmp/out002.wav, etc if there are two silence-padded
 383 // regions in the file). The silences between speech segments must be at
 384 // least 500 ms for this to be reliable.
 385 bool SplitFileOnSilence(const base::FilePath& input_file,
 386                         const base::FilePath& output_file_template) {
 387   base::CommandLine command_line = MakeSoxCommandLine();
 388   if (command_line.GetProgram().empty())
 389     return false;
 390
 391   // These are experimentally determined and work on the files we use.
 392   const char* kAbovePeriods = "1";
 393   const char* kUnderPeriods = "1";
 394   const char* kDuration = "0.2";
 395   const char* kTreshold = "0.5%";
 396   command_line.AppendArgPath(input_file);
 397   command_line.AppendArgPath(output_file_template);
 398   command_line.AppendArg("silence");
 399   command_line.AppendArg(kAbovePeriods);
 400   command_line.AppendArg(kDuration);
 401   command_line.AppendArg(kTreshold);
 402   command_line.AppendArg(kUnderPeriods);
 403   command_line.AppendArg(kDuration);
 404   command_line.AppendArg(kTreshold);
 405   command_line.AppendArg(":");
 406   command_line.AppendArg("newfile");
 407   command_line.AppendArg(":");
 408   command_line.AppendArg("restart");
 409
 410   DVLOG(0) << "Running " << command_line.GetCommandLineString();
 411   std::string result;
 412   bool ok = base::GetAppOutput(command_line, &result);
 413   DVLOG(0) << "Output was:\n\n" << result;
 414   return ok;
 415 }
 416
 417 bool CanParseAsFloat(const std::string& value) {
 418   return atof(value.c_str()) != 0 || value == "0";
 419 }
 420
 421 // Runs PESQ to compare |reference_file| to a |actual_file|. The |sample_rate|
 422 // can be either 16000 or 8000.
 423 //
 424 // PESQ is only mono-aware, so the files should preferably be recorded in mono.
 425 // Furthermore it expects the file to be 16 rather than 32 bits, even though
 426 // 32 bits might work. The audio bandwidth of the two files should be the same
 427 // e.g. don't compare a 32 kHz file to a 8 kHz file.
 428 //
 429 // The raw score in MOS is written to |raw_mos|, whereas the MOS-LQO score is
 430 // written to mos_lqo. The scores are returned as floats in string form (e.g.
 431 // "3.145", etc). Returns true on success.
 432 bool RunPesq(const base::FilePath& reference_file,
 433              const base::FilePath& actual_file,
 434              int sample_rate, std::string* raw_mos, std::string* mos_lqo) {
 435   // PESQ will break if the paths are too long (!).
 436   EXPECT_LT(reference_file.value().length(), 128u);
 437   EXPECT_LT(actual_file.value().length(), 128u);
 438
 439   base::FilePath pesq_path = test::GetToolForPlatform("pesq");
 440   if (!base::PathExists(pesq_path)) {
 441     LOG(ERROR) << "Missing PESQ binary in " << pesq_path.value()
 442                << "; you may have to provide this binary yourself.";
 443     return false;
 444   }
 445
 446   base::CommandLine command_line(pesq_path);
 447   command_line.AppendArg(base::StringPrintf("+%d", sample_rate));
 448   command_line.AppendArgPath(reference_file);
 449   command_line.AppendArgPath(actual_file);
 450
 451   DVLOG(0) << "Running " << command_line.GetCommandLineString();
 452   std::string result;
 453   if (!base::GetAppOutput(command_line, &result)) {
 454     LOG(ERROR) << "Failed to run PESQ.";
 455     return false;
 456   }
 457   DVLOG(0) << "Output was:\n\n" << result;
 458
 459   const std::string result_anchor = "Prediction (Raw MOS, MOS-LQO):  = ";
 460   std::size_t anchor_pos = result.find(result_anchor);
 461   if (anchor_pos == std::string::npos) {
 462     LOG(ERROR) << "PESQ was not able to compute a score; we probably recorded "
 463         << "only silence. Please check the output/input volume levels.";
 464     return false;
 465   }
 466
 467   // There are two tab-separated numbers on the format x.xxx, e.g. 5 chars each.
 468   std::size_t first_number_pos = anchor_pos + result_anchor.length();
 469   *raw_mos = result.substr(first_number_pos, 5);
 470   EXPECT_TRUE(CanParseAsFloat(*raw_mos)) << "Failed to parse raw MOS number.";
 471   *mos_lqo = result.substr(first_number_pos + 5 + 1, 5);
 472   EXPECT_TRUE(CanParseAsFloat(*mos_lqo)) << "Failed to parse MOS LQO number.";
 473
 474   return true;
 475 }
 476
 477 base::FilePath CreateTemporaryWaveFile() {
 478   base::FilePath filename;
 479   EXPECT_TRUE(base::CreateTemporaryFile(&filename));
 480   base::FilePath wav_filename =
 481       filename.AddExtension(FILE_PATH_LITERAL(".wav"));
 482   EXPECT_TRUE(base::Move(filename, wav_filename));
 483   return wav_filename;
 484 }
 485
 486 void DeleteFileUnlessTestFailed(const base::FilePath& path, bool recursive) {
 487   if (::testing::Test::HasFailure())
 488     printf("Test failed; keeping recording(s) at\n\t%" PRFilePath ".\n",
 489            path.value().c_str());
 490   else
 491     EXPECT_TRUE(base::DeleteFile(path, recursive));
 492 }
 493
 494 std::vector<base::FilePath> ListWavFilesInDir(const base::FilePath& dir) {
 495   base::FileEnumerator files(dir, false, base::FileEnumerator::FILES,
 496                              FILE_PATH_LITERAL("*.wav"));
 497
 498   std::vector<base::FilePath> result;
 499   for (base::FilePath name = files.Next(); !name.empty(); name = files.Next())
 500     result.push_back(name);
 501   return result;
 502 }
 503
 504 // Splits |to_split| into sub-files based on silence. The file you use must have
 505 // at least 500 ms periods of silence between speech segments for this to be
 506 // reliable.
 507 void SplitFileOnSilenceIntoDir(const base::FilePath& to_split,
 508                                const base::FilePath& workdir) {
 509   // First trim beginning and end since they are tricky for the splitter.
 510   base::FilePath trimmed_audio = CreateTemporaryWaveFile();
 511
 512   ASSERT_TRUE(RemoveSilence(to_split, trimmed_audio));
 513   DVLOG(0) << "Trimmed silence: " << trimmed_audio.value() << std::endl;
 514
 515   ASSERT_TRUE(SplitFileOnSilence(
 516       trimmed_audio, workdir.Append(FILE_PATH_LITERAL("output.wav"))));
 517   DeleteFileUnlessTestFailed(trimmed_audio, false);
 518 }
 519
 520 // Computes the difference between the actual and reference segment. A positive
 521 // number x means the actual file is x dB stronger than the reference.
 522 float AnalyzeOneSegment(const base::FilePath& ref_segment,
 523                         const base::FilePath& actual_segment,
 524                         int segment_number) {
 525   media::AudioParameters ref_parameters;
 526   media::AudioParameters actual_parameters;
 527   float ref_energy =
 528       test::ComputeAudioEnergyForWavFile(ref_segment, &ref_parameters);
 529   float actual_energy =
 530       test::ComputeAudioEnergyForWavFile(actual_segment, &actual_parameters);
 531
 532   base::TimeDelta difference_in_length = ref_parameters.GetBufferDuration() -
 533                                          actual_parameters.GetBufferDuration();
 534
 535   EXPECT_LE(difference_in_length,
 536             base::TimeDelta::FromMilliseconds(kMaxAgcSegmentDiffMs))
 537       << "Segments differ " << difference_in_length.InMilliseconds() << " ms "
 538       << "in length for segment " << segment_number << "; we're likely "
 539       << "comparing unrelated segments or silence splitting is busted.";
 540
 541   return actual_energy - ref_energy;
 542 }
 543
 544 std::string MakeTraceName(const base::FilePath& ref_filename,
 545                           size_t segment_number) {
 546   std::string ascii_filename;
 547 #if defined(OS_WIN)
 548   ascii_filename = base::WideToUTF8(ref_filename.BaseName().value());
 549 #else
 550   ascii_filename = ref_filename.BaseName().value();
 551 #endif
 552   return base::StringPrintf(
 553       "%s_segment_%d", ascii_filename.c_str(), (int)segment_number);
 554 }
 555
 556 void AnalyzeSegmentsAndPrintResult(
 557     const std::vector<base::FilePath>& ref_segments,
 558     const std::vector<base::FilePath>& actual_segments,
 559     const base::FilePath& reference_file,
 560     const std::string& perf_modifier) {
 561   ASSERT_GT(ref_segments.size(), 0u)
 562       << "Failed to split reference file on silence; sox is likely broken.";
 563   ASSERT_EQ(ref_segments.size(), actual_segments.size())
 564       << "The recording did not result in the same number of audio segments "
 565       << "after on splitting on silence; WebRTC must have deformed the audio "
 566       << "too much.";
 567
 568   for (size_t i = 0; i < ref_segments.size(); i++) {
 569     float difference_in_decibel = AnalyzeOneSegment(ref_segments[i],
 570                                                     actual_segments[i],
 571                                                     i);
 572     std::string trace_name = MakeTraceName(reference_file, i);
 573     perf_test::PrintResult("agc_energy_diff", perf_modifier, trace_name,
 574                            difference_in_decibel, "dB", false);
 575   }
 576 }
 577
 578 void ComputeAndPrintPesqResults(const base::FilePath& reference_file,
 579                                 const base::FilePath& recording,
 580                                 const std::string& perf_modifier) {
 581   base::FilePath trimmed_reference = CreateTemporaryWaveFile();
 582   base::FilePath trimmed_recording = CreateTemporaryWaveFile();
 583
 584   ASSERT_TRUE(RemoveSilence(reference_file, trimmed_reference));
 585   ASSERT_TRUE(RemoveSilence(recording, trimmed_recording));
 586
 587   std::string raw_mos;
 588   std::string mos_lqo;
 589   bool succeeded = RunPesq(trimmed_reference, trimmed_recording, 16000,
 590                            &raw_mos, &mos_lqo);
 591   EXPECT_TRUE(succeeded) << "Failed to run PESQ.";
 592   if (succeeded) {
 593     perf_test::PrintResult(
 594         "audio_pesq", perf_modifier, "raw_mos", raw_mos, "score", true);
 595     perf_test::PrintResult(
 596         "audio_pesq", perf_modifier, "mos_lqo", mos_lqo, "score", true);
 597   }
 598
 599   DeleteFileUnlessTestFailed(trimmed_reference, false);
 600   DeleteFileUnlessTestFailed(trimmed_recording, false);
 601 }
 602
 603 }  // namespace
 604
 605 // Sets up a two-way WebRTC call and records its output to |recording|, using
 606 // getUserMedia.
 607 //
 608 // |reference_file| should have at least five seconds of silence in the
 609 // beginning: otherwise all the reference audio will not be picked up by the
 610 // recording. Note that the reference file will start playing as soon as the
 611 // audio device is up following the getUserMedia call in the left tab. The time
 612 // it takes to negotiate a call isn't deterministic, but five seconds should be
 613 // plenty of time. Similarly, the recording time should be enough to catch the
 614 // whole reference file. If you then silence-trim the reference file and actual
 615 // file, you should end up with two time-synchronized files.
 616 void MAYBE_WebRtcAudioQualityBrowserTest::SetupAndRecordAudioCall(
 617     const base::FilePath& reference_file,
 618     const base::FilePath& recording,
 619     const std::string& constraints,
 620     const base::TimeDelta recording_time) {
 621   ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
 622   ASSERT_TRUE(test::HasReferenceFilesInCheckout());
 623   ASSERT_TRUE(ForceMicrophoneVolumeTo100Percent());
 624
 625   ConfigureFakeDeviceToPlayFile(reference_file);
 626
 627   // Create a two-way call. Mute one of the receivers though; that way it will
 628   // be receiving audio bytes, but we will not be playing out of both elements.
 629   GURL test_page = embedded_test_server()->GetURL(kWebRtcAudioTestHtmlPage);
 630   content::WebContents* left_tab =
 631       OpenPageAndGetUserMediaInNewTabWithConstraints(test_page, constraints);
 632   SetupPeerconnectionWithLocalStream(left_tab);
 633   MuteMediaElement("remote-view", left_tab);
 634
 635   content::WebContents* right_tab =
 636       OpenPageAndGetUserMediaInNewTabWithConstraints(test_page, constraints);
 637   SetupPeerconnectionWithLocalStream(right_tab);
 638
 639   AudioRecorder recorder;
 640   ASSERT_TRUE(recorder.StartRecording(recording_time, recording));
 641
 642   NegotiateCall(left_tab, right_tab);
 643
 644   ASSERT_TRUE(recorder.WaitForRecordingToEnd());
 645   DVLOG(0) << "Done recording to " << recording.value() << std::endl;
 646
 647   HangUp(left_tab);
 648 }
 649
 650 void MAYBE_WebRtcAudioQualityBrowserTest::TestWithFakeDeviceGetUserMedia(
 651     const std::string& constraints,
 652     const std::string& perf_modifier) {
 653   if (OnWinXp() || OnWin8()) {
 654     // http://crbug.com/379798.
 655     LOG(ERROR) << "This test is not implemented for Windows XP/Win8.";
 656     return;
 657   }
 658
 659   base::FilePath reference_file =
 660       test::GetReferenceFilesDir().Append(kReferenceFile);
 661   base::FilePath recording = CreateTemporaryWaveFile();
 662
 663   ASSERT_NO_FATAL_FAILURE(SetupAndRecordAudioCall(
 664       reference_file, recording, constraints,
 665       base::TimeDelta::FromSeconds(30)));
 666
 667   ComputeAndPrintPesqResults(reference_file, recording, perf_modifier);
 668   DeleteFileUnlessTestFailed(recording, false);
 669 }
 670
 671 IN_PROC_BROWSER_TEST_F(MAYBE_WebRtcAudioQualityBrowserTest,
 672                        MANUAL_TestCallQualityWithAudioFromFakeDevice) {
 673   TestWithFakeDeviceGetUserMedia(kAudioOnlyCallConstraints, "_getusermedia");
 674 }
 675
 676 IN_PROC_BROWSER_TEST_F(MAYBE_WebRtcAudioQualityBrowserTest,
 677                        MANUAL_TestCallQualityWithAudioFromWebAudio) {
 678   if (OnWinXp() || OnWin8()) {
 679     // http://crbug.com/379798.
 680     LOG(ERROR) << "This test is not implemented for Windows XP/Win8.";
 681     return;
 682   }
 683   ASSERT_TRUE(test::HasReferenceFilesInCheckout());
 684   ASSERT_TRUE(embedded_test_server()->InitializeAndWaitUntilReady());
 685
 686   ASSERT_TRUE(ForceMicrophoneVolumeTo100Percent());
 687
 688   content::WebContents* left_tab =
 689       OpenPageWithoutGetUserMedia(kWebRtcAudioTestHtmlPage);
 690   content::WebContents* right_tab =
 691       OpenPageWithoutGetUserMedia(kWebRtcAudioTestHtmlPage);
 692
 693   AddAudioFileToWebAudio(kReferenceFileRelativeUrl, left_tab);
 694
 695   NegotiateCall(left_tab, right_tab);
 696
 697   base::FilePath recording = CreateTemporaryWaveFile();
 698
 699   // Note: the sound clip is 21.6 seconds: record for 25 seconds to get some
 700   // safety margins on each side.
 701   AudioRecorder recorder;
 702   ASSERT_TRUE(recorder.StartRecording(base::TimeDelta::FromSeconds(25),
 703                                       recording));
 704
 705   PlayAudioFileThroughWebAudio(left_tab);
 706
 707   ASSERT_TRUE(recorder.WaitForRecordingToEnd());
 708   DVLOG(0) << "Done recording to " << recording.value() << std::endl;
 709
 710   HangUp(left_tab);
 711
 712   // Compare with the reference file on disk (this is the same file we played
 713   // through WebAudio earlier).
 714   base::FilePath reference_file =
 715       test::GetReferenceFilesDir().Append(kReferenceFile);
 716   ComputeAndPrintPesqResults(reference_file, recording, "_webaudio");
 717 }
 718
 719 /**
 720  * The auto gain control test plays a file into the fake microphone. Then it
 721  * sets up a one-way WebRTC call with audio only and records Chrome's output on
 722  * the receiving side using the audio loopback provided by the quality test
 723  * (see the class comments for more details).
 724  *
 725  * Then both the recording and reference file are split on silence. This creates
 726  * a number of segments with speech in them. The reason for this is to provide
 727  * a kind of synchronization mechanism so the start of each speech segment is
 728  * compared to the start of the corresponding speech segment. This is because we
 729  * will experience inevitable clock drift between the system clock (which runs
 730  * the fake microphone) and the sound card (which runs play-out). Effectively
 731  * re-synchronizing on each segment mitigates this.
 732  *
 733  * The silence splitting is inherently sensitive to the sound file we run on.
 734  * Therefore the reference file must have at least 500 ms of pure silence
 735  * between speech segments; the test will fail if the output produces more
 736  * segments than the reference.
 737  *
 738  * The test reports the difference in decibel between the reference and output
 739  * file per 10 ms interval in each speech segment. A value of 6 means the
 740  * output was 6 dB louder than the reference, presumably because the AGC applied
 741  * gain to the signal.
 742  *
 743  * The test only exercises digital AGC for now.
 744  *
 745  * We record in CD format here (44.1 kHz) because that's what the fake input
 746  * device currently supports, and we want to be able to compare directly. See
 747  * http://crbug.com/421054.
 748  */
 749 void MAYBE_WebRtcAudioQualityBrowserTest::TestAutoGainControl(
 750     const base::FilePath::StringType& reference_filename,
 751     const std::string& constraints,
 752     const std::string& perf_modifier) {
 753   if (OnWinXp() || OnWin8()) {
 754     // http://crbug.com/379798.
 755     LOG(ERROR) << "This test is not implemented for Windows XP/Win8.";
 756     return;
 757   }
 758   base::FilePath reference_file =
 759       test::GetReferenceFilesDir().Append(reference_filename);
 760   base::FilePath recording = CreateTemporaryWaveFile();
 761
 762   ASSERT_NO_FATAL_FAILURE(SetupAndRecordAudioCall(
 763       reference_file, recording, constraints,
 764       base::TimeDelta::FromSeconds(30)));
 765
 766   base::ScopedTempDir split_ref_files;
 767   ASSERT_TRUE(split_ref_files.CreateUniqueTempDir());
 768   ASSERT_NO_FATAL_FAILURE(
 769       SplitFileOnSilenceIntoDir(reference_file, split_ref_files.path()));
 770   std::vector<base::FilePath> ref_segments =
 771       ListWavFilesInDir(split_ref_files.path());
 772
 773   base::ScopedTempDir split_actual_files;
 774   ASSERT_TRUE(split_actual_files.CreateUniqueTempDir());
 775   ASSERT_NO_FATAL_FAILURE(
 776       SplitFileOnSilenceIntoDir(recording, split_actual_files.path()));
 777
 778   // Keep the recording and split files if the analysis fails.
 779   base::FilePath actual_files_dir = split_actual_files.Take();
 780   std::vector<base::FilePath> actual_segments =
 781       ListWavFilesInDir(actual_files_dir);
 782
 783   AnalyzeSegmentsAndPrintResult(
 784       ref_segments, actual_segments, reference_file, perf_modifier);
 785
 786   DeleteFileUnlessTestFailed(recording, false);
 787   DeleteFileUnlessTestFailed(actual_files_dir, true);
 788 }
 789
 790 // The AGC should apply non-zero gain here.
 791 IN_PROC_BROWSER_TEST_F(MAYBE_WebRtcAudioQualityBrowserTest,
 792                        MANUAL_TestAutoGainControlOnLowAudio) {
 793   ASSERT_NO_FATAL_FAILURE(TestAutoGainControl(
 794       kReferenceFile, kAudioOnlyCallConstraints, "_with_agc"));
 795 }
 796
 797 // Since the AGC is off here there should be no gain at all.
 798 IN_PROC_BROWSER_TEST_F(MAYBE_WebRtcAudioQualityBrowserTest,
 799                        MANUAL_TestAutoGainIsOffWithAudioProcessingOff) {
 800   const char* kAudioCallWithoutAudioProcessing =
 801       "{audio: { mandatory: { echoCancellation: false } } }";
 802   ASSERT_NO_FATAL_FAILURE(TestAutoGainControl(
 803       kReferenceFile, kAudioCallWithoutAudioProcessing, "_no_agc"));
 804 }