content/renderer/media/media_stream_audio_processor_options.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "content/renderer/media/media_stream_audio_processor_options.h"
   6
   7 #include "base/files/file_path.h"
   8 #include "base/files/file_util.h"
   9 #include "base/logging.h"
  10 #include "base/metrics/field_trial.h"
  11 #include "base/metrics/histogram.h"
  12 #include "base/strings/string_number_conversions.h"
  13 #include "base/strings/string_split.h"
  14 #include "base/strings/string_util.h"
  15 #include "base/strings/utf_string_conversions.h"
  16 #include "content/common/media/media_stream_options.h"
  17 #include "content/renderer/media/media_stream_constraints_util.h"
  18 #include "content/renderer/media/media_stream_source.h"
  19 #include "content/renderer/media/rtc_media_constraints.h"
  20 #include "media/audio/audio_parameters.h"
  21 #include "third_party/webrtc/modules/audio_processing/include/audio_processing.h"
  22 #include "third_party/webrtc/modules/audio_processing/typing_detection.h"
  23
  24 namespace content {
  25
   26 const char MediaAudioConstraints::kEchoCancellation[] = "echoCancellation";
      // Constraint-name strings for MediaAudioConstraints. All names defined below
      // carry a "goog" prefix; each "Experimental" constant is spelled as its base
      // constraint name followed by "2".
   27 const char MediaAudioConstraints::kGoogEchoCancellation[] =
   28     "googEchoCancellation";
   29 const char MediaAudioConstraints::kGoogExperimentalEchoCancellation[] =
   30     "googEchoCancellation2";
   31 const char MediaAudioConstraints::kGoogAutoGainControl[] =
   32     "googAutoGainControl";
   33 const char MediaAudioConstraints::kGoogExperimentalAutoGainControl[] =
   34     "googAutoGainControl2";
   35 const char MediaAudioConstraints::kGoogNoiseSuppression[] =
   36     "googNoiseSuppression";
   37 const char MediaAudioConstraints::kGoogExperimentalNoiseSuppression[] =
   38     "googNoiseSuppression2";
   39 const char MediaAudioConstraints::kGoogBeamforming[] = "googBeamforming";
      // Unlike its neighbours, kGoogArrayGeometry has no entry in the boolean
      // default table |kDefaultAudioConstraints| defined later in this file.
   40 const char MediaAudioConstraints::kGoogArrayGeometry[] = "googArrayGeometry";
   41 const char MediaAudioConstraints::kGoogHighpassFilter[] = "googHighpassFilter";
   42 const char MediaAudioConstraints::kGoogTypingNoiseDetection[] =
   43     "googTypingNoiseDetection";
   44 const char MediaAudioConstraints::kGoogAudioMirroring[] = "googAudioMirroring";
  45
  46 namespace {
  47
   48 // Default boolean value for each audio constraint key known to this file. A
   49 // default applies when the constraint is not explicitly specified.
      // The keys listed here are also the only mandatory constraint names that
      // MediaAudioConstraints::IsValid() accepts (besides the source-related ones
      // it explicitly ignores). Some defaults are platform dependent; see the
      // preprocessor blocks below.
   50 struct {
   51   const char* key;
   52   bool value;
   53 } const kDefaultAudioConstraints[] = {
   54   { MediaAudioConstraints::kEchoCancellation, true },
   55   { MediaAudioConstraints::kGoogEchoCancellation, true },
   56 #if defined(OS_ANDROID) || defined(OS_IOS)
   57   { MediaAudioConstraints::kGoogExperimentalEchoCancellation, false },
   58 #else
   59   // Enable the extended filter mode AEC on all non-mobile platforms.
   60   { MediaAudioConstraints::kGoogExperimentalEchoCancellation, true },
   61 #endif
   62   { MediaAudioConstraints::kGoogAutoGainControl, true },
   63   { MediaAudioConstraints::kGoogExperimentalAutoGainControl, true },
   64   { MediaAudioConstraints::kGoogNoiseSuppression, true },
   65   { MediaAudioConstraints::kGoogHighpassFilter, true },
   66   { MediaAudioConstraints::kGoogTypingNoiseDetection, true },
   67   { MediaAudioConstraints::kGoogExperimentalNoiseSuppression, false },
   68   { MediaAudioConstraints::kGoogBeamforming, false },
      // Audio ducking defaults to on for Windows builds only.
   69 #if defined(OS_WIN)
   70   { kMediaStreamAudioDucking, true },
   71 #else
   72   { kMediaStreamAudioDucking, false },
   73 #endif
   74   { kMediaStreamAudioHotword, false },
   75 };
  76
  77 bool IsAudioProcessingConstraint(const std::string& key) {
  78   // |kMediaStreamAudioDucking| does not require audio processing.
  79   return key != kMediaStreamAudioDucking;
  80 }
  81
  82 // Used to log echo quality based on delay estimates.
  83 enum DelayBasedEchoQuality {
  84   DELAY_BASED_ECHO_QUALITY_GOOD = 0,
  85   DELAY_BASED_ECHO_QUALITY_SPURIOUS,
  86   DELAY_BASED_ECHO_QUALITY_BAD,
  87   DELAY_BASED_ECHO_QUALITY_INVALID,
  88   DELAY_BASED_ECHO_QUALITY_MAX
  89 };
  90
  91 DelayBasedEchoQuality EchoDelayFrequencyToQuality(float delay_frequency) {
  92   const float kEchoDelayFrequencyLowerLimit = 0.1f;
  93   const float kEchoDelayFrequencyUpperLimit = 0.8f;
  94   // DELAY_BASED_ECHO_QUALITY_GOOD
  95   //   delay is out of bounds during at most 10 % of the time.
  96   // DELAY_BASED_ECHO_QUALITY_SPURIOUS
  97   //   delay is out of bounds 10-80 % of the time.
  98   // DELAY_BASED_ECHO_QUALITY_BAD
  99   //   delay is mostly out of bounds >= 80 % of the time.
 100   // DELAY_BASED_ECHO_QUALITY_INVALID
 101   //   delay_frequency is negative which happens if we have insufficient data.
 102   if (delay_frequency < 0)
 103     return DELAY_BASED_ECHO_QUALITY_INVALID;
 104   else if (delay_frequency <= kEchoDelayFrequencyLowerLimit)
 105     return DELAY_BASED_ECHO_QUALITY_GOOD;
 106   else if (delay_frequency < kEchoDelayFrequencyUpperLimit)
 107     return DELAY_BASED_ECHO_QUALITY_SPURIOUS;
 108   else
 109     return DELAY_BASED_ECHO_QUALITY_BAD;
 110 }
 111
 112 }  // namespace
 113
 114 // TODO(xians): Remove this method after the APM in WebRtc is deprecated.
 115 void MediaAudioConstraints::ApplyFixedAudioConstraints(
 116     RTCMediaConstraints* constraints) {
 117   for (size_t i = 0; i < arraysize(kDefaultAudioConstraints); ++i) {
 118     bool already_set_value;
 119     if (!webrtc::FindConstraint(constraints, kDefaultAudioConstraints[i].key,
 120                                 &already_set_value, NULL)) {
 121       const std::string value = kDefaultAudioConstraints[i].value ?
 122           webrtc::MediaConstraintsInterface::kValueTrue :
 123           webrtc::MediaConstraintsInterface::kValueFalse;
 124       constraints->AddOptional(kDefaultAudioConstraints[i].key, value, false);
 125     } else {
 126       DVLOG(1) << "Constraint " << kDefaultAudioConstraints[i].key
 127                << " already set to " << already_set_value;
 128     }
 129   }
 130 }
 131
 132 MediaAudioConstraints::MediaAudioConstraints(
 133     const blink::WebMediaConstraints& constraints, int effects)
 134     : constraints_(constraints),
 135       effects_(effects),
 136       default_audio_processing_constraint_value_(true) {
 137   // The default audio processing constraints are turned off when
 138   // - gUM has a specific kMediaStreamSource, which is used by tab capture
 139   //   and screen capture.
 140   // - |kEchoCancellation| is explicitly set to false.
 141   std::string value_str;
 142   bool value_bool = false;
 143   if ((GetConstraintValueAsString(constraints, kMediaStreamSource,
 144                                   &value_str)) ||
 145       (GetConstraintValueAsBoolean(constraints_, kEchoCancellation,
 146                                    &value_bool) && !value_bool)) {
 147     default_audio_processing_constraint_value_ = false;
 148   }
 149 }
 150
 151 MediaAudioConstraints::~MediaAudioConstraints() {}
 152
 153 bool MediaAudioConstraints::GetProperty(const std::string& key) const {
 154   // Return the value if the constraint is specified in |constraints|,
 155   // otherwise return the default value.
 156   bool value = false;
 157   if (!GetConstraintValueAsBoolean(constraints_, key, &value))
 158     value = GetDefaultValueForConstraint(constraints_, key);
 159
 160   return value;
 161 }
 162
 163 std::string MediaAudioConstraints::GetPropertyAsString(
 164     const std::string& key) const {
 165   std::string value;
 166   GetConstraintValueAsString(constraints_, key, &value);
 167   return value;
 168 }
 169
 170 bool MediaAudioConstraints::GetEchoCancellationProperty() const {
 171   // If platform echo canceller is enabled, disable the software AEC.
 172   if (effects_ & media::AudioParameters::ECHO_CANCELLER)
 173     return false;
 174
 175   // If |kEchoCancellation| is specified in the constraints, it will
 176   // override the value of |kGoogEchoCancellation|.
 177   bool value = false;
 178   if (GetConstraintValueAsBoolean(constraints_, kEchoCancellation, &value))
 179     return value;
 180
 181   return GetProperty(kGoogEchoCancellation);
 182 }
 183
 184 bool MediaAudioConstraints::IsValid() const {
 185   blink::WebVector<blink::WebMediaConstraint> mandatory;
 186   constraints_.getMandatoryConstraints(mandatory);
 187   for (size_t i = 0; i < mandatory.size(); ++i) {
 188     const std::string key = mandatory[i].m_name.utf8();
 189     if (key == kMediaStreamSource || key == kMediaStreamSourceId ||
 190         key == MediaStreamSource::kSourceId) {
 191       // Ignore Chrome specific Tab capture and |kSourceId| constraints.
 192       continue;
 193     }
 194
 195     bool valid = false;
 196     for (size_t j = 0; j < arraysize(kDefaultAudioConstraints); ++j) {
 197       if (key == kDefaultAudioConstraints[j].key) {
 198         bool value = false;
 199         valid = GetMandatoryConstraintValueAsBoolean(constraints_, key, &value);
 200         break;
 201       }
 202     }
 203
 204     if (!valid) {
 205       DLOG(ERROR) << "Invalid MediaStream constraint. Name: " << key;
 206       return false;
 207     }
 208   }
 209
 210   return true;
 211 }
 212
 213 bool MediaAudioConstraints::GetDefaultValueForConstraint(
 214     const blink::WebMediaConstraints& constraints,
 215     const std::string& key) const {
 216   // |kMediaStreamAudioDucking| is not restricted by
 217   // |default_audio_processing_constraint_value_| since it does not require
 218   // audio processing.
 219   if (!default_audio_processing_constraint_value_ &&
 220       IsAudioProcessingConstraint(key))
 221     return false;
 222
 223   for (size_t i = 0; i < arraysize(kDefaultAudioConstraints); ++i) {
 224     if (kDefaultAudioConstraints[i].key == key)
 225       return kDefaultAudioConstraints[i].value;
 226   }
 227
 228   return false;
 229 }
 230
 231 EchoInformation::EchoInformation()
 232     : num_chunks_(0), echo_frames_received_(false) {
 233 }
 234
 235 EchoInformation::~EchoInformation() {}
 236
 237 void EchoInformation::UpdateAecDelayStats(
 238     webrtc::EchoCancellation* echo_cancellation) {
 239   // Only start collecting stats if we know echo cancellation has measured an
 240   // echo. Otherwise we clutter the stats with for example cases where only the
 241   // microphone is used.
 242   if (!echo_frames_received_ & !echo_cancellation->stream_has_echo())
 243     return;
 244
 245   echo_frames_received_ = true;
 246   // In WebRTC, three echo delay metrics are calculated and updated every
 247   // five seconds. We use one of them, |fraction_poor_delays| to log in a UMA
 248   // histogram an Echo Cancellation quality metric. The stat in WebRTC has a
 249   // fixed aggregation window of five seconds, so we use the same query
 250   // frequency to avoid logging old values.
 251   const int kNumChunksInFiveSeconds = 500;
 252   if (!echo_cancellation->is_delay_logging_enabled() ||
 253       !echo_cancellation->is_enabled()) {
 254     return;
 255   }
 256
 257   num_chunks_++;
 258   if (num_chunks_ < kNumChunksInFiveSeconds) {
 259     return;
 260   }
 261
 262   int dummy_median = 0, dummy_std = 0;
 263   float fraction_poor_delays = 0;
 264   if (echo_cancellation->GetDelayMetrics(
 265           &dummy_median, &dummy_std, &fraction_poor_delays) ==
 266       webrtc::AudioProcessing::kNoError) {
 267     num_chunks_ = 0;
 268     // Map |fraction_poor_delays| to an Echo Cancellation quality and log in UMA
 269     // histogram. See DelayBasedEchoQuality for information on histogram
 270     // buckets.
 271     UMA_HISTOGRAM_ENUMERATION("WebRTC.AecDelayBasedQuality",
 272                               EchoDelayFrequencyToQuality(fraction_poor_delays),
 273                               DELAY_BASED_ECHO_QUALITY_MAX);
 274   }
 275 }
 276
 277 void EnableEchoCancellation(AudioProcessing* audio_processing) {
 278 #if defined(OS_ANDROID) || defined(OS_IOS)
 279   const std::string group_name =
 280       base::FieldTrialList::FindFullName("ReplaceAECMWithAEC");
 281   if (group_name.empty() ||
 282       !(group_name == "Enabled" || group_name == "DefaultEnabled")) {
 283     // Mobile devices are using AECM.
 284     int err = audio_processing->echo_control_mobile()->set_routing_mode(
 285         webrtc::EchoControlMobile::kSpeakerphone);
 286     err |= audio_processing->echo_control_mobile()->Enable(true);
 287     CHECK_EQ(err, 0);
 288     return;
 289   }
 290 #endif
 291   int err = audio_processing->echo_cancellation()->set_suppression_level(
 292       webrtc::EchoCancellation::kHighSuppression);
 293
 294   // Enable the metrics for AEC.
 295   err |= audio_processing->echo_cancellation()->enable_metrics(true);
 296   err |= audio_processing->echo_cancellation()->enable_delay_logging(true);
 297   err |= audio_processing->echo_cancellation()->Enable(true);
 298   CHECK_EQ(err, 0);
 299 }
 300
 301 void EnableNoiseSuppression(AudioProcessing* audio_processing,
 302                             webrtc::NoiseSuppression::Level ns_level) {
 303   int err = audio_processing->noise_suppression()->set_level(ns_level);
 304   err |= audio_processing->noise_suppression()->Enable(true);
 305   CHECK_EQ(err, 0);
 306 }
 307
 308 void EnableHighPassFilter(AudioProcessing* audio_processing) {
 309   CHECK_EQ(audio_processing->high_pass_filter()->Enable(true), 0);
 310 }
 311
 312 void EnableTypingDetection(AudioProcessing* audio_processing,
 313                            webrtc::TypingDetection* typing_detector) {
 314   int err = audio_processing->voice_detection()->Enable(true);
 315   err |= audio_processing->voice_detection()->set_likelihood(
 316       webrtc::VoiceDetection::kVeryLowLikelihood);
 317   CHECK_EQ(err, 0);
 318
 319   // Configure the update period to 1s (100 * 10ms) in the typing detector.
 320   typing_detector->SetParameters(0, 0, 0, 0, 0, 100);
 321 }
 322
 323 void StartEchoCancellationDump(AudioProcessing* audio_processing,
 324                                base::File aec_dump_file) {
 325   DCHECK(aec_dump_file.IsValid());
 326
 327   FILE* stream = base::FileToFILE(aec_dump_file.Pass(), "w");
 328   if (!stream) {
 329     LOG(ERROR) << "Failed to open AEC dump file";
 330     return;
 331   }
 332
 333   if (audio_processing->StartDebugRecording(stream))
 334     DLOG(ERROR) << "Fail to start AEC debug recording";
 335 }
 336
 337 void StopEchoCancellationDump(AudioProcessing* audio_processing) {
 338   if (audio_processing->StopDebugRecording())
 339     DLOG(ERROR) << "Fail to stop AEC debug recording";
 340 }
 341
 342 void EnableAutomaticGainControl(AudioProcessing* audio_processing) {
 343 #if defined(OS_ANDROID) || defined(OS_IOS)
 344   const webrtc::GainControl::Mode mode = webrtc::GainControl::kFixedDigital;
 345 #else
 346   const webrtc::GainControl::Mode mode = webrtc::GainControl::kAdaptiveAnalog;
 347 #endif
 348   int err = audio_processing->gain_control()->set_mode(mode);
 349   err |= audio_processing->gain_control()->Enable(true);
 350   CHECK_EQ(err, 0);
 351 }
 352
 353 void GetAecStats(webrtc::EchoCancellation* echo_cancellation,
 354                  webrtc::AudioProcessorInterface::AudioProcessorStats* stats) {
 355   // These values can take on valid negative values, so use the lowest possible
 356   // level as default rather than -1.
 357   stats->echo_return_loss = -100;
 358   stats->echo_return_loss_enhancement = -100;
 359
 360   // The median value can also be negative, but in practice -1 is only used to
 361   // signal insufficient data, since the resolution is limited to multiples
 362   // of 4ms.
 363   stats->echo_delay_median_ms = -1;
 364   stats->echo_delay_std_ms = -1;
 365
 366   // TODO(ajm): Re-enable this metric once we have a reliable implementation.
 367   stats->aec_quality_min = -1.0f;
 368
 369   if (!echo_cancellation->are_metrics_enabled() ||
 370       !echo_cancellation->is_delay_logging_enabled() ||
 371       !echo_cancellation->is_enabled()) {
 372     return;
 373   }
 374
 375   // TODO(ajm): we may want to use VoECallReport::GetEchoMetricsSummary
 376   // here, but it appears to be unsuitable currently. Revisit after this is
 377   // investigated: http://b/issue?id=5666755
 378   webrtc::EchoCancellation::Metrics echo_metrics;
 379   if (!echo_cancellation->GetMetrics(&echo_metrics)) {
 380     stats->echo_return_loss = echo_metrics.echo_return_loss.instant;
 381     stats->echo_return_loss_enhancement =
 382         echo_metrics.echo_return_loss_enhancement.instant;
 383   }
 384
 385   int median = 0, std = 0;
 386   float dummy = 0;
 387   if (echo_cancellation->GetDelayMetrics(&median, &std, &dummy) ==
 388       webrtc::AudioProcessing::kNoError) {
 389     stats->echo_delay_median_ms = median;
 390     stats->echo_delay_std_ms = std;
 391   }
 392 }
 393
 394 CONTENT_EXPORT std::vector<webrtc::Point> ParseArrayGeometry(
 395     const std::string& geometry_string) {
 396   const auto& tokens =
 397       base::SplitString(geometry_string, base::kWhitespaceASCII,
 398                         base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
 399   std::vector<webrtc::Point> geometry;
 400   if (tokens.size() < 3 || tokens.size() % 3 != 0) {
 401     LOG(ERROR) << "Malformed geometry string: " << geometry_string;
 402     return geometry;
 403   }
 404
 405   std::vector<float> float_tokens;
 406   float_tokens.reserve(tokens.size());
 407   for (const auto& token : tokens) {
 408     double float_token;
 409     if (!base::StringToDouble(token, &float_token)) {
 410       LOG(ERROR) << "Unable to convert token=" << token
 411                  << " to double from geometry string: " << geometry_string;
 412       return geometry;
 413     }
 414     float_tokens.push_back(float_token);
 415   }
 416
 417   geometry.reserve(float_tokens.size() / 3);
 418   for (size_t i = 0; i < float_tokens.size(); i += 3) {
 419     geometry.push_back(webrtc::Point(float_tokens[i + 0], float_tokens[i + 1],
 420                                      float_tokens[i + 2]));
 421   }
 422
 423   return geometry;
 424 }
 425
 426 }  // namespace content