Remove outdated instrumentation and move a few remaining bits around.
[chromium-blink-merge.git] / google_apis / gcm / engine / connection_factory_impl.cc
blobb708edf869f712578fde45ce611032bb240876ce
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "google_apis/gcm/engine/connection_factory_impl.h"
7 #include "base/message_loop/message_loop.h"
8 #include "base/metrics/histogram.h"
9 #include "base/metrics/sparse_histogram.h"
10 #include "base/profiler/scoped_tracker.h"
11 #include "google_apis/gcm/engine/connection_handler_impl.h"
12 #include "google_apis/gcm/monitoring/gcm_stats_recorder.h"
13 #include "google_apis/gcm/protocol/mcs.pb.h"
14 #include "net/base/load_flags.h"
15 #include "net/base/net_errors.h"
16 #include "net/http/http_network_session.h"
17 #include "net/http/http_request_headers.h"
18 #include "net/proxy/proxy_info.h"
19 #include "net/socket/client_socket_handle.h"
20 #include "net/socket/client_socket_pool_manager.h"
21 #include "net/ssl/ssl_config_service.h"
23 namespace gcm {
25 namespace {
27 // The amount of time a Socket read should wait before timing out.
28 const int kReadTimeoutMs = 30000; // 30 seconds.
30 // If a connection is reset after succeeding within this window of time,
31 // the previous backoff entry is restored (and the connection success is treated
32 // as if it was transient).
33 const int kConnectionResetWindowSecs = 10; // 10 seconds.
35 // Decides whether the last login was within kConnectionResetWindowSecs of now
36 // or not.
37 bool ShouldRestorePreviousBackoff(const base::TimeTicks& login_time,
38 const base::TimeTicks& now_ticks) {
39 return !login_time.is_null() &&
40 now_ticks - login_time <=
41 base::TimeDelta::FromSeconds(kConnectionResetWindowSecs);
44 } // namespace
46 ConnectionFactoryImpl::ConnectionFactoryImpl(
47 const std::vector<GURL>& mcs_endpoints,
48 const net::BackoffEntry::Policy& backoff_policy,
49 const scoped_refptr<net::HttpNetworkSession>& gcm_network_session,
50 const scoped_refptr<net::HttpNetworkSession>& http_network_session,
51 net::NetLog* net_log,
52 GCMStatsRecorder* recorder)
53 : mcs_endpoints_(mcs_endpoints),
54 next_endpoint_(0),
55 last_successful_endpoint_(0),
56 backoff_policy_(backoff_policy),
57 gcm_network_session_(gcm_network_session),
58 http_network_session_(http_network_session),
59 bound_net_log_(
60 net::BoundNetLog::Make(net_log, net::NetLog::SOURCE_SOCKET)),
61 pac_request_(NULL),
62 connecting_(false),
63 waiting_for_backoff_(false),
64 waiting_for_network_online_(false),
65 logging_in_(false),
66 recorder_(recorder),
67 listener_(NULL),
68 weak_ptr_factory_(this) {
69 DCHECK_GE(mcs_endpoints_.size(), 1U);
70 DCHECK(!http_network_session_.get() ||
71 (gcm_network_session_.get() != http_network_session_.get()));
74 ConnectionFactoryImpl::~ConnectionFactoryImpl() {
75 CloseSocket();
76 net::NetworkChangeNotifier::RemoveNetworkChangeObserver(this);
77 if (pac_request_) {
78 gcm_network_session_->proxy_service()->CancelPacRequest(pac_request_);
79 pac_request_ = NULL;
83 void ConnectionFactoryImpl::Initialize(
84 const BuildLoginRequestCallback& request_builder,
85 const ConnectionHandler::ProtoReceivedCallback& read_callback,
86 const ConnectionHandler::ProtoSentCallback& write_callback) {
87 DCHECK(!connection_handler_);
88 DCHECK(read_callback_.is_null());
89 DCHECK(write_callback_.is_null());
91 previous_backoff_ = CreateBackoffEntry(&backoff_policy_);
92 backoff_entry_ = CreateBackoffEntry(&backoff_policy_);
93 request_builder_ = request_builder;
94 read_callback_ = read_callback;
95 write_callback_ = write_callback;
97 net::NetworkChangeNotifier::AddNetworkChangeObserver(this);
98 waiting_for_network_online_ = net::NetworkChangeNotifier::IsOffline();
101 ConnectionHandler* ConnectionFactoryImpl::GetConnectionHandler() const {
102 return connection_handler_.get();
105 void ConnectionFactoryImpl::Connect() {
106 if (!connection_handler_) {
107 connection_handler_ = CreateConnectionHandler(
108 base::TimeDelta::FromMilliseconds(kReadTimeoutMs),
109 read_callback_,
110 write_callback_,
111 base::Bind(&ConnectionFactoryImpl::ConnectionHandlerCallback,
112 weak_ptr_factory_.GetWeakPtr())).Pass();
115 if (connecting_ || waiting_for_backoff_)
116 return; // Connection attempt already in progress or pending.
118 if (IsEndpointReachable())
119 return; // Already connected.
121 ConnectWithBackoff();
124 void ConnectionFactoryImpl::ConnectWithBackoff() {
125 // If a canary managed to connect while a backoff expiration was pending,
126 // just cleanup the internal state.
127 if (connecting_ || logging_in_ || IsEndpointReachable()) {
128 waiting_for_backoff_ = false;
129 return;
132 if (backoff_entry_->ShouldRejectRequest()) {
133 DVLOG(1) << "Delaying MCS endpoint connection for "
134 << backoff_entry_->GetTimeUntilRelease().InMilliseconds()
135 << " milliseconds.";
136 waiting_for_backoff_ = true;
137 recorder_->RecordConnectionDelayedDueToBackoff(
138 backoff_entry_->GetTimeUntilRelease().InMilliseconds());
139 base::MessageLoop::current()->PostDelayedTask(
140 FROM_HERE,
141 base::Bind(&ConnectionFactoryImpl::ConnectWithBackoff,
142 weak_ptr_factory_.GetWeakPtr()),
143 backoff_entry_->GetTimeUntilRelease());
144 return;
147 DVLOG(1) << "Attempting connection to MCS endpoint.";
148 waiting_for_backoff_ = false;
149 // It's necessary to close the socket before attempting any new connection,
150 // otherwise it's possible to hit a use-after-free in the connection handler.
151 // crbug.com/462319
152 CloseSocket();
153 ConnectImpl();
156 bool ConnectionFactoryImpl::IsEndpointReachable() const {
157 return connection_handler_ && connection_handler_->CanSendMessage();
160 std::string ConnectionFactoryImpl::GetConnectionStateString() const {
161 if (IsEndpointReachable())
162 return "CONNECTED";
163 if (logging_in_)
164 return "LOGGING IN";
165 if (connecting_)
166 return "CONNECTING";
167 if (waiting_for_backoff_)
168 return "WAITING FOR BACKOFF";
169 if (waiting_for_network_online_)
170 return "WAITING FOR NETWORK CHANGE";
171 return "NOT CONNECTED";
174 void ConnectionFactoryImpl::SignalConnectionReset(
175 ConnectionResetReason reason) {
176 if (!connection_handler_) {
177 // No initial connection has been made. No need to do anything.
178 return;
181 // A failure can trigger multiple resets, so no need to do anything if a
182 // connection is already in progress.
183 if (connecting_) {
184 DVLOG(1) << "Connection in progress, ignoring reset.";
185 return;
188 if (listener_)
189 listener_->OnDisconnected();
191 UMA_HISTOGRAM_ENUMERATION("GCM.ConnectionResetReason",
192 reason,
193 CONNECTION_RESET_COUNT);
194 recorder_->RecordConnectionResetSignaled(reason);
195 if (!last_login_time_.is_null()) {
196 UMA_HISTOGRAM_CUSTOM_TIMES("GCM.ConnectionUpTime",
197 NowTicks() - last_login_time_,
198 base::TimeDelta::FromSeconds(1),
199 base::TimeDelta::FromHours(24),
200 50);
201 // |last_login_time_| will be reset below, before attempting the new
202 // connection.
205 CloseSocket();
206 DCHECK(!IsEndpointReachable());
208 // TODO(zea): if the network is offline, don't attempt to connect.
209 // See crbug.com/396687
211 // Network changes get special treatment as they can trigger a one-off canary
212 // request that bypasses backoff (but does nothing if a connection is in
213 // progress). Other connection reset events can be ignored as a connection
214 // is already awaiting backoff expiration.
215 if (waiting_for_backoff_ && reason != NETWORK_CHANGE) {
216 DVLOG(1) << "Backoff expiration pending, ignoring reset.";
217 return;
220 if (reason == NETWORK_CHANGE) {
221 // Canary attempts bypass backoff without resetting it. These will have no
222 // effect if we're already in the process of connecting.
223 ConnectImpl();
224 return;
225 } else if (logging_in_) {
226 // Failures prior to login completion just reuse the existing backoff entry.
227 logging_in_ = false;
228 backoff_entry_->InformOfRequest(false);
229 } else if (reason == LOGIN_FAILURE ||
230 ShouldRestorePreviousBackoff(last_login_time_, NowTicks())) {
231 // Failures due to login, or within the reset window of a login, restore
232 // the backoff entry that was saved off at login completion time.
233 backoff_entry_.swap(previous_backoff_);
234 backoff_entry_->InformOfRequest(false);
235 } else {
236 // We shouldn't be in backoff in thise case.
237 DCHECK_EQ(0, backoff_entry_->failure_count());
240 // At this point the last login time has been consumed or deemed irrelevant,
241 // reset it.
242 last_login_time_ = base::TimeTicks();
244 Connect();
247 void ConnectionFactoryImpl::SetConnectionListener(
248 ConnectionListener* listener) {
249 listener_ = listener;
252 base::TimeTicks ConnectionFactoryImpl::NextRetryAttempt() const {
253 if (!backoff_entry_)
254 return base::TimeTicks();
255 return backoff_entry_->GetReleaseTime();
258 void ConnectionFactoryImpl::OnNetworkChanged(
259 net::NetworkChangeNotifier::ConnectionType type) {
260 if (type == net::NetworkChangeNotifier::CONNECTION_NONE) {
261 DVLOG(1) << "Network lost, resettion connection.";
262 waiting_for_network_online_ = true;
264 // Will do nothing due to |waiting_for_network_online_ == true|.
265 // TODO(zea): make the above statement actually true. See crbug.com/396687
266 SignalConnectionReset(NETWORK_CHANGE);
267 return;
270 DVLOG(1) << "Connection type changed to " << type << ", reconnecting.";
271 waiting_for_network_online_ = false;
272 SignalConnectionReset(NETWORK_CHANGE);
275 GURL ConnectionFactoryImpl::GetCurrentEndpoint() const {
276 // Note that IsEndpointReachable() returns false anytime connecting_ is true,
277 // so while connecting this always uses |next_endpoint_|.
278 if (IsEndpointReachable())
279 return mcs_endpoints_[last_successful_endpoint_];
280 return mcs_endpoints_[next_endpoint_];
283 net::IPEndPoint ConnectionFactoryImpl::GetPeerIP() {
284 if (!socket_handle_.socket())
285 return net::IPEndPoint();
287 net::IPEndPoint ip_endpoint;
288 int result = socket_handle_.socket()->GetPeerAddress(&ip_endpoint);
289 if (result != net::OK)
290 return net::IPEndPoint();
292 return ip_endpoint;
295 void ConnectionFactoryImpl::ConnectImpl() {
296 DCHECK(!IsEndpointReachable());
297 // TODO(zea): Make this a dcheck again. crbug.com/462319
298 CHECK(!socket_handle_.socket());
300 // TODO(zea): if the network is offline, don't attempt to connect.
301 // See crbug.com/396687
303 connecting_ = true;
304 GURL current_endpoint = GetCurrentEndpoint();
305 recorder_->RecordConnectionInitiated(current_endpoint.host());
306 RebuildNetworkSessionAuthCache();
307 int status = gcm_network_session_->proxy_service()->ResolveProxy(
308 current_endpoint,
309 net::LOAD_NORMAL,
310 &proxy_info_,
311 base::Bind(&ConnectionFactoryImpl::OnProxyResolveDone,
312 weak_ptr_factory_.GetWeakPtr()),
313 &pac_request_,
314 NULL,
315 bound_net_log_);
316 if (status != net::ERR_IO_PENDING)
317 OnProxyResolveDone(status);
320 void ConnectionFactoryImpl::InitHandler() {
321 // May be null in tests.
322 mcs_proto::LoginRequest login_request;
323 if (!request_builder_.is_null()) {
324 request_builder_.Run(&login_request);
325 DCHECK(login_request.IsInitialized());
328 connection_handler_->Init(login_request, socket_handle_.socket());
331 scoped_ptr<net::BackoffEntry> ConnectionFactoryImpl::CreateBackoffEntry(
332 const net::BackoffEntry::Policy* const policy) {
333 return scoped_ptr<net::BackoffEntry>(new net::BackoffEntry(policy));
336 scoped_ptr<ConnectionHandler> ConnectionFactoryImpl::CreateConnectionHandler(
337 base::TimeDelta read_timeout,
338 const ConnectionHandler::ProtoReceivedCallback& read_callback,
339 const ConnectionHandler::ProtoSentCallback& write_callback,
340 const ConnectionHandler::ConnectionChangedCallback& connection_callback) {
341 return make_scoped_ptr<ConnectionHandler>(
342 new ConnectionHandlerImpl(read_timeout,
343 read_callback,
344 write_callback,
345 connection_callback));
348 base::TimeTicks ConnectionFactoryImpl::NowTicks() {
349 return base::TimeTicks::Now();
352 void ConnectionFactoryImpl::OnConnectDone(int result) {
353 // TODO(zea): Remove ScopedTracker below once crbug.com/455884 is fixed.
354 tracked_objects::ScopedTracker tracking_profile(
355 FROM_HERE_WITH_EXPLICIT_FUNCTION(
356 "455884 ConnectionFactoryImpl::OnConnectDone"));
357 if (result != net::OK) {
358 // If the connection fails, try another proxy.
359 result = ReconsiderProxyAfterError(result);
360 // ReconsiderProxyAfterError either returns an error (in which case it is
361 // not reconsidering a proxy) or returns ERR_IO_PENDING if it is considering
362 // another proxy.
363 DCHECK_NE(result, net::OK);
364 if (result == net::ERR_IO_PENDING)
365 return; // Proxy reconsideration pending. Return.
366 LOG(ERROR) << "Failed to connect to MCS endpoint with error " << result;
367 UMA_HISTOGRAM_BOOLEAN("GCM.ConnectionSuccessRate", false);
368 recorder_->RecordConnectionFailure(result);
369 CloseSocket();
370 backoff_entry_->InformOfRequest(false);
371 UMA_HISTOGRAM_SPARSE_SLOWLY("GCM.ConnectionFailureErrorCode", result);
373 // If there are other endpoints available, use the next endpoint on the
374 // subsequent retry.
375 next_endpoint_++;
376 if (next_endpoint_ >= mcs_endpoints_.size())
377 next_endpoint_ = 0;
378 connecting_ = false;
379 Connect();
380 return;
383 UMA_HISTOGRAM_BOOLEAN("GCM.ConnectionSuccessRate", true);
384 UMA_HISTOGRAM_COUNTS("GCM.ConnectionEndpoint", next_endpoint_);
385 UMA_HISTOGRAM_BOOLEAN("GCM.ConnectedViaProxy",
386 !(proxy_info_.is_empty() || proxy_info_.is_direct()));
387 ReportSuccessfulProxyConnection();
388 recorder_->RecordConnectionSuccess();
390 // Reset the endpoint back to the default.
391 // TODO(zea): consider prioritizing endpoints more intelligently based on
392 // which ones succeed most for this client? Although that will affect
393 // measuring the success rate of the default endpoint vs fallback.
394 last_successful_endpoint_ = next_endpoint_;
395 next_endpoint_ = 0;
396 connecting_ = false;
397 logging_in_ = true;
398 DVLOG(1) << "MCS endpoint socket connection success, starting login.";
399 InitHandler();
402 void ConnectionFactoryImpl::ConnectionHandlerCallback(int result) {
403 DCHECK(!connecting_);
404 if (result != net::OK) {
405 // TODO(zea): Consider how to handle errors that may require some sort of
406 // user intervention (login page, etc.).
407 UMA_HISTOGRAM_SPARSE_SLOWLY("GCM.ConnectionDisconnectErrorCode", result);
408 SignalConnectionReset(SOCKET_FAILURE);
409 return;
412 // Handshake complete, reset backoff. If the login failed with an error,
413 // the client should invoke SignalConnectionReset(LOGIN_FAILURE), which will
414 // restore the previous backoff.
415 DVLOG(1) << "Handshake complete.";
416 last_login_time_ = NowTicks();
417 previous_backoff_.swap(backoff_entry_);
418 backoff_entry_->Reset();
419 logging_in_ = false;
421 if (listener_)
422 listener_->OnConnected(GetCurrentEndpoint(), GetPeerIP());
425 // This has largely been copied from
426 // HttpStreamFactoryImpl::Job::DoResolveProxyComplete. This should be
427 // refactored into some common place.
428 void ConnectionFactoryImpl::OnProxyResolveDone(int status) {
429 pac_request_ = NULL;
430 DVLOG(1) << "Proxy resolution status: " << status;
432 DCHECK_NE(status, net::ERR_IO_PENDING);
433 if (status == net::OK) {
434 // Remove unsupported proxies from the list.
435 proxy_info_.RemoveProxiesWithoutScheme(
436 net::ProxyServer::SCHEME_DIRECT |
437 net::ProxyServer::SCHEME_HTTP | net::ProxyServer::SCHEME_HTTPS |
438 net::ProxyServer::SCHEME_SOCKS4 | net::ProxyServer::SCHEME_SOCKS5);
440 if (proxy_info_.is_empty()) {
441 // No proxies/direct to choose from. This happens when we don't support
442 // any of the proxies in the returned list.
443 status = net::ERR_NO_SUPPORTED_PROXIES;
447 if (status != net::OK) {
448 // Failed to resolve proxy. Retry later.
449 OnConnectDone(status);
450 return;
453 DVLOG(1) << "Resolved proxy with PAC:" << proxy_info_.ToPacString();
455 net::SSLConfig ssl_config;
456 gcm_network_session_->ssl_config_service()->GetSSLConfig(&ssl_config);
457 status = net::InitSocketHandleForTlsConnect(
458 net::HostPortPair::FromURL(GetCurrentEndpoint()),
459 gcm_network_session_.get(),
460 proxy_info_,
461 ssl_config,
462 ssl_config,
463 net::PRIVACY_MODE_DISABLED,
464 bound_net_log_,
465 &socket_handle_,
466 base::Bind(&ConnectionFactoryImpl::OnConnectDone,
467 weak_ptr_factory_.GetWeakPtr()));
468 if (status != net::ERR_IO_PENDING)
469 OnConnectDone(status);
472 // This has largely been copied from
473 // HttpStreamFactoryImpl::Job::ReconsiderProxyAfterError. This should be
474 // refactored into some common place.
475 // This method reconsiders the proxy on certain errors. If it does reconsider
476 // a proxy it always returns ERR_IO_PENDING and posts a call to
477 // OnProxyResolveDone with the result of the reconsideration.
478 int ConnectionFactoryImpl::ReconsiderProxyAfterError(int error) {
479 DCHECK(!pac_request_);
480 DCHECK_NE(error, net::OK);
481 DCHECK_NE(error, net::ERR_IO_PENDING);
482 // A failure to resolve the hostname or any error related to establishing a
483 // TCP connection could be grounds for trying a new proxy configuration.
485 // Why do this when a hostname cannot be resolved? Some URLs only make sense
486 // to proxy servers. The hostname in those URLs might fail to resolve if we
487 // are still using a non-proxy config. We need to check if a proxy config
488 // now exists that corresponds to a proxy server that could load the URL.
490 switch (error) {
491 case net::ERR_PROXY_CONNECTION_FAILED:
492 case net::ERR_NAME_NOT_RESOLVED:
493 case net::ERR_INTERNET_DISCONNECTED:
494 case net::ERR_ADDRESS_UNREACHABLE:
495 case net::ERR_CONNECTION_CLOSED:
496 case net::ERR_CONNECTION_TIMED_OUT:
497 case net::ERR_CONNECTION_RESET:
498 case net::ERR_CONNECTION_REFUSED:
499 case net::ERR_CONNECTION_ABORTED:
500 case net::ERR_TIMED_OUT:
501 case net::ERR_TUNNEL_CONNECTION_FAILED:
502 case net::ERR_SOCKS_CONNECTION_FAILED:
503 // This can happen in the case of trying to talk to a proxy using SSL, and
504 // ending up talking to a captive portal that supports SSL instead.
505 case net::ERR_PROXY_CERTIFICATE_INVALID:
506 // This can happen when trying to talk SSL to a non-SSL server (Like a
507 // captive portal).
508 case net::ERR_SSL_PROTOCOL_ERROR:
509 break;
510 case net::ERR_SOCKS_CONNECTION_HOST_UNREACHABLE:
511 // Remap the SOCKS-specific "host unreachable" error to a more
512 // generic error code (this way consumers like the link doctor
513 // know to substitute their error page).
515 // Note that if the host resolving was done by the SOCKS5 proxy, we can't
516 // differentiate between a proxy-side "host not found" versus a proxy-side
517 // "address unreachable" error, and will report both of these failures as
518 // ERR_ADDRESS_UNREACHABLE.
519 return net::ERR_ADDRESS_UNREACHABLE;
520 default:
521 return error;
524 net::SSLConfig ssl_config;
525 gcm_network_session_->ssl_config_service()->GetSSLConfig(&ssl_config);
526 if (proxy_info_.is_https() && ssl_config.send_client_cert) {
527 gcm_network_session_->ssl_client_auth_cache()->Remove(
528 proxy_info_.proxy_server().host_port_pair());
531 int status = gcm_network_session_->proxy_service()->ReconsiderProxyAfterError(
532 GetCurrentEndpoint(), net::LOAD_NORMAL, error, &proxy_info_,
533 base::Bind(&ConnectionFactoryImpl::OnProxyResolveDone,
534 weak_ptr_factory_.GetWeakPtr()),
535 &pac_request_,
536 NULL,
537 bound_net_log_);
538 if (status == net::OK || status == net::ERR_IO_PENDING) {
539 CloseSocket();
540 } else {
541 // If ReconsiderProxyAfterError() failed synchronously, it means
542 // there was nothing left to fall-back to, so fail the transaction
543 // with the last connection error we got.
544 status = error;
547 // If there is new proxy info, post OnProxyResolveDone to retry it. Otherwise,
548 // if there was an error falling back, fail synchronously.
549 if (status == net::OK) {
550 base::MessageLoop::current()->PostTask(
551 FROM_HERE,
552 base::Bind(&ConnectionFactoryImpl::OnProxyResolveDone,
553 weak_ptr_factory_.GetWeakPtr(), status));
554 status = net::ERR_IO_PENDING;
556 return status;
559 void ConnectionFactoryImpl::ReportSuccessfulProxyConnection() {
560 if (gcm_network_session_.get() && gcm_network_session_->proxy_service())
561 gcm_network_session_->proxy_service()->ReportSuccess(proxy_info_, NULL);
564 void ConnectionFactoryImpl::CloseSocket() {
565 // The connection handler needs to be reset, else it'll attempt to keep using
566 // the destroyed socket.
567 if (connection_handler_)
568 connection_handler_->Reset();
570 if (socket_handle_.socket() && socket_handle_.socket()->IsConnected())
571 socket_handle_.socket()->Disconnect();
572 socket_handle_.Reset();
575 void ConnectionFactoryImpl::RebuildNetworkSessionAuthCache() {
576 if (!http_network_session_.get() || !http_network_session_->http_auth_cache())
577 return;
579 gcm_network_session_->http_auth_cache()->UpdateAllFrom(
580 *http_network_session_->http_auth_cache());
583 } // namespace gcm