1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/dns/dns_session.h"
7 #include "base/basictypes.h"
9 #include "base/lazy_instance.h"
10 #include "base/metrics/histogram.h"
11 #include "base/metrics/sample_vector.h"
12 #include "base/rand_util.h"
13 #include "base/stl_util.h"
14 #include "base/time/time.h"
15 #include "net/base/ip_endpoint.h"
16 #include "net/base/net_errors.h"
17 #include "net/dns/dns_config_service.h"
18 #include "net/dns/dns_socket_pool.h"
19 #include "net/socket/stream_socket.h"
20 #include "net/udp/datagram_client_socket.h"
// Upper bound, in milliseconds, on any timeout handed out by this file.
const unsigned kMaxTimeoutMs = 5000;
// Lower bound, in milliseconds, in case we are talking to a local DNS proxy.
const unsigned kMinTimeoutMs = 10;

// Number of buckets in the histogram of observed RTTs.
const size_t kRTTBucketCount = 100;
// Target percentile in the RTT histogram used for retransmission timeout.
const unsigned kRTOPercentile = 99;
36 // Runtime statistics of DNS server.
37 struct DnsSession::ServerStats
{
38 ServerStats(base::TimeDelta rtt_estimate_param
, RttBuckets
* buckets
)
39 : last_failure_count(0), rtt_estimate(rtt_estimate_param
) {
40 rtt_histogram
.reset(new base::SampleVector(buckets
));
41 // Seed histogram with 2 samples at |rtt_estimate| timeout.
42 rtt_histogram
->Accumulate(rtt_estimate
.InMilliseconds(), 2);
45 // Count of consecutive failures after last success.
46 int last_failure_count
;
48 // Last time when server returned failure or timeout.
49 base::Time last_failure
;
50 // Last time when server returned success.
51 base::Time last_success
;
53 // Estimated RTT using moving average.
54 base::TimeDelta rtt_estimate
;
55 // Estimated error in the above.
56 base::TimeDelta rtt_deviation
;
58 // A histogram of observed RTT .
59 scoped_ptr
<base::SampleVector
> rtt_histogram
;
61 DISALLOW_COPY_AND_ASSIGN(ServerStats
);
65 base::LazyInstance
<DnsSession::RttBuckets
>::Leaky
DnsSession::rtt_buckets_
=
66 LAZY_INSTANCE_INITIALIZER
;
68 DnsSession::RttBuckets::RttBuckets() : base::BucketRanges(kRTTBucketCount
+ 1) {
69 base::Histogram::InitializeBucketRanges(1, 5000, this);
72 DnsSession::SocketLease::SocketLease(scoped_refptr
<DnsSession
> session
,
73 unsigned server_index
,
74 scoped_ptr
<DatagramClientSocket
> socket
)
75 : session_(session
), server_index_(server_index
), socket_(socket
.Pass()) {}
77 DnsSession::SocketLease::~SocketLease() {
78 session_
->FreeSocket(server_index_
, socket_
.Pass());
81 DnsSession::DnsSession(const DnsConfig
& config
,
82 scoped_ptr
<DnsSocketPool
> socket_pool
,
83 const RandIntCallback
& rand_int_callback
,
86 socket_pool_(socket_pool
.Pass()),
87 rand_callback_(base::Bind(rand_int_callback
, 0, kuint16max
)),
90 socket_pool_
->Initialize(&config_
.nameservers
, net_log
);
91 UMA_HISTOGRAM_CUSTOM_COUNTS(
92 "AsyncDNS.ServerCount", config_
.nameservers
.size(), 0, 10, 10);
93 for (size_t i
= 0; i
< config_
.nameservers
.size(); ++i
) {
94 server_stats_
.push_back(new ServerStats(config_
.timeout
,
95 rtt_buckets_
.Pointer()));
99 DnsSession::~DnsSession() {
103 int DnsSession::NextQueryId() const { return rand_callback_
.Run(); }
105 unsigned DnsSession::NextFirstServerIndex() {
106 unsigned index
= NextGoodServerIndex(server_index_
);
108 server_index_
= (server_index_
+ 1) % config_
.nameservers
.size();
112 unsigned DnsSession::NextGoodServerIndex(unsigned server_index
) {
113 unsigned index
= server_index
;
114 base::Time
oldest_server_failure(base::Time::Now());
115 unsigned oldest_server_failure_index
= 0;
117 UMA_HISTOGRAM_BOOLEAN("AsyncDNS.ServerIsGood",
118 server_stats_
[server_index
]->last_failure
.is_null());
121 base::Time cur_server_failure
= server_stats_
[index
]->last_failure
;
122 // If number of failures on this server doesn't exceed number of allowed
123 // attempts, return its index.
124 if (server_stats_
[server_index
]->last_failure_count
< config_
.attempts
) {
127 // Track oldest failed server.
128 if (cur_server_failure
< oldest_server_failure
) {
129 oldest_server_failure
= cur_server_failure
;
130 oldest_server_failure_index
= index
;
132 index
= (index
+ 1) % config_
.nameservers
.size();
133 } while (index
!= server_index
);
135 // If we are here it means that there are no successful servers, so we have
136 // to use one that has failed oldest.
137 return oldest_server_failure_index
;
140 void DnsSession::RecordServerFailure(unsigned server_index
) {
141 UMA_HISTOGRAM_CUSTOM_COUNTS(
142 "AsyncDNS.ServerFailureIndex", server_index
, 0, 10, 10);
143 ++(server_stats_
[server_index
]->last_failure_count
);
144 server_stats_
[server_index
]->last_failure
= base::Time::Now();
147 void DnsSession::RecordServerSuccess(unsigned server_index
) {
148 if (server_stats_
[server_index
]->last_success
.is_null()) {
149 UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresAfterNetworkChange",
150 server_stats_
[server_index
]->last_failure_count
);
152 UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresBeforeSuccess",
153 server_stats_
[server_index
]->last_failure_count
);
155 server_stats_
[server_index
]->last_failure_count
= 0;
156 server_stats_
[server_index
]->last_failure
= base::Time();
157 server_stats_
[server_index
]->last_success
= base::Time::Now();
160 void DnsSession::RecordRTT(unsigned server_index
, base::TimeDelta rtt
) {
161 DCHECK_LT(server_index
, server_stats_
.size());
163 // For measurement, assume it is the first attempt (no backoff).
164 base::TimeDelta timeout_jacobson
= NextTimeoutFromJacobson(server_index
, 0);
165 base::TimeDelta timeout_histogram
= NextTimeoutFromHistogram(server_index
, 0);
166 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobson", rtt
- timeout_jacobson
);
167 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogram",
168 rtt
- timeout_histogram
);
169 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobsonUnder",
170 timeout_jacobson
- rtt
);
171 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogramUnder",
172 timeout_histogram
- rtt
);
174 // Jacobson/Karels algorithm for TCP.
175 // Using parameters: alpha = 1/8, delta = 1/4, beta = 4
176 base::TimeDelta
& estimate
= server_stats_
[server_index
]->rtt_estimate
;
177 base::TimeDelta
& deviation
= server_stats_
[server_index
]->rtt_deviation
;
178 base::TimeDelta current_error
= rtt
- estimate
;
179 estimate
+= current_error
/ 8; // * alpha
180 base::TimeDelta abs_error
= base::TimeDelta::FromInternalValue(
181 std::abs(current_error
.ToInternalValue()));
182 deviation
+= (abs_error
- deviation
) / 4; // * delta
184 // Histogram-based method.
185 server_stats_
[server_index
]->rtt_histogram
186 ->Accumulate(rtt
.InMilliseconds(), 1);
189 void DnsSession::RecordLostPacket(unsigned server_index
, int attempt
) {
190 base::TimeDelta timeout_jacobson
=
191 NextTimeoutFromJacobson(server_index
, attempt
);
192 base::TimeDelta timeout_histogram
=
193 NextTimeoutFromHistogram(server_index
, attempt
);
194 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentJacobson", timeout_jacobson
);
195 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentHistogram", timeout_histogram
);
198 void DnsSession::RecordServerStats() {
199 for (size_t index
= 0; index
< server_stats_
.size(); ++index
) {
200 if (server_stats_
[index
]->last_failure_count
) {
201 if (server_stats_
[index
]->last_success
.is_null()) {
202 UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresWithoutSuccess",
203 server_stats_
[index
]->last_failure_count
);
205 UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresAfterSuccess",
206 server_stats_
[index
]->last_failure_count
);
213 base::TimeDelta
DnsSession::NextTimeout(unsigned server_index
, int attempt
) {
214 // Respect config timeout if it exceeds |kMaxTimeoutMs|.
215 if (config_
.timeout
.InMilliseconds() >= kMaxTimeoutMs
)
216 return config_
.timeout
;
217 return NextTimeoutFromHistogram(server_index
, attempt
);
220 // Allocate a socket, already connected to the server address.
221 scoped_ptr
<DnsSession::SocketLease
> DnsSession::AllocateSocket(
222 unsigned server_index
, const NetLog::Source
& source
) {
223 scoped_ptr
<DatagramClientSocket
> socket
;
225 socket
= socket_pool_
->AllocateSocket(server_index
);
227 return scoped_ptr
<SocketLease
>();
229 socket
->NetLog().BeginEvent(NetLog::TYPE_SOCKET_IN_USE
,
230 source
.ToEventParametersCallback());
232 SocketLease
* lease
= new SocketLease(this, server_index
, socket
.Pass());
233 return scoped_ptr
<SocketLease
>(lease
);
236 scoped_ptr
<StreamSocket
> DnsSession::CreateTCPSocket(
237 unsigned server_index
, const NetLog::Source
& source
) {
238 return socket_pool_
->CreateTCPSocket(server_index
, source
);
242 void DnsSession::FreeSocket(unsigned server_index
,
243 scoped_ptr
<DatagramClientSocket
> socket
) {
244 DCHECK(socket
.get());
246 socket
->NetLog().EndEvent(NetLog::TYPE_SOCKET_IN_USE
);
248 socket_pool_
->FreeSocket(server_index
, socket
.Pass());
251 base::TimeDelta
DnsSession::NextTimeoutFromJacobson(unsigned server_index
,
253 DCHECK_LT(server_index
, server_stats_
.size());
255 base::TimeDelta timeout
= server_stats_
[server_index
]->rtt_estimate
+
256 4 * server_stats_
[server_index
]->rtt_deviation
;
258 timeout
= std::max(timeout
, base::TimeDelta::FromMilliseconds(kMinTimeoutMs
));
260 // The timeout doubles every full round.
261 unsigned num_backoffs
= attempt
/ config_
.nameservers
.size();
263 return std::min(timeout
* (1 << num_backoffs
),
264 base::TimeDelta::FromMilliseconds(kMaxTimeoutMs
));
267 base::TimeDelta
DnsSession::NextTimeoutFromHistogram(unsigned server_index
,
269 DCHECK_LT(server_index
, server_stats_
.size());
271 COMPILE_ASSERT(std::numeric_limits
<base::HistogramBase::Count
>::is_signed
,
272 histogram_base_count_assumed_to_be_signed
);
274 // Use fixed percentile of observed samples.
275 const base::SampleVector
& samples
=
276 *server_stats_
[server_index
]->rtt_histogram
;
278 base::HistogramBase::Count total
= samples
.TotalCount();
279 base::HistogramBase::Count remaining_count
= kRTOPercentile
* total
/ 100;
281 while (remaining_count
> 0 && index
< rtt_buckets_
.Get().size()) {
282 remaining_count
-= samples
.GetCountAtIndex(index
);
286 base::TimeDelta timeout
=
287 base::TimeDelta::FromMilliseconds(rtt_buckets_
.Get().range(index
));
289 timeout
= std::max(timeout
, base::TimeDelta::FromMilliseconds(kMinTimeoutMs
));
291 // The timeout still doubles every full round.
292 unsigned num_backoffs
= attempt
/ config_
.nameservers
.size();
294 return std::min(timeout
* (1 << num_backoffs
),
295 base::TimeDelta::FromMilliseconds(kMaxTimeoutMs
));