1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/net/referrer.h"
9 #include "base/compiler_specific.h"
10 #include "base/logging.h"
11 #include "base/message_loop/message_loop.h"
12 #include "base/values.h"
13 #include "chrome/browser/net/predictor.h"
15 namespace chrome_browser_net
{
17 //------------------------------------------------------------------------------
18 // Smoothing parameter for updating subresource_use_rate_.
20 // We always combine our old expected value, weighted by some factor W (we use
21 // kWeightingForOldConnectsExpectedValue), with the new expected value Enew.
22 // The new "expected value" is the number of actual connections made due to the
23 // current navigations.
24 // That means that IF we end up needing to connect, we should apply the formula:
25 // Eupdated = Eold * W + Enew * (1 - W)
26 // If we visit the containing url, but don't end up needing a connection, then
27 // Enew == 0, so we use the formula:
28 // Eupdated = Eold * W
29 // To achieve the above updating algorithm, we end up doing the multiplication
30 // by W every time we contemplate doing a preconnection (i.e., when we navigate
31 // to the containing URL, and consider doing a preconnection), and then IFF we
32 // learn that we really needed a connection to the subresource, we complete the
33 // above algorithm by adding the (1 - W) for each connection we make.
// Weight W applied to the old expected value on every update; the new
// observation is weighted by (1 - W).  The factor must lie in the range of
// 0.0 to 1.0.
static const double kWeightingForOldConnectsExpectedValue = 0.66;
// Starting estimate for the expected number of connections that we'll need
// when a referrer is navigated to.
// Each time we do indeed (again) need the subresource, this value will get
// increased.
// Each time we navigate to the referrer but never end up needing this
// subresource, the value will decrease.
// Very conservative is 0.0, which will mean that we have to wait for a while
// before doing much speculative activity.  We do persist results, so we'll
// save the asymptotic (correct?) learned answer in the long run.
// Some browsers blindly make 2 connections all the time, so we'll use that as
// a starting point.
static const double kInitialConnectsExpectedValue = 2.0;
53 Referrer::Referrer() : use_count_(1) {}
55 void Referrer::SuggestHost(const GURL
& url
) {
56 // Limit how large our list can get, in case we make mistakes about what
57 // hostnames are in sub-resources (example: Some advertisments have a link to
58 // the ad agency, and then provide a "surprising" redirect to the advertised
59 // entity, which then (mistakenly) appears to be a subresource on the page
61 // TODO(jar): Do experiments to optimize the max count of suggestions.
62 static const size_t kMaxSuggestions
= 10;
64 if (!url
.has_host()) // TODO(jar): Is this really needed????
66 DCHECK(url
== url
.GetWithEmptyPath());
67 SubresourceMap::iterator it
= find(url
);
69 it
->second
.SubresourceIsNeeded();
73 if (kMaxSuggestions
<= size()) {
75 DCHECK(kMaxSuggestions
> size());
77 (*this)[url
].SubresourceIsNeeded();
80 void Referrer::DeleteLeastUseful() {
81 // Find the item with the lowest value. Most important is preconnection_rate,
82 // and least is lifetime (age).
83 GURL least_useful_url
;
84 double lowest_rate_seen
= 0.0;
85 // We use longs for durations because we will use multiplication on them.
86 int64 least_useful_lifetime
= 0; // Duration in milliseconds.
88 const base::Time
kNow(base::Time::Now()); // Avoid multiple calls.
89 for (SubresourceMap::iterator it
= begin(); it
!= end(); ++it
) {
90 int64 lifetime
= (kNow
- it
->second
.birth_time()).InMilliseconds();
91 double rate
= it
->second
.subresource_use_rate();
92 if (least_useful_url
.has_host()) {
93 if (rate
> lowest_rate_seen
)
95 if (lifetime
<= least_useful_lifetime
)
98 least_useful_url
= it
->first
;
99 lowest_rate_seen
= rate
;
100 least_useful_lifetime
= lifetime
;
102 if (least_useful_url
.has_host())
103 erase(least_useful_url
);
106 bool Referrer::Trim(double reduce_rate
, double threshold
) {
107 std::vector
<GURL
> discarded_urls
;
108 for (SubresourceMap::iterator it
= begin(); it
!= end(); ++it
) {
109 if (!it
->second
.Trim(reduce_rate
, threshold
))
110 discarded_urls
.push_back(it
->first
);
112 for (size_t i
= 0; i
< discarded_urls
.size(); ++i
)
113 erase(discarded_urls
[i
]);
117 bool ReferrerValue::Trim(double reduce_rate
, double threshold
) {
118 subresource_use_rate_
*= reduce_rate
;
119 return subresource_use_rate_
> threshold
;
123 void Referrer::Deserialize(const base::Value
& value
) {
124 if (value
.GetType() != base::Value::TYPE_LIST
)
126 const base::ListValue
* subresource_list(
127 static_cast<const base::ListValue
*>(&value
));
128 size_t index
= 0; // Bounds checking is done by subresource_list->Get*().
130 std::string url_spec
;
131 if (!subresource_list
->GetString(index
++, &url_spec
))
134 if (!subresource_list
->GetDouble(index
++, &rate
))
138 // TODO(jar): We could be more direct, and change birth date or similar to
139 // show that this is a resurrected value we're adding in. I'm not yet sure
140 // of how best to optimize the learning and pruning (Trim) algorithm at this
141 // level, so for now, we just suggest subresources, which leaves them all
142 // with the same birth date (typically start of process).
144 (*this)[url
].SetSubresourceUseRate(rate
);
148 base::Value
* Referrer::Serialize() const {
149 base::ListValue
* subresource_list(new base::ListValue
);
150 for (const_iterator it
= begin(); it
!= end(); ++it
) {
151 base::StringValue
* url_spec(new base::StringValue(it
->first
.spec()));
152 base::FundamentalValue
* rate(new base::FundamentalValue(
153 it
->second
.subresource_use_rate()));
155 subresource_list
->Append(url_spec
);
156 subresource_list
->Append(rate
);
158 return subresource_list
;
161 //------------------------------------------------------------------------------
163 ReferrerValue::ReferrerValue()
164 : birth_time_(base::Time::Now()),
165 navigation_count_(0),
166 preconnection_count_(0),
167 preresolution_count_(0),
168 subresource_use_rate_(kInitialConnectsExpectedValue
) {
171 void ReferrerValue::SubresourceIsNeeded() {
172 DCHECK_GE(kWeightingForOldConnectsExpectedValue
, 0);
173 DCHECK_LE(kWeightingForOldConnectsExpectedValue
, 1.0);
175 subresource_use_rate_
+= 1 - kWeightingForOldConnectsExpectedValue
;
178 void ReferrerValue::ReferrerWasObserved() {
179 subresource_use_rate_
*= kWeightingForOldConnectsExpectedValue
;
180 // Note: the use rate is temporarilly possibly incorect, as we need to find
181 // out if we really end up connecting. This will happen in a few hundred
182 // milliseconds (when content arrives, etc.).
183 // Value of subresource_use_rate_ should be sampled before this call.
186 } // namespace chrome_browser_net