Popular sites on the NTP: check that experiment group StartsWith (rather than IS...
[chromium-blink-merge.git] / chrome / browser / android / history_report / delta_file_commons.cc
blob027f82d9c7c84d99b36360105cea81dfb6560b8e
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "chrome/browser/android/history_report/delta_file_commons.h"
7 #include <iomanip>
9 #include "base/strings/string_number_conversions.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "crypto/sha2.h"
12 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
14 using bookmarks::BookmarkModel;
15 using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES;
16 using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES;
17 using net::registry_controlled_domains::GetRegistryLength;
19 namespace {
// Factor applied to the score of an entry that is marked as a bookmark.
const int kBookmarkScoreBonusMultiplier = 3;

// Maximum number of characters an entry ID may have.
const size_t kIdLengthLimit = 256;

// Size in bytes of a raw SHA-256 digest; hex-encoding it takes twice as many
// characters.
const int kSHA256ByteSize = 32;

// URLs longer than this many bytes are not turned into an ID.
const size_t kUrlLengthLimit = 20 * 1024 * 1024;  // 20M

// Width of the zero-padded URL-length field of a hashed ID. Eight digits are
// enough for every length up to |kUrlLengthLimit|.
const size_t kUrlLengthWidth = 8;
27 void StripTopLevelDomain(std::string* host) {
28 size_t registry_length = GetRegistryLength(
29 *host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES);
30 if (registry_length != 0 && registry_length != std::string::npos)
31 host->erase(host->length() - (registry_length + 1));
// Removes a leading "www." or "ww2." from |host| in place. At most one prefix
// is removed per call; a host that starts with neither is left unchanged.
void StripCommonSubDomains(std::string* host) {
  // Both prefixes are four characters long, so one window serves both
  // comparisons. A host shorter than the window simply compares unequal.
  const size_t kPrefixLength = 4;
  if (host->compare(0, kPrefixLength, "www.") == 0 ||
      host->compare(0, kPrefixLength, "ww2.") == 0) {
    host->erase(0, kPrefixLength);
  }
}
44 } // namespace
46 namespace history_report {
48 DeltaFileEntryWithData::DeltaFileEntryWithData(DeltaFileEntry entry)
49 : entry_(entry),
50 data_set_(false),
51 is_bookmark_(false) {}
53 DeltaFileEntryWithData::~DeltaFileEntryWithData() {}
55 int64 DeltaFileEntryWithData::SeqNo() const {
56 return entry_.seq_no();
59 std::string DeltaFileEntryWithData::Type() const {
60 // If deletion entry has data then it's not a real deletion entry
61 // but an update entry. Real deletion entry never has data.
62 if (data_set_) return "add";
63 return entry_.type();
66 // Generates a unique ID for a given URL.
67 // It must be shorter than or equal to |kIdLengthLimit| characters.
68 // If URL is shorter than or equal to |kIdLengthLimit| then ID is the URL
69 // itself. Otherwise it has a form of 3 concatenated parts:
70 // 1. Length of URL. Zero-padded integer to width |kUrlLengthWidth|,
71 // because URLs are limited to 20M in Chrome.
72 // 2. SHA-256 of URL which takes 64 characters.
73 // 3. Prefix of URL of size |kIdLengthLimit| - 64 - |kUrlLengthWidth|.
74 std::string DeltaFileEntryWithData::UrlToId(const std::string& url) {
75 if (url.size() > kUrlLengthLimit) {
76 return "error: url too long";
79 if (IsValidId(url)) {
80 return url;
83 std::stringstream id;
85 // 1. Zero-padded URL length to width |kUrlLengthWidth|.
86 id << std::setfill('0') << std::setw(kUrlLengthWidth) << url.size();
88 // 2. SHA-256 of URL.
89 uint8 hash[kSHA256ByteSize];
90 crypto::SHA256HashString(url, hash, sizeof(hash));
91 id << base::HexEncode(hash, sizeof(hash));
93 // 3. Prefix of URL to fill rest of the space.
94 id << url.substr(0, kIdLengthLimit - 2 * kSHA256ByteSize - kUrlLengthWidth);
96 return id.str();
99 // ID which identifies URL of this entry.
100 std::string DeltaFileEntryWithData::Id() const {
101 return UrlToId(entry_.url());
104 std::string DeltaFileEntryWithData::Url() const {
105 return entry_.url();
108 base::string16 DeltaFileEntryWithData::Title() const {
109 if (!Valid()) return base::UTF8ToUTF16("");
110 if (is_bookmark_ && !bookmark_title_.empty()) return bookmark_title_;
111 if (data_.title().empty()) return base::UTF8ToUTF16(data_.url().host());
112 return data_.title();
115 int32 DeltaFileEntryWithData::Score() const {
116 if (!Valid()) return 0;
117 int32 score = data_.visit_count() + data_.typed_count();
118 if (is_bookmark_) score = (score + 1) * kBookmarkScoreBonusMultiplier;
119 return score;
122 std::string DeltaFileEntryWithData::IndexedUrl() const {
123 if (!Valid()) return "";
124 std::string indexed_url = data_.url().host();
125 StripTopLevelDomain(&indexed_url);
126 StripCommonSubDomains(&indexed_url);
127 return indexed_url;
130 bool DeltaFileEntryWithData::Valid() const {
131 return entry_.type() == "del" || is_bookmark_ ||
132 (data_set_ && !data_.hidden());
135 void DeltaFileEntryWithData::SetData(const history::URLRow& data) {
136 data_set_ = true;
137 data_ = data;
140 void DeltaFileEntryWithData::MarkAsBookmark(
141 const BookmarkModel::URLAndTitle& bookmark) {
142 is_bookmark_ = true;
143 bookmark_title_ = bookmark.title;
146 // static
147 bool DeltaFileEntryWithData::IsValidId(const std::string& url) {
148 return url.size() <= kIdLengthLimit;
151 } // namespace history_report