// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "chrome/browser/android/history_report/delta_file_commons.h"
9 #include "base/strings/string_number_conversions.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "crypto/sha2.h"
12 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
14 using bookmarks::BookmarkModel
;
15 using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES
;
16 using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES
;
17 using net::registry_controlled_domains::GetRegistryLength
;
// Factor applied to the score of an entry that is marked as a bookmark.
const int kBookmarkScoreBonusMultiplier = 3;

// Maximum number of characters an entry ID may have.
const size_t kIdLengthLimit = 256;

// Size in bytes of a SHA-256 digest; its hex encoding is twice as long.
const int kSHA256ByteSize = 32;

// URLs longer than this are rejected when an ID is generated.
const size_t kUrlLengthLimit = 20 * 1024 * 1024;  // 20M

// Width of the zero-padded URL-length field that starts a hashed ID.
const size_t kUrlLengthWidth = 8;
27 void StripTopLevelDomain(std::string
* host
) {
28 size_t registry_length
= GetRegistryLength(
29 *host
, EXCLUDE_UNKNOWN_REGISTRIES
, EXCLUDE_PRIVATE_REGISTRIES
);
30 if (registry_length
!= 0 && registry_length
!= std::string::npos
)
31 host
->erase(host
->length() - (registry_length
+ 1));
// Removes a single leading "www." or "ww2." label from |host|, in place.
// At most one prefix is stripped; hosts that start with neither (including
// hosts shorter than the prefix) are left unchanged.
void StripCommonSubDomains(std::string* host) {
  const std::string www_prefix("www.");
  const std::string ww2_prefix("ww2.");
  if (host->compare(0, www_prefix.size(), www_prefix) == 0) {
    host->erase(0, www_prefix.size());
  } else if (host->compare(0, ww2_prefix.size(), ww2_prefix) == 0) {
    host->erase(0, ww2_prefix.size());
  }
}
46 namespace history_report
{
// Constructs a wrapper around |entry|; the new object starts out not marked
// as a bookmark.
// NOTE(review): the initializer-list lines between the signature and
// |is_bookmark_| are not visible in this listing - confirm against the full
// file before editing.
48 DeltaFileEntryWithData::DeltaFileEntryWithData(DeltaFileEntry entry
)
51 is_bookmark_(false) {}
53 DeltaFileEntryWithData::~DeltaFileEntryWithData() {}
55 int64
DeltaFileEntryWithData::SeqNo() const {
56 return entry_
.seq_no();
// Returns the type string reported for this entry; "add" whenever data has
// been set on it.
// NOTE(review): only the |data_set_| branch is visible in this listing; the
// fallback return and the closing brace appear to be missing - confirm
// against the full file.
59 std::string
DeltaFileEntryWithData::Type() const {
60 // If deletion entry has data then it's not a real deletion entry
61 // but an update entry. Real deletion entry never has data.
62 if (data_set_
) return "add";
// NOTE(review): several lines of this function are not visible in this
// listing (the declaration of |id|, the handling of URLs that already fit
// within |kIdLengthLimit|, and the final return) - confirm against the full
// file before editing.
66 // Generates a unique ID for a given URL.
67 // It must be shorter than or equal to |kIdLengthLimit| characters.
68 // If URL is shorter than or equal to |kIdLengthLimit| then ID is the URL
69 // itself. Otherwise it has a form of 3 concatenated parts:
70 // 1. Length of URL. Zero-padded integer to width |kUrlLengthWidth|,
71 // because URLs are limited to 20M in Chrome.
72 // 2. SHA-256 of URL which takes 64 characters.
73 // 3. Prefix of URL of size |kIdLengthLimit| - 64 - |kUrlLengthWidth|.
74 std::string
DeltaFileEntryWithData::UrlToId(const std::string
& url
) {
// URLs above |kUrlLengthLimit| yield an error string instead of an ID.
75 if (url
.size() > kUrlLengthLimit
) {
76 return "error: url too long";
85 // 1. Zero-padded URL length to width |kUrlLengthWidth|.
86 id
<< std::setfill('0') << std::setw(kUrlLengthWidth
) << url
.size();
// Hash the whole URL and append the digest hex-encoded, i.e. as
// 2 * |kSHA256ByteSize| characters.
89 uint8 hash
[kSHA256ByteSize
];
90 crypto::SHA256HashString(url
, hash
, sizeof(hash
));
91 id
<< base::HexEncode(hash
, sizeof(hash
));
93 // 3. Prefix of URL to fill rest of the space.
94 id
<< url
.substr(0, kIdLengthLimit
- 2 * kSHA256ByteSize
- kUrlLengthWidth
);
99 // ID which identifies URL of this entry.
100 std::string
DeltaFileEntryWithData::Id() const {
101 return UrlToId(entry_
.url());
// Presumably returns the URL of this entry.
// NOTE(review): the body of this function is not visible in this listing -
// confirm against the full file.
104 std::string
DeltaFileEntryWithData::Url() const {
108 base::string16
DeltaFileEntryWithData::Title() const {
109 if (!Valid()) return base::UTF8ToUTF16("");
110 if (is_bookmark_
&& !bookmark_title_
.empty()) return bookmark_title_
;
111 if (data_
.title().empty()) return base::UTF8ToUTF16(data_
.url().host());
112 return data_
.title();
115 int32
DeltaFileEntryWithData::Score() const {
116 if (!Valid()) return 0;
117 int32 score
= data_
.visit_count() + data_
.typed_count();
118 if (is_bookmark_
) score
= (score
+ 1) * kBookmarkScoreBonusMultiplier
;
122 std::string
DeltaFileEntryWithData::IndexedUrl() const {
123 if (!Valid()) return "";
124 std::string indexed_url
= data_
.url().host();
125 StripTopLevelDomain(&indexed_url
);
126 StripCommonSubDomains(&indexed_url
);
130 bool DeltaFileEntryWithData::Valid() const {
131 return entry_
.type() == "del" || is_bookmark_
||
132 (data_set_
&& !data_
.hidden());
// Presumably stores |data| as the history data of this entry.
// NOTE(review): the body of this function is not visible in this listing -
// confirm against the full file.
135 void DeltaFileEntryWithData::SetData(const history::URLRow
& data
) {
// Records the title of |bookmark| in |bookmark_title_|.
// NOTE(review): the line(s) preceding the title assignment (presumably the
// one setting |is_bookmark_|) and the closing brace are not visible in this
// listing - confirm against the full file.
140 void DeltaFileEntryWithData::MarkAsBookmark(
141 const BookmarkModel::URLAndTitle
& bookmark
) {
143 bookmark_title_
= bookmark
.title
;
147 bool DeltaFileEntryWithData::IsValidId(const std::string
& url
) {
148 return url
.size() <= kIdLengthLimit
;
151 } // namespace history_report