// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #include "components/metrics/persisted_logs.h"
9 #include "base/base64.h"
11 #include "base/metrics/histogram.h"
12 #include "base/prefs/pref_service.h"
13 #include "base/prefs/scoped_user_pref_update.h"
14 #include "base/sha1.h"
15 #include "base/timer/elapsed_timer.h"
16 #include "components/metrics/compression_utils.h"
22 PersistedLogs::LogReadStatus
MakeRecallStatusHistogram(
23 PersistedLogs::LogReadStatus status
) {
24 UMA_HISTOGRAM_ENUMERATION("PrefService.PersistentLogRecallProtobufs",
25 status
, PersistedLogs::END_RECALL_STATUS
);
29 // Reads the value at |index| from |list_value| as a string and Base64-decodes
30 // it into |result|. Returns true on success.
31 bool ReadBase64String(const base::ListValue
& list_value
,
33 std::string
* result
) {
34 std::string base64_result
;
35 if (!list_value
.GetString(index
, &base64_result
))
37 return base::Base64Decode(base64_result
, result
);
40 // Base64-encodes |str| and appends the result to |list_value|.
41 void AppendBase64String(const std::string
& str
, base::ListValue
* list_value
) {
42 std::string base64_str
;
43 base::Base64Encode(str
, &base64_str
);
44 list_value
->Append(base::Value::CreateStringValue(base64_str
));
49 void PersistedLogs::LogHashPair::Init(const std::string
& log_data
) {
50 DCHECK(!log_data
.empty());
52 if (!GzipCompress(log_data
, &compressed_log_data
)) {
57 UMA_HISTOGRAM_PERCENTAGE(
58 "UMA.ProtoCompressionRatio",
59 static_cast<int>(100 * compressed_log_data
.size() / log_data
.size()));
60 UMA_HISTOGRAM_CUSTOM_COUNTS(
61 "UMA.ProtoGzippedKBSaved",
62 static_cast<int>((log_data
.size() - compressed_log_data
.size()) / 1024),
65 hash
= base::SHA1HashString(log_data
);
68 void PersistedLogs::LogHashPair::Clear() {
69 compressed_log_data
.clear();
73 void PersistedLogs::LogHashPair::Swap(PersistedLogs::LogHashPair
* input
) {
74 compressed_log_data
.swap(input
->compressed_log_data
);
75 hash
.swap(input
->hash
);
78 PersistedLogs::PersistedLogs(PrefService
* local_state
,
79 const char* pref_name
,
80 const char* old_pref_name
,
84 : local_state_(local_state
),
85 pref_name_(pref_name
),
86 old_pref_name_(old_pref_name
),
87 min_log_count_(min_log_count
),
88 min_log_bytes_(min_log_bytes
),
89 max_log_size_(max_log_size
),
90 last_provisional_store_index_(-1) {
92 // One of the limit arguments must be non-zero.
93 DCHECK(min_log_count_
> 0 || min_log_bytes_
> 0);
96 PersistedLogs::~PersistedLogs() {}
98 void PersistedLogs::SerializeLogs() {
99 // Remove any logs that are over the serialization size limit.
101 for (std::vector
<LogHashPair
>::iterator it
= list_
.begin();
102 it
!= list_
.end();) {
103 size_t log_size
= it
->compressed_log_data
.length();
104 if (log_size
> max_log_size_
) {
105 UMA_HISTOGRAM_COUNTS("UMA.Large Accumulated Log Not Persisted",
106 static_cast<int>(log_size
));
107 it
= list_
.erase(it
);
114 ListPrefUpdate
update(local_state_
, pref_name_
);
115 WriteLogsToPrefList(update
.Get());
117 // Clear the old pref now that we've written to the new one.
118 // TODO(asvitkine): Remove the old pref in M39.
119 local_state_
->ClearPref(old_pref_name_
);
122 PersistedLogs::LogReadStatus
PersistedLogs::DeserializeLogs() {
123 // First, try reading from old pref. If it's empty, read from the new one.
124 // TODO(asvitkine): Remove the old pref in M39.
125 const base::ListValue
* unsent_logs
= local_state_
->GetList(old_pref_name_
);
126 if (!unsent_logs
->empty())
127 return ReadLogsFromOldPrefList(*unsent_logs
);
129 unsent_logs
= local_state_
->GetList(pref_name_
);
130 return ReadLogsFromPrefList(*unsent_logs
);
133 void PersistedLogs::StoreLog(const std::string
& log_data
) {
134 list_
.push_back(LogHashPair());
135 list_
.back().Init(log_data
);
138 void PersistedLogs::StageLog() {
139 // CHECK, rather than DCHECK, because swap()ing with an empty list causes
140 // hard-to-identify crashes much later.
141 CHECK(!list_
.empty());
142 DCHECK(!has_staged_log());
143 staged_log_
.Swap(&list_
.back());
146 // If the staged log was the last provisional store, clear that.
147 if (static_cast<size_t>(last_provisional_store_index_
) == list_
.size())
148 last_provisional_store_index_
= -1;
149 DCHECK(has_staged_log());
152 void PersistedLogs::DiscardStagedLog() {
153 DCHECK(has_staged_log());
157 void PersistedLogs::StoreStagedLogAsUnsent(StoreType store_type
) {
158 list_
.push_back(LogHashPair());
159 list_
.back().Swap(&staged_log_
);
160 if (store_type
== PROVISIONAL_STORE
)
161 last_provisional_store_index_
= list_
.size() - 1;
164 void PersistedLogs::DiscardLastProvisionalStore() {
165 if (last_provisional_store_index_
== -1)
167 DCHECK_LT(static_cast<size_t>(last_provisional_store_index_
), list_
.size());
168 list_
.erase(list_
.begin() + last_provisional_store_index_
);
169 last_provisional_store_index_
= -1;
172 void PersistedLogs::WriteLogsToPrefList(base::ListValue
* list_value
) {
174 // Leave the list completely empty if there are no storable values.
179 // If there are too many logs, keep the most recent logs up to the length
180 // limit, and at least to the minimum number of bytes.
181 if (list_
.size() > min_log_count_
) {
182 start
= list_
.size();
183 size_t bytes_used
= 0;
184 std::vector
<LogHashPair
>::const_reverse_iterator end
= list_
.rend();
185 for (std::vector
<LogHashPair
>::const_reverse_iterator it
= list_
.rbegin();
187 const size_t log_size
= it
->compressed_log_data
.length();
188 if (bytes_used
>= min_log_bytes_
&&
189 (list_
.size() - start
) >= min_log_count_
) {
192 bytes_used
+= log_size
;
196 DCHECK_LT(start
, list_
.size());
198 for (size_t i
= start
; i
< list_
.size(); ++i
) {
199 AppendBase64String(list_
[i
].compressed_log_data
, list_value
);
200 AppendBase64String(list_
[i
].hash
, list_value
);
204 PersistedLogs::LogReadStatus
PersistedLogs::ReadLogsFromPrefList(
205 const base::ListValue
& list_value
) {
206 if (list_value
.empty())
207 return MakeRecallStatusHistogram(LIST_EMPTY
);
209 // For each log, there's two entries in the list (the data and the hash).
210 DCHECK_EQ(0U, list_value
.GetSize() % 2);
211 const size_t log_count
= list_value
.GetSize() / 2;
213 // Resize |list_| ahead of time, so that values can be decoded directly into
214 // the elements of the list.
215 DCHECK(list_
.empty());
216 list_
.resize(log_count
);
218 for (size_t i
= 0; i
< log_count
; ++i
) {
219 if (!ReadBase64String(list_value
, i
* 2, &list_
[i
].compressed_log_data
) ||
220 !ReadBase64String(list_value
, i
* 2 + 1, &list_
[i
].hash
)) {
222 return MakeRecallStatusHistogram(LOG_STRING_CORRUPTION
);
226 return MakeRecallStatusHistogram(RECALL_SUCCESS
);
229 PersistedLogs::LogReadStatus
PersistedLogs::ReadLogsFromOldPrefList(
230 const base::ListValue
& list_value
) {
231 // We append (2) more elements to persisted lists: the size of the list and a
232 // checksum of the elements.
233 const size_t kChecksumEntryCount
= 2;
235 if (list_value
.GetSize() == 0)
236 return MakeRecallStatusHistogram(LIST_EMPTY
);
237 if (list_value
.GetSize() <= kChecksumEntryCount
)
238 return MakeRecallStatusHistogram(LIST_SIZE_TOO_SMALL
);
240 // The size is stored at the beginning of the list_value.
242 bool valid
= (*list_value
.begin())->GetAsInteger(&size
);
244 return MakeRecallStatusHistogram(LIST_SIZE_MISSING
);
245 // Account for checksum and size included in the list_value.
246 if (static_cast<size_t>(size
) != list_value
.GetSize() - kChecksumEntryCount
)
247 return MakeRecallStatusHistogram(LIST_SIZE_CORRUPTION
);
249 // Allocate strings for all of the logs we are going to read in.
250 // Do this ahead of time so that we can decode the string values directly into
251 // the elements of |list_|, and thereby avoid making copies of the
252 // serialized logs, which can be fairly large.
253 DCHECK(list_
.empty());
256 base::MD5Context ctx
;
258 std::string encoded_log
;
259 size_t local_index
= 0;
260 for (base::ListValue::const_iterator it
= list_value
.begin() + 1;
261 it
!= list_value
.end() - 1; // Last element is the checksum.
262 ++it
, ++local_index
) {
263 bool valid
= (*it
)->GetAsString(&encoded_log
);
266 return MakeRecallStatusHistogram(LOG_STRING_CORRUPTION
);
269 base::MD5Update(&ctx
, encoded_log
);
271 std::string log_text
;
272 if (!base::Base64Decode(encoded_log
, &log_text
)) {
274 return MakeRecallStatusHistogram(DECODE_FAIL
);
277 DCHECK_LT(local_index
, list_
.size());
278 list_
[local_index
].Init(log_text
);
282 base::MD5Digest digest
;
283 base::MD5Final(&digest
, &ctx
);
284 std::string recovered_md5
;
285 // We store the hash at the end of the list_value.
286 valid
= (*(list_value
.end() - 1))->GetAsString(&recovered_md5
);
289 return MakeRecallStatusHistogram(CHECKSUM_STRING_CORRUPTION
);
291 if (recovered_md5
!= base::MD5DigestToBase16(digest
)) {
293 return MakeRecallStatusHistogram(CHECKSUM_CORRUPTION
);
295 return MakeRecallStatusHistogram(RECALL_SUCCESS
);
298 } // namespace metrics