Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / content / browser / download / save_package.cc
blob98092f48458cd348fcf676eecfa51ba4303ec30c
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/browser/download/save_package.h"
7 #include <algorithm>
9 #include "base/bind.h"
10 #include "base/files/file_path.h"
11 #include "base/files/file_util.h"
12 #include "base/i18n/file_util_icu.h"
13 #include "base/logging.h"
14 #include "base/message_loop/message_loop.h"
15 #include "base/stl_util.h"
16 #include "base/strings/string_piece.h"
17 #include "base/strings/string_split.h"
18 #include "base/strings/sys_string_conversions.h"
19 #include "base/strings/utf_string_conversions.h"
20 #include "base/threading/thread.h"
21 #include "components/url_formatter/url_formatter.h"
22 #include "content/browser/download/download_item_impl.h"
23 #include "content/browser/download/download_manager_impl.h"
24 #include "content/browser/download/download_stats.h"
25 #include "content/browser/download/save_file.h"
26 #include "content/browser/download/save_file_manager.h"
27 #include "content/browser/download/save_item.h"
28 #include "content/browser/loader/resource_dispatcher_host_impl.h"
29 #include "content/browser/renderer_host/render_process_host_impl.h"
30 #include "content/browser/renderer_host/render_view_host_delegate.h"
31 #include "content/browser/renderer_host/render_view_host_impl.h"
32 #include "content/common/frame_messages.h"
33 #include "content/common/view_messages.h"
34 #include "content/public/browser/browser_context.h"
35 #include "content/public/browser/browser_thread.h"
36 #include "content/public/browser/content_browser_client.h"
37 #include "content/public/browser/download_manager_delegate.h"
38 #include "content/public/browser/navigation_entry.h"
39 #include "content/public/browser/notification_service.h"
40 #include "content/public/browser/notification_types.h"
41 #include "content/public/browser/render_frame_host.h"
42 #include "content/public/browser/resource_context.h"
43 #include "content/public/browser/web_contents.h"
44 #include "net/base/filename_util.h"
45 #include "net/base/io_buffer.h"
46 #include "net/base/mime_util.h"
47 #include "net/url_request/url_request_context.h"
48 #include "third_party/WebKit/public/web/WebPageSerializerClient.h"
49 #include "url/url_constants.h"
51 using base::Time;
52 using blink::WebPageSerializerClient;
54 namespace content {
55 namespace {
57 // A counter for uniquely identifying each save package.
58 int g_save_package_id = 0;
60 // Default name which will be used when we can not get proper name from
61 // resource URL.
62 const char kDefaultSaveName[] = "saved_resource";
64 // Maximum number of file ordinal number. I think it's big enough for resolving
65 // name-conflict files which has same base file name.
66 const int32 kMaxFileOrdinalNumber = 9999;
68 // Maximum length for file path. Since Windows have MAX_PATH limitation for
69 // file path, we need to make sure length of file path of every saved file
70 // is less than MAX_PATH
71 #if defined(OS_WIN)
72 const uint32 kMaxFilePathLength = MAX_PATH - 1;
73 #elif defined(OS_POSIX)
74 const uint32 kMaxFilePathLength = PATH_MAX - 1;
75 #endif
77 // Maximum length for file ordinal number part. Since we only support the
78 // maximum 9999 for ordinal number, which means maximum file ordinal number part
79 // should be "(9998)", so the value is 6.
80 const uint32 kMaxFileOrdinalNumberPartLength = 6;
82 // Strip current ordinal number, if any. Should only be used on pure
83 // file names, i.e. those stripped of their extensions.
84 // TODO(estade): improve this to not choke on alternate encodings.
85 base::FilePath::StringType StripOrdinalNumber(
86 const base::FilePath::StringType& pure_file_name) {
87 base::FilePath::StringType::size_type r_paren_index =
88 pure_file_name.rfind(FILE_PATH_LITERAL(')'));
89 base::FilePath::StringType::size_type l_paren_index =
90 pure_file_name.rfind(FILE_PATH_LITERAL('('));
91 if (l_paren_index >= r_paren_index)
92 return pure_file_name;
94 for (base::FilePath::StringType::size_type i = l_paren_index + 1;
95 i != r_paren_index; ++i) {
96 if (!base::IsAsciiDigit(pure_file_name[i]))
97 return pure_file_name;
100 return pure_file_name.substr(0, l_paren_index);
// Returns true when content with the given MIME type can be saved as
// complete HTML. That is the case for exactly two MIME types: "text/html"
// and "application/xhtml+xml". The comparison is case sensitive.
bool CanSaveAsComplete(const std::string& contents_mime_type) {
  if (contents_mime_type == "text/html")
    return true;
  return contents_mime_type == "application/xhtml+xml";
}
111 // Request handle for SavePackage downloads. Currently doesn't support
112 // pause/resume/cancel, but returns a WebContents.
113 class SavePackageRequestHandle : public DownloadRequestHandleInterface {
114 public:
115 SavePackageRequestHandle(base::WeakPtr<SavePackage> save_package)
116 : save_package_(save_package) {}
118 // DownloadRequestHandleInterface
119 WebContents* GetWebContents() const override {
120 return save_package_.get() ? save_package_->web_contents() : NULL;
122 DownloadManager* GetDownloadManager() const override { return NULL; }
123 void PauseRequest() const override {}
124 void ResumeRequest() const override {}
125 void CancelRequest() const override {}
126 std::string DebugString() const override {
127 return "SavePackage DownloadRequestHandle";
130 private:
131 base::WeakPtr<SavePackage> save_package_;
134 } // namespace
136 const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] =
137 FILE_PATH_LITERAL("html");
139 SavePackage::SavePackage(WebContents* web_contents,
140 SavePageType save_type,
141 const base::FilePath& file_full_path,
142 const base::FilePath& directory_full_path)
143 : WebContentsObserver(web_contents),
144 number_of_frames_pending_response_(0),
145 file_manager_(NULL),
146 download_manager_(NULL),
147 download_(NULL),
148 page_url_(GetUrlToBeSaved()),
149 saved_main_file_path_(file_full_path),
150 saved_main_directory_path_(directory_full_path),
151 title_(web_contents->GetTitle()),
152 start_tick_(base::TimeTicks::Now()),
153 finished_(false),
154 mhtml_finishing_(false),
155 user_canceled_(false),
156 disk_error_occurred_(false),
157 save_type_(save_type),
158 all_save_items_count_(0),
159 file_name_set_(&base::FilePath::CompareLessIgnoreCase),
160 wait_state_(INITIALIZE),
161 contents_id_(web_contents->GetRenderProcessHost()->GetID()),
162 unique_id_(g_save_package_id++),
163 wrote_to_completed_file_(false),
164 wrote_to_failed_file_(false) {
165 DCHECK(page_url_.is_valid());
166 DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
167 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
168 (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML));
169 DCHECK(!saved_main_file_path_.empty() &&
170 saved_main_file_path_.value().length() <= kMaxFilePathLength);
171 DCHECK(!saved_main_directory_path_.empty() &&
172 saved_main_directory_path_.value().length() < kMaxFilePathLength);
173 InternalInit();
176 SavePackage::SavePackage(WebContents* web_contents)
177 : WebContentsObserver(web_contents),
178 number_of_frames_pending_response_(0),
179 file_manager_(NULL),
180 download_manager_(NULL),
181 download_(NULL),
182 page_url_(GetUrlToBeSaved()),
183 title_(web_contents->GetTitle()),
184 start_tick_(base::TimeTicks::Now()),
185 finished_(false),
186 mhtml_finishing_(false),
187 user_canceled_(false),
188 disk_error_occurred_(false),
189 save_type_(SAVE_PAGE_TYPE_UNKNOWN),
190 all_save_items_count_(0),
191 file_name_set_(&base::FilePath::CompareLessIgnoreCase),
192 wait_state_(INITIALIZE),
193 contents_id_(web_contents->GetRenderProcessHost()->GetID()),
194 unique_id_(g_save_package_id++),
195 wrote_to_completed_file_(false),
196 wrote_to_failed_file_(false) {
197 DCHECK(page_url_.is_valid());
198 InternalInit();
201 // This is for testing use. Set |finished_| as true because we don't want
202 // method Cancel to be be called in destructor in test mode.
203 // We also don't call InternalInit().
204 SavePackage::SavePackage(WebContents* web_contents,
205 const base::FilePath& file_full_path,
206 const base::FilePath& directory_full_path)
207 : WebContentsObserver(web_contents),
208 file_manager_(NULL),
209 download_manager_(NULL),
210 download_(NULL),
211 saved_main_file_path_(file_full_path),
212 saved_main_directory_path_(directory_full_path),
213 start_tick_(base::TimeTicks::Now()),
214 finished_(true),
215 mhtml_finishing_(false),
216 user_canceled_(false),
217 disk_error_occurred_(false),
218 save_type_(SAVE_PAGE_TYPE_UNKNOWN),
219 all_save_items_count_(0),
220 file_name_set_(&base::FilePath::CompareLessIgnoreCase),
221 wait_state_(INITIALIZE),
222 contents_id_(0),
223 unique_id_(g_save_package_id++),
224 wrote_to_completed_file_(false),
225 wrote_to_failed_file_(false) {
228 SavePackage::~SavePackage() {
229 // Stop receiving saving job's updates
230 if (!finished_ && !canceled()) {
231 // Unexpected quit.
232 Cancel(true);
235 // We should no longer be observing the DownloadItem at this point.
236 CHECK(!download_);
238 DCHECK(all_save_items_count_ == (waiting_item_queue_.size() +
239 completed_count() +
240 in_process_count()));
241 // Free all SaveItems.
242 while (!waiting_item_queue_.empty()) {
243 // We still have some items which are waiting for start to save.
244 SaveItem* save_item = waiting_item_queue_.front();
245 waiting_item_queue_.pop();
246 delete save_item;
249 STLDeleteValues(&saved_success_items_);
250 STLDeleteValues(&in_progress_items_);
251 STLDeleteValues(&saved_failed_items_);
253 file_manager_ = NULL;
256 GURL SavePackage::GetUrlToBeSaved() {
257 // Instead of using web_contents_.GetURL here, we use url() (which is the
258 // "real" url of the page) from the NavigationEntry because it reflects its
259 // origin rather than the displayed one (returned by GetURL) which may be
260 // different (like having "view-source:" on the front).
261 NavigationEntry* visible_entry =
262 web_contents()->GetController().GetVisibleEntry();
263 return visible_entry ? visible_entry->GetURL() : GURL::EmptyGURL();
266 void SavePackage::Cancel(bool user_action) {
267 if (!canceled()) {
268 if (user_action)
269 user_canceled_ = true;
270 else
271 disk_error_occurred_ = true;
272 Stop();
274 RecordSavePackageEvent(SAVE_PACKAGE_CANCELLED);
277 // Init() can be called directly, or indirectly via GetSaveInfo(). In both
278 // cases, we need file_manager_ to be initialized, so we do this first.
279 void SavePackage::InternalInit() {
280 ResourceDispatcherHostImpl* rdh = ResourceDispatcherHostImpl::Get();
281 if (!rdh) {
282 NOTREACHED();
283 return;
286 file_manager_ = rdh->save_file_manager();
287 DCHECK(file_manager_);
289 download_manager_ = static_cast<DownloadManagerImpl*>(
290 BrowserContext::GetDownloadManager(
291 web_contents()->GetBrowserContext()));
292 DCHECK(download_manager_);
294 RecordSavePackageEvent(SAVE_PACKAGE_STARTED);
297 bool SavePackage::Init(
298 const SavePackageDownloadCreatedCallback& download_created_callback) {
299 DCHECK_CURRENTLY_ON(BrowserThread::UI);
300 // Set proper running state.
301 if (wait_state_ != INITIALIZE)
302 return false;
304 wait_state_ = START_PROCESS;
306 // Initialize the request context and resource dispatcher.
307 BrowserContext* browser_context = web_contents()->GetBrowserContext();
308 if (!browser_context) {
309 NOTREACHED();
310 return false;
313 scoped_ptr<DownloadRequestHandleInterface> request_handle(
314 new SavePackageRequestHandle(AsWeakPtr()));
315 // The download manager keeps ownership but adds us as an observer.
316 download_manager_->CreateSavePackageDownloadItem(
317 saved_main_file_path_,
318 page_url_,
319 ((save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ?
320 "multipart/related" : "text/html"),
321 request_handle.Pass(),
322 base::Bind(&SavePackage::InitWithDownloadItem, AsWeakPtr(),
323 download_created_callback));
324 return true;
327 void SavePackage::InitWithDownloadItem(
328 const SavePackageDownloadCreatedCallback& download_created_callback,
329 DownloadItemImpl* item) {
330 DCHECK_CURRENTLY_ON(BrowserThread::UI);
331 DCHECK(item);
332 download_ = item;
333 download_->AddObserver(this);
334 // Confirm above didn't delete the tab out from under us.
335 if (!download_created_callback.is_null())
336 download_created_callback.Run(download_);
338 // Check save type and process the save page job.
339 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
340 // Get directory
341 DCHECK(!saved_main_directory_path_.empty());
342 GetSavableResourceLinksForCurrentPage();
343 } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) {
344 web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind(
345 &SavePackage::OnMHTMLGenerated, this));
346 } else {
347 DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_;
348 wait_state_ = NET_FILES;
349 SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ?
350 SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
351 SaveFileCreateInfo::SAVE_FILE_FROM_NET;
352 SaveItem* save_item = new SaveItem(page_url_,
353 Referrer(),
354 this,
355 save_source);
356 // Add this item to waiting list.
357 waiting_item_queue_.push(save_item);
358 all_save_items_count_ = 1;
359 download_->SetTotalBytes(1);
361 DoSavingProcess();
365 void SavePackage::OnMHTMLGenerated(int64 size) {
366 if (size <= 0) {
367 Cancel(false);
368 return;
370 wrote_to_completed_file_ = true;
372 // Hack to avoid touching download_ after user cancel.
373 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
374 // with SavePackage flow.
375 if (download_->GetState() == DownloadItem::IN_PROGRESS) {
376 download_->SetTotalBytes(size);
377 download_->DestinationUpdate(size, 0, std::string());
378 // Must call OnAllDataSaved here in order for
379 // GDataDownloadObserver::ShouldUpload() to return true.
380 // ShouldCompleteDownload() may depend on the gdata uploader to finish.
381 download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
384 if (!download_manager_->GetDelegate()) {
385 Finish();
386 return;
389 if (download_manager_->GetDelegate()->ShouldCompleteDownload(
390 download_, base::Bind(&SavePackage::Finish, this))) {
391 Finish();
395 // On POSIX, the length of |pure_file_name| + |file_name_ext| is further
396 // restricted by NAME_MAX. The maximum allowed path looks like:
397 // '/path/to/save_dir' + '/' + NAME_MAX.
398 uint32 SavePackage::GetMaxPathLengthForDirectory(
399 const base::FilePath& base_dir) {
400 #if defined(OS_POSIX)
401 return std::min(kMaxFilePathLength,
402 static_cast<uint32>(base_dir.value().length()) +
403 NAME_MAX + 1);
404 #else
405 return kMaxFilePathLength;
406 #endif
409 // File name is considered being consist of pure file name, dot and file
410 // extension name. File name might has no dot and file extension, or has
411 // multiple dot inside file name. The dot, which separates the pure file
412 // name and file extension name, is last dot in the whole file name.
413 // This function is for making sure the length of specified file path is not
414 // great than the specified maximum length of file path and getting safe pure
415 // file name part if the input pure file name is too long.
416 // The parameter |dir_path| specifies directory part of the specified
417 // file path. The parameter |file_name_ext| specifies file extension
418 // name part of the specified file path (including start dot). The parameter
419 // |max_file_path_len| specifies maximum length of the specified file path.
420 // The parameter |pure_file_name| input pure file name part of the specified
421 // file path. If the length of specified file path is great than
422 // |max_file_path_len|, the |pure_file_name| will output new pure file name
423 // part for making sure the length of specified file path is less than
424 // specified maximum length of file path. Return false if the function can
425 // not get a safe pure file name, otherwise it returns true.
426 bool SavePackage::GetSafePureFileName(
427 const base::FilePath& dir_path,
428 const base::FilePath::StringType& file_name_ext,
429 uint32 max_file_path_len,
430 base::FilePath::StringType* pure_file_name) {
431 DCHECK(!pure_file_name->empty());
432 int available_length = static_cast<int>(max_file_path_len -
433 dir_path.value().length() -
434 file_name_ext.length());
435 // Need an extra space for the separator.
436 if (!dir_path.EndsWithSeparator())
437 --available_length;
439 // Plenty of room.
440 if (static_cast<int>(pure_file_name->length()) <= available_length)
441 return true;
443 // Limited room. Truncate |pure_file_name| to fit.
444 if (available_length > 0) {
445 *pure_file_name = pure_file_name->substr(0, available_length);
446 return true;
449 // Not enough room to even use a shortened |pure_file_name|.
450 pure_file_name->clear();
451 return false;
454 // Generate name for saving resource.
455 bool SavePackage::GenerateFileName(const std::string& disposition,
456 const GURL& url,
457 bool need_html_ext,
458 base::FilePath::StringType* generated_name) {
459 // TODO(jungshik): Figure out the referrer charset when having one
460 // makes sense and pass it to GenerateFileName.
461 base::FilePath file_path = net::GenerateFileName(url,
462 disposition,
463 std::string(),
464 std::string(),
465 std::string(),
466 kDefaultSaveName);
468 DCHECK(!file_path.empty());
469 base::FilePath::StringType pure_file_name =
470 file_path.RemoveExtension().BaseName().value();
471 base::FilePath::StringType file_name_ext = file_path.Extension();
473 // If it is HTML resource, use ".html" as its extension.
474 if (need_html_ext) {
475 file_name_ext = FILE_PATH_LITERAL(".");
476 file_name_ext.append(kDefaultHtmlExtension);
479 // Need to make sure the suggested file name is not too long.
480 uint32 max_path = GetMaxPathLengthForDirectory(saved_main_directory_path_);
482 // Get safe pure file name.
483 if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
484 max_path, &pure_file_name))
485 return false;
487 base::FilePath::StringType file_name = pure_file_name + file_name_ext;
489 // Check whether we already have same name in a case insensitive manner.
490 FileNameSet::const_iterator iter = file_name_set_.find(file_name);
491 if (iter == file_name_set_.end()) {
492 file_name_set_.insert(file_name);
493 } else {
494 // Found same name, increase the ordinal number for the file name.
495 pure_file_name =
496 base::FilePath(*iter).RemoveExtension().BaseName().value();
497 base::FilePath::StringType base_file_name =
498 StripOrdinalNumber(pure_file_name);
500 // We need to make sure the length of base file name plus maximum ordinal
501 // number path will be less than or equal to kMaxFilePathLength.
502 if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
503 max_path - kMaxFileOrdinalNumberPartLength, &base_file_name))
504 return false;
506 // Prepare the new ordinal number.
507 uint32 ordinal_number;
508 FileNameCountMap::iterator it = file_name_count_map_.find(base_file_name);
509 if (it == file_name_count_map_.end()) {
510 // First base-name-conflict resolving, use 1 as initial ordinal number.
511 file_name_count_map_[base_file_name] = 1;
512 ordinal_number = 1;
513 } else {
514 // We have met same base-name conflict, use latest ordinal number.
515 ordinal_number = it->second;
518 if (ordinal_number > (kMaxFileOrdinalNumber - 1)) {
519 // Use a random file from temporary file.
520 base::FilePath temp_file;
521 base::CreateTemporaryFile(&temp_file);
522 file_name = temp_file.RemoveExtension().BaseName().value();
523 // Get safe pure file name.
524 if (!GetSafePureFileName(saved_main_directory_path_,
525 base::FilePath::StringType(),
526 max_path, &file_name))
527 return false;
528 } else {
529 for (int i = ordinal_number; i < kMaxFileOrdinalNumber; ++i) {
530 base::FilePath::StringType new_name = base_file_name +
531 base::StringPrintf(FILE_PATH_LITERAL("(%d)"), i) + file_name_ext;
532 if (file_name_set_.find(new_name) == file_name_set_.end()) {
533 // Resolved name conflict.
534 file_name = new_name;
535 file_name_count_map_[base_file_name] = ++i;
536 break;
541 file_name_set_.insert(file_name);
544 DCHECK(!file_name.empty());
545 generated_name->assign(file_name);
547 return true;
550 // We have received a message from SaveFileManager about a new saving job. We
551 // create a SaveItem and store it in our in_progress list.
552 void SavePackage::StartSave(const SaveFileCreateInfo* info) {
553 DCHECK(info && !info->url.is_empty());
555 SaveUrlItemMap::iterator it = in_progress_items_.find(info->url.spec());
556 if (it == in_progress_items_.end()) {
557 // If not found, we must have cancel action.
558 DCHECK(canceled());
559 return;
561 SaveItem* save_item = it->second;
563 DCHECK(!saved_main_file_path_.empty());
565 save_item->SetSaveId(info->save_id);
566 save_item->SetTotalBytes(info->total_bytes);
568 // Determine the proper path for a saving job, by choosing either the default
569 // save directory, or prompting the user.
570 DCHECK(!save_item->has_final_name());
571 if (info->url != page_url_) {
572 base::FilePath::StringType generated_name;
573 // For HTML resource file, make sure it will have .htm as extension name,
574 // otherwise, when you open the saved page in Chrome again, download
575 // file manager will treat it as downloadable resource, and download it
576 // instead of opening it as HTML.
577 bool need_html_ext =
578 info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM;
579 if (!GenerateFileName(info->content_disposition,
580 GURL(info->url),
581 need_html_ext,
582 &generated_name)) {
583 // We can not generate file name for this SaveItem, so we cancel the
584 // saving page job if the save source is from serialized DOM data.
585 // Otherwise, it means this SaveItem is sub-resource type, we treat it
586 // as an error happened on saving. We can ignore this type error for
587 // sub-resource links which will be resolved as absolute links instead
588 // of local links in final saved contents.
589 if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)
590 Cancel(true);
591 else
592 SaveFinished(save_item->save_id(), 0, false);
593 return;
596 // When saving page as only-HTML, we only have a SaveItem whose url
597 // must be page_url_.
598 DCHECK(save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML);
599 DCHECK(!saved_main_directory_path_.empty());
601 // Now we get final name retrieved from GenerateFileName, we will use it
602 // rename the SaveItem.
603 base::FilePath final_name =
604 saved_main_directory_path_.Append(generated_name);
605 save_item->Rename(final_name);
606 } else {
607 // It is the main HTML file, use the name chosen by the user.
608 save_item->Rename(saved_main_file_path_);
611 // If the save source is from file system, inform SaveFileManager to copy
612 // corresponding file to the file path which this SaveItem specifies.
613 if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_FILE) {
614 BrowserThread::PostTask(
615 BrowserThread::FILE, FROM_HERE,
616 base::Bind(&SaveFileManager::SaveLocalFile,
617 file_manager_,
618 save_item->url(),
619 save_item->save_id(),
620 contents_id()));
621 return;
624 // Check whether we begin to require serialized HTML data.
625 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
626 wait_state_ == HTML_DATA) {
627 // Inform backend to serialize the all frames' DOM and send serialized
628 // HTML data back.
629 GetSerializedHtmlDataForCurrentPageWithLocalLinks();
633 SaveItem* SavePackage::LookupItemInProcessBySaveId(int32 save_id) {
634 if (in_process_count()) {
635 for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
636 it != in_progress_items_.end(); ++it) {
637 SaveItem* save_item = it->second;
638 DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
639 if (save_item->save_id() == save_id)
640 return save_item;
643 return NULL;
646 void SavePackage::PutInProgressItemToSavedMap(SaveItem* save_item) {
647 SaveUrlItemMap::iterator it = in_progress_items_.find(
648 save_item->url().spec());
649 DCHECK(it != in_progress_items_.end());
650 DCHECK(save_item == it->second);
651 in_progress_items_.erase(it);
653 if (save_item->success()) {
654 // Add it to saved_success_items_.
655 DCHECK(saved_success_items_.find(save_item->save_id()) ==
656 saved_success_items_.end());
657 saved_success_items_[save_item->save_id()] = save_item;
658 } else {
659 // Add it to saved_failed_items_.
660 DCHECK(saved_failed_items_.find(save_item->url().spec()) ==
661 saved_failed_items_.end());
662 saved_failed_items_[save_item->url().spec()] = save_item;
666 // Called for updating saving state.
667 bool SavePackage::UpdateSaveProgress(int32 save_id,
668 int64 size,
669 bool write_success) {
670 // Because we might have canceled this saving job before,
671 // so we might not find corresponding SaveItem.
672 SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
673 if (!save_item)
674 return false;
676 save_item->Update(size);
678 // If we got disk error, cancel whole save page job.
679 if (!write_success) {
680 // Cancel job with reason of disk error.
681 Cancel(false);
683 return true;
686 // Stop all page saving jobs that are in progress and instruct the file thread
687 // to delete all saved files.
688 void SavePackage::Stop() {
689 // If we haven't moved out of the initial state, there's nothing to cancel and
690 // there won't be valid pointers for file_manager_ or download_.
691 if (wait_state_ == INITIALIZE)
692 return;
694 // When stopping, if it still has some items in in_progress, cancel them.
695 DCHECK(canceled());
696 if (in_process_count()) {
697 SaveUrlItemMap::iterator it = in_progress_items_.begin();
698 for (; it != in_progress_items_.end(); ++it) {
699 SaveItem* save_item = it->second;
700 DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
701 save_item->Cancel();
703 // Remove all in progress item to saved map. For failed items, they will
704 // be put into saved_failed_items_, for successful item, they will be put
705 // into saved_success_items_.
706 while (in_process_count())
707 PutInProgressItemToSavedMap(in_progress_items_.begin()->second);
710 // This vector contains the save ids of the save files which SaveFileManager
711 // needs to remove from its save_file_map_.
712 SaveIDList save_ids;
713 for (SavedItemMap::iterator it = saved_success_items_.begin();
714 it != saved_success_items_.end(); ++it)
715 save_ids.push_back(it->first);
716 for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
717 it != saved_failed_items_.end(); ++it)
718 save_ids.push_back(it->second->save_id());
720 BrowserThread::PostTask(
721 BrowserThread::FILE, FROM_HERE,
722 base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
723 file_manager_,
724 save_ids));
726 finished_ = true;
727 wait_state_ = FAILED;
729 // Inform the DownloadItem we have canceled whole save page job.
730 if (download_) {
731 download_->Cancel(false);
732 FinalizeDownloadEntry();
736 void SavePackage::CheckFinish() {
737 if (in_process_count() || finished_)
738 return;
740 base::FilePath dir = (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
741 saved_success_items_.size() > 1) ?
742 saved_main_directory_path_ : base::FilePath();
744 // This vector contains the final names of all the successfully saved files
745 // along with their save ids. It will be passed to SaveFileManager to do the
746 // renaming job.
747 FinalNameList final_names;
748 for (SavedItemMap::iterator it = saved_success_items_.begin();
749 it != saved_success_items_.end(); ++it)
750 final_names.push_back(std::make_pair(it->first,
751 it->second->full_path()));
753 BrowserThread::PostTask(
754 BrowserThread::FILE, FROM_HERE,
755 base::Bind(&SaveFileManager::RenameAllFiles,
756 file_manager_,
757 final_names,
758 dir,
759 web_contents()->GetRenderProcessHost()->GetID(),
760 web_contents()->GetMainFrame()->GetRoutingID(),
761 id()));
764 // Successfully finished all items of this SavePackage.
765 void SavePackage::Finish() {
766 // User may cancel the job when we're moving files to the final directory.
767 if (canceled())
768 return;
770 wait_state_ = SUCCESSFUL;
771 finished_ = true;
773 // Record finish.
774 RecordSavePackageEvent(SAVE_PACKAGE_FINISHED);
776 // Record any errors that occurred.
777 if (wrote_to_completed_file_) {
778 RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_COMPLETED);
781 if (wrote_to_failed_file_) {
782 RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_FAILED);
785 // This vector contains the save ids of the save files which SaveFileManager
786 // needs to remove from its save_file_map_.
787 SaveIDList save_ids;
788 for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
789 it != saved_failed_items_.end(); ++it)
790 save_ids.push_back(it->second->save_id());
792 BrowserThread::PostTask(
793 BrowserThread::FILE, FROM_HERE,
794 base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
795 file_manager_,
796 save_ids));
798 if (download_) {
799 // Hack to avoid touching download_ after user cancel.
800 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
801 // with SavePackage flow.
802 if (download_->GetState() == DownloadItem::IN_PROGRESS) {
803 if (save_type_ != SAVE_PAGE_TYPE_AS_MHTML) {
804 download_->DestinationUpdate(
805 all_save_items_count_, CurrentSpeed(), std::string());
806 download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
808 download_->MarkAsComplete();
810 FinalizeDownloadEntry();
814 // Called for updating end state.
815 void SavePackage::SaveFinished(int32 save_id, int64 size, bool is_success) {
816 // Because we might have canceled this saving job before,
817 // so we might not find corresponding SaveItem. Just ignore it.
818 SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
819 if (!save_item)
820 return;
822 // Let SaveItem set end state.
823 save_item->Finish(size, is_success);
824 // Remove the associated save id and SavePackage.
825 file_manager_->RemoveSaveFile(save_id, save_item->url(), this);
827 PutInProgressItemToSavedMap(save_item);
829 // Inform the DownloadItem to update UI.
830 // We use the received bytes as number of saved files.
831 // Hack to avoid touching download_ after user cancel.
832 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
833 // with SavePackage flow.
834 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
835 download_->DestinationUpdate(
836 completed_count(), CurrentSpeed(), std::string());
839 if (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM &&
840 save_item->url() == page_url_ && !save_item->received_bytes()) {
841 // If size of main HTML page is 0, treat it as disk error.
842 Cancel(false);
843 return;
846 if (canceled()) {
847 DCHECK(finished_);
848 return;
851 // Continue processing the save page job.
852 DoSavingProcess();
854 // Check whether we can successfully finish whole job.
855 CheckFinish();
858 // Sometimes, the net io will only call SaveFileManager::SaveFinished with
859 // save id -1 when it encounters error. Since in this case, save id will be
860 // -1, so we can only use URL to find which SaveItem is associated with
861 // this error.
862 // Saving an item failed. If it's a sub-resource, ignore it. If the error comes
863 // from serializing HTML data, then cancel saving page.
864 void SavePackage::SaveFailed(const GURL& save_url) {
865 SaveUrlItemMap::iterator it = in_progress_items_.find(save_url.spec());
866 if (it == in_progress_items_.end()) {
867 NOTREACHED(); // Should not exist!
868 return;
870 SaveItem* save_item = it->second;
872 save_item->Finish(0, false);
874 PutInProgressItemToSavedMap(save_item);
876 // Inform the DownloadItem to update UI.
877 // We use the received bytes as number of saved files.
878 // Hack to avoid touching download_ after user cancel.
879 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
880 // with SavePackage flow.
881 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
882 download_->DestinationUpdate(
883 completed_count(), CurrentSpeed(), std::string());
886 if ((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
887 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
888 (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)) {
889 // We got error when saving page. Treat it as disk error.
890 Cancel(true);
893 if (canceled()) {
894 DCHECK(finished_);
895 return;
898 // Continue processing the save page job.
899 DoSavingProcess();
901 CheckFinish();
904 void SavePackage::SaveCanceled(SaveItem* save_item) {
905 // Call the RemoveSaveFile in UI thread.
906 file_manager_->RemoveSaveFile(save_item->save_id(),
907 save_item->url(),
908 this);
909 if (save_item->save_id() != -1)
910 BrowserThread::PostTask(
911 BrowserThread::FILE, FROM_HERE,
912 base::Bind(&SaveFileManager::CancelSave,
913 file_manager_,
914 save_item->save_id()));
917 // Initiate a saving job of a specific URL. We send the request to
918 // SaveFileManager, which will dispatch it to different approach according to
919 // the save source. Parameter process_all_remaining_items indicates whether
920 // we need to save all remaining items.
921 void SavePackage::SaveNextFile(bool process_all_remaining_items) {
922 DCHECK(web_contents());
923 DCHECK(waiting_item_queue_.size());
925 do {
926 // Pop SaveItem from waiting list.
927 SaveItem* save_item = waiting_item_queue_.front();
928 waiting_item_queue_.pop();
930 // Add the item to in_progress_items_.
931 SaveUrlItemMap::iterator it = in_progress_items_.find(
932 save_item->url().spec());
933 DCHECK(it == in_progress_items_.end());
934 in_progress_items_[save_item->url().spec()] = save_item;
935 save_item->Start();
936 file_manager_->SaveURL(save_item->url(),
937 save_item->referrer(),
938 web_contents()->GetRenderProcessHost()->GetID(),
939 routing_id(),
940 web_contents()->GetMainFrame()->GetRoutingID(),
941 save_item->save_source(),
942 save_item->full_path(),
943 web_contents()->
944 GetBrowserContext()->GetResourceContext(),
945 this);
946 } while (process_all_remaining_items && waiting_item_queue_.size());
949 // Calculate the percentage of whole save page job.
950 int SavePackage::PercentComplete() {
951 if (!all_save_items_count_)
952 return 0;
953 else if (!in_process_count())
954 return 100;
955 else
956 return completed_count() / all_save_items_count_;
959 int64 SavePackage::CurrentSpeed() const {
960 base::TimeDelta diff = base::TimeTicks::Now() - start_tick_;
961 int64 diff_ms = diff.InMilliseconds();
962 return diff_ms == 0 ? 0 : completed_count() * 1000 / diff_ms;
965 // Continue processing the save page job after one SaveItem has been
966 // finished.
967 void SavePackage::DoSavingProcess() {
968 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
969 // We guarantee that images and JavaScripts must be downloaded first.
970 // So when finishing all those sub-resources, we will know which
971 // sub-resource's link can be replaced with local file path, which
972 // sub-resource's link need to be replaced with absolute URL which
973 // point to its internet address because it got error when saving its data.
975 // Start a new SaveItem job if we still have job in waiting queue.
976 if (waiting_item_queue_.size()) {
977 DCHECK(wait_state_ == NET_FILES);
978 SaveItem* save_item = waiting_item_queue_.front();
979 if (save_item->save_source() != SaveFileCreateInfo::SAVE_FILE_FROM_DOM) {
980 SaveNextFile(false);
981 } else if (!in_process_count()) {
982 // If there is no in-process SaveItem, it means all sub-resources
983 // have been processed. Now we need to start serializing HTML DOM
984 // for the current page to get the generated HTML data.
985 wait_state_ = HTML_DATA;
986 // All non-HTML resources have been finished, start all remaining
987 // HTML files.
988 SaveNextFile(true);
990 } else if (in_process_count()) {
991 // Continue asking for HTML data.
992 DCHECK(wait_state_ == HTML_DATA);
994 } else {
995 // Save as HTML only or MHTML.
996 DCHECK(wait_state_ == NET_FILES);
997 DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
998 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML));
999 if (waiting_item_queue_.size()) {
1000 DCHECK(all_save_items_count_ == waiting_item_queue_.size());
1001 SaveNextFile(false);
1006 bool SavePackage::OnMessageReceived(const IPC::Message& message) {
1007 bool handled = true;
1008 IPC_BEGIN_MESSAGE_MAP(SavePackage, message)
1009 IPC_MESSAGE_HANDLER(ViewHostMsg_SendSerializedHtmlData,
1010 OnReceivedSerializedHtmlData)
1011 IPC_MESSAGE_UNHANDLED(handled = false)
1012 IPC_END_MESSAGE_MAP()
1013 return handled;
1016 bool SavePackage::OnMessageReceived(const IPC::Message& message,
1017 RenderFrameHost* render_frame_host) {
1018 bool handled = true;
1019 IPC_BEGIN_MESSAGE_MAP_WITH_PARAM(SavePackage, message, render_frame_host)
1020 IPC_MESSAGE_HANDLER(FrameHostMsg_SavableResourceLinksResponse,
1021 OnSavableResourceLinksResponse)
1022 IPC_MESSAGE_HANDLER(FrameHostMsg_SavableResourceLinksError,
1023 OnSavableResourceLinksError)
1024 IPC_MESSAGE_UNHANDLED(handled = false)
1025 IPC_END_MESSAGE_MAP()
1026 return handled;
1029 // After finishing all SaveItems which need to get data from net.
1030 // We collect all URLs which have local storage and send the
1031 // map:(originalURL:currentLocalPath) to render process (backend).
1032 // Then render process will serialize DOM and send data to us.
1033 void SavePackage::GetSerializedHtmlDataForCurrentPageWithLocalLinks() {
1034 if (wait_state_ != HTML_DATA)
1035 return;
1036 std::vector<GURL> saved_links;
1037 std::vector<base::FilePath> saved_file_paths;
1038 int successful_started_items_count = 0;
1040 // Collect all saved items which have local storage.
1041 // First collect the status of all the resource files and check whether they
1042 // have created local files although they have not been completely saved.
1043 // If yes, the file can be saved. Otherwise, there is a disk error, so we
1044 // need to cancel the page saving job.
1045 for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
1046 it != in_progress_items_.end(); ++it) {
1047 DCHECK(it->second->save_source() ==
1048 SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1049 if (it->second->has_final_name())
1050 successful_started_items_count++;
1051 saved_links.push_back(it->second->url());
1052 saved_file_paths.push_back(it->second->file_name());
1055 // If not all file of HTML resource have been started, then wait.
1056 if (successful_started_items_count != in_process_count())
1057 return;
1059 // Collect all saved success items.
1060 for (SavedItemMap::iterator it = saved_success_items_.begin();
1061 it != saved_success_items_.end(); ++it) {
1062 DCHECK(it->second->has_final_name());
1063 saved_links.push_back(it->second->url());
1064 saved_file_paths.push_back(it->second->file_name());
1067 // Get the relative directory name.
1068 base::FilePath relative_dir_name = saved_main_directory_path_.BaseName();
1070 Send(new ViewMsg_GetSerializedHtmlDataForCurrentPageWithLocalLinks(
1071 routing_id(), saved_links, saved_file_paths, relative_dir_name));
1074 // Process the serialized HTML content data of a specified web page
1075 // retrieved from render process.
1076 void SavePackage::OnReceivedSerializedHtmlData(const GURL& frame_url,
1077 const std::string& data,
1078 int32 status) {
1079 WebPageSerializerClient::PageSerializationStatus flag =
1080 static_cast<WebPageSerializerClient::PageSerializationStatus>(status);
1081 // Check current state.
1082 if (wait_state_ != HTML_DATA)
1083 return;
1085 int id = contents_id();
1086 // If the all frames are finished saving, we need to close the
1087 // remaining SaveItems.
1088 if (flag == WebPageSerializerClient::AllFramesAreFinished) {
1089 for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
1090 it != in_progress_items_.end(); ++it) {
1091 DVLOG(20) << " " << __FUNCTION__ << "()"
1092 << " save_id = " << it->second->save_id()
1093 << " url = \"" << it->second->url().spec() << "\"";
1094 BrowserThread::PostTask(
1095 BrowserThread::FILE, FROM_HERE,
1096 base::Bind(&SaveFileManager::SaveFinished,
1097 file_manager_,
1098 it->second->save_id(),
1099 it->second->url(),
1101 true));
1103 return;
1106 SaveUrlItemMap::iterator it = in_progress_items_.find(frame_url.spec());
1107 if (it == in_progress_items_.end()) {
1108 for (SavedItemMap::iterator saved_it = saved_success_items_.begin();
1109 saved_it != saved_success_items_.end(); ++saved_it) {
1110 if (saved_it->second->url() == frame_url) {
1111 wrote_to_completed_file_ = true;
1112 break;
1116 it = saved_failed_items_.find(frame_url.spec());
1117 if (it != saved_failed_items_.end())
1118 wrote_to_failed_file_ = true;
1120 return;
1123 SaveItem* save_item = it->second;
1124 DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1126 if (!data.empty()) {
1127 // Prepare buffer for saving HTML data.
1128 scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size()));
1129 memcpy(new_data->data(), data.data(), data.size());
1131 // Call write file functionality in file thread.
1132 BrowserThread::PostTask(
1133 BrowserThread::FILE, FROM_HERE,
1134 base::Bind(&SaveFileManager::UpdateSaveProgress,
1135 file_manager_,
1136 save_item->save_id(),
1137 new_data,
1138 static_cast<int>(data.size())));
1141 // Current frame is completed saving, call finish in file thread.
1142 if (flag == WebPageSerializerClient::CurrentFrameIsFinished) {
1143 DVLOG(20) << " " << __FUNCTION__ << "()"
1144 << " save_id = " << save_item->save_id()
1145 << " url = \"" << save_item->url().spec() << "\"";
1146 BrowserThread::PostTask(
1147 BrowserThread::FILE, FROM_HERE,
1148 base::Bind(&SaveFileManager::SaveFinished,
1149 file_manager_,
1150 save_item->save_id(),
1151 save_item->url(),
1153 true));
1157 // Ask for all savable resource links from backend, include main frame and
1158 // sub-frame.
1159 void SavePackage::GetSavableResourceLinksForCurrentPage() {
1160 if (wait_state_ != START_PROCESS)
1161 return;
1163 wait_state_ = RESOURCES_LIST;
1165 DCHECK_EQ(0, number_of_frames_pending_response_);
1166 web_contents()->ForEachFrame(base::Bind(
1167 &SavePackage::GetSavableResourceLinksForFrame,
1168 base::Unretained(this))); // Safe, because ForEachFrame is synchronous.
1169 DCHECK_LT(0, number_of_frames_pending_response_);
1172 void SavePackage::GetSavableResourceLinksForFrame(RenderFrameHost* target) {
1173 number_of_frames_pending_response_++;
1174 target->Send(new FrameMsg_GetSavableResourceLinks(target->GetRoutingID()));
1177 void SavePackage::OnSavableResourceLinksResponse(
1178 RenderFrameHost* sender,
1179 const GURL& frame_url,
1180 const std::vector<GURL>& resources_list,
1181 const std::vector<Referrer>& referrers_list) {
1182 if (wait_state_ != RESOURCES_LIST)
1183 return;
1185 if (resources_list.size() != referrers_list.size())
1186 return;
1188 // Add all sub-resources to wait list.
1189 for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) {
1190 const GURL& u = resources_list[i];
1191 if (!u.is_valid())
1192 continue;
1193 if (unique_urls_to_save_.count(u))
1194 continue;
1195 unique_urls_to_save_.insert(u);
1197 SaveFileCreateInfo::SaveFileSource save_source =
1198 u.SchemeIsFile() ? SaveFileCreateInfo::SAVE_FILE_FROM_FILE
1199 : SaveFileCreateInfo::SAVE_FILE_FROM_NET;
1200 SaveItem* save_item = new SaveItem(u, referrers_list[i], this, save_source);
1201 waiting_item_queue_.push(save_item);
1204 // Store savable frame_url for later processing.
1205 if (frame_url.is_valid())
1206 frame_urls_to_save_.push_back(frame_url);
1208 CompleteSavableResourceLinksResponseFromFrame();
1211 void SavePackage::OnSavableResourceLinksError(RenderFrameHost* sender) {
1212 CompleteSavableResourceLinksResponseFromFrame();
1215 void SavePackage::CompleteSavableResourceLinksResponseFromFrame() {
1216 --number_of_frames_pending_response_;
1217 DCHECK_LE(0, number_of_frames_pending_response_);
1218 if (number_of_frames_pending_response_ != 0)
1219 return; // Need to wait for more responses from RenderFrames.
1221 // Add frame urls to the waiting_item_queue_. This is done *after* processing
1222 // all savable resource links (i.e. in OnSavableResourceLinksResponse), to
1223 // prefer their referrers in cases where the frame url has already been
1224 // covered by savable resource links.
1225 for (auto& frame_url : frame_urls_to_save_) {
1226 DCHECK(frame_url.is_valid());
1227 if (0 == unique_urls_to_save_.count(frame_url)) {
1228 unique_urls_to_save_.insert(frame_url);
1229 SaveItem* save_item = new SaveItem(
1230 frame_url, Referrer(), this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1231 waiting_item_queue_.push(save_item);
1235 all_save_items_count_ = static_cast<int>(waiting_item_queue_.size());
1237 // We use total bytes as the total number of files we want to save.
1238 // Hack to avoid touching download_ after user cancel.
1239 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
1240 // with SavePackage flow.
1241 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS))
1242 download_->SetTotalBytes(all_save_items_count_);
1244 if (all_save_items_count_) {
1245 wait_state_ = NET_FILES;
1247 // Give backend the lists which contain all resource links that have local
1248 // storage, after which, render process will serialize DOM for generating
1249 // HTML data.
1250 DoSavingProcess();
1251 } else {
1252 // No savable frames and/or resources - treat it as user cancel.
1253 Cancel(true);
1257 base::FilePath SavePackage::GetSuggestedNameForSaveAs(
1258 bool can_save_as_complete,
1259 const std::string& contents_mime_type,
1260 const std::string& accept_langs) {
1261 base::FilePath name_with_proper_ext = base::FilePath::FromUTF16Unsafe(title_);
1263 // If the page's title matches its URL, use the URL. Try to use the last path
1264 // component or if there is none, the domain as the file name.
1265 // Normally we want to base the filename on the page title, or if it doesn't
1266 // exist, on the URL. It's not easy to tell if the page has no title, because
1267 // if the page has no title, WebContents::GetTitle() will return the page's
1268 // URL (adjusted for display purposes). Therefore, we convert the "title"
1269 // back to a URL, and if it matches the original page URL, we know the page
1270 // had no title (or had a title equal to its URL, which is fine to treat
1271 // similarly).
1272 if (title_ == url_formatter::FormatUrl(page_url_, accept_langs)) {
1273 std::string url_path;
1274 if (!page_url_.SchemeIs(url::kDataScheme)) {
1275 std::vector<std::string> url_parts = base::SplitString(
1276 page_url_.path(), "/", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
1277 if (!url_parts.empty()) {
1278 for (int i = static_cast<int>(url_parts.size()) - 1; i >= 0; --i) {
1279 url_path = url_parts[i];
1280 if (!url_path.empty())
1281 break;
1284 if (url_path.empty())
1285 url_path = page_url_.host();
1286 } else {
1287 url_path = "dataurl";
1289 name_with_proper_ext = base::FilePath::FromUTF8Unsafe(url_path);
1292 // Ask user for getting final saving name.
1293 name_with_proper_ext = EnsureMimeExtension(name_with_proper_ext,
1294 contents_mime_type);
1295 // Adjust extension for complete types.
1296 if (can_save_as_complete)
1297 name_with_proper_ext = EnsureHtmlExtension(name_with_proper_ext);
1299 base::FilePath::StringType file_name = name_with_proper_ext.value();
1300 base::i18n::ReplaceIllegalCharactersInPath(&file_name, '_');
1301 return base::FilePath(file_name);
1304 base::FilePath SavePackage::EnsureHtmlExtension(const base::FilePath& name) {
1305 // If the file name doesn't have an extension suitable for HTML files,
1306 // append one.
1307 base::FilePath::StringType ext = name.Extension();
1308 if (!ext.empty())
1309 ext.erase(ext.begin()); // Erase preceding '.'.
1310 std::string mime_type;
1311 if (!net::GetMimeTypeFromExtension(ext, &mime_type) ||
1312 !CanSaveAsComplete(mime_type)) {
1313 return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
1314 kDefaultHtmlExtension);
1316 return name;
1319 base::FilePath SavePackage::EnsureMimeExtension(const base::FilePath& name,
1320 const std::string& contents_mime_type) {
1321 // Start extension at 1 to skip over period if non-empty.
1322 base::FilePath::StringType ext = name.Extension().length() ?
1323 name.Extension().substr(1) : name.Extension();
1324 base::FilePath::StringType suggested_extension =
1325 ExtensionForMimeType(contents_mime_type);
1326 std::string mime_type;
1327 if (!suggested_extension.empty() &&
1328 !net::GetMimeTypeFromExtension(ext, &mime_type)) {
1329 // Extension is absent or needs to be updated.
1330 return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
1331 suggested_extension);
1333 return name;
1336 const base::FilePath::CharType* SavePackage::ExtensionForMimeType(
1337 const std::string& contents_mime_type) {
1338 static const struct {
1339 const base::FilePath::CharType *mime_type;
1340 const base::FilePath::CharType *suggested_extension;
1341 } extensions[] = {
1342 { FILE_PATH_LITERAL("text/html"), kDefaultHtmlExtension },
1343 { FILE_PATH_LITERAL("text/xml"), FILE_PATH_LITERAL("xml") },
1344 { FILE_PATH_LITERAL("application/xhtml+xml"), FILE_PATH_LITERAL("xhtml") },
1345 { FILE_PATH_LITERAL("text/plain"), FILE_PATH_LITERAL("txt") },
1346 { FILE_PATH_LITERAL("text/css"), FILE_PATH_LITERAL("css") },
1348 #if defined(OS_POSIX)
1349 base::FilePath::StringType mime_type(contents_mime_type);
1350 #elif defined(OS_WIN)
1351 base::FilePath::StringType mime_type(base::UTF8ToWide(contents_mime_type));
1352 #endif // OS_WIN
1353 for (uint32 i = 0; i < arraysize(extensions); ++i) {
1354 if (mime_type == extensions[i].mime_type)
1355 return extensions[i].suggested_extension;
1357 return FILE_PATH_LITERAL("");
1360 void SavePackage::GetSaveInfo() {
1361 // Can't use web_contents_ in the file thread, so get the data that we need
1362 // before calling to it.
1363 base::FilePath website_save_dir, download_save_dir;
1364 bool skip_dir_check = false;
1365 DCHECK(download_manager_);
1366 if (download_manager_->GetDelegate()) {
1367 download_manager_->GetDelegate()->GetSaveDir(
1368 web_contents()->GetBrowserContext(), &website_save_dir,
1369 &download_save_dir, &skip_dir_check);
1371 std::string mime_type = web_contents()->GetContentsMimeType();
1372 std::string accept_languages =
1373 GetContentClient()->browser()->GetAcceptLangs(
1374 web_contents()->GetBrowserContext());
1376 BrowserThread::PostTask(
1377 BrowserThread::FILE, FROM_HERE,
1378 base::Bind(&SavePackage::CreateDirectoryOnFileThread, this,
1379 website_save_dir, download_save_dir, skip_dir_check,
1380 mime_type, accept_languages));
1383 void SavePackage::CreateDirectoryOnFileThread(
1384 const base::FilePath& website_save_dir,
1385 const base::FilePath& download_save_dir,
1386 bool skip_dir_check,
1387 const std::string& mime_type,
1388 const std::string& accept_langs) {
1389 base::FilePath save_dir;
1390 // If the default html/websites save folder doesn't exist...
1391 // We skip the directory check for gdata directories on ChromeOS.
1392 if (!skip_dir_check && !base::DirectoryExists(website_save_dir)) {
1393 // If the default download dir doesn't exist, create it.
1394 if (!base::DirectoryExists(download_save_dir)) {
1395 bool res = base::CreateDirectory(download_save_dir);
1396 DCHECK(res);
1398 save_dir = download_save_dir;
1399 } else {
1400 // If it does exist, use the default save dir param.
1401 save_dir = website_save_dir;
1404 bool can_save_as_complete = CanSaveAsComplete(mime_type);
1405 base::FilePath suggested_filename = GetSuggestedNameForSaveAs(
1406 can_save_as_complete, mime_type, accept_langs);
1407 base::FilePath::StringType pure_file_name =
1408 suggested_filename.RemoveExtension().BaseName().value();
1409 base::FilePath::StringType file_name_ext = suggested_filename.Extension();
1411 // Need to make sure the suggested file name is not too long.
1412 uint32 max_path = GetMaxPathLengthForDirectory(save_dir);
1414 if (GetSafePureFileName(save_dir, file_name_ext, max_path, &pure_file_name)) {
1415 save_dir = save_dir.Append(pure_file_name + file_name_ext);
1416 } else {
1417 // Cannot create a shorter filename. This will cause the save as operation
1418 // to fail unless the user pick a shorter name. Continuing even though it
1419 // will fail because returning means no save as popup for the user, which
1420 // is even more confusing. This case should be rare though.
1421 save_dir = save_dir.Append(suggested_filename);
1424 BrowserThread::PostTask(
1425 BrowserThread::UI, FROM_HERE,
1426 base::Bind(&SavePackage::ContinueGetSaveInfo, this, save_dir,
1427 can_save_as_complete));
1430 void SavePackage::ContinueGetSaveInfo(const base::FilePath& suggested_path,
1431 bool can_save_as_complete) {
1433 // The WebContents which owns this SavePackage may have disappeared during
1434 // the UI->FILE->UI thread hop of
1435 // GetSaveInfo->CreateDirectoryOnFileThread->ContinueGetSaveInfo.
1436 if (!web_contents() || !download_manager_->GetDelegate())
1437 return;
1439 base::FilePath::StringType default_extension;
1440 if (can_save_as_complete)
1441 default_extension = kDefaultHtmlExtension;
1443 download_manager_->GetDelegate()->ChooseSavePath(
1444 web_contents(),
1445 suggested_path,
1446 default_extension,
1447 can_save_as_complete,
1448 base::Bind(&SavePackage::OnPathPicked, AsWeakPtr()));
1451 void SavePackage::OnPathPicked(
1452 const base::FilePath& final_name,
1453 SavePageType type,
1454 const SavePackageDownloadCreatedCallback& download_created_callback) {
1455 DCHECK((type == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
1456 (type == SAVE_PAGE_TYPE_AS_MHTML) ||
1457 (type == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)) << type;
1458 // Ensure the filename is safe.
1459 saved_main_file_path_ = final_name;
1460 // TODO(asanka): This call may block on IO and shouldn't be made
1461 // from the UI thread. See http://crbug.com/61827.
1462 net::GenerateSafeFileName(web_contents()->GetContentsMimeType(), false,
1463 &saved_main_file_path_);
1465 saved_main_directory_path_ = saved_main_file_path_.DirName();
1466 save_type_ = type;
1467 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
1468 // Make new directory for saving complete file.
1469 saved_main_directory_path_ = saved_main_directory_path_.Append(
1470 saved_main_file_path_.RemoveExtension().BaseName().value() +
1471 FILE_PATH_LITERAL("_files"));
1474 Init(download_created_callback);
1477 void SavePackage::StopObservation() {
1478 DCHECK(download_);
1479 DCHECK(download_manager_);
1481 download_->RemoveObserver(this);
1482 download_ = NULL;
1483 download_manager_ = NULL;
1486 void SavePackage::OnDownloadDestroyed(DownloadItem* download) {
1487 StopObservation();
1490 void SavePackage::FinalizeDownloadEntry() {
1491 DCHECK(download_);
1492 DCHECK(download_manager_);
1494 download_manager_->OnSavePackageSuccessfullyFinished(download_);
1495 StopObservation();
1498 } // namespace content