Roll src/third_party/WebKit d9c6159:8139f33 (svn 201974:201975)
[chromium-blink-merge.git] / content / browser / download / save_package.cc
blob36c6be64a0f3c066ef2a766a72f57240dae0bbac
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/browser/download/save_package.h"
7 #include <algorithm>
9 #include "base/bind.h"
10 #include "base/files/file_path.h"
11 #include "base/files/file_util.h"
12 #include "base/i18n/file_util_icu.h"
13 #include "base/logging.h"
14 #include "base/message_loop/message_loop.h"
15 #include "base/stl_util.h"
16 #include "base/strings/string_piece.h"
17 #include "base/strings/string_split.h"
18 #include "base/strings/sys_string_conversions.h"
19 #include "base/strings/utf_string_conversions.h"
20 #include "base/threading/thread.h"
21 #include "components/url_formatter/url_formatter.h"
22 #include "content/browser/download/download_item_impl.h"
23 #include "content/browser/download/download_manager_impl.h"
24 #include "content/browser/download/download_stats.h"
25 #include "content/browser/download/save_file.h"
26 #include "content/browser/download/save_file_manager.h"
27 #include "content/browser/download/save_item.h"
28 #include "content/browser/loader/resource_dispatcher_host_impl.h"
29 #include "content/browser/renderer_host/render_process_host_impl.h"
30 #include "content/browser/renderer_host/render_view_host_delegate.h"
31 #include "content/browser/renderer_host/render_view_host_impl.h"
32 #include "content/common/view_messages.h"
33 #include "content/public/browser/browser_context.h"
34 #include "content/public/browser/browser_thread.h"
35 #include "content/public/browser/content_browser_client.h"
36 #include "content/public/browser/download_manager_delegate.h"
37 #include "content/public/browser/navigation_entry.h"
38 #include "content/public/browser/notification_service.h"
39 #include "content/public/browser/notification_types.h"
40 #include "content/public/browser/render_frame_host.h"
41 #include "content/public/browser/resource_context.h"
42 #include "content/public/browser/web_contents.h"
43 #include "net/base/filename_util.h"
44 #include "net/base/io_buffer.h"
45 #include "net/base/mime_util.h"
46 #include "net/url_request/url_request_context.h"
47 #include "third_party/WebKit/public/web/WebPageSerializerClient.h"
48 #include "url/url_constants.h"
50 using base::Time;
51 using blink::WebPageSerializerClient;
53 namespace content {
54 namespace {
56 // A counter for uniquely identifying each save package.
57 int g_save_package_id = 0;
59 // Default name which will be used when we can not get proper name from
60 // resource URL.
61 const char kDefaultSaveName[] = "saved_resource";
63 // Maximum number of file ordinal number. I think it's big enough for resolving
64 // name-conflict files which has same base file name.
65 const int32 kMaxFileOrdinalNumber = 9999;
67 // Maximum length for file path. Since Windows have MAX_PATH limitation for
68 // file path, we need to make sure length of file path of every saved file
69 // is less than MAX_PATH
70 #if defined(OS_WIN)
71 const uint32 kMaxFilePathLength = MAX_PATH - 1;
72 #elif defined(OS_POSIX)
73 const uint32 kMaxFilePathLength = PATH_MAX - 1;
74 #endif
76 // Maximum length for file ordinal number part. Since we only support the
77 // maximum 9999 for ordinal number, which means maximum file ordinal number part
78 // should be "(9998)", so the value is 6.
79 const uint32 kMaxFileOrdinalNumberPartLength = 6;
81 // Strip current ordinal number, if any. Should only be used on pure
82 // file names, i.e. those stripped of their extensions.
83 // TODO(estade): improve this to not choke on alternate encodings.
84 base::FilePath::StringType StripOrdinalNumber(
85 const base::FilePath::StringType& pure_file_name) {
86 base::FilePath::StringType::size_type r_paren_index =
87 pure_file_name.rfind(FILE_PATH_LITERAL(')'));
88 base::FilePath::StringType::size_type l_paren_index =
89 pure_file_name.rfind(FILE_PATH_LITERAL('('));
90 if (l_paren_index >= r_paren_index)
91 return pure_file_name;
93 for (base::FilePath::StringType::size_type i = l_paren_index + 1;
94 i != r_paren_index; ++i) {
95 if (!base::IsAsciiDigit(pure_file_name[i]))
96 return pure_file_name;
99 return pure_file_name.substr(0, l_paren_index);
// Returns true when contents with the given MIME type can be saved as
// complete-HTML. Exactly two types qualify: "text/html" and
// "application/xhtml+xml". The comparison is an exact, case-sensitive match.
bool CanSaveAsComplete(const std::string& contents_mime_type) {
  return contents_mime_type == "text/html" ||
         contents_mime_type == "application/xhtml+xml";
}
110 // Request handle for SavePackage downloads. Currently doesn't support
111 // pause/resume/cancel, but returns a WebContents.
112 class SavePackageRequestHandle : public DownloadRequestHandleInterface {
113 public:
114 SavePackageRequestHandle(base::WeakPtr<SavePackage> save_package)
115 : save_package_(save_package) {}
117 // DownloadRequestHandleInterface
118 WebContents* GetWebContents() const override {
119 return save_package_.get() ? save_package_->web_contents() : NULL;
121 DownloadManager* GetDownloadManager() const override { return NULL; }
122 void PauseRequest() const override {}
123 void ResumeRequest() const override {}
124 void CancelRequest() const override {}
125 std::string DebugString() const override {
126 return "SavePackage DownloadRequestHandle";
129 private:
130 base::WeakPtr<SavePackage> save_package_;
133 } // namespace
// Extension, without the leading dot, given to saved HTML resources.
// GenerateFileName() prefixes it with "." when it forces an HTML extension.
135 const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] =
136 FILE_PATH_LITERAL("html");
138 SavePackage::SavePackage(WebContents* web_contents,
139 SavePageType save_type,
140 const base::FilePath& file_full_path,
141 const base::FilePath& directory_full_path)
142 : WebContentsObserver(web_contents),
143 file_manager_(NULL),
144 download_manager_(NULL),
145 download_(NULL),
146 page_url_(GetUrlToBeSaved()),
147 saved_main_file_path_(file_full_path),
148 saved_main_directory_path_(directory_full_path),
149 title_(web_contents->GetTitle()),
150 start_tick_(base::TimeTicks::Now()),
151 finished_(false),
152 mhtml_finishing_(false),
153 user_canceled_(false),
154 disk_error_occurred_(false),
155 save_type_(save_type),
156 all_save_items_count_(0),
157 file_name_set_(&base::FilePath::CompareLessIgnoreCase),
158 wait_state_(INITIALIZE),
159 contents_id_(web_contents->GetRenderProcessHost()->GetID()),
160 unique_id_(g_save_package_id++),
161 wrote_to_completed_file_(false),
162 wrote_to_failed_file_(false) {
163 DCHECK(page_url_.is_valid());
164 DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
165 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
166 (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML));
167 DCHECK(!saved_main_file_path_.empty() &&
168 saved_main_file_path_.value().length() <= kMaxFilePathLength);
169 DCHECK(!saved_main_directory_path_.empty() &&
170 saved_main_directory_path_.value().length() < kMaxFilePathLength);
171 InternalInit();
174 SavePackage::SavePackage(WebContents* web_contents)
175 : WebContentsObserver(web_contents),
176 file_manager_(NULL),
177 download_manager_(NULL),
178 download_(NULL),
179 page_url_(GetUrlToBeSaved()),
180 title_(web_contents->GetTitle()),
181 start_tick_(base::TimeTicks::Now()),
182 finished_(false),
183 mhtml_finishing_(false),
184 user_canceled_(false),
185 disk_error_occurred_(false),
186 save_type_(SAVE_PAGE_TYPE_UNKNOWN),
187 all_save_items_count_(0),
188 file_name_set_(&base::FilePath::CompareLessIgnoreCase),
189 wait_state_(INITIALIZE),
190 contents_id_(web_contents->GetRenderProcessHost()->GetID()),
191 unique_id_(g_save_package_id++),
192 wrote_to_completed_file_(false),
193 wrote_to_failed_file_(false) {
194 DCHECK(page_url_.is_valid());
195 InternalInit();
198 // This is for testing use. Set |finished_| as true because we don't want
199 // method Cancel to be be called in destructor in test mode.
200 // We also don't call InternalInit().
201 SavePackage::SavePackage(WebContents* web_contents,
202 const base::FilePath& file_full_path,
203 const base::FilePath& directory_full_path)
204 : WebContentsObserver(web_contents),
205 file_manager_(NULL),
206 download_manager_(NULL),
207 download_(NULL),
208 saved_main_file_path_(file_full_path),
209 saved_main_directory_path_(directory_full_path),
210 start_tick_(base::TimeTicks::Now()),
211 finished_(true),
212 mhtml_finishing_(false),
213 user_canceled_(false),
214 disk_error_occurred_(false),
215 save_type_(SAVE_PAGE_TYPE_UNKNOWN),
216 all_save_items_count_(0),
217 file_name_set_(&base::FilePath::CompareLessIgnoreCase),
218 wait_state_(INITIALIZE),
219 contents_id_(0),
220 unique_id_(g_save_package_id++),
221 wrote_to_completed_file_(false),
222 wrote_to_failed_file_(false) {
225 SavePackage::~SavePackage() {
226 // Stop receiving saving job's updates
227 if (!finished_ && !canceled()) {
228 // Unexpected quit.
229 Cancel(true);
232 // We should no longer be observing the DownloadItem at this point.
233 CHECK(!download_);
235 DCHECK(all_save_items_count_ == (waiting_item_queue_.size() +
236 completed_count() +
237 in_process_count()));
238 // Free all SaveItems.
239 while (!waiting_item_queue_.empty()) {
240 // We still have some items which are waiting for start to save.
241 SaveItem* save_item = waiting_item_queue_.front();
242 waiting_item_queue_.pop();
243 delete save_item;
246 STLDeleteValues(&saved_success_items_);
247 STLDeleteValues(&in_progress_items_);
248 STLDeleteValues(&saved_failed_items_);
250 file_manager_ = NULL;
253 GURL SavePackage::GetUrlToBeSaved() {
254 // Instead of using web_contents_.GetURL here, we use url() (which is the
255 // "real" url of the page) from the NavigationEntry because it reflects its
256 // origin rather than the displayed one (returned by GetURL) which may be
257 // different (like having "view-source:" on the front).
258 NavigationEntry* visible_entry =
259 web_contents()->GetController().GetVisibleEntry();
260 return visible_entry ? visible_entry->GetURL() : GURL::EmptyGURL();
263 void SavePackage::Cancel(bool user_action) {
264 if (!canceled()) {
265 if (user_action)
266 user_canceled_ = true;
267 else
268 disk_error_occurred_ = true;
269 Stop();
271 RecordSavePackageEvent(SAVE_PACKAGE_CANCELLED);
274 // Init() can be called directly, or indirectly via GetSaveInfo(). In both
275 // cases, we need file_manager_ to be initialized, so we do this first.
276 void SavePackage::InternalInit() {
277 ResourceDispatcherHostImpl* rdh = ResourceDispatcherHostImpl::Get();
278 if (!rdh) {
279 NOTREACHED();
280 return;
283 file_manager_ = rdh->save_file_manager();
284 DCHECK(file_manager_);
286 download_manager_ = static_cast<DownloadManagerImpl*>(
287 BrowserContext::GetDownloadManager(
288 web_contents()->GetBrowserContext()));
289 DCHECK(download_manager_);
291 RecordSavePackageEvent(SAVE_PACKAGE_STARTED);
294 bool SavePackage::Init(
295 const SavePackageDownloadCreatedCallback& download_created_callback) {
296 DCHECK_CURRENTLY_ON(BrowserThread::UI);
297 // Set proper running state.
298 if (wait_state_ != INITIALIZE)
299 return false;
301 wait_state_ = START_PROCESS;
303 // Initialize the request context and resource dispatcher.
304 BrowserContext* browser_context = web_contents()->GetBrowserContext();
305 if (!browser_context) {
306 NOTREACHED();
307 return false;
310 scoped_ptr<DownloadRequestHandleInterface> request_handle(
311 new SavePackageRequestHandle(AsWeakPtr()));
312 // The download manager keeps ownership but adds us as an observer.
313 download_manager_->CreateSavePackageDownloadItem(
314 saved_main_file_path_,
315 page_url_,
316 ((save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ?
317 "multipart/related" : "text/html"),
318 request_handle.Pass(),
319 base::Bind(&SavePackage::InitWithDownloadItem, AsWeakPtr(),
320 download_created_callback));
321 return true;
324 void SavePackage::InitWithDownloadItem(
325 const SavePackageDownloadCreatedCallback& download_created_callback,
326 DownloadItemImpl* item) {
327 DCHECK_CURRENTLY_ON(BrowserThread::UI);
328 DCHECK(item);
329 download_ = item;
330 download_->AddObserver(this);
331 // Confirm above didn't delete the tab out from under us.
332 if (!download_created_callback.is_null())
333 download_created_callback.Run(download_);
335 // Check save type and process the save page job.
336 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
337 // Get directory
338 DCHECK(!saved_main_directory_path_.empty());
339 GetAllSavableResourceLinksForCurrentPage();
340 } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) {
341 web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind(
342 &SavePackage::OnMHTMLGenerated, this));
343 } else {
344 DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_;
345 wait_state_ = NET_FILES;
346 SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ?
347 SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
348 SaveFileCreateInfo::SAVE_FILE_FROM_NET;
349 SaveItem* save_item = new SaveItem(page_url_,
350 Referrer(),
351 this,
352 save_source);
353 // Add this item to waiting list.
354 waiting_item_queue_.push(save_item);
355 all_save_items_count_ = 1;
356 download_->SetTotalBytes(1);
358 DoSavingProcess();
362 void SavePackage::OnMHTMLGenerated(int64 size) {
363 if (size <= 0) {
364 Cancel(false);
365 return;
367 wrote_to_completed_file_ = true;
369 // Hack to avoid touching download_ after user cancel.
370 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
371 // with SavePackage flow.
372 if (download_->GetState() == DownloadItem::IN_PROGRESS) {
373 download_->SetTotalBytes(size);
374 download_->DestinationUpdate(size, 0, std::string());
375 // Must call OnAllDataSaved here in order for
376 // GDataDownloadObserver::ShouldUpload() to return true.
377 // ShouldCompleteDownload() may depend on the gdata uploader to finish.
378 download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
381 if (!download_manager_->GetDelegate()) {
382 Finish();
383 return;
386 if (download_manager_->GetDelegate()->ShouldCompleteDownload(
387 download_, base::Bind(&SavePackage::Finish, this))) {
388 Finish();
392 // On POSIX, the length of |pure_file_name| + |file_name_ext| is further
393 // restricted by NAME_MAX. The maximum allowed path looks like:
394 // '/path/to/save_dir' + '/' + NAME_MAX.
395 uint32 SavePackage::GetMaxPathLengthForDirectory(
396 const base::FilePath& base_dir) {
397 #if defined(OS_POSIX)
398 return std::min(kMaxFilePathLength,
399 static_cast<uint32>(base_dir.value().length()) +
400 NAME_MAX + 1);
401 #else
402 return kMaxFilePathLength;
403 #endif
406 // File name is considered being consist of pure file name, dot and file
407 // extension name. File name might has no dot and file extension, or has
408 // multiple dot inside file name. The dot, which separates the pure file
409 // name and file extension name, is last dot in the whole file name.
410 // This function is for making sure the length of specified file path is not
411 // great than the specified maximum length of file path and getting safe pure
412 // file name part if the input pure file name is too long.
413 // The parameter |dir_path| specifies directory part of the specified
414 // file path. The parameter |file_name_ext| specifies file extension
415 // name part of the specified file path (including start dot). The parameter
416 // |max_file_path_len| specifies maximum length of the specified file path.
417 // The parameter |pure_file_name| input pure file name part of the specified
418 // file path. If the length of specified file path is great than
419 // |max_file_path_len|, the |pure_file_name| will output new pure file name
420 // part for making sure the length of specified file path is less than
421 // specified maximum length of file path. Return false if the function can
422 // not get a safe pure file name, otherwise it returns true.
423 bool SavePackage::GetSafePureFileName(
424 const base::FilePath& dir_path,
425 const base::FilePath::StringType& file_name_ext,
426 uint32 max_file_path_len,
427 base::FilePath::StringType* pure_file_name) {
428 DCHECK(!pure_file_name->empty());
429 int available_length = static_cast<int>(max_file_path_len -
430 dir_path.value().length() -
431 file_name_ext.length());
432 // Need an extra space for the separator.
433 if (!dir_path.EndsWithSeparator())
434 --available_length;
436 // Plenty of room.
437 if (static_cast<int>(pure_file_name->length()) <= available_length)
438 return true;
440 // Limited room. Truncate |pure_file_name| to fit.
441 if (available_length > 0) {
442 *pure_file_name = pure_file_name->substr(0, available_length);
443 return true;
446 // Not enough room to even use a shortened |pure_file_name|.
447 pure_file_name->clear();
448 return false;
451 // Generate name for saving resource.
452 bool SavePackage::GenerateFileName(const std::string& disposition,
453 const GURL& url,
454 bool need_html_ext,
455 base::FilePath::StringType* generated_name) {
456 // TODO(jungshik): Figure out the referrer charset when having one
457 // makes sense and pass it to GenerateFileName.
458 base::FilePath file_path = net::GenerateFileName(url,
459 disposition,
460 std::string(),
461 std::string(),
462 std::string(),
463 kDefaultSaveName);
465 DCHECK(!file_path.empty());
466 base::FilePath::StringType pure_file_name =
467 file_path.RemoveExtension().BaseName().value();
468 base::FilePath::StringType file_name_ext = file_path.Extension();
470 // If it is HTML resource, use ".html" as its extension.
471 if (need_html_ext) {
472 file_name_ext = FILE_PATH_LITERAL(".");
473 file_name_ext.append(kDefaultHtmlExtension);
476 // Need to make sure the suggested file name is not too long.
477 uint32 max_path = GetMaxPathLengthForDirectory(saved_main_directory_path_);
479 // Get safe pure file name.
480 if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
481 max_path, &pure_file_name))
482 return false;
484 base::FilePath::StringType file_name = pure_file_name + file_name_ext;
486 // Check whether we already have same name in a case insensitive manner.
487 FileNameSet::const_iterator iter = file_name_set_.find(file_name);
488 if (iter == file_name_set_.end()) {
489 file_name_set_.insert(file_name);
490 } else {
491 // Found same name, increase the ordinal number for the file name.
492 pure_file_name =
493 base::FilePath(*iter).RemoveExtension().BaseName().value();
494 base::FilePath::StringType base_file_name =
495 StripOrdinalNumber(pure_file_name);
497 // We need to make sure the length of base file name plus maximum ordinal
498 // number path will be less than or equal to kMaxFilePathLength.
499 if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
500 max_path - kMaxFileOrdinalNumberPartLength, &base_file_name))
501 return false;
503 // Prepare the new ordinal number.
504 uint32 ordinal_number;
505 FileNameCountMap::iterator it = file_name_count_map_.find(base_file_name);
506 if (it == file_name_count_map_.end()) {
507 // First base-name-conflict resolving, use 1 as initial ordinal number.
508 file_name_count_map_[base_file_name] = 1;
509 ordinal_number = 1;
510 } else {
511 // We have met same base-name conflict, use latest ordinal number.
512 ordinal_number = it->second;
515 if (ordinal_number > (kMaxFileOrdinalNumber - 1)) {
516 // Use a random file from temporary file.
517 base::FilePath temp_file;
518 base::CreateTemporaryFile(&temp_file);
519 file_name = temp_file.RemoveExtension().BaseName().value();
520 // Get safe pure file name.
521 if (!GetSafePureFileName(saved_main_directory_path_,
522 base::FilePath::StringType(),
523 max_path, &file_name))
524 return false;
525 } else {
526 for (int i = ordinal_number; i < kMaxFileOrdinalNumber; ++i) {
527 base::FilePath::StringType new_name = base_file_name +
528 base::StringPrintf(FILE_PATH_LITERAL("(%d)"), i) + file_name_ext;
529 if (file_name_set_.find(new_name) == file_name_set_.end()) {
530 // Resolved name conflict.
531 file_name = new_name;
532 file_name_count_map_[base_file_name] = ++i;
533 break;
538 file_name_set_.insert(file_name);
541 DCHECK(!file_name.empty());
542 generated_name->assign(file_name);
544 return true;
547 // We have received a message from SaveFileManager about a new saving job. We
548 // create a SaveItem and store it in our in_progress list.
549 void SavePackage::StartSave(const SaveFileCreateInfo* info) {
550 DCHECK(info && !info->url.is_empty());
552 SaveUrlItemMap::iterator it = in_progress_items_.find(info->url.spec());
553 if (it == in_progress_items_.end()) {
554 // If not found, we must have cancel action.
555 DCHECK(canceled());
556 return;
558 SaveItem* save_item = it->second;
560 DCHECK(!saved_main_file_path_.empty());
562 save_item->SetSaveId(info->save_id);
563 save_item->SetTotalBytes(info->total_bytes);
565 // Determine the proper path for a saving job, by choosing either the default
566 // save directory, or prompting the user.
567 DCHECK(!save_item->has_final_name());
568 if (info->url != page_url_) {
569 base::FilePath::StringType generated_name;
570 // For HTML resource file, make sure it will have .htm as extension name,
571 // otherwise, when you open the saved page in Chrome again, download
572 // file manager will treat it as downloadable resource, and download it
573 // instead of opening it as HTML.
574 bool need_html_ext =
575 info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM;
576 if (!GenerateFileName(info->content_disposition,
577 GURL(info->url),
578 need_html_ext,
579 &generated_name)) {
580 // We can not generate file name for this SaveItem, so we cancel the
581 // saving page job if the save source is from serialized DOM data.
582 // Otherwise, it means this SaveItem is sub-resource type, we treat it
583 // as an error happened on saving. We can ignore this type error for
584 // sub-resource links which will be resolved as absolute links instead
585 // of local links in final saved contents.
586 if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)
587 Cancel(true);
588 else
589 SaveFinished(save_item->save_id(), 0, false);
590 return;
593 // When saving page as only-HTML, we only have a SaveItem whose url
594 // must be page_url_.
595 DCHECK(save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML);
596 DCHECK(!saved_main_directory_path_.empty());
598 // Now we get final name retrieved from GenerateFileName, we will use it
599 // rename the SaveItem.
600 base::FilePath final_name =
601 saved_main_directory_path_.Append(generated_name);
602 save_item->Rename(final_name);
603 } else {
604 // It is the main HTML file, use the name chosen by the user.
605 save_item->Rename(saved_main_file_path_);
608 // If the save source is from file system, inform SaveFileManager to copy
609 // corresponding file to the file path which this SaveItem specifies.
610 if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_FILE) {
611 BrowserThread::PostTask(
612 BrowserThread::FILE, FROM_HERE,
613 base::Bind(&SaveFileManager::SaveLocalFile,
614 file_manager_,
615 save_item->url(),
616 save_item->save_id(),
617 contents_id()));
618 return;
621 // Check whether we begin to require serialized HTML data.
622 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
623 wait_state_ == HTML_DATA) {
624 // Inform backend to serialize the all frames' DOM and send serialized
625 // HTML data back.
626 GetSerializedHtmlDataForCurrentPageWithLocalLinks();
630 SaveItem* SavePackage::LookupItemInProcessBySaveId(int32 save_id) {
631 if (in_process_count()) {
632 for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
633 it != in_progress_items_.end(); ++it) {
634 SaveItem* save_item = it->second;
635 DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
636 if (save_item->save_id() == save_id)
637 return save_item;
640 return NULL;
643 void SavePackage::PutInProgressItemToSavedMap(SaveItem* save_item) {
644 SaveUrlItemMap::iterator it = in_progress_items_.find(
645 save_item->url().spec());
646 DCHECK(it != in_progress_items_.end());
647 DCHECK(save_item == it->second);
648 in_progress_items_.erase(it);
650 if (save_item->success()) {
651 // Add it to saved_success_items_.
652 DCHECK(saved_success_items_.find(save_item->save_id()) ==
653 saved_success_items_.end());
654 saved_success_items_[save_item->save_id()] = save_item;
655 } else {
656 // Add it to saved_failed_items_.
657 DCHECK(saved_failed_items_.find(save_item->url().spec()) ==
658 saved_failed_items_.end());
659 saved_failed_items_[save_item->url().spec()] = save_item;
663 // Called for updating saving state.
664 bool SavePackage::UpdateSaveProgress(int32 save_id,
665 int64 size,
666 bool write_success) {
667 // Because we might have canceled this saving job before,
668 // so we might not find corresponding SaveItem.
669 SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
670 if (!save_item)
671 return false;
673 save_item->Update(size);
675 // If we got disk error, cancel whole save page job.
676 if (!write_success) {
677 // Cancel job with reason of disk error.
678 Cancel(false);
680 return true;
683 // Stop all page saving jobs that are in progress and instruct the file thread
684 // to delete all saved files.
685 void SavePackage::Stop() {
686 // If we haven't moved out of the initial state, there's nothing to cancel and
687 // there won't be valid pointers for file_manager_ or download_.
688 if (wait_state_ == INITIALIZE)
689 return;
691 // When stopping, if it still has some items in in_progress, cancel them.
692 DCHECK(canceled());
693 if (in_process_count()) {
694 SaveUrlItemMap::iterator it = in_progress_items_.begin();
695 for (; it != in_progress_items_.end(); ++it) {
696 SaveItem* save_item = it->second;
697 DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
698 save_item->Cancel();
700 // Remove all in progress item to saved map. For failed items, they will
701 // be put into saved_failed_items_, for successful item, they will be put
702 // into saved_success_items_.
703 while (in_process_count())
704 PutInProgressItemToSavedMap(in_progress_items_.begin()->second);
707 // This vector contains the save ids of the save files which SaveFileManager
708 // needs to remove from its save_file_map_.
709 SaveIDList save_ids;
710 for (SavedItemMap::iterator it = saved_success_items_.begin();
711 it != saved_success_items_.end(); ++it)
712 save_ids.push_back(it->first);
713 for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
714 it != saved_failed_items_.end(); ++it)
715 save_ids.push_back(it->second->save_id());
717 BrowserThread::PostTask(
718 BrowserThread::FILE, FROM_HERE,
719 base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
720 file_manager_,
721 save_ids));
723 finished_ = true;
724 wait_state_ = FAILED;
726 // Inform the DownloadItem we have canceled whole save page job.
727 if (download_) {
728 download_->Cancel(false);
729 FinalizeDownloadEntry();
733 void SavePackage::CheckFinish() {
734 if (in_process_count() || finished_)
735 return;
737 base::FilePath dir = (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
738 saved_success_items_.size() > 1) ?
739 saved_main_directory_path_ : base::FilePath();
741 // This vector contains the final names of all the successfully saved files
742 // along with their save ids. It will be passed to SaveFileManager to do the
743 // renaming job.
744 FinalNameList final_names;
745 for (SavedItemMap::iterator it = saved_success_items_.begin();
746 it != saved_success_items_.end(); ++it)
747 final_names.push_back(std::make_pair(it->first,
748 it->second->full_path()));
750 BrowserThread::PostTask(
751 BrowserThread::FILE, FROM_HERE,
752 base::Bind(&SaveFileManager::RenameAllFiles,
753 file_manager_,
754 final_names,
755 dir,
756 web_contents()->GetRenderProcessHost()->GetID(),
757 web_contents()->GetMainFrame()->GetRoutingID(),
758 id()));
761 // Successfully finished all items of this SavePackage.
762 void SavePackage::Finish() {
763 // User may cancel the job when we're moving files to the final directory.
764 if (canceled())
765 return;
767 wait_state_ = SUCCESSFUL;
768 finished_ = true;
770 // Record finish.
771 RecordSavePackageEvent(SAVE_PACKAGE_FINISHED);
773 // Record any errors that occurred.
774 if (wrote_to_completed_file_) {
775 RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_COMPLETED);
778 if (wrote_to_failed_file_) {
779 RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_FAILED);
782 // This vector contains the save ids of the save files which SaveFileManager
783 // needs to remove from its save_file_map_.
784 SaveIDList save_ids;
785 for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
786 it != saved_failed_items_.end(); ++it)
787 save_ids.push_back(it->second->save_id());
789 BrowserThread::PostTask(
790 BrowserThread::FILE, FROM_HERE,
791 base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
792 file_manager_,
793 save_ids));
795 if (download_) {
796 // Hack to avoid touching download_ after user cancel.
797 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
798 // with SavePackage flow.
799 if (download_->GetState() == DownloadItem::IN_PROGRESS) {
800 if (save_type_ != SAVE_PAGE_TYPE_AS_MHTML) {
801 download_->DestinationUpdate(
802 all_save_items_count_, CurrentSpeed(), std::string());
803 download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
805 download_->MarkAsComplete();
807 FinalizeDownloadEntry();
811 // Called for updating end state.
812 void SavePackage::SaveFinished(int32 save_id, int64 size, bool is_success) {
813 // Because we might have canceled this saving job before,
814 // so we might not find corresponding SaveItem. Just ignore it.
815 SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
816 if (!save_item)
817 return;
819 // Let SaveItem set end state.
820 save_item->Finish(size, is_success);
821 // Remove the associated save id and SavePackage.
822 file_manager_->RemoveSaveFile(save_id, save_item->url(), this);
824 PutInProgressItemToSavedMap(save_item);
826 // Inform the DownloadItem to update UI.
827 // We use the received bytes as number of saved files.
828 // Hack to avoid touching download_ after user cancel.
829 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
830 // with SavePackage flow.
831 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
832 download_->DestinationUpdate(
833 completed_count(), CurrentSpeed(), std::string());
836 if (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM &&
837 save_item->url() == page_url_ && !save_item->received_bytes()) {
838 // If size of main HTML page is 0, treat it as disk error.
839 Cancel(false);
840 return;
843 if (canceled()) {
844 DCHECK(finished_);
845 return;
848 // Continue processing the save page job.
849 DoSavingProcess();
851 // Check whether we can successfully finish whole job.
852 CheckFinish();
855 // Sometimes, the net io will only call SaveFileManager::SaveFinished with
856 // save id -1 when it encounters error. Since in this case, save id will be
857 // -1, so we can only use URL to find which SaveItem is associated with
858 // this error.
859 // Saving an item failed. If it's a sub-resource, ignore it. If the error comes
860 // from serializing HTML data, then cancel saving page.
861 void SavePackage::SaveFailed(const GURL& save_url) {
862 SaveUrlItemMap::iterator it = in_progress_items_.find(save_url.spec());
863 if (it == in_progress_items_.end()) {
864 NOTREACHED(); // Should not exist!
865 return;
867 SaveItem* save_item = it->second;
869 save_item->Finish(0, false);
871 PutInProgressItemToSavedMap(save_item);
873 // Inform the DownloadItem to update UI.
874 // We use the received bytes as number of saved files.
875 // Hack to avoid touching download_ after user cancel.
876 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
877 // with SavePackage flow.
878 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
879 download_->DestinationUpdate(
880 completed_count(), CurrentSpeed(), std::string());
883 if ((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
884 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
885 (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)) {
886 // We got error when saving page. Treat it as disk error.
887 Cancel(true);
890 if (canceled()) {
891 DCHECK(finished_);
892 return;
895 // Continue processing the save page job.
896 DoSavingProcess();
898 CheckFinish();
901 void SavePackage::SaveCanceled(SaveItem* save_item) {
902 // Call the RemoveSaveFile in UI thread.
903 file_manager_->RemoveSaveFile(save_item->save_id(),
904 save_item->url(),
905 this);
906 if (save_item->save_id() != -1)
907 BrowserThread::PostTask(
908 BrowserThread::FILE, FROM_HERE,
909 base::Bind(&SaveFileManager::CancelSave,
910 file_manager_,
911 save_item->save_id()));
914 // Initiate a saving job of a specific URL. We send the request to
915 // SaveFileManager, which will dispatch it to different approach according to
916 // the save source. Parameter process_all_remaining_items indicates whether
917 // we need to save all remaining items.
918 void SavePackage::SaveNextFile(bool process_all_remaining_items) {
919 DCHECK(web_contents());
920 DCHECK(waiting_item_queue_.size());
922 do {
923 // Pop SaveItem from waiting list.
924 SaveItem* save_item = waiting_item_queue_.front();
925 waiting_item_queue_.pop();
927 // Add the item to in_progress_items_.
928 SaveUrlItemMap::iterator it = in_progress_items_.find(
929 save_item->url().spec());
930 DCHECK(it == in_progress_items_.end());
931 in_progress_items_[save_item->url().spec()] = save_item;
932 save_item->Start();
933 file_manager_->SaveURL(save_item->url(),
934 save_item->referrer(),
935 web_contents()->GetRenderProcessHost()->GetID(),
936 routing_id(),
937 web_contents()->GetMainFrame()->GetRoutingID(),
938 save_item->save_source(),
939 save_item->full_path(),
940 web_contents()->
941 GetBrowserContext()->GetResourceContext(),
942 this);
943 } while (process_all_remaining_items && waiting_item_queue_.size());
946 // Calculate the percentage of whole save page job.
947 int SavePackage::PercentComplete() {
948 if (!all_save_items_count_)
949 return 0;
950 else if (!in_process_count())
951 return 100;
952 else
953 return completed_count() / all_save_items_count_;
956 int64 SavePackage::CurrentSpeed() const {
957 base::TimeDelta diff = base::TimeTicks::Now() - start_tick_;
958 int64 diff_ms = diff.InMilliseconds();
959 return diff_ms == 0 ? 0 : completed_count() * 1000 / diff_ms;
962 // Continue processing the save page job after one SaveItem has been
963 // finished.
964 void SavePackage::DoSavingProcess() {
965 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
966 // We guarantee that images and JavaScripts must be downloaded first.
967 // So when finishing all those sub-resources, we will know which
968 // sub-resource's link can be replaced with local file path, which
969 // sub-resource's link need to be replaced with absolute URL which
970 // point to its internet address because it got error when saving its data.
972 // Start a new SaveItem job if we still have job in waiting queue.
973 if (waiting_item_queue_.size()) {
974 DCHECK(wait_state_ == NET_FILES);
975 SaveItem* save_item = waiting_item_queue_.front();
976 if (save_item->save_source() != SaveFileCreateInfo::SAVE_FILE_FROM_DOM) {
977 SaveNextFile(false);
978 } else if (!in_process_count()) {
979 // If there is no in-process SaveItem, it means all sub-resources
980 // have been processed. Now we need to start serializing HTML DOM
981 // for the current page to get the generated HTML data.
982 wait_state_ = HTML_DATA;
983 // All non-HTML resources have been finished, start all remaining
984 // HTML files.
985 SaveNextFile(true);
987 } else if (in_process_count()) {
988 // Continue asking for HTML data.
989 DCHECK(wait_state_ == HTML_DATA);
991 } else {
992 // Save as HTML only or MHTML.
993 DCHECK(wait_state_ == NET_FILES);
994 DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
995 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML));
996 if (waiting_item_queue_.size()) {
997 DCHECK(all_save_items_count_ == waiting_item_queue_.size());
998 SaveNextFile(false);
1003 bool SavePackage::OnMessageReceived(const IPC::Message& message) {
1004 bool handled = true;
1005 IPC_BEGIN_MESSAGE_MAP(SavePackage, message)
1006 IPC_MESSAGE_HANDLER(ViewHostMsg_SendCurrentPageAllSavableResourceLinks,
1007 OnReceivedSavableResourceLinksForCurrentPage)
1008 IPC_MESSAGE_HANDLER(ViewHostMsg_SendSerializedHtmlData,
1009 OnReceivedSerializedHtmlData)
1010 IPC_MESSAGE_UNHANDLED(handled = false)
1011 IPC_END_MESSAGE_MAP()
1012 return handled;
1015 // After finishing all SaveItems which need to get data from net.
1016 // We collect all URLs which have local storage and send the
1017 // map:(originalURL:currentLocalPath) to render process (backend).
1018 // Then render process will serialize DOM and send data to us.
1019 void SavePackage::GetSerializedHtmlDataForCurrentPageWithLocalLinks() {
1020 if (wait_state_ != HTML_DATA)
1021 return;
1022 std::vector<GURL> saved_links;
1023 std::vector<base::FilePath> saved_file_paths;
1024 int successful_started_items_count = 0;
1026 // Collect all saved items which have local storage.
1027 // First collect the status of all the resource files and check whether they
1028 // have created local files although they have not been completely saved.
1029 // If yes, the file can be saved. Otherwise, there is a disk error, so we
1030 // need to cancel the page saving job.
1031 for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
1032 it != in_progress_items_.end(); ++it) {
1033 DCHECK(it->second->save_source() ==
1034 SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1035 if (it->second->has_final_name())
1036 successful_started_items_count++;
1037 saved_links.push_back(it->second->url());
1038 saved_file_paths.push_back(it->second->file_name());
1041 // If not all file of HTML resource have been started, then wait.
1042 if (successful_started_items_count != in_process_count())
1043 return;
1045 // Collect all saved success items.
1046 for (SavedItemMap::iterator it = saved_success_items_.begin();
1047 it != saved_success_items_.end(); ++it) {
1048 DCHECK(it->second->has_final_name());
1049 saved_links.push_back(it->second->url());
1050 saved_file_paths.push_back(it->second->file_name());
1053 // Get the relative directory name.
1054 base::FilePath relative_dir_name = saved_main_directory_path_.BaseName();
1056 Send(new ViewMsg_GetSerializedHtmlDataForCurrentPageWithLocalLinks(
1057 routing_id(), saved_links, saved_file_paths, relative_dir_name));
1060 // Process the serialized HTML content data of a specified web page
1061 // retrieved from render process.
1062 void SavePackage::OnReceivedSerializedHtmlData(const GURL& frame_url,
1063 const std::string& data,
1064 int32 status) {
1065 WebPageSerializerClient::PageSerializationStatus flag =
1066 static_cast<WebPageSerializerClient::PageSerializationStatus>(status);
1067 // Check current state.
1068 if (wait_state_ != HTML_DATA)
1069 return;
1071 int id = contents_id();
1072 // If the all frames are finished saving, we need to close the
1073 // remaining SaveItems.
1074 if (flag == WebPageSerializerClient::AllFramesAreFinished) {
1075 for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
1076 it != in_progress_items_.end(); ++it) {
1077 DVLOG(20) << " " << __FUNCTION__ << "()"
1078 << " save_id = " << it->second->save_id()
1079 << " url = \"" << it->second->url().spec() << "\"";
1080 BrowserThread::PostTask(
1081 BrowserThread::FILE, FROM_HERE,
1082 base::Bind(&SaveFileManager::SaveFinished,
1083 file_manager_,
1084 it->second->save_id(),
1085 it->second->url(),
1087 true));
1089 return;
1092 SaveUrlItemMap::iterator it = in_progress_items_.find(frame_url.spec());
1093 if (it == in_progress_items_.end()) {
1094 for (SavedItemMap::iterator saved_it = saved_success_items_.begin();
1095 saved_it != saved_success_items_.end(); ++saved_it) {
1096 if (saved_it->second->url() == frame_url) {
1097 wrote_to_completed_file_ = true;
1098 break;
1102 it = saved_failed_items_.find(frame_url.spec());
1103 if (it != saved_failed_items_.end())
1104 wrote_to_failed_file_ = true;
1106 return;
1109 SaveItem* save_item = it->second;
1110 DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1112 if (!data.empty()) {
1113 // Prepare buffer for saving HTML data.
1114 scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size()));
1115 memcpy(new_data->data(), data.data(), data.size());
1117 // Call write file functionality in file thread.
1118 BrowserThread::PostTask(
1119 BrowserThread::FILE, FROM_HERE,
1120 base::Bind(&SaveFileManager::UpdateSaveProgress,
1121 file_manager_,
1122 save_item->save_id(),
1123 new_data,
1124 static_cast<int>(data.size())));
1127 // Current frame is completed saving, call finish in file thread.
1128 if (flag == WebPageSerializerClient::CurrentFrameIsFinished) {
1129 DVLOG(20) << " " << __FUNCTION__ << "()"
1130 << " save_id = " << save_item->save_id()
1131 << " url = \"" << save_item->url().spec() << "\"";
1132 BrowserThread::PostTask(
1133 BrowserThread::FILE, FROM_HERE,
1134 base::Bind(&SaveFileManager::SaveFinished,
1135 file_manager_,
1136 save_item->save_id(),
1137 save_item->url(),
1139 true));
1143 // Ask for all savable resource links from backend, include main frame and
1144 // sub-frame.
1145 void SavePackage::GetAllSavableResourceLinksForCurrentPage() {
1146 if (wait_state_ != START_PROCESS)
1147 return;
1149 wait_state_ = RESOURCES_LIST;
1150 Send(new ViewMsg_GetAllSavableResourceLinksForCurrentPage(routing_id(),
1151 page_url_));
1154 // Give backend the lists which contain all resource links that have local
1155 // storage, after which, render process will serialize DOM for generating
1156 // HTML data.
1157 void SavePackage::OnReceivedSavableResourceLinksForCurrentPage(
1158 const std::vector<GURL>& resources_list,
1159 const std::vector<Referrer>& referrers_list,
1160 const std::vector<GURL>& frames_list) {
1161 if (wait_state_ != RESOURCES_LIST)
1162 return;
1164 if (resources_list.size() != referrers_list.size())
1165 return;
1167 all_save_items_count_ = static_cast<int>(resources_list.size()) +
1168 static_cast<int>(frames_list.size());
1170 // We use total bytes as the total number of files we want to save.
1171 // Hack to avoid touching download_ after user cancel.
1172 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
1173 // with SavePackage flow.
1174 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS))
1175 download_->SetTotalBytes(all_save_items_count_);
1177 if (all_save_items_count_) {
1178 // Put all sub-resources to wait list.
1179 for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) {
1180 const GURL& u = resources_list[i];
1181 DCHECK(u.is_valid());
1182 SaveFileCreateInfo::SaveFileSource save_source = u.SchemeIsFile() ?
1183 SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
1184 SaveFileCreateInfo::SAVE_FILE_FROM_NET;
1185 SaveItem* save_item = new SaveItem(u, referrers_list[i],
1186 this, save_source);
1187 waiting_item_queue_.push(save_item);
1189 // Put all HTML resources to wait list.
1190 for (int i = 0; i < static_cast<int>(frames_list.size()); ++i) {
1191 const GURL& u = frames_list[i];
1192 DCHECK(u.is_valid());
1193 SaveItem* save_item = new SaveItem(
1194 u, Referrer(), this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1195 waiting_item_queue_.push(save_item);
1197 wait_state_ = NET_FILES;
1198 DoSavingProcess();
1199 } else {
1200 // No resource files need to be saved, treat it as user cancel.
1201 Cancel(true);
1205 base::FilePath SavePackage::GetSuggestedNameForSaveAs(
1206 bool can_save_as_complete,
1207 const std::string& contents_mime_type,
1208 const std::string& accept_langs) {
1209 base::FilePath name_with_proper_ext = base::FilePath::FromUTF16Unsafe(title_);
1211 // If the page's title matches its URL, use the URL. Try to use the last path
1212 // component or if there is none, the domain as the file name.
1213 // Normally we want to base the filename on the page title, or if it doesn't
1214 // exist, on the URL. It's not easy to tell if the page has no title, because
1215 // if the page has no title, WebContents::GetTitle() will return the page's
1216 // URL (adjusted for display purposes). Therefore, we convert the "title"
1217 // back to a URL, and if it matches the original page URL, we know the page
1218 // had no title (or had a title equal to its URL, which is fine to treat
1219 // similarly).
1220 if (title_ == url_formatter::FormatUrl(page_url_, accept_langs)) {
1221 std::string url_path;
1222 if (!page_url_.SchemeIs(url::kDataScheme)) {
1223 std::vector<std::string> url_parts = base::SplitString(
1224 page_url_.path(), "/", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
1225 if (!url_parts.empty()) {
1226 for (int i = static_cast<int>(url_parts.size()) - 1; i >= 0; --i) {
1227 url_path = url_parts[i];
1228 if (!url_path.empty())
1229 break;
1232 if (url_path.empty())
1233 url_path = page_url_.host();
1234 } else {
1235 url_path = "dataurl";
1237 name_with_proper_ext = base::FilePath::FromUTF8Unsafe(url_path);
1240 // Ask user for getting final saving name.
1241 name_with_proper_ext = EnsureMimeExtension(name_with_proper_ext,
1242 contents_mime_type);
1243 // Adjust extension for complete types.
1244 if (can_save_as_complete)
1245 name_with_proper_ext = EnsureHtmlExtension(name_with_proper_ext);
1247 base::FilePath::StringType file_name = name_with_proper_ext.value();
1248 base::i18n::ReplaceIllegalCharactersInPath(&file_name, '_');
1249 return base::FilePath(file_name);
1252 base::FilePath SavePackage::EnsureHtmlExtension(const base::FilePath& name) {
1253 // If the file name doesn't have an extension suitable for HTML files,
1254 // append one.
1255 base::FilePath::StringType ext = name.Extension();
1256 if (!ext.empty())
1257 ext.erase(ext.begin()); // Erase preceding '.'.
1258 std::string mime_type;
1259 if (!net::GetMimeTypeFromExtension(ext, &mime_type) ||
1260 !CanSaveAsComplete(mime_type)) {
1261 return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
1262 kDefaultHtmlExtension);
1264 return name;
1267 base::FilePath SavePackage::EnsureMimeExtension(const base::FilePath& name,
1268 const std::string& contents_mime_type) {
1269 // Start extension at 1 to skip over period if non-empty.
1270 base::FilePath::StringType ext = name.Extension().length() ?
1271 name.Extension().substr(1) : name.Extension();
1272 base::FilePath::StringType suggested_extension =
1273 ExtensionForMimeType(contents_mime_type);
1274 std::string mime_type;
1275 if (!suggested_extension.empty() &&
1276 !net::GetMimeTypeFromExtension(ext, &mime_type)) {
1277 // Extension is absent or needs to be updated.
1278 return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
1279 suggested_extension);
1281 return name;
1284 const base::FilePath::CharType* SavePackage::ExtensionForMimeType(
1285 const std::string& contents_mime_type) {
1286 static const struct {
1287 const base::FilePath::CharType *mime_type;
1288 const base::FilePath::CharType *suggested_extension;
1289 } extensions[] = {
1290 { FILE_PATH_LITERAL("text/html"), kDefaultHtmlExtension },
1291 { FILE_PATH_LITERAL("text/xml"), FILE_PATH_LITERAL("xml") },
1292 { FILE_PATH_LITERAL("application/xhtml+xml"), FILE_PATH_LITERAL("xhtml") },
1293 { FILE_PATH_LITERAL("text/plain"), FILE_PATH_LITERAL("txt") },
1294 { FILE_PATH_LITERAL("text/css"), FILE_PATH_LITERAL("css") },
1296 #if defined(OS_POSIX)
1297 base::FilePath::StringType mime_type(contents_mime_type);
1298 #elif defined(OS_WIN)
1299 base::FilePath::StringType mime_type(base::UTF8ToWide(contents_mime_type));
1300 #endif // OS_WIN
1301 for (uint32 i = 0; i < arraysize(extensions); ++i) {
1302 if (mime_type == extensions[i].mime_type)
1303 return extensions[i].suggested_extension;
1305 return FILE_PATH_LITERAL("");
1308 void SavePackage::GetSaveInfo() {
1309 // Can't use web_contents_ in the file thread, so get the data that we need
1310 // before calling to it.
1311 base::FilePath website_save_dir, download_save_dir;
1312 bool skip_dir_check = false;
1313 DCHECK(download_manager_);
1314 if (download_manager_->GetDelegate()) {
1315 download_manager_->GetDelegate()->GetSaveDir(
1316 web_contents()->GetBrowserContext(), &website_save_dir,
1317 &download_save_dir, &skip_dir_check);
1319 std::string mime_type = web_contents()->GetContentsMimeType();
1320 std::string accept_languages =
1321 GetContentClient()->browser()->GetAcceptLangs(
1322 web_contents()->GetBrowserContext());
1324 BrowserThread::PostTask(
1325 BrowserThread::FILE, FROM_HERE,
1326 base::Bind(&SavePackage::CreateDirectoryOnFileThread, this,
1327 website_save_dir, download_save_dir, skip_dir_check,
1328 mime_type, accept_languages));
1331 void SavePackage::CreateDirectoryOnFileThread(
1332 const base::FilePath& website_save_dir,
1333 const base::FilePath& download_save_dir,
1334 bool skip_dir_check,
1335 const std::string& mime_type,
1336 const std::string& accept_langs) {
1337 base::FilePath save_dir;
1338 // If the default html/websites save folder doesn't exist...
1339 // We skip the directory check for gdata directories on ChromeOS.
1340 if (!skip_dir_check && !base::DirectoryExists(website_save_dir)) {
1341 // If the default download dir doesn't exist, create it.
1342 if (!base::DirectoryExists(download_save_dir)) {
1343 bool res = base::CreateDirectory(download_save_dir);
1344 DCHECK(res);
1346 save_dir = download_save_dir;
1347 } else {
1348 // If it does exist, use the default save dir param.
1349 save_dir = website_save_dir;
1352 bool can_save_as_complete = CanSaveAsComplete(mime_type);
1353 base::FilePath suggested_filename = GetSuggestedNameForSaveAs(
1354 can_save_as_complete, mime_type, accept_langs);
1355 base::FilePath::StringType pure_file_name =
1356 suggested_filename.RemoveExtension().BaseName().value();
1357 base::FilePath::StringType file_name_ext = suggested_filename.Extension();
1359 // Need to make sure the suggested file name is not too long.
1360 uint32 max_path = GetMaxPathLengthForDirectory(save_dir);
1362 if (GetSafePureFileName(save_dir, file_name_ext, max_path, &pure_file_name)) {
1363 save_dir = save_dir.Append(pure_file_name + file_name_ext);
1364 } else {
1365 // Cannot create a shorter filename. This will cause the save as operation
1366 // to fail unless the user pick a shorter name. Continuing even though it
1367 // will fail because returning means no save as popup for the user, which
1368 // is even more confusing. This case should be rare though.
1369 save_dir = save_dir.Append(suggested_filename);
1372 BrowserThread::PostTask(
1373 BrowserThread::UI, FROM_HERE,
1374 base::Bind(&SavePackage::ContinueGetSaveInfo, this, save_dir,
1375 can_save_as_complete));
1378 void SavePackage::ContinueGetSaveInfo(const base::FilePath& suggested_path,
1379 bool can_save_as_complete) {
1381 // The WebContents which owns this SavePackage may have disappeared during
1382 // the UI->FILE->UI thread hop of
1383 // GetSaveInfo->CreateDirectoryOnFileThread->ContinueGetSaveInfo.
1384 if (!web_contents() || !download_manager_->GetDelegate())
1385 return;
1387 base::FilePath::StringType default_extension;
1388 if (can_save_as_complete)
1389 default_extension = kDefaultHtmlExtension;
1391 download_manager_->GetDelegate()->ChooseSavePath(
1392 web_contents(),
1393 suggested_path,
1394 default_extension,
1395 can_save_as_complete,
1396 base::Bind(&SavePackage::OnPathPicked, AsWeakPtr()));
1399 void SavePackage::OnPathPicked(
1400 const base::FilePath& final_name,
1401 SavePageType type,
1402 const SavePackageDownloadCreatedCallback& download_created_callback) {
1403 DCHECK((type == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
1404 (type == SAVE_PAGE_TYPE_AS_MHTML) ||
1405 (type == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)) << type;
1406 // Ensure the filename is safe.
1407 saved_main_file_path_ = final_name;
1408 // TODO(asanka): This call may block on IO and shouldn't be made
1409 // from the UI thread. See http://crbug.com/61827.
1410 net::GenerateSafeFileName(web_contents()->GetContentsMimeType(), false,
1411 &saved_main_file_path_);
1413 saved_main_directory_path_ = saved_main_file_path_.DirName();
1414 save_type_ = type;
1415 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
1416 // Make new directory for saving complete file.
1417 saved_main_directory_path_ = saved_main_directory_path_.Append(
1418 saved_main_file_path_.RemoveExtension().BaseName().value() +
1419 FILE_PATH_LITERAL("_files"));
1422 Init(download_created_callback);
1425 void SavePackage::StopObservation() {
1426 DCHECK(download_);
1427 DCHECK(download_manager_);
1429 download_->RemoveObserver(this);
1430 download_ = NULL;
1431 download_manager_ = NULL;
1434 void SavePackage::OnDownloadDestroyed(DownloadItem* download) {
1435 StopObservation();
1438 void SavePackage::FinalizeDownloadEntry() {
1439 DCHECK(download_);
1440 DCHECK(download_manager_);
1442 download_manager_->OnSavePackageSuccessfullyFinished(download_);
1443 StopObservation();
1446 } // namespace content