1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
6 #define CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
13 #include "base/basictypes.h"
14 #include "base/containers/hash_tables.h"
15 #include "base/files/file_path.h"
16 #include "base/gtest_prod_util.h"
17 #include "base/memory/ref_counted.h"
18 #include "base/memory/weak_ptr.h"
19 #include "base/time/time.h"
20 #include "content/common/content_export.h"
21 #include "content/public/browser/download_item.h"
22 #include "content/public/browser/download_manager_delegate.h"
23 #include "content/public/browser/save_page_type.h"
24 #include "content/public/browser/web_contents_observer.h"
25 #include "content/public/common/referrer.h"
26 #include "net/base/net_errors.h"
32 class DownloadItemImpl
;
33 class DownloadManagerImpl
;
35 class SaveFileManager
;
38 struct SaveFileCreateInfo
;
40 // The SavePackage object manages the process of saving a page as only-html or
41 // complete-html or MHTML and providing the information for displaying saving
42 // status. Saving page as only-html means that we save the web page to a
43 // single HTML file regardless of internal sub-resources and sub-frames. Saving
44 // page as complete-html page means we save not only the main html file the user
45 // told it to save but also a directory for the auxiliary files such as all
46 // sub-frame html files, image files, css files and js files. Saving page as
47 // MHTML means the same thing as complete-html, but it uses the MHTML format to
48 // contain the html and all auxiliary files in a single text file.
50 // Each page saving job may include one or multiple files which need to be
51 // saved. Each file is represented by a SaveItem, and all SaveItems are owned
52 // by the SavePackage. SaveItems are created when a user initiates a page
53 // saving job, and exist for the duration of one contents's lifetime.
54 class CONTENT_EXPORT SavePackage
55 : public base::RefCountedThreadSafe
<SavePackage
>,
56 public WebContentsObserver
,
57 public DownloadItem::Observer
,
58 public base::SupportsWeakPtr
<SavePackage
> {
61 // State when created but not initialized.
63 // State after initializing, but not yet saving.
65 // Waiting on a list of savable resources from the backend.
67 // Waiting for data sent from net IO or from file system.
69 // Waiting for html DOM data sent from render process.
71 // Saving page finished successfully.
73 // Failed to save page.
77 static const base::FilePath::CharType kDefaultHtmlExtension
[];
79 // Constructor for user initiated page saving. This constructor results in a
80 // SavePackage that will generate and sanitize a suggested name for the user
81 // in the "Save As" dialog box.
82 explicit SavePackage(WebContents
* web_contents
);
84 // This constructor is used only for testing. We can bypass the file and
85 // directory name generation / sanitization by providing well known paths
86 // better suited for tests.
87 SavePackage(WebContents
* web_contents
,
88 SavePageType save_type
,
89 const base::FilePath
& file_full_path
,
90 const base::FilePath
& directory_full_path
);
92 // Initialize the SavePackage. Returns true if it initializes properly. This
93 // method must be called on the UI thread, because using
94 // g_browser_process on a non-UI thread can cause crashes during shutdown.
95 // |cb| will be called when the DownloadItem is created, before data is
97 bool Init(const SavePackageDownloadCreatedCallback
& cb
);
99 // Cancel all in-progress requests; may be triggered by the user or an internal error.
100 void Cancel(bool user_action
);
104 // Notifications sent from the file thread to the UI thread.
105 void StartSave(const SaveFileCreateInfo
* info
);
106 bool UpdateSaveProgress(int32 save_id
, int64 size
, bool write_success
);
107 void SaveFinished(int32 save_id
, int64 size
, bool is_success
);
108 void SaveFailed(const GURL
& save_url
);
109 void SaveCanceled(SaveItem
* save_item
);
111 // Rough percent complete, -1 means we don't know (since we didn't receive a
113 int PercentComplete();
// Returns true if the save was aborted, i.e. either |user_canceled_| or
// |disk_error_occurred_| is set.
115 bool canceled() const { return user_canceled_
|| disk_error_occurred_
; }
// Returns the current value of the |finished_| flag.
116 bool finished() const { return finished_
; }
// Returns |save_type_|, the type of save (e.g. only-html or complete-html)
// used by this package.
117 SavePageType
save_type() const { return save_type_
; }
// Returns |contents_id_|, the id identifying the contents this package
// belongs to.
118 int contents_id() const { return contents_id_
; }
// Returns |unique_id_|, the unique ID of this SavePackage.
119 int id() const { return unique_id_
; }
124 friend class base::RefCountedThreadSafe
<SavePackage
>;
126 void InitWithDownloadItem(
127 const SavePackageDownloadCreatedCallback
& download_created_callback
,
128 DownloadItemImpl
* item
);
130 // Callback for WebContents::GenerateMHTML().
131 void OnMHTMLGenerated(int64 size
);
134 SavePackage(WebContents
* web_contents
,
135 const base::FilePath
& file_full_path
,
136 const base::FilePath
& directory_full_path
);
138 ~SavePackage() override
;
140 // Notes from Init() above apply here as well.
145 void SaveNextFile(bool process_all_remainder_items
);
146 void DoSavingProcess();
148 // WebContentsObserver implementation.
149 bool OnMessageReceived(const IPC::Message
& message
) override
;
150 bool OnMessageReceived(const IPC::Message
& message
,
151 RenderFrameHost
* render_frame_host
) override
;
153 // DownloadItem::Observer implementation.
154 void OnDownloadDestroyed(DownloadItem
* download
) override
;
156 // Update the download history of this item upon completion.
157 void FinalizeDownloadEntry();
159 // Detach from DownloadManager.
160 void StopObservation();
162 // Return max length of a path for a specific base directory.
163 // This is needed on POSIX, which restricts the length of file names in
164 // addition to the restriction on the length of path names.
165 // |base_dir| is assumed to be a directory name with no trailing slash.
166 static uint32
GetMaxPathLengthForDirectory(const base::FilePath
& base_dir
);
168 static bool GetSafePureFileName(
169 const base::FilePath
& dir_path
,
170 const base::FilePath::StringType
& file_name_ext
,
171 uint32 max_file_path_len
,
172 base::FilePath::StringType
* pure_file_name
);
174 // Create a file name based on the response from the server.
175 bool GenerateFileName(const std::string
& disposition
,
178 base::FilePath::StringType
* generated_name
);
180 // Set of methods to get all savable resource links from current web page,
181 // including main frame and sub-frames.
182 void GetSavableResourceLinksForCurrentPage();
183 void GetSavableResourceLinksForFrame(RenderFrameHost
* target
);
184 void OnSavableResourceLinksResponse(
185 RenderFrameHost
* sender
,
186 const GURL
& frame_url
,
187 const std::vector
<GURL
>& resources_list
,
188 const std::vector
<Referrer
>& referrers_list
);
189 void OnSavableResourceLinksError(RenderFrameHost
* sender
);
190 void CompleteSavableResourceLinksResponseFromFrame();
192 // Get html data by serializing all frames of current page with lists
193 // which contain all resource links that have local copy.
194 void GetSerializedHtmlDataForCurrentPageWithLocalLinks();
196 // Look up SaveItem by save id from in progress map.
197 SaveItem
* LookupItemInProcessBySaveId(int32 save_id
);
199 // Remove SaveItem from in progress map and put it to saved map.
200 void PutInProgressItemToSavedMap(SaveItem
* save_item
);
202 // Retrieves the URL to be saved from the WebContents.
203 GURL
GetUrlToBeSaved();
205 void CreateDirectoryOnFileThread(const base::FilePath
& website_save_dir
,
206 const base::FilePath
& download_save_dir
,
208 const std::string
& mime_type
,
209 const std::string
& accept_langs
);
210 void ContinueGetSaveInfo(const base::FilePath
& suggested_path
,
211 bool can_save_as_complete
);
213 const base::FilePath
& final_name
,
215 const SavePackageDownloadCreatedCallback
& cb
);
217 void OnReceivedSerializedHtmlData(const GURL
& frame_url
,
218 const std::string
& data
,
221 typedef base::hash_map
<std::string
, SaveItem
*> SaveUrlItemMap
;
222 // in_progress_items_ is a map of all saving jobs in the in-progress state.
223 SaveUrlItemMap in_progress_items_
;
224 // saved_failed_items_ is a map of all saving jobs which have failed.
225 SaveUrlItemMap saved_failed_items_
;
227 // The number of in process SaveItems.
228 int in_process_count() const {
229 return static_cast<int>(in_progress_items_
.size());
232 // The number of all SaveItems which have completed, including success items
234 int completed_count() const {
235 return static_cast<int>(saved_success_items_
.size() +
236 saved_failed_items_
.size());
239 // The current speed in files per second. This is used to update the
240 // DownloadItem associated to this SavePackage. The files per second is
241 // presented by the DownloadItem to the UI as bytes per second, which is
242 // not correct but matches the way the total and received number of files is
243 // presented as the total and received bytes.
244 int64
CurrentSpeed() const;
246 // Helper function for preparing suggested name for the SaveAs Dialog. The
247 // suggested name is determined by the web document's title.
248 base::FilePath
GetSuggestedNameForSaveAs(
249 bool can_save_as_complete
,
250 const std::string
& contents_mime_type
,
251 const std::string
& accept_langs
);
253 // Ensures that the file name has a proper extension for HTML by adding ".htm"
255 static base::FilePath
EnsureHtmlExtension(const base::FilePath
& name
);
257 // Ensures that the file name has a proper extension for supported formats
259 static base::FilePath
EnsureMimeExtension(const base::FilePath
& name
,
260 const std::string
& contents_mime_type
);
262 // Returns extension for supported MIME types (for example, for "text/plain"
263 // it returns "txt").
264 static const base::FilePath::CharType
* ExtensionForMimeType(
265 const std::string
& contents_mime_type
);
267 typedef std::queue
<SaveItem
*> SaveItemQueue
;
268 // A queue for items we are about to start saving.
269 SaveItemQueue waiting_item_queue_
;
271 // Used to de-dupe urls that are being gathered into |waiting_item_queue_|.
272 std::set
<GURL
> unique_urls_to_save_
;
274 // Temporarily stores urls of savable frames, until we can process them.
275 std::vector
<GURL
> frame_urls_to_save_
;
277 // Number of frames that we still need to get a response from.
278 int number_of_frames_pending_response_
;
280 typedef base::hash_map
<int32
, SaveItem
*> SavedItemMap
;
281 // saved_success_items_ is a map of all saving jobs which were saved successfully.
282 SavedItemMap saved_success_items_
;
284 // Non-owning pointer for handling file writing on the file thread.
285 SaveFileManager
* file_manager_
;
287 // DownloadManager owns the DownloadItem and handles history and UI.
288 DownloadManagerImpl
* download_manager_
;
289 DownloadItemImpl
* download_
;
291 // The URL of the page the user wants to save.
293 base::FilePath saved_main_file_path_
;
294 base::FilePath saved_main_directory_path_
;
296 // The title of the page the user wants to save.
297 base::string16 title_
;
299 // Used to calculate package download speed (in files per second).
300 base::TimeTicks start_tick_
;
302 // Indicates whether the actual saving job is finishing or not.
305 // Indicates whether a call to Finish() has been scheduled.
306 bool mhtml_finishing_
;
308 // Indicates whether user canceled the saving job.
311 // Indicates whether a disk error occurred.
312 bool disk_error_occurred_
;
314 // Type about saving page as only-html or complete-html.
315 SavePageType save_type_
;
317 // Total number of resources that need to be saved.
318 size_t all_save_items_count_
;
321 std::set
<base::FilePath::StringType
,
322 bool (*)(base::FilePath::StringPieceType
,
323 base::FilePath::StringPieceType
)>;
324 // This set is used to eliminate duplicated file names in saving directory.
325 FileNameSet file_name_set_
;
327 typedef base::hash_map
<base::FilePath::StringType
, uint32
> FileNameCountMap
;
328 // This map is used to track serial number for specified filename.
329 FileNameCountMap file_name_count_map_
;
331 // Indicates current waiting state when SavePackage try to get something
333 WaitState wait_state_
;
335 // For one contents, there can be only one SavePackage at a time.
336 // Now we actually use render_process_id as the contents's unique id.
337 const int contents_id_
;
339 // Unique ID for this SavePackage.
340 const int unique_id_
;
342 // Variables to record errors that happened so we can record them via
344 bool wrote_to_completed_file_
;
345 bool wrote_to_failed_file_
;
347 friend class SavePackageTest
;
348 FRIEND_TEST_ALL_PREFIXES(SavePackageTest
, TestSuggestedSaveNames
);
349 FRIEND_TEST_ALL_PREFIXES(SavePackageTest
, TestLongSafePureFilename
);
351 DISALLOW_COPY_AND_ASSIGN(SavePackage
);
354 } // namespace content
356 #endif // CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_