1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
6 #include "base/files/file_util.h"
7 #include "base/files/scoped_temp_dir.h"
8 #include "base/prefs/pref_service.h"
9 #include "chrome/browser/character_encoding.h"
10 #include "chrome/browser/net/url_request_mock_util.h"
11 #include "chrome/browser/profiles/profile.h"
12 #include "chrome/browser/ui/browser.h"
13 #include "chrome/browser/ui/browser_commands.h"
14 #include "chrome/browser/ui/tabs/tab_strip_model.h"
15 #include "chrome/common/pref_names.h"
16 #include "chrome/test/base/in_process_browser_test.h"
17 #include "chrome/test/base/ui_test_utils.h"
18 #include "content/public/browser/browser_thread.h"
19 #include "content/public/browser/download_manager.h"
20 #include "content/public/browser/navigation_controller.h"
21 #include "content/public/browser/notification_service.h"
22 #include "content/public/browser/notification_source.h"
23 #include "content/public/browser/notification_types.h"
24 #include "content/public/browser/web_contents.h"
25 #include "content/public/test/test_navigation_observer.h"
26 #include "net/test/url_request/url_request_mock_http_job.h"
// One row of encoding test data: a test page and the encoding name that is
// expected for it (see the { "Big5.html", "Big5" } style initializers that
// use this type).
struct EncodingTestData {
  const char* file_name;      // Name of the test page, e.g. "Big5.html".
  const char* encoding_name;  // Expected encoding name, e.g. "Big5".
};
// Parameter table for the encoding tests: each entry names a test page and
// the encoding name expected for it. The table is handed to
// testing::ValuesIn() further down in this file.
// NOTE(review): the leading numbers on these lines are not C++, and they skip
// 39, 43, 56-57 and 59; no closing "};" is visible either. Lines appear to be
// missing from this listing -- confirm against the original file, in
// particular whether the iso-8859-1 entry under the "Disable temporarily"
// comment is really meant to be compiled in.
35 const EncodingTestData kEncodingTestDatas
[] = {
36 { "Big5.html", "Big5" },
37 { "EUC-JP.html", "EUC-JP" },
38 { "gb18030.html", "gb18030" },
40 // Disable temporarily until Blink CL
41 // (https://codereview.chromium.org/655083002/) is relanded.
42 { "iso-8859-1.html", "windows-1252" },
44 { "ISO-8859-2.html", "ISO-8859-2" },
45 { "ISO-8859-4.html", "ISO-8859-4" },
46 { "ISO-8859-5.html", "ISO-8859-5" },
47 { "ISO-8859-6.html", "ISO-8859-6" },
48 { "ISO-8859-7.html", "ISO-8859-7" },
49 { "ISO-8859-8.html", "ISO-8859-8" },
50 { "ISO-8859-13.html", "ISO-8859-13" },
51 { "ISO-8859-15.html", "ISO-8859-15" },
52 { "KOI8-R.html", "KOI8-R" },
53 { "KOI8-U.html", "KOI8-U" },
54 { "macintosh.html", "macintosh" },
55 { "Shift-JIS.html", "Shift_JIS" },
58 { "US-ASCII.html", "windows-1252" }, // http://crbug.com/15801
60 { "UTF-8.html", "UTF-8" },
61 { "UTF-16LE.html", "UTF-16LE" },
62 { "windows-874.html", "windows-874" },
63 { "EUC-KR.html", "EUC-KR" },
64 { "windows-1250.html", "windows-1250" },
65 { "windows-1251.html", "windows-1251" },
66 { "windows-1252.html", "windows-1252" },
67 { "windows-1253.html", "windows-1253" },
68 { "windows-1254.html", "windows-1254" },
69 { "windows-1255.html", "windows-1255" },
70 { "windows-1256.html", "windows-1256" },
71 { "windows-1257.html", "windows-1257" },
72 { "windows-1258.html", "windows-1258" }
// DownloadManager observer used by SaveAndCompare() in this file. It is
// constructed with a download manager and a closure, registers itself with
// the manager, and unregisters again on destruction or when the manager goes
// down.
// NOTE(review): this listing seems to have lost lines (the embedded numbers
// skip 76, 80, 82-83, 87-88, 92-93, 97-99, 102 and 104-107). For example, no
// initializer for |callback_| and no body for
// OnSavePackageSuccessfullyFinished() are visible, and access specifiers and
// closing braces are absent. Confirm against the original file.
75 class SavePackageFinishedObserver
: public content::DownloadManager::Observer
{
// |manager| is the download manager to observe; |callback| is the closure
// supplied by the caller (SaveAndCompare() passes a QuitClosure()).
77 SavePackageFinishedObserver(content::DownloadManager
* manager
,
78 const base::Closure
& callback
)
79 : download_manager_(manager
),
81 download_manager_
->AddObserver(this);
// Unregisters from the manager unless ManagerGoingDown() already cleared the
// pointer.
84 virtual ~SavePackageFinishedObserver() {
85 if (download_manager_
)
86 download_manager_
->RemoveObserver(this);
89 // DownloadManager::Observer:
90 virtual void OnSavePackageSuccessfullyFinished(
91 content::DownloadManager
* manager
, content::DownloadItem
* item
) override
{
// Drops the observer registration and clears the pointer so the destructor
// does not touch the manager again.
94 virtual void ManagerGoingDown(content::DownloadManager
* manager
) override
{
95 download_manager_
->RemoveObserver(this);
96 download_manager_
= NULL
;
// Observed manager; set to NULL once the manager has gone down.
100 content::DownloadManager
* download_manager_
;
// Closure received in the constructor.
101 base::Closure callback_
;
103 DISALLOW_COPY_AND_ASSIGN(SavePackageFinishedObserver
);
108 using content::BrowserThread
;
110 static const base::FilePath::CharType
* kTestDir
=
111 FILE_PATH_LITERAL("encoding_tests");
// Fixture shared by the encoding browser tests. It is parameterized on
// EncodingTestData, creates a unique temp directory for saved pages, and
// posts a task to the IO thread that enables URL request mocks from
// SetUpOnMainThread().
// NOTE(review): the embedded line numbers skip 116, 118, 120, 137-138, 141,
// 143-144, 149, 153-154 and 158-159, so access specifiers, closing braces and
// possibly statements are missing from this listing (for example, nothing
// visible runs |loop_runner| after SavePage()). Confirm against the original
// file.
113 class BrowserEncodingTest
114 : public InProcessBrowserTest
,
115 public testing::WithParamInterface
<EncodingTestData
> {
117 BrowserEncodingTest() {}
119 // Saves the current page and verifies that the output matches the expected
// |filename_to_write| is appended to |save_dir_| to form the output path.
// |expected| is resolved under kTestDir via ui_test_utils::GetTestFilePath()
// and compared with the saved file using base::ContentsEqual().
121 void SaveAndCompare(const char* filename_to_write
,
122 const base::FilePath
& expected
) {
123 // Dump the page, the content of dump page should be identical to the
124 // expected result file.
125 base::FilePath full_file_name
= save_dir_
.AppendASCII(filename_to_write
);
126 // We save the page as way of complete HTML file, which requires a directory
127 // name to save sub resources in it. Although this test file does not have
128 // sub resources, but the directory name is still required.
129 scoped_refptr
<content::MessageLoopRunner
> loop_runner(
130 new content::MessageLoopRunner
);
// The observer is given the runner's quit closure and watches the profile's
// download manager.
131 SavePackageFinishedObserver
observer(
132 content::BrowserContext::GetDownloadManager(browser()->profile()),
133 loop_runner
->QuitClosure());
134 browser()->tab_strip_model()->GetActiveWebContents()->SavePage(
135 full_file_name
, temp_sub_resource_dir_
,
136 content::SAVE_PAGE_TYPE_AS_COMPLETE_HTML
);
139 base::FilePath expected_file_name
= ui_test_utils::GetTestFilePath(
140 base::FilePath(kTestDir
), expected
);
142 EXPECT_TRUE(base::ContentsEqual(full_file_name
, expected_file_name
));
// Creates the temp directory, derives the save and sub-resource directories
// from it, and asks the IO thread to enable URL request mocks.
145 virtual void SetUpOnMainThread() override
{
146 ASSERT_TRUE(temp_dir_
.CreateUniqueTempDir());
147 save_dir_
= temp_dir_
.path();
148 temp_sub_resource_dir_
= save_dir_
.AppendASCII("sub_resource_files");
150 BrowserThread::PostTask(
151 BrowserThread::IO
, FROM_HERE
,
152 base::Bind(&chrome_browser_net::SetUrlRequestMocksEnabled
, true));
// Unique temp directory created in SetUpOnMainThread().
155 base::ScopedTempDir temp_dir_
;
// Path of |temp_dir_|; saved pages are written below it.
156 base::FilePath save_dir_
;
// |save_dir_|/sub_resource_files, passed to SavePage() as the resource dir.
157 base::FilePath temp_sub_resource_dir_
;
// TODO(jnd): 1. Some encodings are missing here. They'll be added later. See
// http://crbug.com/13306.
// 2. Add more files with multiple encoding name variants for each canonical
// encoding name. Webkit layout tests cover some, but testing in the UI test is
// also needed.
// Parameterized test: builds the path of GetParam().file_name below kTestDir,
// navigates to its mock URL and compares GetParam().encoding_name against a
// value read from the active WebContents.
// NOTE(review): lines 167, 169, 173 and 178-180 of the embedded numbering are
// absent, so the AppendASCII() argument on the |test_dir_path| line, the tail
// of the EXPECT_EQ and the closing brace are not visible here. Confirm
// against the original file.
165 IN_PROC_BROWSER_TEST_P(BrowserEncodingTest
, TestEncodingAliasMapping
) {
166 const char* const kAliasTestDir
= "alias_mapping";
168 base::FilePath test_dir_path
= base::FilePath(kTestDir
).AppendASCII(
170 base::FilePath
test_file_path(test_dir_path
);
171 test_file_path
= test_file_path
.AppendASCII(
172 GetParam().file_name
);
174 GURL url
= net::URLRequestMockHTTPJob::GetMockUrl(test_file_path
);
175 ui_test_utils::NavigateToURL(browser(), url
);
176 EXPECT_EQ(GetParam().encoding_name
,
177 browser()->tab_strip_model()->GetActiveWebContents()->
// Instantiates a parameterized test case under the prefix EncodingAliases,
// with one instance per kEncodingTestDatas entry.
// NOTE(review): embedded line 182 is absent; INSTANTIATE_TEST_CASE_P normally
// takes the test case name as its second argument -- confirm against the
// original file.
181 INSTANTIATE_TEST_CASE_P(EncodingAliases
,
183 testing::ValuesIn(kEncodingTestDatas
));
// Navigates to user_override/gb18030_with_iso88591_meta.html below kTestDir,
// checks the encoding reported by the WebContents, overrides it to the
// canonical name for "gb18030" via SetOverrideEncoding(), waits for the
// resulting navigation, and then saves the page and compares it with an
// expected file through SaveAndCompare().
// NOTE(review): the comment below says "flaky", but the visible test name
// carries no DISABLED_/MAYBE_ prefix. Embedded lines 191, 199, 202-203, 211,
// 214 and 216-217 are absent: whatever surrounded the "Temporarily disable"
// expectation (possibly a preprocessor guard), the last AppendASCII()
// argument and the closing brace are not visible. Confirm against the
// original file.
185 // Marked as flaky: see http://crbug.com/44668
186 IN_PROC_BROWSER_TEST_F(BrowserEncodingTest
, TestOverrideEncoding
) {
187 const char* const kTestFileName
= "gb18030_with_iso88591_meta.html";
188 const char* const kExpectedFileName
=
189 "expected_gb18030_saved_from_iso88591_meta.html";
190 const char* const kOverrideTestDir
= "user_override";
192 base::FilePath test_dir_path
=
193 base::FilePath(kTestDir
).AppendASCII(kOverrideTestDir
);
194 test_dir_path
= test_dir_path
.AppendASCII(kTestFileName
);
195 GURL url
= net::URLRequestMockHTTPJob::GetMockUrl(test_dir_path
);
196 ui_test_utils::NavigateToURL(browser(), url
);
197 content::WebContents
* web_contents
=
198 browser()->tab_strip_model()->GetActiveWebContents();
200 // Temporarily disable until the Blink CL to use windows-1252 is relanded.
201 EXPECT_EQ("windows-1252", web_contents
->GetEncoding());
204 // Override the encoding to "gb18030".
205 const std::string selected_encoding
=
206 CharacterEncoding::GetCanonicalEncodingNameByAliasName("gb18030");
// The observer is created before the override so that the navigation it
// triggers is the one waited for.
207 content::TestNavigationObserver
navigation_observer(web_contents
);
208 web_contents
->SetOverrideEncoding(selected_encoding
);
209 navigation_observer
.Wait();
210 EXPECT_EQ("gb18030", web_contents
->GetEncoding());
212 base::FilePath expected_filename
=
213 base::FilePath().AppendASCII(kOverrideTestDir
).AppendASCII(
215 SaveAndCompare(kTestFileName
, expected_filename
);
218 // The following encodings are excluded from the auto-detection test because
219 // it's a known issue that the current encoding detector does not detect them:
230 // For Hebrew, the expected encoding value is ISO-8859-8-I. See
231 // http://crbug.com/2927 for more details.
// This test fails frequently on the win_rel trybot. See http://crbug.com/122053
// It also times out frequently on Mac dbg. See http://crbug.com/351325
// The test is therefore registered under a DISABLED_ name on Windows and Mac
// and under its normal name everywhere else.
#if defined(OS_WIN) || defined(OS_MACOSX)
#define MAYBE_TestEncodingAutoDetect DISABLED_TestEncodingAutoDetect
#else
#define MAYBE_TestEncodingAutoDetect TestEncodingAutoDetect
#endif
240 // TODO(phajdan.jr): See if fix for http://crbug.com/122053 would help here.
// For each entry of kTestDatas: loads the page with the
// kWebKitUsesUniversalDetector pref off and expects "ISO-8859-4", then turns
// the pref on, reloads the tab, expects the entry's |expected_encoding|, and
// finally saves the page and compares it with the entry's expected result
// file via SaveAndCompare().
// NOTE(review): the third initializer of every kTestDatas entry, the value
// passed to SetString(prefs::kDefaultCharset, ...), any wait on |observer|
// after the reload, and all closing braces are absent from this listing (the
// embedded line numbers skip e.g. 250, 308-309 and 332-333). Confirm against
// the original file.
241 IN_PROC_BROWSER_TEST_F(BrowserEncodingTest
, MAYBE_TestEncodingAutoDetect
) {
242 struct EncodingAutoDetectTestData
{
243 const char* test_file_name
; // File name of test data.
244 const char* expected_result
; // File name of expected results.
245 const char* expected_encoding
; // expected encoding.
247 const EncodingAutoDetectTestData kTestDatas
[] = {
248 { "Big5_with_no_encoding_specified.html",
249 "expected_Big5_saved_from_no_encoding_specified.html",
251 { "gb18030_with_no_encoding_specified.html",
252 "expected_gb18030_saved_from_no_encoding_specified.html",
255 // Disable until the Blink CL to use 'windows-1252' is relanded.
256 { "iso-8859-1_with_no_encoding_specified.html",
257 "expected_iso-8859-1_saved_from_no_encoding_specified.html",
260 { "ISO-8859-5_with_no_encoding_specified.html",
261 "expected_ISO-8859-5_saved_from_no_encoding_specified.html",
263 { "ISO-8859-6_with_no_encoding_specified.html",
264 "expected_ISO-8859-6_saved_from_no_encoding_specified.html",
266 { "ISO-8859-7_with_no_encoding_specified.html",
267 "expected_ISO-8859-7_saved_from_no_encoding_specified.html",
269 { "ISO-8859-8_with_no_encoding_specified.html",
270 "expected_ISO-8859-8_saved_from_no_encoding_specified.html",
272 { "KOI8-R_with_no_encoding_specified.html",
273 "expected_KOI8-R_saved_from_no_encoding_specified.html",
275 { "Shift-JIS_with_no_encoding_specified.html",
276 "expected_Shift-JIS_saved_from_no_encoding_specified.html",
278 { "UTF-8_with_no_encoding_specified.html",
279 "expected_UTF-8_saved_from_no_encoding_specified.html",
281 { "EUC-KR_with_no_encoding_specified.html",
282 "expected_EUC-KR_saved_from_no_encoding_specified.html",
284 { "windows-1251_with_no_encoding_specified.html",
285 "expected_windows-1251_saved_from_no_encoding_specified.html",
287 { "windows-1254_with_no_encoding_specified.html",
288 "expected_windows-1254_saved_from_no_encoding_specified.html",
290 { "windows-1255_with_no_encoding_specified.html",
291 "expected_windows-1255_saved_from_no_encoding_specified.html",
293 { "windows-1256_with_no_encoding_specified.html",
294 "expected_windows-1256_saved_from_no_encoding_specified.html",
297 const char* const kAutoDetectDir
= "auto_detect";
298 // Directory of the files of expected results.
299 const char* const kExpectedResultDir
= "expected_results";
301 base::FilePath test_dir_path
=
302 base::FilePath(kTestDir
).AppendASCII(kAutoDetectDir
);
304 // Set the default charset to one of encodings not supported by the current
305 // auto-detector (Please refer to the above comments) to make sure we
306 // incorrectly decode the page. Now we use ISO-8859-4.
307 browser()->profile()->GetPrefs()->SetString(prefs::kDefaultCharset
,
310 content::WebContents
* web_contents
=
311 browser()->tab_strip_model()->GetActiveWebContents();
312 for (size_t i
= 0; i
< arraysize(kTestDatas
); ++i
) {
313 // Disable auto detect if it is on.
314 browser()->profile()->GetPrefs()->SetBoolean(
315 prefs::kWebKitUsesUniversalDetector
, false);
317 base::FilePath
test_file_path(test_dir_path
);
318 test_file_path
= test_file_path
.AppendASCII(kTestDatas
[i
].test_file_name
);
319 GURL url
= net::URLRequestMockHTTPJob::GetMockUrl(test_file_path
);
320 ui_test_utils::NavigateToURL(browser(), url
);
322 // Get the encoding used for the page, it must be the default charset we
324 EXPECT_EQ("ISO-8859-4", web_contents
->GetEncoding());
326 // Enable the encoding auto detection.
327 browser()->profile()->GetPrefs()->SetBoolean(
328 prefs::kWebKitUsesUniversalDetector
, true);
// The observer is created before the reload is requested.
330 content::TestNavigationObserver
observer(web_contents
);
331 chrome::Reload(browser(), CURRENT_TAB
);
334 // Re-get the encoding of page. It should return the real encoding now.
335 EXPECT_EQ(kTestDatas
[i
].expected_encoding
, web_contents
->GetEncoding());
337 // Dump the page, the content of dump page should be equal with our expect
// The expected file lives at auto_detect/expected_results/<expected_result>,
// which SaveAndCompare() resolves below kTestDir.
339 base::FilePath expected_result_file_name
=
340 base::FilePath().AppendASCII(kAutoDetectDir
).
341 AppendASCII(kExpectedResultDir
).
342 AppendASCII(kTestDatas
[i
].expected_result
);
343 SaveAndCompare(kTestDatas
[i
].test_file_name
, expected_result_file_name
);