// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/bind.h"
#include "base/file_util.h"
#include "base/scoped_temp_dir.h"
#include "chrome/browser/character_encoding.h"
#include "chrome/browser/net/url_request_mock_util.h"
#include "chrome/browser/prefs/pref_service.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/ui/browser.h"
#include "chrome/browser/ui/browser_commands.h"
#include "chrome/browser/ui/browser_tabstrip.h"
#include "chrome/common/pref_names.h"
#include "chrome/test/base/in_process_browser_test.h"
#include "chrome/test/base/ui_test_utils.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/navigation_controller.h"
#include "content/public/browser/notification_service.h"
#include "content/public/browser/notification_source.h"
#include "content/public/browser/notification_types.h"
#include "content/public/browser/web_contents.h"
#include "content/public/test/test_navigation_observer.h"
#include "content/test/net/url_request_mock_http_job.h"
27 using content::BrowserThread
;
29 static const FilePath::CharType
* kTestDir
= FILE_PATH_LITERAL("encoding_tests");
31 class BrowserEncodingTest
: public InProcessBrowserTest
{
33 BrowserEncodingTest() {}
35 // Saves the current page and verifies that the output matches the expected
37 void SaveAndCompare(const char* filename_to_write
, const FilePath
& expected
) {
38 // Dump the page, the content of dump page should be identical to the
39 // expected result file.
40 FilePath full_file_name
= save_dir_
.AppendASCII(filename_to_write
);
41 // We save the page as way of complete HTML file, which requires a directory
42 // name to save sub resources in it. Although this test file does not have
43 // sub resources, but the directory name is still required.
44 content::WindowedNotificationObserver
observer(
45 content::NOTIFICATION_SAVE_PACKAGE_SUCCESSFULLY_FINISHED
,
46 content::NotificationService::AllSources());
47 chrome::GetActiveWebContents(browser())->SavePage(
48 full_file_name
, temp_sub_resource_dir_
,
49 content::SAVE_PAGE_TYPE_AS_COMPLETE_HTML
);
52 FilePath expected_file_name
= ui_test_utils::GetTestFilePath(
53 FilePath(kTestDir
), expected
);
55 EXPECT_TRUE(file_util::ContentsEqual(full_file_name
, expected_file_name
));
58 virtual void SetUpOnMainThread() OVERRIDE
{
59 ASSERT_TRUE(temp_dir_
.CreateUniqueTempDir());
60 save_dir_
= temp_dir_
.path();
61 temp_sub_resource_dir_
= save_dir_
.AppendASCII("sub_resource_files");
63 BrowserThread::PostTask(
64 BrowserThread::IO
, FROM_HERE
,
65 base::Bind(&chrome_browser_net::SetUrlRequestMocksEnabled
, true));
68 ScopedTempDir temp_dir_
;
70 FilePath temp_sub_resource_dir_
;
// TODO(jnd): 1. Some encodings are missing here. They will be added later. See
// http://crbug.com/13306.
// 2. Add more files with multiple encoding name variants for each canonical
// encoding name. Webkit layout tests cover some, but testing in the UI test is
// also necessary.
// SLOW_ is added for XP debug bots. These tests should really be unittests...
79 IN_PROC_BROWSER_TEST_F(BrowserEncodingTest
, SLOW_TestEncodingAliasMapping
) {
80 struct EncodingTestData
{
81 const char* file_name
;
82 const char* encoding_name
;
85 const EncodingTestData kEncodingTestDatas
[] = {
86 { "Big5.html", "Big5" },
87 { "EUC-JP.html", "EUC-JP" },
88 { "gb18030.html", "gb18030" },
89 { "iso-8859-1.html", "ISO-8859-1" },
90 { "ISO-8859-2.html", "ISO-8859-2" },
91 { "ISO-8859-4.html", "ISO-8859-4" },
92 { "ISO-8859-5.html", "ISO-8859-5" },
93 { "ISO-8859-6.html", "ISO-8859-6" },
94 { "ISO-8859-7.html", "ISO-8859-7" },
95 { "ISO-8859-8.html", "ISO-8859-8" },
96 { "ISO-8859-13.html", "ISO-8859-13" },
97 { "ISO-8859-15.html", "ISO-8859-15" },
98 { "KOI8-R.html", "KOI8-R" },
99 { "KOI8-U.html", "KOI8-U" },
100 { "macintosh.html", "macintosh" },
101 { "Shift-JIS.html", "Shift_JIS" },
102 { "US-ASCII.html", "ISO-8859-1" }, // http://crbug.com/15801
103 { "UTF-8.html", "UTF-8" },
104 { "UTF-16LE.html", "UTF-16LE" },
105 { "windows-874.html", "windows-874" },
106 // http://crbug.com/95963
107 // { "windows-949.html", "windows-949" },
108 { "windows-1250.html", "windows-1250" },
109 { "windows-1251.html", "windows-1251" },
110 { "windows-1252.html", "windows-1252" },
111 { "windows-1253.html", "windows-1253" },
112 { "windows-1254.html", "windows-1254" },
113 { "windows-1255.html", "windows-1255" },
114 { "windows-1256.html", "windows-1256" },
115 { "windows-1257.html", "windows-1257" },
116 { "windows-1258.html", "windows-1258" }
118 const char* const kAliasTestDir
= "alias_mapping";
120 FilePath test_dir_path
= FilePath(kTestDir
).AppendASCII(kAliasTestDir
);
121 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(kEncodingTestDatas
); ++i
) {
122 FilePath
test_file_path(test_dir_path
);
123 test_file_path
= test_file_path
.AppendASCII(
124 kEncodingTestDatas
[i
].file_name
);
126 GURL url
= content::URLRequestMockHTTPJob::GetMockUrl(test_file_path
);
128 // When looping through all the above files in one WebContents, there's a
129 // race condition on Windows trybots that causes the previous encoding to be
130 // seen sometimes. Create a new tab for each one. http://crbug.com/122053
131 ui_test_utils::NavigateToURLWithDisposition(
132 browser(), url
, NEW_FOREGROUND_TAB
,
133 ui_test_utils::BROWSER_TEST_WAIT_FOR_NAVIGATION
);
135 EXPECT_EQ(kEncodingTestDatas
[i
].encoding_name
,
136 chrome::GetActiveWebContents(browser())->GetEncoding());
137 chrome::CloseTab(browser());
// Marked as flaky: see http://crbug.com/44668
142 IN_PROC_BROWSER_TEST_F(BrowserEncodingTest
, TestOverrideEncoding
) {
143 const char* const kTestFileName
= "gb18030_with_iso88591_meta.html";
144 const char* const kExpectedFileName
=
145 "expected_gb18030_saved_from_iso88591_meta.html";
146 const char* const kOverrideTestDir
= "user_override";
148 FilePath test_dir_path
= FilePath(kTestDir
).AppendASCII(kOverrideTestDir
);
149 test_dir_path
= test_dir_path
.AppendASCII(kTestFileName
);
150 GURL url
= content::URLRequestMockHTTPJob::GetMockUrl(test_dir_path
);
151 ui_test_utils::NavigateToURL(browser(), url
);
152 content::WebContents
* web_contents
= chrome::GetActiveWebContents(browser());
153 EXPECT_EQ("ISO-8859-1", web_contents
->GetEncoding());
155 // Override the encoding to "gb18030".
156 const std::string selected_encoding
=
157 CharacterEncoding::GetCanonicalEncodingNameByAliasName("gb18030");
158 content::TestNavigationObserver
navigation_observer(
159 content::Source
<content::NavigationController
>(
160 &web_contents
->GetController()));
161 web_contents
->SetOverrideEncoding(selected_encoding
);
162 navigation_observer
.Wait();
163 EXPECT_EQ("gb18030", web_contents
->GetEncoding());
165 FilePath expected_filename
=
166 FilePath().AppendASCII(kOverrideTestDir
).AppendASCII(kExpectedFileName
);
167 SaveAndCompare(kTestFileName
, expected_filename
);
// The following encodings are excluded from the auto-detection test because
// it's a known issue that the current encoding detector does not detect them.
// NOTE(review): the list of excluded encodings is missing from this comment;
// ISO-8859-4, which the test uses as the default charset, is one of them.
//
// For Hebrew, the expected encoding value is ISO-8859-8-I. See
// http://crbug.com/2927 for more details.
//
// This test fails frequently on the win_rel trybot. See http://crbug.com/122053
// Select the test name per platform: the DISABLED_ name on Windows, the plain
// name everywhere else. Defining both names unconditionally would redefine the
// macro.
#if defined(OS_WIN)
#define MAYBE_TestEncodingAutoDetect DISABLED_TestEncodingAutoDetect
#else
#define MAYBE_TestEncodingAutoDetect TestEncodingAutoDetect
#endif
191 IN_PROC_BROWSER_TEST_F(BrowserEncodingTest
, MAYBE_TestEncodingAutoDetect
) {
192 struct EncodingAutoDetectTestData
{
193 const char* test_file_name
; // File name of test data.
194 const char* expected_result
; // File name of expected results.
195 const char* expected_encoding
; // expected encoding.
197 const EncodingAutoDetectTestData kTestDatas
[] = {
198 { "Big5_with_no_encoding_specified.html",
199 "expected_Big5_saved_from_no_encoding_specified.html",
201 { "gb18030_with_no_encoding_specified.html",
202 "expected_gb18030_saved_from_no_encoding_specified.html",
204 { "iso-8859-1_with_no_encoding_specified.html",
205 "expected_iso-8859-1_saved_from_no_encoding_specified.html",
207 { "ISO-8859-5_with_no_encoding_specified.html",
208 "expected_ISO-8859-5_saved_from_no_encoding_specified.html",
210 { "ISO-8859-6_with_no_encoding_specified.html",
211 "expected_ISO-8859-6_saved_from_no_encoding_specified.html",
213 { "ISO-8859-7_with_no_encoding_specified.html",
214 "expected_ISO-8859-7_saved_from_no_encoding_specified.html",
216 { "ISO-8859-8_with_no_encoding_specified.html",
217 "expected_ISO-8859-8_saved_from_no_encoding_specified.html",
219 { "KOI8-R_with_no_encoding_specified.html",
220 "expected_KOI8-R_saved_from_no_encoding_specified.html",
222 { "Shift-JIS_with_no_encoding_specified.html",
223 "expected_Shift-JIS_saved_from_no_encoding_specified.html",
225 { "UTF-8_with_no_encoding_specified.html",
226 "expected_UTF-8_saved_from_no_encoding_specified.html",
228 { "windows-949_with_no_encoding_specified.html",
229 "expected_windows-949_saved_from_no_encoding_specified.html",
230 "windows-949-2000" },
231 { "windows-1251_with_no_encoding_specified.html",
232 "expected_windows-1251_saved_from_no_encoding_specified.html",
234 { "windows-1254_with_no_encoding_specified.html",
235 "expected_windows-1254_saved_from_no_encoding_specified.html",
237 { "windows-1255_with_no_encoding_specified.html",
238 "expected_windows-1255_saved_from_no_encoding_specified.html",
240 { "windows-1256_with_no_encoding_specified.html",
241 "expected_windows-1256_saved_from_no_encoding_specified.html",
244 const char* const kAutoDetectDir
= "auto_detect";
245 // Directory of the files of expected results.
246 const char* const kExpectedResultDir
= "expected_results";
248 FilePath test_dir_path
= FilePath(kTestDir
).AppendASCII(kAutoDetectDir
);
250 // Set the default charset to one of encodings not supported by the current
251 // auto-detector (Please refer to the above comments) to make sure we
252 // incorrectly decode the page. Now we use ISO-8859-4.
253 browser()->profile()->GetPrefs()->SetString(prefs::kDefaultCharset
,
256 content::WebContents
* web_contents
= chrome::GetActiveWebContents(browser());
257 for (size_t i
= 0; i
< ARRAYSIZE_UNSAFE(kTestDatas
); ++i
) {
258 // Disable auto detect if it is on.
259 browser()->profile()->GetPrefs()->SetBoolean(
260 prefs::kWebKitUsesUniversalDetector
, false);
262 FilePath
test_file_path(test_dir_path
);
263 test_file_path
= test_file_path
.AppendASCII(kTestDatas
[i
].test_file_name
);
264 GURL url
= content::URLRequestMockHTTPJob::GetMockUrl(test_file_path
);
265 ui_test_utils::NavigateToURL(browser(), url
);
267 // Get the encoding used for the page, it must be the default charset we
269 EXPECT_EQ("ISO-8859-4", web_contents
->GetEncoding());
271 // Enable the encoding auto detection.
272 browser()->profile()->GetPrefs()->SetBoolean(
273 prefs::kWebKitUsesUniversalDetector
, true);
275 content::TestNavigationObserver
observer(
276 content::Source
<content::NavigationController
>(
277 &web_contents
->GetController()));
278 chrome::Reload(browser(), CURRENT_TAB
);
281 // Re-get the encoding of page. It should return the real encoding now.
282 EXPECT_EQ(kTestDatas
[i
].expected_encoding
, web_contents
->GetEncoding());
284 // Dump the page, the content of dump page should be equal with our expect
286 FilePath expected_result_file_name
=
287 FilePath().AppendASCII(kAutoDetectDir
).AppendASCII(kExpectedResultDir
).
288 AppendASCII(kTestDatas
[i
].expected_result
);
289 SaveAndCompare(kTestDatas
[i
].test_file_name
, expected_result_file_name
);