Roll src/third_party/WebKit d9c6159:8139f33 (svn 201974:201975)
[chromium-blink-merge.git] / net / ftp / ftp_util.cc
blob44fe98b585360e12de9964e3c667de6e6327df54
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/ftp/ftp_util.h"
7 #include <map>
8 #include <vector>
10 #include "base/i18n/case_conversion.h"
11 #include "base/i18n/char_iterator.h"
12 #include "base/logging.h"
13 #include "base/memory/singleton.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_piece.h"
16 #include "base/strings/string_split.h"
17 #include "base/strings/string_tokenizer.h"
18 #include "base/strings/string_util.h"
19 #include "base/strings/utf_string_conversions.h"
20 #include "base/time/time.h"
21 #include "third_party/icu/source/common/unicode/uchar.h"
22 #include "third_party/icu/source/i18n/unicode/datefmt.h"
23 #include "third_party/icu/source/i18n/unicode/dtfmtsym.h"
25 using base::ASCIIToUTF16;
26 using base::StringPiece16;
// For examples of Unix<->VMS path conversions, see the unit test file. On VMS
// a path looks different depending on whether it refers to a file or to a
// directory.
31 namespace net {
33 // static
34 std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) {
35 if (unix_path.empty())
36 return std::string();
38 base::StringTokenizer tokenizer(unix_path, "/");
39 std::vector<std::string> tokens;
40 while (tokenizer.GetNext())
41 tokens.push_back(tokenizer.token());
43 if (unix_path[0] == '/') {
44 // It's an absolute path.
46 if (tokens.empty()) {
47 DCHECK_EQ(1U, unix_path.length());
48 return "[]";
51 if (tokens.size() == 1)
52 return unix_path.substr(1); // Drop the leading slash.
54 std::string result(tokens[0] + ":[");
55 if (tokens.size() == 2) {
56 // Don't ask why, it just works that way on VMS.
57 result.append("000000");
58 } else {
59 result.append(tokens[1]);
60 for (size_t i = 2; i < tokens.size() - 1; i++)
61 result.append("." + tokens[i]);
63 result.append("]" + tokens[tokens.size() - 1]);
64 return result;
67 if (tokens.size() == 1)
68 return unix_path;
70 std::string result("[");
71 for (size_t i = 0; i < tokens.size() - 1; i++)
72 result.append("." + tokens[i]);
73 result.append("]" + tokens[tokens.size() - 1]);
74 return result;
77 // static
78 std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) {
79 if (unix_path.empty())
80 return std::string();
82 std::string path(unix_path);
84 if (path[path.length() - 1] != '/')
85 path.append("/");
87 // Reuse logic from UnixFilePathToVMS by appending a fake file name to the
88 // real path and removing it after conversion.
89 path.append("x");
90 path = UnixFilePathToVMS(path);
91 return path.substr(0, path.length() - 1);
94 // static
95 std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) {
96 if (vms_path.empty())
97 return ".";
99 if (vms_path[0] == '/') {
100 // This is not really a VMS path. Most likely the server is emulating UNIX.
101 // Return path as-is.
102 return vms_path;
105 if (vms_path == "[]")
106 return "/";
108 std::string result(vms_path);
109 if (vms_path[0] == '[') {
110 // It's a relative path.
111 base::ReplaceFirstSubstringAfterOffset(
112 &result, 0, "[.", base::StringPiece());
113 } else {
114 // It's an absolute path.
115 result.insert(0, "/");
116 base::ReplaceSubstringsAfterOffset(&result, 0, ":[000000]", "/");
117 base::ReplaceSubstringsAfterOffset(&result, 0, ":[", "/");
119 std::replace(result.begin(), result.end(), '.', '/');
120 std::replace(result.begin(), result.end(), ']', '/');
122 // Make sure the result doesn't end with a slash.
123 if (result.length() && result[result.length() - 1] == '/')
124 result = result.substr(0, result.length() - 1);
126 return result;
129 namespace {
131 // Lazy-initialized map of abbreviated month names.
132 class AbbreviatedMonthsMap {
133 public:
134 static AbbreviatedMonthsMap* GetInstance() {
135 return Singleton<AbbreviatedMonthsMap>::get();
138 // Converts abbreviated month name |text| to its number (in range 1-12).
139 // On success returns true and puts the number in |number|.
140 bool GetMonthNumber(const base::string16& text, int* number) {
141 // Ignore the case of the month names. The simplest way to handle that
142 // is to make everything lowercase.
143 base::string16 text_lower(base::i18n::ToLower(text));
145 if (map_.find(text_lower) == map_.end())
146 return false;
148 *number = map_[text_lower];
149 return true;
152 private:
153 friend struct DefaultSingletonTraits<AbbreviatedMonthsMap>;
155 // Constructor, initializes the map based on ICU data. It is much faster
156 // to do that just once.
157 AbbreviatedMonthsMap() {
158 int32_t locales_count;
159 const icu::Locale* locales =
160 icu::DateFormat::getAvailableLocales(locales_count);
162 for (int32_t locale = 0; locale < locales_count; locale++) {
163 UErrorCode status(U_ZERO_ERROR);
165 icu::DateFormatSymbols format_symbols(locales[locale], status);
167 // If we cannot get format symbols for some locale, it's not a fatal
168 // error. Just try another one.
169 if (U_FAILURE(status))
170 continue;
172 int32_t months_count;
173 const icu::UnicodeString* months =
174 format_symbols.getShortMonths(months_count);
176 for (int32_t month = 0; month < months_count; month++) {
177 base::string16 month_name(months[month].getBuffer(),
178 static_cast<size_t>(months[month].length()));
180 // Ignore the case of the month names. The simplest way to handle that
181 // is to make everything lowercase.
182 month_name = base::i18n::ToLower(month_name);
184 map_[month_name] = month + 1;
186 // Sometimes ICU returns longer strings, but in FTP listings a shorter
187 // abbreviation is used (for example for the Russian locale). Make sure
188 // we always have a map entry for a three-letter abbreviation.
189 map_[month_name.substr(0, 3)] = month + 1;
193 // Fail loudly if the data returned by ICU is obviously incomplete.
194 // This is intended to catch cases like http://crbug.com/177428
195 // much earlier. Note that the issue above turned out to be non-trivial
196 // to reproduce - crash data is much better indicator of a problem
197 // than incomplete bug reports.
198 CHECK_EQ(1, map_[ASCIIToUTF16("jan")]);
199 CHECK_EQ(2, map_[ASCIIToUTF16("feb")]);
200 CHECK_EQ(3, map_[ASCIIToUTF16("mar")]);
201 CHECK_EQ(4, map_[ASCIIToUTF16("apr")]);
202 CHECK_EQ(5, map_[ASCIIToUTF16("may")]);
203 CHECK_EQ(6, map_[ASCIIToUTF16("jun")]);
204 CHECK_EQ(7, map_[ASCIIToUTF16("jul")]);
205 CHECK_EQ(8, map_[ASCIIToUTF16("aug")]);
206 CHECK_EQ(9, map_[ASCIIToUTF16("sep")]);
207 CHECK_EQ(10, map_[ASCIIToUTF16("oct")]);
208 CHECK_EQ(11, map_[ASCIIToUTF16("nov")]);
209 CHECK_EQ(12, map_[ASCIIToUTF16("dec")]);
212 // Maps lowercase month names to numbers in range 1-12.
213 std::map<base::string16, int> map_;
215 DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap);
218 } // namespace
220 // static
221 bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text,
222 int* number) {
223 return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text, number);
226 // static
227 bool FtpUtil::LsDateListingToTime(const base::string16& month,
228 const base::string16& day,
229 const base::string16& rest,
230 const base::Time& current_time,
231 base::Time* result) {
232 base::Time::Exploded time_exploded = { 0 };
234 if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) {
235 // Work around garbage sent by some servers in the same column
236 // as the month. Take just last 3 characters of the string.
237 if (month.length() < 3 ||
238 !AbbreviatedMonthToNumber(month.substr(month.length() - 3),
239 &time_exploded.month)) {
240 return false;
244 if (!base::StringToInt(day, &time_exploded.day_of_month))
245 return false;
246 if (time_exploded.day_of_month > 31)
247 return false;
249 if (!base::StringToInt(rest, &time_exploded.year)) {
250 // Maybe it's time. Does it look like time? Note that it can be any of
251 // "HH:MM", "H:MM", "HH:M" or maybe even "H:M".
252 if (rest.length() > 5)
253 return false;
255 size_t colon_pos = rest.find(':');
256 if (colon_pos == base::string16::npos)
257 return false;
258 if (colon_pos > 2)
259 return false;
261 if (!base::StringToInt(
262 StringPiece16(rest.begin(), rest.begin() + colon_pos),
263 &time_exploded.hour)) {
264 return false;
266 if (!base::StringToInt(
267 StringPiece16(rest.begin() + colon_pos + 1, rest.end()),
268 &time_exploded.minute)) {
269 return false;
272 // Guess the year.
273 base::Time::Exploded current_exploded;
274 current_time.LocalExplode(&current_exploded);
276 // If it's not possible for the parsed date to be in the current year,
277 // use the previous year.
278 if (time_exploded.month > current_exploded.month ||
279 (time_exploded.month == current_exploded.month &&
280 time_exploded.day_of_month > current_exploded.day_of_month)) {
281 time_exploded.year = current_exploded.year - 1;
282 } else {
283 time_exploded.year = current_exploded.year;
287 // We don't know the time zone of the listing, so just use local time.
288 *result = base::Time::FromLocalExploded(time_exploded);
289 return true;
292 // static
293 bool FtpUtil::WindowsDateListingToTime(const base::string16& date,
294 const base::string16& time,
295 base::Time* result) {
296 base::Time::Exploded time_exploded = { 0 };
298 // Date should be in format MM-DD-YY[YY].
299 std::vector<base::StringPiece16> date_parts =
300 base::SplitStringPiece(date, base::ASCIIToUTF16("-"),
301 base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
302 if (date_parts.size() != 3)
303 return false;
304 if (!base::StringToInt(date_parts[0], &time_exploded.month))
305 return false;
306 if (!base::StringToInt(date_parts[1], &time_exploded.day_of_month))
307 return false;
308 if (!base::StringToInt(date_parts[2], &time_exploded.year))
309 return false;
310 if (time_exploded.year < 0)
311 return false;
312 // If year has only two digits then assume that 00-79 is 2000-2079,
313 // and 80-99 is 1980-1999.
314 if (time_exploded.year < 80)
315 time_exploded.year += 2000;
316 else if (time_exploded.year < 100)
317 time_exploded.year += 1900;
319 // Time should be in format HH:MM[(AM|PM)]
320 if (time.length() < 5)
321 return false;
323 std::vector<base::StringPiece16> time_parts = base::SplitStringPiece(
324 base::StringPiece16(time).substr(0, 5), base::ASCIIToUTF16(":"),
325 base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
326 if (time_parts.size() != 2)
327 return false;
328 if (!base::StringToInt(time_parts[0], &time_exploded.hour))
329 return false;
330 if (!base::StringToInt(time_parts[1], &time_exploded.minute))
331 return false;
332 if (!time_exploded.HasValidValues())
333 return false;
335 if (time.length() > 5) {
336 if (time.length() != 7)
337 return false;
338 base::string16 am_or_pm(time.substr(5, 2));
339 if (base::EqualsASCII(am_or_pm, "PM")) {
340 if (time_exploded.hour < 12)
341 time_exploded.hour += 12;
342 } else if (base::EqualsASCII(am_or_pm, "AM")) {
343 if (time_exploded.hour == 12)
344 time_exploded.hour = 0;
345 } else {
346 return false;
350 // We don't know the time zone of the server, so just use local time.
351 *result = base::Time::FromLocalExploded(time_exploded);
352 return true;
355 // static
356 base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text,
357 int columns) {
358 base::i18n::UTF16CharIterator iter(&text);
360 for (int i = 0; i < columns; i++) {
361 // Skip the leading whitespace.
362 while (!iter.end() && u_isspace(iter.get()))
363 iter.Advance();
365 // Skip the actual text of i-th column.
366 while (!iter.end() && !u_isspace(iter.get()))
367 iter.Advance();
370 base::string16 result(text.substr(iter.array_pos()));
371 base::TrimWhitespace(result, base::TRIM_ALL, &result);
372 return result;
375 } // namespace