1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/ftp/ftp_util.h"
10 #include "base/i18n/case_conversion.h"
11 #include "base/i18n/char_iterator.h"
12 #include "base/logging.h"
13 #include "base/memory/singleton.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_piece.h"
16 #include "base/strings/string_split.h"
17 #include "base/strings/string_tokenizer.h"
18 #include "base/strings/string_util.h"
19 #include "base/strings/utf_string_conversions.h"
20 #include "base/time/time.h"
21 #include "third_party/icu/source/common/unicode/uchar.h"
22 #include "third_party/icu/source/i18n/unicode/datefmt.h"
23 #include "third_party/icu/source/i18n/unicode/dtfmtsym.h"
25 using base::ASCIIToUTF16
;
26 using base::StringPiece16
;
28 // For examples of Unix<->VMS path conversions, see the unit test file. On VMS
29 // a path looks different depending on whether it's a file or a directory.
// Converts a Unix-style file path into VMS path syntax.
// The input is split on "/" into tokens. As far as this listing shows:
//  - an absolute path (leading slash) with exactly one token is returned
//    without its leading slash;
//  - an absolute path with more tokens becomes tokens[0] + ":[" followed by
//    the directory tokens joined with ".", then "]" and the last token;
//  - a relative path with more than one token becomes "[" followed by
//    "." + each directory token, then "]" and the last token.
// NOTE(review): several statements seem to be missing from this listing
// (the statement guarded by the empty-path check, the condition guarding the
// DCHECK, an else branch, and the final returns) -- confirm against upstream
// before relying on the exact control flow.
34 std::string
FtpUtil::UnixFilePathToVMS(const std::string
& unix_path
) {
35 if (unix_path
.empty())
// Break the path into its slash-separated components.
38 base::StringTokenizer
tokenizer(unix_path
, "/");
39 std::vector
<std::string
> tokens
;
40 while (tokenizer
.GetNext())
41 tokens
.push_back(tokenizer
.token());
43 if (unix_path
[0] == '/') {
44 // It's an absolute path.
// The DCHECK asserts that the path is exactly one character long here;
// presumably this is the no-token case (path is just "/") -- TODO confirm.
47 DCHECK_EQ(1U, unix_path
.length());
51 if (tokens
.size() == 1)
52 return unix_path
.substr(1); // Drop the leading slash.
// The first token goes in front of ":[" and the directories follow.
54 std::string
result(tokens
[0] + ":[");
55 if (tokens
.size() == 2) {
56 // Don't ask why, it just works that way on VMS.
57 result
.append("000000");
// Presumably the alternative branch: first directory, then the remaining
// directories each prefixed with "." (the last token is excluded).
59 result
.append(tokens
[1]);
60 for (size_t i
= 2; i
< tokens
.size() - 1; i
++)
61 result
.append("." + tokens
[i
]);
// Close the directory part and append the last token.
63 result
.append("]" + tokens
[tokens
.size() - 1]);
// From here on the path is relative.
67 if (tokens
.size() == 1)
70 std::string
result("[");
71 for (size_t i
= 0; i
< tokens
.size() - 1; i
++)
72 result
.append("." + tokens
[i
]);
73 result
.append("]" + tokens
[tokens
.size() - 1]);
// Converts a Unix-style directory path into VMS path syntax.
// Works on a copy of the input, runs it through UnixFilePathToVMS() and
// returns the converted string with its last character removed.
// NOTE(review): the statements that append the trailing slash and the fake
// file name described in the comment below are not visible in this listing,
// nor is the statement guarded by the empty-path check -- confirm upstream.
78 std::string
FtpUtil::UnixDirectoryPathToVMS(const std::string
& unix_path
) {
79 if (unix_path
.empty())
82 std::string
path(unix_path
);
// Checks whether the path already ends with a slash.
84 if (path
[path
.length() - 1] != '/')
87 // Reuse logic from UnixFilePathToVMS by appending a fake file name to the
88 // real path and removing it after conversion.
90 path
= UnixFilePathToVMS(path
);
// Drop the final character of the converted path.
91 return path
.substr(0, path
.length() - 1);
// Converts a VMS path into a Unix-style path.
// A path that starts with "[" is treated as relative and has its first "[."
// removed. Otherwise a "/" is prepended and every ":[000000]" and ":[" is
// replaced with "/". Afterwards every "." and "]" becomes "/" and a single
// trailing slash is stripped.
// NOTE(review): the early returns (for a path starting with a slash and for
// the "[]" path), any check preceding the first test, the else keyword and
// the final return are not visible in this listing -- confirm upstream.
95 std::string
FtpUtil::VMSPathToUnix(const std::string
& vms_path
) {
99 if (vms_path
[0] == '/') {
100 // This is not really a VMS path. Most likely the server is emulating UNIX.
101 // Return path as-is.
105 if (vms_path
== "[]")
108 std::string
result(vms_path
);
109 if (vms_path
[0] == '[') {
110 // It's a relative path.
111 base::ReplaceFirstSubstringAfterOffset(
112 &result
, 0, "[.", base::StringPiece());
114 // It's an absolute path.
115 result
.insert(0, "/");
// The more specific ":[000000]" pattern is replaced before the generic ":[".
116 base::ReplaceSubstringsAfterOffset(&result
, 0, ":[000000]", "/");
117 base::ReplaceSubstringsAfterOffset(&result
, 0, ":[", "/");
// Remaining VMS separators become slashes.
119 std::replace(result
.begin(), result
.end(), '.', '/');
120 std::replace(result
.begin(), result
.end(), ']', '/');
122 // Make sure the result doesn't end with a slash.
123 if (result
.length() && result
[result
.length() - 1] == '/')
124 result
= result
.substr(0, result
.length() - 1);
// Singleton that maps lowercased abbreviated month names to month numbers.
// The constructor walks every locale reported by
// icu::DateFormat::getAvailableLocales(), reads that locale's short month
// names and stores each one (lowercased), plus its first three characters,
// with the value month index + 1.
// NOTE(review): access specifiers, some return statements and closing braces
// are not visible in this listing -- confirm against upstream.
131 // Lazy-initialized map of abbreviated month names.
132 class AbbreviatedMonthsMap
{
// Returns the instance obtained from Singleton<AbbreviatedMonthsMap>.
134 static AbbreviatedMonthsMap
* GetInstance() {
135 return Singleton
<AbbreviatedMonthsMap
>::get();
138 // Converts abbreviated month name |text| to its number (in range 1-12).
139 // On success returns true and puts the number in |number|.
140 bool GetMonthNumber(const base::string16
& text
, int* number
) {
141 // Ignore the case of the month names. The simplest way to handle that
142 // is to make everything lowercase.
143 base::string16
text_lower(base::i18n::ToLower(text
));
// Tests whether the lowercased name is a known key; the statement guarded
// by this check is not visible here (presumably the failure return).
145 if (map_
.find(text_lower
) == map_
.end())
148 *number
= map_
[text_lower
];
153 friend struct DefaultSingletonTraits
<AbbreviatedMonthsMap
>;
155 // Constructor, initializes the map based on ICU data. It is much faster
156 // to do that just once.
157 AbbreviatedMonthsMap() {
158 int32_t locales_count
;
159 const icu::Locale
* locales
=
160 icu::DateFormat::getAvailableLocales(locales_count
);
162 for (int32_t locale
= 0; locale
< locales_count
; locale
++) {
163 UErrorCode
status(U_ZERO_ERROR
);
165 icu::DateFormatSymbols
format_symbols(locales
[locale
], status
);
167 // If we cannot get format symbols for some locale, it's not a fatal
168 // error. Just try another one.
169 if (U_FAILURE(status
))
172 int32_t months_count
;
173 const icu::UnicodeString
* months
=
174 format_symbols
.getShortMonths(months_count
);
176 for (int32_t month
= 0; month
< months_count
; month
++) {
// Copy the ICU string into a base::string16 using its buffer and length.
177 base::string16
month_name(months
[month
].getBuffer(),
178 static_cast<size_t>(months
[month
].length()));
180 // Ignore the case of the month names. The simplest way to handle that
181 // is to make everything lowercase.
182 month_name
= base::i18n::ToLower(month_name
);
// The loop index is zero-based, so the stored month number is month + 1.
184 map_
[month_name
] = month
+ 1;
186 // Sometimes ICU returns longer strings, but in FTP listings a shorter
187 // abbreviation is used (for example for the Russian locale). Make sure
188 // we always have a map entry for a three-letter abbreviation.
189 map_
[month_name
.substr(0, 3)] = month
+ 1;
193 // Fail loudly if the data returned by ICU is obviously incomplete.
194 // This is intended to catch cases like http://crbug.com/177428
195 // much earlier. Note that the issue above turned out to be non-trivial
196 // to reproduce - crash data is much better indicator of a problem
197 // than incomplete bug reports.
198 CHECK_EQ(1, map_
[ASCIIToUTF16("jan")]);
199 CHECK_EQ(2, map_
[ASCIIToUTF16("feb")]);
200 CHECK_EQ(3, map_
[ASCIIToUTF16("mar")]);
201 CHECK_EQ(4, map_
[ASCIIToUTF16("apr")]);
202 CHECK_EQ(5, map_
[ASCIIToUTF16("may")]);
203 CHECK_EQ(6, map_
[ASCIIToUTF16("jun")]);
204 CHECK_EQ(7, map_
[ASCIIToUTF16("jul")]);
205 CHECK_EQ(8, map_
[ASCIIToUTF16("aug")]);
206 CHECK_EQ(9, map_
[ASCIIToUTF16("sep")]);
207 CHECK_EQ(10, map_
[ASCIIToUTF16("oct")]);
208 CHECK_EQ(11, map_
[ASCIIToUTF16("nov")]);
209 CHECK_EQ(12, map_
[ASCIIToUTF16("dec")]);
212 // Maps lowercase month names to numbers in range 1-12.
213 std::map
<base::string16
, int> map_
;
215 DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap
);
// Looks up the month number for the abbreviated month name |text| by
// forwarding to AbbreviatedMonthsMap::GetInstance()->GetMonthNumber() and
// returning its result.
// NOTE(review): the line declaring the |number| parameter is not visible in
// this listing -- confirm against upstream.
221 bool FtpUtil::AbbreviatedMonthToNumber(const base::string16
& text
,
223 return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text
, number
);
// Converts the date columns of an "ls -l" style listing into a base::Time.
//   month:        abbreviated month name; if the whole string is not a known
//                 month, its last three characters are tried instead.
//   day:          day of month as a decimal string.
//   rest:         either a year, or a time of day containing a colon.
//   current_time: reference time used to choose the year when |rest| is a
//                 time of day.
//   result:       receives the time built from the parsed fields via
//                 base::Time::FromLocalExploded().
// NOTE(review): the statements guarded by the failure checks (presumably the
// failure returns), an else keyword and the final return are not visible in
// this listing -- confirm against upstream.
227 bool FtpUtil::LsDateListingToTime(const base::string16
& month
,
228 const base::string16
& day
,
229 const base::string16
& rest
,
230 const base::Time
& current_time
,
231 base::Time
* result
) {
232 base::Time::Exploded time_exploded
= { 0 };
234 if (!AbbreviatedMonthToNumber(month
, &time_exploded
.month
)) {
235 // Work around garbage sent by some servers in the same column
236 // as the month. Take just last 3 characters of the string.
237 if (month
.length() < 3 ||
238 !AbbreviatedMonthToNumber(month
.substr(month
.length() - 3),
239 &time_exploded
.month
)) {
244 if (!base::StringToInt(day
, &time_exploded
.day_of_month
))
246 if (time_exploded
.day_of_month
> 31)
// |rest| is first tried as a year; if that fails it is parsed as a time.
249 if (!base::StringToInt(rest
, &time_exploded
.year
)) {
250 // Maybe it's time. Does it look like time? Note that it can be any of
251 // "HH:MM", "H:MM", "HH:M" or maybe even "H:M".
252 if (rest
.length() > 5)
255 size_t colon_pos
= rest
.find(':');
256 if (colon_pos
== base::string16::npos
)
// Text before the colon is the hour, text after it is the minute.
261 if (!base::StringToInt(
262 StringPiece16(rest
.begin(), rest
.begin() + colon_pos
),
263 &time_exploded
.hour
)) {
266 if (!base::StringToInt(
267 StringPiece16(rest
.begin() + colon_pos
+ 1, rest
.end()),
268 &time_exploded
.minute
)) {
// Explode the reference time in local time so its month and day can be
// compared with the parsed ones.
273 base::Time::Exploded current_exploded
;
274 current_time
.LocalExplode(&current_exploded
);
276 // If it's not possible for the parsed date to be in the current year,
277 // use the previous year.
278 if (time_exploded
.month
> current_exploded
.month
||
279 (time_exploded
.month
== current_exploded
.month
&&
280 time_exploded
.day_of_month
> current_exploded
.day_of_month
)) {
281 time_exploded
.year
= current_exploded
.year
- 1;
283 time_exploded
.year
= current_exploded
.year
;
287 // We don't know the time zone of the listing, so just use local time.
288 *result
= base::Time::FromLocalExploded(time_exploded
);
// Converts the date and time columns of a Windows-style listing into a
// base::Time.
//   date:   expected as MM-DD-YY[YY]; split on "-" into exactly three parts.
//   time:   expected as HH:MM with an optional two-character AM or PM suffix.
//   result: receives the time built from the parsed fields via
//           base::Time::FromLocalExploded().
// NOTE(review): the statements guarded by the validation checks (presumably
// the failure returns), the tail of the AM/PM handling and the final return
// are not visible in this listing -- confirm against upstream.
293 bool FtpUtil::WindowsDateListingToTime(const base::string16
& date
,
294 const base::string16
& time
,
295 base::Time
* result
) {
296 base::Time::Exploded time_exploded
= { 0 };
298 // Date should be in format MM-DD-YY[YY].
299 std::vector
<base::StringPiece16
> date_parts
=
300 base::SplitStringPiece(date
, base::ASCIIToUTF16("-"),
301 base::TRIM_WHITESPACE
, base::SPLIT_WANT_ALL
);
302 if (date_parts
.size() != 3)
// Parts are read in order: month, day of month, year.
304 if (!base::StringToInt(date_parts
[0], &time_exploded
.month
))
306 if (!base::StringToInt(date_parts
[1], &time_exploded
.day_of_month
))
308 if (!base::StringToInt(date_parts
[2], &time_exploded
.year
))
310 if (time_exploded
.year
< 0)
312 // If year has only two digits then assume that 00-79 is 2000-2079,
313 // and 80-99 is 1980-1999.
314 if (time_exploded
.year
< 80)
315 time_exploded
.year
+= 2000;
316 else if (time_exploded
.year
< 100)
317 time_exploded
.year
+= 1900;
319 // Time should be in format HH:MM[(AM|PM)]
320 if (time
.length() < 5)
// Only the first five characters (the HH:MM part) are split on ":".
323 std::vector
<base::StringPiece16
> time_parts
= base::SplitStringPiece(
324 base::StringPiece16(time
).substr(0, 5), base::ASCIIToUTF16(":"),
325 base::TRIM_WHITESPACE
, base::SPLIT_WANT_ALL
);
326 if (time_parts
.size() != 2)
328 if (!base::StringToInt(time_parts
[0], &time_exploded
.hour
))
330 if (!base::StringToInt(time_parts
[1], &time_exploded
.minute
))
332 if (!time_exploded
.HasValidValues())
// Anything beyond HH:MM must be exactly a two-character suffix.
335 if (time
.length() > 5) {
336 if (time
.length() != 7)
338 base::string16
am_or_pm(time
.substr(5, 2));
// PM adds 12 to hours below 12; AM turns hour 12 into 0.
339 if (base::EqualsASCII(am_or_pm
, "PM")) {
340 if (time_exploded
.hour
< 12)
341 time_exploded
.hour
+= 12;
342 } else if (base::EqualsASCII(am_or_pm
, "AM")) {
343 if (time_exploded
.hour
== 12)
344 time_exploded
.hour
= 0;
350 // We don't know the time zone of the server, so just use local time.
351 *result
= base::Time::FromLocalExploded(time_exploded
);
// Produces the part of |text| that follows the first |columns|
// whitespace-separated columns. A UTF16CharIterator walks the text; for each
// column it passes over leading whitespace and then over the column text,
// using u_isspace() as the whitespace test. The remainder, starting at the
// iterator's array position, is trimmed with base::TRIM_ALL.
// NOTE(review): the line declaring the |columns| parameter, the bodies of the
// two while loops (presumably advancing the iterator) and the final return
// are not visible in this listing -- confirm against upstream.
356 base::string16
FtpUtil::GetStringPartAfterColumns(const base::string16
& text
,
358 base::i18n::UTF16CharIterator
iter(&text
);
360 // TODO(jshin): Is u_isspace the right function to use here?
361 for (int i
= 0; i
< columns
; i
++) {
362 // Skip the leading whitespace.
363 while (!iter
.end() && u_isspace(iter
.get()))
366 // Skip the actual text of i-th column.
367 while (!iter
.end() && !u_isspace(iter
.get()))
// Everything from the current iterator position onward is the remainder.
371 base::string16
result(text
.substr(iter
.array_pos()));
372 base::TrimWhitespace(result
, base::TRIM_ALL
, &result
);