1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
#include "net/ftp/ftp_util.h"

#include <algorithm>
#include <map>
#include <string>
#include <vector>

#include "base/i18n/case_conversion.h"
#include "base/i18n/char_iterator.h"
#include "base/logging.h"
#include "base/memory/singleton.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_split.h"
#include "base/strings/string_tokenizer.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "third_party/icu/source/common/unicode/uchar.h"
#include "third_party/icu/source/i18n/unicode/datefmt.h"
#include "third_party/icu/source/i18n/unicode/dtfmtsym.h"

using base::ASCIIToUTF16;
using base::StringPiece16;

// For examples of Unix<->VMS path conversions, see the unit test file. On VMS
// a path looks different depending on whether it names a file or a directory.

34 std::string
FtpUtil::UnixFilePathToVMS(const std::string
& unix_path
) {
35 if (unix_path
.empty())
38 base::StringTokenizer
tokenizer(unix_path
, "/");
39 std::vector
<std::string
> tokens
;
40 while (tokenizer
.GetNext())
41 tokens
.push_back(tokenizer
.token());
43 if (unix_path
[0] == '/') {
44 // It's an absolute path.
47 DCHECK_EQ(1U, unix_path
.length());
51 if (tokens
.size() == 1)
52 return unix_path
.substr(1); // Drop the leading slash.
54 std::string
result(tokens
[0] + ":[");
55 if (tokens
.size() == 2) {
56 // Don't ask why, it just works that way on VMS.
57 result
.append("000000");
59 result
.append(tokens
[1]);
60 for (size_t i
= 2; i
< tokens
.size() - 1; i
++)
61 result
.append("." + tokens
[i
]);
63 result
.append("]" + tokens
[tokens
.size() - 1]);
67 if (tokens
.size() == 1)
70 std::string
result("[");
71 for (size_t i
= 0; i
< tokens
.size() - 1; i
++)
72 result
.append("." + tokens
[i
]);
73 result
.append("]" + tokens
[tokens
.size() - 1]);
78 std::string
FtpUtil::UnixDirectoryPathToVMS(const std::string
& unix_path
) {
79 if (unix_path
.empty())
82 std::string
path(unix_path
);
84 if (path
[path
.length() - 1] != '/')
87 // Reuse logic from UnixFilePathToVMS by appending a fake file name to the
88 // real path and removing it after conversion.
90 path
= UnixFilePathToVMS(path
);
91 return path
.substr(0, path
.length() - 1);
95 std::string
FtpUtil::VMSPathToUnix(const std::string
& vms_path
) {
99 if (vms_path
[0] == '/') {
100 // This is not really a VMS path. Most likely the server is emulating UNIX.
101 // Return path as-is.
105 if (vms_path
== "[]")
108 std::string
result(vms_path
);
109 if (vms_path
[0] == '[') {
110 // It's a relative path.
111 ReplaceFirstSubstringAfterOffset(&result
, 0, "[.", std::string());
113 // It's an absolute path.
114 result
.insert(0, "/");
115 ReplaceSubstringsAfterOffset(&result
, 0, ":[000000]", "/");
116 ReplaceSubstringsAfterOffset(&result
, 0, ":[", "/");
118 std::replace(result
.begin(), result
.end(), '.', '/');
119 std::replace(result
.begin(), result
.end(), ']', '/');
121 // Make sure the result doesn't end with a slash.
122 if (result
.length() && result
[result
.length() - 1] == '/')
123 result
= result
.substr(0, result
.length() - 1);
130 // Lazy-initialized map of abbreviated month names.
131 class AbbreviatedMonthsMap
{
133 static AbbreviatedMonthsMap
* GetInstance() {
134 return Singleton
<AbbreviatedMonthsMap
>::get();
137 // Converts abbreviated month name |text| to its number (in range 1-12).
138 // On success returns true and puts the number in |number|.
139 bool GetMonthNumber(const base::string16
& text
, int* number
) {
140 // Ignore the case of the month names. The simplest way to handle that
141 // is to make everything lowercase.
142 base::string16
text_lower(base::i18n::ToLower(text
));
144 if (map_
.find(text_lower
) == map_
.end())
147 *number
= map_
[text_lower
];
152 friend struct DefaultSingletonTraits
<AbbreviatedMonthsMap
>;
154 // Constructor, initializes the map based on ICU data. It is much faster
155 // to do that just once.
156 AbbreviatedMonthsMap() {
157 int32_t locales_count
;
158 const icu::Locale
* locales
=
159 icu::DateFormat::getAvailableLocales(locales_count
);
161 for (int32_t locale
= 0; locale
< locales_count
; locale
++) {
162 UErrorCode
status(U_ZERO_ERROR
);
164 icu::DateFormatSymbols
format_symbols(locales
[locale
], status
);
166 // If we cannot get format symbols for some locale, it's not a fatal
167 // error. Just try another one.
168 if (U_FAILURE(status
))
171 int32_t months_count
;
172 const icu::UnicodeString
* months
=
173 format_symbols
.getShortMonths(months_count
);
175 for (int32_t month
= 0; month
< months_count
; month
++) {
176 base::string16
month_name(months
[month
].getBuffer(),
177 static_cast<size_t>(months
[month
].length()));
179 // Ignore the case of the month names. The simplest way to handle that
180 // is to make everything lowercase.
181 month_name
= base::i18n::ToLower(month_name
);
183 map_
[month_name
] = month
+ 1;
185 // Sometimes ICU returns longer strings, but in FTP listings a shorter
186 // abbreviation is used (for example for the Russian locale). Make sure
187 // we always have a map entry for a three-letter abbreviation.
188 map_
[month_name
.substr(0, 3)] = month
+ 1;
192 // Fail loudly if the data returned by ICU is obviously incomplete.
193 // This is intended to catch cases like http://crbug.com/177428
194 // much earlier. Note that the issue above turned out to be non-trivial
195 // to reproduce - crash data is much better indicator of a problem
196 // than incomplete bug reports.
197 CHECK_EQ(1, map_
[ASCIIToUTF16("jan")]);
198 CHECK_EQ(2, map_
[ASCIIToUTF16("feb")]);
199 CHECK_EQ(3, map_
[ASCIIToUTF16("mar")]);
200 CHECK_EQ(4, map_
[ASCIIToUTF16("apr")]);
201 CHECK_EQ(5, map_
[ASCIIToUTF16("may")]);
202 CHECK_EQ(6, map_
[ASCIIToUTF16("jun")]);
203 CHECK_EQ(7, map_
[ASCIIToUTF16("jul")]);
204 CHECK_EQ(8, map_
[ASCIIToUTF16("aug")]);
205 CHECK_EQ(9, map_
[ASCIIToUTF16("sep")]);
206 CHECK_EQ(10, map_
[ASCIIToUTF16("oct")]);
207 CHECK_EQ(11, map_
[ASCIIToUTF16("nov")]);
208 CHECK_EQ(12, map_
[ASCIIToUTF16("dec")]);
211 // Maps lowercase month names to numbers in range 1-12.
212 std::map
<base::string16
, int> map_
;
214 DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap
);
220 bool FtpUtil::AbbreviatedMonthToNumber(const base::string16
& text
,
222 return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text
, number
);
226 bool FtpUtil::LsDateListingToTime(const base::string16
& month
,
227 const base::string16
& day
,
228 const base::string16
& rest
,
229 const base::Time
& current_time
,
230 base::Time
* result
) {
231 base::Time::Exploded time_exploded
= { 0 };
233 if (!AbbreviatedMonthToNumber(month
, &time_exploded
.month
)) {
234 // Work around garbage sent by some servers in the same column
235 // as the month. Take just last 3 characters of the string.
236 if (month
.length() < 3 ||
237 !AbbreviatedMonthToNumber(month
.substr(month
.length() - 3),
238 &time_exploded
.month
)) {
243 if (!base::StringToInt(day
, &time_exploded
.day_of_month
))
245 if (time_exploded
.day_of_month
> 31)
248 if (!base::StringToInt(rest
, &time_exploded
.year
)) {
249 // Maybe it's time. Does it look like time? Note that it can be any of
250 // "HH:MM", "H:MM", "HH:M" or maybe even "H:M".
251 if (rest
.length() > 5)
254 size_t colon_pos
= rest
.find(':');
255 if (colon_pos
== base::string16::npos
)
260 if (!base::StringToInt(
261 StringPiece16(rest
.begin(), rest
.begin() + colon_pos
),
262 &time_exploded
.hour
)) {
265 if (!base::StringToInt(
266 StringPiece16(rest
.begin() + colon_pos
+ 1, rest
.end()),
267 &time_exploded
.minute
)) {
272 base::Time::Exploded current_exploded
;
273 current_time
.LocalExplode(¤t_exploded
);
275 // If it's not possible for the parsed date to be in the current year,
276 // use the previous year.
277 if (time_exploded
.month
> current_exploded
.month
||
278 (time_exploded
.month
== current_exploded
.month
&&
279 time_exploded
.day_of_month
> current_exploded
.day_of_month
)) {
280 time_exploded
.year
= current_exploded
.year
- 1;
282 time_exploded
.year
= current_exploded
.year
;
286 // We don't know the time zone of the listing, so just use local time.
287 *result
= base::Time::FromLocalExploded(time_exploded
);
292 bool FtpUtil::WindowsDateListingToTime(const base::string16
& date
,
293 const base::string16
& time
,
294 base::Time
* result
) {
295 base::Time::Exploded time_exploded
= { 0 };
297 // Date should be in format MM-DD-YY[YY].
298 std::vector
<base::string16
> date_parts
;
299 base::SplitString(date
, '-', &date_parts
);
300 if (date_parts
.size() != 3)
302 if (!base::StringToInt(date_parts
[0], &time_exploded
.month
))
304 if (!base::StringToInt(date_parts
[1], &time_exploded
.day_of_month
))
306 if (!base::StringToInt(date_parts
[2], &time_exploded
.year
))
308 if (time_exploded
.year
< 0)
310 // If year has only two digits then assume that 00-79 is 2000-2079,
311 // and 80-99 is 1980-1999.
312 if (time_exploded
.year
< 80)
313 time_exploded
.year
+= 2000;
314 else if (time_exploded
.year
< 100)
315 time_exploded
.year
+= 1900;
317 // Time should be in format HH:MM[(AM|PM)]
318 if (time
.length() < 5)
321 std::vector
<base::string16
> time_parts
;
322 base::SplitString(time
.substr(0, 5), ':', &time_parts
);
323 if (time_parts
.size() != 2)
325 if (!base::StringToInt(time_parts
[0], &time_exploded
.hour
))
327 if (!base::StringToInt(time_parts
[1], &time_exploded
.minute
))
329 if (!time_exploded
.HasValidValues())
332 if (time
.length() > 5) {
333 if (time
.length() != 7)
335 base::string16
am_or_pm(time
.substr(5, 2));
336 if (base::EqualsASCII(am_or_pm
, "PM")) {
337 if (time_exploded
.hour
< 12)
338 time_exploded
.hour
+= 12;
339 } else if (base::EqualsASCII(am_or_pm
, "AM")) {
340 if (time_exploded
.hour
== 12)
341 time_exploded
.hour
= 0;
347 // We don't know the time zone of the server, so just use local time.
348 *result
= base::Time::FromLocalExploded(time_exploded
);
353 base::string16
FtpUtil::GetStringPartAfterColumns(const base::string16
& text
,
355 base::i18n::UTF16CharIterator
iter(&text
);
357 // TODO(jshin): Is u_isspace the right function to use here?
358 for (int i
= 0; i
< columns
; i
++) {
359 // Skip the leading whitespace.
360 while (!iter
.end() && u_isspace(iter
.get()))
363 // Skip the actual text of i-th column.
364 while (!iter
.end() && !u_isspace(iter
.get()))
368 base::string16
result(text
.substr(iter
.array_pos()));
369 base::TrimWhitespace(result
, base::TRIM_ALL
, &result
);