// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "net/ftp/ftp_directory_listing_parser_ls.h"

#include <stddef.h>
#include <stdint.h>

#include <vector>

#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "net/ftp/ftp_directory_listing_parser.h"
#include "net/ftp/ftp_util.h"
21 bool TwoColumnDateListingToTime(const base::string16
& date
,
22 const base::string16
& time
,
24 base::Time::Exploded time_exploded
= { 0 };
26 // Date should be in format YYYY-MM-DD.
27 std::vector
<base::string16
> date_parts
=
28 base::SplitString(date
, base::ASCIIToUTF16("-"), base::TRIM_WHITESPACE
,
29 base::SPLIT_WANT_ALL
);
30 if (date_parts
.size() != 3)
32 if (!base::StringToInt(date_parts
[0], &time_exploded
.year
))
34 if (!base::StringToInt(date_parts
[1], &time_exploded
.month
))
36 if (!base::StringToInt(date_parts
[2], &time_exploded
.day_of_month
))
39 // Time should be in format HH:MM
40 if (time
.length() != 5)
43 std::vector
<base::string16
> time_parts
=
44 base::SplitString(time
, base::ASCIIToUTF16(":"), base::TRIM_WHITESPACE
,
45 base::SPLIT_WANT_ALL
);
46 if (time_parts
.size() != 2)
48 if (!base::StringToInt(time_parts
[0], &time_exploded
.hour
))
50 if (!base::StringToInt(time_parts
[1], &time_exploded
.minute
))
52 if (!time_exploded
.HasValidValues())
55 // We don't know the time zone of the server, so just use local time.
56 *result
= base::Time::FromLocalExploded(time_exploded
);
60 // Returns the column index of the end of the date listing and detected
61 // last modification time.
62 bool DetectColumnOffsetSizeAndModificationTime(
63 const std::vector
<base::string16
>& columns
,
64 const base::Time
& current_time
,
67 base::Time
* modification_time
) {
68 // The column offset can be arbitrarily large if some fields
69 // like owner or group name contain spaces. Try offsets from left to right
70 // and use the first one that matches a date listing.
72 // Here is how a listing line should look like. A star ("*") indicates
75 // * 1. permission listing
76 // 2. number of links (optional)
77 // * 3. owner name (may contain spaces)
78 // 4. group name (optional, may contain spaces)
82 // * 8. year or time <-- column_offset will be the index of this column
83 // 9. file name (optional, may contain spaces)
84 for (size_t i
= 5U; i
< columns
.size(); i
++) {
85 if (FtpUtil::LsDateListingToTime(columns
[i
- 2], columns
[i
- 1], columns
[i
],
86 current_time
, modification_time
)) {
87 *size
= columns
[i
- 3];
93 // Some FTP listings have swapped the "month" and "day of month" columns
94 // (for example Russian listings). We try to recognize them only after making
95 // sure no column offset works above (this is a more strict way).
96 for (size_t i
= 5U; i
< columns
.size(); i
++) {
97 if (FtpUtil::LsDateListingToTime(columns
[i
- 1], columns
[i
- 2], columns
[i
],
98 current_time
, modification_time
)) {
99 *size
= columns
[i
- 3];
105 // Some FTP listings use a different date format.
106 for (size_t i
= 5U; i
< columns
.size(); i
++) {
107 if (TwoColumnDateListingToTime(columns
[i
- 1],
109 modification_time
)) {
110 *size
= columns
[i
- 2];
121 bool ParseFtpDirectoryListingLs(
122 const std::vector
<base::string16
>& lines
,
123 const base::Time
& current_time
,
124 std::vector
<FtpDirectoryListingEntry
>* entries
) {
125 // True after we have received a "total n" listing header, where n is an
126 // integer. Only one such header is allowed per listing.
127 bool received_total_line
= false;
129 for (size_t i
= 0; i
< lines
.size(); i
++) {
130 if (lines
[i
].empty())
133 std::vector
<base::string16
> columns
= base::SplitString(
134 base::CollapseWhitespace(lines
[i
], false), base::ASCIIToUTF16(" "),
135 base::TRIM_WHITESPACE
, base::SPLIT_WANT_ALL
);
137 // Some FTP servers put a "total n" line at the beginning of the listing
138 // (n is an integer). Allow such a line, but only once, and only if it's
139 // the first non-empty line. Do not match the word exactly, because it may
140 // be in different languages (at least English and German have been seen
142 if (columns
.size() == 2 && !received_total_line
) {
143 received_total_line
= true;
146 if (!base::StringToInt64(columns
[1], &total_number
))
148 if (total_number
< 0)
154 FtpDirectoryListingEntry entry
;
156 size_t column_offset
;
158 if (!DetectColumnOffsetSizeAndModificationTime(columns
,
162 &entry
.last_modified
)) {
163 // Some servers send a message in one of the first few lines.
164 // All those messages have in common is the string ".:",
165 // where "." means the current directory, and ":" separates it
166 // from the rest of the message, which may be empty.
167 if (lines
[i
].find(base::ASCIIToUTF16(".:")) != base::string16::npos
)
173 // Do not check "validity" of the permission listing. It's quirky,
174 // and some servers send garbage here while other parts of the line are OK.
176 if (!columns
[0].empty() && columns
[0][0] == 'l') {
177 entry
.type
= FtpDirectoryListingEntry::SYMLINK
;
178 } else if (!columns
[0].empty() && columns
[0][0] == 'd') {
179 entry
.type
= FtpDirectoryListingEntry::DIRECTORY
;
181 entry
.type
= FtpDirectoryListingEntry::FILE;
184 if (!base::StringToInt64(size
, &entry
.size
)) {
185 // Some FTP servers do not separate owning group name from file size,
186 // like "group1234". We still want to display the file name for that
187 // entry, but can't really get the size (What if the group is named
188 // "group1", and the size is in fact 234? We can't distinguish between
189 // that and "group" with size 1234). Use a dummy value for the size.
192 if (entry
.size
< 0) {
193 // Some FTP servers have bugs that cause them to display the file size
194 // as negative. They're most likely big files like DVD ISO images.
195 // We still want to display them, so just say the real file size
199 if (entry
.type
!= FtpDirectoryListingEntry::FILE)
202 if (column_offset
== columns
.size() - 1) {
203 // If the end of the date listing is the last column, there is no file
204 // name. Some FTP servers send listing entries with empty names.
205 // It's not obvious how to display such an entry, so we ignore them.
206 // We don't want to make the parsing fail at this point though.
207 // Other entries can still be useful.
211 entry
.name
= FtpUtil::GetStringPartAfterColumns(lines
[i
],
214 if (entry
.type
== FtpDirectoryListingEntry::SYMLINK
) {
215 base::string16::size_type pos
=
216 entry
.name
.rfind(base::ASCIIToUTF16(" -> "));
218 // We don't require the " -> " to be present. Some FTP servers don't send
219 // the symlink target, possibly for security reasons.
220 if (pos
!= base::string16::npos
)
221 entry
.name
= entry
.name
.substr(0, pos
);
224 entries
->push_back(entry
);