Roll src/third_party/WebKit d9c6159:8139f33 (svn 201974:201975)
[chromium-blink-merge.git] / net / ftp / ftp_directory_listing_parser_ls.cc
blob96d0dc84607a40f934a2eb882ad06249451db970
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/ftp/ftp_directory_listing_parser_ls.h"
7 #include <vector>
9 #include "base/strings/string_number_conversions.h"
10 #include "base/strings/string_split.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "base/time/time.h"
14 #include "net/ftp/ftp_directory_listing_parser.h"
15 #include "net/ftp/ftp_util.h"
17 namespace net {
19 namespace {
21 bool TwoColumnDateListingToTime(const base::string16& date,
22 const base::string16& time,
23 base::Time* result) {
24 base::Time::Exploded time_exploded = { 0 };
26 // Date should be in format YYYY-MM-DD.
27 std::vector<base::string16> date_parts =
28 base::SplitString(date, base::ASCIIToUTF16("-"), base::TRIM_WHITESPACE,
29 base::SPLIT_WANT_ALL);
30 if (date_parts.size() != 3)
31 return false;
32 if (!base::StringToInt(date_parts[0], &time_exploded.year))
33 return false;
34 if (!base::StringToInt(date_parts[1], &time_exploded.month))
35 return false;
36 if (!base::StringToInt(date_parts[2], &time_exploded.day_of_month))
37 return false;
39 // Time should be in format HH:MM
40 if (time.length() != 5)
41 return false;
43 std::vector<base::string16> time_parts =
44 base::SplitString(time, base::ASCIIToUTF16(":"), base::TRIM_WHITESPACE,
45 base::SPLIT_WANT_ALL);
46 if (time_parts.size() != 2)
47 return false;
48 if (!base::StringToInt(time_parts[0], &time_exploded.hour))
49 return false;
50 if (!base::StringToInt(time_parts[1], &time_exploded.minute))
51 return false;
52 if (!time_exploded.HasValidValues())
53 return false;
55 // We don't know the time zone of the server, so just use local time.
56 *result = base::Time::FromLocalExploded(time_exploded);
57 return true;
60 // Returns the column index of the end of the date listing and detected
61 // last modification time.
62 bool DetectColumnOffsetSizeAndModificationTime(
63 const std::vector<base::string16>& columns,
64 const base::Time& current_time,
65 size_t* offset,
66 base::string16* size,
67 base::Time* modification_time) {
68 // The column offset can be arbitrarily large if some fields
69 // like owner or group name contain spaces. Try offsets from left to right
70 // and use the first one that matches a date listing.
72 // Here is how a listing line should look like. A star ("*") indicates
73 // a required field:
75 // * 1. permission listing
76 // 2. number of links (optional)
77 // * 3. owner name (may contain spaces)
78 // 4. group name (optional, may contain spaces)
79 // * 5. size in bytes
80 // * 6. month
81 // * 7. day of month
82 // * 8. year or time <-- column_offset will be the index of this column
83 // 9. file name (optional, may contain spaces)
84 for (size_t i = 5U; i < columns.size(); i++) {
85 if (FtpUtil::LsDateListingToTime(columns[i - 2], columns[i - 1], columns[i],
86 current_time, modification_time)) {
87 *size = columns[i - 3];
88 *offset = i;
89 return true;
93 // Some FTP listings have swapped the "month" and "day of month" columns
94 // (for example Russian listings). We try to recognize them only after making
95 // sure no column offset works above (this is a more strict way).
96 for (size_t i = 5U; i < columns.size(); i++) {
97 if (FtpUtil::LsDateListingToTime(columns[i - 1], columns[i - 2], columns[i],
98 current_time, modification_time)) {
99 *size = columns[i - 3];
100 *offset = i;
101 return true;
105 // Some FTP listings use a different date format.
106 for (size_t i = 5U; i < columns.size(); i++) {
107 if (TwoColumnDateListingToTime(columns[i - 1],
108 columns[i],
109 modification_time)) {
110 *size = columns[i - 2];
111 *offset = i;
112 return true;
116 return false;
119 } // namespace
121 bool ParseFtpDirectoryListingLs(
122 const std::vector<base::string16>& lines,
123 const base::Time& current_time,
124 std::vector<FtpDirectoryListingEntry>* entries) {
125 // True after we have received a "total n" listing header, where n is an
126 // integer. Only one such header is allowed per listing.
127 bool received_total_line = false;
129 for (size_t i = 0; i < lines.size(); i++) {
130 if (lines[i].empty())
131 continue;
133 std::vector<base::string16> columns = base::SplitString(
134 base::CollapseWhitespace(lines[i], false), base::ASCIIToUTF16(" "),
135 base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
137 // Some FTP servers put a "total n" line at the beginning of the listing
138 // (n is an integer). Allow such a line, but only once, and only if it's
139 // the first non-empty line. Do not match the word exactly, because it may
140 // be in different languages (at least English and German have been seen
141 // in the field).
142 if (columns.size() == 2 && !received_total_line) {
143 received_total_line = true;
145 int64 total_number;
146 if (!base::StringToInt64(columns[1], &total_number))
147 return false;
148 if (total_number < 0)
149 return false;
151 continue;
154 FtpDirectoryListingEntry entry;
156 size_t column_offset;
157 base::string16 size;
158 if (!DetectColumnOffsetSizeAndModificationTime(columns,
159 current_time,
160 &column_offset,
161 &size,
162 &entry.last_modified)) {
163 // Some servers send a message in one of the first few lines.
164 // All those messages have in common is the string ".:",
165 // where "." means the current directory, and ":" separates it
166 // from the rest of the message, which may be empty.
167 if (lines[i].find(base::ASCIIToUTF16(".:")) != base::string16::npos)
168 continue;
170 return false;
173 // Do not check "validity" of the permission listing. It's quirky,
174 // and some servers send garbage here while other parts of the line are OK.
176 if (!columns[0].empty() && columns[0][0] == 'l') {
177 entry.type = FtpDirectoryListingEntry::SYMLINK;
178 } else if (!columns[0].empty() && columns[0][0] == 'd') {
179 entry.type = FtpDirectoryListingEntry::DIRECTORY;
180 } else {
181 entry.type = FtpDirectoryListingEntry::FILE;
184 if (!base::StringToInt64(size, &entry.size)) {
185 // Some FTP servers do not separate owning group name from file size,
186 // like "group1234". We still want to display the file name for that
187 // entry, but can't really get the size (What if the group is named
188 // "group1", and the size is in fact 234? We can't distinguish between
189 // that and "group" with size 1234). Use a dummy value for the size.
190 entry.size = -1;
192 if (entry.size < 0) {
193 // Some FTP servers have bugs that cause them to display the file size
194 // as negative. They're most likely big files like DVD ISO images.
195 // We still want to display them, so just say the real file size
196 // is unknown.
197 entry.size = -1;
199 if (entry.type != FtpDirectoryListingEntry::FILE)
200 entry.size = -1;
202 if (column_offset == columns.size() - 1) {
203 // If the end of the date listing is the last column, there is no file
204 // name. Some FTP servers send listing entries with empty names.
205 // It's not obvious how to display such an entry, so we ignore them.
206 // We don't want to make the parsing fail at this point though.
207 // Other entries can still be useful.
208 continue;
211 entry.name = FtpUtil::GetStringPartAfterColumns(lines[i],
212 column_offset + 1);
214 if (entry.type == FtpDirectoryListingEntry::SYMLINK) {
215 base::string16::size_type pos =
216 entry.name.rfind(base::ASCIIToUTF16(" -> "));
218 // We don't require the " -> " to be present. Some FTP servers don't send
219 // the symlink target, possibly for security reasons.
220 if (pos != base::string16::npos)
221 entry.name = entry.name.substr(0, pos);
224 entries->push_back(entry);
227 return true;
230 } // namespace net