Explicitly add python-numpy dependency to install-build-deps.
[chromium-blink-merge.git] / net / ftp / ftp_directory_listing_parser_ls.cc
blob2d23b6aecf5996361a0b7bb951981306b44be54d
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/ftp/ftp_directory_listing_parser_ls.h"
7 #include <vector>
9 #include "base/strings/string_number_conversions.h"
10 #include "base/strings/string_split.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "base/time/time.h"
14 #include "net/ftp/ftp_directory_listing_parser.h"
15 #include "net/ftp/ftp_util.h"
17 namespace {
19 bool TwoColumnDateListingToTime(const base::string16& date,
20 const base::string16& time,
21 base::Time* result) {
22 base::Time::Exploded time_exploded = { 0 };
24 // Date should be in format YYYY-MM-DD.
25 std::vector<base::string16> date_parts;
26 base::SplitString(date, '-', &date_parts);
27 if (date_parts.size() != 3)
28 return false;
29 if (!base::StringToInt(date_parts[0], &time_exploded.year))
30 return false;
31 if (!base::StringToInt(date_parts[1], &time_exploded.month))
32 return false;
33 if (!base::StringToInt(date_parts[2], &time_exploded.day_of_month))
34 return false;
36 // Time should be in format HH:MM
37 if (time.length() != 5)
38 return false;
40 std::vector<base::string16> time_parts;
41 base::SplitString(time, ':', &time_parts);
42 if (time_parts.size() != 2)
43 return false;
44 if (!base::StringToInt(time_parts[0], &time_exploded.hour))
45 return false;
46 if (!base::StringToInt(time_parts[1], &time_exploded.minute))
47 return false;
48 if (!time_exploded.HasValidValues())
49 return false;
51 // We don't know the time zone of the server, so just use local time.
52 *result = base::Time::FromLocalExploded(time_exploded);
53 return true;
56 // Returns the column index of the end of the date listing and detected
57 // last modification time.
58 bool DetectColumnOffsetSizeAndModificationTime(
59 const std::vector<base::string16>& columns,
60 const base::Time& current_time,
61 size_t* offset,
62 base::string16* size,
63 base::Time* modification_time) {
64 // The column offset can be arbitrarily large if some fields
65 // like owner or group name contain spaces. Try offsets from left to right
66 // and use the first one that matches a date listing.
68 // Here is how a listing line should look like. A star ("*") indicates
69 // a required field:
71 // * 1. permission listing
72 // 2. number of links (optional)
73 // * 3. owner name (may contain spaces)
74 // 4. group name (optional, may contain spaces)
75 // * 5. size in bytes
76 // * 6. month
77 // * 7. day of month
78 // * 8. year or time <-- column_offset will be the index of this column
79 // 9. file name (optional, may contain spaces)
80 for (size_t i = 5U; i < columns.size(); i++) {
81 if (net::FtpUtil::LsDateListingToTime(columns[i - 2],
82 columns[i - 1],
83 columns[i],
84 current_time,
85 modification_time)) {
86 *size = columns[i - 3];
87 *offset = i;
88 return true;
92 // Some FTP listings have swapped the "month" and "day of month" columns
93 // (for example Russian listings). We try to recognize them only after making
94 // sure no column offset works above (this is a more strict way).
95 for (size_t i = 5U; i < columns.size(); i++) {
96 if (net::FtpUtil::LsDateListingToTime(columns[i - 1],
97 columns[i - 2],
98 columns[i],
99 current_time,
100 modification_time)) {
101 *size = columns[i - 3];
102 *offset = i;
103 return true;
107 // Some FTP listings use a different date format.
108 for (size_t i = 5U; i < columns.size(); i++) {
109 if (TwoColumnDateListingToTime(columns[i - 1],
110 columns[i],
111 modification_time)) {
112 *size = columns[i - 2];
113 *offset = i;
114 return true;
118 return false;
121 } // namespace
123 namespace net {
125 bool ParseFtpDirectoryListingLs(
126 const std::vector<base::string16>& lines,
127 const base::Time& current_time,
128 std::vector<FtpDirectoryListingEntry>* entries) {
129 // True after we have received a "total n" listing header, where n is an
130 // integer. Only one such header is allowed per listing.
131 bool received_total_line = false;
133 for (size_t i = 0; i < lines.size(); i++) {
134 if (lines[i].empty())
135 continue;
137 std::vector<base::string16> columns;
138 base::SplitString(base::CollapseWhitespace(lines[i], false), ' ', &columns);
140 // Some FTP servers put a "total n" line at the beginning of the listing
141 // (n is an integer). Allow such a line, but only once, and only if it's
142 // the first non-empty line. Do not match the word exactly, because it may
143 // be in different languages (at least English and German have been seen
144 // in the field).
145 if (columns.size() == 2 && !received_total_line) {
146 received_total_line = true;
148 int64 total_number;
149 if (!base::StringToInt64(columns[1], &total_number))
150 return false;
151 if (total_number < 0)
152 return false;
154 continue;
157 FtpDirectoryListingEntry entry;
159 size_t column_offset;
160 base::string16 size;
161 if (!DetectColumnOffsetSizeAndModificationTime(columns,
162 current_time,
163 &column_offset,
164 &size,
165 &entry.last_modified)) {
166 // Some servers send a message in one of the first few lines.
167 // All those messages have in common is the string ".:",
168 // where "." means the current directory, and ":" separates it
169 // from the rest of the message, which may be empty.
170 if (lines[i].find(base::ASCIIToUTF16(".:")) != base::string16::npos)
171 continue;
173 return false;
176 // Do not check "validity" of the permission listing. It's quirky,
177 // and some servers send garbage here while other parts of the line are OK.
179 if (!columns[0].empty() && columns[0][0] == 'l') {
180 entry.type = FtpDirectoryListingEntry::SYMLINK;
181 } else if (!columns[0].empty() && columns[0][0] == 'd') {
182 entry.type = FtpDirectoryListingEntry::DIRECTORY;
183 } else {
184 entry.type = FtpDirectoryListingEntry::FILE;
187 if (!base::StringToInt64(size, &entry.size)) {
188 // Some FTP servers do not separate owning group name from file size,
189 // like "group1234". We still want to display the file name for that
190 // entry, but can't really get the size (What if the group is named
191 // "group1", and the size is in fact 234? We can't distinguish between
192 // that and "group" with size 1234). Use a dummy value for the size.
193 // TODO(phajdan.jr): Use a value that means "unknown" instead of 0 bytes.
194 entry.size = 0;
196 if (entry.size < 0) {
197 // Some FTP servers have bugs that cause them to display the file size
198 // as negative. They're most likely big files like DVD ISO images.
199 // We still want to display them, so just say the real file size
200 // is unknown.
201 entry.size = -1;
203 if (entry.type != FtpDirectoryListingEntry::FILE)
204 entry.size = -1;
206 if (column_offset == columns.size() - 1) {
207 // If the end of the date listing is the last column, there is no file
208 // name. Some FTP servers send listing entries with empty names.
209 // It's not obvious how to display such an entry, so we ignore them.
210 // We don't want to make the parsing fail at this point though.
211 // Other entries can still be useful.
212 continue;
215 entry.name = FtpUtil::GetStringPartAfterColumns(lines[i],
216 column_offset + 1);
218 if (entry.type == FtpDirectoryListingEntry::SYMLINK) {
219 base::string16::size_type pos =
220 entry.name.rfind(base::ASCIIToUTF16(" -> "));
222 // We don't require the " -> " to be present. Some FTP servers don't send
223 // the symlink target, possibly for security reasons.
224 if (pos != base::string16::npos)
225 entry.name = entry.name.substr(0, pos);
228 entries->push_back(entry);
231 return true;
234 } // namespace net