net/ftp/ftp_util.cc

   1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "net/ftp/ftp_util.h"
   6
   7 #include <map>
   8 #include <vector>
   9
  10 #include "base/i18n/case_conversion.h"
  11 #include "base/i18n/char_iterator.h"
  12 #include "base/logging.h"
  13 #include "base/memory/singleton.h"
  14 #include "base/strings/string_number_conversions.h"
  15 #include "base/strings/string_piece.h"
  16 #include "base/strings/string_split.h"
  17 #include "base/strings/string_tokenizer.h"
  18 #include "base/strings/string_util.h"
  19 #include "base/strings/utf_string_conversions.h"
  20 #include "base/time/time.h"
  21 #include "third_party/icu/source/common/unicode/uchar.h"
  22 #include "third_party/icu/source/i18n/unicode/datefmt.h"
  23 #include "third_party/icu/source/i18n/unicode/dtfmtsym.h"
  24
  25 using base::ASCIIToUTF16;
  26 using base::StringPiece16;
  27
  28 // For examples of Unix<->VMS path conversions, see the unit test file. On VMS
  29 // a path looks differently depending on whether it's a file or directory.
  30
  31 namespace net {
  32
  33 // static
  34 std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) {
  35   if (unix_path.empty())
  36     return std::string();
  37
  38   base::StringTokenizer tokenizer(unix_path, "/");
  39   std::vector<std::string> tokens;
  40   while (tokenizer.GetNext())
  41     tokens.push_back(tokenizer.token());
  42
  43   if (unix_path[0] == '/') {
  44     // It's an absolute path.
  45
  46     if (tokens.empty()) {
  47       DCHECK_EQ(1U, unix_path.length());
  48       return "[]";
  49     }
  50
  51     if (tokens.size() == 1)
  52       return unix_path.substr(1);  // Drop the leading slash.
  53
  54     std::string result(tokens[0] + ":[");
  55     if (tokens.size() == 2) {
  56       // Don't ask why, it just works that way on VMS.
  57       result.append("000000");
  58     } else {
  59       result.append(tokens[1]);
  60       for (size_t i = 2; i < tokens.size() - 1; i++)
  61         result.append("." + tokens[i]);
  62     }
  63     result.append("]" + tokens[tokens.size() - 1]);
  64     return result;
  65   }
  66
  67   if (tokens.size() == 1)
  68     return unix_path;
  69
  70   std::string result("[");
  71   for (size_t i = 0; i < tokens.size() - 1; i++)
  72     result.append("." + tokens[i]);
  73   result.append("]" + tokens[tokens.size() - 1]);
  74   return result;
  75 }
  76
  77 // static
  78 std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) {
  79   if (unix_path.empty())
  80     return std::string();
  81
  82   std::string path(unix_path);
  83
  84   if (path[path.length() - 1] != '/')
  85     path.append("/");
  86
  87   // Reuse logic from UnixFilePathToVMS by appending a fake file name to the
  88   // real path and removing it after conversion.
  89   path.append("x");
  90   path = UnixFilePathToVMS(path);
  91   return path.substr(0, path.length() - 1);
  92 }
  93
  94 // static
  95 std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) {
  96   if (vms_path.empty())
  97     return ".";
  98
  99   if (vms_path[0] == '/') {
 100     // This is not really a VMS path. Most likely the server is emulating UNIX.
 101     // Return path as-is.
 102     return vms_path;
 103   }
 104
 105   if (vms_path == "[]")
 106     return "/";
 107
 108   std::string result(vms_path);
 109   if (vms_path[0] == '[') {
 110     // It's a relative path.
 111     ReplaceFirstSubstringAfterOffset(&result, 0, "[.", std::string());
 112   } else {
 113     // It's an absolute path.
 114     result.insert(0, "/");
 115     ReplaceSubstringsAfterOffset(&result, 0, ":[000000]", "/");
 116     ReplaceSubstringsAfterOffset(&result, 0, ":[", "/");
 117   }
 118   std::replace(result.begin(), result.end(), '.', '/');
 119   std::replace(result.begin(), result.end(), ']', '/');
 120
 121   // Make sure the result doesn't end with a slash.
 122   if (result.length() && result[result.length() - 1] == '/')
 123     result = result.substr(0, result.length() - 1);
 124
 125   return result;
 126 }
 127
 128 namespace {
 129
 130 // Lazy-initialized map of abbreviated month names.
 131 class AbbreviatedMonthsMap {
 132  public:
 133   static AbbreviatedMonthsMap* GetInstance() {
 134     return Singleton<AbbreviatedMonthsMap>::get();
 135   }
 136
 137   // Converts abbreviated month name |text| to its number (in range 1-12).
 138   // On success returns true and puts the number in |number|.
 139   bool GetMonthNumber(const base::string16& text, int* number) {
 140     // Ignore the case of the month names. The simplest way to handle that
 141     // is to make everything lowercase.
 142     base::string16 text_lower(base::i18n::ToLower(text));
 143
 144     if (map_.find(text_lower) == map_.end())
 145       return false;
 146
 147     *number = map_[text_lower];
 148     return true;
 149   }
 150
 151  private:
 152   friend struct DefaultSingletonTraits<AbbreviatedMonthsMap>;
 153
 154   // Constructor, initializes the map based on ICU data. It is much faster
 155   // to do that just once.
 156   AbbreviatedMonthsMap() {
 157     int32_t locales_count;
 158     const icu::Locale* locales =
 159         icu::DateFormat::getAvailableLocales(locales_count);
 160
 161     for (int32_t locale = 0; locale < locales_count; locale++) {
 162       UErrorCode status(U_ZERO_ERROR);
 163
 164       icu::DateFormatSymbols format_symbols(locales[locale], status);
 165
 166       // If we cannot get format symbols for some locale, it's not a fatal
 167       // error. Just try another one.
 168       if (U_FAILURE(status))
 169         continue;
 170
 171       int32_t months_count;
 172       const icu::UnicodeString* months =
 173           format_symbols.getShortMonths(months_count);
 174
 175       for (int32_t month = 0; month < months_count; month++) {
 176         base::string16 month_name(months[month].getBuffer(),
 177                             static_cast<size_t>(months[month].length()));
 178
 179         // Ignore the case of the month names. The simplest way to handle that
 180         // is to make everything lowercase.
 181         month_name = base::i18n::ToLower(month_name);
 182
 183         map_[month_name] = month + 1;
 184
 185         // Sometimes ICU returns longer strings, but in FTP listings a shorter
 186         // abbreviation is used (for example for the Russian locale). Make sure
 187         // we always have a map entry for a three-letter abbreviation.
 188         map_[month_name.substr(0, 3)] = month + 1;
 189       }
 190     }
 191
 192     // Fail loudly if the data returned by ICU is obviously incomplete.
 193     // This is intended to catch cases like http://crbug.com/177428
 194     // much earlier. Note that the issue above turned out to be non-trivial
 195     // to reproduce - crash data is much better indicator of a problem
 196     // than incomplete bug reports.
 197     CHECK_EQ(1, map_[ASCIIToUTF16("jan")]);
 198     CHECK_EQ(2, map_[ASCIIToUTF16("feb")]);
 199     CHECK_EQ(3, map_[ASCIIToUTF16("mar")]);
 200     CHECK_EQ(4, map_[ASCIIToUTF16("apr")]);
 201     CHECK_EQ(5, map_[ASCIIToUTF16("may")]);
 202     CHECK_EQ(6, map_[ASCIIToUTF16("jun")]);
 203     CHECK_EQ(7, map_[ASCIIToUTF16("jul")]);
 204     CHECK_EQ(8, map_[ASCIIToUTF16("aug")]);
 205     CHECK_EQ(9, map_[ASCIIToUTF16("sep")]);
 206     CHECK_EQ(10, map_[ASCIIToUTF16("oct")]);
 207     CHECK_EQ(11, map_[ASCIIToUTF16("nov")]);
 208     CHECK_EQ(12, map_[ASCIIToUTF16("dec")]);
 209   }
 210
 211   // Maps lowercase month names to numbers in range 1-12.
 212   std::map<base::string16, int> map_;
 213
 214   DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap);
 215 };
 216
 217 }  // namespace
 218
 219 // static
 220 bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text,
 221                                        int* number) {
 222   return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text, number);
 223 }
 224
 225 // static
 226 bool FtpUtil::LsDateListingToTime(const base::string16& month,
 227                                   const base::string16& day,
 228                                   const base::string16& rest,
 229                                   const base::Time& current_time,
 230                                   base::Time* result) {
 231   base::Time::Exploded time_exploded = { 0 };
 232
 233   if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) {
 234     // Work around garbage sent by some servers in the same column
 235     // as the month. Take just last 3 characters of the string.
 236     if (month.length() < 3 ||
 237         !AbbreviatedMonthToNumber(month.substr(month.length() - 3),
 238                                   &time_exploded.month)) {
 239       return false;
 240     }
 241   }
 242
 243   if (!base::StringToInt(day, &time_exploded.day_of_month))
 244     return false;
 245   if (time_exploded.day_of_month > 31)
 246     return false;
 247
 248   if (!base::StringToInt(rest, &time_exploded.year)) {
 249     // Maybe it's time. Does it look like time? Note that it can be any of
 250     // "HH:MM", "H:MM", "HH:M" or maybe even "H:M".
 251     if (rest.length() > 5)
 252       return false;
 253
 254     size_t colon_pos = rest.find(':');
 255     if (colon_pos == base::string16::npos)
 256       return false;
 257     if (colon_pos > 2)
 258       return false;
 259
 260     if (!base::StringToInt(
 261             StringPiece16(rest.begin(), rest.begin() + colon_pos),
 262             &time_exploded.hour)) {
 263       return false;
 264     }
 265     if (!base::StringToInt(
 266             StringPiece16(rest.begin() + colon_pos + 1, rest.end()),
 267             &time_exploded.minute)) {
 268       return false;
 269     }
 270
 271     // Guess the year.
 272     base::Time::Exploded current_exploded;
 273     current_time.LocalExplode(&current_exploded);
 274
 275     // If it's not possible for the parsed date to be in the current year,
 276     // use the previous year.
 277     if (time_exploded.month > current_exploded.month ||
 278         (time_exploded.month == current_exploded.month &&
 279          time_exploded.day_of_month > current_exploded.day_of_month)) {
 280       time_exploded.year = current_exploded.year - 1;
 281     } else {
 282       time_exploded.year = current_exploded.year;
 283     }
 284   }
 285
 286   // We don't know the time zone of the listing, so just use local time.
 287   *result = base::Time::FromLocalExploded(time_exploded);
 288   return true;
 289 }
 290
 291 // static
 292 bool FtpUtil::WindowsDateListingToTime(const base::string16& date,
 293                                        const base::string16& time,
 294                                        base::Time* result) {
 295   base::Time::Exploded time_exploded = { 0 };
 296
 297   // Date should be in format MM-DD-YY[YY].
 298   std::vector<base::string16> date_parts;
 299   base::SplitString(date, '-', &date_parts);
 300   if (date_parts.size() != 3)
 301     return false;
 302   if (!base::StringToInt(date_parts[0], &time_exploded.month))
 303     return false;
 304   if (!base::StringToInt(date_parts[1], &time_exploded.day_of_month))
 305     return false;
 306   if (!base::StringToInt(date_parts[2], &time_exploded.year))
 307     return false;
 308   if (time_exploded.year < 0)
 309     return false;
 310   // If year has only two digits then assume that 00-79 is 2000-2079,
 311   // and 80-99 is 1980-1999.
 312   if (time_exploded.year < 80)
 313     time_exploded.year += 2000;
 314   else if (time_exploded.year < 100)
 315     time_exploded.year += 1900;
 316
 317   // Time should be in format HH:MM[(AM|PM)]
 318   if (time.length() < 5)
 319     return false;
 320
 321   std::vector<base::string16> time_parts;
 322   base::SplitString(time.substr(0, 5), ':', &time_parts);
 323   if (time_parts.size() != 2)
 324     return false;
 325   if (!base::StringToInt(time_parts[0], &time_exploded.hour))
 326     return false;
 327   if (!base::StringToInt(time_parts[1], &time_exploded.minute))
 328     return false;
 329   if (!time_exploded.HasValidValues())
 330     return false;
 331
 332   if (time.length() > 5) {
 333     if (time.length() != 7)
 334       return false;
 335     base::string16 am_or_pm(time.substr(5, 2));
 336     if (EqualsASCII(am_or_pm, "PM")) {
 337       if (time_exploded.hour < 12)
 338         time_exploded.hour += 12;
 339     } else if (EqualsASCII(am_or_pm, "AM")) {
 340       if (time_exploded.hour == 12)
 341         time_exploded.hour = 0;
 342     } else {
 343       return false;
 344     }
 345   }
 346
 347   // We don't know the time zone of the server, so just use local time.
 348   *result = base::Time::FromLocalExploded(time_exploded);
 349   return true;
 350 }
 351
 352 // static
 353 base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text,
 354                                                   int columns) {
 355   base::i18n::UTF16CharIterator iter(&text);
 356
 357   // TODO(jshin): Is u_isspace the right function to use here?
 358   for (int i = 0; i < columns; i++) {
 359     // Skip the leading whitespace.
 360     while (!iter.end() && u_isspace(iter.get()))
 361       iter.Advance();
 362
 363     // Skip the actual text of i-th column.
 364     while (!iter.end() && !u_isspace(iter.get()))
 365       iter.Advance();
 366   }
 367
 368   base::string16 result(text.substr(iter.array_pos()));
 369   base::TrimWhitespace(result, base::TRIM_ALL, &result);
 370   return result;
 371 }
 372
 373 }  // namespace