url/url_util.h

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef URL_URL_UTIL_H_
   6 #define URL_URL_UTIL_H_
   7
   8 #include <string>
   9
  10 #include "base/strings/string16.h"
  11 #include "url/third_party/mozilla/url_parse.h"
  12 #include "url/url_canon.h"
  13 #include "url/url_constants.h"
  14 #include "url/url_export.h"
  15
  16 namespace url {
  17
  18 // Init ------------------------------------------------------------------------
  19
  20 // Initialization is NOT required; it will be implicitly initialized when first
  21 // used. However, this implicit initialization is NOT threadsafe. If you are
  22 // using this library in a threaded environment and don't have a consistent
  23 // "first call" (an example might be calling AddStandardScheme with your special
  24 // application-specific schemes) then you will want to call Initialize() before
  25 // spawning any threads.
  26 //
  27 // It is OK to call this function more than once; subsequent calls will be
  28 // no-ops, unless Shutdown() was called in the meantime. This will also be a
  29 // no-op if other calls to the library have forced an initialization beforehand.
  30 URL_EXPORT void Initialize();
  31
  32 // Cleanup is not required, although some strings may leak without it. For
  33 // most user applications, this is fine. If you're using this in a library that
  34 // may get loaded and unloaded, you'll want to call Shutdown() before unloading
  35 // to properly clean up your library.
  36 URL_EXPORT void Shutdown();
  37
  38 // Schemes --------------------------------------------------------------------
  39
  40 // Adds an application-defined scheme to the internal list of "standard-format"
  41 // URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
  42 // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
  43 //
  44 // This function is not threadsafe and cannot be called concurrently with any
  45 // other url_util function. It will assert if the list of standard schemes has
  46 // been locked (see LockStandardSchemes).
  47 URL_EXPORT void AddStandardScheme(const char* new_scheme);
  48
  49 // Sets a flag to prevent future calls to AddStandardScheme from succeeding.
  50 //
  51 // This is designed to help prevent errors in multithreaded applications.
  52 // Normal usage would be to call AddStandardScheme for your custom schemes at
  53 // the beginning of program initialization, and then LockStandardSchemes. This
  54 // prevents later callers from mistakenly calling AddStandardScheme when the
  55 // program is running with multiple threads, where such usage would be
  56 // dangerous.
  57 //
  58 // We could have had AddStandardScheme use a lock instead, but that would add
  59 // some platform-specific dependencies we don't otherwise have now, and is
  60 // overkill considering the normal usage is so simple.
  61 URL_EXPORT void LockStandardSchemes();
  62
  63 // Locates the scheme in |str| (|str_len| code units long) and places its range
  64 // into |found_scheme|, which may be NULL if the caller does not care about it.
  65 //
  66 // Returns whether the given |compare| scheme matches the scheme found in the
  67 // input (if any). The |compare| scheme must be a valid canonical scheme or
  68 // the result of the comparison is undefined.
  69 URL_EXPORT bool FindAndCompareScheme(const char* str,
  70                                      int str_len,
  71                                      const char* compare,
  72                                      Component* found_scheme);
  73 URL_EXPORT bool FindAndCompareScheme(const base::char16* str,
  74                                      int str_len,
  75                                      const char* compare,
  76                                      Component* found_scheme);
  77 inline bool FindAndCompareScheme(const std::string& str,
  78                                  const char* compare,
  79                                  Component* found_scheme) {
  80   const int spec_len = static_cast<int>(str.size());  // Callee wants an int.
  81   return FindAndCompareScheme(str.data(), spec_len, compare, found_scheme);
  82 }
  83 inline bool FindAndCompareScheme(const base::string16& str,
  84                                  const char* compare,
  85                                  Component* found_scheme) {
  86   const int spec_len = static_cast<int>(str.size());  // Callee wants an int.
  87   return FindAndCompareScheme(str.data(), spec_len, compare, found_scheme);
  88 }
  89
  90 // Returns true if |spec| represents a URL whose scheme is in the list of known
  91 // standard-format schemes (see AddStandardScheme).
  92 URL_EXPORT bool IsStandard(const char* spec, const Component& scheme);
  93 URL_EXPORT bool IsStandard(const base::char16* spec, const Component& scheme);
  94
  95 // URL library wrappers -------------------------------------------------------
  96
  97 // Parses the given spec according to the extracted scheme type. Normal users
  98 // should use the URL object, although this may be useful if performance is
  99 // critical and you don't want to do the heap allocation for the std::string.
 100 //
 101 // As with the Canonicalize* functions, |charset_converter| can be NULL to use
 102 // UTF-8 (it will be faster in this case).
 103 //
 104 // Returns true if a valid URL was produced, false if not. On failure,
 105 // |output| and |output_parsed| will still be filled and will be consistent,
 106 // but they will not represent a loadable URL.
 107 URL_EXPORT bool Canonicalize(const char* spec,
 108                              int spec_len,
 109                              bool trim_path_end,
 110                              CharsetConverter* charset_converter,
 111                              CanonOutput* output,
 112                              Parsed* output_parsed);
 113 URL_EXPORT bool Canonicalize(const base::char16* spec,
 114                              int spec_len,
 115                              bool trim_path_end,
 116                              CharsetConverter* charset_converter,
 117                              CanonOutput* output,
 118                              Parsed* output_parsed);
 119
 120 // Resolves a potentially relative URL relative to the given parsed base URL.
 121 // The base MUST be valid. The resulting canonical URL and parsed information
 122 // will be placed into the given out variables (|output|, |output_parsed|).
 123 //
 124 // The |relative| input need not be relative. If we discover that it's absolute,
 125 // this will produce a canonical version of that URL. See Canonicalize() for
 126 // more about the charset_converter.
 127 //
 128 // Returns true if the output is valid, false if the input could not produce
 129 // a valid URL.
 130 URL_EXPORT bool ResolveRelative(const char* base_spec,
 131                                 int base_spec_len,
 132                                 const Parsed& base_parsed,
 133                                 const char* relative,
 134                                 int relative_length,
 135                                 CharsetConverter* charset_converter,
 136                                 CanonOutput* output,
 137                                 Parsed* output_parsed);
 138 URL_EXPORT bool ResolveRelative(const char* base_spec,
 139                                 int base_spec_len,
 140                                 const Parsed& base_parsed,
 141                                 const base::char16* relative,
 142                                 int relative_length,
 143                                 CharsetConverter* charset_converter,
 144                                 CanonOutput* output,
 145                                 Parsed* output_parsed);
 146
 147 // Replaces components in the given VALID input url. The new canonical URL info
 148 // is written to |output| and |out_parsed|.
 149 //
 150 // Returns true if the resulting URL is valid.
 151 URL_EXPORT bool ReplaceComponents(const char* spec,
 152                                   int spec_len,
 153                                   const Parsed& parsed,
 154                                   const Replacements<char>& replacements,
 155                                   CharsetConverter* charset_converter,
 156                                   CanonOutput* output,
 157                                   Parsed* out_parsed);
 158 URL_EXPORT bool ReplaceComponents(
 159     const char* spec,
 160     int spec_len,
 161     const Parsed& parsed,
 162     const Replacements<base::char16>& replacements,
 163     CharsetConverter* charset_converter,
 164     CanonOutput* output,
 165     Parsed* out_parsed);
 166
 167 // String helper functions ----------------------------------------------------
 168
 169 // Compares the lower-case form of the given string against the given ASCII
 170 // string. This is useful for checking whether an input string matches some
 171 // token, and it is optimized to avoid intermediate string copies.
 172 //
 173 // The versions of this function that don't take a b_end assume that the b
 174 // string is NULL-terminated.
 175 URL_EXPORT bool LowerCaseEqualsASCII(const char* a_begin,
 176                                      const char* a_end,
 177                                      const char* b);
 178 URL_EXPORT bool LowerCaseEqualsASCII(const char* a_begin,
 179                                      const char* a_end,
 180                                      const char* b_begin,
 181                                      const char* b_end);
 182 URL_EXPORT bool LowerCaseEqualsASCII(const base::char16* a_begin,
 183                                      const base::char16* a_end,
 184                                      const char* b);
 185
 186 // Unescapes the given |input| string using URL escaping rules.
 187 URL_EXPORT void DecodeURLEscapeSequences(const char* input,
 188                                          int length,
 189                                          CanonOutputW* output);
 190
 191 // Escapes the given |input| string as defined by the JS method
 192 // encodeURIComponent. See https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
 193 URL_EXPORT void EncodeURIComponent(const char* input,
 194                                    int length,
 195                                    CanonOutput* output);
 196
 197 }  // namespace url
 198
 199 #endif  // URL_URL_UTIL_H_