Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / third_party / WebKit / Source / wtf / text / StringUTF8Adaptor.h
blob07f2825bbb247d97da69ae3a8d5c6b540692d85a
/*
 * Copyright (C) 2013 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
31 #ifndef StringUTF8Adaptor_h
32 #define StringUTF8Adaptor_h
34 #include "wtf/text/CString.h"
35 #include "wtf/text/TextEncoding.h"
36 #include "wtf/text/WTFString.h"
38 namespace WTF {
40 // This class lets you get UTF-8 data out of a String without mallocing a
41 // separate buffer to hold the data if the String happens to be 8 bit and
42 // contain only ASCII characters.
43 class StringUTF8Adaptor {
44 public:
45 enum ShouldNormalize {
46 DoNotNormalize,
47 Normalize
50 explicit StringUTF8Adaptor(const String& string, ShouldNormalize normalize = DoNotNormalize, UnencodableHandling handling = EntitiesForUnencodables)
51 : m_data(0)
52 , m_length(0)
54 if (string.isEmpty())
55 return;
56 // Unfortunately, 8 bit WTFStrings are encoded in Latin-1 and GURL uses UTF-8
57 // when processing 8 bit strings. If |relative| is entirely ASCII, we luck out
58 // and can avoid mallocing a new buffer to hold the UTF-8 data because UTF-8
59 // and Latin-1 use the same code units for ASCII code points.
60 if (string.is8Bit() && string.containsOnlyASCII()) {
61 m_data = reinterpret_cast<const char*>(string.characters8());
62 m_length = string.length();
63 } else {
64 if (normalize == Normalize)
65 m_utf8Buffer = UTF8Encoding().normalizeAndEncode(string, handling);
66 else
67 m_utf8Buffer = string.utf8();
68 m_data = m_utf8Buffer.data();
69 m_length = m_utf8Buffer.length();
73 const char* data() const { return m_data; }
74 size_t length() const { return m_length; }
76 private:
77 CString m_utf8Buffer;
78 const char* m_data;
79 size_t m_length;
82 } // namespace WTF
84 using WTF::StringUTF8Adaptor;
86 #endif // StringUTF8Adaptor_h