Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / third_party / WebKit / Source / wtf / text / TextEncodingRegistry.cpp
bloba2ad2a61372b93a547f9ae2bb288405f4043e5d6
1 /*
2 * Copyright (C) 2006, 2007, 2011 Apple Inc. All rights reserved.
3 * Copyright (C) 2007-2009 Torch Mobile, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "config.h"
28 #include "wtf/text/TextEncodingRegistry.h"
30 #include "wtf/ASCIICType.h"
31 #include "wtf/CurrentTime.h"
32 #include "wtf/HashMap.h"
33 #include "wtf/HashSet.h"
34 #include "wtf/MainThread.h"
35 #include "wtf/StdLibExtras.h"
36 #include "wtf/StringExtras.h"
37 #include "wtf/ThreadingPrimitives.h"
38 #include "wtf/text/CString.h"
39 #include "wtf/text/TextCodecICU.h"
40 #include "wtf/text/TextCodecLatin1.h"
41 #include "wtf/text/TextCodecReplacement.h"
42 #include "wtf/text/TextCodecUTF16.h"
43 #include "wtf/text/TextCodecUTF8.h"
44 #include "wtf/text/TextCodecUserDefined.h"
45 #include "wtf/text/TextEncoding.h"
47 namespace WTF {
// Longest encoding name (excluding the terminating NUL) accepted by the
// registry; it sizes the conversion buffer and bounds registered aliases.
const size_t maxEncodingNameLength = 63;
51 // Hash for all-ASCII strings that does case folding.
52 struct TextEncodingNameHash {
53 static bool equal(const char* s1, const char* s2)
55 char c1;
56 char c2;
57 do {
58 #if defined(_MSC_FULL_VER) && _MSC_FULL_VER == 170051106
59 // Workaround for a bug in the VS2012 Update 1 optimizer, remove once the fix is released.
60 // https://connect.microsoft.com/VisualStudio/feedback/details/777533/vs2012-c-optimizing-bug-when-using-inline-and-char-return-type-x86-target-only
61 c1 = toASCIILower(*s1++);
62 c2 = toASCIILower(*s2++);
63 if (c1 != c2)
64 return false;
65 #else
66 c1 = *s1++;
67 c2 = *s2++;
68 if (toASCIILower(c1) != toASCIILower(c2))
69 return false;
70 #endif
71 } while (c1 && c2);
72 return !c1 && !c2;
75 // This algorithm is the one-at-a-time hash from:
76 // http://burtleburtle.net/bob/hash/hashfaq.html
77 // http://burtleburtle.net/bob/hash/doobs.html
78 static unsigned hash(const char* s)
80 unsigned h = WTF::stringHashingStartValue;
81 for (;;) {
82 char c = *s++;
83 if (!c) {
84 h += (h << 3);
85 h ^= (h >> 11);
86 h += (h << 15);
87 return h;
89 h += toASCIILower(c);
90 h += (h << 10);
91 h ^= (h >> 6);
95 static const bool safeToCompareToEmptyOrDeleted = false;
98 struct TextCodecFactory {
99 NewTextCodecFunction function;
100 const void* additionalData;
101 TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f), additionalData(d) { }
104 typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap;
105 typedef HashMap<const char*, TextCodecFactory> TextCodecMap;
107 static Mutex& encodingRegistryMutex()
109 // We don't have to use AtomicallyInitializedStatic here because
110 // this function is called on the main thread for any page before
111 // it is used in worker threads.
112 DEFINE_STATIC_LOCAL(Mutex, mutex, ());
113 return mutex;
116 static TextEncodingNameMap* textEncodingNameMap;
117 static TextCodecMap* textCodecMap;
119 namespace {
120 static unsigned didExtendTextCodecMaps = 0;
122 ALWAYS_INLINE unsigned atomicDidExtendTextCodecMaps()
124 return acquireLoad(&didExtendTextCodecMaps);
127 ALWAYS_INLINE void atomicSetDidExtendTextCodemMaps()
129 releaseStore(&didExtendTextCodecMaps, 1);
131 } // namespace
// Encoding names whose aliases and codecs are removed again by
// pruneBlacklistedCodecs(). Each entry is at most 5 characters plus the NUL.
static const char textEncodingNameBlacklist[][6] = { "UTF-7" };
135 #if ERROR_DISABLED
137 static inline void checkExistingName(const char*, const char*) { }
139 #else
141 static void checkExistingName(const char* alias, const char* atomicName)
143 const char* oldAtomicName = textEncodingNameMap->get(alias);
144 if (!oldAtomicName)
145 return;
146 if (oldAtomicName == atomicName)
147 return;
148 // Keep the warning silent about one case where we know this will happen.
149 if (strcmp(alias, "ISO-8859-8-I") == 0
150 && strcmp(oldAtomicName, "ISO-8859-8-I") == 0
151 && strcasecmp(atomicName, "iso-8859-8") == 0)
152 return;
153 WTF_LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s", alias, oldAtomicName, atomicName);
156 #endif
// Returns true for aliases that must not be registered.
static bool isUndesiredAlias(const char* alias)
{
    // Reject aliases with version numbers that are supported by some back-ends
    // (such as "ISO_2022,locale=ja,version=0" in ICU): any comma disqualifies.
    if (strchr(alias, ','))
        return true;

    // 8859_1 is known to (at least) ICU, but other browsers don't support this
    // name - and having it caused a compatibility problem, see bug 43554.
    return !strcmp(alias, "8859_1");
}
172 static void addToTextEncodingNameMap(const char* alias, const char* name)
174 ASSERT(strlen(alias) <= maxEncodingNameLength);
175 if (isUndesiredAlias(alias))
176 return;
177 const char* atomicName = textEncodingNameMap->get(name);
178 ASSERT(strcmp(alias, name) == 0 || atomicName);
179 if (!atomicName)
180 atomicName = name;
181 checkExistingName(alias, atomicName);
182 textEncodingNameMap->add(alias, atomicName);
185 static void addToTextCodecMap(const char* name, NewTextCodecFunction function, const void* additionalData)
187 const char* atomicName = textEncodingNameMap->get(name);
188 ASSERT(atomicName);
189 textCodecMap->add(atomicName, TextCodecFactory(function, additionalData));
192 static void pruneBlacklistedCodecs()
194 for (size_t i = 0; i < WTF_ARRAY_LENGTH(textEncodingNameBlacklist); ++i) {
195 const char* atomicName = textEncodingNameMap->get(textEncodingNameBlacklist[i]);
196 if (!atomicName)
197 continue;
199 Vector<const char*> names;
200 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
201 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
202 for (; it != end; ++it) {
203 if (it->value == atomicName)
204 names.append(it->key);
207 textEncodingNameMap->removeAll(names);
209 textCodecMap->remove(atomicName);
213 static void buildBaseTextCodecMaps()
215 ASSERT(isMainThread());
216 ASSERT(!textCodecMap);
217 ASSERT(!textEncodingNameMap);
219 textCodecMap = new TextCodecMap;
220 textEncodingNameMap = new TextEncodingNameMap;
222 TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap);
223 TextCodecLatin1::registerCodecs(addToTextCodecMap);
225 TextCodecUTF8::registerEncodingNames(addToTextEncodingNameMap);
226 TextCodecUTF8::registerCodecs(addToTextCodecMap);
228 TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
229 TextCodecUTF16::registerCodecs(addToTextCodecMap);
231 TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
232 TextCodecUserDefined::registerCodecs(addToTextCodecMap);
// Returns true when |alias| is non-null and spells "replacement", compared
// without regard to case.
bool isReplacementEncoding(const char* alias)
{
    if (!alias)
        return false;
    return strcasecmp(alias, "replacement") == 0;
}
240 bool isReplacementEncoding(const String& alias)
242 return alias == "replacement";
245 static void extendTextCodecMaps()
247 TextCodecReplacement::registerEncodingNames(addToTextEncodingNameMap);
248 TextCodecReplacement::registerCodecs(addToTextCodecMap);
250 TextCodecICU::registerEncodingNames(addToTextEncodingNameMap);
251 TextCodecICU::registerCodecs(addToTextCodecMap);
253 pruneBlacklistedCodecs();
256 PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding)
258 MutexLocker lock(encodingRegistryMutex());
261 ASSERT(textCodecMap);
262 TextCodecFactory factory = textCodecMap->get(encoding.name());
263 ASSERT(factory.function);
264 return factory.function(encoding, factory.additionalData);
267 const char* atomicCanonicalTextEncodingName(const char* name)
269 if (!name || !name[0])
270 return 0;
271 if (!textEncodingNameMap)
272 buildBaseTextCodecMaps();
274 MutexLocker lock(encodingRegistryMutex());
276 if (const char* atomicName = textEncodingNameMap->get(name))
277 return atomicName;
278 if (atomicDidExtendTextCodecMaps())
279 return 0;
280 extendTextCodecMaps();
281 atomicSetDidExtendTextCodemMaps();
282 return textEncodingNameMap->get(name);
285 template <typename CharacterType>
286 const char* atomicCanonicalTextEncodingName(const CharacterType* characters, size_t length)
288 char buffer[maxEncodingNameLength + 1];
289 size_t j = 0;
290 for (size_t i = 0; i < length; ++i) {
291 char c = static_cast<char>(characters[i]);
292 if (j == maxEncodingNameLength || c != characters[i])
293 return 0;
294 buffer[j++] = c;
296 buffer[j] = 0;
297 return atomicCanonicalTextEncodingName(buffer);
300 const char* atomicCanonicalTextEncodingName(const String& alias)
302 if (!alias.length())
303 return 0;
305 if (alias.contains(static_cast<UChar>('\0')))
306 return 0;
308 if (alias.is8Bit())
309 return atomicCanonicalTextEncodingName<LChar>(alias.characters8(), alias.length());
311 return atomicCanonicalTextEncodingName<UChar>(alias.characters16(), alias.length());
314 bool noExtendedTextEncodingNameUsed()
316 return !atomicDidExtendTextCodecMaps();
319 #ifndef NDEBUG
320 void dumpTextEncodingNameMap()
322 unsigned size = textEncodingNameMap->size();
323 fprintf(stderr, "Dumping %u entries in WTF::TextEncodingNameMap...\n", size);
325 MutexLocker lock(encodingRegistryMutex());
327 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
328 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
329 for (; it != end; ++it)
330 fprintf(stderr, "'%s' => '%s'\n", it->key, it->value);
332 #endif
334 } // namespace WTF