Update git submodules
[LibreOffice.git] / svl / source / misc / sharedstringpool.cxx
blob4f891d3677d11ec376bf81968f2659854cf96c45
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <svl/sharedstringpool.hxx>
11 #include <svl/sharedstring.hxx>
12 #include <unotools/charclass.hxx>
14 #include <unordered_set>
16 #ifdef __GNUC__
17 #pragma GCC diagnostic push
18 #pragma GCC diagnostic ignored "-Wshadow"
19 #endif
20 #if defined __clang__
21 #pragma clang diagnostic push
22 #pragma clang diagnostic ignored "-Wunused-parameter"
23 #endif
24 #if defined _MSC_VER
25 #pragma warning(push)
26 #pragma warning(disable : 4324) // structure was padded due to alignment specifier
27 #endif
28 #include <libcuckoo/cuckoohash_map.hh>
29 #if defined _MSC_VER
30 #pragma warning(pop)
31 #endif
32 #if defined __clang__
33 #pragma clang diagnostic pop
34 #endif
35 #ifdef __GNUC__
36 #pragma GCC diagnostic pop
37 #endif
39 namespace svl
41 namespace
43 sal_Int32 getRefCount(const rtl_uString* p) { return (p->refCount & 0x3FFFFFFF); }
45 // we store the key twice, because the concurrent hashtable we are using does not provide any way to return the key in use
46 typedef std::pair<OUString, OUString> Mapped;
48 struct HashFunction
50 size_t operator()(rtl_uString* const key) const
52 return rtl_ustr_hashCode_WithLength(key->buffer, key->length);
56 struct EqualsFunction
58 bool operator()(rtl_uString* const lhs, rtl_uString* const rhs) const
60 return OUString::unacquired(&lhs) == OUString::unacquired(&rhs);
65 struct SharedStringPool::Impl
67 // We use this map for two purposes - to store lower->upper case mappings
68 // and to store an upper->upper mapping.
69 // The second mapping is used so that we can
70 // share the same rtl_uString object between different keys which map to the same uppercase string to save memory.
72 // Docs for this concurrent hashtable here: http://efficient.github.io/libcuckoo/classlibcuckoo_1_1cuckoohash__map.html
73 libcuckoo::cuckoohash_map<rtl_uString*, Mapped, HashFunction, EqualsFunction> maStrMap;
74 const CharClass& mrCharClass;
76 explicit Impl(const CharClass& rCharClass)
77 : mrCharClass(rCharClass)
82 SharedStringPool::SharedStringPool(const CharClass& rCharClass)
83 : mpImpl(new Impl(rCharClass))
87 SharedStringPool::~SharedStringPool() {}
89 SharedString SharedStringPool::intern(const OUString& rStr)
91 auto& rMap = mpImpl->maStrMap;
93 rtl_uString *pResultLower = {}, *pResultUpper = {}; // bogus GCC 12 -Werror=maybe-uninitialized
94 if (rMap.find_fn(rStr.pData, [&](const Mapped& rMapped) {
95 pResultLower = rMapped.first.pData;
96 pResultUpper = rMapped.second.pData;
97 }))
98 // there is already a mapping
99 return SharedString(pResultLower, pResultUpper);
101 // This is a new string insertion. Establish mapping to upper-case variant.
102 OUString aUpper = mpImpl->mrCharClass.uppercase(rStr);
104 // either insert a new upper->upper mapping, or write the existing mapping into aUpper
105 mpImpl->maStrMap.uprase_fn(aUpper.pData,
106 [&](Mapped& mapped) -> bool {
107 aUpper = mapped.second;
108 return false;
110 aUpper, aUpper);
112 if (aUpper == rStr)
113 // no need to do anything more, because the key is already uppercase
114 return SharedString(aUpper.pData, aUpper.pData);
116 // either insert a new lower->upper mapping, or write the existing mapping into aLower
117 if (mpImpl->maStrMap.uprase_fn(rStr.pData,
118 [&](Mapped& mapped) -> bool {
119 pResultLower = mapped.first.pData;
120 pResultUpper = mapped.second.pData;
121 return false;
123 rStr, aUpper))
125 pResultLower = rStr.pData;
126 pResultUpper = aUpper.pData;
129 return SharedString(pResultLower, pResultUpper);
132 void SharedStringPool::purge()
134 auto locked_table = mpImpl->maStrMap.lock_table();
136 // Because we can have an uppercase entry mapped to itself,
137 // and then a bunch of lowercase entries mapped to that same
138 // upper-case entry, we need to scan the map twice - the first
139 // time to remove lowercase entries, and then only can we
140 // check for unused uppercase entries.
142 auto it = locked_table.begin();
143 auto itEnd = locked_table.end();
144 while (it != itEnd)
146 rtl_uString* p1 = it->second.first.pData;
147 rtl_uString* p2 = it->second.second.pData;
148 if (p1 != p2)
150 // normal case - lowercase mapped to uppercase, which
151 // means that the lowercase entry has one ref-counted
152 // entry as the key in the map
153 if (getRefCount(p1) == 1)
155 it = locked_table.erase(it);
156 continue;
159 ++it;
162 it = locked_table.begin();
163 itEnd = locked_table.end();
164 while (it != itEnd)
166 rtl_uString* p1 = it->second.first.pData;
167 rtl_uString* p2 = it->second.second.pData;
168 if (p1 == p2)
170 // uppercase which is mapped to itself, which means
171 // one ref-counted entry as the key in the map, and
172 // one ref-counted entry in the value in the map
173 if (getRefCount(p1) == 2)
175 it = locked_table.erase(it);
176 continue;
179 ++it;
183 size_t SharedStringPool::getCount() const { return mpImpl->maStrMap.size(); }
185 size_t SharedStringPool::getCountIgnoreCase() const
187 // this is only called from unit tests, so no need to be efficient
188 std::unordered_set<OUString> aUpperSet;
189 auto locked_table = mpImpl->maStrMap.lock_table();
190 for (auto const& pair : locked_table)
191 aUpperSet.insert(pair.second.second);
192 return aUpperSet.size();
196 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */