1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include <svl/sharedstringpool.hxx>
11 #include <svl/sharedstring.hxx>
12 #include <unotools/charclass.hxx>
14 #include <unordered_set>
17 #pragma GCC diagnostic push
18 #pragma GCC diagnostic ignored "-Wshadow"
21 #pragma clang diagnostic push
22 #pragma clang diagnostic ignored "-Wunused-parameter"
26 #pragma warning(disable : 4324) // structure was padded due to alignment specifier
28 #include <libcuckoo/cuckoohash_map.hh>
33 #pragma clang diagnostic pop
36 #pragma GCC diagnostic pop
43 sal_Int32
getRefCount(const rtl_uString
* p
) { return (p
->refCount
& 0x3FFFFFFF); }
45 // we store the key twice, because the concurrent hashtable we are using does not provide any way to return the key in use
46 typedef std::pair
<OUString
, OUString
> Mapped
;
50 size_t operator()(rtl_uString
* const key
) const
52 return rtl_ustr_hashCode_WithLength(key
->buffer
, key
->length
);
58 bool operator()(rtl_uString
* const lhs
, rtl_uString
* const rhs
) const
60 return OUString::unacquired(&lhs
) == OUString::unacquired(&rhs
);
65 struct SharedStringPool::Impl
67 // We use this map for two purposes - to store lower->upper case mappings
68 // and to store an upper->upper mapping.
69 // The second mapping is used so that we can
70 // share the same rtl_uString object between different keys which map to the same uppercase string to save memory.
72 // Docs for this concurrent hashtable here: http://efficient.github.io/libcuckoo/classlibcuckoo_1_1cuckoohash__map.html
73 libcuckoo::cuckoohash_map
<rtl_uString
*, Mapped
, HashFunction
, EqualsFunction
> maStrMap
;
74 const CharClass
& mrCharClass
;
76 explicit Impl(const CharClass
& rCharClass
)
77 : mrCharClass(rCharClass
)
82 SharedStringPool::SharedStringPool(const CharClass
& rCharClass
)
83 : mpImpl(new Impl(rCharClass
))
87 SharedStringPool::~SharedStringPool() {}
// Interns rStr: returns a SharedString built from the string data the map
// holds for rStr and for its upper-case form (per the pool's CharClass),
// adding map entries when the initial lookup finds none.
// NOTE(review): as it stands here the function shows no braces, no lambda
// terminators, no trailing uprase_fn arguments and no condition guarding the
// "already uppercase" return - confirm against the upstream file.
89 SharedString
SharedStringPool::intern(const OUString
& rStr
)
91 auto& rMap
= mpImpl
->maStrMap
;
93 rtl_uString
*pResultLower
= {}, *pResultUpper
= {}; // bogus GCC 12 -Werror=maybe-uninitialized
// Look rStr up; the callback copies the stored pair's string data pointers
// (first -> pResultLower, second -> pResultUpper) out of the map.
94 if (rMap
.find_fn(rStr
.pData
, [&](const Mapped
& rMapped
) {
95 pResultLower
= rMapped
.first
.pData
;
96 pResultUpper
= rMapped
.second
.pData
;
98 // there is already a mapping
99 return SharedString(pResultLower
, pResultUpper
);
101 // This is a new string insertion. Establish mapping to upper-case variant.
102 OUString aUpper
= mpImpl
->mrCharClass
.uppercase(rStr
);
104 // either insert a new upper->upper mapping, or write the existing mapping into aUpper
105 mpImpl
->maStrMap
.uprase_fn(aUpper
.pData
,
// Callback for a key that is already present: replace the local aUpper with
// the string instance held in the map, so that instance is the one shared.
106 [&](Mapped
& mapped
) -> bool {
107 aUpper
= mapped
.second
;
113 // no need to do anything more, because the key is already uppercase
114 return SharedString(aUpper
.pData
, aUpper
.pData
);
116 // either insert a new lower->upper mapping, or read the existing mapping into pResultLower/pResultUpper
117 if (mpImpl
->maStrMap
.uprase_fn(rStr
.pData
,
// Callback for a key that is already present: report the stored pair.
118 [&](Mapped
& mapped
) -> bool {
119 pResultLower
= mapped
.first
.pData
;
120 pResultUpper
= mapped
.second
.pData
;
// Results are rStr itself and the upper-case string determined above.
// NOTE(review): presumably reached only when uprase_fn reports that it
// inserted a new entry - confirm against the libcuckoo documentation.
125 pResultLower
= rStr
.pData
;
126 pResultUpper
= aUpper
.pData
;
129 return SharedString(pResultLower
, pResultUpper
);
// Erases map entries whose strings are, judging by their reference counts,
// no longer referenced from outside the map. The table is walked twice with
// the object returned by lock_table() held throughout.
// NOTE(review): as it stands here the loop headers, the comparison of p1 with
// p2 and the iterator increments are not visible - confirm against the
// upstream file.
132 void SharedStringPool::purge()
134 auto locked_table
= mpImpl
->maStrMap
.lock_table();
136 // Because we can have an uppercase entry mapped to itself,
137 // and then a bunch of lowercase entries mapped to that same
138 // upper-case entry, we need to scan the map twice - the first
139 // time to remove lowercase entries, and then only can we
140 // check for unused uppercase entries.
// First pass.
142 auto it
= locked_table
.begin();
143 auto itEnd
= locked_table
.end();
// p1: string data of the entry's first (original-case) string,
// p2: string data of its second (upper-case) string.
146 rtl_uString
* p1
= it
->second
.first
.pData
;
147 rtl_uString
* p2
= it
->second
.second
.pData
;
150 // normal case - lowercase mapped to uppercase, which
151 // means that the lowercase entry has one ref-counted
152 // entry as the key in the map
153 if (getRefCount(p1
) == 1)
// erase() hands back the iterator to continue from.
155 it
= locked_table
.erase(it
);
// Second pass: restart from the beginning of the table.
162 it
= locked_table
.begin();
163 itEnd
= locked_table
.end();
166 rtl_uString
* p1
= it
->second
.first
.pData
;
167 rtl_uString
* p2
= it
->second
.second
.pData
;
170 // uppercase which is mapped to itself, which means
171 // one ref-counted entry as the key in the map, and
172 // one ref-counted entry in the value in the map
173 if (getRefCount(p1
) == 2)
175 it
= locked_table
.erase(it
);
183 size_t SharedStringPool::getCount() const { return mpImpl
->maStrMap
.size(); }
185 size_t SharedStringPool::getCountIgnoreCase() const
187 // this is only called from unit tests, so no need to be efficient
188 std::unordered_set
<OUString
> aUpperSet
;
189 auto locked_table
= mpImpl
->maStrMap
.lock_table();
190 for (auto const& pair
: locked_table
)
191 aUpperSet
.insert(pair
.second
.second
);
192 return aUpperSet
.size();
196 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */