1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include <svl/sharedstringpool.hxx>
#include <svl/sharedstring.hxx>
#include <unotools/charclass.hxx>

#include <cassert>
#include <mutex>
#include <unordered_map>
#include <unordered_set>
#include <utility>
18 /** create a key class that caches the hashcode */
25 StringWithHash(OUString s
)
27 , hashCode(str
.hashCode())
31 bool operator==(StringWithHash
const& rhs
) const
33 if (hashCode
!= rhs
.hashCode
)
35 return str
== rhs
.str
;
42 template <> struct hash
<StringWithHash
>
44 std::size_t operator()(const StringWithHash
& k
) const { return k
.hashCode
; }
52 sal_Int32
getRefCount(const rtl_uString
* p
) { return (p
->refCount
& 0x3FFFFFFF); }
55 struct SharedStringPool::Impl
57 mutable std::mutex maMutex
;
58 // We use this map for two purposes - to store lower->upper case mappings
59 // and to retrieve a shared uppercase object, so the management logic
61 std::unordered_map
<StringWithHash
, OUString
> maStrMap
;
62 const CharClass
& mrCharClass
;
64 explicit Impl(const CharClass
& rCharClass
)
65 : mrCharClass(rCharClass
)
70 SharedStringPool::SharedStringPool(const CharClass
& rCharClass
)
71 : mpImpl(new Impl(rCharClass
))
73 // make sure the one empty string instance is shared in this pool as well
74 intern(SharedString::EMPTY_STRING
);
75 assert(intern(SharedString::EMPTY_STRING
) == SharedString::getEmptyString());
78 SharedStringPool::~SharedStringPool() {}
80 SharedString
SharedStringPool::intern(const OUString
& rStr
)
82 StringWithHash
aStrWithHash(rStr
);
83 std::scoped_lock
<std::mutex
> aGuard(mpImpl
->maMutex
);
85 auto[mapIt
, bInserted
] = mpImpl
->maStrMap
.emplace(aStrWithHash
, rStr
);
87 // there is already a mapping
88 return SharedString(mapIt
->first
.str
.pData
, mapIt
->second
.pData
);
90 // This is a new string insertion. Establish mapping to upper-case variant.
91 OUString aUpper
= mpImpl
->mrCharClass
.uppercase(rStr
);
93 // no need to do anything more, because we inserted an upper->upper mapping
94 return SharedString(mapIt
->first
.str
.pData
, mapIt
->second
.pData
);
96 // We need to insert a lower->upper mapping, so also insert
97 // an upper->upper mapping, which we can use both for when an upper string
98 // is interned, and to look up a shared upper string.
99 StringWithHash
aUpperWithHash(aUpper
);
100 auto mapIt2
= mpImpl
->maStrMap
.find(aUpperWithHash
);
101 if (mapIt2
!= mpImpl
->maStrMap
.end())
103 // there is an already existing upper string
104 mapIt
->second
= mapIt2
->first
.str
;
105 return SharedString(mapIt
->first
.str
.pData
, mapIt
->second
.pData
);
108 // There is no already existing upper string.
109 // First, update using the iterator, can't do this later because
110 // the iterator will be invalid.
111 mapIt
->second
= aUpper
;
112 mpImpl
->maStrMap
.emplace_hint(mapIt2
, aUpperWithHash
, aUpper
);
113 return SharedString(rStr
.pData
, aUpper
.pData
);
116 void SharedStringPool::purge()
118 std::scoped_lock
<std::mutex
> aGuard(mpImpl
->maMutex
);
120 // Because we can have an uppercase entry mapped to itself,
121 // and then a bunch of lowercase entries mapped to that same
122 // upper-case entry, we need to scan the map twice - the first
123 // time to remove lowercase entries, and then only can we
124 // check for unused uppercase entries.
126 auto it
= mpImpl
->maStrMap
.begin();
127 auto itEnd
= mpImpl
->maStrMap
.end();
130 rtl_uString
* p1
= it
->first
.str
.pData
;
131 rtl_uString
* p2
= it
->second
.pData
;
134 // normal case - lowercase mapped to uppercase, which
135 // means that the lowercase entry has one ref-counted
136 // entry as the key in the map
137 if (getRefCount(p1
) == 1)
139 it
= mpImpl
->maStrMap
.erase(it
);
146 it
= mpImpl
->maStrMap
.begin();
147 itEnd
= mpImpl
->maStrMap
.end();
150 rtl_uString
* p1
= it
->first
.str
.pData
;
151 rtl_uString
* p2
= it
->second
.pData
;
154 // uppercase which is mapped to itself, which means
155 // one ref-counted entry as the key in the map, and
156 // one ref-counted entry in the value in the map
157 if (getRefCount(p1
) == 2)
159 it
= mpImpl
->maStrMap
.erase(it
);
167 size_t SharedStringPool::getCount() const
169 std::scoped_lock
<std::mutex
> aGuard(mpImpl
->maMutex
);
170 return mpImpl
->maStrMap
.size();
173 size_t SharedStringPool::getCountIgnoreCase() const
175 std::scoped_lock
<std::mutex
> aGuard(mpImpl
->maMutex
);
176 // this is only called from unit tests, so no need to be efficient
177 std::unordered_set
<OUString
> aUpperSet
;
178 for (auto const& pair
: mpImpl
->maStrMap
)
179 aUpperSet
.insert(pair
.second
);
180 return aUpperSet
.size();
184 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */