// Scintilla source code edit control
/** @file CharacterCategory.h
 ** Returns the Unicode general category of a character.
 **/
// Copyright 2013 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
8 #ifndef CHARACTERCATEGORY_H
9 #define CHARACTERCATEGORY_H
/**
 * Unicode general category of a character.
 *
 * Each enumerator is "cc" followed by the two-letter General_Category
 * abbreviation (Lu = uppercase letter, Nd = decimal digit, ...).
 * Enumerators are numbered implicitly from 0 in declaration order, and
 * CharacterCategoryMap stores them as unsigned char, so the order below is
 * part of the contract: do not reorder or insert in the middle.
 *
 * NOTE(review): the Mark (M*), Number (N*) and Separator (Z*) rows and the
 * closing brace were absent from the listing this was reviewed from; they are
 * restored here in the positions where the listing had gaps. Confirm the
 * order against the category table used by CategoriseCharacter.
 */
enum CharacterCategory {
	ccLu, ccLl, ccLt, ccLm, ccLo,	// Letter
	ccMn, ccMc, ccMe,	// Mark
	ccNd, ccNl, ccNo,	// Number
	ccPc, ccPd, ccPs, ccPe, ccPi, ccPf, ccPo,	// Punctuation
	ccSm, ccSc, ccSk, ccSo,	// Symbol
	ccZs, ccZl, ccZp,	// Separator
	ccCc, ccCf, ccCs, ccCo, ccCn	// Other
};
23 CharacterCategory
CategoriseCharacter(int character
);
// Common definitions of allowable characters in identifiers from UAX #31.
// Each predicate takes the character to test as an int and returns true when
// that character belongs to the corresponding set.
// NOTE(review): the names suggest the UAX #31 properties ID_Start,
// ID_Continue, XID_Start and XID_Continue respectively - confirm against the
// implementation.

/// True when @a character may begin an identifier.
bool IsIdStart(int character);

/// True when @a character may appear after the first character of an identifier.
bool IsIdContinue(int character);

/// As IsIdStart, for the "X" variant of the property.
bool IsXidStart(int character);

/// As IsIdContinue, for the "X" variant of the property.
bool IsXidContinue(int character);
31 class CharacterCategoryMap
{
33 std::vector
<unsigned char> dense
;
35 CharacterCategoryMap();
36 CharacterCategory
CategoryFor(int character
) const {
37 if (static_cast<size_t>(character
) < dense
.size()) {
38 return static_cast<CharacterCategory
>(dense
[character
]);
40 // binary search through ranges
41 return CategoriseCharacter(character
);
44 int Size() const noexcept
;
45 void Optimize(int countCharacters
);