2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
11 //! @todo - move to std::regex (after switching to gcc 4.9 or higher) and get rid of CRegExp
16 /* make sure stdlib.h is included before including pcre.h inside the
17 namespace; this works around stdlib.h definitions also living in
// Declares the PCRE JIT stack type as an incomplete struct and gives it the
// pcre_jit_stack alias, so a pcre_jit_stack pointer can be declared even when
// PCRE itself is built without JIT (see the inline note below).
22 struct real_pcre_jit_stack
; // forward declaration for PCRE without JIT
23 typedef struct real_pcre_jit_stack pcre_jit_stack
;
// Study levels accepted by the `study` parameter of RegComp() and of the
// compiling constructor; both default to NoStudy.
// NOTE(review): the enclosing enum header is not visible in this view — these
// are presumably the enumerators of `studyMode`.
32 NoStudy
= 0, // do not study expression
33 StudyRegExp
= 1, // study expression (slower compilation, faster find)
34 StudyWithJitComp
// study expression and JIT-compile it, if possible (heavyweight optimization)
// UTF-8 handling modes accepted by the `utf8` constructor parameter; the
// default-argument constructor uses asciiOnly.
// NOTE(review): the enclosing enum header is not visible in this view — these
// are presumably the enumerators of `utf8Mode`.
38 autoUtf8
= -1, // analyze regexp for UTF-8 multi-byte chars, for Unicode codes > 0xFF
39 // or explicit Unicode properties (\p, \P and \X), enable UTF-8 mode if any of them are found
40 asciiOnly
= 0, // process regexp and strings as single-byte encoded strings
41 forceUtf8
= 1 // enable UTF-8 mode (with Unicode properties)
// Maximum number of back-references (per its name). OVECCOUNT, the size of
// m_iOvector, is derived from it as (m_MaxNumOfBackrefrences + 1) * 3.
44 static const int m_MaxNumOfBackrefrences
= 20;
46 * @param caseless (optional) Matching will be case insensitive if set to true
47 * or case sensitive if set to false
48 * @param utf8 (optional) Control UTF-8 processing
// Usable as a default constructor: with no arguments, caseless is false and
// utf8 is asciiOnly. No expression is passed in here; RegComp() takes one.
50 CRegExp(bool caseless
= false, utf8Mode utf8
= asciiOnly
);
52 * Create new CRegExp object and compile regexp expression in one step
53 * @warning Use only with hardcoded regexp when you're sure that regexp is compiled without errors
54 * @param caseless Matching will be case insensitive if set to true
55 * or case sensitive if set to false
56 * @param utf8 Control UTF-8 processing
57 * @param re The regular expression
58 * @param study (optional) Controls study of expression, useful if expression will be used
// Construct-and-compile overload: caseless, utf8 and re are mandatory, study
// defaults to NoStudy. There is no return value, so a compile failure cannot be
// reported through this call itself.
61 CRegExp(bool caseless
, utf8Mode utf8
, const char *re
, studyMode study
= NoStudy
);
// Copy constructor (definition not part of this view).
63 CRegExp(const CRegExp
& re
);
67 * Compile (prepare) regular expression
68 * @param re The regular expression
69 * @param study (optional) Controls study of expression, useful if expression will be used
71 * @return true on success, false on any error
// C-string overload, declared here and defined out of line. The std::string
// overload forwards to it with re.c_str().
73 bool RegComp(const char *re
, studyMode study
= NoStudy
);
76 * Compile (prepare) regular expression
77 * @param re The regular expression
78 * @param study (optional) Controls study of expression, useful if expression will be used
80 * @return true on success, false on any error
82 bool RegComp(const std::string
& re
, studyMode study
= NoStudy
)
83 { return RegComp(re
.c_str(), study
); }
86 * Find first match of regular expression in given string
87 * @param str The string to match against regular expression
88 * @param startoffset (optional) The string offset to start matching
89 * @param maxNumberOfCharsToTest (optional) The maximum number of characters to test (match) in
90 * string. If set to -1 string checked up to the end.
91 * @return starting position of match in string, negative value in case of error or no match
// C-string overload, declared here and defined out of line. Both optional
// arguments have defaults: startoffset = 0 and maxNumberOfCharsToTest = -1.
93 int RegFind(const char* str
, unsigned int startoffset
= 0, int maxNumberOfCharsToTest
= -1);
95 * Find first match of regular expression in given string
96 * @param str The string to match against regular expression
97 * @param startoffset (optional) The string offset to start matching
98 * @param maxNumberOfCharsToTest (optional) The maximum number of characters to test (match) in
99 * string. If set to -1 string checked up to the end.
100 * @return staring position of match in string, negative value in case of error or no match
102 int RegFind(const std::string
& str
, unsigned int startoffset
= 0, int maxNumberOfCharsToTest
= -1)
103 { return PrivateRegFind(str
.length(), str
.c_str(), startoffset
, maxNumberOfCharsToTest
); }
// Returns a new string built from sReplaceExp; const, so it does not modify
// this object.
// NOTE(review): presumably expands references to the last match's
// sub-patterns inside sReplaceExp — the definition is not in this view, confirm.
104 std::string
GetReplaceString(const std::string
& sReplaceExp
) const;
// Length of the current match, computed as m_iOvector[1] - m_iOvector[0].
// NOTE(review): in this view the guard on (!m_re || !m_bMatched) has no
// statement of its own and the function's braces are missing. What is returned
// when nothing is compiled or matched cannot be determined from here — confirm
// against the complete file before relying on it.
105 int GetFindLen() const
107 if (!m_re
|| !m_bMatched
)
110 return (m_iOvector
[1] - m_iOvector
[0]);
112 int GetSubCount() const { return m_iMatchCount
- 1; } // PCRE returns the number of sub-patterns + 1
// Accessors for captured sub-patterns, addressed either by number (iSub) or by
// name (subName). All are const. Their definitions are not part of this view,
// so the per-function notes below are inferred from the names — TODO confirm.

// Presumably the start offset of the given sub-pattern's match.
113 int GetSubStart(int iSub
) const;
114 int GetSubStart(const std::string
& subName
) const;
// Presumably the length of the given sub-pattern's match.
115 int GetSubLength(int iSub
) const;
116 int GetSubLength(const std::string
& subName
) const;
// NOTE(review): likely the number of capture groups in the compiled pattern — confirm.
117 int GetCaptureTotal() const;
// Returns the matched text as a new string; iSub defaults to 0
// (presumably meaning the whole match — confirm).
118 std::string
GetMatch(int iSub
= 0) const;
119 std::string
GetMatch(const std::string
& subName
) const;
120 const std::string
& GetPattern() const { return m_pattern
; }
// strMatch is a non-const reference, i.e. an output parameter.
// NOTE(review): presumably receives the text matched by the sub-pattern named
// strName, with the bool result reporting whether it was found — confirm.
121 bool GetNamedSubPattern(const char* strName
, std::string
& strMatch
) const;
// NOTE(review): presumably maps a sub-pattern name to its number — confirm,
// including what is returned for an unknown name.
122 int GetNamedSubPatternNumber(const char* strName
) const;
// NOTE(review): looks like a debugging aid that writes m_iOvector to the log
// at level iLog — confirm. Not const, unlike the accessors above.
123 void DumpOvector(int iLog
);
125 * Check is RegExp object is ready for matching
126 * @return true if RegExp object is ready for matching, false otherwise
128 inline bool IsCompiled(void) const
129 { return !m_pattern
.empty(); }
// Copy assignment; returns a CRegExp reference (definition not part of this view).
130 CRegExp
& operator= (const CRegExp
& re
);
// Static capability queries, callable without a CRegExp instance.
// NOTE(review): the names suggest they report what the PCRE library in use
// supports (UTF-8, Unicode properties, JIT), possibly cached in
// m_Utf8Supported / m_UcpSupported / m_JitSupported — confirm.
131 static bool IsUtf8Supported(void);
132 static bool AreUnicodePropertiesSupported(void);
133 static bool LogCheckUtf8Support(void);
134 static bool IsJitSupported(void);
// Worker behind RegFind(): the std::string overload calls it with
// str.length() as bufferLen and str.c_str() as str. The optional arguments
// carry the same defaults as RegFind().
137 int PrivateRegFind(size_t bufferLen
, const char *str
, unsigned int startoffset
= 0, int maxNumberOfCharsToTest
= -1);
// NOTE(review): presumably shared member initialisation used by the
// constructors; its defaults mirror the default-argument constructor — confirm.
138 void InitValues(bool caseless
= false, CRegExp::utf8Mode utf8
= asciiOnly
);
// Static helpers that inspect a regexp string. `pos` is passed by non-const
// reference, so the callee may move it.
// NOTE(review): these look like the pattern analysis behind the autoUtf8 mode — confirm.
139 static bool requireUtf8(const std::string
& regexp
);
140 static int readCharXCode(const std::string
& regexp
, size_t& pos
);
141 static bool isCharClassWithUnicode(const std::string
& regexp
, size_t& pos
);
// NOTE(review): presumably range-checks a sub-pattern number before it is
// used as an index — confirm.
144 inline bool IsValidSubNumber(int iSub
) const;
// Data members. m_re, m_bMatched and m_iMatchCount are used by the inline
// methods of this class but their declarations are not part of this view.

// NOTE(review): presumably the PCRE study data for the compiled expression — confirm.
147 PCRE::pcre_extra
* m_sd
;
// Number of ints in m_iOvector: three per (maximum back-references + 1).
148 static const int OVECCOUNT
=(m_MaxNumOfBackrefrences
+ 1) * 3;
149 unsigned int m_offset
;
// Match offsets; GetFindLen() computes the match length as element [1] minus element [0].
150 int m_iOvector
[OVECCOUNT
];
// NOTE(review): presumably the stack used for JIT-compiled matching — confirm.
156 PCRE::pcre_jit_stack
* m_jitStack
;
157 std::string m_subject
;
// The pattern string; IsCompiled() reports true while it is non-empty and
// GetPattern() returns it by const reference.
158 std::string m_pattern
;
// NOTE(review): static ints, presumably cached results for IsUtf8Supported(),
// AreUnicodePropertiesSupported() and IsJitSupported() — confirm the encoding
// of "not yet checked".
159 static int m_Utf8Supported
;
160 static int m_UcpSupported
;
161 static int m_JitSupported
;
// Convenience alias for a vector of CRegExp objects.
164 typedef std::vector
<CRegExp
> VECCREGEXP
;