Merge pull request #26220 from 78andyp/blurayfixes
[xbmc.git] / xbmc / utils / RegExp.h
blob2d018265c0f0a688245e506b682b09058359ccfa
1 /*
2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
9 #pragma once
11 //! @todo - move to std::regex (after switching to gcc 4.9 or higher) and get rid of CRegExp
13 #include <string>
14 #include <vector>
16 #define PCRE2_CODE_UNIT_WIDTH 8
17 #include <pcre2.h>
19 class CRegExp
21 public:
22 enum studyMode
24 NoStudy = 0, // do not study expression
25 StudyRegExp = 1, // study expression (slower compilation, faster find)
26 StudyWithJitComp // study expression and JIT-compile it, if possible (heavyweight optimization)
28 enum utf8Mode
30 autoUtf8 = -1, // analyze regexp for UTF-8 multi-byte chars, for Unicode codes > 0xFF
31 // or explicit Unicode properties (\p, \P and \X), enable UTF-8 mode if any of them are found
32 asciiOnly = 0, // process regexp and strings as single-byte encoded strings
33 forceUtf8 = 1 // enable UTF-8 mode (with Unicode properties)
36 static const int m_MaxNumOfBackrefrences = 20;
37 /**
38 * @param caseless (optional) Matching will be case insensitive if set to true
39 * or case sensitive if set to false
40 * @param utf8 (optional) Control UTF-8 processing
42 CRegExp(bool caseless = false, utf8Mode utf8 = asciiOnly);
43 /**
44 * Create new CRegExp object and compile regexp expression in one step
45 * @warning Use only with hardcoded regexp when you're sure that regexp is compiled without errors
46 * @param caseless Matching will be case insensitive if set to true
47 * or case sensitive if set to false
48 * @param utf8 Control UTF-8 processing
49 * @param re The regular expression
50 * @param study (optional) Controls study of expression, useful if expression will be used
51 * several times
53 CRegExp(bool caseless, utf8Mode utf8, const char *re, studyMode study = NoStudy);
55 CRegExp(const CRegExp& re);
56 ~CRegExp();
58 /**
59 * Compile (prepare) regular expression
60 * @param re The regular expression
61 * @param study (optional) Controls study of expression, useful if expression will be used
62 * several times
63 * @return true on success, false on any error
65 bool RegComp(const char *re, studyMode study = NoStudy);
67 /**
68 * Compile (prepare) regular expression
69 * @param re The regular expression
70 * @param study (optional) Controls study of expression, useful if expression will be used
71 * several times
72 * @return true on success, false on any error
74 bool RegComp(const std::string& re, studyMode study = NoStudy)
75 { return RegComp(re.c_str(), study); }
77 /**
78 * Find first match of regular expression in given string
79 * @param str The string to match against regular expression
80 * @param startoffset (optional) The string offset to start matching
81 * @param maxNumberOfCharsToTest (optional) The maximum number of characters to test (match) in
82 * string. If set to -1 string checked up to the end.
83 * @return staring position of match in string, negative value in case of error or no match
85 int RegFind(const char* str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1);
86 /**
87 * Find first match of regular expression in given string
88 * @param str The string to match against regular expression
89 * @param startoffset (optional) The string offset to start matching
90 * @param maxNumberOfCharsToTest (optional) The maximum number of characters to test (match) in
91 * string. If set to -1 string checked up to the end.
92 * @return staring position of match in string, negative value in case of error or no match
94 int RegFind(const std::string& str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1)
95 { return PrivateRegFind(str.length(), str.c_str(), startoffset, maxNumberOfCharsToTest); }
96 std::string GetReplaceString(const std::string& sReplaceExp) const;
97 int GetFindLen() const
99 if (!m_re || !m_bMatched)
100 return 0;
102 return (m_iOvector[1] - m_iOvector[0]);
104 int GetSubCount() const { return m_iMatchCount - 1; } // PCRE returns the number of sub-patterns + 1
105 int GetSubStart(int iSub) const;
106 int GetSubLength(int iSub) const;
107 int GetCaptureTotal() const;
108 std::string GetMatch(int iSub = 0) const;
109 const std::string& GetPattern() const { return m_pattern; }
110 void DumpOvector(int iLog);
112 * Check is RegExp object is ready for matching
113 * @return true if RegExp object is ready for matching, false otherwise
115 inline bool IsCompiled(void) const
116 { return !m_pattern.empty(); }
117 CRegExp& operator= (const CRegExp& re);
118 static bool IsUtf8Supported(void);
119 static bool AreUnicodePropertiesSupported(void);
120 static bool LogCheckUtf8Support(void);
121 static bool IsJitSupported(void);
123 private:
124 int PrivateRegFind(size_t bufferLen, const char *str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1);
125 void InitValues(bool caseless = false, CRegExp::utf8Mode utf8 = asciiOnly);
126 static bool requireUtf8(const std::string& regexp);
127 static int readCharXCode(const std::string& regexp, size_t& pos);
128 static bool isCharClassWithUnicode(const std::string& regexp, size_t& pos);
130 void Cleanup();
131 inline bool IsValidSubNumber(int iSub) const;
133 pcre2_code* m_re;
134 pcre2_match_context* m_ctxt;
135 static const int OVECCOUNT=(m_MaxNumOfBackrefrences + 1) * 3;
136 unsigned int m_offset;
137 pcre2_match_data* m_matchData;
138 PCRE2_SIZE* m_iOvector;
139 utf8Mode m_utf8Mode;
140 int m_iMatchCount;
141 uint32_t m_iOptions;
142 bool m_jitCompiled;
143 bool m_bMatched;
144 pcre2_jit_stack* m_jitStack;
145 std::string m_subject;
146 std::string m_pattern;
147 static int m_Utf8Supported;
148 static int m_UcpSupported;
149 static int m_JitSupported;
152 typedef std::vector<CRegExp> VECCREGEXP;