1 # Copyright (C) 2020 Red Hat Inc.
4 # Eduardo Habkost <ehabkost@redhat.com>
6 # This work is licensed under the terms of the GNU GPL, version 2. See
7 # the COPYING file in the top-level directory.
8 """Helpers for creation of regular expressions"""
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def S(*regexps) -> str:
    """Concatenate several regexp fragments into one pattern string

    >>> S('a', 'b', 'c')
    'abc'
    """
    glue = ''
    return glue.join(regexps)
def P(*regexps, name=None, capture=False, repeat='') -> str:
    """Add parentheses around regexp(s), with optional name or repeat suffix

    The fragments in *regexps* are concatenated and placed inside one group:

    * if *name* is given, a named capturing group ``(?P<name>...)``;
    * otherwise, if *capture* is true, a plain capturing group ``(...)``;
    * otherwise a non-capturing group ``(?:...)``.

    *repeat* is appended verbatim right after the closing parenthesis, so it
    applies to the whole group.

    >>> P('a', 'b')
    '(?:ab)'
    >>> P('a', 'b', capture=True, repeat='+')
    '(ab)+'
    >>> P('a', 'b', name='x', repeat='?')
    '(?P<x>ab)?'
    """
    # Plain concatenation of the fragments (what the S() helper does).
    s = ''.join(regexps)
    # An empty name would produce the invalid group '(?P<>...)', so any
    # falsy name is treated as "no name".
    if name:
        return f'(?P<{name}>{s}){repeat}'
    if capture:
        return f'({s}){repeat}'
    return f'(?:{s}){repeat}'
def NAMED(name, *regexps) -> str:
    """Wrap the regexp(s) in a named group, using (?P<name>...) syntax

    >>> NAMED('mygroup', 'xyz', 'abc')
    '(?P<mygroup>xyzabc)'
    """
    group_name = name
    return P(*regexps, name=group_name)
def OR(*regexps, **kwargs) -> str:
    """Build a group of alternatives: (a|b|c)

    The regexps are joined with '|' and the result is wrapped by P();
    any keyword arguments are forwarded to P() unchanged.
    """
    alternatives = '|'.join(regexps)
    return P(alternatives, **kwargs)
def M(*regexps, n='*', name=None) -> str:
    """Add repetition qualifier to regexp(s)

    The fragments in *regexps* are concatenated and wrapped in a group that
    is followed by the qualifier *n* ('*' by default). If *name* is given,
    the whole repeated expression is additionally wrapped in a named group,
    so the name covers all repetitions and not only the last one.

    >>> M('a', 'b')
    '(?:ab)*'
    >>> M('a' , 'b', n='+')
    '(?:ab)+'
    >>> M('a' , 'b', n='{2,3}', name='name')
    '(?P<name>(?:ab){2,3})'
    """
    r = P(*regexps, repeat=n)
    # The name goes around the repeated group, not inside it.
    if name:
        r = NAMED(name, r)
    return r
def OPTIONAL_PARS(R):
    """Make parentheses optional around regexp R

    Returns a group of two alternatives: R enclosed in parentheses (with
    optional whitespace just inside them), or R alone. The parenthesized
    alternative comes first, so it is the one tried first.
    """
    return OR(S(r'\(\s*', R, r'\s*\)'), R)
def test_optional_pars():
    """OPTIONAL_PARS must accept 'abc' bare or fully parenthesized, nothing else"""
    pattern = OPTIONAL_PARS('abc') + '$'
    # Both the bare and the parenthesized form are accepted:
    for accepted in ('abc', '(abc)'):
        assert re.match(pattern, accepted)
    # Wrong content or unbalanced parentheses are rejected:
    for rejected in ('(abcd)', '(abc', 'abc)'):
        assert not re.match(pattern, rejected)
# The inline '(?-m:...)' group turns the MULTILINE flag off for the '^'
# inside it, so the anchor matches only at the very beginning of the
# file and not after every newline:
RE_FILE_BEGIN = r'(?-m:^)'
# A single C comment: either a '//' line comment or a '/* ... */' block
# comment.
#
# - The alternation is wrapped in a non-capturing group, so anything that
#   is concatenated after RE_COMMENT (e.g. trailing whitespace) applies to
#   both kinds of comment and not only to the block-comment alternative.
# - A line comment ends at '$'; compile with re.MULTILINE to match line
#   comments that are not on the last line of the text.
# - Runs of '*' inside a block comment and right before the closing '/'
#   are accepted, so '/***/' and '/* text **/' match.
# - The inner group is kept capturing so group numbering in patterns built
#   from RE_COMMENT does not change.
RE_COMMENT = r'(?://[^\n]*$|/\*([^*]|\*+[^*/])*\*+/)'
# Zero or more repetitions of RE_COMMENT followed by SP:
RE_COMMENTS = M(RE_COMMENT, SP)
# A complete C identifier. The negative lookahead stops the match from
# ending in the middle of a longer identifier when the regexp engine
# backtracks. It must reject '_' too: otherwise 'foo_bar' could be split
# into the two "identifiers" 'foo' and '_bar'.
RE_IDENTIFIER = r'[a-zA-Z_][a-zA-Z0-9_]*(?![a-zA-Z0-9_])'
# A double-quoted C string literal. Inside the quotes, a backslash escapes
# whatever single character follows it (except a newline), so literals
# such as "\\", "\0" or "\'" are matched in addition to "\n" and "\"".
# The group is kept capturing so group numbering in patterns built from
# RE_STRING does not change.
RE_STRING = r'\"([^\"\\]|\\.)*\"'
# A hexadecimal ('0x...') or decimal integer literal.
#
# - The hexadecimal alternative comes first: with the decimal one first,
#   the leading '0' of '0x1F' would be accepted as a complete number.
# - The alternation is wrapped in a non-capturing group so RE_NUMBER can be
#   safely concatenated with other regexps.
RE_NUMBER = r'(?:0x[0-9a-fA-F]+|[0-9]+)'
# One or more whitespace characters or escaped newlines (a backslash
# immediately followed by a newline):
CPP_SPACE = OR(
    r'\s',
    r'\\\n',
    repeat='+',
)
# Characters accepted in a path: letters, digits, '/', '_', '.' and '-'.
RE_PATH = '[a-zA-Z0-9/_.-]+'

# Include target: either "path" in double quotes or <path> in angle
# brackets.
RE_INCLUDEPATH = OR(
    r'\"' + RE_PATH + r'\"',
    r'<' + RE_PATH + r'>',
)
# A whole '#include' line. The include target, together with its quotes or
# angle brackets, is available as the 'includepath' group.
RE_INCLUDE = S(
    r'^[ \t]*#[ \t]*include[ \t]+',
    NAMED('includepath', RE_INCLUDEPATH),
    r'[ \t]*\n',
)

# A whole '#define NAME' line with nothing but blanks after the name.
RE_SIMPLEDEFINE = S(
    r'^[ \t]*#[ \t]*define[ \t]+',
    RE_IDENTIFIER,
    r'[ \t]*\n',
)
# The 'struct' keyword, whitespace, and an identifier.
RE_STRUCT_TYPE = r'struct\s+' + RE_IDENTIFIER

# A type name: a plain identifier or 'struct <identifier>'. The plain
# identifier alternative is tried first.
RE_TYPE = OR(
    RE_IDENTIFIER,
    RE_STRUCT_TYPE,
)
# Two or more identifiers and/or string literals in a row, each one
# followed by SP.
RE_MACRO_CONCAT = M(OR(RE_IDENTIFIER, RE_STRING), SP, n='{2,}')

# A single identifier, string literal or number.
RE_SIMPLE_VALUE = OR(
    RE_IDENTIFIER,
    RE_STRING,
    RE_NUMBER,
)
# A function call taking exactly one simple value: name(value)
RE_FUN_CALL = S(
    RE_IDENTIFIER,
    r'\s*\(\s*',
    RE_SIMPLE_VALUE,
    r'\s*\)',
)

# sizeof(type); the type is available as the 'sizeoftype' group.
RE_SIZEOF = S(
    r'sizeof\s*\(\s*',
    NAMED('sizeoftype', RE_TYPE),
    r'\s*\)',
)
# Address-of expression: '&', optional whitespace, and an identifier.
RE_ADDRESS = r'&\s*' + RE_IDENTIFIER
# One array element in braces, with an optional trailing comma. The
# (optional) simple value inside the braces is available as the
# 'arrayitem' group.
RE_ARRAY_ITEM = S(
    r'{\s*',
    NAMED('arrayitem', M(RE_SIMPLE_VALUE, n='?')),
    r'\s*}\s*,?',
)

# Array cast in parentheses: an identifier followed by '[]'.
RE_ARRAY_CAST = S(
    r'\(\s*',
    RE_IDENTIFIER,
    r'\s*\[\s*\]\)',
)

# Zero or more array elements, each one followed by SP.
RE_ARRAY_ITEMS = M(RE_ARRAY_ITEM, SP)
110 RE_ARRAY
= S(M(RE_ARRAY_CAST
, n
='?'), r
'\s*{\s*',
111 NAMED('arrayitems', RE_ARRAY_ITEMS
),
# NOTE: this covers a very small subset of valid expressions.
# The alternatives are tried in the order listed.
RE_EXPRESSION = OR(
    RE_SIZEOF,
    RE_FUN_CALL,
    RE_MACRO_CONCAT,
    RE_SIMPLE_VALUE,
    RE_ARRAY,
    RE_ADDRESS,
)