2 # Copyright (C) 2013 Google Inc. All rights reserved.
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
8 # * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following disclaimer
12 # in the documentation and/or other materials provided with the
14 # * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived from
16 # this software without specific prior written permission.
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 class BadInput(Exception):
37 """Unsupported input has been found."""
40 class SwitchCase(object):
41 """Represents a CASE block."""
42 def __init__(self
, identifier
, block
):
43 self
.identifier
= identifier
47 class Optimizer(object):
48 """Generates optimized identifier matching code."""
49 def __init__(self
, output_file
, array_variable
, length_variable
):
50 self
.output_file
= output_file
51 self
.array_variable
= array_variable
52 self
.length_variable
= length_variable
54 def inspect(self
, cases
):
55 lengths
= list(set([len(c
.identifier
) for c
in cases
]))
59 self
.inspect_array([c
for c
in cases
if len(c
.identifier
) == length
], range(length
))
60 self
.write_selection(self
.length_variable
, lengths
, str, response
)
62 def score(self
, alternatives
):
63 return -sum([len(list(count
)) ** 2 for _
, count
in itertools
.groupby(sorted(alternatives
))])
65 def choose_selection_pos(self
, cases
, pending
):
66 candidates
= [pos
for pos
in pending
if all(alternative
.isalpha() for alternative
in [c
.identifier
[pos
] for c
in cases
])]
68 raise BadInput('Case-insensitive switching on non-alphabetic characters not yet implemented')
69 return sorted(candidates
, key
=lambda pos
: self
.score([c
.identifier
[pos
] for c
in cases
]))[0]
71 def inspect_array(self
, cases
, pending
):
72 assert len(cases
) >= 1
74 common
= [pos
for pos
in pending
75 if len(set([c
.identifier
[pos
] for c
in cases
])) == 1]
77 identifier
= cases
[0].identifier
78 for index
in xrange(len(common
)):
80 self
.output_file
.write(u
'if (LIKELY(')
82 self
.output_file
.write(u
' && ')
84 if identifier
[pos
].isalpha():
85 self
.output_file
.write("(%s[%d] | 0x20) == '%s'" %
86 (self
.array_variable
, pos
, identifier
[pos
]))
88 self
.output_file
.write("%s[%d] == '%s'" %
89 (self
.array_variable
, pos
, identifier
[pos
]))
90 self
.output_file
.write(u
')) {\n')
91 next_pending
= list(set(pending
) - set(common
))
93 self
.inspect_array(cases
, next_pending
)
94 self
.output_file
.write(u
'}\n')
96 pos
= self
.choose_selection_pos(cases
, pending
)
97 next_pending
= filter(lambda p
: p
!= pos
, pending
)
99 alternatives
= list(set([c
.identifier
[pos
] for c
in cases
]))
102 def literal(alternative
):
103 if isinstance(alternative
, int):
104 return str(alternative
)
106 return "'%s'" % alternative
108 def response(alternative
):
109 self
.inspect_array([c
for c
in cases
if c
.identifier
[pos
] == alternative
],
112 expression
= '(%s[%d] | 0x20)' % (self
.array_variable
, pos
)
113 self
.write_selection(expression
, alternatives
, literal
, response
)
115 assert len(cases
) == 1
116 for block_line
in cases
[0].block
:
117 self
.output_file
.write(block_line
)
119 def write_selection(self
, expression
, alternatives
, literal
, response
):
120 if len(alternatives
) == 1:
121 self
.output_file
.write(u
'if (LIKELY(%s == %s)) {\n' % (expression
, literal(alternatives
[0])))
122 response(alternatives
[0])
123 self
.output_file
.write(u
'}\n')
124 elif len(alternatives
) == 2:
125 self
.output_file
.write(u
'if (%s == %s) {\n' % (expression
, literal(alternatives
[0])))
126 response(alternatives
[0])
127 self
.output_file
.write(u
'} else if (LIKELY(%s == %s)) {\n' % (expression
, literal(alternatives
[1])))
128 response(alternatives
[1])
129 self
.output_file
.write(u
'}\n')
131 self
.output_file
.write('switch (%s) {\n' % expression
)
132 for alternative
in alternatives
:
133 self
.output_file
.write(u
'case %s: {\n' % literal(alternative
))
134 response(alternative
)
135 self
.output_file
.write(u
'} break;\n')
136 self
.output_file
.write(u
'}\n')
139 class LineProcessor(object):
140 def process_line(self
, line
):
144 class MainLineProcessor(LineProcessor
):
145 """Processes the contents of an input file."""
146 SWITCH_PATTERN
= re
.compile(r
'\s*SWITCH\s*\((\w*),\s*(\w*)\) \{$')
148 def __init__(self
, output_file
):
149 self
.output_file
= output_file
151 def process_line(self
, line
):
152 match_switch
= MainLineProcessor
.SWITCH_PATTERN
.match(line
)
154 array_variable
= match_switch
.group(1)
155 length_variable
= match_switch
.group(2)
156 return SwitchLineProcessor(self
, self
.output_file
, array_variable
, length_variable
)
158 self
.output_file
.write(line
)
162 class SwitchLineProcessor(LineProcessor
):
163 """Processes the contents of a SWITCH block."""
164 CASE_PATTERN
= re
.compile(r
'\s*CASE\s*\(\"([a-z0-9_\-\(]*)\"\) \{$')
165 CLOSE_BRACE_PATTERN
= re
.compile(r
'\s*\}$')
166 EMPTY_PATTERN
= re
.compile(r
'\s*$')
168 def __init__(self
, parent
, output_file
, array_variable
, length_variable
):
170 self
.output_file
= output_file
171 self
.array_variable
= array_variable
172 self
.length_variable
= length_variable
175 def process_line(self
, line
):
176 match_case
= SwitchLineProcessor
.CASE_PATTERN
.match(line
)
177 match_close_brace
= SwitchLineProcessor
.CLOSE_BRACE_PATTERN
.match(line
)
178 match_empty
= SwitchLineProcessor
.EMPTY_PATTERN
.match(line
)
180 identifier
= match_case
.group(1)
181 return CaseLineProcessor(self
, self
.output_file
, identifier
)
182 elif match_close_brace
:
183 Optimizer(self
.output_file
, self
.array_variable
, self
.length_variable
).inspect(self
.cases
)
188 raise BadInput('Invalid line within SWITCH: %s' % line
)
190 def add_case(self
, latest_case
):
191 if latest_case
.identifier
in [c
.identifier
for c
in self
.cases
]:
192 raise BadInput('Repeated case: %s' % latest_case
.identifier
)
193 self
.cases
.append(latest_case
)
196 class CaseLineProcessor(LineProcessor
):
197 """Processes the contents of a CASE block."""
198 CLOSE_BRACE_PATTERN
= re
.compile(r
'\s*\}$')
199 BREAK_PATTERN
= re
.compile(r
'break;')
201 def __init__(self
, parent
, output_file
, identifier
):
203 self
.output_file
= output_file
204 self
.identifier
= identifier
207 def process_line(self
, line
):
208 match_close_brace
= CaseLineProcessor
.CLOSE_BRACE_PATTERN
.match(line
)
209 match_break
= CaseLineProcessor
.BREAK_PATTERN
.search(line
)
210 if match_close_brace
:
211 self
.parent
.add_case(SwitchCase(self
.identifier
, self
.block
))
214 raise BadInput('break within CASE not supported: %s' % line
)
216 self
.block
.append(line
)
220 def process_file(input_name
, output_name
):
221 """Transforms input file into legal C++ source code."""
222 with io
.open(input_name
, 'r', -1, 'utf-8') as input_file
:
223 with io
.open(output_name
, 'w', -1, 'utf-8') as output_file
:
224 processor
= MainLineProcessor(output_file
)
225 input_lines
= input_file
.readlines()
226 for line
in input_lines
:
227 processor
= processor
.process_line(line
)
230 if __name__
== '__main__':
231 process_file(sys
.argv
[1], sys
.argv
[2])