1 --[[--------------------------------------------------------------------
3 Gazelle: a system for building fast, reusable parsers
7 Code that takes the final optimized parsing structures and emits them
8 to bytecode (in Bitcode format).
10 Copyright (c) 2007-2008 Joshua Haberman. See LICENSE for details.
12 --------------------------------------------------------------------]]--
16 -- See FILEFORMAT for details about what these constants mean
26 BC_INTFA_FINAL_STATE
= 1
27 BC_INTFA_TRANSITION
= 2
28 BC_INTFA_TRANSITION_RANGE
= 3
33 BC_RTN_STATE_WITH_INTFA
= 2
34 BC_RTN_STATE_WITH_GLA
= 3
35 BC_RTN_TRIVIAL_STATE
= 4
36 BC_RTN_TRANSITION_TERMINAL
= 5
37 BC_RTN_TRANSITION_NONTERM
= 6
40 BC_GLA_FINAL_STATE
= 1
43 if not print_verbose
then
44 function print_verbose(str
)
49 function write_bytecode(grammar
, outfilename
)
50 -- write Bitcode header
51 bc_file
= bc
.File
:new(outfilename
, "GH")
52 abbrevs
= define_abbrevs(bc_file
)
54 -- Obtain linearized representations of all the DFAs from the Grammar object.
55 local strings
= grammar
:get_strings()
56 local rtns
= grammar
:get_flattened_rtn_list()
57 local glas
= grammar
:get_flattened_gla_list()
58 local intfas
= grammar
.master_intfas
61 print_verbose(string.format("Writing %d strings...", strings
:count()))
62 bc_file
:enter_subblock(BC_STRINGS
)
63 for string in each(strings
) do
64 bc_file
:write_abbreviated_record(abbrevs
.bc_string
, string)
66 bc_file
:end_subblock(BC_STRINGS
)
69 print_verbose(string.format("Writing %d IntFAs...", intfas
:count()))
70 bc_file
:enter_subblock(BC_INTFAS
)
71 for intfa
in each(intfas
) do
72 emit_intfa(intfa
, strings
, bc_file
, abbrevs
)
74 bc_file
:end_subblock(BC_INTFAS
)
77 print_verbose(string.format("Writing %d GLAs...", glas
:count()))
78 bc_file
:enter_subblock(BC_GLAS
)
79 for gla
in each(glas
) do
80 emit_gla(gla
, strings
, rtns
, intfas
, bc_file
, abbrevs
)
82 bc_file
:end_subblock(BC_GLAS
)
85 bc_file
:enter_subblock(BC_RTNS
)
86 print_verbose(string.format("Writing %d RTNs...", rtns
:count()))
87 for name
, rtn
in each(rtns
) do
88 emit_rtn(name
, rtn
, rtns
, glas
, intfas
, strings
, bc_file
, abbrevs
)
90 bc_file
:end_subblock(BC_RTNS
)
95 function emit_intfa(intfa
, strings
, bc_file
, abbrevs
)
96 bc_file
:enter_subblock(BC_INTFA
)
98 local intfa_state_offsets
= {}
99 local intfa_transitions
= {}
101 -- order the states such that the start state is emitted first
102 local states
= intfa
:states()
103 states
:remove(intfa
.start
)
104 states
= states
:to_array()
105 table.insert(states
, 1, intfa
.start
)
107 -- do a first pass over the states that records their order and builds
108 -- each state's list of transitions.
109 local state_transitions
= {}
110 for i
, state
in ipairs(states
) do
111 intfa_state_offsets
[state
] = i
- 1
113 state_transitions
[state
] = {}
114 for edge_val
, target_state
, properties
in state
:transitions() do
115 for range
in edge_val
:each_range() do
116 table.insert(state_transitions
[state
], {range
, target_state
})
120 -- sort the transitions into a stable order, to make the output
121 -- more deterministic.
122 table.sort(state_transitions
[state
], function (a
, b
) return a
[1].low
< b
[1].low
end)
124 -- add this state's transitions to the global list of transitions for the IntFA
125 for t
in each(state_transitions
[state
])
126 do table.insert(intfa_transitions
, t
)
130 print_verbose(string.format(" %d states, %d transitions", #states
, #intfa_transitions
))
133 for state
in each(states
) do
135 bc_file
:write_abbreviated_record(abbrevs
.bc_intfa_final_state
,
136 #state_transitions
[state
],
137 strings
:offset_of(state
.final
))
139 bc_file
:write_abbreviated_record(abbrevs
.bc_intfa_state
, #state_transitions
[state
])
143 -- emit the transitions
144 for transition
in each(intfa_transitions
) do
145 local range
, target_state
= unpack(transition
)
146 target_state_offset
= intfa_state_offsets
[target_state
]
147 if range
.low
== range
.high
then
148 bc_file
:write_abbreviated_record(abbrevs
.bc_intfa_transition
, range
.low
, target_state_offset
)
150 local high
= range
.high
151 if high
== math
.huge
then high
= 255 end -- temporary ASCII-specific hack
152 bc_file
:write_abbreviated_record(abbrevs
.bc_intfa_transition_range
, range
.low
, high
, target_state_offset
)
156 bc_file
:end_subblock(BC_INTFA
)
159 function emit_gla(gla
, strings
, rtns
, intfas
, bc_file
, abbrevs
)
160 bc_file
:enter_subblock(BC_GLA
)
162 local states
= OrderedSet
:new()
163 states
:add(gla
.start
)
164 for state
in each(gla
:states()) do
165 if state
~= gla
.start
then
171 for state
in each(states
) do
173 -- figure out the offset of the RTN transition this GLA final state implies.
174 local ordered_rtn
= rtns
:get(gla
.rtn_state
.rtn
.name
)
175 local transitions
= ordered_rtn
.transitions
[gla
.rtn_state
]
176 local transition_offset
= nil
177 if state
.final
[1] == 0 and state
.final
[2] == 0 then
178 -- This is a "return" prediction
179 transition_offset
= 0
181 for i
=1,#transitions
do
182 if transitions
[i
][1] == state
.final
[1] and transitions
[i
][2] == state
.final
[2] then
183 transition_offset
= i
189 if transition_offset
== nil then
190 error("GLA final state indicated a state that was not found in the RTN state.")
192 bc_file
:write_abbreviated_record(abbrevs
.bc_gla_final_state
, transition_offset
)
194 bc_file
:write_abbreviated_record(abbrevs
.bc_gla_state
,
195 intfas
:offset_of(state
.intfa
),
196 state
:num_transitions())
201 for state
in each(states
) do
202 for edge_val
, dest_state
in state
:transitions() do
204 if edge_val
~= fa
.eof
then
205 edge_num
= strings
:offset_of(edge_val
) + 1
207 bc_file
:write_abbreviated_record(abbrevs
.bc_gla_transition
,
209 states
:offset_of(dest_state
))
213 bc_file
:end_subblock(BC_GLA
)
216 function emit_rtn(name
, rtn
, rtns
, glas
, intfas
, strings
, bc_file
, abbrevs
)
218 bc_file
:enter_subblock(BC_RTN
)
219 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_info
, strings
:offset_of(name
), rtn
.slot_count
)
222 for state
in each(rtn
.states
) do
231 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_state_with_gla
,
232 #rtn
.transitions
[state
],
234 glas
:offset_of(state
.gla
))
235 elseif state
.intfa
then
236 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_state_with_intfa
,
237 #rtn
.transitions
[state
],
239 intfas
:offset_of(state
.intfa
))
241 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_trivial_state
,
242 #rtn
.transitions
[state
],
248 for state
in each(rtn
.states
) do
249 for transition
in each(rtn
.transitions
[state
]) do
250 local edge_val
, dest_state
, properties
= unpack(transition
)
251 local bytecode_slotnum
252 if properties
.slotnum
== -1 then
255 bytecode_slotnum
= properties
.slotnum
257 if fa
.is_nonterm(edge_val
) then
258 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_transition_nonterm
,
259 rtns
:offset_of_key(edge_val
.name
),
260 rtn
.states
:offset_of(dest_state
),
261 strings
:offset_of(properties
.name
),
264 bc_file
:write_abbreviated_record(abbrevs
.bc_rtn_transition_terminal
,
265 strings
:offset_of(edge_val
),
266 rtn
.states
:offset_of(dest_state
),
267 strings
:offset_of(properties
.name
),
273 bc_file
:end_subblock(BC_RTN
)
277 function define_abbrevs(bc_file
)
280 -- Enter a BLOCKINFO record to define abbreviations for all our records.
281 -- See FILEFORMAT for a description of what all the record types mean.
282 bc_file
:enter_subblock(bc
.BLOCKINFO
)
284 -- IntFA abbreviations
285 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_INTFA
)
287 abbrevs
.bc_intfa_final_state
= bc_file
:define_abbreviation(4,
288 bc
.LiteralOp
:new(BC_INTFA_FINAL_STATE
),
292 abbrevs
.bc_intfa_state
= bc_file
:define_abbreviation(5,
293 bc
.LiteralOp
:new(BC_INTFA_STATE
),
296 abbrevs
.bc_intfa_transition
= bc_file
:define_abbreviation(6,
297 bc
.LiteralOp
:new(BC_INTFA_TRANSITION
),
301 abbrevs
.bc_intfa_transition_range
= bc_file
:define_abbreviation(7,
302 bc
.LiteralOp
:new(BC_INTFA_TRANSITION_RANGE
),
307 -- Strings abbreviations
308 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_STRINGS
)
310 abbrevs
.bc_string
= bc_file
:define_abbreviation(4,
311 bc
.LiteralOp
:new(BC_STRING
),
312 bc
.ArrayOp
:new(bc
.FixedOp
:new(7)))
315 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_RTN
)
317 abbrevs
.bc_rtn_info
= bc_file
:define_abbreviation(4,
318 bc
.LiteralOp
:new(BC_RTN_INFO
),
322 abbrevs
.bc_rtn_state_with_intfa
= bc_file
:define_abbreviation(5,
323 bc
.LiteralOp
:new(BC_RTN_STATE_WITH_INTFA
),
328 abbrevs
.bc_rtn_state_with_gla
= bc_file
:define_abbreviation(6,
329 bc
.LiteralOp
:new(BC_RTN_STATE_WITH_GLA
),
334 abbrevs
.bc_rtn_trivial_state
= bc_file
:define_abbreviation(7,
335 bc
.LiteralOp
:new(BC_RTN_TRIVIAL_STATE
),
339 abbrevs
.bc_rtn_transition_terminal
= bc_file
:define_abbreviation(8,
340 bc
.LiteralOp
:new(BC_RTN_TRANSITION_TERMINAL
),
346 abbrevs
.bc_rtn_transition_nonterm
= bc_file
:define_abbreviation(9,
347 bc
.LiteralOp
:new(BC_RTN_TRANSITION_NONTERM
),
354 bc_file
:write_unabbreviated_record(bc
.SETBID
, BC_GLA
)
356 abbrevs
.bc_gla_state
= bc_file
:define_abbreviation(4,
357 bc
.LiteralOp
:new(BC_GLA_STATE
),
361 abbrevs
.bc_gla_final_state
= bc_file
:define_abbreviation(5,
362 bc
.LiteralOp
:new(BC_GLA_FINAL_STATE
),
365 abbrevs
.bc_gla_transition
= bc_file
:define_abbreviation(6,
366 bc
.LiteralOp
:new(BC_GLA_TRANSITION
),
370 bc_file
:end_subblock(bc
.BLOCKINFO
)