2 --[[--------------------------------------------------------------------
4 Gazelle: a system for building fast, reusable parsers
8 The top-level file for compiling an input grammar (written in a
9 human-readable text format) into a compiled grammar in Bitcode.
11 Copyright (c) 2007-2008 Joshua Haberman. See LICENSE for details.
13 --------------------------------------------------------------------]]--
15 require "bootstrap/rtn"
22 version = "Gazelle v0.3"
23 usage = string.format([[
24 gzlc -- Gazelle grammar compiler.
25 %s http://www.reverberate.org/gazelle/
27 Usage: gzlc [options] input-file
29 -h, --help you're looking at it.
31 -d, dump detailed output about the grammar to
34 -k <depth> Maximum LL(k) to consider (by default, uses a
35 heuristic that attempts to determine if the
36 grammar is LL(k) for *any* k).
39 skip the minimization step for RTNs. This results
40 in larger RTNs, but may be necessary if minimization
41 is taking too long (this should only occur in
42 artificially-complicated grammars).
44 -o <file> output filename. Default is input filename
45 with extension replaced with .gzc
47 -v, --verbose dump information about compilation process and
50 --version dump Gazelle version
62 while argnum <= #arg do
64 if a == "-h" or a == "--help" then
65 io.stderr:write(usage)
71 k = tonumber(arg[argnum])
73 io.stderr:write("gzlc: non-numeric argument to the -k option\n") -- Lua has no global "stderr"; the handle lives at io.stderr
78 output_filename = arg[argnum]
79 if output_filename == nil then
80 io.stderr:write("gzlc: argument -o must be followed by a file name\n") -- same handle as every other diagnostic in this script
83 elseif a == "-v" or a == "--verbose" then
85 elseif a == "--version" then
88 elseif a == "--no-minimize-rtns" then
90 elseif a:match("^-") then
91 io.stderr:write(string.format("gzlc: unrecognized option '%s'\n", a))
94 if input_filename then
95 io.stderr:write("gzlc: only one input file may be specified\n")
98 input_filename = arg[argnum]
103 if input_filename == nil then
104 io.stderr:write("gzlc: no input file\n")
108 if output_filename == nil then
109 output_filename = input_filename:gsub("%.[^%./\\]*$", "") .. ".gzc" -- strip only the basename's extension: the match may not cross a path separator, so "./foo" -> "./foo.gzc" rather than ".gzc"
112 function print_verbose(str)
118 function write_verbose(str)
124 print_verbose(version)
127 -- We need to generate and emit RTNs, GLAs, and IntFAs. We work from the
128 -- top down: RTNs are generated from parsing the grammar, GLAs are
129 -- calculated from the RTNs by LL lookahead routines, and finally
130 -- IntFAs are generated from the RTNs and GLAs.
132 -- open and parse the grammar file
134 print_verbose(string.format("Opening input file '%s'...", input_filename))
135 input_file = io.open(input_filename, "r")
136 if not input_file then
137 io.stderr:write(string.format("gzlc: couldn't open input file '%s'\n", input_filename))
140 grm_str = input_file:read("*a")
142 print_verbose("Parsing grammar...")
143 grammar = parse_grammar(CharStream:new(grm_str))
144 grammar:check_defined()
146 -- assign priorities to RTN transitions
147 print_verbose("Assigning RTN transition priorities...")
148 grammar:assign_priorities()
150 -- make the RTNs in the grammar deterministic and minimal
151 print_verbose("Converting RTN NFAs to DFAs...")
152 grammar:determinize_rtns()
153 if minimize_rtns then
154 print_verbose("Minimizing RTN DFAs...")
155 grammar:minimize_rtns()
158 -- Generate GLAs by doing lookahead calculations.
159 -- This annotates every nontrivial state in the grammar with a GLA.
160 print_verbose("Doing LL(*) lookahead calculations...")
161 compute_lookahead(grammar, k)
163 -- we now have everything figured out at the RTN and GLA levels. Now we just
164 -- need to figure out how many IntFAs to generate, which terminals each one
165 -- should handle, and generate/determinize/minimize those IntFAs.
167 print_verbose("Generating lexer DFAs...")
168 grammar:generate_intfas()
170 print_verbose(string.format("Writing to output file '%s'...", output_filename))
171 write_bytecode(grammar, output_filename)
174 require "dump_to_html"
175 dump_to_html(input_filename, grammar, "html")