initial
[prop.git] / lib-src / automata / grammar.cc
blob73390e4a0c8e11673b423e8a458f43367968abed
1 //////////////////////////////////////////////////////////////////////////////
2 // NOTICE:
3 //
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are free to incorporate any part of ADLib and Prop into
9 // your programs.
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
16 // code.
18 // This software is still under development and we welcome any suggestions
19 // and help from the users.
21 // Allen Leung
22 // 1994
23 //////////////////////////////////////////////////////////////////////////////
25 #include <iostream.h>
26 #include <strstream.h>
27 #include <iomanip.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <AD/automata/grammar.h>
31 #include <AD/strings/charesc.h>
33 //////////////////////////////////////////////////////////////////////////////
34 // Make hidden types visible
35 //////////////////////////////////////////////////////////////////////////////
36 typedef Grammar::Symbol Symbol;
37 typedef Grammar::Terminal Terminal;
38 typedef Grammar::NonTerminal NonTerminal;
39 typedef Grammar::Production Production;
40 typedef Grammar::EquivMap EquivMap;
42 //////////////////////////////////////////////////////////////////////////////
43 // Constructors
44 //////////////////////////////////////////////////////////////////////////////
45 Grammar::Grammar()
46 : persistent(true), equiv_classes(0), action_map(0), action_map_size(0) {}
47 Grammar::Grammar( const Grammar& G )
48 : persistent(true), equiv_classes(0),
49 action_map(0), action_map_size(0)
50 { *this = G; }
51 Grammar::Grammar( Production P[], int n, Symbol min, Symbol max,
52 NonTerminal start,
53 const char * names[],
54 int map_size, EquivMap m[], EquivMap mm[],
55 Rule action[], int action_size,
56 NonTerminal maxNormal,
57 Bool persist)
58 : productions(P),
59 persistent(persist),
60 number_of_productions(n),
61 minTerminal(min), maxTerminal(max),
62 startSymbol(start), symbol_names(names),
63 equiv_classes(m), equiv_map(mm), action_map(action),
64 action_map_size(action_size)
66 minNonTerminal = 32767;
67 maxNonTerminal = -32768;
68 startProduction = 0;
69 equiv_classes_size = map_size;
71 for (int i = 0; i < n; i++) {
72 Symbol A = P[i][0];
73 if ( A < minNonTerminal ) minNonTerminal = A;
74 if ( A > maxNonTerminal ) maxNonTerminal = A;
75 if ( A == startSymbol ) startProduction = P[i];
78 if (minNonTerminal > maxNonTerminal)
79 minNonTerminal = maxNonTerminal = maxTerminal + 1;
80 maxNormalNonTerminal = maxNormal >= 0 ? maxNormal : maxNonTerminal;
83 //////////////////////////////////////////////////////////////////////////////
84 // Destructor
85 //////////////////////////////////////////////////////////////////////////////
86 Grammar::~Grammar() { clean_up(); }
88 //////////////////////////////////////////////////////////////////////////
89 // Assignment
90 //////////////////////////////////////////////////////////////////////////
91 Grammar& Grammar::operator = (const Grammar& G)
92 { clean_up();
93 persistent = false;
94 productions = new Production [ G.size() ];
95 startSymbol = G.startSymbol;
96 startProduction = 0;
97 for (int i = 0; i < G.size(); i++) {
98 Production P = G[i];
99 int len = G.size(P) + 1;
100 Production Q = productions[i] = new Symbol [ len ];
101 for (int j = 0; j < len; j++) Q[j] = P[j];
102 if (Q[0] == startSymbol) startProduction = Q;
104 if (G.equiv_classes) {
105 equiv_classes = new EquivMap [ equiv_classes_size = G.map_size() ];
106 for (int k = 0; k < G.map_size(); k++)
107 equiv_classes[k] = G.equiv_classes[k];
108 equiv_map = equiv_classes + (G.equiv_map - G.equiv_classes);
109 } else equiv_classes = 0;
110 number_of_productions = G.number_of_productions;
111 minTerminal = G.minTerminal;
112 maxTerminal = G.maxTerminal;
113 minNonTerminal = G.minNonTerminal;
114 maxNonTerminal = G.maxNonTerminal;
115 maxNormalNonTerminal = G.maxNormalNonTerminal;
116 { symbol_names = new const char * [ G.max_non_terminal() + 1 ];
117 for (int c = G.max_non_terminal(); c >= 0; c--)
118 symbol_names[c] = G.symbol_names[c];
120 if (G.action_map)
121 { action_map = new Rule [ G.action_map_size ];
122 memcpy (action_map, G.action_map, G.action_map_size * sizeof(Rule));
123 action_map_size = G.action_map_size;
124 } else
125 { action_map = 0;
126 action_map_size = 0;
128 return *this;
131 //////////////////////////////////////////////////////////////////////////
132 // Cleaning up
133 //////////////////////////////////////////////////////////////////////////
134 void Grammar::clean_up()
135 { if (! persistent) {
136 for (int i = 0; i < number_of_productions; i++)
137 delete [] productions[i];
138 delete [] productions;
139 delete [] equiv_classes;
140 delete [] symbol_names;
141 delete [] action_map;
145 //////////////////////////////////////////////////////////////////////////
146 // Returning the size of a production: i.e. length
147 //////////////////////////////////////////////////////////////////////////
148 int Grammar::size(Production P) const
149 { int n;
150 for (n = 0; *P != Grammar::END_PRODUCTION; P++, n++);
151 return n;
154 //////////////////////////////////////////////////////////////////////////
155 // Returning the length of a production: i.e. length sans action symbols
156 //////////////////////////////////////////////////////////////////////////
157 int Grammar::length(Production P) const
158 { int n;
159 for (n = 0; *P != Grammar::END_PRODUCTION; P++)
160 if (! isAction(*P)) n++;
161 return n;
164 //////////////////////////////////////////////////////////////////////////////
165 // Make a grammar canonical, i.e. all action symbols has to be
166 // on the rightmost end. Embedded action symbols are hoisted by
167 // introducing new null productions.
168 //////////////////////////////////////////////////////////////////////////////
169 Grammar Grammar::makeCanonical() const
170 { int extra_productions = 0;
172 ///////////////////////////////////////////////////////////////////////////
173 // Count the number of extra productions needed
174 ///////////////////////////////////////////////////////////////////////////
175 Symbol min_action = First_action;
176 { for (int i = 0; i < size(); i++) {
177 Symbol X;
178 for (Production P = rhs(i); (X = *P) != END_PRODUCTION; P++)
179 if (isAction(X))
180 { if (P[1] != END_PRODUCTION) extra_productions++;
181 if (X < min_action) min_action = X;
185 int act_map_size = First_action - min_action + 1;
186 Rule * act_map = new Rule [act_map_size];
187 { for (int i = 0; i < act_map_size; i++)
188 act_map[i] = -1;
191 ///////////////////////////////////////////////////////////////////////////
192 // Compute the new production array by introducing new
193 // null productions whenever we find an embedded action symbol
194 ///////////////////////////////////////////////////////////////////////////
195 Production * pr = new Production [ extra_productions + size() + 1];
196 int k = size();
197 NonTerminal A = max_non_terminal();
198 NonTerminal maxNormalNT = max_non_terminal();
199 for (int i = 0; i < size(); i++) {
200 Production P = (*this)[i];
201 int leng = size(P) + 1;
202 Production Q = pr[i] = new Symbol [ leng ];
203 Symbol X;
204 Q[0] = P[0];
205 for ( ; (X = *P) != Grammar::END_PRODUCTION; P++) {
206 if (isAction(X))
207 { if (P[1] != Grammar::END_PRODUCTION)
208 { pr[k] = new Symbol [ 3 ];
209 pr[k][0] = ++A;
210 pr[k][1] = X;
211 pr[k][2] = Grammar::END_PRODUCTION;
212 act_map[First_action - X] = k;
213 k++;
214 *Q++ = A;
215 } else {
216 act_map[First_action - X] = i;
218 } else {
219 *Q++ = X;
222 *Q = Grammar::END_PRODUCTION;
225 ///////////////////////////////////////////////////////////////////////////
226 // Create a new start production
227 ///////////////////////////////////////////////////////////////////////////
228 NonTerminal new_start;
229 { pr[k] = new Symbol [ 4 ];
230 pr[k][0] = new_start = ++A;
231 pr[k][1] = startSymbol;
232 pr[k][2] = EOF;
233 pr[k][3] = END_PRODUCTION;
234 k++;
237 ///////////////////////////////////////////////////////////////////////////
238 // Compute the equiv class map
239 ///////////////////////////////////////////////////////////////////////////
240 int map_size = A - EOF + 1;
241 EquivMap * equiv = new EquivMap [ map_size ];
242 EquivMap * map = equiv - EOF;
243 int num = 1;
244 int max_term;
245 { int i;
246 for (i = 0; i < map_size; i++) equiv[i] = 0;
248 // Map terminals first.
250 for (i = 0; i < k; i++) {
251 Symbol X;
252 for (Production P = pr[i] + 1; (X = *P) != END_PRODUCTION; P++)
253 if (isTerminal(X) && map[X] == 0) map[X] = num++;
255 map [ EOF ] = max_term = num++;
257 // Now map simple non-terminals
259 for (i = 0; i < size(); i++)
260 if (map[ pr[i][0] ] == 0) map[ pr[i][0] ] = num++;
262 // Now map non-terminals generated by our transformation
264 for (i = size(); i < k; i++)
265 if (map[ pr[i][0] ] == 0) map[ pr[i][0] ] = num++;
268 // Now remap all productions
270 for (i = 0; i < k; i++) {
271 Symbol X;
272 for (Production P = pr[i]; (X = *P) != END_PRODUCTION; P++)
273 if (isTerminal(X) || isNonTerminal(X)) *P = map[ X ];
277 ///////////////////////////////////////////////////////////////////////////
278 // Create a new symbols mapping
279 ///////////////////////////////////////////////////////////////////////////
280 const char ** sym_names = new const char * [ num ];
281 memset(sym_names, 0, sizeof(const char *) * num);
282 for (int c = min_terminal(); c <= max_non_terminal(); c++)
283 sym_names[ map [ c ] ] = symbol_names[ c ];
284 sym_names[ max_term ] = "$";
286 return Grammar( pr, k, 0, max_term, map[ new_start ],
287 sym_names, map_size, equiv, map, act_map, act_map_size,
288 map[ maxNormalNT ], false );
291 //////////////////////////////////////////////////////////////////////////////
292 // Printing methods
293 //////////////////////////////////////////////////////////////////////////////
295 //////////////////////////////////////////////////////////////////////////////
296 // Print a symbol using the translation table given
297 //////////////////////////////////////////////////////////////////////////////
298 ostream& Grammar::print(ostream& out, Symbol c) const
300 if (c > Grammar::END_PRODUCTION) {
301 if (c == EOF) return out << "$"; // eof symbol
302 if (c == max_non_terminal()) return out << "<start>";
303 if (symbol_names && symbol_names[c]) return out << symbol_names[c];
304 if (isNonTerminal(c)) {
305 char buf[16];
306 ostrstream F(buf,sizeof(buf));
307 F << '<' << (int)c << '>' << ends;
308 return out << F.str();
310 if (c >= 0 && c <= 255) {
311 char buf [16];
312 print_char(buf+1,c);
313 return out << '\'' << buf << '\'';
316 return out;
319 //////////////////////////////////////////////////////////////////////////////
320 // Print a production
321 //////////////////////////////////////////////////////////////////////////////
322 ostream& Grammar::print(ostream& out, Production P, Bool lhs) const
323 { int i;
324 if (P == 0) return out << "[production ???]";
325 if (lhs) { print(out,P[0]) << "\t->\t"; i = 1; } else i = 0;
326 for ( ; P[i] != END_PRODUCTION; i++)
327 print(out,P[i]) << ' ';
328 return out;
331 //////////////////////////////////////////////////////////////////////////////
332 // Print an item
333 //////////////////////////////////////////////////////////////////////////////
334 ostream& Grammar::print(ostream& out, int pos, Production P) const
335 { if (P == 0) return out << "[production ???]";
336 print(out,P[0]) << "\t->\t";
337 for (int i = 1; ; i++) {
338 if (pos == i - 1) out << ". ";
339 if (P[i] == END_PRODUCTION) break;
340 if (isAction(P[i])) {
341 if (P[i+1] != END_PRODUCTION) {
342 out << '@' << (- P[i] + END_PRODUCTION) << ' ';
344 } else {
345 print(out,P[i]) << ' ';
348 return out;
351 //////////////////////////////////////////////////////////////////////////////
352 // Print the grammar: one on each line and omit printing the same
353 // lhs non-terminal on subsequent lines.
354 //////////////////////////////////////////////////////////////////////////////
355 ostream& operator << (ostream& out, const Grammar& G)
356 { NonTerminal A = 0;
357 for (int i = 0; i < G.size(); i++) {
358 Production P = G[i];
359 if (P == 0)
360 { out << "???\n";
361 } else
362 { out << '{' << i << "}\t";
363 if (A == P[0])
364 G.print(out << "\t| \t", P + 1, false);
365 else
366 G.print(out, P);
367 out << '\n';
368 A = P[0];
371 return out;