Removed dep on API
[ninja.git] / src / op5 / ninja_sdk / parsegen / LalrGrammarParser.php
blob1e868cd81e20972d5fc3ac2d4be9f4072c80c181
1 <?php
/**
 * Hand-written parser for a line-oriented grammar definition text.
 *
 * Each line of the input is one of:
 *
 *  - an empty line (only spaces/tabs), which is ignored;
 *  - a comment starting with "--", which is ignored up to the end of the line;
 *  - a token definition:  NAME /regexp-to-end-of-line
 *  - an error rule:       NAME : ITEM = error
 *  - a grammar rule:      NAME : ITEM = ITEM ITEM ...
 *
 * parse() returns an array with up to three keys, 'tokens', 'rules' and
 * 'errors', each mapping NAME to the parsed value of the line.
 */
class LalrGrammarParser {
	protected $expr; /* Protected, just so we can add custom acceptors... */
	protected $ptr;

	/* ********************************************
	 * Parser access method
	 *********************************************/

	/**
	 * Parse a complete grammar definition.
	 *
	 * @param string $expr Grammar definition text.
	 * @return array Result of run(): merged 'tokens' / 'rules' / 'errors' maps.
	 * @throws Exception If a line is malformed or input remains after the
	 *                   last line that could be parsed.
	 */
	public function parse( $expr ) {
		$this->expr = $expr;
		$this->ptr = 0;

		$result = $this->run();

		/* Skip trailing whitespace on a copy of the pointer, so that
		 * $this->ptr still marks where parsing actually stopped. */
		$curptr = $this->ptr;
		while( ctype_space( $this->peek( $curptr ) ) ) $curptr++;
		if( $curptr < strlen( $this->expr ) ) {
			$this->error( 'Expected end' );
		}

		return $result;
	}

	/* ********************************************
	 * Lexer-internal helpers
	 *********************************************/

	/**
	 * Return the single character at $pos, or '' when $pos is outside the input.
	 *
	 * substr() returns false instead of '' for out-of-range offsets on older
	 * PHP versions; the cast gives every caller one consistent end-of-input
	 * marker that is safe to compare with ===.
	 */
	private function peek( $pos ) {
		return (string) substr( $this->expr, $pos, 1 );
	}

	/**
	 * Advance the pointer past spaces and tabs (never past a line break).
	 */
	protected function trimLeft() {
		/* Compare characters explicitly: strpos() with the empty string that
		 * marks end of input reports a match on PHP 8, which would make a
		 * strpos-based loop run forever. */
		$c = $this->peek( $this->ptr );
		while( $c === ' ' || $c === "\t" ) {
			$this->ptr++;
			$c = $this->peek( $this->ptr );
		}
	}

	/**
	 * Advance the pointer to the next "\n" (which is not consumed) or to the
	 * end of the input.
	 */
	protected function trimLine() {
		$c = $this->peek( $this->ptr );
		while( $c !== '' && $c !== "\n" ) {
			$this->ptr++;
			$c = $this->peek( $this->ptr );
		}
	}

	/* ********************************************
	 * Lexer methods
	 *********************************************/

	/**
	 * Try to consume one of the given literal symbols at the pointer.
	 *
	 * Symbols are tested in the order given, so a symbol that is a prefix of
	 * another one should be listed after it.
	 *
	 * @param array $tokenlist Literal symbols to try.
	 * @return mixed The matched symbol, or false if none matched (the pointer
	 *               is then only moved past leading spaces/tabs).
	 */
	protected function acceptSym( $tokenlist ) {
		$this->trimLeft();

		/* Test tokens */
		foreach( $tokenlist as $token ) {
			$length = strlen( $token );
			if( (string) substr( $this->expr, $this->ptr, $length ) === (string) $token ) {
				$this->ptr += $length;
				return $token;
			}
		}
		return false;
	}

	/**
	 * Like acceptSym(), but a missing symbol is an error.
	 *
	 * @param array $tokenlist Literal symbols to try.
	 * @return mixed The matched symbol.
	 * @throws Exception If none of the symbols is present.
	 */
	protected function expectSym( $tokenlist ) {
		$sym = $this->acceptSym( $tokenlist );
		if( $sym === false )
			$this->error('Unexpected token, expected '.implode(',',$tokenlist));
		return $sym;
	}

	/**
	 * Try to consume a keyword: a run of letters, underscores and (after the
	 * first character) digits.
	 *
	 * @param array|false $keywordlist      Accepted keywords, or false to
	 *                                      accept any keyword.
	 * @param bool        $case_insensitive Lower-case the keyword before it is
	 *                                      compared and returned.
	 * @param bool        $numeric          Not used by this implementation.
	 * @return string|false The keyword, or false if there is no keyword at the
	 *                      pointer or it is not in $keywordlist. On failure
	 *                      the pointer is only moved past leading spaces/tabs.
	 */
	protected function acceptKeyword( $keywordlist = false, $case_insensitive = false, $numeric = false ) {
		$this->trimLeft();

		/* Peek at next keyword */
		$curptr = $this->ptr;
		$buffer = '';
		$c = $this->peek( $curptr );
		while( ctype_alpha( $c ) || $c === '_' || ($buffer !== '' && ctype_digit( $c )) ) {
			$curptr++;
			$buffer .= $c;
			$c = $this->peek( $curptr );
		}

		/* No keyword characters at all is never a match, not even when any
		 * keyword is acceptable; otherwise expectKeyword() could not fail. */
		if( $buffer === '' )
			return false;

		if( $case_insensitive )
			$buffer = strtolower( $buffer );
		if( $keywordlist === false || in_array( $buffer, $keywordlist, true ) ) {
			$this->ptr = $curptr;
			return $buffer;
		}
		return false;
	}

	/**
	 * Like acceptKeyword(), but a missing keyword is an error.
	 *
	 * @param array|false $keywordlist      See acceptKeyword().
	 * @param bool        $case_insensitive See acceptKeyword().
	 * @param bool        $numeric          See acceptKeyword().
	 * @return string The keyword.
	 * @throws Exception If no acceptable keyword is present.
	 */
	protected function expectKeyword( $keywordlist = false, $case_insensitive = false, $numeric = false ) {
		$sym = $this->acceptKeyword( $keywordlist, $case_insensitive, $numeric );
		if( $sym === false )
			$this->error('Unexpected token, expected '.(($keywordlist===false)?('keyword'):implode(',',$keywordlist)));
		return $sym;
	}

	/**
	 * Try to consume the end of the current line.
	 *
	 * The end of the input counts as a line break as well.
	 *
	 * @return bool True if the pointer was at a "\n" (now consumed) or at the
	 *              end of the input, false otherwise.
	 */
	public function acceptLinebreak() {
		$this->trimLeft();
		$c = $this->peek( $this->ptr );
		if( $c === "\n" ) {
			$this->ptr++;
			return true;
		}
		/* At end of input there is nothing to consume; keep the pointer
		 * inside the buffer. */
		return $c === '';
	}

	/**
	 * Like acceptLinebreak(), but anything else is an error.
	 *
	 * @return bool Always true.
	 * @throws Exception If the rest of the line is not empty.
	 */
	protected function expectLinebreak() {
		$sym = $this->acceptLinebreak();
		if( $sym === false )
			$this->error('Unexpected token, expected linebreak');
		return $sym;
	}

	/**
	 * Try to consume a regular expression: everything from a "/" up to, but
	 * not including, the next "\n" or the end of the input.
	 *
	 * The text is taken verbatim, trailing blanks included; it is not checked
	 * to be a valid expression.
	 *
	 * @return string|false The consumed text (starting with "/"), or false if
	 *                      the next non-blank character is not "/".
	 */
	public function acceptRegexp() {
		$this->trimLeft();

		$c = $this->peek( $this->ptr );
		if( $c !== '/' )
			return false;

		$buffer = '';
		while( $c !== '' && $c !== "\n" ) {
			$buffer .= $c;
			$this->ptr++;
			$c = $this->peek( $this->ptr );
		}

		return $buffer;
	}

	/* ********************************************
	 * Parser entry point
	 *********************************************/

	/**
	 * Parse lines until accept_line() reports that no further line can be
	 * read, merging the per-line results.
	 *
	 * @return array Merged 'tokens' / 'rules' / 'errors' maps.
	 * @throws Exception If a line is malformed.
	 */
	public function run() {
		$result = array();
		while( false !== ($line = $this->accept_line()) ) {
			/* Todo: conflicting lines */
			$result = array_merge_recursive( $result, $line );
		}
		return $result;
	}

	/**
	 * Parse a single line at the pointer.
	 *
	 * @return array|false An empty array for blank and comment lines; an array
	 *                     with one of the keys 'tokens', 'errors' or 'rules'
	 *                     for a definition; false at the end of the input or
	 *                     when the line is not a definition (the pointer is
	 *                     then left at the start of the offending name).
	 * @throws Exception If the line starts like a definition but is malformed.
	 */
	public function accept_line() {
		$result = array();

		/* Match end-of-file */
		if( $this->ptr >= strlen( $this->expr ) ) {
			return false;
		}

		/* Trim empty lines */
		if( $this->acceptLinebreak() ) {
			return $result;
		}

		/* Trim comments */
		if( false !== $this->acceptSym(array('--')) ) {
			$this->trimLine();
			return $result;
		}

		/* Remember where the definition starts, to be able to back out */
		$start = $this->ptr;
		$name = $this->expectKeyword(false, false, true);

		/* Token */
		if( false !== ($re = $this->acceptRegexp()) ) {
			$result['tokens'] = array($name => $re);
			$this->expectLinebreak();
			return $result;
		}

		/* Grammar rules */
		if( false !== $this->acceptSym(array(':')) ) {
			/* Grammar rule; the keyword after ':' is always the first item */
			$items = array();
			$items[] = $this->expectKeyword(false, false, true);
			$this->expectSym(array('='));
			if( false !== $this->acceptKeyword(array('error'), false, true) ) {
				/* Error rule */
				$result['errors'] = array($name => $items);
				$this->expectLinebreak();
				return $result;
			} else {
				/* Regular grammar rule */
				while( false !== ($item = $this->acceptKeyword(false, false, true)) ) {
					$items[] = $item;
				}
				$result['rules'] = array($name => $items);
				$this->expectLinebreak();
				return $result;
			}
		}

		/* A name followed by neither a regexp nor ':' is not a definition.
		 * Give the name back, so that the caller sees unparsed input instead
		 * of silently losing it. */
		$this->ptr = $start;
		return false;
	}

	/**
	 * Abort parsing.
	 *
	 * @param string $message Description of the problem.
	 * @throws Exception Always.
	 */
	private function error($message) {
		throw new Exception("Error while parsing grammar file: ". $message);
	}
}