grammar/bookkeeping.lexer.g

   1
   2
   3 /* LEXER
   4  */
   5 class BookkeepingLexer extends Lexer;
   6
     // Grammar-wide lexer options:
     //   k           - lookahead depth (in characters) used to choose between alternatives.
     //   exportVocab - name under which this lexer's token vocabulary is exported.
     // NOTE(review): charVocabulary is commented out, so ANTLR's default character
     // vocabulary applies; that vocabulary bounds what '.' and '~' can match in the
     // rules of this grammar - confirm the default is what is wanted.
   7 options {
   8
   9     k=10; // needed for newline junk
  10     //charVocabulary='\u0000'..'\u007F'; // allow ascii
  11         exportVocab=BookkeepingLexer;
  12 }
  13
  14
     // Punctuation tokens. Each rule matches a single character and its action
     // only prints a trace line to standard output.
  15 LEFT_PAREN:     '('     { System.out.println("LEFT PARENTHESIS '('"); };
  16 RIGHT_PAREN:    ')' { System.out.println("RIGHT PARENTHESIS ')'"); };
     // DELIMITER prints its trace line followed by two empty lines.
  17 DELIMITER:              ',' {
  18                                         System.out.println("DELIMITER ','");
  19                                         System.out.println( "" );
  20                                         System.out.println( "" );
  21                                 };
  22
  23
  24 // OPTIONS
     // Each option token is the literal flag, then mandatory whitespace (WS), then
     // the option's value characters. The value part is a ( ... )* loop, so the
     // value may be empty, but the WS after the flag is always required.
     // Only OPT_ENTRY prints a trace line.
  25 OPT_ENTRY:              "-entry" WS( 'A'..'Z' | 'a'..'z' | '0'..'9' )*  { System.out.println("-entry"); };
  26 OPT_ACCOUNT:    "-account" WS( 'A'..'Z' | 'a'..'z' | '0'..'9' )*;
  27 OPT_JOURNAL:    "-journal" WS( 'A'..'Z' | 'a'..'z' | '0'..'9' )*;
  28 OPT_NAME:               "-name" WS( 'A'..'Z' | 'a'..'z' | '0'..'9' )*;
  29 OPT_TYPE:               "-type" WS( 'A'..'Z' | 'a'..'z' | '0'..'9' )*;
     // Value must be exactly "debit" or "credit" (not optional, unlike the others).
  30 OPT_CWEIGHT:    "-counterWeight" WS("debit"|"credit");
     // Amount: one or more digits, '.', then exactly two digits.
     // NOTE(review): the trailing '*' accepts zero or several amounts back to back
     // (e.g. "1.002.00"); presumably a single optional value was intended - confirm.
  31 OPT_AMOUNT:             "-amount" WS(('0'..'9')+'.'('0'..'9')('0'..'9'))*;
  32 OPT_ID:                 "-id" WS( 'A'..'Z' | 'a'..'z' | '0'..'9' )*;
  33 OPT_ENTRYNUM:   "-entrynum" WS('0'..'9')*;
     // Date: two digits '/' two digits '/' four digits; digit ranges are not checked.
     // NOTE(review): as with OPT_AMOUNT, '*' accepts zero or repeated dates - confirm.
  34 OPT_DATE:               "-date" WS(('0'..'9')('0'..'9')'/'('0'..'9')('0'..'9')'/'('0'..'9')('0'..'9')('0'..'9')('0'..'9'))*;
  35 OPT_FILE:               "-F" WS( 'A'..'Z' | 'a'..'z' | '0'..'9' )*;
  36
  37
  38 // COMMANDS
     // One token per command keyword; every action prints the token name as a trace.
     // Each keyword sits in a ( ... )+ loop, so the keyword repeated without
     // separators (e.g. "addadd") still lexes as a single token.
  39 COMMAND_CREATE:         ("create")+             { System.out.println("COMMAND_CREATE"); };
     // NOTE(review): COMMAND_ADD is the only command declared with '!', which
     // discards the matched text from the token - confirm this is intentional.
  40 COMMAND_ADD!:           ("add")+                { System.out.println("COMMAND_ADD"); };
  41 COMMAND_REMOVE:         ("remove")+             { System.out.println("COMMAND_REMOVE"); };
  42 COMMAND_REVERSE:        ("reverse")+            { System.out.println("COMMAND_REVERSE"); };
  43 COMMAND_FIND:           ("find")+                       { System.out.println("COMMAND_FIND"); };
  44 COMMAND_LOAD:           ("load")+                       { System.out.println("COMMAND_LOAD"); };
  45 COMMAND_LIST:           ("list")+                       { System.out.println("COMMAND_LIST"); };
  46 COMMAND_LOGIN:          ("login")+                      { System.out.println("COMMAND_LOGIN"); };
  47 COMMAND_LOGOUT:         ("logout")+             { System.out.println("COMMAND_LOGOUT"); };
  48 COMMAND_EXIT:           ("exit")+               { System.out.println("COMMAND_EXIT"); };
  49
  50
  51
  52 // MISCELLANEOUS
     // Command terminator: a single ';'. The action only prints a trace line.
  53 END_COMMAND:    ';'     { System.out.println("END COMMAND ';'"); };
  54 /*WHITESPACE:   ( ' ' | '\r' | '\n' | '\t' ) {
  55                 System.out.println("");
  56                 System.out.println("WHITE SPACE ' '");
  57                 $setType(Token.SKIP);
  58         };
  59 */
  60
  61
  62 /**
  63  * XML Grammar
  64  */
     // DOCTYPE declaration:
     //   "<!DOCTYPE" WS root-name WS [external id] [internal DTD subset] '>'
     // The external id is either SYSTEM <string> or PUBLIC <string> <string>.
     // Each recognised part is printed as a trace line. The '!' on the rule name
     // discards the matched text from the resulting token.
     // NOTE(review): WS after the root element name is mandatory, so
     // "<!DOCTYPE foo>" (no space before '>') does not match - confirm intended.
  65 DOCTYPE!
  66     :
  67         "<!DOCTYPE" WS rootElementName:NAME
  68         { System.out.println("ROOTELEMENT: "+rootElementName.getText()); }
  69         WS
  70         (
  71             ( "SYSTEM" WS sys1:STRING
  72                 { System.out.println("SYSTEM: "+sys1.getText()); }
  73
  74             | "PUBLIC" WS pub:STRING WS sys2:STRING
  75                 { System.out.println("PUBLIC: "+pub.getText()); }
  76                 { System.out.println("SYSTEM: "+sys2.getText()); }
  77             )
  78             ( WS )?
  79         )?
  80         ( dtd:INTERNAL_DTD ( WS )?
  81             { System.out.println("DTD: "+dtd.getText()); }
  82
  83         )?
  84                 '>'
  85         ;
  86
     // Internal DTD subset: '[' ... ']'. The brackets themselves are dropped from
     // the token text ('!'); everything in between is kept. Quoted strings are
     // matched as a unit so that a ']' inside a string does not end the subset.
  87 protected INTERNAL_DTD
  88     :
  89         '['!
  90         // reports warning, but is absolutely ok (checked generated code)
  91         // besides this warning was not generated with k=1 which is
  92         // enough for this rule...
             // The loop must be non-greedy: with greedy=true the wildcard '.' also
             // consumes the closing ']' (and everything after it), so the rule could
             // never reach its terminator. Non-greedy exits as soon as ']' is seen.
  93         ( options {greedy=false;} : NL
  94         | STRING // handle string specially to avoid to mistake ']' in string for end dtd
  95         | .
  96         )*
  97         ']'!
  98     ;
  99
 100
     // Processing instruction: "<?" target [WS] (attribute [WS])* "?>".
     // Prints "XMLDECL: <target>" when the target is "xml" (case-insensitive),
     // otherwise "PI: <target>". Protected: only matched when another rule invokes
     // it. The '!' on the rule name discards the matched text.
 101 protected PI!
 102     :
 103         // { AttributesImpl attributes = new AttributesImpl(); }
 104         "<?"
 105         target:NAME
 106         ( WS )?
 107                 ( ATTR /*[attributes]*/ ( WS )? )*
 108         {
 109             if (target.getText().equalsIgnoreCase("xml")) {
 110                 // this is the xml declaration, handle it
 111                 System.out.println("XMLDECL: "+target.getText());
 112             } else {
 113                 System.out.println("PI: "+target.getText());
 114             }
 115         }
 116                 "?>"
 117         ;
 118
 119 //////////////////
 120
     // XML comment: "<!--" body "-->". The body is matched by COMMENT_DATA and
     // printed as a trace line; '!' discards the matched text from the token.
 121 COMMENT!
 122         :       "<!--" c:COMMENT_DATA "-->"
 123         { System.out.println("COMMENT: "+c.getText()); }
 124         ;
 125
     // Body of an XML comment: any run of whitespace, names and the punctuation
     // characters accepted by CHAR. Invoked by COMMENT, which supplies the
     // surrounding "<!--" and "-->".
 126 protected COMMENT_DATA
 127     :
 128         //( options {greedy=true;} : NL
 129         //| .
 130         //)*
             // Non-greedy on purpose: CHAR matches '-' and '>', so a greedy loop
             // would swallow the "-->" terminator that COMMENT expects next and the
             // comment could never be closed. Non-greedy leaves the loop as soon as
             // the lookahead matches what follows this rule.
             // NOTE(review): NAME's own greedy loop accepts '-', so a name written
             // directly against the terminator ("foo-->") presumably still absorbs
             // the dashes; whitespace before "-->" avoids this - confirm.
 131         ( options {greedy=false;} : WS | NAME | CHAR )*
 132     ;
 133
 134 //////////////////
 135
     // Closing tag: "</" name [WS] '>'. Prints "ENDTAG: <name>". Protected, so it
     // is only matched when another rule invokes it; '!' discards the matched text.
 136 protected ENDTAG! :
 137         "</" g:NAME ( WS )? '>'
 138         { System.out.println("ENDTAG: "+g.getText()); }
 139         ;
 140
 141
     // Declares the EMPTYTAG token type, which STARTTAG_VS_EMPTYTAG assigns via
     // $setType. NOTE(review): the literal "ET" looks like a placeholder body so
     // the rule is well-formed - confirm it is never meant to match real input.
 142 protected EMPTYTAG:
 143                 "ET";
 144
 145
     // Declares the STARTTAG token type, which STARTTAG_VS_EMPTYTAG assigns via
     // $setType. NOTE(review): the literal "ST" looks like a placeholder body so
     // the rule is well-formed - confirm it is never meant to match real input.
 146 protected STARTTAG:
 147                 "ST";
 148
     // Matches the opening part of a tag (STARTCHUNK), optional whitespace, and
     // then decides by the closing delimiter which token type to report:
     //   "/>"  -> EMPTYTAG
     //   '>'   -> STARTTAG
     // Each branch also prints a trace line naming the chosen type.
 149 protected STARTTAG_VS_EMPTYTAG
 150         :
 151                 STARTCHUNK (WS)?
                     // Trace text corrected from the misspelled "EMTYTAG".
 152                 (   "/>"     { $setType(EMPTYTAG); System.out.println("EMPTYTAG"); }
 153                         |
 154                         '>'     { $setType(STARTTAG); System.out.println("STARTTAG"); }
 155                 );
 156
     // Opening part of a tag, up to but not including the closing "/>" or '>':
     //   '<' name [WS] (attribute [WS])*
     // Prints "STARTCHUNK: <name>". '!' discards the matched text from the token.
 157 protected STARTCHUNK! :
 158         // XX should org.xml.sax.AttributesImpl be replaced by something else?
 159         // { AttributesImpl attributes = new AttributesImpl(); }
 160         (
 161                         '<'
 162                         g:NAME
                             // Whitespace after the name is optional, as in ENDTAG and
                             // PI: a tag without attributes ("<tag>" or "<tag/>") has
                             // nothing between the name and its closing delimiter.
 163                         ( WS )?
 164                         ( ATTR (WS)? )*
 165                 )
 166                 { System.out.println("STARTCHUNK: "+g.getText()); } ;
 167
 168
 169 /* Right now, not so necessary to determine that element will contain 'parsed'
 170  * character data
 171 PCDATA! :
 172         p:PCDATA_DATA
 173         { System.out.println("PCDATA: "+p.getText()); }
 174         ;
 175
 176 protected PCDATA_DATA
 177         :
 178         ( options {greedy=true;} : NL
 179         //| ~( '<' | '\n' | '\r' )
 180         )+
 181     ;
 182 */
 183
     // CDATA section: "<![CDATA[" body "]]>". The body is matched by CDATA_DATA
     // and printed as a trace line; '!' discards the matched text from the token.
 184 CDATABLOCK!
 185         : "<![CDATA[" p:CDATA_DATA "]]>"
 186         { System.out.println("CDATABLOCK: "+p.getText()); }
 187         ;
 188
     // Body of a CDATA section: arbitrary characters. Newlines go through NL so
     // the lexer's line counter stays correct. Invoked by CDATABLOCK, which
     // supplies the surrounding "<![CDATA[" and "]]>".
 189 protected CDATA_DATA
 190     :
             // Non-greedy on purpose: a greedy wildcard loop would also consume the
             // "]]>" terminator (and the rest of the input), so the section could
             // never be closed. Non-greedy leaves the loop as soon as the lookahead
             // matches what follows this rule.
 191         ( options {greedy=false;} : NL
 192         | .
 193         )*
 194     ;
 195
     // Attribute: name [WS] '=' [WS] quoted-value. The value is matched with
     // STRING_NO_QUOTE, so value.getText() excludes the surrounding quotes.
     // Prints "ATTRIBUTE: <name>=<value>".
 196 protected ATTR // [AttributesImpl attributes]
 197         :       name:NAME ( WS )? '=' ( WS )? value:STRING_NO_QUOTE
 198         /*
 199                 { attributes.addAttribute("", "", name.getText(), "CDATA",
 200                 value.getText());
 201         }
 202         */
 203         { System.out.println("ATTRIBUTE: "+name.getText()+"="+value.getText()); }
 204         ;
 205
     // Quoted string (double- or single-quoted) whose delimiters are dropped from
     // the token text by '!'. The content is any run of characters other than the
     // opening quote character; no escape sequences are recognised.
 206 protected STRING_NO_QUOTE
 207         :       '"'! (~'"')* '"'!
 208         |       '\''! (~'\'')* '\''!
 209         ;
 210
     // Quoted string (double- or single-quoted) with the quotes kept in the token
     // text. The content is any run of characters other than the opening quote
     // character; no escape sequences are recognised.
 211 protected STRING
 212         :       '"' (~'"')* '"'
 213         |       '\'' (~'\'')* '\''
 214         ;
 215
     // Name: starts with a letter, '_' or ':', followed by as many NAMECHARs as
     // possible (the loop is explicitly greedy).
 216 protected NAME
 217         :       ( LETTER | '_' | ':') ( options {greedy=true;} : NAMECHAR )*
 218         ;
 219
     // A character allowed after the first character of a NAME:
     // letter, digit, '.', '-', '_' or ':'.
 220 protected NAMECHAR
 221         : LETTER | DIGIT | '.' | '-' | '_' | ':'
 222         ;
 223
     // A single decimal digit.
 224 protected DIGIT
 225         :       '0'..'9'
 226         ;
 227
     // A single ASCII letter, lower or upper case.
 228 protected LETTER
 229         : 'a'..'z'
 230         | 'A'..'Z'
 231         ;
 232
     // A single character from a fixed set: the punctuation '/', '<', '>', single
     // quote, double quote, '.', '-', '_', ':' plus ASCII letters and digits.
     // Referenced by COMMENT_DATA.
 233 protected CHAR: ( options {greedy=true;}
 234         : '/' | '<' | '>' | '\'' | "\"" | '.' | '-' | '_' | ':'
 235         | 'a'..'z'
 236         | 'A'..'Z'
 237         | '0'..'9')
 238         ;
 239
     // Whitespace: one or more of space, '\r', '\n', '\t'. The action marks the
     // match as Token.SKIP. The rule is protected, so it is only matched where
     // another rule invokes it explicitly.
     // NOTE(review): newlines matched here do not go through NL, so newline() is
     // not called for them - confirm line counting is not needed for this path.
 240 protected WS:   ( options{ greedy=true; } :  ' ' | '\r' | '\n' | '\t' )+
 241         {
 242                 //System.out.println("");
 243                 //System.out.println("WHITE SPACE ' '");
 244                 $setType(Token.SKIP);
 245         };
 246
 247
 248 /* protected WS
 249         :       (       options {
 250                 greedy = true;
 251                         }
 252                 :       ' '
 253                 |       ESC
 254                 )+
 255         { System.out.println("WHITESPACE"); }
 256         ;
 257 */
 258
 259
     // A tab or a newline (NL). In the visible grammar it is referenced only from
     // the commented-out alternative WS rule above.
 260 protected ESC
 261         : ( '\t'
 262                 |       NL
 263                 )
 264         ;
 265
 266 // taken from html.g
 267 // Alexander Hinds & Terence Parr
 268 // from antlr 2.5.0: example/html
 269 //
 270 // '\r' '\n' can be matched in one alternative or by matching
 271 // '\r' in one iteration and '\n' in another.  I am trying to
 272 // handle any flavor of newline that comes in, but the language
 273 // that allows both "\r\n" and "\r" and "\n" to all be valid
 274 // newline is ambiguous.  Consequently, the resulting grammar
 275 // must be ambiguous.  I'm shutting this warning off.
     //
     // Matches exactly one newline in any of the three forms "\n", "\r\n" or "\r"
     // and calls newline() once per match.
 276 protected NL
 277     : ( options {
 278         generateAmbigWarnings=false;
 279         greedy = true;
 280     }
 281                 : '\n'
 282                 |       "\r\n"
 283                 |       '\r'
 284                 )
 285                 { newline(); }
 286         ;
 287
 288
     // An XML element used as a literal value:
     //   [PI] [WS] then one of
     //     (a) a tag, any mix of WS / PI / COMMENT / CDATABLOCK, zero or more
     //         nested TOKEN_LITERALs, and a closing ENDTAG; or
     //     (b) a single tag on its own.
     // Either way a trace line with the tag token's text is printed, followed by
     // an empty line. This is the only non-protected rule in the visible grammar
     // that invokes PI, STARTTAG_VS_EMPTYTAG and ENDTAG.
     // NOTE(review): alternatives (a) and (b) both begin with
     // STARTTAG_VS_EMPTYTAG, so the choice rests on lookahead alone; presumably
     // ANTLR reports this as nondeterministic - confirm that lone empty tags
     // still take alternative (b).
 289 TOKEN_LITERAL:
 290         (
 291                 (PI)? (WS)?
 292                 (
 293                         ( tag:STARTTAG_VS_EMPTYTAG
 294                                 ( WS | PI   | COMMENT  | CDATABLOCK )*
 295                                 (TOKEN_LITERAL)*
 296                         ENDTAG )
 297                         { System.out.println("          TOKEN LITERAL ["+ tag.getText() +"]"); }
 298                         |
 299                         (tag2:STARTTAG_VS_EMPTYTAG)
 300                         { System.out.println("          TOKEN LITERAL ["+ tag2.getText() +"]"); }
 301                         //|
 302                         //(delim:',') { $setType(DELIMITER); }
 303                 )
 304         )
 305         {
 306                 System.out.println("");
 307         };
 308
 309
 310